aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig20
-rw-r--r--arch/x86/ia32/ia32entry.S43
-rw-r--r--arch/x86/include/asm/alternative-asm.h4
-rw-r--r--arch/x86/include/asm/apic.h6
-rw-r--r--arch/x86/include/asm/apic_flat_64.h7
-rw-r--r--arch/x86/include/asm/apicdef.h1
-rw-r--r--arch/x86/include/asm/bitops.h76
-rw-r--r--arch/x86/include/asm/cmpxchg.h163
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h46
-rw-r--r--arch/x86/include/asm/cmpxchg_64.h43
-rw-r--r--arch/x86/include/asm/div64.h22
-rw-r--r--arch/x86/include/asm/e820.h10
-rw-r--r--arch/x86/include/asm/efi.h5
-rw-r--r--arch/x86/include/asm/hardirq.h1
-rw-r--r--arch/x86/include/asm/i387.h2
-rw-r--r--arch/x86/include/asm/insn.h7
-rw-r--r--arch/x86/include/asm/mach_timer.h2
-rw-r--r--arch/x86/include/asm/mc146818rtc.h4
-rw-r--r--arch/x86/include/asm/mce.h14
-rw-r--r--arch/x86/include/asm/memblock.h23
-rw-r--r--arch/x86/include/asm/microcode.h2
-rw-r--r--arch/x86/include/asm/mrst.h2
-rw-r--r--arch/x86/include/asm/numachip/numachip_csr.h167
-rw-r--r--arch/x86/include/asm/percpu.h53
-rw-r--r--arch/x86/include/asm/perf_event.h44
-rw-r--r--arch/x86/include/asm/pgtable.h2
-rw-r--r--arch/x86/include/asm/processor-flags.h1
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/spinlock.h15
-rw-r--r--arch/x86/include/asm/thread_info.h11
-rw-r--r--arch/x86/include/asm/topology.h2
-rw-r--r--arch/x86/include/asm/tsc.h2
-rw-r--r--arch/x86/include/asm/uaccess.h2
-rw-r--r--arch/x86/include/asm/x86_init.h3
-rw-r--r--arch/x86/kernel/acpi/boot.c10
-rw-r--r--arch/x86/kernel/amd_nb.c8
-rw-r--r--arch/x86/kernel/aperture_64.c4
-rw-r--r--arch/x86/kernel/apic/Makefile1
-rw-r--r--arch/x86/kernel/apic/apic.c113
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c9
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c294
-rw-r--r--arch/x86/kernel/apic/io_apic.c6
-rw-r--r--arch/x86/kernel/check.c34
-rw-r--r--arch/x86/kernel/cpu/amd.c9
-rw-r--r--arch/x86/kernel/cpu/centaur.c2
-rw-r--r--arch/x86/kernel/cpu/common.c14
-rw-r--r--arch/x86/kernel/cpu/cpu.h5
-rw-r--r--arch/x86/kernel/cpu/intel.c2
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c25
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c34
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c194
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c18
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c94
-rw-r--r--arch/x86/kernel/cpu/mcheck/threshold.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event.c262
-rw-r--r--arch/x86/kernel/cpu/perf_event.h51
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c90
-rw-r--r--arch/x86/kernel/cpu/powerflags.c3
-rw-r--r--arch/x86/kernel/cpu/proc.c4
-rw-r--r--arch/x86/kernel/cpuid.c2
-rw-r--r--arch/x86/kernel/dumpstack_32.c8
-rw-r--r--arch/x86/kernel/dumpstack_64.c8
-rw-r--r--arch/x86/kernel/e820.c61
-rw-r--r--arch/x86/kernel/early_printk.c2
-rw-r--r--arch/x86/kernel/entry_32.S4
-rw-r--r--arch/x86/kernel/entry_64.S31
-rw-r--r--arch/x86/kernel/head.c2
-rw-r--r--arch/x86/kernel/head32.c7
-rw-r--r--arch/x86/kernel/head64.c7
-rw-r--r--arch/x86/kernel/hpet.c8
-rw-r--r--arch/x86/kernel/irq.c11
-rw-r--r--arch/x86/kernel/irqinit.c2
-rw-r--r--arch/x86/kernel/jump_label.c2
-rw-r--r--arch/x86/kernel/microcode_amd.c209
-rw-r--r--arch/x86/kernel/microcode_core.c69
-rw-r--r--arch/x86/kernel/mpparse.c12
-rw-r--r--arch/x86/kernel/msr.c2
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/process_32.c6
-rw-r--r--arch/x86/kernel/process_64.c15
-rw-r--r--arch/x86/kernel/ptrace.c3
-rw-r--r--arch/x86/kernel/setup.c42
-rw-r--r--arch/x86/kernel/smpboot.c3
-rw-r--r--arch/x86/kernel/trampoline.c4
-rw-r--r--arch/x86/kernel/traps.c7
-rw-r--r--arch/x86/kernel/tsc.c6
-rw-r--r--arch/x86/kernel/tsc_sync.c4
-rw-r--r--arch/x86/kernel/vsyscall_64.c77
-rw-r--r--arch/x86/kernel/x86_init.c1
-rw-r--r--arch/x86/kvm/i8254.c10
-rw-r--r--arch/x86/kvm/x86.c19
-rw-r--r--arch/x86/lib/inat.c9
-rw-r--r--arch/x86/lib/insn.c4
-rw-r--r--arch/x86/lib/string_32.c8
-rw-r--r--arch/x86/lib/x86-opcode-map.txt606
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/extable.c2
-rw-r--r--arch/x86/mm/fault.c22
-rw-r--r--arch/x86/mm/init.c8
-rw-r--r--arch/x86/mm/init_32.c36
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/mm/memblock.c348
-rw-r--r--arch/x86/mm/memtest.c33
-rw-r--r--arch/x86/mm/numa.c37
-rw-r--r--arch/x86/mm/numa_32.c10
-rw-r--r--arch/x86/mm/numa_64.c2
-rw-r--r--arch/x86/mm/numa_emulation.c36
-rw-r--r--arch/x86/mm/pageattr.c2
-rw-r--r--arch/x86/mm/srat.c7
-rw-r--r--arch/x86/net/bpf_jit_comp.c4
-rw-r--r--arch/x86/oprofile/Makefile3
-rw-r--r--arch/x86/oprofile/init.c30
-rw-r--r--arch/x86/oprofile/nmi_int.c27
-rw-r--r--arch/x86/oprofile/nmi_timer_int.c50
-rw-r--r--arch/x86/pci/pcbios.c2
-rw-r--r--arch/x86/platform/efi/efi.c41
-rw-r--r--arch/x86/platform/efi/efi_32.c48
-rw-r--r--arch/x86/platform/efi/efi_64.c17
-rw-r--r--arch/x86/platform/mrst/early_printk_mrst.c16
-rw-r--r--arch/x86/platform/uv/uv_sysfs.c2
-rw-r--r--arch/x86/tools/Makefile11
-rw-r--r--arch/x86/tools/gen-insn-attr-x86.awk21
-rw-r--r--arch/x86/tools/insn_sanity.c275
-rw-r--r--arch/x86/xen/debugfs.c2
-rw-r--r--arch/x86/xen/debugfs.h2
-rw-r--r--arch/x86/xen/enlighten.c2
-rw-r--r--arch/x86/xen/mmu.c12
-rw-r--r--arch/x86/xen/setup.c25
130 files changed, 2667 insertions, 1844 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index efb42949cc09..1d2a69dd36d8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,6 +26,8 @@ config X86
26 select HAVE_IOREMAP_PROT 26 select HAVE_IOREMAP_PROT
27 select HAVE_KPROBES 27 select HAVE_KPROBES
28 select HAVE_MEMBLOCK 28 select HAVE_MEMBLOCK
29 select HAVE_MEMBLOCK_NODE_MAP
30 select ARCH_DISCARD_MEMBLOCK
29 select ARCH_WANT_OPTIONAL_GPIOLIB 31 select ARCH_WANT_OPTIONAL_GPIOLIB
30 select ARCH_WANT_FRAME_POINTERS 32 select ARCH_WANT_FRAME_POINTERS
31 select HAVE_DMA_ATTRS 33 select HAVE_DMA_ATTRS
@@ -204,9 +206,6 @@ config ZONE_DMA32
204 bool 206 bool
205 default X86_64 207 default X86_64
206 208
207config ARCH_POPULATES_NODE_MAP
208 def_bool y
209
210config AUDIT_ARCH 209config AUDIT_ARCH
211 bool 210 bool
212 default X86_64 211 default X86_64
@@ -343,6 +342,7 @@ config X86_EXTENDED_PLATFORM
343 342
344 If you enable this option then you'll be able to select support 343 If you enable this option then you'll be able to select support
345 for the following (non-PC) 64 bit x86 platforms: 344 for the following (non-PC) 64 bit x86 platforms:
345 Numascale NumaChip
346 ScaleMP vSMP 346 ScaleMP vSMP
347 SGI Ultraviolet 347 SGI Ultraviolet
348 348
@@ -351,6 +351,18 @@ config X86_EXTENDED_PLATFORM
351endif 351endif
352# This is an alphabetically sorted list of 64 bit extended platforms 352# This is an alphabetically sorted list of 64 bit extended platforms
353# Please maintain the alphabetic order if and when there are additions 353# Please maintain the alphabetic order if and when there are additions
354config X86_NUMACHIP
355 bool "Numascale NumaChip"
356 depends on X86_64
357 depends on X86_EXTENDED_PLATFORM
358 depends on NUMA
359 depends on SMP
360 depends on X86_X2APIC
361 depends on !EDAC_AMD64
362 ---help---
363 Adds support for Numascale NumaChip large-SMP systems. Needed to
364 enable more than ~168 cores.
365 If you don't have one of these, you should say N here.
354 366
355config X86_VSMP 367config X86_VSMP
356 bool "ScaleMP vSMP" 368 bool "ScaleMP vSMP"
@@ -1730,7 +1742,7 @@ source "drivers/sfi/Kconfig"
1730 1742
1731config X86_APM_BOOT 1743config X86_APM_BOOT
1732 def_bool y 1744 def_bool y
1733 depends on APM || APM_MODULE 1745 depends on APM
1734 1746
1735menuconfig APM 1747menuconfig APM
1736 tristate "APM (Advanced Power Management) BIOS support" 1748 tristate "APM (Advanced Power Management) BIOS support"
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a6253ec1b284..3e274564f6bf 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -134,7 +134,7 @@ ENTRY(ia32_sysenter_target)
134 CFI_REL_OFFSET rsp,0 134 CFI_REL_OFFSET rsp,0
135 pushfq_cfi 135 pushfq_cfi
136 /*CFI_REL_OFFSET rflags,0*/ 136 /*CFI_REL_OFFSET rflags,0*/
137 movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d 137 movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
138 CFI_REGISTER rip,r10 138 CFI_REGISTER rip,r10
139 pushq_cfi $__USER32_CS 139 pushq_cfi $__USER32_CS
140 /*CFI_REL_OFFSET cs,0*/ 140 /*CFI_REL_OFFSET cs,0*/
@@ -150,9 +150,8 @@ ENTRY(ia32_sysenter_target)
150 .section __ex_table,"a" 150 .section __ex_table,"a"
151 .quad 1b,ia32_badarg 151 .quad 1b,ia32_badarg
152 .previous 152 .previous
153 GET_THREAD_INFO(%r10) 153 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
154 orl $TS_COMPAT,TI_status(%r10) 154 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
155 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
156 CFI_REMEMBER_STATE 155 CFI_REMEMBER_STATE
157 jnz sysenter_tracesys 156 jnz sysenter_tracesys
158 cmpq $(IA32_NR_syscalls-1),%rax 157 cmpq $(IA32_NR_syscalls-1),%rax
@@ -162,13 +161,12 @@ sysenter_do_call:
162sysenter_dispatch: 161sysenter_dispatch:
163 call *ia32_sys_call_table(,%rax,8) 162 call *ia32_sys_call_table(,%rax,8)
164 movq %rax,RAX-ARGOFFSET(%rsp) 163 movq %rax,RAX-ARGOFFSET(%rsp)
165 GET_THREAD_INFO(%r10)
166 DISABLE_INTERRUPTS(CLBR_NONE) 164 DISABLE_INTERRUPTS(CLBR_NONE)
167 TRACE_IRQS_OFF 165 TRACE_IRQS_OFF
168 testl $_TIF_ALLWORK_MASK,TI_flags(%r10) 166 testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
169 jnz sysexit_audit 167 jnz sysexit_audit
170sysexit_from_sys_call: 168sysexit_from_sys_call:
171 andl $~TS_COMPAT,TI_status(%r10) 169 andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
172 /* clear IF, so that popfq doesn't enable interrupts early */ 170 /* clear IF, so that popfq doesn't enable interrupts early */
173 andl $~0x200,EFLAGS-R11(%rsp) 171 andl $~0x200,EFLAGS-R11(%rsp)
174 movl RIP-R11(%rsp),%edx /* User %eip */ 172 movl RIP-R11(%rsp),%edx /* User %eip */
@@ -205,7 +203,7 @@ sysexit_from_sys_call:
205 .endm 203 .endm
206 204
207 .macro auditsys_exit exit 205 .macro auditsys_exit exit
208 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) 206 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
209 jnz ia32_ret_from_sys_call 207 jnz ia32_ret_from_sys_call
210 TRACE_IRQS_ON 208 TRACE_IRQS_ON
211 sti 209 sti
@@ -215,12 +213,11 @@ sysexit_from_sys_call:
215 movzbl %al,%edi /* zero-extend that into %edi */ 213 movzbl %al,%edi /* zero-extend that into %edi */
216 inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ 214 inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
217 call audit_syscall_exit 215 call audit_syscall_exit
218 GET_THREAD_INFO(%r10)
219 movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ 216 movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */
220 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi 217 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
221 cli 218 cli
222 TRACE_IRQS_OFF 219 TRACE_IRQS_OFF
223 testl %edi,TI_flags(%r10) 220 testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
224 jz \exit 221 jz \exit
225 CLEAR_RREGS -ARGOFFSET 222 CLEAR_RREGS -ARGOFFSET
226 jmp int_with_check 223 jmp int_with_check
@@ -238,7 +235,7 @@ sysexit_audit:
238 235
239sysenter_tracesys: 236sysenter_tracesys:
240#ifdef CONFIG_AUDITSYSCALL 237#ifdef CONFIG_AUDITSYSCALL
241 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) 238 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
242 jz sysenter_auditsys 239 jz sysenter_auditsys
243#endif 240#endif
244 SAVE_REST 241 SAVE_REST
@@ -309,9 +306,8 @@ ENTRY(ia32_cstar_target)
309 .section __ex_table,"a" 306 .section __ex_table,"a"
310 .quad 1b,ia32_badarg 307 .quad 1b,ia32_badarg
311 .previous 308 .previous
312 GET_THREAD_INFO(%r10) 309 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
313 orl $TS_COMPAT,TI_status(%r10) 310 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
314 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
315 CFI_REMEMBER_STATE 311 CFI_REMEMBER_STATE
316 jnz cstar_tracesys 312 jnz cstar_tracesys
317 cmpq $IA32_NR_syscalls-1,%rax 313 cmpq $IA32_NR_syscalls-1,%rax
@@ -321,13 +317,12 @@ cstar_do_call:
321cstar_dispatch: 317cstar_dispatch:
322 call *ia32_sys_call_table(,%rax,8) 318 call *ia32_sys_call_table(,%rax,8)
323 movq %rax,RAX-ARGOFFSET(%rsp) 319 movq %rax,RAX-ARGOFFSET(%rsp)
324 GET_THREAD_INFO(%r10)
325 DISABLE_INTERRUPTS(CLBR_NONE) 320 DISABLE_INTERRUPTS(CLBR_NONE)
326 TRACE_IRQS_OFF 321 TRACE_IRQS_OFF
327 testl $_TIF_ALLWORK_MASK,TI_flags(%r10) 322 testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
328 jnz sysretl_audit 323 jnz sysretl_audit
329sysretl_from_sys_call: 324sysretl_from_sys_call:
330 andl $~TS_COMPAT,TI_status(%r10) 325 andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
331 RESTORE_ARGS 0,-ARG_SKIP,0,0,0 326 RESTORE_ARGS 0,-ARG_SKIP,0,0,0
332 movl RIP-ARGOFFSET(%rsp),%ecx 327 movl RIP-ARGOFFSET(%rsp),%ecx
333 CFI_REGISTER rip,rcx 328 CFI_REGISTER rip,rcx
@@ -355,7 +350,7 @@ sysretl_audit:
355 350
356cstar_tracesys: 351cstar_tracesys:
357#ifdef CONFIG_AUDITSYSCALL 352#ifdef CONFIG_AUDITSYSCALL
358 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) 353 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
359 jz cstar_auditsys 354 jz cstar_auditsys
360#endif 355#endif
361 xchgl %r9d,%ebp 356 xchgl %r9d,%ebp
@@ -420,9 +415,8 @@ ENTRY(ia32_syscall)
420 /* note the registers are not zero extended to the sf. 415 /* note the registers are not zero extended to the sf.
421 this could be a problem. */ 416 this could be a problem. */
422 SAVE_ARGS 0,1,0 417 SAVE_ARGS 0,1,0
423 GET_THREAD_INFO(%r10) 418 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
424 orl $TS_COMPAT,TI_status(%r10) 419 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
425 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
426 jnz ia32_tracesys 420 jnz ia32_tracesys
427 cmpq $(IA32_NR_syscalls-1),%rax 421 cmpq $(IA32_NR_syscalls-1),%rax
428 ja ia32_badsys 422 ja ia32_badsys
@@ -459,8 +453,8 @@ quiet_ni_syscall:
459 CFI_ENDPROC 453 CFI_ENDPROC
460 454
461 .macro PTREGSCALL label, func, arg 455 .macro PTREGSCALL label, func, arg
462 .globl \label 456 ALIGN
463\label: 457GLOBAL(\label)
464 leaq \func(%rip),%rax 458 leaq \func(%rip),%rax
465 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ 459 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
466 jmp ia32_ptregs_common 460 jmp ia32_ptregs_common
@@ -477,7 +471,8 @@ quiet_ni_syscall:
477 PTREGSCALL stub32_vfork, sys_vfork, %rdi 471 PTREGSCALL stub32_vfork, sys_vfork, %rdi
478 PTREGSCALL stub32_iopl, sys_iopl, %rsi 472 PTREGSCALL stub32_iopl, sys_iopl, %rsi
479 473
480ENTRY(ia32_ptregs_common) 474 ALIGN
475ia32_ptregs_common:
481 popq %r11 476 popq %r11
482 CFI_ENDPROC 477 CFI_ENDPROC
483 CFI_STARTPROC32 simple 478 CFI_STARTPROC32 simple
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 091508b533b4..952bd0100c5c 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -4,10 +4,10 @@
4 4
5#ifdef CONFIG_SMP 5#ifdef CONFIG_SMP
6 .macro LOCK_PREFIX 6 .macro LOCK_PREFIX
71: lock 7672: lock
8 .section .smp_locks,"a" 8 .section .smp_locks,"a"
9 .balign 4 9 .balign 4
10 .long 1b - . 10 .long 672b - .
11 .previous 11 .previous
12 .endm 12 .endm
13#else 13#else
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 1a6c09af048f..3ab9bdd87e79 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -176,6 +176,7 @@ static inline u64 native_x2apic_icr_read(void)
176} 176}
177 177
178extern int x2apic_phys; 178extern int x2apic_phys;
179extern int x2apic_preenabled;
179extern void check_x2apic(void); 180extern void check_x2apic(void);
180extern void enable_x2apic(void); 181extern void enable_x2apic(void);
181extern void x2apic_icr_write(u32 low, u32 id); 182extern void x2apic_icr_write(u32 low, u32 id);
@@ -198,6 +199,9 @@ static inline void x2apic_force_phys(void)
198 x2apic_phys = 1; 199 x2apic_phys = 1;
199} 200}
200#else 201#else
202static inline void disable_x2apic(void)
203{
204}
201static inline void check_x2apic(void) 205static inline void check_x2apic(void)
202{ 206{
203} 207}
@@ -212,6 +216,7 @@ static inline void x2apic_force_phys(void)
212{ 216{
213} 217}
214 218
219#define nox2apic 0
215#define x2apic_preenabled 0 220#define x2apic_preenabled 0
216#define x2apic_supported() 0 221#define x2apic_supported() 0
217#endif 222#endif
@@ -410,6 +415,7 @@ extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
410#endif 415#endif
411 416
412#ifdef CONFIG_X86_LOCAL_APIC 417#ifdef CONFIG_X86_LOCAL_APIC
418
413static inline u32 apic_read(u32 reg) 419static inline u32 apic_read(u32 reg)
414{ 420{
415 return apic->read(reg); 421 return apic->read(reg);
diff --git a/arch/x86/include/asm/apic_flat_64.h b/arch/x86/include/asm/apic_flat_64.h
new file mode 100644
index 000000000000..a2d312796440
--- /dev/null
+++ b/arch/x86/include/asm/apic_flat_64.h
@@ -0,0 +1,7 @@
1#ifndef _ASM_X86_APIC_FLAT_64_H
2#define _ASM_X86_APIC_FLAT_64_H
3
4extern void flat_init_apic_ldr(void);
5
6#endif
7
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 3925d8007864..134bba00df09 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -144,6 +144,7 @@
144 144
145#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) 145#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
146#define APIC_BASE_MSR 0x800 146#define APIC_BASE_MSR 0x800
147#define XAPIC_ENABLE (1UL << 11)
147#define X2APIC_ENABLE (1UL << 10) 148#define X2APIC_ENABLE (1UL << 10)
148 149
149#ifdef CONFIG_X86_32 150#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 1775d6e5920e..b97596e2b68c 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -380,6 +380,8 @@ static inline unsigned long __fls(unsigned long word)
380 return word; 380 return word;
381} 381}
382 382
383#undef ADDR
384
383#ifdef __KERNEL__ 385#ifdef __KERNEL__
384/** 386/**
385 * ffs - find first set bit in word 387 * ffs - find first set bit in word
@@ -395,10 +397,25 @@ static inline unsigned long __fls(unsigned long word)
395static inline int ffs(int x) 397static inline int ffs(int x)
396{ 398{
397 int r; 399 int r;
398#ifdef CONFIG_X86_CMOV 400
401#ifdef CONFIG_X86_64
402 /*
403 * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
404 * dest reg is undefined if x==0, but their CPU architect says its
405 * value is written to set it to the same as before, except that the
406 * top 32 bits will be cleared.
407 *
408 * We cannot do this on 32 bits because at the very least some
409 * 486 CPUs did not behave this way.
410 */
411 long tmp = -1;
412 asm("bsfl %1,%0"
413 : "=r" (r)
414 : "rm" (x), "0" (tmp));
415#elif defined(CONFIG_X86_CMOV)
399 asm("bsfl %1,%0\n\t" 416 asm("bsfl %1,%0\n\t"
400 "cmovzl %2,%0" 417 "cmovzl %2,%0"
401 : "=r" (r) : "rm" (x), "r" (-1)); 418 : "=&r" (r) : "rm" (x), "r" (-1));
402#else 419#else
403 asm("bsfl %1,%0\n\t" 420 asm("bsfl %1,%0\n\t"
404 "jnz 1f\n\t" 421 "jnz 1f\n\t"
@@ -422,7 +439,22 @@ static inline int ffs(int x)
422static inline int fls(int x) 439static inline int fls(int x)
423{ 440{
424 int r; 441 int r;
425#ifdef CONFIG_X86_CMOV 442
443#ifdef CONFIG_X86_64
444 /*
445 * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
446 * dest reg is undefined if x==0, but their CPU architect says its
447 * value is written to set it to the same as before, except that the
448 * top 32 bits will be cleared.
449 *
450 * We cannot do this on 32 bits because at the very least some
451 * 486 CPUs did not behave this way.
452 */
453 long tmp = -1;
454 asm("bsrl %1,%0"
455 : "=r" (r)
456 : "rm" (x), "0" (tmp));
457#elif defined(CONFIG_X86_CMOV)
426 asm("bsrl %1,%0\n\t" 458 asm("bsrl %1,%0\n\t"
427 "cmovzl %2,%0" 459 "cmovzl %2,%0"
428 : "=&r" (r) : "rm" (x), "rm" (-1)); 460 : "=&r" (r) : "rm" (x), "rm" (-1));
@@ -434,11 +466,35 @@ static inline int fls(int x)
434#endif 466#endif
435 return r + 1; 467 return r + 1;
436} 468}
437#endif /* __KERNEL__ */
438
439#undef ADDR
440 469
441#ifdef __KERNEL__ 470/**
471 * fls64 - find last set bit in a 64-bit word
472 * @x: the word to search
473 *
474 * This is defined in a similar way as the libc and compiler builtin
475 * ffsll, but returns the position of the most significant set bit.
476 *
477 * fls64(value) returns 0 if value is 0 or the position of the last
478 * set bit if value is nonzero. The last (most significant) bit is
479 * at position 64.
480 */
481#ifdef CONFIG_X86_64
482static __always_inline int fls64(__u64 x)
483{
484 long bitpos = -1;
485 /*
486 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
487 * dest reg is undefined if x==0, but their CPU architect says its
488 * value is written to set it to the same as before.
489 */
490 asm("bsrq %1,%0"
491 : "+r" (bitpos)
492 : "rm" (x));
493 return bitpos + 1;
494}
495#else
496#include <asm-generic/bitops/fls64.h>
497#endif
442 498
443#include <asm-generic/bitops/find.h> 499#include <asm-generic/bitops/find.h>
444 500
@@ -450,12 +506,6 @@ static inline int fls(int x)
450 506
451#include <asm-generic/bitops/const_hweight.h> 507#include <asm-generic/bitops/const_hweight.h>
452 508
453#endif /* __KERNEL__ */
454
455#include <asm-generic/bitops/fls64.h>
456
457#ifdef __KERNEL__
458
459#include <asm-generic/bitops/le.h> 509#include <asm-generic/bitops/le.h>
460 510
461#include <asm-generic/bitops/ext2-atomic-setbit.h> 511#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 5d3acdf5a7a6..0c9fa2745f13 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -14,6 +14,8 @@ extern void __cmpxchg_wrong_size(void)
14 __compiletime_error("Bad argument size for cmpxchg"); 14 __compiletime_error("Bad argument size for cmpxchg");
15extern void __xadd_wrong_size(void) 15extern void __xadd_wrong_size(void)
16 __compiletime_error("Bad argument size for xadd"); 16 __compiletime_error("Bad argument size for xadd");
17extern void __add_wrong_size(void)
18 __compiletime_error("Bad argument size for add");
17 19
18/* 20/*
19 * Constants for operation sizes. On 32-bit, the 64-bit size is set to 21 * Constants for operation sizes. On 32-bit, the 64-bit size is set to
@@ -31,60 +33,47 @@ extern void __xadd_wrong_size(void)
31#define __X86_CASE_Q -1 /* sizeof will never return -1 */ 33#define __X86_CASE_Q -1 /* sizeof will never return -1 */
32#endif 34#endif
33 35
36/*
37 * An exchange-type operation, which takes a value and a pointer, and
38 * returns a the old value.
39 */
40#define __xchg_op(ptr, arg, op, lock) \
41 ({ \
42 __typeof__ (*(ptr)) __ret = (arg); \
43 switch (sizeof(*(ptr))) { \
44 case __X86_CASE_B: \
45 asm volatile (lock #op "b %b0, %1\n" \
46 : "+r" (__ret), "+m" (*(ptr)) \
47 : : "memory", "cc"); \
48 break; \
49 case __X86_CASE_W: \
50 asm volatile (lock #op "w %w0, %1\n" \
51 : "+r" (__ret), "+m" (*(ptr)) \
52 : : "memory", "cc"); \
53 break; \
54 case __X86_CASE_L: \
55 asm volatile (lock #op "l %0, %1\n" \
56 : "+r" (__ret), "+m" (*(ptr)) \
57 : : "memory", "cc"); \
58 break; \
59 case __X86_CASE_Q: \
60 asm volatile (lock #op "q %q0, %1\n" \
61 : "+r" (__ret), "+m" (*(ptr)) \
62 : : "memory", "cc"); \
63 break; \
64 default: \
65 __ ## op ## _wrong_size(); \
66 } \
67 __ret; \
68 })
69
34/* 70/*
35 * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. 71 * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
36 * Since this is generally used to protect other memory information, we 72 * Since this is generally used to protect other memory information, we
37 * use "asm volatile" and "memory" clobbers to prevent gcc from moving 73 * use "asm volatile" and "memory" clobbers to prevent gcc from moving
38 * information around. 74 * information around.
39 */ 75 */
40#define __xchg(x, ptr, size) \ 76#define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "")
41({ \
42 __typeof(*(ptr)) __x = (x); \
43 switch (size) { \
44 case __X86_CASE_B: \
45 { \
46 volatile u8 *__ptr = (volatile u8 *)(ptr); \
47 asm volatile("xchgb %0,%1" \
48 : "=q" (__x), "+m" (*__ptr) \
49 : "0" (__x) \
50 : "memory"); \
51 break; \
52 } \
53 case __X86_CASE_W: \
54 { \
55 volatile u16 *__ptr = (volatile u16 *)(ptr); \
56 asm volatile("xchgw %0,%1" \
57 : "=r" (__x), "+m" (*__ptr) \
58 : "0" (__x) \
59 : "memory"); \
60 break; \
61 } \
62 case __X86_CASE_L: \
63 { \
64 volatile u32 *__ptr = (volatile u32 *)(ptr); \
65 asm volatile("xchgl %0,%1" \
66 : "=r" (__x), "+m" (*__ptr) \
67 : "0" (__x) \
68 : "memory"); \
69 break; \
70 } \
71 case __X86_CASE_Q: \
72 { \
73 volatile u64 *__ptr = (volatile u64 *)(ptr); \
74 asm volatile("xchgq %0,%1" \
75 : "=r" (__x), "+m" (*__ptr) \
76 : "0" (__x) \
77 : "memory"); \
78 break; \
79 } \
80 default: \
81 __xchg_wrong_size(); \
82 } \
83 __x; \
84})
85
86#define xchg(ptr, v) \
87 __xchg((v), (ptr), sizeof(*ptr))
88 77
89/* 78/*
90 * Atomic compare and exchange. Compare OLD with MEM, if identical, 79 * Atomic compare and exchange. Compare OLD with MEM, if identical,
@@ -165,46 +154,80 @@ extern void __xadd_wrong_size(void)
165 __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) 154 __cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
166#endif 155#endif
167 156
168#define __xadd(ptr, inc, lock) \ 157/*
158 * xadd() adds "inc" to "*ptr" and atomically returns the previous
159 * value of "*ptr".
160 *
161 * xadd() is locked when multiple CPUs are online
162 * xadd_sync() is always locked
163 * xadd_local() is never locked
164 */
165#define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock)
166#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX)
167#define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ")
168#define xadd_local(ptr, inc) __xadd((ptr), (inc), "")
169
170#define __add(ptr, inc, lock) \
169 ({ \ 171 ({ \
170 __typeof__ (*(ptr)) __ret = (inc); \ 172 __typeof__ (*(ptr)) __ret = (inc); \
171 switch (sizeof(*(ptr))) { \ 173 switch (sizeof(*(ptr))) { \
172 case __X86_CASE_B: \ 174 case __X86_CASE_B: \
173 asm volatile (lock "xaddb %b0, %1\n" \ 175 asm volatile (lock "addb %b1, %0\n" \
174 : "+r" (__ret), "+m" (*(ptr)) \ 176 : "+m" (*(ptr)) : "ri" (inc) \
175 : : "memory", "cc"); \ 177 : "memory", "cc"); \
176 break; \ 178 break; \
177 case __X86_CASE_W: \ 179 case __X86_CASE_W: \
178 asm volatile (lock "xaddw %w0, %1\n" \ 180 asm volatile (lock "addw %w1, %0\n" \
179 : "+r" (__ret), "+m" (*(ptr)) \ 181 : "+m" (*(ptr)) : "ri" (inc) \
180 : : "memory", "cc"); \ 182 : "memory", "cc"); \
181 break; \ 183 break; \
182 case __X86_CASE_L: \ 184 case __X86_CASE_L: \
183 asm volatile (lock "xaddl %0, %1\n" \ 185 asm volatile (lock "addl %1, %0\n" \
184 : "+r" (__ret), "+m" (*(ptr)) \ 186 : "+m" (*(ptr)) : "ri" (inc) \
185 : : "memory", "cc"); \ 187 : "memory", "cc"); \
186 break; \ 188 break; \
187 case __X86_CASE_Q: \ 189 case __X86_CASE_Q: \
188 asm volatile (lock "xaddq %q0, %1\n" \ 190 asm volatile (lock "addq %1, %0\n" \
189 : "+r" (__ret), "+m" (*(ptr)) \ 191 : "+m" (*(ptr)) : "ri" (inc) \
190 : : "memory", "cc"); \ 192 : "memory", "cc"); \
191 break; \ 193 break; \
192 default: \ 194 default: \
193 __xadd_wrong_size(); \ 195 __add_wrong_size(); \
194 } \ 196 } \
195 __ret; \ 197 __ret; \
196 }) 198 })
197 199
198/* 200/*
199 * xadd() adds "inc" to "*ptr" and atomically returns the previous 201 * add_*() adds "inc" to "*ptr"
200 * value of "*ptr".
201 * 202 *
202 * xadd() is locked when multiple CPUs are online 203 * __add() takes a lock prefix
203 * xadd_sync() is always locked 204 * add_smp() is locked when multiple CPUs are online
204 * xadd_local() is never locked 205 * add_sync() is always locked
205 */ 206 */
206#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) 207#define add_smp(ptr, inc) __add((ptr), (inc), LOCK_PREFIX)
207#define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ") 208#define add_sync(ptr, inc) __add((ptr), (inc), "lock; ")
208#define xadd_local(ptr, inc) __xadd((ptr), (inc), "") 209
210#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \
211({ \
212 bool __ret; \
213 __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \
214 __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \
215 BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
216 BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
217 VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \
218 VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \
219 asm volatile(pfx "cmpxchg%c4b %2; sete %0" \
220 : "=a" (__ret), "+d" (__old2), \
221 "+m" (*(p1)), "+m" (*(p2)) \
222 : "i" (2 * sizeof(long)), "a" (__old1), \
223 "b" (__new1), "c" (__new2)); \
224 __ret; \
225})
226
227#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
228 __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
229
230#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
231 __cmpxchg_double(, p1, p2, o1, o2, n1, n2)
209 232
210#endif /* ASM_X86_CMPXCHG_H */ 233#endif /* ASM_X86_CMPXCHG_H */
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index fbebb07dd80b..53f4b219336b 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -166,52 +166,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
166 166
167#endif 167#endif
168 168
169#define cmpxchg8b(ptr, o1, o2, n1, n2) \
170({ \
171 char __ret; \
172 __typeof__(o2) __dummy; \
173 __typeof__(*(ptr)) __old1 = (o1); \
174 __typeof__(o2) __old2 = (o2); \
175 __typeof__(*(ptr)) __new1 = (n1); \
176 __typeof__(o2) __new2 = (n2); \
177 asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1" \
178 : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\
179 : "a" (__old1), "d"(__old2), \
180 "b" (__new1), "c" (__new2) \
181 : "memory"); \
182 __ret; })
183
184
185#define cmpxchg8b_local(ptr, o1, o2, n1, n2) \
186({ \
187 char __ret; \
188 __typeof__(o2) __dummy; \
189 __typeof__(*(ptr)) __old1 = (o1); \
190 __typeof__(o2) __old2 = (o2); \
191 __typeof__(*(ptr)) __new1 = (n1); \
192 __typeof__(o2) __new2 = (n2); \
193 asm volatile("cmpxchg8b %2; setz %1" \
194 : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\
195 : "a" (__old), "d"(__old2), \
196 "b" (__new1), "c" (__new2), \
197 : "memory"); \
198 __ret; })
199
200
201#define cmpxchg_double(ptr, o1, o2, n1, n2) \
202({ \
203 BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
204 VM_BUG_ON((unsigned long)(ptr) % 8); \
205 cmpxchg8b((ptr), (o1), (o2), (n1), (n2)); \
206})
207
208#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \
209({ \
210 BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
211 VM_BUG_ON((unsigned long)(ptr) % 8); \
212 cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \
213})
214
215#define system_has_cmpxchg_double() cpu_has_cx8 169#define system_has_cmpxchg_double() cpu_has_cx8
216 170
217#endif /* _ASM_X86_CMPXCHG_32_H */ 171#endif /* _ASM_X86_CMPXCHG_32_H */
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 285da02c38fa..614be87f1a9b 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -20,49 +20,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
20 cmpxchg_local((ptr), (o), (n)); \ 20 cmpxchg_local((ptr), (o), (n)); \
21}) 21})
22 22
23#define cmpxchg16b(ptr, o1, o2, n1, n2) \
24({ \
25 char __ret; \
26 __typeof__(o2) __junk; \
27 __typeof__(*(ptr)) __old1 = (o1); \
28 __typeof__(o2) __old2 = (o2); \
29 __typeof__(*(ptr)) __new1 = (n1); \
30 __typeof__(o2) __new2 = (n2); \
31 asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1" \
32 : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
33 : "b"(__new1), "c"(__new2), \
34 "a"(__old1), "d"(__old2)); \
35 __ret; })
36
37
38#define cmpxchg16b_local(ptr, o1, o2, n1, n2) \
39({ \
40 char __ret; \
41 __typeof__(o2) __junk; \
42 __typeof__(*(ptr)) __old1 = (o1); \
43 __typeof__(o2) __old2 = (o2); \
44 __typeof__(*(ptr)) __new1 = (n1); \
45 __typeof__(o2) __new2 = (n2); \
46 asm volatile("cmpxchg16b %2;setz %1" \
47 : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
48 : "b"(__new1), "c"(__new2), \
49 "a"(__old1), "d"(__old2)); \
50 __ret; })
51
52#define cmpxchg_double(ptr, o1, o2, n1, n2) \
53({ \
54 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
55 VM_BUG_ON((unsigned long)(ptr) % 16); \
56 cmpxchg16b((ptr), (o1), (o2), (n1), (n2)); \
57})
58
59#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \
60({ \
61 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
62 VM_BUG_ON((unsigned long)(ptr) % 16); \
63 cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \
64})
65
66#define system_has_cmpxchg_double() cpu_has_cx16 23#define system_has_cmpxchg_double() cpu_has_cx16
67 24
68#endif /* _ASM_X86_CMPXCHG_64_H */ 25#endif /* _ASM_X86_CMPXCHG_64_H */
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9a2d644c08ef..ced283ac79df 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -4,6 +4,7 @@
4#ifdef CONFIG_X86_32 4#ifdef CONFIG_X86_32
5 5
6#include <linux/types.h> 6#include <linux/types.h>
7#include <linux/log2.h>
7 8
8/* 9/*
9 * do_div() is NOT a C function. It wants to return 10 * do_div() is NOT a C function. It wants to return
@@ -21,15 +22,20 @@
21({ \ 22({ \
22 unsigned long __upper, __low, __high, __mod, __base; \ 23 unsigned long __upper, __low, __high, __mod, __base; \
23 __base = (base); \ 24 __base = (base); \
24 asm("":"=a" (__low), "=d" (__high) : "A" (n)); \ 25 if (__builtin_constant_p(__base) && is_power_of_2(__base)) { \
25 __upper = __high; \ 26 __mod = n & (__base - 1); \
26 if (__high) { \ 27 n >>= ilog2(__base); \
27 __upper = __high % (__base); \ 28 } else { \
28 __high = __high / (__base); \ 29 asm("" : "=a" (__low), "=d" (__high) : "A" (n));\
30 __upper = __high; \
31 if (__high) { \
32 __upper = __high % (__base); \
33 __high = __high / (__base); \
34 } \
35 asm("divl %2" : "=a" (__low), "=d" (__mod) \
36 : "rm" (__base), "0" (__low), "1" (__upper)); \
37 asm("" : "=A" (n) : "a" (__low), "d" (__high)); \
29 } \ 38 } \
30 asm("divl %2":"=a" (__low), "=d" (__mod) \
31 : "rm" (__base), "0" (__low), "1" (__upper)); \
32 asm("":"=A" (n) : "a" (__low), "d" (__high)); \
33 __mod; \ 39 __mod; \
34}) 40})
35 41
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index c9547033e38e..37782566af24 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -53,13 +53,6 @@
53 */ 53 */
54#define E820_RESERVED_KERN 128 54#define E820_RESERVED_KERN 128
55 55
56/*
57 * Address ranges that need to be mapped by the kernel direct
58 * mapping. This is used to make sure regions such as
59 * EFI_RUNTIME_SERVICES_DATA are directly mapped. See setup_arch().
60 */
61#define E820_RESERVED_EFI 129
62
63#ifndef __ASSEMBLY__ 56#ifndef __ASSEMBLY__
64#include <linux/types.h> 57#include <linux/types.h>
65struct e820entry { 58struct e820entry {
@@ -122,10 +115,9 @@ static inline void early_memtest(unsigned long start, unsigned long end)
122} 115}
123#endif 116#endif
124 117
125extern unsigned long e820_end_pfn(unsigned long limit_pfn, unsigned type);
126extern unsigned long e820_end_of_ram_pfn(void); 118extern unsigned long e820_end_of_ram_pfn(void);
127extern unsigned long e820_end_of_low_ram_pfn(void); 119extern unsigned long e820_end_of_low_ram_pfn(void);
128extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); 120extern u64 early_reserve_e820(u64 sizet, u64 align);
129 121
130void memblock_x86_fill(void); 122void memblock_x86_fill(void);
131void memblock_find_dma_reserve(void); 123void memblock_find_dma_reserve(void);
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index b8d8bfcd44a9..7093e4a6a0bc 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -33,6 +33,8 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
33#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ 33#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
34 efi_call_virt(f, a1, a2, a3, a4, a5, a6) 34 efi_call_virt(f, a1, a2, a3, a4, a5, a6)
35 35
36#define efi_ioremap(addr, size, type) ioremap_cache(addr, size)
37
36#else /* !CONFIG_X86_32 */ 38#else /* !CONFIG_X86_32 */
37 39
38extern u64 efi_call0(void *fp); 40extern u64 efi_call0(void *fp);
@@ -82,6 +84,9 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
82 efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ 84 efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
83 (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) 85 (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
84 86
87extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
88 u32 type);
89
85#endif /* CONFIG_X86_32 */ 90#endif /* CONFIG_X86_32 */
86 91
87extern int add_efi_memmap; 92extern int add_efi_memmap;
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 55e4de613f0e..da0b3ca815b7 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -11,6 +11,7 @@ typedef struct {
11#ifdef CONFIG_X86_LOCAL_APIC 11#ifdef CONFIG_X86_LOCAL_APIC
12 unsigned int apic_timer_irqs; /* arch dependent */ 12 unsigned int apic_timer_irqs; /* arch dependent */
13 unsigned int irq_spurious_count; 13 unsigned int irq_spurious_count;
14 unsigned int icr_read_retry_count;
14#endif 15#endif
15 unsigned int x86_platform_ipis; /* arch dependent */ 16 unsigned int x86_platform_ipis; /* arch dependent */
16 unsigned int apic_perf_irqs; 17 unsigned int apic_perf_irqs;
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index c9e09ea05644..6919e936345b 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -218,7 +218,7 @@ static inline void fpu_fxsave(struct fpu *fpu)
218#ifdef CONFIG_SMP 218#ifdef CONFIG_SMP
219#define safe_address (__per_cpu_offset[0]) 219#define safe_address (__per_cpu_offset[0])
220#else 220#else
221#define safe_address (kstat_cpu(0).cpustat.user) 221#define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER])
222#endif 222#endif
223 223
224/* 224/*
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 88c765e16410..74df3f1eddfd 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -137,6 +137,13 @@ static inline int insn_is_avx(struct insn *insn)
137 return (insn->vex_prefix.value != 0); 137 return (insn->vex_prefix.value != 0);
138} 138}
139 139
140/* Ensure this instruction is decoded completely */
141static inline int insn_complete(struct insn *insn)
142{
143 return insn->opcode.got && insn->modrm.got && insn->sib.got &&
144 insn->displacement.got && insn->immediate.got;
145}
146
140static inline insn_byte_t insn_vex_m_bits(struct insn *insn) 147static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
141{ 148{
142 if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ 149 if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
diff --git a/arch/x86/include/asm/mach_timer.h b/arch/x86/include/asm/mach_timer.h
index 853728519ae9..88d0c3c74c13 100644
--- a/arch/x86/include/asm/mach_timer.h
+++ b/arch/x86/include/asm/mach_timer.h
@@ -15,7 +15,7 @@
15 15
16#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */ 16#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */
17#define CALIBRATE_LATCH \ 17#define CALIBRATE_LATCH \
18 ((CLOCK_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000) 18 ((PIT_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000)
19 19
20static inline void mach_prepare_counter(void) 20static inline void mach_prepare_counter(void)
21{ 21{
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index 01fdf5674e24..0e8e85bb7c51 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -81,8 +81,8 @@ static inline unsigned char current_lock_cmos_reg(void)
81#else 81#else
82#define lock_cmos_prefix(reg) do {} while (0) 82#define lock_cmos_prefix(reg) do {} while (0)
83#define lock_cmos_suffix(reg) do {} while (0) 83#define lock_cmos_suffix(reg) do {} while (0)
84#define lock_cmos(reg) 84#define lock_cmos(reg) do { } while (0)
85#define unlock_cmos() 85#define unlock_cmos() do { } while (0)
86#define do_i_have_lock_cmos() 0 86#define do_i_have_lock_cmos() 0
87#define current_lock_cmos_reg() 0 87#define current_lock_cmos_reg() 0
88#endif 88#endif
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 0e8ae57d3656..f35ce43c1a77 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -50,10 +50,11 @@
50#define MCJ_CTX_MASK 3 50#define MCJ_CTX_MASK 3
51#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) 51#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
52#define MCJ_CTX_RANDOM 0 /* inject context: random */ 52#define MCJ_CTX_RANDOM 0 /* inject context: random */
53#define MCJ_CTX_PROCESS 1 /* inject context: process */ 53#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
54#define MCJ_CTX_IRQ 2 /* inject context: IRQ */ 54#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
55#define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */ 55#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
56#define MCJ_EXCEPTION 8 /* raise as exception */ 56#define MCJ_EXCEPTION 0x8 /* raise as exception */
57#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
57 58
58/* Fields are zero when not available */ 59/* Fields are zero when not available */
59struct mce { 60struct mce {
@@ -120,7 +121,8 @@ struct mce_log {
120 121
121#ifdef __KERNEL__ 122#ifdef __KERNEL__
122 123
123extern struct atomic_notifier_head x86_mce_decoder_chain; 124extern void mce_register_decode_chain(struct notifier_block *nb);
125extern void mce_unregister_decode_chain(struct notifier_block *nb);
124 126
125#include <linux/percpu.h> 127#include <linux/percpu.h>
126#include <linux/init.h> 128#include <linux/init.h>
@@ -149,7 +151,7 @@ static inline void enable_p5_mce(void) {}
149 151
150void mce_setup(struct mce *m); 152void mce_setup(struct mce *m);
151void mce_log(struct mce *m); 153void mce_log(struct mce *m);
152DECLARE_PER_CPU(struct sys_device, mce_sysdev); 154DECLARE_PER_CPU(struct device, mce_device);
153 155
154/* 156/*
155 * Maximum banks number. 157 * Maximum banks number.
diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h
deleted file mode 100644
index 0cd3800f33b9..000000000000
--- a/arch/x86/include/asm/memblock.h
+++ /dev/null
@@ -1,23 +0,0 @@
1#ifndef _X86_MEMBLOCK_H
2#define _X86_MEMBLOCK_H
3
4#define ARCH_DISCARD_MEMBLOCK
5
6u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align);
7
8void memblock_x86_reserve_range(u64 start, u64 end, char *name);
9void memblock_x86_free_range(u64 start, u64 end);
10struct range;
11int __get_free_all_memory_range(struct range **range, int nodeid,
12 unsigned long start_pfn, unsigned long end_pfn);
13int get_free_all_memory_range(struct range **rangep, int nodeid);
14
15void memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
16 unsigned long last_pfn);
17u64 memblock_x86_hole_size(u64 start, u64 end);
18u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align);
19u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit);
20u64 memblock_x86_memory_in_range(u64 addr, u64 limit);
21bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align);
22
23#endif
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 24215072d0e1..4ebe157bf73d 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -48,6 +48,7 @@ static inline struct microcode_ops * __init init_intel_microcode(void)
48 48
49#ifdef CONFIG_MICROCODE_AMD 49#ifdef CONFIG_MICROCODE_AMD
50extern struct microcode_ops * __init init_amd_microcode(void); 50extern struct microcode_ops * __init init_amd_microcode(void);
51extern void __exit exit_amd_microcode(void);
51 52
52static inline void get_ucode_data(void *to, const u8 *from, size_t n) 53static inline void get_ucode_data(void *to, const u8 *from, size_t n)
53{ 54{
@@ -59,6 +60,7 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
59{ 60{
60 return NULL; 61 return NULL;
61} 62}
63static inline void __exit exit_amd_microcode(void) {}
62#endif 64#endif
63 65
64#endif /* _ASM_X86_MICROCODE_H */ 66#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index 93f79094c224..0a0a95460434 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -67,7 +67,7 @@ extern struct console early_mrst_console;
67extern void mrst_early_console_init(void); 67extern void mrst_early_console_init(void);
68 68
69extern struct console early_hsu_console; 69extern struct console early_hsu_console;
70extern void hsu_early_console_init(void); 70extern void hsu_early_console_init(const char *);
71 71
72extern void intel_scu_devices_create(void); 72extern void intel_scu_devices_create(void);
73extern void intel_scu_devices_destroy(void); 73extern void intel_scu_devices_destroy(void);
diff --git a/arch/x86/include/asm/numachip/numachip_csr.h b/arch/x86/include/asm/numachip/numachip_csr.h
new file mode 100644
index 000000000000..660f843df928
--- /dev/null
+++ b/arch/x86/include/asm/numachip/numachip_csr.h
@@ -0,0 +1,167 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Numascale NumaConnect-Specific Header file
7 *
8 * Copyright (C) 2011 Numascale AS. All rights reserved.
9 *
10 * Send feedback to <support@numascale.com>
11 *
12 */
13
14#ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
15#define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
16
17#include <linux/numa.h>
18#include <linux/percpu.h>
19#include <linux/io.h>
20#include <linux/swab.h>
21#include <asm/types.h>
22#include <asm/processor.h>
23
24#define CSR_NODE_SHIFT 16
25#define CSR_NODE_BITS(p) (((unsigned long)(p)) << CSR_NODE_SHIFT)
26#define CSR_NODE_MASK 0x0fff /* 4K nodes */
27
28/* 32K CSR space, b15 indicates geo/non-geo */
29#define CSR_OFFSET_MASK 0x7fffUL
30
31/* Global CSR space covers all 4K possible nodes with 64K CSR space per node */
32#define NUMACHIP_GCSR_BASE 0x3fff00000000ULL
33#define NUMACHIP_GCSR_LIM 0x3fff0fffffffULL
34#define NUMACHIP_GCSR_SIZE (NUMACHIP_GCSR_LIM - NUMACHIP_GCSR_BASE + 1)
35
36/*
37 * Local CSR space starts in global CSR space with "nodeid" = 0xfff0, however
38 * when using the direct mapping on x86_64, both start and size needs to be
39 * aligned with PMD_SIZE which is 2M
40 */
41#define NUMACHIP_LCSR_BASE 0x3ffffe000000ULL
42#define NUMACHIP_LCSR_LIM 0x3fffffffffffULL
43#define NUMACHIP_LCSR_SIZE (NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1)
44
45static inline void *gcsr_address(int node, unsigned long offset)
46{
47 return __va(NUMACHIP_GCSR_BASE | (1UL << 15) |
48 CSR_NODE_BITS(node & CSR_NODE_MASK) | (offset & CSR_OFFSET_MASK));
49}
50
51static inline void *lcsr_address(unsigned long offset)
52{
53 return __va(NUMACHIP_LCSR_BASE | (1UL << 15) |
54 CSR_NODE_BITS(0xfff0) | (offset & CSR_OFFSET_MASK));
55}
56
57static inline unsigned int read_gcsr(int node, unsigned long offset)
58{
59 return swab32(readl(gcsr_address(node, offset)));
60}
61
62static inline void write_gcsr(int node, unsigned long offset, unsigned int val)
63{
64 writel(swab32(val), gcsr_address(node, offset));
65}
66
67static inline unsigned int read_lcsr(unsigned long offset)
68{
69 return swab32(readl(lcsr_address(offset)));
70}
71
72static inline void write_lcsr(unsigned long offset, unsigned int val)
73{
74 writel(swab32(val), lcsr_address(offset));
75}
76
77/* ========================================================================= */
78/* CSR_G0_STATE_CLEAR */
79/* ========================================================================= */
80
81#define CSR_G0_STATE_CLEAR (0x000 + (0 << 12))
82union numachip_csr_g0_state_clear {
83 unsigned int v;
84 struct numachip_csr_g0_state_clear_s {
85 unsigned int _state:2;
86 unsigned int _rsvd_2_6:5;
87 unsigned int _lost:1;
88 unsigned int _rsvd_8_31:24;
89 } s;
90};
91
92/* ========================================================================= */
93/* CSR_G0_NODE_IDS */
94/* ========================================================================= */
95
96#define CSR_G0_NODE_IDS (0x008 + (0 << 12))
97union numachip_csr_g0_node_ids {
98 unsigned int v;
99 struct numachip_csr_g0_node_ids_s {
100 unsigned int _initialid:16;
101 unsigned int _nodeid:12;
102 unsigned int _rsvd_28_31:4;
103 } s;
104};
105
106/* ========================================================================= */
107/* CSR_G3_EXT_IRQ_GEN */
108/* ========================================================================= */
109
110#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12))
111union numachip_csr_g3_ext_irq_gen {
112 unsigned int v;
113 struct numachip_csr_g3_ext_irq_gen_s {
114 unsigned int _vector:8;
115 unsigned int _msgtype:3;
116 unsigned int _index:5;
117 unsigned int _destination_apic_id:16;
118 } s;
119};
120
121/* ========================================================================= */
122/* CSR_G3_EXT_IRQ_STATUS */
123/* ========================================================================= */
124
125#define CSR_G3_EXT_IRQ_STATUS (0x034 + (3 << 12))
126union numachip_csr_g3_ext_irq_status {
127 unsigned int v;
128 struct numachip_csr_g3_ext_irq_status_s {
129 unsigned int _result:32;
130 } s;
131};
132
133/* ========================================================================= */
134/* CSR_G3_EXT_IRQ_DEST */
135/* ========================================================================= */
136
137#define CSR_G3_EXT_IRQ_DEST (0x038 + (3 << 12))
138union numachip_csr_g3_ext_irq_dest {
139 unsigned int v;
140 struct numachip_csr_g3_ext_irq_dest_s {
141 unsigned int _irq:8;
142 unsigned int _rsvd_8_31:24;
143 } s;
144};
145
146/* ========================================================================= */
147/* CSR_G3_NC_ATT_MAP_SELECT */
148/* ========================================================================= */
149
150#define CSR_G3_NC_ATT_MAP_SELECT (0x7fc + (3 << 12))
151union numachip_csr_g3_nc_att_map_select {
152 unsigned int v;
153 struct numachip_csr_g3_nc_att_map_select_s {
154 unsigned int _upper_address_bits:4;
155 unsigned int _select_ram:4;
156 unsigned int _rsvd_8_31:24;
157 } s;
158};
159
160/* ========================================================================= */
161/* CSR_G3_NC_ATT_MAP_SELECT_0-255 */
162/* ========================================================================= */
163
164#define CSR_G3_NC_ATT_MAP_SELECT_0 (0x800 + (3 << 12))
165
166#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */
167
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3470c9d0ebba..529bf07e8067 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -451,23 +451,20 @@ do { \
451#endif /* !CONFIG_M386 */ 451#endif /* !CONFIG_M386 */
452 452
453#ifdef CONFIG_X86_CMPXCHG64 453#ifdef CONFIG_X86_CMPXCHG64
454#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \ 454#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
455({ \ 455({ \
456 char __ret; \ 456 bool __ret; \
457 typeof(o1) __o1 = o1; \ 457 typeof(pcp1) __o1 = (o1), __n1 = (n1); \
458 typeof(o1) __n1 = n1; \ 458 typeof(pcp2) __o2 = (o2), __n2 = (n2); \
459 typeof(o2) __o2 = o2; \
460 typeof(o2) __n2 = n2; \
461 typeof(o2) __dummy = n2; \
462 asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ 459 asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \
463 : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \ 460 : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \
464 : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \ 461 : "b" (__n1), "c" (__n2), "a" (__o1)); \
465 __ret; \ 462 __ret; \
466}) 463})
467 464
468#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) 465#define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
469#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) 466#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
470#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) 467#define irqsafe_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
471#endif /* CONFIG_X86_CMPXCHG64 */ 468#endif /* CONFIG_X86_CMPXCHG64 */
472 469
473/* 470/*
@@ -508,31 +505,23 @@ do { \
508 * it in software. The address used in the cmpxchg16 instruction must be 505 * it in software. The address used in the cmpxchg16 instruction must be
509 * aligned to a 16 byte boundary. 506 * aligned to a 16 byte boundary.
510 */ 507 */
511#ifdef CONFIG_SMP 508#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \
512#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3
513#else
514#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2
515#endif
516#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \
517({ \ 509({ \
518 char __ret; \ 510 bool __ret; \
519 typeof(o1) __o1 = o1; \ 511 typeof(pcp1) __o1 = (o1), __n1 = (n1); \
520 typeof(o1) __n1 = n1; \ 512 typeof(pcp2) __o2 = (o2), __n2 = (n2); \
521 typeof(o2) __o2 = o2; \ 513 alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
522 typeof(o2) __n2 = n2; \ 514 "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \
523 typeof(o2) __dummy; \
524 alternative_io(CMPXCHG16B_EMU_CALL, \
525 "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \
526 X86_FEATURE_CX16, \ 515 X86_FEATURE_CX16, \
527 ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ 516 ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \
528 "S" (&pcp1), "b"(__n1), "c"(__n2), \ 517 "+m" (pcp2), "+d" (__o2)), \
529 "a"(__o1), "d"(__o2) : "memory"); \ 518 "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \
530 __ret; \ 519 __ret; \
531}) 520})
532 521
533#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) 522#define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
534#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) 523#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
535#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) 524#define irqsafe_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
536 525
537#endif 526#endif
538 527
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f61c62f7d5d8..096c975e099f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -57,6 +57,7 @@
57 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) 57 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
58 58
59#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 59#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
60#define ARCH_PERFMON_EVENTS_COUNT 7
60 61
61/* 62/*
62 * Intel "Architectural Performance Monitoring" CPUID 63 * Intel "Architectural Performance Monitoring" CPUID
@@ -72,6 +73,19 @@ union cpuid10_eax {
72 unsigned int full; 73 unsigned int full;
73}; 74};
74 75
76union cpuid10_ebx {
77 struct {
78 unsigned int no_unhalted_core_cycles:1;
79 unsigned int no_instructions_retired:1;
80 unsigned int no_unhalted_reference_cycles:1;
81 unsigned int no_llc_reference:1;
82 unsigned int no_llc_misses:1;
83 unsigned int no_branch_instruction_retired:1;
84 unsigned int no_branch_misses_retired:1;
85 } split;
86 unsigned int full;
87};
88
75union cpuid10_edx { 89union cpuid10_edx {
76 struct { 90 struct {
77 unsigned int num_counters_fixed:5; 91 unsigned int num_counters_fixed:5;
@@ -81,6 +95,15 @@ union cpuid10_edx {
81 unsigned int full; 95 unsigned int full;
82}; 96};
83 97
98struct x86_pmu_capability {
99 int version;
100 int num_counters_gp;
101 int num_counters_fixed;
102 int bit_width_gp;
103 int bit_width_fixed;
104 unsigned int events_mask;
105 int events_mask_len;
106};
84 107
85/* 108/*
86 * Fixed-purpose performance events: 109 * Fixed-purpose performance events:
@@ -89,23 +112,24 @@ union cpuid10_edx {
89/* 112/*
90 * All 3 fixed-mode PMCs are configured via this single MSR: 113 * All 3 fixed-mode PMCs are configured via this single MSR:
91 */ 114 */
92#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d 115#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
93 116
94/* 117/*
95 * The counts are available in three separate MSRs: 118 * The counts are available in three separate MSRs:
96 */ 119 */
97 120
98/* Instr_Retired.Any: */ 121/* Instr_Retired.Any: */
99#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 122#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
100#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) 123#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
101 124
102/* CPU_CLK_Unhalted.Core: */ 125/* CPU_CLK_Unhalted.Core: */
103#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a 126#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
104#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) 127#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
105 128
106/* CPU_CLK_Unhalted.Ref: */ 129/* CPU_CLK_Unhalted.Ref: */
107#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b 130#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
108#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) 131#define X86_PMC_IDX_FIXED_REF_CYCLES (X86_PMC_IDX_FIXED + 2)
132#define X86_PMC_MSK_FIXED_REF_CYCLES (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES)
109 133
110/* 134/*
111 * We model BTS tracing as another fixed-mode PMC. 135 * We model BTS tracing as another fixed-mode PMC.
@@ -202,6 +226,7 @@ struct perf_guest_switch_msr {
202}; 226};
203 227
204extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); 228extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
229extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
205#else 230#else
206static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) 231static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
207{ 232{
@@ -209,6 +234,11 @@ static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
209 return NULL; 234 return NULL;
210} 235}
211 236
237static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
238{
239 memset(cap, 0, sizeof(*cap));
240}
241
212static inline void perf_events_lapic_init(void) { } 242static inline void perf_events_lapic_init(void) { }
213#endif 243#endif
214 244
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 18601c86fab1..49afb3f41eb6 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -703,7 +703,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
703 pte_update(mm, addr, ptep); 703 pte_update(mm, addr, ptep);
704} 704}
705 705
706#define flush_tlb_fix_spurious_fault(vma, address) 706#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
707 707
708#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) 708#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
709 709
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 2dddb317bb39..f8ab3eaad128 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -6,6 +6,7 @@
6 * EFLAGS bits 6 * EFLAGS bits
7 */ 7 */
8#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ 8#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
9#define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */
9#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ 10#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
10#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */ 11#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
11#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ 12#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b650435ffb53..aa9088c26931 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -99,7 +99,6 @@ struct cpuinfo_x86 {
99 u16 apicid; 99 u16 apicid;
100 u16 initial_apicid; 100 u16 initial_apicid;
101 u16 x86_clflush_size; 101 u16 x86_clflush_size;
102#ifdef CONFIG_SMP
103 /* number of cores as seen by the OS: */ 102 /* number of cores as seen by the OS: */
104 u16 booted_cores; 103 u16 booted_cores;
105 /* Physical processor id: */ 104 /* Physical processor id: */
@@ -110,7 +109,6 @@ struct cpuinfo_x86 {
110 u8 compute_unit_id; 109 u8 compute_unit_id;
111 /* Index into per_cpu list: */ 110 /* Index into per_cpu list: */
112 u16 cpu_index; 111 u16 cpu_index;
113#endif
114 u32 microcode; 112 u32 microcode;
115} __attribute__((__aligned__(SMP_CACHE_BYTES))); 113} __attribute__((__aligned__(SMP_CACHE_BYTES)));
116 114
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 972c260919a3..a82c2bf504b6 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -79,23 +79,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
79 return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; 79 return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
80} 80}
81 81
82#if (NR_CPUS < 256)
83static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) 82static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
84{ 83{
85 asm volatile(UNLOCK_LOCK_PREFIX "incb %0" 84 __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
86 : "+m" (lock->head_tail)
87 :
88 : "memory", "cc");
89} 85}
90#else
91static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
92{
93 asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
94 : "+m" (lock->head_tail)
95 :
96 : "memory", "cc");
97}
98#endif
99 86
100static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) 87static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
101{ 88{
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a1fe5c127b52..74047159d0ab 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -40,7 +40,8 @@ struct thread_info {
40 */ 40 */
41 __u8 supervisor_stack[0]; 41 __u8 supervisor_stack[0];
42#endif 42#endif
43 int uaccess_err; 43 int sig_on_uaccess_error:1;
44 int uaccess_err:1; /* uaccess failed */
44}; 45};
45 46
46#define INIT_THREAD_INFO(tsk) \ 47#define INIT_THREAD_INFO(tsk) \
@@ -90,7 +91,6 @@ struct thread_info {
90#define TIF_MEMDIE 20 /* is terminating due to OOM killer */ 91#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
91#define TIF_DEBUG 21 /* uses debug registers */ 92#define TIF_DEBUG 21 /* uses debug registers */
92#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ 93#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
93#define TIF_FREEZE 23 /* is freezing for suspend */
94#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ 94#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
95#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ 95#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
96#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ 96#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
@@ -112,7 +112,6 @@ struct thread_info {
112#define _TIF_FORK (1 << TIF_FORK) 112#define _TIF_FORK (1 << TIF_FORK)
113#define _TIF_DEBUG (1 << TIF_DEBUG) 113#define _TIF_DEBUG (1 << TIF_DEBUG)
114#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) 114#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
115#define _TIF_FREEZE (1 << TIF_FREEZE)
116#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) 115#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
117#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) 116#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
118#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) 117#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
@@ -231,6 +230,12 @@ static inline struct thread_info *current_thread_info(void)
231 movq PER_CPU_VAR(kernel_stack),reg ; \ 230 movq PER_CPU_VAR(kernel_stack),reg ; \
232 subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg 231 subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
233 232
233/*
234 * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
235 * a certain register (to be used in assembler memory operands).
236 */
237#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
238
234#endif 239#endif
235 240
236#endif /* !X86_32 */ 241#endif /* !X86_32 */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index c00692476e9f..800f77c60051 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -130,10 +130,8 @@ extern void setup_node_to_cpumask_map(void);
130 .balance_interval = 1, \ 130 .balance_interval = 1, \
131} 131}
132 132
133#ifdef CONFIG_X86_64
134extern int __node_distance(int, int); 133extern int __node_distance(int, int);
135#define node_distance(a, b) __node_distance(a, b) 134#define node_distance(a, b) __node_distance(a, b)
136#endif
137 135
138#else /* !CONFIG_NUMA */ 136#else /* !CONFIG_NUMA */
139 137
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 83e2efd181e2..15d99153a96d 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -51,6 +51,8 @@ extern int unsynchronized_tsc(void);
51extern int check_tsc_unstable(void); 51extern int check_tsc_unstable(void);
52extern unsigned long native_calibrate_tsc(void); 52extern unsigned long native_calibrate_tsc(void);
53 53
54extern int tsc_clocksource_reliable;
55
54/* 56/*
55 * Boot-time check whether the TSCs are synchronized across 57 * Boot-time check whether the TSCs are synchronized across
56 * all CPUs/cores: 58 * all CPUs/cores:
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 36361bf6fdd1..8be5f54d9360 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -462,7 +462,7 @@ struct __large_struct { unsigned long buf[100]; };
462 barrier(); 462 barrier();
463 463
464#define uaccess_catch(err) \ 464#define uaccess_catch(err) \
465 (err) |= current_thread_info()->uaccess_err; \ 465 (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \
466 current_thread_info()->uaccess_err = prev_err; \ 466 current_thread_info()->uaccess_err = prev_err; \
467} while (0) 467} while (0)
468 468
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 1971e652d24b..1ac860a09849 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -7,6 +7,7 @@
7struct mpc_bus; 7struct mpc_bus;
8struct mpc_cpu; 8struct mpc_cpu;
9struct mpc_table; 9struct mpc_table;
10struct cpuinfo_x86;
10 11
11/** 12/**
12 * struct x86_init_mpparse - platform specific mpparse ops 13 * struct x86_init_mpparse - platform specific mpparse ops
@@ -147,6 +148,7 @@ struct x86_init_ops {
147 */ 148 */
148struct x86_cpuinit_ops { 149struct x86_cpuinit_ops {
149 void (*setup_percpu_clockev)(void); 150 void (*setup_percpu_clockev)(void);
151 void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
150}; 152};
151 153
152/** 154/**
@@ -186,5 +188,6 @@ extern struct x86_msi_ops x86_msi;
186 188
187extern void x86_init_noop(void); 189extern void x86_init_noop(void);
188extern void x86_init_uint_noop(unsigned int unused); 190extern void x86_init_uint_noop(unsigned int unused);
191extern void x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node);
189 192
190#endif 193#endif
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4558f0d0822d..ce664f33ea8e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -219,6 +219,8 @@ static int __init
219acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) 219acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
220{ 220{
221 struct acpi_madt_local_x2apic *processor = NULL; 221 struct acpi_madt_local_x2apic *processor = NULL;
222 int apic_id;
223 u8 enabled;
222 224
223 processor = (struct acpi_madt_local_x2apic *)header; 225 processor = (struct acpi_madt_local_x2apic *)header;
224 226
@@ -227,6 +229,8 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
227 229
228 acpi_table_print_madt_entry(header); 230 acpi_table_print_madt_entry(header);
229 231
232 apic_id = processor->local_apic_id;
233 enabled = processor->lapic_flags & ACPI_MADT_ENABLED;
230#ifdef CONFIG_X86_X2APIC 234#ifdef CONFIG_X86_X2APIC
231 /* 235 /*
232 * We need to register disabled CPU as well to permit 236 * We need to register disabled CPU as well to permit
@@ -235,8 +239,10 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
235 * to not preallocating memory for all NR_CPUS 239 * to not preallocating memory for all NR_CPUS
236 * when we use CPU hotplug. 240 * when we use CPU hotplug.
237 */ 241 */
238 acpi_register_lapic(processor->local_apic_id, /* APIC ID */ 242 if (!cpu_has_x2apic && (apic_id >= 0xff) && enabled)
239 processor->lapic_flags & ACPI_MADT_ENABLED); 243 printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
244 else
245 acpi_register_lapic(apic_id, enabled);
240#else 246#else
241 printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); 247 printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
242#endif 248#endif
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 4c39baa8facc..013c1810ce72 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -123,16 +123,14 @@ int amd_get_subcaches(int cpu)
123{ 123{
124 struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; 124 struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
125 unsigned int mask; 125 unsigned int mask;
126 int cuid = 0; 126 int cuid;
127 127
128 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) 128 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
129 return 0; 129 return 0;
130 130
131 pci_read_config_dword(link, 0x1d4, &mask); 131 pci_read_config_dword(link, 0x1d4, &mask);
132 132
133#ifdef CONFIG_SMP
134 cuid = cpu_data(cpu).compute_unit_id; 133 cuid = cpu_data(cpu).compute_unit_id;
135#endif
136 return (mask >> (4 * cuid)) & 0xf; 134 return (mask >> (4 * cuid)) & 0xf;
137} 135}
138 136
@@ -141,7 +139,7 @@ int amd_set_subcaches(int cpu, int mask)
141 static unsigned int reset, ban; 139 static unsigned int reset, ban;
142 struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); 140 struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
143 unsigned int reg; 141 unsigned int reg;
144 int cuid = 0; 142 int cuid;
145 143
146 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf) 144 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
147 return -EINVAL; 145 return -EINVAL;
@@ -159,9 +157,7 @@ int amd_set_subcaches(int cpu, int mask)
159 pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); 157 pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
160 } 158 }
161 159
162#ifdef CONFIG_SMP
163 cuid = cpu_data(cpu).compute_unit_id; 160 cuid = cpu_data(cpu).compute_unit_id;
164#endif
165 mask <<= 4 * cuid; 161 mask <<= 4 * cuid;
166 mask |= (0xf ^ (1 << cuid)) << 26; 162 mask |= (0xf ^ (1 << cuid)) << 26;
167 163
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 3d2661ca6542..6e76c191a835 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -88,13 +88,13 @@ static u32 __init allocate_aperture(void)
88 */ 88 */
89 addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, 89 addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
90 aper_size, aper_size); 90 aper_size, aper_size);
91 if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) { 91 if (!addr || addr + aper_size > GART_MAX_ADDR) {
92 printk(KERN_ERR 92 printk(KERN_ERR
93 "Cannot allocate aperture memory hole (%lx,%uK)\n", 93 "Cannot allocate aperture memory hole (%lx,%uK)\n",
94 addr, aper_size>>10); 94 addr, aper_size>>10);
95 return 0; 95 return 0;
96 } 96 }
97 memblock_x86_reserve_range(addr, addr + aper_size, "aperture64"); 97 memblock_reserve(addr, aper_size);
98 /* 98 /*
99 * Kmemleak should not scan this block as it may not be mapped via the 99 * Kmemleak should not scan this block as it may not be mapped via the
100 * kernel direct mapping. 100 * kernel direct mapping.
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 767fd04f2843..0ae0323b1f9c 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_SMP) += ipi.o
10 10
11ifeq ($(CONFIG_X86_64),y) 11ifeq ($(CONFIG_X86_64),y)
12# APIC probe will depend on the listing order here 12# APIC probe will depend on the listing order here
13obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o
13obj-$(CONFIG_X86_UV) += x2apic_uv_x.o 14obj-$(CONFIG_X86_UV) += x2apic_uv_x.o
14obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o 15obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o
15obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o 16obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f98d84caf94c..2eec05b6d1b8 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -146,16 +146,26 @@ __setup("apicpmtimer", setup_apicpmtimer);
146int x2apic_mode; 146int x2apic_mode;
147#ifdef CONFIG_X86_X2APIC 147#ifdef CONFIG_X86_X2APIC
148/* x2apic enabled before OS handover */ 148/* x2apic enabled before OS handover */
149static int x2apic_preenabled; 149int x2apic_preenabled;
150static int x2apic_disabled;
151static int nox2apic;
150static __init int setup_nox2apic(char *str) 152static __init int setup_nox2apic(char *str)
151{ 153{
152 if (x2apic_enabled()) { 154 if (x2apic_enabled()) {
153 pr_warning("Bios already enabled x2apic, " 155 int apicid = native_apic_msr_read(APIC_ID);
154 "can't enforce nox2apic"); 156
155 return 0; 157 if (apicid >= 255) {
156 } 158 pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
159 apicid);
160 return 0;
161 }
162
163 pr_warning("x2apic already enabled. will disable it\n");
164 } else
165 setup_clear_cpu_cap(X86_FEATURE_X2APIC);
166
167 nox2apic = 1;
157 168
158 setup_clear_cpu_cap(X86_FEATURE_X2APIC);
159 return 0; 169 return 0;
160} 170}
161early_param("nox2apic", setup_nox2apic); 171early_param("nox2apic", setup_nox2apic);
@@ -250,6 +260,7 @@ u32 native_safe_apic_wait_icr_idle(void)
250 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; 260 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
251 if (!send_status) 261 if (!send_status)
252 break; 262 break;
263 inc_irq_stat(icr_read_retry_count);
253 udelay(100); 264 udelay(100);
254 } while (timeout++ < 1000); 265 } while (timeout++ < 1000);
255 266
@@ -876,8 +887,8 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
876 * Besides, if we don't timer interrupts ignore the global 887 * Besides, if we don't timer interrupts ignore the global
877 * interrupt lock, which is the WrongThing (tm) to do. 888 * interrupt lock, which is the WrongThing (tm) to do.
878 */ 889 */
879 exit_idle();
880 irq_enter(); 890 irq_enter();
891 exit_idle();
881 local_apic_timer_interrupt(); 892 local_apic_timer_interrupt();
882 irq_exit(); 893 irq_exit();
883 894
@@ -1431,6 +1442,45 @@ void __init bsp_end_local_APIC_setup(void)
1431} 1442}
1432 1443
1433#ifdef CONFIG_X86_X2APIC 1444#ifdef CONFIG_X86_X2APIC
1445/*
1446 * Need to disable xapic and x2apic at the same time and then enable xapic mode
1447 */
1448static inline void __disable_x2apic(u64 msr)
1449{
1450 wrmsrl(MSR_IA32_APICBASE,
1451 msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
1452 wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
1453}
1454
1455static __init void disable_x2apic(void)
1456{
1457 u64 msr;
1458
1459 if (!cpu_has_x2apic)
1460 return;
1461
1462 rdmsrl(MSR_IA32_APICBASE, msr);
1463 if (msr & X2APIC_ENABLE) {
1464 u32 x2apic_id = read_apic_id();
1465
1466 if (x2apic_id >= 255)
1467 panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
1468
1469 pr_info("Disabling x2apic\n");
1470 __disable_x2apic(msr);
1471
1472 if (nox2apic) {
1473 clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC);
1474 setup_clear_cpu_cap(X86_FEATURE_X2APIC);
1475 }
1476
1477 x2apic_disabled = 1;
1478 x2apic_mode = 0;
1479
1480 register_lapic_address(mp_lapic_addr);
1481 }
1482}
1483
1434void check_x2apic(void) 1484void check_x2apic(void)
1435{ 1485{
1436 if (x2apic_enabled()) { 1486 if (x2apic_enabled()) {
@@ -1441,15 +1491,20 @@ void check_x2apic(void)
1441 1491
1442void enable_x2apic(void) 1492void enable_x2apic(void)
1443{ 1493{
1444 int msr, msr2; 1494 u64 msr;
1495
1496 rdmsrl(MSR_IA32_APICBASE, msr);
1497 if (x2apic_disabled) {
1498 __disable_x2apic(msr);
1499 return;
1500 }
1445 1501
1446 if (!x2apic_mode) 1502 if (!x2apic_mode)
1447 return; 1503 return;
1448 1504
1449 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1450 if (!(msr & X2APIC_ENABLE)) { 1505 if (!(msr & X2APIC_ENABLE)) {
1451 printk_once(KERN_INFO "Enabling x2apic\n"); 1506 printk_once(KERN_INFO "Enabling x2apic\n");
1452 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, msr2); 1507 wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
1453 } 1508 }
1454} 1509}
1455#endif /* CONFIG_X86_X2APIC */ 1510#endif /* CONFIG_X86_X2APIC */
@@ -1486,25 +1541,34 @@ void __init enable_IR_x2apic(void)
1486 ret = save_ioapic_entries(); 1541 ret = save_ioapic_entries();
1487 if (ret) { 1542 if (ret) {
1488 pr_info("Saving IO-APIC state failed: %d\n", ret); 1543 pr_info("Saving IO-APIC state failed: %d\n", ret);
1489 goto out; 1544 return;
1490 } 1545 }
1491 1546
1492 local_irq_save(flags); 1547 local_irq_save(flags);
1493 legacy_pic->mask_all(); 1548 legacy_pic->mask_all();
1494 mask_ioapic_entries(); 1549 mask_ioapic_entries();
1495 1550
1551 if (x2apic_preenabled && nox2apic)
1552 disable_x2apic();
1553
1496 if (dmar_table_init_ret) 1554 if (dmar_table_init_ret)
1497 ret = -1; 1555 ret = -1;
1498 else 1556 else
1499 ret = enable_IR(); 1557 ret = enable_IR();
1500 1558
1559 if (!x2apic_supported())
1560 goto skip_x2apic;
1561
1501 if (ret < 0) { 1562 if (ret < 0) {
1502 /* IR is required if there is APIC ID > 255 even when running 1563 /* IR is required if there is APIC ID > 255 even when running
1503 * under KVM 1564 * under KVM
1504 */ 1565 */
1505 if (max_physical_apicid > 255 || 1566 if (max_physical_apicid > 255 ||
1506 !hypervisor_x2apic_available()) 1567 !hypervisor_x2apic_available()) {
1507 goto nox2apic; 1568 if (x2apic_preenabled)
1569 disable_x2apic();
1570 goto skip_x2apic;
1571 }
1508 /* 1572 /*
1509 * without IR all CPUs can be addressed by IOAPIC/MSI 1573 * without IR all CPUs can be addressed by IOAPIC/MSI
1510 * only in physical mode 1574 * only in physical mode
@@ -1512,8 +1576,10 @@ void __init enable_IR_x2apic(void)
1512 x2apic_force_phys(); 1576 x2apic_force_phys();
1513 } 1577 }
1514 1578
1515 if (ret == IRQ_REMAP_XAPIC_MODE) 1579 if (ret == IRQ_REMAP_XAPIC_MODE) {
1516 goto nox2apic; 1580 pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
1581 goto skip_x2apic;
1582 }
1517 1583
1518 x2apic_enabled = 1; 1584 x2apic_enabled = 1;
1519 1585
@@ -1523,22 +1589,11 @@ void __init enable_IR_x2apic(void)
1523 pr_info("Enabled x2apic\n"); 1589 pr_info("Enabled x2apic\n");
1524 } 1590 }
1525 1591
1526nox2apic: 1592skip_x2apic:
1527 if (ret < 0) /* IR enabling failed */ 1593 if (ret < 0) /* IR enabling failed */
1528 restore_ioapic_entries(); 1594 restore_ioapic_entries();
1529 legacy_pic->restore_mask(); 1595 legacy_pic->restore_mask();
1530 local_irq_restore(flags); 1596 local_irq_restore(flags);
1531
1532out:
1533 if (x2apic_enabled || !x2apic_supported())
1534 return;
1535
1536 if (x2apic_preenabled)
1537 panic("x2apic: enabled by BIOS but kernel init failed.");
1538 else if (ret == IRQ_REMAP_XAPIC_MODE)
1539 pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
1540 else if (ret < 0)
1541 pr_info("x2apic not enabled, IRQ remapping init failed\n");
1542} 1597}
1543 1598
1544#ifdef CONFIG_X86_64 1599#ifdef CONFIG_X86_64
@@ -1809,8 +1864,8 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1809{ 1864{
1810 u32 v; 1865 u32 v;
1811 1866
1812 exit_idle();
1813 irq_enter(); 1867 irq_enter();
1868 exit_idle();
1814 /* 1869 /*
1815 * Check if this really is a spurious interrupt and ACK it 1870 * Check if this really is a spurious interrupt and ACK it
1816 * if it is a vectored one. Just in case... 1871 * if it is a vectored one. Just in case...
@@ -1846,8 +1901,8 @@ void smp_error_interrupt(struct pt_regs *regs)
1846 "Illegal register address", /* APIC Error Bit 7 */ 1901 "Illegal register address", /* APIC Error Bit 7 */
1847 }; 1902 };
1848 1903
1849 exit_idle();
1850 irq_enter(); 1904 irq_enter();
1905 exit_idle();
1851 /* First tickle the hardware, only then report what went on. -- REW */ 1906 /* First tickle the hardware, only then report what went on. -- REW */
1852 v0 = apic_read(APIC_ESR); 1907 v0 = apic_read(APIC_ESR);
1853 apic_write(APIC_ESR, 0); 1908 apic_write(APIC_ESR, 0);
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index f7a41e4cae47..8c3cdded6f2b 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -62,7 +62,7 @@ static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
62 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel 62 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
63 * document number 292116). So here it goes... 63 * document number 292116). So here it goes...
64 */ 64 */
65static void flat_init_apic_ldr(void) 65void flat_init_apic_ldr(void)
66{ 66{
67 unsigned long val; 67 unsigned long val;
68 unsigned long num, id; 68 unsigned long num, id;
@@ -171,9 +171,14 @@ static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
171 return initial_apic_id >> index_msb; 171 return initial_apic_id >> index_msb;
172} 172}
173 173
/* Probe callback for the "flat" APIC driver: always reports a match. */
static int flat_probe(void)
{
	const int matched = 1;

	return matched;
}
178
174static struct apic apic_flat = { 179static struct apic apic_flat = {
175 .name = "flat", 180 .name = "flat",
176 .probe = NULL, 181 .probe = flat_probe,
177 .acpi_madt_oem_check = flat_acpi_madt_oem_check, 182 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
178 .apic_id_registered = flat_apic_id_registered, 183 .apic_id_registered = flat_apic_id_registered,
179 184
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
new file mode 100644
index 000000000000..09d3d8c1cd99
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -0,0 +1,294 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Numascale NumaConnect-Specific APIC Code
7 *
8 * Copyright (C) 2011 Numascale AS. All rights reserved.
9 *
10 * Send feedback to <support@numascale.com>
11 *
12 */
13
14#include <linux/errno.h>
15#include <linux/threads.h>
16#include <linux/cpumask.h>
17#include <linux/string.h>
18#include <linux/kernel.h>
19#include <linux/module.h>
20#include <linux/ctype.h>
21#include <linux/init.h>
22#include <linux/hardirq.h>
23#include <linux/delay.h>
24
25#include <asm/numachip/numachip_csr.h>
26#include <asm/smp.h>
27#include <asm/apic.h>
28#include <asm/ipi.h>
29#include <asm/apic_flat_64.h>
30
31static int numachip_system __read_mostly;
32
33static struct apic apic_numachip __read_mostly;
34
35static unsigned int get_apic_id(unsigned long x)
36{
37 unsigned long value;
38 unsigned int id;
39
40 rdmsrl(MSR_FAM10H_NODE_ID, value);
41 id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U);
42
43 return id;
44}
45
/*
 * Encode an APIC ID for the APIC_ID register: the low 8 bits of @id are
 * placed in bits 31:24; any higher bits of @id are discarded.
 */
static unsigned long set_apic_id(unsigned int id)
{
	return (id & 0xffU) << 24;
}
53
54static unsigned int read_xapic_id(void)
55{
56 return get_apic_id(apic_read(APIC_ID));
57}
58
59static int numachip_apic_id_registered(void)
60{
61 return physid_isset(read_xapic_id(), phys_cpu_present_map);
62}
63
/* Package ID is the initial APIC ID shifted right by @index_msb bits. */
static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
{
	int pkg_id = initial_apic_id >> index_msb;

	return pkg_id;
}
68
69static const struct cpumask *numachip_target_cpus(void)
70{
71 return cpu_online_mask;
72}
73
/*
 * Vector allocation domain for @cpu: @retmask is emptied and then only
 * @cpu is set in it.
 */
static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
	/* start from an empty mask ... */
	cpumask_clear(retmask);
	/* ... and add the requested CPU as its sole member */
	cpumask_set_cpu(cpu, retmask);
}
79
80static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
81{
82 union numachip_csr_g3_ext_irq_gen int_gen;
83
84 int_gen.s._destination_apic_id = phys_apicid;
85 int_gen.s._vector = 0;
86 int_gen.s._msgtype = APIC_DM_INIT >> 8;
87 int_gen.s._index = 0;
88
89 write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
90
91 int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
92 int_gen.s._vector = start_rip >> 12;
93
94 write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
95
96 atomic_set(&init_deasserted, 1);
97 return 0;
98}
99
100static void numachip_send_IPI_one(int cpu, int vector)
101{
102 union numachip_csr_g3_ext_irq_gen int_gen;
103 int apicid = per_cpu(x86_cpu_to_apicid, cpu);
104
105 int_gen.s._destination_apic_id = apicid;
106 int_gen.s._vector = vector;
107 int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8;
108 int_gen.s._index = 0;
109
110 write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
111}
112
113static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
114{
115 unsigned int cpu;
116
117 for_each_cpu(cpu, mask)
118 numachip_send_IPI_one(cpu, vector);
119}
120
121static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask,
122 int vector)
123{
124 unsigned int this_cpu = smp_processor_id();
125 unsigned int cpu;
126
127 for_each_cpu(cpu, mask) {
128 if (cpu != this_cpu)
129 numachip_send_IPI_one(cpu, vector);
130 }
131}
132
133static void numachip_send_IPI_allbutself(int vector)
134{
135 unsigned int this_cpu = smp_processor_id();
136 unsigned int cpu;
137
138 for_each_online_cpu(cpu) {
139 if (cpu != this_cpu)
140 numachip_send_IPI_one(cpu, vector);
141 }
142}
143
144static void numachip_send_IPI_all(int vector)
145{
146 numachip_send_IPI_mask(cpu_online_mask, vector);
147}
148
149static void numachip_send_IPI_self(int vector)
150{
151 __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
152}
153
154static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
155{
156 int cpu;
157
158 /*
159 * We're using fixed IRQ delivery, can only return one phys APIC ID.
160 * May as well be the first.
161 */
162 cpu = cpumask_first(cpumask);
163 if (likely((unsigned)cpu < nr_cpu_ids))
164 return per_cpu(x86_cpu_to_apicid, cpu);
165
166 return BAD_APICID;
167}
168
169static unsigned int
170numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
171 const struct cpumask *andmask)
172{
173 int cpu;
174
175 /*
176 * We're using fixed IRQ delivery, can only return one phys APIC ID.
177 * May as well be the first.
178 */
179 for_each_cpu_and(cpu, cpumask, andmask) {
180 if (cpumask_test_cpu(cpu, cpu_online_mask))
181 break;
182 }
183 return per_cpu(x86_cpu_to_apicid, cpu);
184}
185
186static int __init numachip_probe(void)
187{
188 return apic == &apic_numachip;
189}
190
191static void __init map_csrs(void)
192{
193 printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n",
194 NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1);
195 init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
196
197 printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n",
198 NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1);
199 init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
200}
201
202static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
203{
204 c->phys_proc_id = node;
205 per_cpu(cpu_llc_id, smp_processor_id()) = node;
206}
207
208static int __init numachip_system_init(void)
209{
210 unsigned int val;
211
212 if (!numachip_system)
213 return 0;
214
215 x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
216
217 map_csrs();
218
219 val = read_lcsr(CSR_G0_NODE_IDS);
220 printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val);
221
222 return 0;
223}
224early_initcall(numachip_system_init);
225
226static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
227{
228 if (!strncmp(oem_id, "NUMASC", 6)) {
229 numachip_system = 1;
230 return 1;
231 }
232
233 return 0;
234}
235
236static struct apic apic_numachip __refconst = {
237
238 .name = "NumaConnect system",
239 .probe = numachip_probe,
240 .acpi_madt_oem_check = numachip_acpi_madt_oem_check,
241 .apic_id_registered = numachip_apic_id_registered,
242
243 .irq_delivery_mode = dest_Fixed,
244 .irq_dest_mode = 0, /* physical */
245
246 .target_cpus = numachip_target_cpus,
247 .disable_esr = 0,
248 .dest_logical = 0,
249 .check_apicid_used = NULL,
250 .check_apicid_present = NULL,
251
252 .vector_allocation_domain = numachip_vector_allocation_domain,
253 .init_apic_ldr = flat_init_apic_ldr,
254
255 .ioapic_phys_id_map = NULL,
256 .setup_apic_routing = NULL,
257 .multi_timer_check = NULL,
258 .cpu_present_to_apicid = default_cpu_present_to_apicid,
259 .apicid_to_cpu_present = NULL,
260 .setup_portio_remap = NULL,
261 .check_phys_apicid_present = default_check_phys_apicid_present,
262 .enable_apic_mode = NULL,
263 .phys_pkg_id = numachip_phys_pkg_id,
264 .mps_oem_check = NULL,
265
266 .get_apic_id = get_apic_id,
267 .set_apic_id = set_apic_id,
268 .apic_id_mask = 0xffU << 24,
269
270 .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid,
271 .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and,
272
273 .send_IPI_mask = numachip_send_IPI_mask,
274 .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
275 .send_IPI_allbutself = numachip_send_IPI_allbutself,
276 .send_IPI_all = numachip_send_IPI_all,
277 .send_IPI_self = numachip_send_IPI_self,
278
279 .wakeup_secondary_cpu = numachip_wakeup_secondary,
280 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
281 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
282 .wait_for_init_deassert = NULL,
283 .smp_callin_clear_local_apic = NULL,
284 .inquire_remote_apic = NULL, /* REMRD not supported */
285
286 .read = native_apic_mem_read,
287 .write = native_apic_mem_write,
288 .icr_read = native_apic_icr_read,
289 .icr_write = native_apic_icr_write,
290 .wait_icr_idle = native_apic_wait_icr_idle,
291 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
292};
293apic_driver(apic_numachip);
294
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6d939d7847e2..fb072754bc1d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2421,8 +2421,8 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2421 unsigned vector, me; 2421 unsigned vector, me;
2422 2422
2423 ack_APIC_irq(); 2423 ack_APIC_irq();
2424 exit_idle();
2425 irq_enter(); 2424 irq_enter();
2425 exit_idle();
2426 2426
2427 me = smp_processor_id(); 2427 me = smp_processor_id();
2428 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 2428 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
@@ -2948,6 +2948,10 @@ static inline void __init check_timer(void)
2948 } 2948 }
2949 local_irq_disable(); 2949 local_irq_disable();
2950 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); 2950 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
2951 if (x2apic_preenabled)
2952 apic_printk(APIC_QUIET, KERN_INFO
2953 "Perhaps problem with the pre-enabled x2apic mode\n"
2954 "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
2951 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " 2955 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
2952 "report. Then try booting with the 'noapic' option.\n"); 2956 "report. Then try booting with the 'noapic' option.\n");
2953out: 2957out:
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 452932d34730..5da1269e8ddc 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -62,7 +62,8 @@ early_param("memory_corruption_check_size", set_corruption_check_size);
62 62
63void __init setup_bios_corruption_check(void) 63void __init setup_bios_corruption_check(void)
64{ 64{
65 u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ 65 phys_addr_t start, end;
66 u64 i;
66 67
67 if (memory_corruption_check == -1) { 68 if (memory_corruption_check == -1) {
68 memory_corruption_check = 69 memory_corruption_check =
@@ -82,28 +83,23 @@ void __init setup_bios_corruption_check(void)
82 83
83 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); 84 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
84 85
85 while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { 86 for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
86 u64 size; 87 start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
87 addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE); 88 PAGE_SIZE, corruption_check_size);
89 end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),
90 PAGE_SIZE, corruption_check_size);
91 if (start >= end)
92 continue;
88 93
89 if (addr == MEMBLOCK_ERROR) 94 memblock_reserve(start, end - start);
90 break; 95 scan_areas[num_scan_areas].addr = start;
91 96 scan_areas[num_scan_areas].size = end - start;
92 if (addr >= corruption_check_size)
93 break;
94
95 if ((addr + size) > corruption_check_size)
96 size = corruption_check_size - addr;
97
98 memblock_x86_reserve_range(addr, addr + size, "SCAN RAM");
99 scan_areas[num_scan_areas].addr = addr;
100 scan_areas[num_scan_areas].size = size;
101 num_scan_areas++;
102 97
103 /* Assume we've already mapped this early memory */ 98 /* Assume we've already mapped this early memory */
104 memset(__va(addr), 0, size); 99 memset(__va(start), 0, end - start);
105 100
106 addr += size; 101 if (++num_scan_areas >= MAX_SCAN_AREAS)
102 break;
107 } 103 }
108 104
109 if (num_scan_areas) 105 if (num_scan_areas)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 0bab2b18bb20..f4773f4aae35 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -148,7 +148,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
148 148
149static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) 149static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
150{ 150{
151#ifdef CONFIG_SMP
152 /* calling is from identify_secondary_cpu() ? */ 151 /* calling is from identify_secondary_cpu() ? */
153 if (!c->cpu_index) 152 if (!c->cpu_index)
154 return; 153 return;
@@ -192,7 +191,6 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
192 191
193valid_k7: 192valid_k7:
194 ; 193 ;
195#endif
196} 194}
197 195
198static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) 196static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
@@ -353,6 +351,13 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
353 if (node == NUMA_NO_NODE) 351 if (node == NUMA_NO_NODE)
354 node = per_cpu(cpu_llc_id, cpu); 352 node = per_cpu(cpu_llc_id, cpu);
355 353
354 /*
355 * If core numbers are inconsistent, it's likely a multi-fabric platform,
356 * so invoke platform-specific handler
357 */
358 if (c->phys_proc_id != node)
359 x86_cpuinit.fixup_cpu_id(c, node);
360
356 if (!node_online(node)) { 361 if (!node_online(node)) {
357 /* 362 /*
358 * Two possibilities here: 363 * Two possibilities here:
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e58d978e0758..159103c0b1f4 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -278,7 +278,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
278 } 278 }
279#ifdef CONFIG_X86_32 279#ifdef CONFIG_X86_32
280 /* Cyrix III family needs CX8 & PGE explicitly enabled. */ 280 /* Cyrix III family needs CX8 & PGE explicitly enabled. */
281 if (c->x86_model >= 6 && c->x86_model <= 9) { 281 if (c->x86_model >= 6 && c->x86_model <= 13) {
282 rdmsr(MSR_VIA_FCR, lo, hi); 282 rdmsr(MSR_VIA_FCR, lo, hi);
283 lo |= (1<<1 | 1<<7); 283 lo |= (1<<1 | 1<<7);
284 wrmsr(MSR_VIA_FCR, lo, hi); 284 wrmsr(MSR_VIA_FCR, lo, hi);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index aa003b13a831..850f2963a420 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -676,9 +676,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
676 if (this_cpu->c_early_init) 676 if (this_cpu->c_early_init)
677 this_cpu->c_early_init(c); 677 this_cpu->c_early_init(c);
678 678
679#ifdef CONFIG_SMP
680 c->cpu_index = 0; 679 c->cpu_index = 0;
681#endif
682 filter_cpuid_features(c, false); 680 filter_cpuid_features(c, false);
683 681
684 setup_smep(c); 682 setup_smep(c);
@@ -764,10 +762,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
764 c->apicid = c->initial_apicid; 762 c->apicid = c->initial_apicid;
765# endif 763# endif
766#endif 764#endif
767
768#ifdef CONFIG_X86_HT
769 c->phys_proc_id = c->initial_apicid; 765 c->phys_proc_id = c->initial_apicid;
770#endif
771 } 766 }
772 767
773 setup_smep(c); 768 setup_smep(c);
@@ -1141,6 +1136,15 @@ static void dbg_restore_debug_regs(void)
1141#endif /* ! CONFIG_KGDB */ 1136#endif /* ! CONFIG_KGDB */
1142 1137
1143/* 1138/*
1139 * Prints an error where the NUMA and configured core-number mismatch and the
1140 * platform didn't override this to fix it up
1141 */
1142void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node)
1143{
1144 pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id);
1145}
1146
1147/*
1144 * cpu_init() initializes state that is per-CPU. Some data is already 1148 * cpu_init() initializes state that is per-CPU. Some data is already
1145 * initialized (naturally) in the bootstrap process, such as the GDT 1149 * initialized (naturally) in the bootstrap process, such as the GDT
1146 * and IDT. We reload them nevertheless, this function acts as a 1150 * and IDT. We reload them nevertheless, this function acts as a
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 1b22dcc51af4..8bacc7826fb3 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -1,5 +1,4 @@
1#ifndef ARCH_X86_CPU_H 1#ifndef ARCH_X86_CPU_H
2
3#define ARCH_X86_CPU_H 2#define ARCH_X86_CPU_H
4 3
5struct cpu_model_info { 4struct cpu_model_info {
@@ -35,6 +34,4 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
35 34
36extern void get_cpu_cap(struct cpuinfo_x86 *c); 35extern void get_cpu_cap(struct cpuinfo_x86 *c);
37extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); 36extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
38extern void get_cpu_cap(struct cpuinfo_x86 *c); 37#endif /* ARCH_X86_CPU_H */
39
40#endif
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 523131213f08..3e6ff6cbf42a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -181,7 +181,6 @@ static void __cpuinit trap_init_f00f_bug(void)
181 181
182static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) 182static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
183{ 183{
184#ifdef CONFIG_SMP
185 /* calling is from identify_secondary_cpu() ? */ 184 /* calling is from identify_secondary_cpu() ? */
186 if (!c->cpu_index) 185 if (!c->cpu_index)
187 return; 186 return;
@@ -198,7 +197,6 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
198 WARN_ONCE(1, "WARNING: SMP operation may be unreliable" 197 WARN_ONCE(1, "WARNING: SMP operation may be unreliable"
199 "with B stepping processors.\n"); 198 "with B stepping processors.\n");
200 } 199 }
201#endif
202} 200}
203 201
204static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) 202static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index a3b0811693c9..6b45e5e7a901 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -844,8 +844,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
844 844
845#include <linux/kobject.h> 845#include <linux/kobject.h>
846#include <linux/sysfs.h> 846#include <linux/sysfs.h>
847 847#include <linux/cpu.h>
848extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
849 848
850/* pointer to kobject for cpuX/cache */ 849/* pointer to kobject for cpuX/cache */
851static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject); 850static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
@@ -1073,9 +1072,9 @@ err_out:
1073static DECLARE_BITMAP(cache_dev_map, NR_CPUS); 1072static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
1074 1073
1075/* Add/Remove cache interface for CPU device */ 1074/* Add/Remove cache interface for CPU device */
1076static int __cpuinit cache_add_dev(struct sys_device * sys_dev) 1075static int __cpuinit cache_add_dev(struct device *dev)
1077{ 1076{
1078 unsigned int cpu = sys_dev->id; 1077 unsigned int cpu = dev->id;
1079 unsigned long i, j; 1078 unsigned long i, j;
1080 struct _index_kobject *this_object; 1079 struct _index_kobject *this_object;
1081 struct _cpuid4_info *this_leaf; 1080 struct _cpuid4_info *this_leaf;
@@ -1087,7 +1086,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1087 1086
1088 retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu), 1087 retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
1089 &ktype_percpu_entry, 1088 &ktype_percpu_entry,
1090 &sys_dev->kobj, "%s", "cache"); 1089 &dev->kobj, "%s", "cache");
1091 if (retval < 0) { 1090 if (retval < 0) {
1092 cpuid4_cache_sysfs_exit(cpu); 1091 cpuid4_cache_sysfs_exit(cpu);
1093 return retval; 1092 return retval;
@@ -1124,9 +1123,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1124 return 0; 1123 return 0;
1125} 1124}
1126 1125
1127static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) 1126static void __cpuinit cache_remove_dev(struct device *dev)
1128{ 1127{
1129 unsigned int cpu = sys_dev->id; 1128 unsigned int cpu = dev->id;
1130 unsigned long i; 1129 unsigned long i;
1131 1130
1132 if (per_cpu(ici_cpuid4_info, cpu) == NULL) 1131 if (per_cpu(ici_cpuid4_info, cpu) == NULL)
@@ -1145,17 +1144,17 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
1145 unsigned long action, void *hcpu) 1144 unsigned long action, void *hcpu)
1146{ 1145{
1147 unsigned int cpu = (unsigned long)hcpu; 1146 unsigned int cpu = (unsigned long)hcpu;
1148 struct sys_device *sys_dev; 1147 struct device *dev;
1149 1148
1150 sys_dev = get_cpu_sysdev(cpu); 1149 dev = get_cpu_device(cpu);
1151 switch (action) { 1150 switch (action) {
1152 case CPU_ONLINE: 1151 case CPU_ONLINE:
1153 case CPU_ONLINE_FROZEN: 1152 case CPU_ONLINE_FROZEN:
1154 cache_add_dev(sys_dev); 1153 cache_add_dev(dev);
1155 break; 1154 break;
1156 case CPU_DEAD: 1155 case CPU_DEAD:
1157 case CPU_DEAD_FROZEN: 1156 case CPU_DEAD_FROZEN:
1158 cache_remove_dev(sys_dev); 1157 cache_remove_dev(dev);
1159 break; 1158 break;
1160 } 1159 }
1161 return NOTIFY_OK; 1160 return NOTIFY_OK;
@@ -1174,9 +1173,9 @@ static int __cpuinit cache_sysfs_init(void)
1174 1173
1175 for_each_online_cpu(i) { 1174 for_each_online_cpu(i) {
1176 int err; 1175 int err;
1177 struct sys_device *sys_dev = get_cpu_sysdev(i); 1176 struct device *dev = get_cpu_device(i);
1178 1177
1179 err = cache_add_dev(sys_dev); 1178 err = cache_add_dev(dev);
1180 if (err) 1179 if (err)
1181 return err; 1180 return err;
1182 } 1181 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 319882ef848d..fc4beb393577 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -17,6 +17,7 @@
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/fs.h> 19#include <linux/fs.h>
20#include <linux/preempt.h>
20#include <linux/smp.h> 21#include <linux/smp.h>
21#include <linux/notifier.h> 22#include <linux/notifier.h>
22#include <linux/kdebug.h> 23#include <linux/kdebug.h>
@@ -92,6 +93,18 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
92 return NMI_HANDLED; 93 return NMI_HANDLED;
93} 94}
94 95
96static void mce_irq_ipi(void *info)
97{
98 int cpu = smp_processor_id();
99 struct mce *m = &__get_cpu_var(injectm);
100
101 if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
102 m->inject_flags & MCJ_EXCEPTION) {
103 cpumask_clear_cpu(cpu, mce_inject_cpumask);
104 raise_exception(m, NULL);
105 }
106}
107
95/* Inject mce on current CPU */ 108/* Inject mce on current CPU */
96static int raise_local(void) 109static int raise_local(void)
97{ 110{
@@ -139,9 +152,10 @@ static void raise_mce(struct mce *m)
139 return; 152 return;
140 153
141#ifdef CONFIG_X86_LOCAL_APIC 154#ifdef CONFIG_X86_LOCAL_APIC
142 if (m->inject_flags & MCJ_NMI_BROADCAST) { 155 if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) {
143 unsigned long start; 156 unsigned long start;
144 int cpu; 157 int cpu;
158
145 get_online_cpus(); 159 get_online_cpus();
146 cpumask_copy(mce_inject_cpumask, cpu_online_mask); 160 cpumask_copy(mce_inject_cpumask, cpu_online_mask);
147 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); 161 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
@@ -151,13 +165,25 @@ static void raise_mce(struct mce *m)
151 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) 165 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
152 cpumask_clear_cpu(cpu, mce_inject_cpumask); 166 cpumask_clear_cpu(cpu, mce_inject_cpumask);
153 } 167 }
154 if (!cpumask_empty(mce_inject_cpumask)) 168 if (!cpumask_empty(mce_inject_cpumask)) {
155 apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR); 169 if (m->inject_flags & MCJ_IRQ_BRAODCAST) {
170 /*
171 * don't wait because mce_irq_ipi is necessary
172 * to be sync with following raise_local
173 */
174 preempt_disable();
175 smp_call_function_many(mce_inject_cpumask,
176 mce_irq_ipi, NULL, 0);
177 preempt_enable();
178 } else if (m->inject_flags & MCJ_NMI_BROADCAST)
179 apic->send_IPI_mask(mce_inject_cpumask,
180 NMI_VECTOR);
181 }
156 start = jiffies; 182 start = jiffies;
157 while (!cpumask_empty(mce_inject_cpumask)) { 183 while (!cpumask_empty(mce_inject_cpumask)) {
158 if (!time_before(jiffies, start + 2*HZ)) { 184 if (!time_before(jiffies, start + 2*HZ)) {
159 printk(KERN_ERR 185 printk(KERN_ERR
160 "Timeout waiting for mce inject NMI %lx\n", 186 "Timeout waiting for mce inject %lx\n",
161 *cpumask_bits(mce_inject_cpumask)); 187 *cpumask_bits(mce_inject_cpumask));
162 break; 188 break;
163 } 189 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index fefcc69ee8b5..ed44c8a65858 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -1,4 +1,4 @@
1#include <linux/sysdev.h> 1#include <linux/device.h>
2#include <asm/mce.h> 2#include <asm/mce.h>
3 3
4enum severity_level { 4enum severity_level {
@@ -17,7 +17,7 @@ enum severity_level {
17struct mce_bank { 17struct mce_bank {
18 u64 ctl; /* subevents to enable */ 18 u64 ctl; /* subevents to enable */
19 unsigned char init; /* initialise bank? */ 19 unsigned char init; /* initialise bank? */
20 struct sysdev_attribute attr; /* sysdev attribute */ 20 struct device_attribute attr; /* device attribute */
21 char attrname[ATTR_LEN]; /* attribute name */ 21 char attrname[ATTR_LEN]; /* attribute name */
22}; 22};
23 23
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2af127d4c3d1..f22a9f7f6390 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -19,7 +19,7 @@
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/percpu.h> 20#include <linux/percpu.h>
21#include <linux/string.h> 21#include <linux/string.h>
22#include <linux/sysdev.h> 22#include <linux/device.h>
23#include <linux/syscore_ops.h> 23#include <linux/syscore_ops.h>
24#include <linux/delay.h> 24#include <linux/delay.h>
25#include <linux/ctype.h> 25#include <linux/ctype.h>
@@ -95,13 +95,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
95static DEFINE_PER_CPU(struct mce, mces_seen); 95static DEFINE_PER_CPU(struct mce, mces_seen);
96static int cpu_missing; 96static int cpu_missing;
97 97
98/*
99 * CPU/chipset specific EDAC code can register a notifier call here to print
100 * MCE errors in a human-readable form.
101 */
102ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
103EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
104
105/* MCA banks polled by the period polling timer for corrected events */ 98/* MCA banks polled by the period polling timer for corrected events */
106DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { 99DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
107 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL 100 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
@@ -109,6 +102,12 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
109 102
110static DEFINE_PER_CPU(struct work_struct, mce_work); 103static DEFINE_PER_CPU(struct work_struct, mce_work);
111 104
105/*
106 * CPU/chipset specific EDAC code can register a notifier call here to print
107 * MCE errors in a human-readable form.
108 */
109ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
110
112/* Do initial initialization of a struct mce */ 111/* Do initial initialization of a struct mce */
113void mce_setup(struct mce *m) 112void mce_setup(struct mce *m)
114{ 113{
@@ -119,9 +118,7 @@ void mce_setup(struct mce *m)
119 m->time = get_seconds(); 118 m->time = get_seconds();
120 m->cpuvendor = boot_cpu_data.x86_vendor; 119 m->cpuvendor = boot_cpu_data.x86_vendor;
121 m->cpuid = cpuid_eax(1); 120 m->cpuid = cpuid_eax(1);
122#ifdef CONFIG_SMP
123 m->socketid = cpu_data(m->extcpu).phys_proc_id; 121 m->socketid = cpu_data(m->extcpu).phys_proc_id;
124#endif
125 m->apicid = cpu_data(m->extcpu).initial_apicid; 122 m->apicid = cpu_data(m->extcpu).initial_apicid;
126 rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); 123 rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
127} 124}
@@ -190,6 +187,57 @@ void mce_log(struct mce *mce)
190 set_bit(0, &mce_need_notify); 187 set_bit(0, &mce_need_notify);
191} 188}
192 189
190static void drain_mcelog_buffer(void)
191{
192 unsigned int next, i, prev = 0;
193
194 next = rcu_dereference_check_mce(mcelog.next);
195
196 do {
197 struct mce *m;
198
199 /* drain what was logged during boot */
200 for (i = prev; i < next; i++) {
201 unsigned long start = jiffies;
202 unsigned retries = 1;
203
204 m = &mcelog.entry[i];
205
206 while (!m->finished) {
207 if (time_after_eq(jiffies, start + 2*retries))
208 retries++;
209
210 cpu_relax();
211
212 if (!m->finished && retries >= 4) {
213 pr_err("MCE: skipping error being logged currently!\n");
214 break;
215 }
216 }
217 smp_rmb();
218 atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
219 }
220
221 memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
222 prev = next;
223 next = cmpxchg(&mcelog.next, prev, 0);
224 } while (next != prev);
225}
226
227
228void mce_register_decode_chain(struct notifier_block *nb)
229{
230 atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
231 drain_mcelog_buffer();
232}
233EXPORT_SYMBOL_GPL(mce_register_decode_chain);
234
235void mce_unregister_decode_chain(struct notifier_block *nb)
236{
237 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
238}
239EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
240
193static void print_mce(struct mce *m) 241static void print_mce(struct mce *m)
194{ 242{
195 int ret = 0; 243 int ret = 0;
@@ -1770,7 +1818,7 @@ static struct syscore_ops mce_syscore_ops = {
1770}; 1818};
1771 1819
1772/* 1820/*
1773 * mce_sysdev: Sysfs support 1821 * mce_device: Sysfs support
1774 */ 1822 */
1775 1823
1776static void mce_cpu_restart(void *data) 1824static void mce_cpu_restart(void *data)
@@ -1806,27 +1854,28 @@ static void mce_enable_ce(void *all)
1806 __mcheck_cpu_init_timer(); 1854 __mcheck_cpu_init_timer();
1807} 1855}
1808 1856
1809static struct sysdev_class mce_sysdev_class = { 1857static struct bus_type mce_subsys = {
1810 .name = "machinecheck", 1858 .name = "machinecheck",
1859 .dev_name = "machinecheck",
1811}; 1860};
1812 1861
1813DEFINE_PER_CPU(struct sys_device, mce_sysdev); 1862DEFINE_PER_CPU(struct device, mce_device);
1814 1863
1815__cpuinitdata 1864__cpuinitdata
1816void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); 1865void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
1817 1866
1818static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr) 1867static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
1819{ 1868{
1820 return container_of(attr, struct mce_bank, attr); 1869 return container_of(attr, struct mce_bank, attr);
1821} 1870}
1822 1871
1823static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, 1872static ssize_t show_bank(struct device *s, struct device_attribute *attr,
1824 char *buf) 1873 char *buf)
1825{ 1874{
1826 return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); 1875 return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
1827} 1876}
1828 1877
1829static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, 1878static ssize_t set_bank(struct device *s, struct device_attribute *attr,
1830 const char *buf, size_t size) 1879 const char *buf, size_t size)
1831{ 1880{
1832 u64 new; 1881 u64 new;
@@ -1841,14 +1890,14 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
1841} 1890}
1842 1891
1843static ssize_t 1892static ssize_t
1844show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) 1893show_trigger(struct device *s, struct device_attribute *attr, char *buf)
1845{ 1894{
1846 strcpy(buf, mce_helper); 1895 strcpy(buf, mce_helper);
1847 strcat(buf, "\n"); 1896 strcat(buf, "\n");
1848 return strlen(mce_helper) + 1; 1897 return strlen(mce_helper) + 1;
1849} 1898}
1850 1899
1851static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, 1900static ssize_t set_trigger(struct device *s, struct device_attribute *attr,
1852 const char *buf, size_t siz) 1901 const char *buf, size_t siz)
1853{ 1902{
1854 char *p; 1903 char *p;
@@ -1863,8 +1912,8 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
1863 return strlen(mce_helper) + !!p; 1912 return strlen(mce_helper) + !!p;
1864} 1913}
1865 1914
1866static ssize_t set_ignore_ce(struct sys_device *s, 1915static ssize_t set_ignore_ce(struct device *s,
1867 struct sysdev_attribute *attr, 1916 struct device_attribute *attr,
1868 const char *buf, size_t size) 1917 const char *buf, size_t size)
1869{ 1918{
1870 u64 new; 1919 u64 new;
@@ -1887,8 +1936,8 @@ static ssize_t set_ignore_ce(struct sys_device *s,
1887 return size; 1936 return size;
1888} 1937}
1889 1938
1890static ssize_t set_cmci_disabled(struct sys_device *s, 1939static ssize_t set_cmci_disabled(struct device *s,
1891 struct sysdev_attribute *attr, 1940 struct device_attribute *attr,
1892 const char *buf, size_t size) 1941 const char *buf, size_t size)
1893{ 1942{
1894 u64 new; 1943 u64 new;
@@ -1910,108 +1959,107 @@ static ssize_t set_cmci_disabled(struct sys_device *s,
1910 return size; 1959 return size;
1911} 1960}
1912 1961
1913static ssize_t store_int_with_restart(struct sys_device *s, 1962static ssize_t store_int_with_restart(struct device *s,
1914 struct sysdev_attribute *attr, 1963 struct device_attribute *attr,
1915 const char *buf, size_t size) 1964 const char *buf, size_t size)
1916{ 1965{
1917 ssize_t ret = sysdev_store_int(s, attr, buf, size); 1966 ssize_t ret = device_store_int(s, attr, buf, size);
1918 mce_restart(); 1967 mce_restart();
1919 return ret; 1968 return ret;
1920} 1969}
1921 1970
1922static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); 1971static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
1923static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); 1972static DEVICE_INT_ATTR(tolerant, 0644, tolerant);
1924static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); 1973static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
1925static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); 1974static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
1926 1975
1927static struct sysdev_ext_attribute attr_check_interval = { 1976static struct dev_ext_attribute dev_attr_check_interval = {
1928 _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, 1977 __ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
1929 store_int_with_restart),
1930 &check_interval 1978 &check_interval
1931}; 1979};
1932 1980
1933static struct sysdev_ext_attribute attr_ignore_ce = { 1981static struct dev_ext_attribute dev_attr_ignore_ce = {
1934 _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce), 1982 __ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce),
1935 &mce_ignore_ce 1983 &mce_ignore_ce
1936}; 1984};
1937 1985
1938static struct sysdev_ext_attribute attr_cmci_disabled = { 1986static struct dev_ext_attribute dev_attr_cmci_disabled = {
1939 _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled), 1987 __ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled),
1940 &mce_cmci_disabled 1988 &mce_cmci_disabled
1941}; 1989};
1942 1990
1943static struct sysdev_attribute *mce_sysdev_attrs[] = { 1991static struct device_attribute *mce_device_attrs[] = {
1944 &attr_tolerant.attr, 1992 &dev_attr_tolerant.attr,
1945 &attr_check_interval.attr, 1993 &dev_attr_check_interval.attr,
1946 &attr_trigger, 1994 &dev_attr_trigger,
1947 &attr_monarch_timeout.attr, 1995 &dev_attr_monarch_timeout.attr,
1948 &attr_dont_log_ce.attr, 1996 &dev_attr_dont_log_ce.attr,
1949 &attr_ignore_ce.attr, 1997 &dev_attr_ignore_ce.attr,
1950 &attr_cmci_disabled.attr, 1998 &dev_attr_cmci_disabled.attr,
1951 NULL 1999 NULL
1952}; 2000};
1953 2001
1954static cpumask_var_t mce_sysdev_initialized; 2002static cpumask_var_t mce_device_initialized;
1955 2003
1956/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ 2004/* Per cpu device init. All of the cpus still share the same ctrl bank: */
1957static __cpuinit int mce_sysdev_create(unsigned int cpu) 2005static __cpuinit int mce_device_create(unsigned int cpu)
1958{ 2006{
1959 struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); 2007 struct device *dev = &per_cpu(mce_device, cpu);
1960 int err; 2008 int err;
1961 int i, j; 2009 int i, j;
1962 2010
1963 if (!mce_available(&boot_cpu_data)) 2011 if (!mce_available(&boot_cpu_data))
1964 return -EIO; 2012 return -EIO;
1965 2013
1966 memset(&sysdev->kobj, 0, sizeof(struct kobject)); 2014 memset(&dev->kobj, 0, sizeof(struct kobject));
1967 sysdev->id = cpu; 2015 dev->id = cpu;
1968 sysdev->cls = &mce_sysdev_class; 2016 dev->bus = &mce_subsys;
1969 2017
1970 err = sysdev_register(sysdev); 2018 err = device_register(dev);
1971 if (err) 2019 if (err)
1972 return err; 2020 return err;
1973 2021
1974 for (i = 0; mce_sysdev_attrs[i]; i++) { 2022 for (i = 0; mce_device_attrs[i]; i++) {
1975 err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]); 2023 err = device_create_file(dev, mce_device_attrs[i]);
1976 if (err) 2024 if (err)
1977 goto error; 2025 goto error;
1978 } 2026 }
1979 for (j = 0; j < banks; j++) { 2027 for (j = 0; j < banks; j++) {
1980 err = sysdev_create_file(sysdev, &mce_banks[j].attr); 2028 err = device_create_file(dev, &mce_banks[j].attr);
1981 if (err) 2029 if (err)
1982 goto error2; 2030 goto error2;
1983 } 2031 }
1984 cpumask_set_cpu(cpu, mce_sysdev_initialized); 2032 cpumask_set_cpu(cpu, mce_device_initialized);
1985 2033
1986 return 0; 2034 return 0;
1987error2: 2035error2:
1988 while (--j >= 0) 2036 while (--j >= 0)
1989 sysdev_remove_file(sysdev, &mce_banks[j].attr); 2037 device_remove_file(dev, &mce_banks[j].attr);
1990error: 2038error:
1991 while (--i >= 0) 2039 while (--i >= 0)
1992 sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); 2040 device_remove_file(dev, mce_device_attrs[i]);
1993 2041
1994 sysdev_unregister(sysdev); 2042 device_unregister(dev);
1995 2043
1996 return err; 2044 return err;
1997} 2045}
1998 2046
1999static __cpuinit void mce_sysdev_remove(unsigned int cpu) 2047static __cpuinit void mce_device_remove(unsigned int cpu)
2000{ 2048{
2001 struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); 2049 struct device *dev = &per_cpu(mce_device, cpu);
2002 int i; 2050 int i;
2003 2051
2004 if (!cpumask_test_cpu(cpu, mce_sysdev_initialized)) 2052 if (!cpumask_test_cpu(cpu, mce_device_initialized))
2005 return; 2053 return;
2006 2054
2007 for (i = 0; mce_sysdev_attrs[i]; i++) 2055 for (i = 0; mce_device_attrs[i]; i++)
2008 sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); 2056 device_remove_file(dev, mce_device_attrs[i]);
2009 2057
2010 for (i = 0; i < banks; i++) 2058 for (i = 0; i < banks; i++)
2011 sysdev_remove_file(sysdev, &mce_banks[i].attr); 2059 device_remove_file(dev, &mce_banks[i].attr);
2012 2060
2013 sysdev_unregister(sysdev); 2061 device_unregister(dev);
2014 cpumask_clear_cpu(cpu, mce_sysdev_initialized); 2062 cpumask_clear_cpu(cpu, mce_device_initialized);
2015} 2063}
2016 2064
2017/* Make sure there are no machine checks on offlined CPUs. */ 2065/* Make sure there are no machine checks on offlined CPUs. */
@@ -2061,7 +2109,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2061 switch (action) { 2109 switch (action) {
2062 case CPU_ONLINE: 2110 case CPU_ONLINE:
2063 case CPU_ONLINE_FROZEN: 2111 case CPU_ONLINE_FROZEN:
2064 mce_sysdev_create(cpu); 2112 mce_device_create(cpu);
2065 if (threshold_cpu_callback) 2113 if (threshold_cpu_callback)
2066 threshold_cpu_callback(action, cpu); 2114 threshold_cpu_callback(action, cpu);
2067 break; 2115 break;
@@ -2069,7 +2117,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2069 case CPU_DEAD_FROZEN: 2117 case CPU_DEAD_FROZEN:
2070 if (threshold_cpu_callback) 2118 if (threshold_cpu_callback)
2071 threshold_cpu_callback(action, cpu); 2119 threshold_cpu_callback(action, cpu);
2072 mce_sysdev_remove(cpu); 2120 mce_device_remove(cpu);
2073 break; 2121 break;
2074 case CPU_DOWN_PREPARE: 2122 case CPU_DOWN_PREPARE:
2075 case CPU_DOWN_PREPARE_FROZEN: 2123 case CPU_DOWN_PREPARE_FROZEN:
@@ -2103,7 +2151,7 @@ static __init void mce_init_banks(void)
2103 2151
2104 for (i = 0; i < banks; i++) { 2152 for (i = 0; i < banks; i++) {
2105 struct mce_bank *b = &mce_banks[i]; 2153 struct mce_bank *b = &mce_banks[i];
2106 struct sysdev_attribute *a = &b->attr; 2154 struct device_attribute *a = &b->attr;
2107 2155
2108 sysfs_attr_init(&a->attr); 2156 sysfs_attr_init(&a->attr);
2109 a->attr.name = b->attrname; 2157 a->attr.name = b->attrname;
@@ -2123,16 +2171,16 @@ static __init int mcheck_init_device(void)
2123 if (!mce_available(&boot_cpu_data)) 2171 if (!mce_available(&boot_cpu_data))
2124 return -EIO; 2172 return -EIO;
2125 2173
2126 zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL); 2174 zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
2127 2175
2128 mce_init_banks(); 2176 mce_init_banks();
2129 2177
2130 err = sysdev_class_register(&mce_sysdev_class); 2178 err = subsys_system_register(&mce_subsys, NULL);
2131 if (err) 2179 if (err)
2132 return err; 2180 return err;
2133 2181
2134 for_each_online_cpu(i) { 2182 for_each_online_cpu(i) {
2135 err = mce_sysdev_create(i); 2183 err = mce_device_create(i);
2136 if (err) 2184 if (err)
2137 return err; 2185 return err;
2138 } 2186 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f5474218cffe..ba0b94a7e204 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -17,7 +17,6 @@
17#include <linux/notifier.h> 17#include <linux/notifier.h>
18#include <linux/kobject.h> 18#include <linux/kobject.h>
19#include <linux/percpu.h> 19#include <linux/percpu.h>
20#include <linux/sysdev.h>
21#include <linux/errno.h> 20#include <linux/errno.h>
22#include <linux/sched.h> 21#include <linux/sched.h>
23#include <linux/sysfs.h> 22#include <linux/sysfs.h>
@@ -64,11 +63,9 @@ struct threshold_bank {
64}; 63};
65static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); 64static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
66 65
67#ifdef CONFIG_SMP
68static unsigned char shared_bank[NR_BANKS] = { 66static unsigned char shared_bank[NR_BANKS] = {
69 0, 0, 0, 0, 1 67 0, 0, 0, 0, 1
70}; 68};
71#endif
72 69
73static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ 70static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
74 71
@@ -202,10 +199,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
202 199
203 if (!block) 200 if (!block)
204 per_cpu(bank_map, cpu) |= (1 << bank); 201 per_cpu(bank_map, cpu) |= (1 << bank);
205#ifdef CONFIG_SMP
206 if (shared_bank[bank] && c->cpu_core_id) 202 if (shared_bank[bank] && c->cpu_core_id)
207 break; 203 break;
208#endif 204
209 offset = setup_APIC_mce(offset, 205 offset = setup_APIC_mce(offset,
210 (high & MASK_LVTOFF_HI) >> 20); 206 (high & MASK_LVTOFF_HI) >> 20);
211 207
@@ -531,7 +527,6 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
531 527
532 sprintf(name, "threshold_bank%i", bank); 528 sprintf(name, "threshold_bank%i", bank);
533 529
534#ifdef CONFIG_SMP
535 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 530 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
536 i = cpumask_first(cpu_llc_shared_mask(cpu)); 531 i = cpumask_first(cpu_llc_shared_mask(cpu));
537 532
@@ -548,7 +543,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
548 if (!b) 543 if (!b)
549 goto out; 544 goto out;
550 545
551 err = sysfs_create_link(&per_cpu(mce_sysdev, cpu).kobj, 546 err = sysfs_create_link(&per_cpu(mce_device, cpu).kobj,
552 b->kobj, name); 547 b->kobj, name);
553 if (err) 548 if (err)
554 goto out; 549 goto out;
@@ -558,7 +553,6 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
558 553
559 goto out; 554 goto out;
560 } 555 }
561#endif
562 556
563 b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); 557 b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
564 if (!b) { 558 if (!b) {
@@ -571,7 +565,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
571 goto out; 565 goto out;
572 } 566 }
573 567
574 b->kobj = kobject_create_and_add(name, &per_cpu(mce_sysdev, cpu).kobj); 568 b->kobj = kobject_create_and_add(name, &per_cpu(mce_device, cpu).kobj);
575 if (!b->kobj) 569 if (!b->kobj)
576 goto out_free; 570 goto out_free;
577 571
@@ -591,7 +585,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
591 if (i == cpu) 585 if (i == cpu)
592 continue; 586 continue;
593 587
594 err = sysfs_create_link(&per_cpu(mce_sysdev, i).kobj, 588 err = sysfs_create_link(&per_cpu(mce_device, i).kobj,
595 b->kobj, name); 589 b->kobj, name);
596 if (err) 590 if (err)
597 goto out; 591 goto out;
@@ -669,7 +663,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
669#ifdef CONFIG_SMP 663#ifdef CONFIG_SMP
670 /* sibling symlink */ 664 /* sibling symlink */
671 if (shared_bank[bank] && b->blocks->cpu != cpu) { 665 if (shared_bank[bank] && b->blocks->cpu != cpu) {
672 sysfs_remove_link(&per_cpu(mce_sysdev, cpu).kobj, name); 666 sysfs_remove_link(&per_cpu(mce_device, cpu).kobj, name);
673 per_cpu(threshold_banks, cpu)[bank] = NULL; 667 per_cpu(threshold_banks, cpu)[bank] = NULL;
674 668
675 return; 669 return;
@@ -681,7 +675,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
681 if (i == cpu) 675 if (i == cpu)
682 continue; 676 continue;
683 677
684 sysfs_remove_link(&per_cpu(mce_sysdev, i).kobj, name); 678 sysfs_remove_link(&per_cpu(mce_device, i).kobj, name);
685 per_cpu(threshold_banks, i)[bank] = NULL; 679 per_cpu(threshold_banks, i)[bank] = NULL;
686 } 680 }
687 681
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 787e06c84ea6..67bb17a37a0a 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -19,7 +19,6 @@
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/percpu.h> 20#include <linux/percpu.h>
21#include <linux/export.h> 21#include <linux/export.h>
22#include <linux/sysdev.h>
23#include <linux/types.h> 22#include <linux/types.h>
24#include <linux/init.h> 23#include <linux/init.h>
25#include <linux/smp.h> 24#include <linux/smp.h>
@@ -69,16 +68,16 @@ static atomic_t therm_throt_en = ATOMIC_INIT(0);
69static u32 lvtthmr_init __read_mostly; 68static u32 lvtthmr_init __read_mostly;
70 69
71#ifdef CONFIG_SYSFS 70#ifdef CONFIG_SYSFS
72#define define_therm_throt_sysdev_one_ro(_name) \ 71#define define_therm_throt_device_one_ro(_name) \
73 static SYSDEV_ATTR(_name, 0444, \ 72 static DEVICE_ATTR(_name, 0444, \
74 therm_throt_sysdev_show_##_name, \ 73 therm_throt_device_show_##_name, \
75 NULL) \ 74 NULL) \
76 75
77#define define_therm_throt_sysdev_show_func(event, name) \ 76#define define_therm_throt_device_show_func(event, name) \
78 \ 77 \
79static ssize_t therm_throt_sysdev_show_##event##_##name( \ 78static ssize_t therm_throt_device_show_##event##_##name( \
80 struct sys_device *dev, \ 79 struct device *dev, \
81 struct sysdev_attribute *attr, \ 80 struct device_attribute *attr, \
82 char *buf) \ 81 char *buf) \
83{ \ 82{ \
84 unsigned int cpu = dev->id; \ 83 unsigned int cpu = dev->id; \
@@ -95,20 +94,20 @@ static ssize_t therm_throt_sysdev_show_##event##_##name( \
95 return ret; \ 94 return ret; \
96} 95}
97 96
98define_therm_throt_sysdev_show_func(core_throttle, count); 97define_therm_throt_device_show_func(core_throttle, count);
99define_therm_throt_sysdev_one_ro(core_throttle_count); 98define_therm_throt_device_one_ro(core_throttle_count);
100 99
101define_therm_throt_sysdev_show_func(core_power_limit, count); 100define_therm_throt_device_show_func(core_power_limit, count);
102define_therm_throt_sysdev_one_ro(core_power_limit_count); 101define_therm_throt_device_one_ro(core_power_limit_count);
103 102
104define_therm_throt_sysdev_show_func(package_throttle, count); 103define_therm_throt_device_show_func(package_throttle, count);
105define_therm_throt_sysdev_one_ro(package_throttle_count); 104define_therm_throt_device_one_ro(package_throttle_count);
106 105
107define_therm_throt_sysdev_show_func(package_power_limit, count); 106define_therm_throt_device_show_func(package_power_limit, count);
108define_therm_throt_sysdev_one_ro(package_power_limit_count); 107define_therm_throt_device_one_ro(package_power_limit_count);
109 108
110static struct attribute *thermal_throttle_attrs[] = { 109static struct attribute *thermal_throttle_attrs[] = {
111 &attr_core_throttle_count.attr, 110 &dev_attr_core_throttle_count.attr,
112 NULL 111 NULL
113}; 112};
114 113
@@ -223,36 +222,36 @@ static int thresh_event_valid(int event)
223 222
224#ifdef CONFIG_SYSFS 223#ifdef CONFIG_SYSFS
225/* Add/Remove thermal_throttle interface for CPU device: */ 224/* Add/Remove thermal_throttle interface for CPU device: */
226static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, 225static __cpuinit int thermal_throttle_add_dev(struct device *dev,
227 unsigned int cpu) 226 unsigned int cpu)
228{ 227{
229 int err; 228 int err;
230 struct cpuinfo_x86 *c = &cpu_data(cpu); 229 struct cpuinfo_x86 *c = &cpu_data(cpu);
231 230
232 err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); 231 err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
233 if (err) 232 if (err)
234 return err; 233 return err;
235 234
236 if (cpu_has(c, X86_FEATURE_PLN)) 235 if (cpu_has(c, X86_FEATURE_PLN))
237 err = sysfs_add_file_to_group(&sys_dev->kobj, 236 err = sysfs_add_file_to_group(&dev->kobj,
238 &attr_core_power_limit_count.attr, 237 &dev_attr_core_power_limit_count.attr,
239 thermal_attr_group.name); 238 thermal_attr_group.name);
240 if (cpu_has(c, X86_FEATURE_PTS)) { 239 if (cpu_has(c, X86_FEATURE_PTS)) {
241 err = sysfs_add_file_to_group(&sys_dev->kobj, 240 err = sysfs_add_file_to_group(&dev->kobj,
242 &attr_package_throttle_count.attr, 241 &dev_attr_package_throttle_count.attr,
243 thermal_attr_group.name); 242 thermal_attr_group.name);
244 if (cpu_has(c, X86_FEATURE_PLN)) 243 if (cpu_has(c, X86_FEATURE_PLN))
245 err = sysfs_add_file_to_group(&sys_dev->kobj, 244 err = sysfs_add_file_to_group(&dev->kobj,
246 &attr_package_power_limit_count.attr, 245 &dev_attr_package_power_limit_count.attr,
247 thermal_attr_group.name); 246 thermal_attr_group.name);
248 } 247 }
249 248
250 return err; 249 return err;
251} 250}
252 251
253static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) 252static __cpuinit void thermal_throttle_remove_dev(struct device *dev)
254{ 253{
255 sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group); 254 sysfs_remove_group(&dev->kobj, &thermal_attr_group);
256} 255}
257 256
258/* Mutex protecting device creation against CPU hotplug: */ 257/* Mutex protecting device creation against CPU hotplug: */
@@ -265,16 +264,16 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
265 void *hcpu) 264 void *hcpu)
266{ 265{
267 unsigned int cpu = (unsigned long)hcpu; 266 unsigned int cpu = (unsigned long)hcpu;
268 struct sys_device *sys_dev; 267 struct device *dev;
269 int err = 0; 268 int err = 0;
270 269
271 sys_dev = get_cpu_sysdev(cpu); 270 dev = get_cpu_device(cpu);
272 271
273 switch (action) { 272 switch (action) {
274 case CPU_UP_PREPARE: 273 case CPU_UP_PREPARE:
275 case CPU_UP_PREPARE_FROZEN: 274 case CPU_UP_PREPARE_FROZEN:
276 mutex_lock(&therm_cpu_lock); 275 mutex_lock(&therm_cpu_lock);
277 err = thermal_throttle_add_dev(sys_dev, cpu); 276 err = thermal_throttle_add_dev(dev, cpu);
278 mutex_unlock(&therm_cpu_lock); 277 mutex_unlock(&therm_cpu_lock);
279 WARN_ON(err); 278 WARN_ON(err);
280 break; 279 break;
@@ -283,7 +282,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
283 case CPU_DEAD: 282 case CPU_DEAD:
284 case CPU_DEAD_FROZEN: 283 case CPU_DEAD_FROZEN:
285 mutex_lock(&therm_cpu_lock); 284 mutex_lock(&therm_cpu_lock);
286 thermal_throttle_remove_dev(sys_dev); 285 thermal_throttle_remove_dev(dev);
287 mutex_unlock(&therm_cpu_lock); 286 mutex_unlock(&therm_cpu_lock);
288 break; 287 break;
289 } 288 }
@@ -310,7 +309,7 @@ static __init int thermal_throttle_init_device(void)
310#endif 309#endif
311 /* connect live CPUs to sysfs */ 310 /* connect live CPUs to sysfs */
312 for_each_online_cpu(cpu) { 311 for_each_online_cpu(cpu) {
313 err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu); 312 err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
314 WARN_ON(err); 313 WARN_ON(err);
315 } 314 }
316#ifdef CONFIG_HOTPLUG_CPU 315#ifdef CONFIG_HOTPLUG_CPU
@@ -323,17 +322,6 @@ device_initcall(thermal_throttle_init_device);
323 322
324#endif /* CONFIG_SYSFS */ 323#endif /* CONFIG_SYSFS */
325 324
326/*
327 * Set up the most two significant bit to notify mce log that this thermal
328 * event type.
329 * This is a temp solution. May be changed in the future with mce log
330 * infrasture.
331 */
332#define CORE_THROTTLED (0)
333#define CORE_POWER_LIMIT ((__u64)1 << 62)
334#define PACKAGE_THROTTLED ((__u64)2 << 62)
335#define PACKAGE_POWER_LIMIT ((__u64)3 << 62)
336
337static void notify_thresholds(__u64 msr_val) 325static void notify_thresholds(__u64 msr_val)
338{ 326{
339 /* check whether the interrupt handler is defined; 327 /* check whether the interrupt handler is defined;
@@ -363,27 +351,23 @@ static void intel_thermal_interrupt(void)
363 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, 351 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
364 THERMAL_THROTTLING_EVENT, 352 THERMAL_THROTTLING_EVENT,
365 CORE_LEVEL) != 0) 353 CORE_LEVEL) != 0)
366 mce_log_therm_throt_event(CORE_THROTTLED | msr_val); 354 mce_log_therm_throt_event(msr_val);
367 355
368 if (this_cpu_has(X86_FEATURE_PLN)) 356 if (this_cpu_has(X86_FEATURE_PLN))
369 if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, 357 therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
370 POWER_LIMIT_EVENT, 358 POWER_LIMIT_EVENT,
371 CORE_LEVEL) != 0) 359 CORE_LEVEL);
372 mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
373 360
374 if (this_cpu_has(X86_FEATURE_PTS)) { 361 if (this_cpu_has(X86_FEATURE_PTS)) {
375 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); 362 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
376 if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, 363 therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
377 THERMAL_THROTTLING_EVENT, 364 THERMAL_THROTTLING_EVENT,
378 PACKAGE_LEVEL) != 0) 365 PACKAGE_LEVEL);
379 mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
380 if (this_cpu_has(X86_FEATURE_PLN)) 366 if (this_cpu_has(X86_FEATURE_PLN))
381 if (therm_throt_process(msr_val & 367 therm_throt_process(msr_val &
382 PACKAGE_THERM_STATUS_POWER_LIMIT, 368 PACKAGE_THERM_STATUS_POWER_LIMIT,
383 POWER_LIMIT_EVENT, 369 POWER_LIMIT_EVENT,
384 PACKAGE_LEVEL) != 0) 370 PACKAGE_LEVEL);
385 mce_log_therm_throt_event(PACKAGE_POWER_LIMIT
386 | msr_val);
387 } 371 }
388} 372}
389 373
@@ -397,8 +381,8 @@ static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
397 381
398asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) 382asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
399{ 383{
400 exit_idle();
401 irq_enter(); 384 irq_enter();
385 exit_idle();
402 inc_irq_stat(irq_thermal_count); 386 inc_irq_stat(irq_thermal_count);
403 smp_thermal_vector(); 387 smp_thermal_vector();
404 irq_exit(); 388 irq_exit();
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index d746df2909c9..aa578cadb940 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -19,8 +19,8 @@ void (*mce_threshold_vector)(void) = default_threshold_interrupt;
19 19
20asmlinkage void smp_threshold_interrupt(void) 20asmlinkage void smp_threshold_interrupt(void)
21{ 21{
22 exit_idle();
23 irq_enter(); 22 irq_enter();
23 exit_idle();
24 inc_irq_stat(irq_threshold_count); 24 inc_irq_stat(irq_threshold_count);
25 mce_threshold_vector(); 25 mce_threshold_vector();
26 irq_exit(); 26 irq_exit();
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bda212a0010..5adce1040b11 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event)
484 return event->pmu == &pmu; 484 return event->pmu == &pmu;
485} 485}
486 486
487/*
488 * Event scheduler state:
489 *
490 * Assign events iterating over all events and counters, beginning
491 * with events with least weights first. Keep the current iterator
492 * state in struct sched_state.
493 */
494struct sched_state {
495 int weight;
496 int event; /* event index */
497 int counter; /* counter index */
498 int unassigned; /* number of events to be assigned left */
499 unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
500};
501
502/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
503#define SCHED_STATES_MAX 2
504
505struct perf_sched {
506 int max_weight;
507 int max_events;
508 struct event_constraint **constraints;
509 struct sched_state state;
510 int saved_states;
511 struct sched_state saved[SCHED_STATES_MAX];
512};
513
514/*
515 * Initialize iterator that runs through all events and counters.
516 */
517static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
518 int num, int wmin, int wmax)
519{
520 int idx;
521
522 memset(sched, 0, sizeof(*sched));
523 sched->max_events = num;
524 sched->max_weight = wmax;
525 sched->constraints = c;
526
527 for (idx = 0; idx < num; idx++) {
528 if (c[idx]->weight == wmin)
529 break;
530 }
531
532 sched->state.event = idx; /* start with min weight */
533 sched->state.weight = wmin;
534 sched->state.unassigned = num;
535}
536
537static void perf_sched_save_state(struct perf_sched *sched)
538{
539 if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
540 return;
541
542 sched->saved[sched->saved_states] = sched->state;
543 sched->saved_states++;
544}
545
546static bool perf_sched_restore_state(struct perf_sched *sched)
547{
548 if (!sched->saved_states)
549 return false;
550
551 sched->saved_states--;
552 sched->state = sched->saved[sched->saved_states];
553
554 /* continue with next counter: */
555 clear_bit(sched->state.counter++, sched->state.used);
556
557 return true;
558}
559
560/*
561 * Select a counter for the current event to schedule. Return true on
562 * success.
563 */
564static bool __perf_sched_find_counter(struct perf_sched *sched)
565{
566 struct event_constraint *c;
567 int idx;
568
569 if (!sched->state.unassigned)
570 return false;
571
572 if (sched->state.event >= sched->max_events)
573 return false;
574
575 c = sched->constraints[sched->state.event];
576
577 /* Prefer fixed purpose counters */
578 if (x86_pmu.num_counters_fixed) {
579 idx = X86_PMC_IDX_FIXED;
580 for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
581 if (!__test_and_set_bit(idx, sched->state.used))
582 goto done;
583 }
584 }
585 /* Grab the first unused counter starting with idx */
586 idx = sched->state.counter;
587 for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
588 if (!__test_and_set_bit(idx, sched->state.used))
589 goto done;
590 }
591
592 return false;
593
594done:
595 sched->state.counter = idx;
596
597 if (c->overlap)
598 perf_sched_save_state(sched);
599
600 return true;
601}
602
603static bool perf_sched_find_counter(struct perf_sched *sched)
604{
605 while (!__perf_sched_find_counter(sched)) {
606 if (!perf_sched_restore_state(sched))
607 return false;
608 }
609
610 return true;
611}
612
613/*
614 * Go through all unassigned events and find the next one to schedule.
615 * Take events with the least weight first. Return true on success.
616 */
617static bool perf_sched_next_event(struct perf_sched *sched)
618{
619 struct event_constraint *c;
620
621 if (!sched->state.unassigned || !--sched->state.unassigned)
622 return false;
623
624 do {
625 /* next event */
626 sched->state.event++;
627 if (sched->state.event >= sched->max_events) {
628 /* next weight */
629 sched->state.event = 0;
630 sched->state.weight++;
631 if (sched->state.weight > sched->max_weight)
632 return false;
633 }
634 c = sched->constraints[sched->state.event];
635 } while (c->weight != sched->state.weight);
636
637 sched->state.counter = 0; /* start with first counter */
638
639 return true;
640}
641
642/*
643 * Assign a counter for each event.
644 */
645static int perf_assign_events(struct event_constraint **constraints, int n,
646 int wmin, int wmax, int *assign)
647{
648 struct perf_sched sched;
649
650 perf_sched_init(&sched, constraints, n, wmin, wmax);
651
652 do {
653 if (!perf_sched_find_counter(&sched))
654 break; /* failed */
655 if (assign)
656 assign[sched.state.event] = sched.state.counter;
657 } while (perf_sched_next_event(&sched));
658
659 return sched.state.unassigned;
660}
661
487int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) 662int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
488{ 663{
489 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; 664 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
490 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 665 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
491 int i, j, w, wmax, num = 0; 666 int i, wmin, wmax, num = 0;
492 struct hw_perf_event *hwc; 667 struct hw_perf_event *hwc;
493 668
494 bitmap_zero(used_mask, X86_PMC_IDX_MAX); 669 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
495 670
496 for (i = 0; i < n; i++) { 671 for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
497 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); 672 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
498 constraints[i] = c; 673 constraints[i] = c;
674 wmin = min(wmin, c->weight);
675 wmax = max(wmax, c->weight);
499 } 676 }
500 677
501 /* 678 /*
@@ -521,60 +698,12 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
521 if (assign) 698 if (assign)
522 assign[i] = hwc->idx; 699 assign[i] = hwc->idx;
523 } 700 }
524 if (i == n)
525 goto done;
526
527 /*
528 * begin slow path
529 */
530
531 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
532 701
533 /* 702 /* slow path */
534 * weight = number of possible counters 703 if (i != n)
535 * 704 num = perf_assign_events(constraints, n, wmin, wmax, assign);
536 * 1 = most constrained, only works on one counter
537 * wmax = least constrained, works on any counter
538 *
539 * assign events to counters starting with most
540 * constrained events.
541 */
542 wmax = x86_pmu.num_counters;
543 705
544 /* 706 /*
545 * when fixed event counters are present,
546 * wmax is incremented by 1 to account
547 * for one more choice
548 */
549 if (x86_pmu.num_counters_fixed)
550 wmax++;
551
552 for (w = 1, num = n; num && w <= wmax; w++) {
553 /* for each event */
554 for (i = 0; num && i < n; i++) {
555 c = constraints[i];
556 hwc = &cpuc->event_list[i]->hw;
557
558 if (c->weight != w)
559 continue;
560
561 for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
562 if (!test_bit(j, used_mask))
563 break;
564 }
565
566 if (j == X86_PMC_IDX_MAX)
567 break;
568
569 __set_bit(j, used_mask);
570
571 if (assign)
572 assign[i] = j;
573 num--;
574 }
575 }
576done:
577 /*
578 * scheduling failed or is just a simulation, 707 * scheduling failed or is just a simulation,
579 * free resources if necessary 708 * free resources if necessary
580 */ 709 */
@@ -1119,6 +1248,7 @@ static void __init pmu_check_apic(void)
1119 1248
1120static int __init init_hw_perf_events(void) 1249static int __init init_hw_perf_events(void)
1121{ 1250{
1251 struct x86_pmu_quirk *quirk;
1122 struct event_constraint *c; 1252 struct event_constraint *c;
1123 int err; 1253 int err;
1124 1254
@@ -1147,8 +1277,8 @@ static int __init init_hw_perf_events(void)
1147 1277
1148 pr_cont("%s PMU driver.\n", x86_pmu.name); 1278 pr_cont("%s PMU driver.\n", x86_pmu.name);
1149 1279
1150 if (x86_pmu.quirks) 1280 for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
1151 x86_pmu.quirks(); 1281 quirk->func();
1152 1282
1153 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { 1283 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1154 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", 1284 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
@@ -1171,12 +1301,18 @@ static int __init init_hw_perf_events(void)
1171 1301
1172 unconstrained = (struct event_constraint) 1302 unconstrained = (struct event_constraint)
1173 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, 1303 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
1174 0, x86_pmu.num_counters); 1304 0, x86_pmu.num_counters, 0);
1175 1305
1176 if (x86_pmu.event_constraints) { 1306 if (x86_pmu.event_constraints) {
1307 /*
1308 * event on fixed counter2 (REF_CYCLES) only works on this
1309 * counter, so do not extend mask to generic counters
1310 */
1177 for_each_event_constraint(c, x86_pmu.event_constraints) { 1311 for_each_event_constraint(c, x86_pmu.event_constraints) {
1178 if (c->cmask != X86_RAW_EVENT_MASK) 1312 if (c->cmask != X86_RAW_EVENT_MASK
1313 || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
1179 continue; 1314 continue;
1315 }
1180 1316
1181 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; 1317 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
1182 c->weight += x86_pmu.num_counters; 1318 c->weight += x86_pmu.num_counters;
@@ -1566,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
1566 1702
1567 return misc; 1703 return misc;
1568} 1704}
1705
1706void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
1707{
1708 cap->version = x86_pmu.version;
1709 cap->num_counters_gp = x86_pmu.num_counters;
1710 cap->num_counters_fixed = x86_pmu.num_counters_fixed;
1711 cap->bit_width_gp = x86_pmu.cntval_bits;
1712 cap->bit_width_fixed = x86_pmu.cntval_bits;
1713 cap->events_mask = (unsigned int)x86_pmu.events_maskl;
1714 cap->events_mask_len = x86_pmu.events_mask_len;
1715}
1716EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index b9698d40ac4b..8944062f46e2 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -45,6 +45,7 @@ struct event_constraint {
45 u64 code; 45 u64 code;
46 u64 cmask; 46 u64 cmask;
47 int weight; 47 int weight;
48 int overlap;
48}; 49};
49 50
50struct amd_nb { 51struct amd_nb {
@@ -151,15 +152,40 @@ struct cpu_hw_events {
151 void *kfree_on_online; 152 void *kfree_on_online;
152}; 153};
153 154
154#define __EVENT_CONSTRAINT(c, n, m, w) {\ 155#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
155 { .idxmsk64 = (n) }, \ 156 { .idxmsk64 = (n) }, \
156 .code = (c), \ 157 .code = (c), \
157 .cmask = (m), \ 158 .cmask = (m), \
158 .weight = (w), \ 159 .weight = (w), \
160 .overlap = (o), \
159} 161}
160 162
161#define EVENT_CONSTRAINT(c, n, m) \ 163#define EVENT_CONSTRAINT(c, n, m) \
162 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) 164 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
165
166/*
167 * The overlap flag marks event constraints with overlapping counter
168 * masks. This is the case if the counter mask of such an event is not
169 * a subset of any other counter mask of a constraint with an equal or
170 * higher weight, e.g.:
171 *
172 * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
173 * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
174 * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
175 *
176 * The event scheduler may not select the correct counter in the first
177 * cycle because it needs to know which subsequent events will be
178 * scheduled. It may fail to schedule the events then. So we set the
179 * overlap flag for such constraints to give the scheduler a hint which
180 * events to select for counter rescheduling.
181 *
182 * Care must be taken as the rescheduling algorithm is O(n!) which
183 * will increase scheduling cycles for an over-commited system
184 * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
185 * and its counter masks must be kept at a minimum.
186 */
187#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
188 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
163 189
164/* 190/*
165 * Constraint on the Event code. 191 * Constraint on the Event code.
@@ -235,6 +261,11 @@ union perf_capabilities {
235 u64 capabilities; 261 u64 capabilities;
236}; 262};
237 263
264struct x86_pmu_quirk {
265 struct x86_pmu_quirk *next;
266 void (*func)(void);
267};
268
238/* 269/*
239 * struct x86_pmu - generic x86 pmu 270 * struct x86_pmu - generic x86 pmu
240 */ 271 */
@@ -259,6 +290,11 @@ struct x86_pmu {
259 int num_counters_fixed; 290 int num_counters_fixed;
260 int cntval_bits; 291 int cntval_bits;
261 u64 cntval_mask; 292 u64 cntval_mask;
293 union {
294 unsigned long events_maskl;
295 unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
296 };
297 int events_mask_len;
262 int apic; 298 int apic;
263 u64 max_period; 299 u64 max_period;
264 struct event_constraint * 300 struct event_constraint *
@@ -268,7 +304,7 @@ struct x86_pmu {
268 void (*put_event_constraints)(struct cpu_hw_events *cpuc, 304 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
269 struct perf_event *event); 305 struct perf_event *event);
270 struct event_constraint *event_constraints; 306 struct event_constraint *event_constraints;
271 void (*quirks)(void); 307 struct x86_pmu_quirk *quirks;
272 int perfctr_second_write; 308 int perfctr_second_write;
273 309
274 int (*cpu_prepare)(int cpu); 310 int (*cpu_prepare)(int cpu);
@@ -309,6 +345,15 @@ struct x86_pmu {
309 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); 345 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
310}; 346};
311 347
348#define x86_add_quirk(func_) \
349do { \
350 static struct x86_pmu_quirk __quirk __initdata = { \
351 .func = func_, \
352 }; \
353 __quirk.next = x86_pmu.quirks; \
354 x86_pmu.quirks = &__quirk; \
355} while (0)
356
312#define ERF_NO_HT_SHARING 1 357#define ERF_NO_HT_SHARING 1
313#define ERF_HAS_RSP_1 2 358#define ERF_HAS_RSP_1 2
314 359
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index aeefd45697a2..0397b23be8e9 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = {
492static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); 492static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
493static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); 493static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
494static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); 494static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
495static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); 495static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
496static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); 496static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
497static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); 497static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
498 498
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 8d601b18bf9f..3bd37bdf1b8e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
28 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 28 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
29 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 29 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
30 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, 30 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
31 [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
31}; 32};
32 33
33static struct event_constraint intel_core_event_constraints[] __read_mostly = 34static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -45,12 +46,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly =
45{ 46{
46 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 47 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
47 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 48 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
48 /* 49 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
49 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
50 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
51 * ratio between these counters.
52 */
53 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
54 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ 50 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
55 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ 51 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
56 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ 52 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -68,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
68{ 64{
69 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 65 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
70 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 66 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
71 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 67 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
72 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ 68 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
73 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ 69 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
74 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ 70 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -90,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly
90{ 86{
91 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 87 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
92 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 88 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
93 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 89 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
94 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ 90 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
95 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ 91 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
96 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ 92 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -102,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
102{ 98{
103 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 99 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
104 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 100 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
105 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 101 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
106 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ 102 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
107 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 103 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
108 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ 104 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -125,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
125{ 121{
126 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 122 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
127 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 123 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
128 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 124 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
129 EVENT_CONSTRAINT_END 125 EVENT_CONSTRAINT_END
130}; 126};
131 127
@@ -1169,7 +1165,7 @@ again:
1169 */ 1165 */
1170 c = &unconstrained; 1166 c = &unconstrained;
1171 } else if (intel_try_alt_er(event, orig_idx)) { 1167 } else if (intel_try_alt_er(event, orig_idx)) {
1172 raw_spin_unlock(&era->lock); 1168 raw_spin_unlock_irqrestore(&era->lock, flags);
1173 goto again; 1169 goto again;
1174 } 1170 }
1175 raw_spin_unlock_irqrestore(&era->lock, flags); 1171 raw_spin_unlock_irqrestore(&era->lock, flags);
@@ -1519,7 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = {
1519 .guest_get_msrs = intel_guest_get_msrs, 1515 .guest_get_msrs = intel_guest_get_msrs,
1520}; 1516};
1521 1517
1522static void intel_clovertown_quirks(void) 1518static __init void intel_clovertown_quirk(void)
1523{ 1519{
1524 /* 1520 /*
1525 * PEBS is unreliable due to: 1521 * PEBS is unreliable due to:
@@ -1545,19 +1541,60 @@ static void intel_clovertown_quirks(void)
1545 x86_pmu.pebs_constraints = NULL; 1541 x86_pmu.pebs_constraints = NULL;
1546} 1542}
1547 1543
1548static void intel_sandybridge_quirks(void) 1544static __init void intel_sandybridge_quirk(void)
1549{ 1545{
1550 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); 1546 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
1551 x86_pmu.pebs = 0; 1547 x86_pmu.pebs = 0;
1552 x86_pmu.pebs_constraints = NULL; 1548 x86_pmu.pebs_constraints = NULL;
1553} 1549}
1554 1550
1551static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
1552 { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
1553 { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
1554 { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
1555 { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
1556 { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
1557 { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
1558 { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
1559};
1560
1561static __init void intel_arch_events_quirk(void)
1562{
1563 int bit;
1564
1565 /* disable events that are reported as not present by cpuid */
1566 for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
1567 intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
1568 printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
1569 intel_arch_events_map[bit].name);
1570 }
1571}
1572
1573static __init void intel_nehalem_quirk(void)
1574{
1575 union cpuid10_ebx ebx;
1576
1577 ebx.full = x86_pmu.events_maskl;
1578 if (ebx.split.no_branch_misses_retired) {
1579 /*
1580 * Erratum AAJ80 detected, we work it around by using
1581 * the BR_MISP_EXEC.ANY event. This will over-count
1582 * branch-misses, but it's still much better than the
1583 * architectural event which is often completely bogus:
1584 */
1585 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
1586 ebx.split.no_branch_misses_retired = 0;
1587 x86_pmu.events_maskl = ebx.full;
1588 printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
1589 }
1590}
1591
1555__init int intel_pmu_init(void) 1592__init int intel_pmu_init(void)
1556{ 1593{
1557 union cpuid10_edx edx; 1594 union cpuid10_edx edx;
1558 union cpuid10_eax eax; 1595 union cpuid10_eax eax;
1596 union cpuid10_ebx ebx;
1559 unsigned int unused; 1597 unsigned int unused;
1560 unsigned int ebx;
1561 int version; 1598 int version;
1562 1599
1563 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 1600 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -1574,8 +1611,8 @@ __init int intel_pmu_init(void)
1574 * Check whether the Architectural PerfMon supports 1611 * Check whether the Architectural PerfMon supports
1575 * Branch Misses Retired hw_event or not. 1612 * Branch Misses Retired hw_event or not.
1576 */ 1613 */
1577 cpuid(10, &eax.full, &ebx, &unused, &edx.full); 1614 cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
1578 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) 1615 if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
1579 return -ENODEV; 1616 return -ENODEV;
1580 1617
1581 version = eax.split.version_id; 1618 version = eax.split.version_id;
@@ -1589,6 +1626,9 @@ __init int intel_pmu_init(void)
1589 x86_pmu.cntval_bits = eax.split.bit_width; 1626 x86_pmu.cntval_bits = eax.split.bit_width;
1590 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; 1627 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
1591 1628
1629 x86_pmu.events_maskl = ebx.full;
1630 x86_pmu.events_mask_len = eax.split.mask_length;
1631
1592 /* 1632 /*
1593 * Quirk: v2 perfmon does not report fixed-purpose events, so 1633 * Quirk: v2 perfmon does not report fixed-purpose events, so
1594 * assume at least 3 events: 1634 * assume at least 3 events:
@@ -1608,6 +1648,8 @@ __init int intel_pmu_init(void)
1608 1648
1609 intel_ds_init(); 1649 intel_ds_init();
1610 1650
1651 x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
1652
1611 /* 1653 /*
1612 * Install the hw-cache-events table: 1654 * Install the hw-cache-events table:
1613 */ 1655 */
@@ -1617,7 +1659,7 @@ __init int intel_pmu_init(void)
1617 break; 1659 break;
1618 1660
1619 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ 1661 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
1620 x86_pmu.quirks = intel_clovertown_quirks; 1662 x86_add_quirk(intel_clovertown_quirk);
1621 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ 1663 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
1622 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ 1664 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
1623 case 29: /* six-core 45 nm xeon "Dunnington" */ 1665 case 29: /* six-core 45 nm xeon "Dunnington" */
@@ -1651,17 +1693,8 @@ __init int intel_pmu_init(void)
1651 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 1693 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
1652 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; 1694 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
1653 1695
1654 if (ebx & 0x40) { 1696 x86_add_quirk(intel_nehalem_quirk);
1655 /*
1656 * Erratum AAJ80 detected, we work it around by using
1657 * the BR_MISP_EXEC.ANY event. This will over-count
1658 * branch-misses, but it's still much better than the
1659 * architectural event which is often completely bogus:
1660 */
1661 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
1662 1697
1663 pr_cont("erratum AAJ80 worked around, ");
1664 }
1665 pr_cont("Nehalem events, "); 1698 pr_cont("Nehalem events, ");
1666 break; 1699 break;
1667 1700
@@ -1701,7 +1734,7 @@ __init int intel_pmu_init(void)
1701 break; 1734 break;
1702 1735
1703 case 42: /* SandyBridge */ 1736 case 42: /* SandyBridge */
1704 x86_pmu.quirks = intel_sandybridge_quirks; 1737 x86_add_quirk(intel_sandybridge_quirk);
1705 case 45: /* SandyBridge, "Romely-EP" */ 1738 case 45: /* SandyBridge, "Romely-EP" */
1706 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 1739 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1707 sizeof(hw_cache_event_ids)); 1740 sizeof(hw_cache_event_ids));
@@ -1738,5 +1771,6 @@ __init int intel_pmu_init(void)
1738 break; 1771 break;
1739 } 1772 }
1740 } 1773 }
1774
1741 return 0; 1775 return 0;
1742} 1776}
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
index 5abbea297e0c..7b3fe56b1c21 100644
--- a/arch/x86/kernel/cpu/powerflags.c
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -16,5 +16,6 @@ const char *const x86_power_flags[32] = {
16 "100mhzsteps", 16 "100mhzsteps",
17 "hwpstate", 17 "hwpstate",
18 "", /* tsc invariant mapped to constant_tsc */ 18 "", /* tsc invariant mapped to constant_tsc */
19 /* nothing */ 19 "cpb", /* core performance boost */
20 "eff_freq_ro", /* Readonly aperf/mperf */
20}; 21};
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 14b23140e81f..8022c6681485 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -64,12 +64,10 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
64static int show_cpuinfo(struct seq_file *m, void *v) 64static int show_cpuinfo(struct seq_file *m, void *v)
65{ 65{
66 struct cpuinfo_x86 *c = v; 66 struct cpuinfo_x86 *c = v;
67 unsigned int cpu = 0; 67 unsigned int cpu;
68 int i; 68 int i;
69 69
70#ifdef CONFIG_SMP
71 cpu = c->cpu_index; 70 cpu = c->cpu_index;
72#endif
73 seq_printf(m, "processor\t: %u\n" 71 seq_printf(m, "processor\t: %u\n"
74 "vendor_id\t: %s\n" 72 "vendor_id\t: %s\n"
75 "cpu family\t: %d\n" 73 "cpu family\t: %d\n"
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 212a6a42527c..a524353d93f2 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -177,7 +177,7 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier =
177 .notifier_call = cpuid_class_cpu_callback, 177 .notifier_call = cpuid_class_cpu_callback,
178}; 178};
179 179
180static char *cpuid_devnode(struct device *dev, mode_t *mode) 180static char *cpuid_devnode(struct device *dev, umode_t *mode)
181{ 181{
182 return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); 182 return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt));
183} 183}
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 3b97a80ce329..c99f9ed013d5 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -116,16 +116,16 @@ void show_registers(struct pt_regs *regs)
116 for (i = 0; i < code_len; i++, ip++) { 116 for (i = 0; i < code_len; i++, ip++) {
117 if (ip < (u8 *)PAGE_OFFSET || 117 if (ip < (u8 *)PAGE_OFFSET ||
118 probe_kernel_address(ip, c)) { 118 probe_kernel_address(ip, c)) {
119 printk(" Bad EIP value."); 119 printk(KERN_CONT " Bad EIP value.");
120 break; 120 break;
121 } 121 }
122 if (ip == (u8 *)regs->ip) 122 if (ip == (u8 *)regs->ip)
123 printk("<%02x> ", c); 123 printk(KERN_CONT "<%02x> ", c);
124 else 124 else
125 printk("%02x ", c); 125 printk(KERN_CONT "%02x ", c);
126 } 126 }
127 } 127 }
128 printk("\n"); 128 printk(KERN_CONT "\n");
129} 129}
130 130
131int is_valid_bugaddr(unsigned long ip) 131int is_valid_bugaddr(unsigned long ip)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 19853ad8afc5..6d728d9284bd 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -284,16 +284,16 @@ void show_registers(struct pt_regs *regs)
284 for (i = 0; i < code_len; i++, ip++) { 284 for (i = 0; i < code_len; i++, ip++) {
285 if (ip < (u8 *)PAGE_OFFSET || 285 if (ip < (u8 *)PAGE_OFFSET ||
286 probe_kernel_address(ip, c)) { 286 probe_kernel_address(ip, c)) {
287 printk(" Bad RIP value."); 287 printk(KERN_CONT " Bad RIP value.");
288 break; 288 break;
289 } 289 }
290 if (ip == (u8 *)regs->ip) 290 if (ip == (u8 *)regs->ip)
291 printk("<%02x> ", c); 291 printk(KERN_CONT "<%02x> ", c);
292 else 292 else
293 printk("%02x ", c); 293 printk(KERN_CONT "%02x ", c);
294 } 294 }
295 } 295 }
296 printk("\n"); 296 printk(KERN_CONT "\n");
297} 297}
298 298
299int is_valid_bugaddr(unsigned long ip) 299int is_valid_bugaddr(unsigned long ip)
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 65ffd110a81b..8071e2f3d6eb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -135,7 +135,6 @@ static void __init e820_print_type(u32 type)
135 printk(KERN_CONT "(usable)"); 135 printk(KERN_CONT "(usable)");
136 break; 136 break;
137 case E820_RESERVED: 137 case E820_RESERVED:
138 case E820_RESERVED_EFI:
139 printk(KERN_CONT "(reserved)"); 138 printk(KERN_CONT "(reserved)");
140 break; 139 break;
141 case E820_ACPI: 140 case E820_ACPI:
@@ -739,35 +738,17 @@ core_initcall(e820_mark_nvs_memory);
739/* 738/*
740 * pre allocated 4k and reserved it in memblock and e820_saved 739 * pre allocated 4k and reserved it in memblock and e820_saved
741 */ 740 */
742u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) 741u64 __init early_reserve_e820(u64 size, u64 align)
743{ 742{
744 u64 size = 0;
745 u64 addr; 743 u64 addr;
746 u64 start;
747 744
748 for (start = startt; ; start += size) { 745 addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
749 start = memblock_x86_find_in_range_size(start, &size, align); 746 if (addr) {
750 if (start == MEMBLOCK_ERROR) 747 e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED);
751 return 0; 748 printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
752 if (size >= sizet) 749 update_e820_saved();
753 break;
754 } 750 }
755 751
756#ifdef CONFIG_X86_32
757 if (start >= MAXMEM)
758 return 0;
759 if (start + size > MAXMEM)
760 size = MAXMEM - start;
761#endif
762
763 addr = round_down(start + size - sizet, align);
764 if (addr < start)
765 return 0;
766 memblock_x86_reserve_range(addr, addr + sizet, "new next");
767 e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
768 printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
769 update_e820_saved();
770
771 return addr; 752 return addr;
772} 753}
773 754
@@ -784,7 +765,7 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
784/* 765/*
785 * Find the highest page frame number we have available 766 * Find the highest page frame number we have available
786 */ 767 */
787unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) 768static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
788{ 769{
789 int i; 770 int i;
790 unsigned long last_pfn = 0; 771 unsigned long last_pfn = 0;
@@ -1091,7 +1072,7 @@ void __init memblock_x86_fill(void)
1091 * We are safe to enable resizing, because memblock_x86_fill() 1072 * We are safe to enable resizing, because memblock_x86_fill()
1092 * is rather later for x86 1073 * is rather later for x86
1093 */ 1074 */
1094 memblock_can_resize = 1; 1075 memblock_allow_resize();
1095 1076
1096 for (i = 0; i < e820.nr_map; i++) { 1077 for (i = 0; i < e820.nr_map; i++) {
1097 struct e820entry *ei = &e820.map[i]; 1078 struct e820entry *ei = &e820.map[i];
@@ -1106,22 +1087,36 @@ void __init memblock_x86_fill(void)
1106 memblock_add(ei->addr, ei->size); 1087 memblock_add(ei->addr, ei->size);
1107 } 1088 }
1108 1089
1109 memblock_analyze();
1110 memblock_dump_all(); 1090 memblock_dump_all();
1111} 1091}
1112 1092
1113void __init memblock_find_dma_reserve(void) 1093void __init memblock_find_dma_reserve(void)
1114{ 1094{
1115#ifdef CONFIG_X86_64 1095#ifdef CONFIG_X86_64
1116 u64 free_size_pfn; 1096 u64 nr_pages = 0, nr_free_pages = 0;
1117 u64 mem_size_pfn; 1097 unsigned long start_pfn, end_pfn;
1098 phys_addr_t start, end;
1099 int i;
1100 u64 u;
1101
1118 /* 1102 /*
1119 * need to find out used area below MAX_DMA_PFN 1103 * need to find out used area below MAX_DMA_PFN
1120 * need to use memblock to get free size in [0, MAX_DMA_PFN] 1104 * need to use memblock to get free size in [0, MAX_DMA_PFN]
1121 * at first, and assume boot_mem will not take below MAX_DMA_PFN 1105 * at first, and assume boot_mem will not take below MAX_DMA_PFN
1122 */ 1106 */
1123 mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; 1107 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
1124 free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; 1108 start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN);
1125 set_dma_reserve(mem_size_pfn - free_size_pfn); 1109 end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN);
1110 nr_pages += end_pfn - start_pfn;
1111 }
1112
1113 for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) {
1114 start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
1115 end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
1116 if (start_pfn < end_pfn)
1117 nr_free_pages += end_pfn - start_pfn;
1118 }
1119
1120 set_dma_reserve(nr_pages - nr_free_pages);
1126#endif 1121#endif
1127} 1122}
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index cd28a350f7f9..9d42a52d2331 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -247,7 +247,7 @@ static int __init setup_early_printk(char *buf)
247 } 247 }
248 248
249 if (!strncmp(buf, "hsu", 3)) { 249 if (!strncmp(buf, "hsu", 3)) {
250 hsu_early_console_init(); 250 hsu_early_console_init(buf + 3);
251 early_console_register(&early_hsu_console, keep); 251 early_console_register(&early_hsu_console, keep);
252 } 252 }
253#endif 253#endif
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f3f6f5344001..22d0e21b4dd7 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -625,6 +625,8 @@ work_notifysig: # deal with pending signals and
625 movl %esp, %eax 625 movl %esp, %eax
626 jne work_notifysig_v86 # returning to kernel-space or 626 jne work_notifysig_v86 # returning to kernel-space or
627 # vm86-space 627 # vm86-space
628 TRACE_IRQS_ON
629 ENABLE_INTERRUPTS(CLBR_NONE)
628 xorl %edx, %edx 630 xorl %edx, %edx
629 call do_notify_resume 631 call do_notify_resume
630 jmp resume_userspace_sig 632 jmp resume_userspace_sig
@@ -638,6 +640,8 @@ work_notifysig_v86:
638#else 640#else
639 movl %esp, %eax 641 movl %esp, %eax
640#endif 642#endif
643 TRACE_IRQS_ON
644 ENABLE_INTERRUPTS(CLBR_NONE)
641 xorl %edx, %edx 645 xorl %edx, %edx
642 call do_notify_resume 646 call do_notify_resume
643 jmp resume_userspace_sig 647 jmp resume_userspace_sig
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index faf8d5e74b0b..a20e1cb9dc87 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -221,7 +221,7 @@ ENDPROC(native_usergs_sysret64)
221 /*CFI_REL_OFFSET ss,0*/ 221 /*CFI_REL_OFFSET ss,0*/
222 pushq_cfi %rax /* rsp */ 222 pushq_cfi %rax /* rsp */
223 CFI_REL_OFFSET rsp,0 223 CFI_REL_OFFSET rsp,0
224 pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */ 224 pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
225 /*CFI_REL_OFFSET rflags,0*/ 225 /*CFI_REL_OFFSET rflags,0*/
226 pushq_cfi $__KERNEL_CS /* cs */ 226 pushq_cfi $__KERNEL_CS /* cs */
227 /*CFI_REL_OFFSET cs,0*/ 227 /*CFI_REL_OFFSET cs,0*/
@@ -411,7 +411,7 @@ ENTRY(ret_from_fork)
411 RESTORE_REST 411 RESTORE_REST
412 412
413 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? 413 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
414 je int_ret_from_sys_call 414 jz retint_restore_args
415 415
416 testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET 416 testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
417 jnz int_ret_from_sys_call 417 jnz int_ret_from_sys_call
@@ -465,7 +465,7 @@ ENTRY(system_call)
465 * after the swapgs, so that it can do the swapgs 465 * after the swapgs, so that it can do the swapgs
466 * for the guest and jump here on syscall. 466 * for the guest and jump here on syscall.
467 */ 467 */
468ENTRY(system_call_after_swapgs) 468GLOBAL(system_call_after_swapgs)
469 469
470 movq %rsp,PER_CPU_VAR(old_rsp) 470 movq %rsp,PER_CPU_VAR(old_rsp)
471 movq PER_CPU_VAR(kernel_stack),%rsp 471 movq PER_CPU_VAR(kernel_stack),%rsp
@@ -478,8 +478,7 @@ ENTRY(system_call_after_swapgs)
478 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 478 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
479 movq %rcx,RIP-ARGOFFSET(%rsp) 479 movq %rcx,RIP-ARGOFFSET(%rsp)
480 CFI_REL_OFFSET rip,RIP-ARGOFFSET 480 CFI_REL_OFFSET rip,RIP-ARGOFFSET
481 GET_THREAD_INFO(%rcx) 481 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
482 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
483 jnz tracesys 482 jnz tracesys
484system_call_fastpath: 483system_call_fastpath:
485 cmpq $__NR_syscall_max,%rax 484 cmpq $__NR_syscall_max,%rax
@@ -496,10 +495,9 @@ ret_from_sys_call:
496 /* edi: flagmask */ 495 /* edi: flagmask */
497sysret_check: 496sysret_check:
498 LOCKDEP_SYS_EXIT 497 LOCKDEP_SYS_EXIT
499 GET_THREAD_INFO(%rcx)
500 DISABLE_INTERRUPTS(CLBR_NONE) 498 DISABLE_INTERRUPTS(CLBR_NONE)
501 TRACE_IRQS_OFF 499 TRACE_IRQS_OFF
502 movl TI_flags(%rcx),%edx 500 movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
503 andl %edi,%edx 501 andl %edi,%edx
504 jnz sysret_careful 502 jnz sysret_careful
505 CFI_REMEMBER_STATE 503 CFI_REMEMBER_STATE
@@ -583,7 +581,7 @@ sysret_audit:
583 /* Do syscall tracing */ 581 /* Do syscall tracing */
584tracesys: 582tracesys:
585#ifdef CONFIG_AUDITSYSCALL 583#ifdef CONFIG_AUDITSYSCALL
586 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) 584 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
587 jz auditsys 585 jz auditsys
588#endif 586#endif
589 SAVE_REST 587 SAVE_REST
@@ -612,8 +610,6 @@ tracesys:
612GLOBAL(int_ret_from_sys_call) 610GLOBAL(int_ret_from_sys_call)
613 DISABLE_INTERRUPTS(CLBR_NONE) 611 DISABLE_INTERRUPTS(CLBR_NONE)
614 TRACE_IRQS_OFF 612 TRACE_IRQS_OFF
615 testl $3,CS-ARGOFFSET(%rsp)
616 je retint_restore_args
617 movl $_TIF_ALLWORK_MASK,%edi 613 movl $_TIF_ALLWORK_MASK,%edi
618 /* edi: mask to check */ 614 /* edi: mask to check */
619GLOBAL(int_with_check) 615GLOBAL(int_with_check)
@@ -953,6 +949,7 @@ END(common_interrupt)
953ENTRY(\sym) 949ENTRY(\sym)
954 INTR_FRAME 950 INTR_FRAME
955 pushq_cfi $~(\num) 951 pushq_cfi $~(\num)
952.Lcommon_\sym:
956 interrupt \do_sym 953 interrupt \do_sym
957 jmp ret_from_intr 954 jmp ret_from_intr
958 CFI_ENDPROC 955 CFI_ENDPROC
@@ -976,13 +973,21 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
976 x86_platform_ipi smp_x86_platform_ipi 973 x86_platform_ipi smp_x86_platform_ipi
977 974
978#ifdef CONFIG_SMP 975#ifdef CONFIG_SMP
979.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ 976 ALIGN
977 INTR_FRAME
978.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
980 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 979 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
981.if NUM_INVALIDATE_TLB_VECTORS > \idx 980.if NUM_INVALIDATE_TLB_VECTORS > \idx
982apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \ 981ENTRY(invalidate_interrupt\idx)
983 invalidate_interrupt\idx smp_invalidate_interrupt 982 pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
983 jmp .Lcommon_invalidate_interrupt0
984 CFI_ADJUST_CFA_OFFSET -8
985END(invalidate_interrupt\idx)
984.endif 986.endif
985.endr 987.endr
988 CFI_ENDPROC
989apicinterrupt INVALIDATE_TLB_VECTOR_START, \
990 invalidate_interrupt0, smp_invalidate_interrupt
986#endif 991#endif
987 992
988apicinterrupt THRESHOLD_APIC_VECTOR \ 993apicinterrupt THRESHOLD_APIC_VECTOR \
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index af0699ba48cf..48d9d4ea1020 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -52,5 +52,5 @@ void __init reserve_ebda_region(void)
52 lowmem = 0x9f000; 52 lowmem = 0x9f000;
53 53
54 /* reserve all memory between lowmem and the 1MB mark */ 54 /* reserve all memory between lowmem and the 1MB mark */
55 memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved"); 55 memblock_reserve(lowmem, 0x100000 - lowmem);
56} 56}
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 3bb08509a7a1..51ff18616d50 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -31,9 +31,8 @@ static void __init i386_default_early_setup(void)
31 31
32void __init i386_start_kernel(void) 32void __init i386_start_kernel(void)
33{ 33{
34 memblock_init(); 34 memblock_reserve(__pa_symbol(&_text),
35 35 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
36 memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
37 36
38#ifdef CONFIG_BLK_DEV_INITRD 37#ifdef CONFIG_BLK_DEV_INITRD
39 /* Reserve INITRD */ 38 /* Reserve INITRD */
@@ -42,7 +41,7 @@ void __init i386_start_kernel(void)
42 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 41 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
43 u64 ramdisk_size = boot_params.hdr.ramdisk_size; 42 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
44 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 43 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
45 memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); 44 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
46 } 45 }
47#endif 46#endif
48 47
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5655c2272adb..3a3b779f41d3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -98,9 +98,8 @@ void __init x86_64_start_reservations(char *real_mode_data)
98{ 98{
99 copy_bootdata(__va(real_mode_data)); 99 copy_bootdata(__va(real_mode_data));
100 100
101 memblock_init(); 101 memblock_reserve(__pa_symbol(&_text),
102 102 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
103 memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
104 103
105#ifdef CONFIG_BLK_DEV_INITRD 104#ifdef CONFIG_BLK_DEV_INITRD
106 /* Reserve INITRD */ 105 /* Reserve INITRD */
@@ -109,7 +108,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
109 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; 108 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
110 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; 109 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
111 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 110 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
112 memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); 111 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
113 } 112 }
114#endif 113#endif
115 114
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 1bb0bf4d92cd..ad0de0c2714e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -2,7 +2,6 @@
2#include <linux/clockchips.h> 2#include <linux/clockchips.h>
3#include <linux/interrupt.h> 3#include <linux/interrupt.h>
4#include <linux/export.h> 4#include <linux/export.h>
5#include <linux/sysdev.h>
6#include <linux/delay.h> 5#include <linux/delay.h>
7#include <linux/errno.h> 6#include <linux/errno.h>
8#include <linux/i8253.h> 7#include <linux/i8253.h>
@@ -32,8 +31,6 @@
32#define HPET_MIN_CYCLES 128 31#define HPET_MIN_CYCLES 128
33#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) 32#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
34 33
35#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
36
37/* 34/*
38 * HPET address is set in acpi/boot.c, when an ACPI entry exists 35 * HPET address is set in acpi/boot.c, when an ACPI entry exists
39 */ 36 */
@@ -55,6 +52,11 @@ struct hpet_dev {
55 char name[10]; 52 char name[10];
56}; 53};
57 54
55inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
56{
57 return container_of(evtdev, struct hpet_dev, evt);
58}
59
58inline unsigned int hpet_readl(unsigned int a) 60inline unsigned int hpet_readl(unsigned int a)
59{ 61{
60 return readl(hpet_virt_address + a); 62 return readl(hpet_virt_address + a);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 429e0c92924e..7943e0c21bde 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -74,6 +74,10 @@ int arch_show_interrupts(struct seq_file *p, int prec)
74 for_each_online_cpu(j) 74 for_each_online_cpu(j)
75 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); 75 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
76 seq_printf(p, " IRQ work interrupts\n"); 76 seq_printf(p, " IRQ work interrupts\n");
77 seq_printf(p, "%*s: ", prec, "RTR");
78 for_each_online_cpu(j)
79 seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
80 seq_printf(p, " APIC ICR read retries\n");
77#endif 81#endif
78 if (x86_platform_ipi_callback) { 82 if (x86_platform_ipi_callback) {
79 seq_printf(p, "%*s: ", prec, "PLT"); 83 seq_printf(p, "%*s: ", prec, "PLT");
@@ -136,6 +140,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
136 sum += irq_stats(cpu)->irq_spurious_count; 140 sum += irq_stats(cpu)->irq_spurious_count;
137 sum += irq_stats(cpu)->apic_perf_irqs; 141 sum += irq_stats(cpu)->apic_perf_irqs;
138 sum += irq_stats(cpu)->apic_irq_work_irqs; 142 sum += irq_stats(cpu)->apic_irq_work_irqs;
143 sum += irq_stats(cpu)->icr_read_retry_count;
139#endif 144#endif
140 if (x86_platform_ipi_callback) 145 if (x86_platform_ipi_callback)
141 sum += irq_stats(cpu)->x86_platform_ipis; 146 sum += irq_stats(cpu)->x86_platform_ipis;
@@ -181,8 +186,8 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
181 unsigned vector = ~regs->orig_ax; 186 unsigned vector = ~regs->orig_ax;
182 unsigned irq; 187 unsigned irq;
183 188
184 exit_idle();
185 irq_enter(); 189 irq_enter();
190 exit_idle();
186 191
187 irq = __this_cpu_read(vector_irq[vector]); 192 irq = __this_cpu_read(vector_irq[vector]);
188 193
@@ -209,10 +214,10 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
209 214
210 ack_APIC_irq(); 215 ack_APIC_irq();
211 216
212 exit_idle();
213
214 irq_enter(); 217 irq_enter();
215 218
219 exit_idle();
220
216 inc_irq_stat(x86_platform_ipis); 221 inc_irq_stat(x86_platform_ipis);
217 222
218 if (x86_platform_ipi_callback) 223 if (x86_platform_ipi_callback)
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index b3300e6bacef..313fb5cddbce 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -9,7 +9,7 @@
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/kernel_stat.h> 11#include <linux/kernel_stat.h>
12#include <linux/sysdev.h> 12#include <linux/device.h>
13#include <linux/bitops.h> 13#include <linux/bitops.h>
14#include <linux/acpi.h> 14#include <linux/acpi.h>
15#include <linux/io.h> 15#include <linux/io.h>
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index ea9d5f2f13ef..2889b3d43882 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
50 put_online_cpus(); 50 put_online_cpus();
51} 51}
52 52
53void arch_jump_label_transform_static(struct jump_entry *entry, 53__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
54 enum jump_label_type type) 54 enum jump_label_type type)
55{ 55{
56 __jump_label_transform(entry, type, text_poke_early); 56 __jump_label_transform(entry, type, text_poke_early);
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index d494799aafcd..fe86493f3ed1 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -1,14 +1,18 @@
1/* 1/*
2 * AMD CPU Microcode Update Driver for Linux 2 * AMD CPU Microcode Update Driver for Linux
3 * Copyright (C) 2008 Advanced Micro Devices Inc. 3 * Copyright (C) 2008-2011 Advanced Micro Devices Inc.
4 * 4 *
5 * Author: Peter Oruba <peter.oruba@amd.com> 5 * Author: Peter Oruba <peter.oruba@amd.com>
6 * 6 *
7 * Based on work by: 7 * Based on work by:
8 * Tigran Aivazian <tigran@aivazian.fsnet.co.uk> 8 * Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
9 * 9 *
10 * This driver allows to upgrade microcode on AMD 10 * Maintainers:
11 * family 0x10 and 0x11 processors. 11 * Andreas Herrmann <andreas.herrmann3@amd.com>
12 * Borislav Petkov <borislav.petkov@amd.com>
13 *
14 * This driver allows to upgrade microcode on F10h AMD
15 * CPUs and later.
12 * 16 *
13 * Licensed under the terms of the GNU General Public 17 * Licensed under the terms of the GNU General Public
14 * License version 2. See file COPYING for details. 18 * License version 2. See file COPYING for details.
@@ -71,6 +75,9 @@ struct microcode_amd {
71 75
72static struct equiv_cpu_entry *equiv_cpu_table; 76static struct equiv_cpu_entry *equiv_cpu_table;
73 77
78/* page-sized ucode patch buffer */
79void *patch;
80
74static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) 81static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
75{ 82{
76 struct cpuinfo_x86 *c = &cpu_data(cpu); 83 struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -86,27 +93,76 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
86 return 0; 93 return 0;
87} 94}
88 95
89static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr, 96static unsigned int verify_ucode_size(int cpu, u32 patch_size,
90 int rev) 97 unsigned int size)
91{ 98{
92 unsigned int current_cpu_id; 99 struct cpuinfo_x86 *c = &cpu_data(cpu);
93 u16 equiv_cpu_id = 0; 100 u32 max_size;
94 unsigned int i = 0; 101
102#define F1XH_MPB_MAX_SIZE 2048
103#define F14H_MPB_MAX_SIZE 1824
104#define F15H_MPB_MAX_SIZE 4096
105
106 switch (c->x86) {
107 case 0x14:
108 max_size = F14H_MPB_MAX_SIZE;
109 break;
110 case 0x15:
111 max_size = F15H_MPB_MAX_SIZE;
112 break;
113 default:
114 max_size = F1XH_MPB_MAX_SIZE;
115 break;
116 }
117
118 if (patch_size > min_t(u32, size, max_size)) {
119 pr_err("patch size mismatch\n");
120 return 0;
121 }
122
123 return patch_size;
124}
125
126static u16 find_equiv_id(void)
127{
128 unsigned int current_cpu_id, i = 0;
95 129
96 BUG_ON(equiv_cpu_table == NULL); 130 BUG_ON(equiv_cpu_table == NULL);
131
97 current_cpu_id = cpuid_eax(0x00000001); 132 current_cpu_id = cpuid_eax(0x00000001);
98 133
99 while (equiv_cpu_table[i].installed_cpu != 0) { 134 while (equiv_cpu_table[i].installed_cpu != 0) {
100 if (current_cpu_id == equiv_cpu_table[i].installed_cpu) { 135 if (current_cpu_id == equiv_cpu_table[i].installed_cpu)
101 equiv_cpu_id = equiv_cpu_table[i].equiv_cpu; 136 return equiv_cpu_table[i].equiv_cpu;
102 break; 137
103 }
104 i++; 138 i++;
105 } 139 }
140 return 0;
141}
106 142
143/*
144 * we signal a good patch is found by returning its size > 0
145 */
146static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
147 unsigned int leftover_size, int rev,
148 unsigned int *current_size)
149{
150 struct microcode_header_amd *mc_hdr;
151 unsigned int actual_size;
152 u16 equiv_cpu_id;
153
154 /* size of the current patch we're staring at */
155 *current_size = *(u32 *)(ucode_ptr + 4) + SECTION_HDR_SIZE;
156
157 equiv_cpu_id = find_equiv_id();
107 if (!equiv_cpu_id) 158 if (!equiv_cpu_id)
108 return 0; 159 return 0;
109 160
161 /*
162 * let's look at the patch header itself now
163 */
164 mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE);
165
110 if (mc_hdr->processor_rev_id != equiv_cpu_id) 166 if (mc_hdr->processor_rev_id != equiv_cpu_id)
111 return 0; 167 return 0;
112 168
@@ -120,7 +176,20 @@ static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr,
120 if (mc_hdr->patch_id <= rev) 176 if (mc_hdr->patch_id <= rev)
121 return 0; 177 return 0;
122 178
123 return 1; 179 /*
180 * now that the header looks sane, verify its size
181 */
182 actual_size = verify_ucode_size(cpu, *current_size, leftover_size);
183 if (!actual_size)
184 return 0;
185
186 /* clear the patch buffer */
187 memset(patch, 0, PAGE_SIZE);
188
189 /* all looks ok, get the binary patch */
190 get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);
191
192 return actual_size;
124} 193}
125 194
126static int apply_microcode_amd(int cpu) 195static int apply_microcode_amd(int cpu)
@@ -155,63 +224,6 @@ static int apply_microcode_amd(int cpu)
155 return 0; 224 return 0;
156} 225}
157 226
158static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size)
159{
160 struct cpuinfo_x86 *c = &cpu_data(cpu);
161 u32 max_size, actual_size;
162
163#define F1XH_MPB_MAX_SIZE 2048
164#define F14H_MPB_MAX_SIZE 1824
165#define F15H_MPB_MAX_SIZE 4096
166
167 switch (c->x86) {
168 case 0x14:
169 max_size = F14H_MPB_MAX_SIZE;
170 break;
171 case 0x15:
172 max_size = F15H_MPB_MAX_SIZE;
173 break;
174 default:
175 max_size = F1XH_MPB_MAX_SIZE;
176 break;
177 }
178
179 actual_size = *(u32 *)(buf + 4);
180
181 if (actual_size + SECTION_HDR_SIZE > size || actual_size > max_size) {
182 pr_err("section size mismatch\n");
183 return 0;
184 }
185
186 return actual_size;
187}
188
189static struct microcode_header_amd *
190get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size)
191{
192 struct microcode_header_amd *mc = NULL;
193 unsigned int actual_size = 0;
194
195 if (*(u32 *)buf != UCODE_UCODE_TYPE) {
196 pr_err("invalid type field in container file section header\n");
197 goto out;
198 }
199
200 actual_size = verify_ucode_size(cpu, buf, size);
201 if (!actual_size)
202 goto out;
203
204 mc = vzalloc(actual_size);
205 if (!mc)
206 goto out;
207
208 get_ucode_data(mc, buf + SECTION_HDR_SIZE, actual_size);
209 *mc_size = actual_size + SECTION_HDR_SIZE;
210
211out:
212 return mc;
213}
214
215static int install_equiv_cpu_table(const u8 *buf) 227static int install_equiv_cpu_table(const u8 *buf)
216{ 228{
217 unsigned int *ibuf = (unsigned int *)buf; 229 unsigned int *ibuf = (unsigned int *)buf;
@@ -247,36 +259,38 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
247{ 259{
248 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 260 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
249 struct microcode_header_amd *mc_hdr = NULL; 261 struct microcode_header_amd *mc_hdr = NULL;
250 unsigned int mc_size, leftover; 262 unsigned int mc_size, leftover, current_size = 0;
251 int offset; 263 int offset;
252 const u8 *ucode_ptr = data; 264 const u8 *ucode_ptr = data;
253 void *new_mc = NULL; 265 void *new_mc = NULL;
254 unsigned int new_rev = uci->cpu_sig.rev; 266 unsigned int new_rev = uci->cpu_sig.rev;
255 enum ucode_state state = UCODE_OK; 267 enum ucode_state state = UCODE_ERROR;
256 268
257 offset = install_equiv_cpu_table(ucode_ptr); 269 offset = install_equiv_cpu_table(ucode_ptr);
258 if (offset < 0) { 270 if (offset < 0) {
259 pr_err("failed to create equivalent cpu table\n"); 271 pr_err("failed to create equivalent cpu table\n");
260 return UCODE_ERROR; 272 goto out;
261 } 273 }
262
263 ucode_ptr += offset; 274 ucode_ptr += offset;
264 leftover = size - offset; 275 leftover = size - offset;
265 276
266 while (leftover) { 277 if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) {
267 mc_hdr = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size); 278 pr_err("invalid type field in container file section header\n");
268 if (!mc_hdr) 279 goto free_table;
269 break; 280 }
270 281
271 if (get_matching_microcode(cpu, mc_hdr, new_rev)) { 282 while (leftover) {
272 vfree(new_mc); 283 mc_size = get_matching_microcode(cpu, ucode_ptr, leftover,
284 new_rev, &current_size);
285 if (mc_size) {
286 mc_hdr = patch;
287 new_mc = patch;
273 new_rev = mc_hdr->patch_id; 288 new_rev = mc_hdr->patch_id;
274 new_mc = mc_hdr; 289 goto out_ok;
275 } else 290 }
276 vfree(mc_hdr);
277 291
278 ucode_ptr += mc_size; 292 ucode_ptr += current_size;
279 leftover -= mc_size; 293 leftover -= current_size;
280 } 294 }
281 295
282 if (!new_mc) { 296 if (!new_mc) {
@@ -284,19 +298,16 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
284 goto free_table; 298 goto free_table;
285 } 299 }
286 300
287 if (!leftover) { 301out_ok:
288 vfree(uci->mc); 302 uci->mc = new_mc;
289 uci->mc = new_mc; 303 state = UCODE_OK;
290 pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", 304 pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
291 cpu, uci->cpu_sig.rev, new_rev); 305 cpu, uci->cpu_sig.rev, new_rev);
292 } else {
293 vfree(new_mc);
294 state = UCODE_ERROR;
295 }
296 306
297free_table: 307free_table:
298 free_equiv_cpu_table(); 308 free_equiv_cpu_table();
299 309
310out:
300 return state; 311 return state;
301} 312}
302 313
@@ -337,7 +348,6 @@ static void microcode_fini_cpu_amd(int cpu)
337{ 348{
338 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 349 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
339 350
340 vfree(uci->mc);
341 uci->mc = NULL; 351 uci->mc = NULL;
342} 352}
343 353
@@ -351,5 +361,14 @@ static struct microcode_ops microcode_amd_ops = {
351 361
352struct microcode_ops * __init init_amd_microcode(void) 362struct microcode_ops * __init init_amd_microcode(void)
353{ 363{
364 patch = (void *)get_zeroed_page(GFP_KERNEL);
365 if (!patch)
366 return NULL;
367
354 return &microcode_amd_ops; 368 return &microcode_amd_ops;
355} 369}
370
371void __exit exit_amd_microcode(void)
372{
373 free_page((unsigned long)patch);
374}
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 9d46f5e43b51..fda91c307104 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -292,8 +292,8 @@ static int reload_for_cpu(int cpu)
292 return err; 292 return err;
293} 293}
294 294
295static ssize_t reload_store(struct sys_device *dev, 295static ssize_t reload_store(struct device *dev,
296 struct sysdev_attribute *attr, 296 struct device_attribute *attr,
297 const char *buf, size_t size) 297 const char *buf, size_t size)
298{ 298{
299 unsigned long val; 299 unsigned long val;
@@ -318,30 +318,30 @@ static ssize_t reload_store(struct sys_device *dev,
318 return ret; 318 return ret;
319} 319}
320 320
321static ssize_t version_show(struct sys_device *dev, 321static ssize_t version_show(struct device *dev,
322 struct sysdev_attribute *attr, char *buf) 322 struct device_attribute *attr, char *buf)
323{ 323{
324 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; 324 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
325 325
326 return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); 326 return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
327} 327}
328 328
329static ssize_t pf_show(struct sys_device *dev, 329static ssize_t pf_show(struct device *dev,
330 struct sysdev_attribute *attr, char *buf) 330 struct device_attribute *attr, char *buf)
331{ 331{
332 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; 332 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
333 333
334 return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); 334 return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
335} 335}
336 336
337static SYSDEV_ATTR(reload, 0200, NULL, reload_store); 337static DEVICE_ATTR(reload, 0200, NULL, reload_store);
338static SYSDEV_ATTR(version, 0400, version_show, NULL); 338static DEVICE_ATTR(version, 0400, version_show, NULL);
339static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); 339static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);
340 340
341static struct attribute *mc_default_attrs[] = { 341static struct attribute *mc_default_attrs[] = {
342 &attr_reload.attr, 342 &dev_attr_reload.attr,
343 &attr_version.attr, 343 &dev_attr_version.attr,
344 &attr_processor_flags.attr, 344 &dev_attr_processor_flags.attr,
345 NULL 345 NULL
346}; 346};
347 347
@@ -405,43 +405,45 @@ static enum ucode_state microcode_update_cpu(int cpu)
405 return ustate; 405 return ustate;
406} 406}
407 407
408static int mc_sysdev_add(struct sys_device *sys_dev) 408static int mc_device_add(struct device *dev, struct subsys_interface *sif)
409{ 409{
410 int err, cpu = sys_dev->id; 410 int err, cpu = dev->id;
411 411
412 if (!cpu_online(cpu)) 412 if (!cpu_online(cpu))
413 return 0; 413 return 0;
414 414
415 pr_debug("CPU%d added\n", cpu); 415 pr_debug("CPU%d added\n", cpu);
416 416
417 err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); 417 err = sysfs_create_group(&dev->kobj, &mc_attr_group);
418 if (err) 418 if (err)
419 return err; 419 return err;
420 420
421 if (microcode_init_cpu(cpu) == UCODE_ERROR) { 421 if (microcode_init_cpu(cpu) == UCODE_ERROR) {
422 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); 422 sysfs_remove_group(&dev->kobj, &mc_attr_group);
423 return -EINVAL; 423 return -EINVAL;
424 } 424 }
425 425
426 return err; 426 return err;
427} 427}
428 428
429static int mc_sysdev_remove(struct sys_device *sys_dev) 429static int mc_device_remove(struct device *dev, struct subsys_interface *sif)
430{ 430{
431 int cpu = sys_dev->id; 431 int cpu = dev->id;
432 432
433 if (!cpu_online(cpu)) 433 if (!cpu_online(cpu))
434 return 0; 434 return 0;
435 435
436 pr_debug("CPU%d removed\n", cpu); 436 pr_debug("CPU%d removed\n", cpu);
437 microcode_fini_cpu(cpu); 437 microcode_fini_cpu(cpu);
438 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); 438 sysfs_remove_group(&dev->kobj, &mc_attr_group);
439 return 0; 439 return 0;
440} 440}
441 441
442static struct sysdev_driver mc_sysdev_driver = { 442static struct subsys_interface mc_cpu_interface = {
443 .add = mc_sysdev_add, 443 .name = "microcode",
444 .remove = mc_sysdev_remove, 444 .subsys = &cpu_subsys,
445 .add_dev = mc_device_add,
446 .remove_dev = mc_device_remove,
445}; 447};
446 448
447/** 449/**
@@ -464,9 +466,9 @@ static __cpuinit int
464mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) 466mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
465{ 467{
466 unsigned int cpu = (unsigned long)hcpu; 468 unsigned int cpu = (unsigned long)hcpu;
467 struct sys_device *sys_dev; 469 struct device *dev;
468 470
469 sys_dev = get_cpu_sysdev(cpu); 471 dev = get_cpu_device(cpu);
470 switch (action) { 472 switch (action) {
471 case CPU_ONLINE: 473 case CPU_ONLINE:
472 case CPU_ONLINE_FROZEN: 474 case CPU_ONLINE_FROZEN:
@@ -474,13 +476,13 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
474 case CPU_DOWN_FAILED: 476 case CPU_DOWN_FAILED:
475 case CPU_DOWN_FAILED_FROZEN: 477 case CPU_DOWN_FAILED_FROZEN:
476 pr_debug("CPU%d added\n", cpu); 478 pr_debug("CPU%d added\n", cpu);
477 if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) 479 if (sysfs_create_group(&dev->kobj, &mc_attr_group))
478 pr_err("Failed to create group for CPU%d\n", cpu); 480 pr_err("Failed to create group for CPU%d\n", cpu);
479 break; 481 break;
480 case CPU_DOWN_PREPARE: 482 case CPU_DOWN_PREPARE:
481 case CPU_DOWN_PREPARE_FROZEN: 483 case CPU_DOWN_PREPARE_FROZEN:
482 /* Suspend is in progress, only remove the interface */ 484 /* Suspend is in progress, only remove the interface */
483 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); 485 sysfs_remove_group(&dev->kobj, &mc_attr_group);
484 pr_debug("CPU%d removed\n", cpu); 486 pr_debug("CPU%d removed\n", cpu);
485 break; 487 break;
486 488
@@ -525,7 +527,7 @@ static int __init microcode_init(void)
525 get_online_cpus(); 527 get_online_cpus();
526 mutex_lock(&microcode_mutex); 528 mutex_lock(&microcode_mutex);
527 529
528 error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); 530 error = subsys_interface_register(&mc_cpu_interface);
529 531
530 mutex_unlock(&microcode_mutex); 532 mutex_unlock(&microcode_mutex);
531 put_online_cpus(); 533 put_online_cpus();
@@ -535,7 +537,7 @@ static int __init microcode_init(void)
535 537
536 error = microcode_dev_init(); 538 error = microcode_dev_init();
537 if (error) 539 if (error)
538 goto out_sysdev_driver; 540 goto out_driver;
539 541
540 register_syscore_ops(&mc_syscore_ops); 542 register_syscore_ops(&mc_syscore_ops);
541 register_hotcpu_notifier(&mc_cpu_notifier); 543 register_hotcpu_notifier(&mc_cpu_notifier);
@@ -545,11 +547,11 @@ static int __init microcode_init(void)
545 547
546 return 0; 548 return 0;
547 549
548out_sysdev_driver: 550out_driver:
549 get_online_cpus(); 551 get_online_cpus();
550 mutex_lock(&microcode_mutex); 552 mutex_lock(&microcode_mutex);
551 553
552 sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); 554 subsys_interface_unregister(&mc_cpu_interface);
553 555
554 mutex_unlock(&microcode_mutex); 556 mutex_unlock(&microcode_mutex);
555 put_online_cpus(); 557 put_online_cpus();
@@ -563,6 +565,8 @@ module_init(microcode_init);
563 565
564static void __exit microcode_exit(void) 566static void __exit microcode_exit(void)
565{ 567{
568 struct cpuinfo_x86 *c = &cpu_data(0);
569
566 microcode_dev_exit(); 570 microcode_dev_exit();
567 571
568 unregister_hotcpu_notifier(&mc_cpu_notifier); 572 unregister_hotcpu_notifier(&mc_cpu_notifier);
@@ -571,7 +575,7 @@ static void __exit microcode_exit(void)
571 get_online_cpus(); 575 get_online_cpus();
572 mutex_lock(&microcode_mutex); 576 mutex_lock(&microcode_mutex);
573 577
574 sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); 578 subsys_interface_unregister(&mc_cpu_interface);
575 579
576 mutex_unlock(&microcode_mutex); 580 mutex_unlock(&microcode_mutex);
577 put_online_cpus(); 581 put_online_cpus();
@@ -580,6 +584,9 @@ static void __exit microcode_exit(void)
580 584
581 microcode_ops = NULL; 585 microcode_ops = NULL;
582 586
587 if (c->x86_vendor == X86_VENDOR_AMD)
588 exit_amd_microcode();
589
583 pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); 590 pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
584} 591}
585module_exit(microcode_exit); 592module_exit(microcode_exit);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 0741b062a304..ca470e4c92dc 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -564,9 +564,7 @@ void __init default_get_smp_config(unsigned int early)
564 564
565static void __init smp_reserve_memory(struct mpf_intel *mpf) 565static void __init smp_reserve_memory(struct mpf_intel *mpf)
566{ 566{
567 unsigned long size = get_mpc_size(mpf->physptr); 567 memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr));
568
569 memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
570} 568}
571 569
572static int __init smp_scan_config(unsigned long base, unsigned long length) 570static int __init smp_scan_config(unsigned long base, unsigned long length)
@@ -595,7 +593,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
595 mpf, (u64)virt_to_phys(mpf)); 593 mpf, (u64)virt_to_phys(mpf));
596 594
597 mem = virt_to_phys(mpf); 595 mem = virt_to_phys(mpf);
598 memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf"); 596 memblock_reserve(mem, sizeof(*mpf));
599 if (mpf->physptr) 597 if (mpf->physptr)
600 smp_reserve_memory(mpf); 598 smp_reserve_memory(mpf);
601 599
@@ -836,10 +834,8 @@ early_param("alloc_mptable", parse_alloc_mptable_opt);
836 834
837void __init early_reserve_e820_mpc_new(void) 835void __init early_reserve_e820_mpc_new(void)
838{ 836{
839 if (enable_update_mptable && alloc_mptable) { 837 if (enable_update_mptable && alloc_mptable)
840 u64 startt = 0; 838 mpc_new_phys = early_reserve_e820(mpc_new_length, 4);
841 mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
842 }
843} 839}
844 840
845static int __init update_mp_table(void) 841static int __init update_mp_table(void)
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 12fcbe2c143e..96356762a51d 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -236,7 +236,7 @@ static struct notifier_block __refdata msr_class_cpu_notifier = {
236 .notifier_call = msr_class_cpu_callback, 236 .notifier_call = msr_class_cpu_callback,
237}; 237};
238 238
239static char *msr_devnode(struct device *dev, mode_t *mode) 239static char *msr_devnode(struct device *dev, umode_t *mode)
240{ 240{
241 return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); 241 return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
242} 242}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ee5d4fbd53b4..15763af7bfe3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -293,7 +293,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
293 regs.orig_ax = -1; 293 regs.orig_ax = -1;
294 regs.ip = (unsigned long) kernel_thread_helper; 294 regs.ip = (unsigned long) kernel_thread_helper;
295 regs.cs = __KERNEL_CS | get_kernel_rpl(); 295 regs.cs = __KERNEL_CS | get_kernel_rpl();
296 regs.flags = X86_EFLAGS_IF | 0x2; 296 regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
297 297
298 /* Ok, create the new process.. */ 298 /* Ok, create the new process.. */
299 return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); 299 return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 795b79f984c2..485204f58cda 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -99,7 +99,8 @@ void cpu_idle(void)
99 99
100 /* endless idle loop with no priority at all */ 100 /* endless idle loop with no priority at all */
101 while (1) { 101 while (1) {
102 tick_nohz_stop_sched_tick(1); 102 tick_nohz_idle_enter();
103 rcu_idle_enter();
103 while (!need_resched()) { 104 while (!need_resched()) {
104 105
105 check_pgt_cache(); 106 check_pgt_cache();
@@ -116,7 +117,8 @@ void cpu_idle(void)
116 pm_idle(); 117 pm_idle();
117 start_critical_timings(); 118 start_critical_timings();
118 } 119 }
119 tick_nohz_restart_sched_tick(); 120 rcu_idle_exit();
121 tick_nohz_idle_exit();
120 preempt_enable_no_resched(); 122 preempt_enable_no_resched();
121 schedule(); 123 schedule();
122 preempt_disable(); 124 preempt_disable();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3bd7e6eebf31..9b9fe4a85c87 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -122,7 +122,7 @@ void cpu_idle(void)
122 122
123 /* endless idle loop with no priority at all */ 123 /* endless idle loop with no priority at all */
124 while (1) { 124 while (1) {
125 tick_nohz_stop_sched_tick(1); 125 tick_nohz_idle_enter();
126 while (!need_resched()) { 126 while (!need_resched()) {
127 127
128 rmb(); 128 rmb();
@@ -139,8 +139,14 @@ void cpu_idle(void)
139 enter_idle(); 139 enter_idle();
140 /* Don't trace irqs off for idle */ 140 /* Don't trace irqs off for idle */
141 stop_critical_timings(); 141 stop_critical_timings();
142
143 /* enter_idle() needs rcu for notifiers */
144 rcu_idle_enter();
145
142 if (cpuidle_idle_call()) 146 if (cpuidle_idle_call())
143 pm_idle(); 147 pm_idle();
148
149 rcu_idle_exit();
144 start_critical_timings(); 150 start_critical_timings();
145 151
146 /* In many cases the interrupt that ended idle 152 /* In many cases the interrupt that ended idle
@@ -149,7 +155,7 @@ void cpu_idle(void)
149 __exit_idle(); 155 __exit_idle();
150 } 156 }
151 157
152 tick_nohz_restart_sched_tick(); 158 tick_nohz_idle_exit();
153 preempt_enable_no_resched(); 159 preempt_enable_no_resched();
154 schedule(); 160 schedule();
155 preempt_disable(); 161 preempt_disable();
@@ -293,13 +299,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
293 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); 299 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
294 300
295 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { 301 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
296 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); 302 p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
303 IO_BITMAP_BYTES, GFP_KERNEL);
297 if (!p->thread.io_bitmap_ptr) { 304 if (!p->thread.io_bitmap_ptr) {
298 p->thread.io_bitmap_max = 0; 305 p->thread.io_bitmap_max = 0;
299 return -ENOMEM; 306 return -ENOMEM;
300 } 307 }
301 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
302 IO_BITMAP_BYTES);
303 set_tsk_thread_flag(p, TIF_IO_BITMAP); 308 set_tsk_thread_flag(p, TIF_IO_BITMAP);
304 } 309 }
305 310
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 82528799c5de..89a04c7b5bb6 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -749,7 +749,8 @@ put:
749/* 749/*
750 * Handle PTRACE_POKEUSR calls for the debug register area. 750 * Handle PTRACE_POKEUSR calls for the debug register area.
751 */ 751 */
752int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) 752static int ptrace_set_debugreg(struct task_struct *tsk, int n,
753 unsigned long val)
753{ 754{
754 struct thread_struct *thread = &(tsk->thread); 755 struct thread_struct *thread = &(tsk->thread);
755 int rc = 0; 756 int rc = 0;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9a9e40fb091c..d05444ac2aea 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -306,7 +306,8 @@ static void __init cleanup_highmap(void)
306static void __init reserve_brk(void) 306static void __init reserve_brk(void)
307{ 307{
308 if (_brk_end > _brk_start) 308 if (_brk_end > _brk_start)
309 memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK"); 309 memblock_reserve(__pa(_brk_start),
310 __pa(_brk_end) - __pa(_brk_start));
310 311
311 /* Mark brk area as locked down and no longer taking any 312 /* Mark brk area as locked down and no longer taking any
312 new allocations */ 313 new allocations */
@@ -331,13 +332,13 @@ static void __init relocate_initrd(void)
331 ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, 332 ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
332 PAGE_SIZE); 333 PAGE_SIZE);
333 334
334 if (ramdisk_here == MEMBLOCK_ERROR) 335 if (!ramdisk_here)
335 panic("Cannot find place for new RAMDISK of size %lld\n", 336 panic("Cannot find place for new RAMDISK of size %lld\n",
336 ramdisk_size); 337 ramdisk_size);
337 338
338 /* Note: this includes all the lowmem currently occupied by 339 /* Note: this includes all the lowmem currently occupied by
339 the initrd, we rely on that fact to keep the data intact. */ 340 the initrd, we rely on that fact to keep the data intact. */
340 memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK"); 341 memblock_reserve(ramdisk_here, area_size);
341 initrd_start = ramdisk_here + PAGE_OFFSET; 342 initrd_start = ramdisk_here + PAGE_OFFSET;
342 initrd_end = initrd_start + ramdisk_size; 343 initrd_end = initrd_start + ramdisk_size;
343 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", 344 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -393,7 +394,7 @@ static void __init reserve_initrd(void)
393 initrd_start = 0; 394 initrd_start = 0;
394 395
395 if (ramdisk_size >= (end_of_lowmem>>1)) { 396 if (ramdisk_size >= (end_of_lowmem>>1)) {
396 memblock_x86_free_range(ramdisk_image, ramdisk_end); 397 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
397 printk(KERN_ERR "initrd too large to handle, " 398 printk(KERN_ERR "initrd too large to handle, "
398 "disabling initrd\n"); 399 "disabling initrd\n");
399 return; 400 return;
@@ -416,7 +417,7 @@ static void __init reserve_initrd(void)
416 417
417 relocate_initrd(); 418 relocate_initrd();
418 419
419 memblock_x86_free_range(ramdisk_image, ramdisk_end); 420 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
420} 421}
421#else 422#else
422static void __init reserve_initrd(void) 423static void __init reserve_initrd(void)
@@ -490,15 +491,13 @@ static void __init memblock_x86_reserve_range_setup_data(void)
490{ 491{
491 struct setup_data *data; 492 struct setup_data *data;
492 u64 pa_data; 493 u64 pa_data;
493 char buf[32];
494 494
495 if (boot_params.hdr.version < 0x0209) 495 if (boot_params.hdr.version < 0x0209)
496 return; 496 return;
497 pa_data = boot_params.hdr.setup_data; 497 pa_data = boot_params.hdr.setup_data;
498 while (pa_data) { 498 while (pa_data) {
499 data = early_memremap(pa_data, sizeof(*data)); 499 data = early_memremap(pa_data, sizeof(*data));
500 sprintf(buf, "setup data %x", data->type); 500 memblock_reserve(pa_data, sizeof(*data) + data->len);
501 memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
502 pa_data = data->next; 501 pa_data = data->next;
503 early_iounmap(data, sizeof(*data)); 502 early_iounmap(data, sizeof(*data));
504 } 503 }
@@ -554,7 +553,7 @@ static void __init reserve_crashkernel(void)
554 crash_base = memblock_find_in_range(alignment, 553 crash_base = memblock_find_in_range(alignment,
555 CRASH_KERNEL_ADDR_MAX, crash_size, alignment); 554 CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
556 555
557 if (crash_base == MEMBLOCK_ERROR) { 556 if (!crash_base) {
558 pr_info("crashkernel reservation failed - No suitable area found.\n"); 557 pr_info("crashkernel reservation failed - No suitable area found.\n");
559 return; 558 return;
560 } 559 }
@@ -568,7 +567,7 @@ static void __init reserve_crashkernel(void)
568 return; 567 return;
569 } 568 }
570 } 569 }
571 memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL"); 570 memblock_reserve(crash_base, crash_size);
572 571
573 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " 572 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
574 "for crashkernel (System RAM: %ldMB)\n", 573 "for crashkernel (System RAM: %ldMB)\n",
@@ -626,7 +625,7 @@ static __init void reserve_ibft_region(void)
626 addr = find_ibft_region(&size); 625 addr = find_ibft_region(&size);
627 626
628 if (size) 627 if (size)
629 memblock_x86_reserve_range(addr, addr + size, "* ibft"); 628 memblock_reserve(addr, size);
630} 629}
631 630
632static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; 631static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
@@ -691,8 +690,6 @@ early_param("reservelow", parse_reservelow);
691 690
692void __init setup_arch(char **cmdline_p) 691void __init setup_arch(char **cmdline_p)
693{ 692{
694 unsigned long end_pfn;
695
696#ifdef CONFIG_X86_32 693#ifdef CONFIG_X86_32
697 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 694 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
698 visws_early_detect(); 695 visws_early_detect();
@@ -934,24 +931,7 @@ void __init setup_arch(char **cmdline_p)
934 init_gbpages(); 931 init_gbpages();
935 932
936 /* max_pfn_mapped is updated here */ 933 /* max_pfn_mapped is updated here */
937 end_pfn = max_low_pfn; 934 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
938
939#ifdef CONFIG_X86_64
940 /*
941 * There may be regions after the last E820_RAM region that we
942 * want to include in the kernel direct mapping, such as
943 * EFI_RUNTIME_SERVICES_DATA.
944 */
945 if (efi_enabled) {
946 unsigned long efi_end;
947
948 efi_end = e820_end_pfn(MAXMEM>>PAGE_SHIFT, E820_RESERVED_EFI);
949 if (efi_end > max_low_pfn)
950 end_pfn = efi_end;
951 }
952#endif
953
954 max_low_pfn_mapped = init_memory_mapping(0, end_pfn << PAGE_SHIFT);
955 max_pfn_mapped = max_low_pfn_mapped; 935 max_pfn_mapped = max_low_pfn_mapped;
956 936
957#ifdef CONFIG_X86_64 937#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9f548cb4a958..e38e21754eea 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -840,7 +840,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
840 pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); 840 pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
841 841
842 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || 842 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
843 !physid_isset(apicid, phys_cpu_present_map)) { 843 !physid_isset(apicid, phys_cpu_present_map) ||
844 (!x2apic_mode && apicid >= 255)) {
844 printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); 845 printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
845 return -EINVAL; 846 return -EINVAL;
846 } 847 }
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index a91ae7709b49..a73b61055ad6 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -14,11 +14,11 @@ void __init setup_trampolines(void)
14 14
15 /* Has to be in very low memory so we can execute real-mode AP code. */ 15 /* Has to be in very low memory so we can execute real-mode AP code. */
16 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); 16 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
17 if (mem == MEMBLOCK_ERROR) 17 if (!mem)
18 panic("Cannot allocate trampoline\n"); 18 panic("Cannot allocate trampoline\n");
19 19
20 x86_trampoline_base = __va(mem); 20 x86_trampoline_base = __va(mem);
21 memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE"); 21 memblock_reserve(mem, size);
22 22
23 printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", 23 printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
24 x86_trampoline_base, (unsigned long long)mem, size); 24 x86_trampoline_base, (unsigned long long)mem, size);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a8e3eb83466c..fa1191fb679d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -306,15 +306,10 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
306 == NOTIFY_STOP) 306 == NOTIFY_STOP)
307 return; 307 return;
308#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ 308#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
309#ifdef CONFIG_KPROBES 309
310 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) 310 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
311 == NOTIFY_STOP) 311 == NOTIFY_STOP)
312 return; 312 return;
313#else
314 if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
315 == NOTIFY_STOP)
316 return;
317#endif
318 313
319 preempt_conditional_sti(regs); 314 preempt_conditional_sti(regs);
320 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); 315 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index db483369f10b..2c9cf0fd78f5 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -35,7 +35,7 @@ static int __read_mostly tsc_unstable;
35 erroneous rdtsc usage on !cpu_has_tsc processors */ 35 erroneous rdtsc usage on !cpu_has_tsc processors */
36static int __read_mostly tsc_disabled = -1; 36static int __read_mostly tsc_disabled = -1;
37 37
38static int tsc_clocksource_reliable; 38int tsc_clocksource_reliable;
39/* 39/*
40 * Scheduler clock - returns current time in nanosec units. 40 * Scheduler clock - returns current time in nanosec units.
41 */ 41 */
@@ -178,11 +178,11 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
178} 178}
179 179
180#define CAL_MS 10 180#define CAL_MS 10
181#define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) 181#define CAL_LATCH (PIT_TICK_RATE / (1000 / CAL_MS))
182#define CAL_PIT_LOOPS 1000 182#define CAL_PIT_LOOPS 1000
183 183
184#define CAL2_MS 50 184#define CAL2_MS 50
185#define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS)) 185#define CAL2_LATCH (PIT_TICK_RATE / (1000 / CAL2_MS))
186#define CAL2_PIT_LOOPS 5000 186#define CAL2_PIT_LOOPS 5000
187 187
188 188
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 0aa5fed8b9e6..9eba29b46cb7 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -113,7 +113,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
113 if (unsynchronized_tsc()) 113 if (unsynchronized_tsc())
114 return; 114 return;
115 115
116 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { 116 if (tsc_clocksource_reliable) {
117 if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) 117 if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
118 pr_info( 118 pr_info(
119 "Skipped synchronization checks as TSC is reliable.\n"); 119 "Skipped synchronization checks as TSC is reliable.\n");
@@ -172,7 +172,7 @@ void __cpuinit check_tsc_sync_target(void)
172{ 172{
173 int cpus = 2; 173 int cpus = 2;
174 174
175 if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) 175 if (unsynchronized_tsc() || tsc_clocksource_reliable)
176 return; 176 return;
177 177
178 /* 178 /*
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index e4d4a22e8b94..b07ba9393564 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
57 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), 57 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
58}; 58};
59 59
60static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE; 60static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
61 61
62static int __init vsyscall_setup(char *str) 62static int __init vsyscall_setup(char *str)
63{ 63{
@@ -140,11 +140,40 @@ static int addr_to_vsyscall_nr(unsigned long addr)
140 return nr; 140 return nr;
141} 141}
142 142
143static bool write_ok_or_segv(unsigned long ptr, size_t size)
144{
145 /*
146 * XXX: if access_ok, get_user, and put_user handled
147 * sig_on_uaccess_error, this could go away.
148 */
149
150 if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
151 siginfo_t info;
152 struct thread_struct *thread = &current->thread;
153
154 thread->error_code = 6; /* user fault, no page, write */
155 thread->cr2 = ptr;
156 thread->trap_no = 14;
157
158 memset(&info, 0, sizeof(info));
159 info.si_signo = SIGSEGV;
160 info.si_errno = 0;
161 info.si_code = SEGV_MAPERR;
162 info.si_addr = (void __user *)ptr;
163
164 force_sig_info(SIGSEGV, &info, current);
165 return false;
166 } else {
167 return true;
168 }
169}
170
143bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) 171bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
144{ 172{
145 struct task_struct *tsk; 173 struct task_struct *tsk;
146 unsigned long caller; 174 unsigned long caller;
147 int vsyscall_nr; 175 int vsyscall_nr;
176 int prev_sig_on_uaccess_error;
148 long ret; 177 long ret;
149 178
150 /* 179 /*
@@ -180,35 +209,65 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
180 if (seccomp_mode(&tsk->seccomp)) 209 if (seccomp_mode(&tsk->seccomp))
181 do_exit(SIGKILL); 210 do_exit(SIGKILL);
182 211
212 /*
213 * With a real vsyscall, page faults cause SIGSEGV. We want to
214 * preserve that behavior to make writing exploits harder.
215 */
216 prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
217 current_thread_info()->sig_on_uaccess_error = 1;
218
219 /*
220 * 0 is a valid user pointer (in the access_ok sense) on 32-bit and
221 * 64-bit, so we don't need to special-case it here. For all the
222 * vsyscalls, 0 means "don't write anything" not "write it at
223 * address 0".
224 */
225 ret = -EFAULT;
183 switch (vsyscall_nr) { 226 switch (vsyscall_nr) {
184 case 0: 227 case 0:
228 if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
229 !write_ok_or_segv(regs->si, sizeof(struct timezone)))
230 break;
231
185 ret = sys_gettimeofday( 232 ret = sys_gettimeofday(
186 (struct timeval __user *)regs->di, 233 (struct timeval __user *)regs->di,
187 (struct timezone __user *)regs->si); 234 (struct timezone __user *)regs->si);
188 break; 235 break;
189 236
190 case 1: 237 case 1:
238 if (!write_ok_or_segv(regs->di, sizeof(time_t)))
239 break;
240
191 ret = sys_time((time_t __user *)regs->di); 241 ret = sys_time((time_t __user *)regs->di);
192 break; 242 break;
193 243
194 case 2: 244 case 2:
245 if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
246 !write_ok_or_segv(regs->si, sizeof(unsigned)))
247 break;
248
195 ret = sys_getcpu((unsigned __user *)regs->di, 249 ret = sys_getcpu((unsigned __user *)regs->di,
196 (unsigned __user *)regs->si, 250 (unsigned __user *)regs->si,
197 0); 251 0);
198 break; 252 break;
199 } 253 }
200 254
255 current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
256
201 if (ret == -EFAULT) { 257 if (ret == -EFAULT) {
202 /* 258 /* Bad news -- userspace fed a bad pointer to a vsyscall. */
203 * Bad news -- userspace fed a bad pointer to a vsyscall.
204 *
205 * With a real vsyscall, that would have caused SIGSEGV.
206 * To make writing reliable exploits using the emulated
207 * vsyscalls harder, generate SIGSEGV here as well.
208 */
209 warn_bad_vsyscall(KERN_INFO, regs, 259 warn_bad_vsyscall(KERN_INFO, regs,
210 "vsyscall fault (exploit attempt?)"); 260 "vsyscall fault (exploit attempt?)");
211 goto sigsegv; 261
262 /*
263 * If we failed to generate a signal for any reason,
264 * generate one here. (This should be impossible.)
265 */
266 if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
267 !sigismember(&tsk->pending.signal, SIGSEGV)))
268 goto sigsegv;
269
270 return true; /* Don't emulate the ret. */
212 } 271 }
213 272
214 regs->ax = ret; 273 regs->ax = ret;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c1d6cd549397..91f83e21b989 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -92,6 +92,7 @@ struct x86_init_ops x86_init __initdata = {
92 92
93struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { 93struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
94 .setup_percpu_clockev = setup_secondary_APIC_clock, 94 .setup_percpu_clockev = setup_secondary_APIC_clock,
95 .fixup_cpu_id = x86_default_fixup_cpu_id,
95}; 96};
96 97
97static void default_nmi_init(void) { }; 98static void default_nmi_init(void) { };
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 76e3f1cd0369..405f2620392f 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -338,11 +338,15 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
338 return HRTIMER_NORESTART; 338 return HRTIMER_NORESTART;
339} 339}
340 340
341static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) 341static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
342{ 342{
343 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
343 struct kvm_timer *pt = &ps->pit_timer; 344 struct kvm_timer *pt = &ps->pit_timer;
344 s64 interval; 345 s64 interval;
345 346
347 if (!irqchip_in_kernel(kvm))
348 return;
349
346 interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); 350 interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
347 351
348 pr_debug("create pit timer, interval is %llu nsec\n", interval); 352 pr_debug("create pit timer, interval is %llu nsec\n", interval);
@@ -394,13 +398,13 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
394 /* FIXME: enhance mode 4 precision */ 398 /* FIXME: enhance mode 4 precision */
395 case 4: 399 case 4:
396 if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) { 400 if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) {
397 create_pit_timer(ps, val, 0); 401 create_pit_timer(kvm, val, 0);
398 } 402 }
399 break; 403 break;
400 case 2: 404 case 2:
401 case 3: 405 case 3:
402 if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){ 406 if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){
403 create_pit_timer(ps, val, 1); 407 create_pit_timer(kvm, val, 1);
404 } 408 }
405 break; 409 break;
406 default: 410 default:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c38efd7b792e..4c938da2ba00 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -602,7 +602,6 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
602{ 602{
603 struct kvm_cpuid_entry2 *best; 603 struct kvm_cpuid_entry2 *best;
604 struct kvm_lapic *apic = vcpu->arch.apic; 604 struct kvm_lapic *apic = vcpu->arch.apic;
605 u32 timer_mode_mask;
606 605
607 best = kvm_find_cpuid_entry(vcpu, 1, 0); 606 best = kvm_find_cpuid_entry(vcpu, 1, 0);
608 if (!best) 607 if (!best)
@@ -615,15 +614,12 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
615 best->ecx |= bit(X86_FEATURE_OSXSAVE); 614 best->ecx |= bit(X86_FEATURE_OSXSAVE);
616 } 615 }
617 616
618 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 617 if (apic) {
619 best->function == 0x1) { 618 if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
620 best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER); 619 apic->lapic_timer.timer_mode_mask = 3 << 17;
621 timer_mode_mask = 3 << 17; 620 else
622 } else 621 apic->lapic_timer.timer_mode_mask = 1 << 17;
623 timer_mode_mask = 1 << 17; 622 }
624
625 if (apic)
626 apic->lapic_timer.timer_mode_mask = timer_mode_mask;
627} 623}
628 624
629int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 625int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -2135,6 +2131,9 @@ int kvm_dev_ioctl_check_extension(long ext)
2135 case KVM_CAP_TSC_CONTROL: 2131 case KVM_CAP_TSC_CONTROL:
2136 r = kvm_has_tsc_control; 2132 r = kvm_has_tsc_control;
2137 break; 2133 break;
2134 case KVM_CAP_TSC_DEADLINE_TIMER:
2135 r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
2136 break;
2138 default: 2137 default:
2139 r = 0; 2138 r = 0;
2140 break; 2139 break;
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
index 46fc4ee09fc4..88ad5fbda6e1 100644
--- a/arch/x86/lib/inat.c
+++ b/arch/x86/lib/inat.c
@@ -82,9 +82,16 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
82 const insn_attr_t *table; 82 const insn_attr_t *table;
83 if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) 83 if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
84 return 0; 84 return 0;
85 table = inat_avx_tables[vex_m][vex_p]; 85 /* At first, this checks the master table */
86 table = inat_avx_tables[vex_m][0];
86 if (!table) 87 if (!table)
87 return 0; 88 return 0;
89 if (!inat_is_group(table[opcode]) && vex_p) {
90 /* If this is not a group, get attribute directly */
91 table = inat_avx_tables[vex_m][vex_p];
92 if (!table)
93 return 0;
94 }
88 return table[opcode]; 95 return table[opcode];
89} 96}
90 97
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 374562ed6704..5a1f9f3e3fbb 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -202,7 +202,7 @@ void insn_get_opcode(struct insn *insn)
202 m = insn_vex_m_bits(insn); 202 m = insn_vex_m_bits(insn);
203 p = insn_vex_p_bits(insn); 203 p = insn_vex_p_bits(insn);
204 insn->attr = inat_get_avx_attribute(op, m, p); 204 insn->attr = inat_get_avx_attribute(op, m, p);
205 if (!inat_accept_vex(insn->attr)) 205 if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
206 insn->attr = 0; /* This instruction is bad */ 206 insn->attr = 0; /* This instruction is bad */
207 goto end; /* VEX has only 1 byte for opcode */ 207 goto end; /* VEX has only 1 byte for opcode */
208 } 208 }
@@ -249,6 +249,8 @@ void insn_get_modrm(struct insn *insn)
249 pfx = insn_last_prefix(insn); 249 pfx = insn_last_prefix(insn);
250 insn->attr = inat_get_group_attribute(mod, pfx, 250 insn->attr = inat_get_group_attribute(mod, pfx,
251 insn->attr); 251 insn->attr);
252 if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
253 insn->attr = 0; /* This is bad */
252 } 254 }
253 } 255 }
254 256
diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c
index 82004d2bf05e..bd59090825db 100644
--- a/arch/x86/lib/string_32.c
+++ b/arch/x86/lib/string_32.c
@@ -164,15 +164,13 @@ EXPORT_SYMBOL(strchr);
164size_t strlen(const char *s) 164size_t strlen(const char *s)
165{ 165{
166 int d0; 166 int d0;
167 int res; 167 size_t res;
168 asm volatile("repne\n\t" 168 asm volatile("repne\n\t"
169 "scasb\n\t" 169 "scasb"
170 "notl %0\n\t"
171 "decl %0"
172 : "=c" (res), "=&D" (d0) 170 : "=c" (res), "=&D" (d0)
173 : "1" (s), "a" (0), "0" (0xffffffffu) 171 : "1" (s), "a" (0), "0" (0xffffffffu)
174 : "memory"); 172 : "memory");
175 return res; 173 return ~res - 1;
176} 174}
177EXPORT_SYMBOL(strlen); 175EXPORT_SYMBOL(strlen);
178#endif 176#endif
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index a793da5e560e..5b83c51c12e0 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -1,5 +1,11 @@
1# x86 Opcode Maps 1# x86 Opcode Maps
2# 2#
3# This is (mostly) based on the following documentation.
4# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2
5# (#325383-040US, October 2011)
6# - Intel(R) Advanced Vector Extensions Programming Reference
7# (#319433-011,JUNE 2011).
8#
3#<Opcode maps> 9#<Opcode maps>
4# Table: table-name 10# Table: table-name
5# Referrer: escaped-name 11# Referrer: escaped-name
@@ -15,10 +21,13 @@
15# EndTable 21# EndTable
16# 22#
17# AVX Superscripts 23# AVX Superscripts
18# (VEX): this opcode can accept VEX prefix. 24# (v): this opcode requires VEX prefix.
19# (oVEX): this opcode requires VEX prefix. 25# (v1): this opcode only supports 128bit VEX.
20# (o128): this opcode only supports 128bit VEX. 26#
21# (o256): this opcode only supports 256bit VEX. 27# Last Prefix Superscripts
28# - (66): the last prefix is 0x66
29# - (F3): the last prefix is 0xF3
30# - (F2): the last prefix is 0xF2
22# 31#
23 32
24Table: one byte opcode 33Table: one byte opcode
@@ -199,8 +208,8 @@ a0: MOV AL,Ob
199a1: MOV rAX,Ov 208a1: MOV rAX,Ov
200a2: MOV Ob,AL 209a2: MOV Ob,AL
201a3: MOV Ov,rAX 210a3: MOV Ov,rAX
202a4: MOVS/B Xb,Yb 211a4: MOVS/B Yb,Xb
203a5: MOVS/W/D/Q Xv,Yv 212a5: MOVS/W/D/Q Yv,Xv
204a6: CMPS/B Xb,Yb 213a6: CMPS/B Xb,Yb
205a7: CMPS/W/D Xv,Yv 214a7: CMPS/W/D Xv,Yv
206a8: TEST AL,Ib 215a8: TEST AL,Ib
@@ -233,8 +242,8 @@ c0: Grp2 Eb,Ib (1A)
233c1: Grp2 Ev,Ib (1A) 242c1: Grp2 Ev,Ib (1A)
234c2: RETN Iw (f64) 243c2: RETN Iw (f64)
235c3: RETN 244c3: RETN
236c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) 245c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
237c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) 246c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
238c6: Grp11 Eb,Ib (1A) 247c6: Grp11 Eb,Ib (1A)
239c7: Grp11 Ev,Iz (1A) 248c7: Grp11 Ev,Iz (1A)
240c8: ENTER Iw,Ib 249c8: ENTER Iw,Ib
@@ -320,14 +329,19 @@ AVXcode: 1
320# 3DNow! uses the last imm byte as opcode extension. 329# 3DNow! uses the last imm byte as opcode extension.
3210f: 3DNow! Pq,Qq,Ib 3300f: 3DNow! Pq,Qq,Ib
322# 0x0f 0x10-0x1f 331# 0x0f 0x10-0x1f
32310: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) 332# NOTE: According to Intel SDM opcode map, vmovups and vmovupd have no operands
32411: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) 333# but they actually have operands. And also, vmovss and vmovsd only accept 128bit.
32512: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) 334# MOVSS/MOVSD have too many forms (3) in the SDM. This map just shows a typical form.
32613: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) 335# Many AVX instructions lack v1 superscript, according to Intel AVX-Programming
32714: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) 336# Reference A.1
32815: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) 33710: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
32916: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) 33811: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
33017: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) 33912: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
34013: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
34114: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
34215: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
34316: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
34417: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
33118: Grp16 (1A) 34518: Grp16 (1A)
33219: 34619:
3331a: 3471a:
@@ -345,14 +359,14 @@ AVXcode: 1
34525: 35925:
34626: 36026:
34727: 36127:
34828: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) 36228: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
34929: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) 36329: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
3502a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) 3642a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
3512b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) 3652b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
3522c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) 3662c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
3532d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) 3672d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
3542e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) 3682e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1)
3552f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) 3692f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1)
356# 0x0f 0x30-0x3f 370# 0x0f 0x30-0x3f
35730: WRMSR 37130: WRMSR
35831: RDTSC 37231: RDTSC
@@ -388,65 +402,66 @@ AVXcode: 1
3884e: CMOVLE/NG Gv,Ev 4024e: CMOVLE/NG Gv,Ev
3894f: CMOVNLE/G Gv,Ev 4034f: CMOVNLE/G Gv,Ev
390# 0x0f 0x50-0x5f 404# 0x0f 0x50-0x5f
39150: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) 40550: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
39251: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) 40651: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
39352: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) 40752: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
39453: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) 40853: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
39554: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) 40954: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
39655: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) 41055: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
39756: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) 41156: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
39857: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) 41257: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
39958: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) 41358: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
40059: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) 41459: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
4015a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) 4155a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
4025b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) 4165b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
4035c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) 4175c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
4045d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) 4185d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
4055e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) 4195e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
4065f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) 4205f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
407# 0x0f 0x60-0x6f 421# 0x0f 0x60-0x6f
40860: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) 42260: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
40961: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) 42361: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
41062: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) 42462: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
41163: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) 42563: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
41264: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) 42664: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
41365: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) 42765: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
41466: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) 42866: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
41567: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) 42967: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
41668: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) 43068: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
41769: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) 43169: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
4186a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) 4326a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
4196b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) 4336b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
4206c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) 4346c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
4216d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) 4356d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
4226e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) 4366e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
4236f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) 4376f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
424# 0x0f 0x70-0x7f 438# 0x0f 0x70-0x7f
42570: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) 43970: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
42671: Grp12 (1A) 44071: Grp12 (1A)
42772: Grp13 (1A) 44172: Grp13 (1A)
42873: Grp14 (1A) 44273: Grp14 (1A)
42974: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) 44374: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
43075: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) 44475: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
43176: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) 44576: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
43277: emms/vzeroupper/vzeroall (VEX) 446# Note: Remove (v), because vzeroall and vzeroupper become emms without VEX.
43378: VMREAD Ed/q,Gd/q 44777: emms | vzeroupper | vzeroall
43479: VMWRITE Gd/q,Ed/q 44878: VMREAD Ey,Gy
44979: VMWRITE Gy,Ey
4357a: 4507a:
4367b: 4517b:
4377c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) 4527c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
4387d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) 4537d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
4397e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) 4547e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
4407f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) 4557f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
441# 0x0f 0x80-0x8f 456# 0x0f 0x80-0x8f
44280: JO Jz (f64) 45780: JO Jz (f64)
44381: JNO Jz (f64) 45881: JNO Jz (f64)
44482: JB/JNAE/JC Jz (f64) 45982: JB/JC/JNAE Jz (f64)
44583: JNB/JAE/JNC Jz (f64) 46083: JAE/JNB/JNC Jz (f64)
44684: JZ/JE Jz (f64) 46184: JE/JZ Jz (f64)
44785: JNZ/JNE Jz (f64) 46285: JNE/JNZ Jz (f64)
44886: JBE/JNA Jz (f64) 46386: JBE/JNA Jz (f64)
44987: JNBE/JA Jz (f64) 46487: JA/JNBE Jz (f64)
45088: JS Jz (f64) 46588: JS Jz (f64)
45189: JNS Jz (f64) 46689: JNS Jz (f64)
4528a: JP/JPE Jz (f64) 4678a: JP/JPE Jz (f64)
@@ -502,18 +517,18 @@ b8: JMPE | POPCNT Gv,Ev (F3)
502b9: Grp10 (1A) 517b9: Grp10 (1A)
503ba: Grp8 Ev,Ib (1A) 518ba: Grp8 Ev,Ib (1A)
504bb: BTC Ev,Gv 519bb: BTC Ev,Gv
505bc: BSF Gv,Ev 520bc: BSF Gv,Ev | TZCNT Gv,Ev (F3)
506bd: BSR Gv,Ev 521bd: BSR Gv,Ev | LZCNT Gv,Ev (F3)
507be: MOVSX Gv,Eb 522be: MOVSX Gv,Eb
508bf: MOVSX Gv,Ew 523bf: MOVSX Gv,Ew
509# 0x0f 0xc0-0xcf 524# 0x0f 0xc0-0xcf
510c0: XADD Eb,Gb 525c0: XADD Eb,Gb
511c1: XADD Ev,Gv 526c1: XADD Ev,Gv
512c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) 527c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
513c3: movnti Md/q,Gd/q 528c3: movnti My,Gy
514c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) 529c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
515c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) 530c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
516c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) 531c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
517c7: Grp9 (1A) 532c7: Grp9 (1A)
518c8: BSWAP RAX/EAX/R8/R8D 533c8: BSWAP RAX/EAX/R8/R8D
519c9: BSWAP RCX/ECX/R9/R9D 534c9: BSWAP RCX/ECX/R9/R9D
@@ -524,55 +539,55 @@ cd: BSWAP RBP/EBP/R13/R13D
524ce: BSWAP RSI/ESI/R14/R14D 539ce: BSWAP RSI/ESI/R14/R14D
525cf: BSWAP RDI/EDI/R15/R15D 540cf: BSWAP RDI/EDI/R15/R15D
526# 0x0f 0xd0-0xdf 541# 0x0f 0xd0-0xdf
527d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) 542d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
528d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) 543d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
529d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) 544d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
530d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) 545d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
531d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) 546d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
532d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) 547d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
533d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) 548d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
534d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) 549d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
535d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) 550d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
536d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) 551d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
537da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) 552da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
538db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) 553db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
539dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) 554dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
540dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) 555dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
541de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) 556de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
542df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) 557df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
543# 0x0f 0xe0-0xef 558# 0x0f 0xe0-0xef
544e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) 559e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
545e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) 560e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
546e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) 561e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
547e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) 562e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
548e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) 563e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
549e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) 564e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
550e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) 565e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
551e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) 566e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
552e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) 567e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
553e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) 568e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
554ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) 569ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
555eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) 570eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
556ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) 571ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
557ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) 572ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
558ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) 573ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
559ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) 574ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
560# 0x0f 0xf0-0xff 575# 0x0f 0xf0-0xff
561f0: lddqu Vdq,Mdq (F2),(VEX) 576f0: vlddqu Vx,Mx (F2)
562f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) 577f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
563f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) 578f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
564f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) 579f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
565f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) 580f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
566f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) 581f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
567f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) 582f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
568f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) 583f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
569f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) 584f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
570f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) 585f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
571fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) 586fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
572fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) 587fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
573fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) 588fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
574fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) 589fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
575fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) 590fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
576ff: 591ff:
577EndTable 592EndTable
578 593
@@ -580,155 +595,193 @@ Table: 3-byte opcode 1 (0x0f 0x38)
580Referrer: 3-byte escape 1 595Referrer: 3-byte escape 1
581AVXcode: 2 596AVXcode: 2
582# 0x0f 0x38 0x00-0x0f 597# 0x0f 0x38 0x00-0x0f
58300: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) 59800: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
58401: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) 59901: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
58502: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) 60002: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
58603: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) 60103: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
58704: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) 60204: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
58805: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) 60305: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
58906: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) 60406: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
59007: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) 60507: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
59108: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) 60608: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
59209: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) 60709: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
5930a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) 6080a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
5940b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) 6090b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
5950c: Vpermilps /r (66),(oVEX) 6100c: vpermilps Vx,Hx,Wx (66),(v)
5960d: Vpermilpd /r (66),(oVEX) 6110d: vpermilpd Vx,Hx,Wx (66),(v)
5970e: vtestps /r (66),(oVEX) 6120e: vtestps Vx,Wx (66),(v)
5980f: vtestpd /r (66),(oVEX) 6130f: vtestpd Vx,Wx (66),(v)
599# 0x0f 0x38 0x10-0x1f 614# 0x0f 0x38 0x10-0x1f
60010: pblendvb Vdq,Wdq (66) 61510: pblendvb Vdq,Wdq (66)
60111: 61611:
60212: 61712:
60313: 61813: vcvtph2ps Vx,Wx,Ib (66),(v)
60414: blendvps Vdq,Wdq (66) 61914: blendvps Vdq,Wdq (66)
60515: blendvpd Vdq,Wdq (66) 62015: blendvpd Vdq,Wdq (66)
60616: 62116: vpermps Vqq,Hqq,Wqq (66),(v)
60717: ptest Vdq,Wdq (66),(VEX) 62217: vptest Vx,Wx (66)
60818: vbroadcastss /r (66),(oVEX) 62318: vbroadcastss Vx,Wd (66),(v)
60919: vbroadcastsd /r (66),(oVEX),(o256) 62419: vbroadcastsd Vqq,Wq (66),(v)
6101a: vbroadcastf128 /r (66),(oVEX),(o256) 6251a: vbroadcastf128 Vqq,Mdq (66),(v)
6111b: 6261b:
6121c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) 6271c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
6131d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) 6281d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
6141e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) 6291e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
6151f: 6301f:
616# 0x0f 0x38 0x20-0x2f 631# 0x0f 0x38 0x20-0x2f
61720: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) 63220: vpmovsxbw Vx,Ux/Mq (66),(v1)
61821: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) 63321: vpmovsxbd Vx,Ux/Md (66),(v1)
61922: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) 63422: vpmovsxbq Vx,Ux/Mw (66),(v1)
62023: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) 63523: vpmovsxwd Vx,Ux/Mq (66),(v1)
62124: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) 63624: vpmovsxwq Vx,Ux/Md (66),(v1)
62225: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) 63725: vpmovsxdq Vx,Ux/Mq (66),(v1)
62326: 63826:
62427: 63927:
62528: pmuldq Vdq,Wdq (66),(VEX),(o128) 64028: vpmuldq Vx,Hx,Wx (66),(v1)
62629: pcmpeqq Vdq,Wdq (66),(VEX),(o128) 64129: vpcmpeqq Vx,Hx,Wx (66),(v1)
6272a: movntdqa Vdq,Mdq (66),(VEX),(o128) 6422a: vmovntdqa Vx,Mx (66),(v1)
6282b: packusdw Vdq,Wdq (66),(VEX),(o128) 6432b: vpackusdw Vx,Hx,Wx (66),(v1)
6292c: vmaskmovps(ld) /r (66),(oVEX) 6442c: vmaskmovps Vx,Hx,Mx (66),(v)
6302d: vmaskmovpd(ld) /r (66),(oVEX) 6452d: vmaskmovpd Vx,Hx,Mx (66),(v)
6312e: vmaskmovps(st) /r (66),(oVEX) 6462e: vmaskmovps Mx,Hx,Vx (66),(v)
6322f: vmaskmovpd(st) /r (66),(oVEX) 6472f: vmaskmovpd Mx,Hx,Vx (66),(v)
633# 0x0f 0x38 0x30-0x3f 648# 0x0f 0x38 0x30-0x3f
63430: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) 64930: vpmovzxbw Vx,Ux/Mq (66),(v1)
63531: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) 65031: vpmovzxbd Vx,Ux/Md (66),(v1)
63632: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) 65132: vpmovzxbq Vx,Ux/Mw (66),(v1)
63733: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) 65233: vpmovzxwd Vx,Ux/Mq (66),(v1)
63834: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) 65334: vpmovzxwq Vx,Ux/Md (66),(v1)
63935: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) 65435: vpmovzxdq Vx,Ux/Mq (66),(v1)
64036: 65536: vpermd Vqq,Hqq,Wqq (66),(v)
64137: pcmpgtq Vdq,Wdq (66),(VEX),(o128) 65637: vpcmpgtq Vx,Hx,Wx (66),(v1)
64238: pminsb Vdq,Wdq (66),(VEX),(o128) 65738: vpminsb Vx,Hx,Wx (66),(v1)
64339: pminsd Vdq,Wdq (66),(VEX),(o128) 65839: vpminsd Vx,Hx,Wx (66),(v1)
6443a: pminuw Vdq,Wdq (66),(VEX),(o128) 6593a: vpminuw Vx,Hx,Wx (66),(v1)
6453b: pminud Vdq,Wdq (66),(VEX),(o128) 6603b: vpminud Vx,Hx,Wx (66),(v1)
6463c: pmaxsb Vdq,Wdq (66),(VEX),(o128) 6613c: vpmaxsb Vx,Hx,Wx (66),(v1)
6473d: pmaxsd Vdq,Wdq (66),(VEX),(o128) 6623d: vpmaxsd Vx,Hx,Wx (66),(v1)
6483e: pmaxuw Vdq,Wdq (66),(VEX),(o128) 6633e: vpmaxuw Vx,Hx,Wx (66),(v1)
6493f: pmaxud Vdq,Wdq (66),(VEX),(o128) 6643f: vpmaxud Vx,Hx,Wx (66),(v1)
650# 0x0f 0x38 0x40-0x8f 665# 0x0f 0x38 0x40-0x8f
65140: pmulld Vdq,Wdq (66),(VEX),(o128) 66640: vpmulld Vx,Hx,Wx (66),(v1)
65241: phminposuw Vdq,Wdq (66),(VEX),(o128) 66741: vphminposuw Vdq,Wdq (66),(v1)
65380: INVEPT Gd/q,Mdq (66) 66842:
65481: INVPID Gd/q,Mdq (66) 66943:
67044:
67145: vpsrlvd/q Vx,Hx,Wx (66),(v)
67246: vpsravd Vx,Hx,Wx (66),(v)
67347: vpsllvd/q Vx,Hx,Wx (66),(v)
674# Skip 0x48-0x57
67558: vpbroadcastd Vx,Wx (66),(v)
67659: vpbroadcastq Vx,Wx (66),(v)
6775a: vbroadcasti128 Vqq,Mdq (66),(v)
678# Skip 0x5b-0x77
67978: vpbroadcastb Vx,Wx (66),(v)
68079: vpbroadcastw Vx,Wx (66),(v)
681# Skip 0x7a-0x7f
68280: INVEPT Gy,Mdq (66)
68381: INVPID Gy,Mdq (66)
68482: INVPCID Gy,Mdq (66)
6858c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
6868e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
655# 0x0f 0x38 0x90-0xbf (FMA) 687# 0x0f 0x38 0x90-0xbf (FMA)
65696: vfmaddsub132pd/ps /r (66),(VEX) 68890: vgatherdd/q Vx,Hx,Wx (66),(v)
65797: vfmsubadd132pd/ps /r (66),(VEX) 68991: vgatherqd/q Vx,Hx,Wx (66),(v)
65898: vfmadd132pd/ps /r (66),(VEX) 69092: vgatherdps/d Vx,Hx,Wx (66),(v)
65999: vfmadd132sd/ss /r (66),(VEX),(o128) 69193: vgatherqps/d Vx,Hx,Wx (66),(v)
6609a: vfmsub132pd/ps /r (66),(VEX) 69294:
6619b: vfmsub132sd/ss /r (66),(VEX),(o128) 69395:
6629c: vfnmadd132pd/ps /r (66),(VEX) 69496: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
6639d: vfnmadd132sd/ss /r (66),(VEX),(o128) 69597: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
6649e: vfnmsub132pd/ps /r (66),(VEX) 69698: vfmadd132ps/d Vx,Hx,Wx (66),(v)
6659f: vfnmsub132sd/ss /r (66),(VEX),(o128) 69799: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
666a6: vfmaddsub213pd/ps /r (66),(VEX) 6989a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
667a7: vfmsubadd213pd/ps /r (66),(VEX) 6999b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
668a8: vfmadd213pd/ps /r (66),(VEX) 7009c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
669a9: vfmadd213sd/ss /r (66),(VEX),(o128) 7019d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
670aa: vfmsub213pd/ps /r (66),(VEX) 7029e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
671ab: vfmsub213sd/ss /r (66),(VEX),(o128) 7039f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
672ac: vfnmadd213pd/ps /r (66),(VEX) 704a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
673ad: vfnmadd213sd/ss /r (66),(VEX),(o128) 705a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
674ae: vfnmsub213pd/ps /r (66),(VEX) 706a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
675af: vfnmsub213sd/ss /r (66),(VEX),(o128) 707a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
676b6: vfmaddsub231pd/ps /r (66),(VEX) 708aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
677b7: vfmsubadd231pd/ps /r (66),(VEX) 709ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
678b8: vfmadd231pd/ps /r (66),(VEX) 710ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
679b9: vfmadd231sd/ss /r (66),(VEX),(o128) 711ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
680ba: vfmsub231pd/ps /r (66),(VEX) 712ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
681bb: vfmsub231sd/ss /r (66),(VEX),(o128) 713af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
682bc: vfnmadd231pd/ps /r (66),(VEX) 714b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
683bd: vfnmadd231sd/ss /r (66),(VEX),(o128) 715b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
684be: vfnmsub231pd/ps /r (66),(VEX) 716b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
685bf: vfnmsub231sd/ss /r (66),(VEX),(o128) 717b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
718ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
719bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
720bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
721bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
722be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
723bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
686# 0x0f 0x38 0xc0-0xff 724# 0x0f 0x38 0xc0-0xff
687db: aesimc Vdq,Wdq (66),(VEX),(o128) 725db: VAESIMC Vdq,Wdq (66),(v1)
688dc: aesenc Vdq,Wdq (66),(VEX),(o128) 726dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
689dd: aesenclast Vdq,Wdq (66),(VEX),(o128) 727dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
690de: aesdec Vdq,Wdq (66),(VEX),(o128) 728de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
691df: aesdeclast Vdq,Wdq (66),(VEX),(o128) 729df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
692f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) 730f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2)
693f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) 731f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2)
732f3: ANDN Gy,By,Ey (v)
733f4: Grp17 (1A)
734f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
735f6: MULX By,Gy,rDX,Ey (F2),(v)
736f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
694EndTable 737EndTable
695 738
696Table: 3-byte opcode 2 (0x0f 0x3a) 739Table: 3-byte opcode 2 (0x0f 0x3a)
697Referrer: 3-byte escape 2 740Referrer: 3-byte escape 2
698AVXcode: 3 741AVXcode: 3
699# 0x0f 0x3a 0x00-0xff 742# 0x0f 0x3a 0x00-0xff
70004: vpermilps /r,Ib (66),(oVEX) 74300: vpermq Vqq,Wqq,Ib (66),(v)
70105: vpermilpd /r,Ib (66),(oVEX) 74401: vpermpd Vqq,Wqq,Ib (66),(v)
70206: vperm2f128 /r,Ib (66),(oVEX),(o256) 74502: vpblendd Vx,Hx,Wx,Ib (66),(v)
70308: roundps Vdq,Wdq,Ib (66),(VEX) 74603:
70409: roundpd Vdq,Wdq,Ib (66),(VEX) 74704: vpermilps Vx,Wx,Ib (66),(v)
7050a: roundss Vss,Wss,Ib (66),(VEX),(o128) 74805: vpermilpd Vx,Wx,Ib (66),(v)
7060b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) 74906: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
7070c: blendps Vdq,Wdq,Ib (66),(VEX) 75007:
7080d: blendpd Vdq,Wdq,Ib (66),(VEX) 75108: vroundps Vx,Wx,Ib (66)
7090e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) 75209: vroundpd Vx,Wx,Ib (66)
7100f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) 7530a: vroundss Vss,Wss,Ib (66),(v1)
71114: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) 7540b: vroundsd Vsd,Wsd,Ib (66),(v1)
71215: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) 7550c: vblendps Vx,Hx,Wx,Ib (66)
71316: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) 7560d: vblendpd Vx,Hx,Wx,Ib (66)
71417: extractps Ed,Vdq,Ib (66),(VEX),(o128) 7570e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
71518: vinsertf128 /r,Ib (66),(oVEX),(o256) 7580f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
71619: vextractf128 /r,Ib (66),(oVEX),(o256) 75914: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
71720: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) 76015: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
71821: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) 76116: vpextrd/q Ey,Vdq,Ib (66),(v1)
71922: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) 76217: vextractps Ed,Vdq,Ib (66),(v1)
72040: dpps Vdq,Wdq,Ib (66),(VEX) 76318: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
72141: dppd Vdq,Wdq,Ib (66),(VEX),(o128) 76419: vextractf128 Wdq,Vqq,Ib (66),(v)
72242: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) 7651d: vcvtps2ph Wx,Vx,Ib (66),(v)
72344: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) 76620: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
7244a: vblendvps /r,Ib (66),(oVEX) 76721: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
7254b: vblendvpd /r,Ib (66),(oVEX) 76822: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
7264c: vpblendvb /r,Ib (66),(oVEX),(o128) 76938: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
72760: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) 77039: vextracti128 Wdq,Vqq,Ib (66),(v)
72861: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) 77140: vdpps Vx,Hx,Wx,Ib (66)
72962: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) 77241: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
73063: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) 77342: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
731df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) 77444: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
77546: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
7764a: vblendvps Vx,Hx,Wx,Lx (66),(v)
7774b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
7784c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
77960: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
78061: vpcmpestri Vdq,Wdq,Ib (66),(v1)
78162: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
78263: vpcmpistri Vdq,Wdq,Ib (66),(v1)
783df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
784f0: RORX Gy,Ey,Ib (F2),(v)
732EndTable 785EndTable
733 786
734GrpTable: Grp1 787GrpTable: Grp1
@@ -790,7 +843,7 @@ GrpTable: Grp5
7902: CALLN Ev (f64) 8432: CALLN Ev (f64)
7913: CALLF Ep 8443: CALLF Ep
7924: JMPN Ev (f64) 8454: JMPN Ev (f64)
7935: JMPF Ep 8465: JMPF Mp
7946: PUSH Ev (d64) 8476: PUSH Ev (d64)
7957: 8487:
796EndTable 849EndTable
@@ -807,7 +860,7 @@ EndTable
807GrpTable: Grp7 860GrpTable: Grp7
8080: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) 8610: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
8091: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) 8621: SIDT Ms | MONITOR (000),(11B) | MWAIT (001)
8102: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) 8632: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B)
8113: LIDT Ms 8643: LIDT Ms
8124: SMSW Mw/Rv 8654: SMSW Mw/Rv
8135: 8665:
@@ -824,44 +877,45 @@ EndTable
824 877
825GrpTable: Grp9 878GrpTable: Grp9
8261: CMPXCHG8B/16B Mq/Mdq 8791: CMPXCHG8B/16B Mq/Mdq
8276: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) 8806: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
8287: VMPTRST Mq 8817: VMPTRST Mq | VMPTRST Mq (F3)
829EndTable 882EndTable
830 883
831GrpTable: Grp10 884GrpTable: Grp10
832EndTable 885EndTable
833 886
834GrpTable: Grp11 887GrpTable: Grp11
888# Note: the operands are given by group opcode
8350: MOV 8890: MOV
836EndTable 890EndTable
837 891
838GrpTable: Grp12 892GrpTable: Grp12
8392: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) 8932: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
8404: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) 8944: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
8416: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) 8956: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
842EndTable 896EndTable
843 897
844GrpTable: Grp13 898GrpTable: Grp13
8452: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) 8992: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
8464: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) 9004: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
8476: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) 9016: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
848EndTable 902EndTable
849 903
850GrpTable: Grp14 904GrpTable: Grp14
8512: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) 9052: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
8523: psrldq Udq,Ib (66),(11B),(VEX),(o128) 9063: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
8536: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) 9076: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
8547: pslldq Udq,Ib (66),(11B),(VEX),(o128) 9087: vpslldq Hx,Ux,Ib (66),(11B),(v1)
855EndTable 909EndTable
856 910
857GrpTable: Grp15 911GrpTable: Grp15
8580: fxsave 9120: fxsave | RDFSBASE Ry (F3),(11B)
8591: fxstor 9131: fxstor | RDGSBASE Ry (F3),(11B)
8602: ldmxcsr (VEX) 9142: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
8613: stmxcsr (VEX) 9153: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
8624: XSAVE 9164: XSAVE
8635: XRSTOR | lfence (11B) 9175: XRSTOR | lfence (11B)
8646: mfence (11B) 9186: XSAVEOPT | mfence (11B)
8657: clflush | sfence (11B) 9197: clflush | sfence (11B)
866EndTable 920EndTable
867 921
@@ -872,6 +926,12 @@ GrpTable: Grp16
8723: prefetch T2 9263: prefetch T2
873EndTable 927EndTable
874 928
929GrpTable: Grp17
9301: BLSR By,Ey (v)
9312: BLSMSK By,Ey (v)
9323: BLSI By,Ey (v)
933EndTable
934
875# AMD's Prefetch Group 935# AMD's Prefetch Group
876GrpTable: GrpP 936GrpTable: GrpP
8770: PREFETCH 9370: PREFETCH
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3d11327c9ab4..23d8e5fecf76 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -27,6 +27,4 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o
27obj-$(CONFIG_ACPI_NUMA) += srat.o 27obj-$(CONFIG_ACPI_NUMA) += srat.o
28obj-$(CONFIG_NUMA_EMU) += numa_emulation.o 28obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
29 29
30obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
31
32obj-$(CONFIG_MEMTEST) += memtest.o 30obj-$(CONFIG_MEMTEST) += memtest.o
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index d0474ad2a6e5..1fb85dbe390a 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -25,7 +25,7 @@ int fixup_exception(struct pt_regs *regs)
25 if (fixup) { 25 if (fixup) {
26 /* If fixup is less than 16, it means uaccess error */ 26 /* If fixup is less than 16, it means uaccess error */
27 if (fixup->fixup < 16) { 27 if (fixup->fixup < 16) {
28 current_thread_info()->uaccess_err = -EFAULT; 28 current_thread_info()->uaccess_err = 1;
29 regs->ip += fixup->fixup; 29 regs->ip += fixup->fixup;
30 return 1; 30 return 1;
31 } 31 }
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 5db0490deb07..9d74824a708d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -626,7 +626,7 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code,
626 626
627static noinline void 627static noinline void
628no_context(struct pt_regs *regs, unsigned long error_code, 628no_context(struct pt_regs *regs, unsigned long error_code,
629 unsigned long address) 629 unsigned long address, int signal, int si_code)
630{ 630{
631 struct task_struct *tsk = current; 631 struct task_struct *tsk = current;
632 unsigned long *stackend; 632 unsigned long *stackend;
@@ -634,8 +634,17 @@ no_context(struct pt_regs *regs, unsigned long error_code,
634 int sig; 634 int sig;
635 635
636 /* Are we prepared to handle this kernel fault? */ 636 /* Are we prepared to handle this kernel fault? */
637 if (fixup_exception(regs)) 637 if (fixup_exception(regs)) {
638 if (current_thread_info()->sig_on_uaccess_error && signal) {
639 tsk->thread.trap_no = 14;
640 tsk->thread.error_code = error_code | PF_USER;
641 tsk->thread.cr2 = address;
642
643 /* XXX: hwpoison faults will set the wrong code. */
644 force_sig_info_fault(signal, si_code, address, tsk, 0);
645 }
638 return; 646 return;
647 }
639 648
640 /* 649 /*
641 * 32-bit: 650 * 32-bit:
@@ -755,7 +764,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
755 if (is_f00f_bug(regs, address)) 764 if (is_f00f_bug(regs, address))
756 return; 765 return;
757 766
758 no_context(regs, error_code, address); 767 no_context(regs, error_code, address, SIGSEGV, si_code);
759} 768}
760 769
761static noinline void 770static noinline void
@@ -819,7 +828,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
819 828
820 /* Kernel mode? Handle exceptions or die: */ 829 /* Kernel mode? Handle exceptions or die: */
821 if (!(error_code & PF_USER)) { 830 if (!(error_code & PF_USER)) {
822 no_context(regs, error_code, address); 831 no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
823 return; 832 return;
824 } 833 }
825 834
@@ -854,7 +863,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
854 if (!(fault & VM_FAULT_RETRY)) 863 if (!(fault & VM_FAULT_RETRY))
855 up_read(&current->mm->mmap_sem); 864 up_read(&current->mm->mmap_sem);
856 if (!(error_code & PF_USER)) 865 if (!(error_code & PF_USER))
857 no_context(regs, error_code, address); 866 no_context(regs, error_code, address, 0, 0);
858 return 1; 867 return 1;
859 } 868 }
860 if (!(fault & VM_FAULT_ERROR)) 869 if (!(fault & VM_FAULT_ERROR))
@@ -864,7 +873,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
864 /* Kernel mode? Handle exceptions or die: */ 873 /* Kernel mode? Handle exceptions or die: */
865 if (!(error_code & PF_USER)) { 874 if (!(error_code & PF_USER)) {
866 up_read(&current->mm->mmap_sem); 875 up_read(&current->mm->mmap_sem);
867 no_context(regs, error_code, address); 876 no_context(regs, error_code, address,
877 SIGSEGV, SEGV_MAPERR);
868 return 1; 878 return 1;
869 } 879 }
870 880
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 87488b93a65c..a298914058f9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -67,7 +67,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
67 good_end = max_pfn_mapped << PAGE_SHIFT; 67 good_end = max_pfn_mapped << PAGE_SHIFT;
68 68
69 base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); 69 base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
70 if (base == MEMBLOCK_ERROR) 70 if (!base)
71 panic("Cannot find space for the kernel page tables"); 71 panic("Cannot find space for the kernel page tables");
72 72
73 pgt_buf_start = base >> PAGE_SHIFT; 73 pgt_buf_start = base >> PAGE_SHIFT;
@@ -80,7 +80,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
80 80
81void __init native_pagetable_reserve(u64 start, u64 end) 81void __init native_pagetable_reserve(u64 start, u64 end)
82{ 82{
83 memblock_x86_reserve_range(start, end, "PGTABLE"); 83 memblock_reserve(start, end - start);
84} 84}
85 85
86struct map_range { 86struct map_range {
@@ -279,8 +279,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
279 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) 279 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
280 * so that they can be reused for other purposes. 280 * so that they can be reused for other purposes.
281 * 281 *
282 * On native it just means calling memblock_x86_reserve_range, on Xen it 282 * On native it just means calling memblock_reserve, on Xen it also
283 * also means marking RW the pagetable pages that we allocated before 283 * means marking RW the pagetable pages that we allocated before
284 * but that haven't been used. 284 * but that haven't been used.
285 * 285 *
286 * In fact on xen we mark RO the whole range pgt_buf_start - 286 * In fact on xen we mark RO the whole range pgt_buf_start -
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 29f7c6d98179..0c1da394a634 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -427,23 +427,17 @@ static void __init add_one_highpage_init(struct page *page)
427void __init add_highpages_with_active_regions(int nid, 427void __init add_highpages_with_active_regions(int nid,
428 unsigned long start_pfn, unsigned long end_pfn) 428 unsigned long start_pfn, unsigned long end_pfn)
429{ 429{
430 struct range *range; 430 phys_addr_t start, end;
431 int nr_range; 431 u64 i;
432 int i; 432
433 433 for_each_free_mem_range(i, nid, &start, &end, NULL) {
434 nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn); 434 unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
435 435 start_pfn, end_pfn);
436 for (i = 0; i < nr_range; i++) { 436 unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
437 struct page *page; 437 start_pfn, end_pfn);
438 int node_pfn; 438 for ( ; pfn < e_pfn; pfn++)
439 439 if (pfn_valid(pfn))
440 for (node_pfn = range[i].start; node_pfn < range[i].end; 440 add_one_highpage_init(pfn_to_page(pfn));
441 node_pfn++) {
442 if (!pfn_valid(node_pfn))
443 continue;
444 page = pfn_to_page(node_pfn);
445 add_one_highpage_init(page);
446 }
447 } 441 }
448} 442}
449#else 443#else
@@ -650,18 +644,18 @@ void __init initmem_init(void)
650 highstart_pfn = highend_pfn = max_pfn; 644 highstart_pfn = highend_pfn = max_pfn;
651 if (max_pfn > max_low_pfn) 645 if (max_pfn > max_low_pfn)
652 highstart_pfn = max_low_pfn; 646 highstart_pfn = max_low_pfn;
653 memblock_x86_register_active_regions(0, 0, highend_pfn);
654 sparse_memory_present_with_active_regions(0);
655 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 647 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
656 pages_to_mb(highend_pfn - highstart_pfn)); 648 pages_to_mb(highend_pfn - highstart_pfn));
657 num_physpages = highend_pfn; 649 num_physpages = highend_pfn;
658 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; 650 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
659#else 651#else
660 memblock_x86_register_active_regions(0, 0, max_low_pfn);
661 sparse_memory_present_with_active_regions(0);
662 num_physpages = max_low_pfn; 652 num_physpages = max_low_pfn;
663 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; 653 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
664#endif 654#endif
655
656 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
657 sparse_memory_present_with_active_regions(0);
658
665#ifdef CONFIG_FLATMEM 659#ifdef CONFIG_FLATMEM
666 max_mapnr = num_physpages; 660 max_mapnr = num_physpages;
667#endif 661#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bbaaa005bf0e..a8a56ce3a962 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -608,7 +608,7 @@ kernel_physical_mapping_init(unsigned long start,
608#ifndef CONFIG_NUMA 608#ifndef CONFIG_NUMA
609void __init initmem_init(void) 609void __init initmem_init(void)
610{ 610{
611 memblock_x86_register_active_regions(0, 0, max_pfn); 611 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
612} 612}
613#endif 613#endif
614 614
diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c
deleted file mode 100644
index 992da5ec5a64..000000000000
--- a/arch/x86/mm/memblock.c
+++ /dev/null
@@ -1,348 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/types.h>
3#include <linux/init.h>
4#include <linux/bitops.h>
5#include <linux/memblock.h>
6#include <linux/bootmem.h>
7#include <linux/mm.h>
8#include <linux/range.h>
9
10/* Check for already reserved areas */
11bool __init memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align)
12{
13 struct memblock_region *r;
14 u64 addr = *addrp, last;
15 u64 size = *sizep;
16 bool changed = false;
17
18again:
19 last = addr + size;
20 for_each_memblock(reserved, r) {
21 if (last > r->base && addr < r->base) {
22 size = r->base - addr;
23 changed = true;
24 goto again;
25 }
26 if (last > (r->base + r->size) && addr < (r->base + r->size)) {
27 addr = round_up(r->base + r->size, align);
28 size = last - addr;
29 changed = true;
30 goto again;
31 }
32 if (last <= (r->base + r->size) && addr >= r->base) {
33 *sizep = 0;
34 return false;
35 }
36 }
37 if (changed) {
38 *addrp = addr;
39 *sizep = size;
40 }
41 return changed;
42}
43
44/*
45 * Find next free range after start, and size is returned in *sizep
46 */
47u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align)
48{
49 struct memblock_region *r;
50
51 for_each_memblock(memory, r) {
52 u64 ei_start = r->base;
53 u64 ei_last = ei_start + r->size;
54 u64 addr;
55
56 addr = round_up(ei_start, align);
57 if (addr < start)
58 addr = round_up(start, align);
59 if (addr >= ei_last)
60 continue;
61 *sizep = ei_last - addr;
62 while (memblock_x86_check_reserved_size(&addr, sizep, align))
63 ;
64
65 if (*sizep)
66 return addr;
67 }
68
69 return MEMBLOCK_ERROR;
70}
71
72static __init struct range *find_range_array(int count)
73{
74 u64 end, size, mem;
75 struct range *range;
76
77 size = sizeof(struct range) * count;
78 end = memblock.current_limit;
79
80 mem = memblock_find_in_range(0, end, size, sizeof(struct range));
81 if (mem == MEMBLOCK_ERROR)
82 panic("can not find more space for range array");
83
84 /*
85 * This range is tempoaray, so don't reserve it, it will not be
86 * overlapped because We will not alloccate new buffer before
87 * We discard this one
88 */
89 range = __va(mem);
90 memset(range, 0, size);
91
92 return range;
93}
94
95static void __init memblock_x86_subtract_reserved(struct range *range, int az)
96{
97 u64 final_start, final_end;
98 struct memblock_region *r;
99
100 /* Take out region array itself at first*/
101 memblock_free_reserved_regions();
102
103 memblock_dbg("Subtract (%ld early reservations)\n", memblock.reserved.cnt);
104
105 for_each_memblock(reserved, r) {
106 memblock_dbg(" [%010llx-%010llx]\n", (u64)r->base, (u64)r->base + r->size - 1);
107 final_start = PFN_DOWN(r->base);
108 final_end = PFN_UP(r->base + r->size);
109 if (final_start >= final_end)
110 continue;
111 subtract_range(range, az, final_start, final_end);
112 }
113
114 /* Put region array back ? */
115 memblock_reserve_reserved_regions();
116}
117
118struct count_data {
119 int nr;
120};
121
122static int __init count_work_fn(unsigned long start_pfn,
123 unsigned long end_pfn, void *datax)
124{
125 struct count_data *data = datax;
126
127 data->nr++;
128
129 return 0;
130}
131
132static int __init count_early_node_map(int nodeid)
133{
134 struct count_data data;
135
136 data.nr = 0;
137 work_with_active_regions(nodeid, count_work_fn, &data);
138
139 return data.nr;
140}
141
142int __init __get_free_all_memory_range(struct range **rangep, int nodeid,
143 unsigned long start_pfn, unsigned long end_pfn)
144{
145 int count;
146 struct range *range;
147 int nr_range;
148
149 count = (memblock.reserved.cnt + count_early_node_map(nodeid)) * 2;
150
151 range = find_range_array(count);
152 nr_range = 0;
153
154 /*
155 * Use early_node_map[] and memblock.reserved.region to get range array
156 * at first
157 */
158 nr_range = add_from_early_node_map(range, count, nr_range, nodeid);
159 subtract_range(range, count, 0, start_pfn);
160 subtract_range(range, count, end_pfn, -1ULL);
161
162 memblock_x86_subtract_reserved(range, count);
163 nr_range = clean_sort_range(range, count);
164
165 *rangep = range;
166 return nr_range;
167}
168
169int __init get_free_all_memory_range(struct range **rangep, int nodeid)
170{
171 unsigned long end_pfn = -1UL;
172
173#ifdef CONFIG_X86_32
174 end_pfn = max_low_pfn;
175#endif
176 return __get_free_all_memory_range(rangep, nodeid, 0, end_pfn);
177}
178
179static u64 __init __memblock_x86_memory_in_range(u64 addr, u64 limit, bool get_free)
180{
181 int i, count;
182 struct range *range;
183 int nr_range;
184 u64 final_start, final_end;
185 u64 free_size;
186 struct memblock_region *r;
187
188 count = (memblock.reserved.cnt + memblock.memory.cnt) * 2;
189
190 range = find_range_array(count);
191 nr_range = 0;
192
193 addr = PFN_UP(addr);
194 limit = PFN_DOWN(limit);
195
196 for_each_memblock(memory, r) {
197 final_start = PFN_UP(r->base);
198 final_end = PFN_DOWN(r->base + r->size);
199 if (final_start >= final_end)
200 continue;
201 if (final_start >= limit || final_end <= addr)
202 continue;
203
204 nr_range = add_range(range, count, nr_range, final_start, final_end);
205 }
206 subtract_range(range, count, 0, addr);
207 subtract_range(range, count, limit, -1ULL);
208
209 /* Subtract memblock.reserved.region in range ? */
210 if (!get_free)
211 goto sort_and_count_them;
212 for_each_memblock(reserved, r) {
213 final_start = PFN_DOWN(r->base);
214 final_end = PFN_UP(r->base + r->size);
215 if (final_start >= final_end)
216 continue;
217 if (final_start >= limit || final_end <= addr)
218 continue;
219
220 subtract_range(range, count, final_start, final_end);
221 }
222
223sort_and_count_them:
224 nr_range = clean_sort_range(range, count);
225
226 free_size = 0;
227 for (i = 0; i < nr_range; i++)
228 free_size += range[i].end - range[i].start;
229
230 return free_size << PAGE_SHIFT;
231}
232
233u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit)
234{
235 return __memblock_x86_memory_in_range(addr, limit, true);
236}
237
238u64 __init memblock_x86_memory_in_range(u64 addr, u64 limit)
239{
240 return __memblock_x86_memory_in_range(addr, limit, false);
241}
242
243void __init memblock_x86_reserve_range(u64 start, u64 end, char *name)
244{
245 if (start == end)
246 return;
247
248 if (WARN_ONCE(start > end, "memblock_x86_reserve_range: wrong range [%#llx, %#llx)\n", start, end))
249 return;
250
251 memblock_dbg(" memblock_x86_reserve_range: [%#010llx-%#010llx] %16s\n", start, end - 1, name);
252
253 memblock_reserve(start, end - start);
254}
255
256void __init memblock_x86_free_range(u64 start, u64 end)
257{
258 if (start == end)
259 return;
260
261 if (WARN_ONCE(start > end, "memblock_x86_free_range: wrong range [%#llx, %#llx)\n", start, end))
262 return;
263
264 memblock_dbg(" memblock_x86_free_range: [%#010llx-%#010llx]\n", start, end - 1);
265
266 memblock_free(start, end - start);
267}
268
269/*
270 * Need to call this function after memblock_x86_register_active_regions,
271 * so early_node_map[] is filled already.
272 */
273u64 __init memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align)
274{
275 u64 addr;
276 addr = find_memory_core_early(nid, size, align, start, end);
277 if (addr != MEMBLOCK_ERROR)
278 return addr;
279
280 /* Fallback, should already have start end within node range */
281 return memblock_find_in_range(start, end, size, align);
282}
283
284/*
285 * Finds an active region in the address range from start_pfn to last_pfn and
286 * returns its range in ei_startpfn and ei_endpfn for the memblock entry.
287 */
288static int __init memblock_x86_find_active_region(const struct memblock_region *ei,
289 unsigned long start_pfn,
290 unsigned long last_pfn,
291 unsigned long *ei_startpfn,
292 unsigned long *ei_endpfn)
293{
294 u64 align = PAGE_SIZE;
295
296 *ei_startpfn = round_up(ei->base, align) >> PAGE_SHIFT;
297 *ei_endpfn = round_down(ei->base + ei->size, align) >> PAGE_SHIFT;
298
299 /* Skip map entries smaller than a page */
300 if (*ei_startpfn >= *ei_endpfn)
301 return 0;
302
303 /* Skip if map is outside the node */
304 if (*ei_endpfn <= start_pfn || *ei_startpfn >= last_pfn)
305 return 0;
306
307 /* Check for overlaps */
308 if (*ei_startpfn < start_pfn)
309 *ei_startpfn = start_pfn;
310 if (*ei_endpfn > last_pfn)
311 *ei_endpfn = last_pfn;
312
313 return 1;
314}
315
316/* Walk the memblock.memory map and register active regions within a node */
317void __init memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
318 unsigned long last_pfn)
319{
320 unsigned long ei_startpfn;
321 unsigned long ei_endpfn;
322 struct memblock_region *r;
323
324 for_each_memblock(memory, r)
325 if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
326 &ei_startpfn, &ei_endpfn))
327 add_active_range(nid, ei_startpfn, ei_endpfn);
328}
329
330/*
331 * Find the hole size (in bytes) in the memory range.
332 * @start: starting address of the memory range to scan
333 * @end: ending address of the memory range to scan
334 */
335u64 __init memblock_x86_hole_size(u64 start, u64 end)
336{
337 unsigned long start_pfn = start >> PAGE_SHIFT;
338 unsigned long last_pfn = end >> PAGE_SHIFT;
339 unsigned long ei_startpfn, ei_endpfn, ram = 0;
340 struct memblock_region *r;
341
342 for_each_memblock(memory, r)
343 if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
344 &ei_startpfn, &ei_endpfn))
345 ram += ei_endpfn - ei_startpfn;
346
347 return end - start - ((u64)ram << PAGE_SHIFT);
348}
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 92faf3a1c53e..c80b9fb95734 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -34,7 +34,7 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
34 (unsigned long long) pattern, 34 (unsigned long long) pattern,
35 (unsigned long long) start_bad, 35 (unsigned long long) start_bad,
36 (unsigned long long) end_bad); 36 (unsigned long long) end_bad);
37 memblock_x86_reserve_range(start_bad, end_bad, "BAD RAM"); 37 memblock_reserve(start_bad, end_bad - start_bad);
38} 38}
39 39
40static void __init memtest(u64 pattern, u64 start_phys, u64 size) 40static void __init memtest(u64 pattern, u64 start_phys, u64 size)
@@ -70,24 +70,19 @@ static void __init memtest(u64 pattern, u64 start_phys, u64 size)
70 70
71static void __init do_one_pass(u64 pattern, u64 start, u64 end) 71static void __init do_one_pass(u64 pattern, u64 start, u64 end)
72{ 72{
73 u64 size = 0; 73 u64 i;
74 74 phys_addr_t this_start, this_end;
75 while (start < end) { 75
76 start = memblock_x86_find_in_range_size(start, &size, 1); 76 for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) {
77 77 this_start = clamp_t(phys_addr_t, this_start, start, end);
78 /* done ? */ 78 this_end = clamp_t(phys_addr_t, this_end, start, end);
79 if (start >= end) 79 if (this_start < this_end) {
80 break; 80 printk(KERN_INFO " %010llx - %010llx pattern %016llx\n",
81 if (start + size > end) 81 (unsigned long long)this_start,
82 size = end - start; 82 (unsigned long long)this_end,
83 83 (unsigned long long)cpu_to_be64(pattern));
84 printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", 84 memtest(pattern, this_start, this_end - this_start);
85 (unsigned long long) start, 85 }
86 (unsigned long long) start + size,
87 (unsigned long long) cpu_to_be64(pattern));
88 memtest(pattern, start, size);
89
90 start += size;
91 } 86 }
92} 87}
93 88
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index fbeaaf416610..496f494593bf 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -192,8 +192,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
192/* Initialize NODE_DATA for a node on the local memory */ 192/* Initialize NODE_DATA for a node on the local memory */
193static void __init setup_node_data(int nid, u64 start, u64 end) 193static void __init setup_node_data(int nid, u64 start, u64 end)
194{ 194{
195 const u64 nd_low = PFN_PHYS(MAX_DMA_PFN);
196 const u64 nd_high = PFN_PHYS(max_pfn_mapped);
197 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 195 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
198 bool remapped = false; 196 bool remapped = false;
199 u64 nd_pa; 197 u64 nd_pa;
@@ -224,17 +222,12 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
224 nd_pa = __pa(nd); 222 nd_pa = __pa(nd);
225 remapped = true; 223 remapped = true;
226 } else { 224 } else {
227 nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high, 225 nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
228 nd_size, SMP_CACHE_BYTES); 226 if (!nd_pa) {
229 if (nd_pa == MEMBLOCK_ERROR)
230 nd_pa = memblock_find_in_range(nd_low, nd_high,
231 nd_size, SMP_CACHE_BYTES);
232 if (nd_pa == MEMBLOCK_ERROR) {
233 pr_err("Cannot find %zu bytes in node %d\n", 227 pr_err("Cannot find %zu bytes in node %d\n",
234 nd_size, nid); 228 nd_size, nid);
235 return; 229 return;
236 } 230 }
237 memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
238 nd = __va(nd_pa); 231 nd = __va(nd_pa);
239 } 232 }
240 233
@@ -371,8 +364,7 @@ void __init numa_reset_distance(void)
371 364
372 /* numa_distance could be 1LU marking allocation failure, test cnt */ 365 /* numa_distance could be 1LU marking allocation failure, test cnt */
373 if (numa_distance_cnt) 366 if (numa_distance_cnt)
374 memblock_x86_free_range(__pa(numa_distance), 367 memblock_free(__pa(numa_distance), size);
375 __pa(numa_distance) + size);
376 numa_distance_cnt = 0; 368 numa_distance_cnt = 0;
377 numa_distance = NULL; /* enable table creation */ 369 numa_distance = NULL; /* enable table creation */
378} 370}
@@ -395,13 +387,13 @@ static int __init numa_alloc_distance(void)
395 387
396 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), 388 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
397 size, PAGE_SIZE); 389 size, PAGE_SIZE);
398 if (phys == MEMBLOCK_ERROR) { 390 if (!phys) {
399 pr_warning("NUMA: Warning: can't allocate distance table!\n"); 391 pr_warning("NUMA: Warning: can't allocate distance table!\n");
400 /* don't retry until explicitly reset */ 392 /* don't retry until explicitly reset */
401 numa_distance = (void *)1LU; 393 numa_distance = (void *)1LU;
402 return -ENOMEM; 394 return -ENOMEM;
403 } 395 }
404 memblock_x86_reserve_range(phys, phys + size, "NUMA DIST"); 396 memblock_reserve(phys, size);
405 397
406 numa_distance = __va(phys); 398 numa_distance = __va(phys);
407 numa_distance_cnt = cnt; 399 numa_distance_cnt = cnt;
@@ -482,8 +474,8 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
482 numaram = 0; 474 numaram = 0;
483 } 475 }
484 476
485 e820ram = max_pfn - (memblock_x86_hole_size(0, 477 e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
486 PFN_PHYS(max_pfn)) >> PAGE_SHIFT); 478
487 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ 479 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
488 if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { 480 if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
489 printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", 481 printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
@@ -505,13 +497,10 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
505 if (WARN_ON(nodes_empty(node_possible_map))) 497 if (WARN_ON(nodes_empty(node_possible_map)))
506 return -EINVAL; 498 return -EINVAL;
507 499
508 for (i = 0; i < mi->nr_blks; i++) 500 for (i = 0; i < mi->nr_blks; i++) {
509 memblock_x86_register_active_regions(mi->blk[i].nid, 501 struct numa_memblk *mb = &mi->blk[i];
510 mi->blk[i].start >> PAGE_SHIFT, 502 memblock_set_node(mb->start, mb->end - mb->start, mb->nid);
511 mi->blk[i].end >> PAGE_SHIFT); 503 }
512
513 /* for out of order entries */
514 sort_node_map();
515 504
516 /* 505 /*
517 * If sections array is gonna be used for pfn -> nid mapping, check 506 * If sections array is gonna be used for pfn -> nid mapping, check
@@ -545,6 +534,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
545 setup_node_data(nid, start, end); 534 setup_node_data(nid, start, end);
546 } 535 }
547 536
537 /* Dump memblock with node info and return. */
538 memblock_dump_all();
548 return 0; 539 return 0;
549} 540}
550 541
@@ -582,7 +573,7 @@ static int __init numa_init(int (*init_func)(void))
582 nodes_clear(node_possible_map); 573 nodes_clear(node_possible_map);
583 nodes_clear(node_online_map); 574 nodes_clear(node_online_map);
584 memset(&numa_meminfo, 0, sizeof(numa_meminfo)); 575 memset(&numa_meminfo, 0, sizeof(numa_meminfo));
585 remove_all_active_ranges(); 576 WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
586 numa_reset_distance(); 577 numa_reset_distance();
587 578
588 ret = init_func(); 579 ret = init_func();
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 3adebe7e536a..534255a36b6b 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -199,23 +199,23 @@ void __init init_alloc_remap(int nid, u64 start, u64 end)
199 199
200 /* allocate node memory and the lowmem remap area */ 200 /* allocate node memory and the lowmem remap area */
201 node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); 201 node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
202 if (node_pa == MEMBLOCK_ERROR) { 202 if (!node_pa) {
203 pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", 203 pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
204 size, nid); 204 size, nid);
205 return; 205 return;
206 } 206 }
207 memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); 207 memblock_reserve(node_pa, size);
208 208
209 remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, 209 remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
210 max_low_pfn << PAGE_SHIFT, 210 max_low_pfn << PAGE_SHIFT,
211 size, LARGE_PAGE_BYTES); 211 size, LARGE_PAGE_BYTES);
212 if (remap_pa == MEMBLOCK_ERROR) { 212 if (!remap_pa) {
213 pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", 213 pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
214 size, nid); 214 size, nid);
215 memblock_x86_free_range(node_pa, node_pa + size); 215 memblock_free(node_pa, size);
216 return; 216 return;
217 } 217 }
218 memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG"); 218 memblock_reserve(remap_pa, size);
219 remap_va = phys_to_virt(remap_pa); 219 remap_va = phys_to_virt(remap_pa);
220 220
221 /* perform actual remap */ 221 /* perform actual remap */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index dd27f401f0a0..92e27119ee1a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -19,7 +19,7 @@ unsigned long __init numa_free_all_bootmem(void)
19 for_each_online_node(i) 19 for_each_online_node(i)
20 pages += free_all_bootmem_node(NODE_DATA(i)); 20 pages += free_all_bootmem_node(NODE_DATA(i));
21 21
22 pages += free_all_memory_core_early(MAX_NUMNODES); 22 pages += free_low_memory_core_early(MAX_NUMNODES);
23 23
24 return pages; 24 return pages;
25} 25}
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index d0ed086b6247..46db56845f18 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -28,6 +28,16 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
28 return -ENOENT; 28 return -ENOENT;
29} 29}
30 30
31static u64 mem_hole_size(u64 start, u64 end)
32{
33 unsigned long start_pfn = PFN_UP(start);
34 unsigned long end_pfn = PFN_DOWN(end);
35
36 if (start_pfn < end_pfn)
37 return PFN_PHYS(absent_pages_in_range(start_pfn, end_pfn));
38 return 0;
39}
40
31/* 41/*
32 * Sets up nid to range from @start to @end. The return value is -errno if 42 * Sets up nid to range from @start to @end. The return value is -errno if
33 * something went wrong, 0 otherwise. 43 * something went wrong, 0 otherwise.
@@ -89,7 +99,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
89 * Calculate target node size. x86_32 freaks on __udivdi3() so do 99 * Calculate target node size. x86_32 freaks on __udivdi3() so do
90 * the division in ulong number of pages and convert back. 100 * the division in ulong number of pages and convert back.
91 */ 101 */
92 size = max_addr - addr - memblock_x86_hole_size(addr, max_addr); 102 size = max_addr - addr - mem_hole_size(addr, max_addr);
93 size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes); 103 size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes);
94 104
95 /* 105 /*
@@ -135,8 +145,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
135 * Continue to add memory to this fake node if its 145 * Continue to add memory to this fake node if its
136 * non-reserved memory is less than the per-node size. 146 * non-reserved memory is less than the per-node size.
137 */ 147 */
138 while (end - start - 148 while (end - start - mem_hole_size(start, end) < size) {
139 memblock_x86_hole_size(start, end) < size) {
140 end += FAKE_NODE_MIN_SIZE; 149 end += FAKE_NODE_MIN_SIZE;
141 if (end > limit) { 150 if (end > limit) {
142 end = limit; 151 end = limit;
@@ -150,7 +159,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
150 * this one must extend to the boundary. 159 * this one must extend to the boundary.
151 */ 160 */
152 if (end < dma32_end && dma32_end - end - 161 if (end < dma32_end && dma32_end - end -
153 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) 162 mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
154 end = dma32_end; 163 end = dma32_end;
155 164
156 /* 165 /*
@@ -158,8 +167,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
158 * next node, this one must extend to the end of the 167 * next node, this one must extend to the end of the
159 * physical node. 168 * physical node.
160 */ 169 */
161 if (limit - end - 170 if (limit - end - mem_hole_size(end, limit) < size)
162 memblock_x86_hole_size(end, limit) < size)
163 end = limit; 171 end = limit;
164 172
165 ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes, 173 ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
@@ -180,7 +188,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
180{ 188{
181 u64 end = start + size; 189 u64 end = start + size;
182 190
183 while (end - start - memblock_x86_hole_size(start, end) < size) { 191 while (end - start - mem_hole_size(start, end) < size) {
184 end += FAKE_NODE_MIN_SIZE; 192 end += FAKE_NODE_MIN_SIZE;
185 if (end > max_addr) { 193 if (end > max_addr) {
186 end = max_addr; 194 end = max_addr;
@@ -211,8 +219,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
211 * creates a uniform distribution of node sizes across the entire 219 * creates a uniform distribution of node sizes across the entire
212 * machine (but not necessarily over physical nodes). 220 * machine (but not necessarily over physical nodes).
213 */ 221 */
214 min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / 222 min_size = (max_addr - addr - mem_hole_size(addr, max_addr)) / MAX_NUMNODES;
215 MAX_NUMNODES;
216 min_size = max(min_size, FAKE_NODE_MIN_SIZE); 223 min_size = max(min_size, FAKE_NODE_MIN_SIZE);
217 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) 224 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
218 min_size = (min_size + FAKE_NODE_MIN_SIZE) & 225 min_size = (min_size + FAKE_NODE_MIN_SIZE) &
@@ -252,7 +259,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
252 * this one must extend to the boundary. 259 * this one must extend to the boundary.
253 */ 260 */
254 if (end < dma32_end && dma32_end - end - 261 if (end < dma32_end && dma32_end - end -
255 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) 262 mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
256 end = dma32_end; 263 end = dma32_end;
257 264
258 /* 265 /*
@@ -260,8 +267,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
260 * next node, this one must extend to the end of the 267 * next node, this one must extend to the end of the
261 * physical node. 268 * physical node.
262 */ 269 */
263 if (limit - end - 270 if (limit - end - mem_hole_size(end, limit) < size)
264 memblock_x86_hole_size(end, limit) < size)
265 end = limit; 271 end = limit;
266 272
267 ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES, 273 ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
@@ -351,11 +357,11 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
351 357
352 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), 358 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
353 phys_size, PAGE_SIZE); 359 phys_size, PAGE_SIZE);
354 if (phys == MEMBLOCK_ERROR) { 360 if (!phys) {
355 pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); 361 pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
356 goto no_emu; 362 goto no_emu;
357 } 363 }
358 memblock_x86_reserve_range(phys, phys + phys_size, "TMP NUMA DIST"); 364 memblock_reserve(phys, phys_size);
359 phys_dist = __va(phys); 365 phys_dist = __va(phys);
360 366
361 for (i = 0; i < numa_dist_cnt; i++) 367 for (i = 0; i < numa_dist_cnt; i++)
@@ -424,7 +430,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
424 430
425 /* free the copied physical distance table */ 431 /* free the copied physical distance table */
426 if (phys_dist) 432 if (phys_dist)
427 memblock_x86_free_range(__pa(phys_dist), __pa(phys_dist) + phys_size); 433 memblock_free(__pa(phys_dist), phys_size);
428 return; 434 return;
429 435
430no_emu: 436no_emu:
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f9e526742fa1..eda2acbb6e81 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -998,7 +998,7 @@ out_err:
998} 998}
999EXPORT_SYMBOL(set_memory_uc); 999EXPORT_SYMBOL(set_memory_uc);
1000 1000
1001int _set_memory_array(unsigned long *addr, int addrinarray, 1001static int _set_memory_array(unsigned long *addr, int addrinarray,
1002 unsigned long new_type) 1002 unsigned long new_type)
1003{ 1003{
1004 int i, j; 1004 int i, j;
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 81dbfdeb080d..fd61b3fb7341 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -69,6 +69,12 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
69 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) 69 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
70 return; 70 return;
71 pxm = pa->proximity_domain; 71 pxm = pa->proximity_domain;
72 apic_id = pa->apic_id;
73 if (!cpu_has_x2apic && (apic_id >= 0xff)) {
74 printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n",
75 pxm, apic_id);
76 return;
77 }
72 node = setup_node(pxm); 78 node = setup_node(pxm);
73 if (node < 0) { 79 if (node < 0) {
74 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); 80 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
@@ -76,7 +82,6 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
76 return; 82 return;
77 } 83 }
78 84
79 apic_id = pa->apic_id;
80 if (apic_id >= MAX_LOCAL_APIC) { 85 if (apic_id >= MAX_LOCAL_APIC) {
81 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); 86 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
82 return; 87 return;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index bfab3fa10edc..7b65f752c5f8 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -568,8 +568,8 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
568 break; 568 break;
569 } 569 }
570 if (filter[i].jt != 0) { 570 if (filter[i].jt != 0) {
571 if (filter[i].jf) 571 if (filter[i].jf && f_offset)
572 t_offset += is_near(f_offset) ? 2 : 6; 572 t_offset += is_near(f_offset) ? 2 : 5;
573 EMIT_COND_JMP(t_op, t_offset); 573 EMIT_COND_JMP(t_op, t_offset);
574 if (filter[i].jf) 574 if (filter[i].jf)
575 EMIT_JMP(f_offset); 575 EMIT_JMP(f_offset);
diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile
index 446902b2a6b6..1599f568f0e2 100644
--- a/arch/x86/oprofile/Makefile
+++ b/arch/x86/oprofile/Makefile
@@ -4,9 +4,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
4 oprof.o cpu_buffer.o buffer_sync.o \ 4 oprof.o cpu_buffer.o buffer_sync.o \
5 event_buffer.o oprofile_files.o \ 5 event_buffer.o oprofile_files.o \
6 oprofilefs.o oprofile_stats.o \ 6 oprofilefs.o oprofile_stats.o \
7 timer_int.o ) 7 timer_int.o nmi_timer_int.o )
8 8
9oprofile-y := $(DRIVER_OBJS) init.o backtrace.o 9oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
10oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ 10oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \
11 op_model_ppro.o op_model_p4.o 11 op_model_ppro.o op_model_p4.o
12oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c
index f148cf652678..9e138d00ad36 100644
--- a/arch/x86/oprofile/init.c
+++ b/arch/x86/oprofile/init.c
@@ -16,37 +16,23 @@
16 * with the NMI mode driver. 16 * with the NMI mode driver.
17 */ 17 */
18 18
19#ifdef CONFIG_X86_LOCAL_APIC
19extern int op_nmi_init(struct oprofile_operations *ops); 20extern int op_nmi_init(struct oprofile_operations *ops);
20extern int op_nmi_timer_init(struct oprofile_operations *ops);
21extern void op_nmi_exit(void); 21extern void op_nmi_exit(void);
22extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); 22#else
23static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; }
24static void op_nmi_exit(void) { }
25#endif
23 26
24static int nmi_timer; 27extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
25 28
26int __init oprofile_arch_init(struct oprofile_operations *ops) 29int __init oprofile_arch_init(struct oprofile_operations *ops)
27{ 30{
28 int ret;
29
30 ret = -ENODEV;
31
32#ifdef CONFIG_X86_LOCAL_APIC
33 ret = op_nmi_init(ops);
34#endif
35 nmi_timer = (ret != 0);
36#ifdef CONFIG_X86_IO_APIC
37 if (nmi_timer)
38 ret = op_nmi_timer_init(ops);
39#endif
40 ops->backtrace = x86_backtrace; 31 ops->backtrace = x86_backtrace;
41 32 return op_nmi_init(ops);
42 return ret;
43} 33}
44 34
45
46void oprofile_arch_exit(void) 35void oprofile_arch_exit(void)
47{ 36{
48#ifdef CONFIG_X86_LOCAL_APIC 37 op_nmi_exit();
49 if (!nmi_timer)
50 op_nmi_exit();
51#endif
52} 38}
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 75f9528e0372..26b8a8514ee5 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -595,24 +595,36 @@ static int __init p4_init(char **cpu_type)
595 return 0; 595 return 0;
596} 596}
597 597
598static int force_arch_perfmon; 598enum __force_cpu_type {
599static int force_cpu_type(const char *str, struct kernel_param *kp) 599 reserved = 0, /* do not force */
600 timer,
601 arch_perfmon,
602};
603
604static int force_cpu_type;
605
606static int set_cpu_type(const char *str, struct kernel_param *kp)
600{ 607{
601 if (!strcmp(str, "arch_perfmon")) { 608 if (!strcmp(str, "timer")) {
602 force_arch_perfmon = 1; 609 force_cpu_type = timer;
610 printk(KERN_INFO "oprofile: forcing NMI timer mode\n");
611 } else if (!strcmp(str, "arch_perfmon")) {
612 force_cpu_type = arch_perfmon;
603 printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); 613 printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
614 } else {
615 force_cpu_type = 0;
604 } 616 }
605 617
606 return 0; 618 return 0;
607} 619}
608module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); 620module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
609 621
610static int __init ppro_init(char **cpu_type) 622static int __init ppro_init(char **cpu_type)
611{ 623{
612 __u8 cpu_model = boot_cpu_data.x86_model; 624 __u8 cpu_model = boot_cpu_data.x86_model;
613 struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ 625 struct op_x86_model_spec *spec = &op_ppro_spec; /* default */
614 626
615 if (force_arch_perfmon && cpu_has_arch_perfmon) 627 if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon)
616 return 0; 628 return 0;
617 629
618 /* 630 /*
@@ -679,6 +691,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
679 if (!cpu_has_apic) 691 if (!cpu_has_apic)
680 return -ENODEV; 692 return -ENODEV;
681 693
694 if (force_cpu_type == timer)
695 return -ENODEV;
696
682 switch (vendor) { 697 switch (vendor) {
683 case X86_VENDOR_AMD: 698 case X86_VENDOR_AMD:
684 /* Needs to be at least an Athlon (or hammer in 32bit mode) */ 699 /* Needs to be at least an Athlon (or hammer in 32bit mode) */
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
deleted file mode 100644
index 7f8052cd6620..000000000000
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ /dev/null
@@ -1,50 +0,0 @@
1/**
2 * @file nmi_timer_int.c
3 *
4 * @remark Copyright 2003 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author Zwane Mwaikambo <zwane@linuxpower.ca>
8 */
9
10#include <linux/init.h>
11#include <linux/smp.h>
12#include <linux/errno.h>
13#include <linux/oprofile.h>
14#include <linux/rcupdate.h>
15#include <linux/kdebug.h>
16
17#include <asm/nmi.h>
18#include <asm/apic.h>
19#include <asm/ptrace.h>
20
21static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs)
22{
23 oprofile_add_sample(regs, 0);
24 return NMI_HANDLED;
25}
26
27static int timer_start(void)
28{
29 if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify,
30 0, "oprofile-timer"))
31 return 1;
32 return 0;
33}
34
35
36static void timer_stop(void)
37{
38 unregister_nmi_handler(NMI_LOCAL, "oprofile-timer");
39 synchronize_sched(); /* Allow already-started NMIs to complete. */
40}
41
42
43int __init op_nmi_timer_init(struct oprofile_operations *ops)
44{
45 ops->start = timer_start;
46 ops->stop = timer_stop;
47 ops->cpu_type = "timer";
48 printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
49 return 0;
50}
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index db0e9a51e611..da8fe0535ff4 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -44,7 +44,7 @@ static inline void set_bios_x(void)
44 pcibios_enabled = 1; 44 pcibios_enabled = 1;
45 set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT); 45 set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT);
46 if (__supported_pte_mask & _PAGE_NX) 46 if (__supported_pte_mask & _PAGE_NX)
47 printk(KERN_INFO "PCI : PCI BIOS aera is rw and x. Use pci=nobios if you want it NX.\n"); 47 printk(KERN_INFO "PCI : PCI BIOS area is rw and x. Use pci=nobios if you want it NX.\n");
48} 48}
49 49
50/* 50/*
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index c9718a16be15..4cf9bd0a1653 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -238,7 +238,8 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
238 238
239 spin_lock_irqsave(&rtc_lock, flags); 239 spin_lock_irqsave(&rtc_lock, flags);
240 efi_call_phys_prelog(); 240 efi_call_phys_prelog();
241 status = efi_call_phys2(efi_phys.get_time, tm, tc); 241 status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
242 virt_to_phys(tc));
242 efi_call_phys_epilog(); 243 efi_call_phys_epilog();
243 spin_unlock_irqrestore(&rtc_lock, flags); 244 spin_unlock_irqrestore(&rtc_lock, flags);
244 return status; 245 return status;
@@ -323,13 +324,10 @@ static void __init do_add_efi_memmap(void)
323 case EFI_UNUSABLE_MEMORY: 324 case EFI_UNUSABLE_MEMORY:
324 e820_type = E820_UNUSABLE; 325 e820_type = E820_UNUSABLE;
325 break; 326 break;
326 case EFI_RUNTIME_SERVICES_DATA:
327 e820_type = E820_RESERVED_EFI;
328 break;
329 default: 327 default:
330 /* 328 /*
331 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE 329 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
332 * EFI_MEMORY_MAPPED_IO 330 * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
333 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE 331 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
334 */ 332 */
335 e820_type = E820_RESERVED; 333 e820_type = E820_RESERVED;
@@ -355,8 +353,7 @@ void __init efi_memblock_x86_reserve_range(void)
355 boot_params.efi_info.efi_memdesc_size; 353 boot_params.efi_info.efi_memdesc_size;
356 memmap.desc_version = boot_params.efi_info.efi_memdesc_version; 354 memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
357 memmap.desc_size = boot_params.efi_info.efi_memdesc_size; 355 memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
358 memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size, 356 memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
359 "EFI memmap");
360} 357}
361 358
362#if EFI_DEBUG 359#if EFI_DEBUG
@@ -400,16 +397,14 @@ void __init efi_reserve_boot_services(void)
400 if ((start+size >= virt_to_phys(_text) 397 if ((start+size >= virt_to_phys(_text)
401 && start <= virt_to_phys(_end)) || 398 && start <= virt_to_phys(_end)) ||
402 !e820_all_mapped(start, start+size, E820_RAM) || 399 !e820_all_mapped(start, start+size, E820_RAM) ||
403 memblock_x86_check_reserved_size(&start, &size, 400 memblock_is_region_reserved(start, size)) {
404 1<<EFI_PAGE_SHIFT)) {
405 /* Could not reserve, skip it */ 401 /* Could not reserve, skip it */
406 md->num_pages = 0; 402 md->num_pages = 0;
407 memblock_dbg(PFX "Could not reserve boot range " 403 memblock_dbg(PFX "Could not reserve boot range "
408 "[0x%010llx-0x%010llx]\n", 404 "[0x%010llx-0x%010llx]\n",
409 start, start+size-1); 405 start, start+size-1);
410 } else 406 } else
411 memblock_x86_reserve_range(start, start+size, 407 memblock_reserve(start, size);
412 "EFI Boot");
413 } 408 }
414} 409}
415 410
@@ -674,21 +669,10 @@ void __init efi_enter_virtual_mode(void)
674 end_pfn = PFN_UP(end); 669 end_pfn = PFN_UP(end);
675 if (end_pfn <= max_low_pfn_mapped 670 if (end_pfn <= max_low_pfn_mapped
676 || (end_pfn > (1UL << (32 - PAGE_SHIFT)) 671 || (end_pfn > (1UL << (32 - PAGE_SHIFT))
677 && end_pfn <= max_pfn_mapped)) { 672 && end_pfn <= max_pfn_mapped))
678 va = __va(md->phys_addr); 673 va = __va(md->phys_addr);
679 674 else
680 if (!(md->attribute & EFI_MEMORY_WB)) { 675 va = efi_ioremap(md->phys_addr, size, md->type);
681 addr = (u64) (unsigned long)va;
682 npages = md->num_pages;
683 memrange_efi_to_native(&addr, &npages);
684 set_memory_uc(addr, npages);
685 }
686 } else {
687 if (!(md->attribute & EFI_MEMORY_WB))
688 va = ioremap_nocache(md->phys_addr, size);
689 else
690 va = ioremap_cache(md->phys_addr, size);
691 }
692 676
693 md->virt_addr = (u64) (unsigned long) va; 677 md->virt_addr = (u64) (unsigned long) va;
694 678
@@ -698,6 +682,13 @@ void __init efi_enter_virtual_mode(void)
698 continue; 682 continue;
699 } 683 }
700 684
685 if (!(md->attribute & EFI_MEMORY_WB)) {
686 addr = md->virt_addr;
687 npages = md->num_pages;
688 memrange_efi_to_native(&addr, &npages);
689 set_memory_uc(addr, npages);
690 }
691
701 systab = (u64) (unsigned long) efi_phys.systab; 692 systab = (u64) (unsigned long) efi_phys.systab;
702 if (md->phys_addr <= systab && systab < end) { 693 if (md->phys_addr <= systab && systab < end) {
703 systab += md->virt_addr - md->phys_addr; 694 systab += md->virt_addr - md->phys_addr;
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index e36bf714cb77..40e446941dd7 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -39,43 +39,14 @@
39 */ 39 */
40 40
41static unsigned long efi_rt_eflags; 41static unsigned long efi_rt_eflags;
42static pgd_t efi_bak_pg_dir_pointer[2];
43 42
44void efi_call_phys_prelog(void) 43void efi_call_phys_prelog(void)
45{ 44{
46 unsigned long cr4;
47 unsigned long temp;
48 struct desc_ptr gdt_descr; 45 struct desc_ptr gdt_descr;
49 46
50 local_irq_save(efi_rt_eflags); 47 local_irq_save(efi_rt_eflags);
51 48
52 /* 49 load_cr3(initial_page_table);
53 * If I don't have PAE, I should just duplicate two entries in page
54 * directory. If I have PAE, I just need to duplicate one entry in
55 * page directory.
56 */
57 cr4 = read_cr4_safe();
58
59 if (cr4 & X86_CR4_PAE) {
60 efi_bak_pg_dir_pointer[0].pgd =
61 swapper_pg_dir[pgd_index(0)].pgd;
62 swapper_pg_dir[0].pgd =
63 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
64 } else {
65 efi_bak_pg_dir_pointer[0].pgd =
66 swapper_pg_dir[pgd_index(0)].pgd;
67 efi_bak_pg_dir_pointer[1].pgd =
68 swapper_pg_dir[pgd_index(0x400000)].pgd;
69 swapper_pg_dir[pgd_index(0)].pgd =
70 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
71 temp = PAGE_OFFSET + 0x400000;
72 swapper_pg_dir[pgd_index(0x400000)].pgd =
73 swapper_pg_dir[pgd_index(temp)].pgd;
74 }
75
76 /*
77 * After the lock is released, the original page table is restored.
78 */
79 __flush_tlb_all(); 50 __flush_tlb_all();
80 51
81 gdt_descr.address = __pa(get_cpu_gdt_table(0)); 52 gdt_descr.address = __pa(get_cpu_gdt_table(0));
@@ -85,28 +56,13 @@ void efi_call_phys_prelog(void)
85 56
86void efi_call_phys_epilog(void) 57void efi_call_phys_epilog(void)
87{ 58{
88 unsigned long cr4;
89 struct desc_ptr gdt_descr; 59 struct desc_ptr gdt_descr;
90 60
91 gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); 61 gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
92 gdt_descr.size = GDT_SIZE - 1; 62 gdt_descr.size = GDT_SIZE - 1;
93 load_gdt(&gdt_descr); 63 load_gdt(&gdt_descr);
94 64
95 cr4 = read_cr4_safe(); 65 load_cr3(swapper_pg_dir);
96
97 if (cr4 & X86_CR4_PAE) {
98 swapper_pg_dir[pgd_index(0)].pgd =
99 efi_bak_pg_dir_pointer[0].pgd;
100 } else {
101 swapper_pg_dir[pgd_index(0)].pgd =
102 efi_bak_pg_dir_pointer[0].pgd;
103 swapper_pg_dir[pgd_index(0x400000)].pgd =
104 efi_bak_pg_dir_pointer[1].pgd;
105 }
106
107 /*
108 * After the lock is released, the original page table is restored.
109 */
110 __flush_tlb_all(); 66 __flush_tlb_all();
111 67
112 local_irq_restore(efi_rt_eflags); 68 local_irq_restore(efi_rt_eflags);
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 312250c6b2de..ac3aa54e2654 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -80,3 +80,20 @@ void __init efi_call_phys_epilog(void)
80 local_irq_restore(efi_flags); 80 local_irq_restore(efi_flags);
81 early_code_mapping_set_exec(0); 81 early_code_mapping_set_exec(0);
82} 82}
83
84void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
85 u32 type)
86{
87 unsigned long last_map_pfn;
88
89 if (type == EFI_MEMORY_MAPPED_IO)
90 return ioremap(phys_addr, size);
91
92 last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
93 if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
94 unsigned long top = last_map_pfn << PAGE_SHIFT;
95 efi_ioremap(top, size - (top - phys_addr), type);
96 }
97
98 return (void __iomem *)__va(phys_addr);
99}
diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c
index 25bfdbb5b130..3c6e328483c7 100644
--- a/arch/x86/platform/mrst/early_printk_mrst.c
+++ b/arch/x86/platform/mrst/early_printk_mrst.c
@@ -245,16 +245,24 @@ struct console early_mrst_console = {
245 * Following is the early console based on Medfield HSU (High 245 * Following is the early console based on Medfield HSU (High
246 * Speed UART) device. 246 * Speed UART) device.
247 */ 247 */
248#define HSU_PORT2_PADDR 0xffa28180 248#define HSU_PORT_BASE 0xffa28080
249 249
250static void __iomem *phsu; 250static void __iomem *phsu;
251 251
252void hsu_early_console_init(void) 252void hsu_early_console_init(const char *s)
253{ 253{
254 unsigned long paddr, port = 0;
254 u8 lcr; 255 u8 lcr;
255 256
256 phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, 257 /*
257 HSU_PORT2_PADDR); 258 * Select the early HSU console port if specified by user in the
259 * kernel command line.
260 */
261 if (*s && !kstrtoul(s, 10, &port))
262 port = clamp_val(port, 0, 2);
263
264 paddr = HSU_PORT_BASE + port * 0x80;
265 phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr);
258 266
259 /* Disable FIFO */ 267 /* Disable FIFO */
260 writeb(0x0, phsu + UART_FCR); 268 writeb(0x0, phsu + UART_FCR);
diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c
index 309c70fb7759..5d4ba301e776 100644
--- a/arch/x86/platform/uv/uv_sysfs.c
+++ b/arch/x86/platform/uv/uv_sysfs.c
@@ -19,7 +19,7 @@
19 * Copyright (c) Russ Anderson 19 * Copyright (c) Russ Anderson
20 */ 20 */
21 21
22#include <linux/sysdev.h> 22#include <linux/device.h>
23#include <asm/uv/bios.h> 23#include <asm/uv/bios.h>
24#include <asm/uv/uv.h> 24#include <asm/uv/uv.h>
25 25
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index f82082677337..d511aa97533a 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -18,14 +18,21 @@ chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk
18quiet_cmd_posttest = TEST $@ 18quiet_cmd_posttest = TEST $@
19 cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) 19 cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose)
20 20
21posttest: $(obj)/test_get_len vmlinux 21quiet_cmd_sanitytest = TEST $@
22 cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000
23
24posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity
22 $(call cmd,posttest) 25 $(call cmd,posttest)
26 $(call cmd,sanitytest)
23 27
24hostprogs-y := test_get_len 28hostprogs-y += test_get_len insn_sanity
25 29
26# -I needed for generated C source and C source which in the kernel tree. 30# -I needed for generated C source and C source which in the kernel tree.
27HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ 31HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
28 32
33HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
34
29# Dependencies are also needed. 35# Dependencies are also needed.
30$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c 36$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
31 37
38$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index eaf11f52fc0b..5f6a5b6c3a15 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -47,7 +47,7 @@ BEGIN {
47 sep_expr = "^\\|$" 47 sep_expr = "^\\|$"
48 group_expr = "^Grp[0-9A-Za-z]+" 48 group_expr = "^Grp[0-9A-Za-z]+"
49 49
50 imm_expr = "^[IJAO][a-z]" 50 imm_expr = "^[IJAOL][a-z]"
51 imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" 51 imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
52 imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" 52 imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
53 imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" 53 imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
@@ -59,6 +59,7 @@ BEGIN {
59 imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" 59 imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
60 imm_flag["Ob"] = "INAT_MOFFSET" 60 imm_flag["Ob"] = "INAT_MOFFSET"
61 imm_flag["Ov"] = "INAT_MOFFSET" 61 imm_flag["Ov"] = "INAT_MOFFSET"
62 imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
62 63
63 modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" 64 modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
64 force64_expr = "\\([df]64\\)" 65 force64_expr = "\\([df]64\\)"
@@ -70,8 +71,12 @@ BEGIN {
70 lprefix3_expr = "\\(F2\\)" 71 lprefix3_expr = "\\(F2\\)"
71 max_lprefix = 4 72 max_lprefix = 4
72 73
73 vexok_expr = "\\(VEX\\)" 74 # All opcodes starting with lower-case 'v' or with (v1) superscript
74 vexonly_expr = "\\(oVEX\\)" 75 # accept VEX prefix
76 vexok_opcode_expr = "^v.*"
77 vexok_expr = "\\(v1\\)"
78 # All opcodes with (v) superscript support *only* VEX prefix
79 vexonly_expr = "\\(v\\)"
75 80
76 prefix_expr = "\\(Prefix\\)" 81 prefix_expr = "\\(Prefix\\)"
77 prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" 82 prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@@ -85,8 +90,8 @@ BEGIN {
85 prefix_num["SEG=GS"] = "INAT_PFX_GS" 90 prefix_num["SEG=GS"] = "INAT_PFX_GS"
86 prefix_num["SEG=SS"] = "INAT_PFX_SS" 91 prefix_num["SEG=SS"] = "INAT_PFX_SS"
87 prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" 92 prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
88 prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" 93 prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
89 prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" 94 prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
90 95
91 clear_vars() 96 clear_vars()
92} 97}
@@ -310,12 +315,10 @@ function convert_operands(count,opnd, i,j,imm,mod)
310 if (match(opcode, fpu_expr)) 315 if (match(opcode, fpu_expr))
311 flags = add_flags(flags, "INAT_MODRM") 316 flags = add_flags(flags, "INAT_MODRM")
312 317
313 # check VEX only code 318 # check VEX codes
314 if (match(ext, vexonly_expr)) 319 if (match(ext, vexonly_expr))
315 flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") 320 flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
316 321 else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
317 # check VEX only code
318 if (match(ext, vexok_expr))
319 flags = add_flags(flags, "INAT_VEXOK") 322 flags = add_flags(flags, "INAT_VEXOK")
320 323
321 # check prefixes 324 # check prefixes
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
new file mode 100644
index 000000000000..cc2f8c131286
--- /dev/null
+++ b/arch/x86/tools/insn_sanity.c
@@ -0,0 +1,275 @@
1/*
2 * x86 decoder sanity test - based on test_get_insn.c
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 * Copyright (C) Hitachi, Ltd., 2011
20 */
21
22#include <stdlib.h>
23#include <stdio.h>
24#include <string.h>
25#include <assert.h>
26#include <unistd.h>
27#include <sys/types.h>
28#include <sys/stat.h>
29#include <fcntl.h>
30
31#define unlikely(cond) (cond)
32#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
33
34#include <asm/insn.h>
35#include <inat.c>
36#include <insn.c>
37
38/*
39 * Test of instruction analysis against tampering.
40 * Feed random binary to instruction decoder and ensure not to
41 * access out-of-instruction-buffer.
42 */
43
44#define DEFAULT_MAX_ITER 10000
45#define INSN_NOP 0x90
46
47static const char *prog; /* Program name */
48static int verbose; /* Verbosity */
49static int x86_64; /* x86-64 bit mode flag */
50static unsigned int seed; /* Random seed */
51static unsigned long iter_start; /* Start of iteration number */
52static unsigned long iter_end = DEFAULT_MAX_ITER; /* End of iteration number */
53static FILE *input_file; /* Input file name */
54
55static void usage(const char *err)
56{
57 if (err)
58 fprintf(stderr, "Error: %s\n\n", err);
59 fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog);
60 fprintf(stderr, "\t-y 64bit mode\n");
61 fprintf(stderr, "\t-n 32bit mode\n");
62 fprintf(stderr, "\t-v Verbosity(-vv dumps any decoded result)\n");
63 fprintf(stderr, "\t-s Give a random seed (and iteration number)\n");
64 fprintf(stderr, "\t-m Give a maximum iteration number\n");
65 fprintf(stderr, "\t-i Give an input file with decoded binary\n");
66 exit(1);
67}
68
69static void dump_field(FILE *fp, const char *name, const char *indent,
70 struct insn_field *field)
71{
72 fprintf(fp, "%s.%s = {\n", indent, name);
73 fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n",
74 indent, field->value, field->bytes[0], field->bytes[1],
75 field->bytes[2], field->bytes[3]);
76 fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent,
77 field->got, field->nbytes);
78}
79
80static void dump_insn(FILE *fp, struct insn *insn)
81{
82 fprintf(fp, "Instruction = {\n");
83 dump_field(fp, "prefixes", "\t", &insn->prefixes);
84 dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix);
85 dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix);
86 dump_field(fp, "opcode", "\t", &insn->opcode);
87 dump_field(fp, "modrm", "\t", &insn->modrm);
88 dump_field(fp, "sib", "\t", &insn->sib);
89 dump_field(fp, "displacement", "\t", &insn->displacement);
90 dump_field(fp, "immediate1", "\t", &insn->immediate1);
91 dump_field(fp, "immediate2", "\t", &insn->immediate2);
92 fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n",
93 insn->attr, insn->opnd_bytes, insn->addr_bytes);
94 fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n",
95 insn->length, insn->x86_64, insn->kaddr);
96}
97
98static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter,
99 unsigned char *insn_buf, struct insn *insn)
100{
101 int i;
102
103 fprintf(fp, "%s:\n", msg);
104
105 dump_insn(fp, insn);
106
107 fprintf(fp, "You can reproduce this with below command(s);\n");
108
109 /* Input a decoded instruction sequence directly */
110 fprintf(fp, " $ echo ");
111 for (i = 0; i < MAX_INSN_SIZE; i++)
112 fprintf(fp, " %02x", insn_buf[i]);
113 fprintf(fp, " | %s -i -\n", prog);
114
115 if (!input_file) {
116 fprintf(fp, "Or \n");
117 /* Give a seed and iteration number */
118 fprintf(fp, " $ %s -s 0x%x,%lu\n", prog, seed, nr_iter);
119 }
120}
121
122static void init_random_seed(void)
123{
124 int fd;
125
126 fd = open("/dev/urandom", O_RDONLY);
127 if (fd < 0)
128 goto fail;
129
130 if (read(fd, &seed, sizeof(seed)) != sizeof(seed))
131 goto fail;
132
133 close(fd);
134 return;
135fail:
136 usage("Failed to open /dev/urandom");
137}
138
139/* Read given instruction sequence from the input file */
140static int read_next_insn(unsigned char *insn_buf)
141{
142 char buf[256] = "", *tmp;
143 int i;
144
145 tmp = fgets(buf, ARRAY_SIZE(buf), input_file);
146 if (tmp == NULL || feof(input_file))
147 return 0;
148
149 for (i = 0; i < MAX_INSN_SIZE; i++) {
150 insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16);
151 if (*tmp != ' ')
152 break;
153 }
154
155 return i;
156}
157
158static int generate_insn(unsigned char *insn_buf)
159{
160 int i;
161
162 if (input_file)
163 return read_next_insn(insn_buf);
164
165 /* Fills buffer with random binary up to MAX_INSN_SIZE */
166 for (i = 0; i < MAX_INSN_SIZE - 1; i += 2)
167 *(unsigned short *)(&insn_buf[i]) = random() & 0xffff;
168
169 while (i < MAX_INSN_SIZE)
170 insn_buf[i++] = random() & 0xff;
171
172 return i;
173}
174
175static void parse_args(int argc, char **argv)
176{
177 int c;
178 char *tmp = NULL;
179 int set_seed = 0;
180
181 prog = argv[0];
182 while ((c = getopt(argc, argv, "ynvs:m:i:")) != -1) {
183 switch (c) {
184 case 'y':
185 x86_64 = 1;
186 break;
187 case 'n':
188 x86_64 = 0;
189 break;
190 case 'v':
191 verbose++;
192 break;
193 case 'i':
194 if (strcmp("-", optarg) == 0)
195 input_file = stdin;
196 else
197 input_file = fopen(optarg, "r");
198 if (!input_file)
199 usage("Failed to open input file");
200 break;
201 case 's':
202 seed = (unsigned int)strtoul(optarg, &tmp, 0);
203 if (*tmp == ',') {
204 optarg = tmp + 1;
205 iter_start = strtoul(optarg, &tmp, 0);
206 }
207 if (*tmp != '\0' || tmp == optarg)
208 usage("Failed to parse seed");
209 set_seed = 1;
210 break;
211 case 'm':
212 iter_end = strtoul(optarg, &tmp, 0);
213 if (*tmp != '\0' || tmp == optarg)
214 usage("Failed to parse max_iter");
215 break;
216 default:
217 usage(NULL);
218 }
219 }
220
221 /* Check errors */
222 if (iter_end < iter_start)
223 usage("Max iteration number must be bigger than iter-num");
224
225 if (set_seed && input_file)
226 usage("Don't use input file (-i) with random seed (-s)");
227
228 /* Initialize random seed */
229 if (!input_file) {
230 if (!set_seed) /* No seed is given */
231 init_random_seed();
232 srand(seed);
233 }
234}
235
236int main(int argc, char **argv)
237{
238 struct insn insn;
239 int insns = 0;
240 int errors = 0;
241 unsigned long i;
242 unsigned char insn_buf[MAX_INSN_SIZE * 2];
243
244 parse_args(argc, argv);
245
246 /* Prepare stop bytes with NOPs */
247 memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
248
249 for (i = 0; i < iter_end; i++) {
250 if (generate_insn(insn_buf) <= 0)
251 break;
252
253 if (i < iter_start) /* Skip to given iteration number */
254 continue;
255
256 /* Decode an instruction */
257 insn_init(&insn, insn_buf, x86_64);
258 insn_get_length(&insn);
259
260 if (insn.next_byte <= insn.kaddr ||
261 insn.kaddr + MAX_INSN_SIZE < insn.next_byte) {
262 /* Access out-of-range memory */
263 dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn);
264 errors++;
265 } else if (verbose && !insn_complete(&insn))
266 dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn);
267 else if (verbose >= 2)
268 dump_insn(stdout, &insn);
269 insns++;
270 }
271
272 fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed);
273
274 return errors ? 1 : 0;
275}
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index 7c0fedd98ea0..ef1db1900d86 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -109,7 +109,7 @@ static const struct file_operations u32_array_fops = {
109 .llseek = no_llseek, 109 .llseek = no_llseek,
110}; 110};
111 111
112struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, 112struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode,
113 struct dentry *parent, 113 struct dentry *parent,
114 u32 *array, unsigned elements) 114 u32 *array, unsigned elements)
115{ 115{
diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h
index e28132084832..78d25499be5b 100644
--- a/arch/x86/xen/debugfs.h
+++ b/arch/x86/xen/debugfs.h
@@ -3,7 +3,7 @@
3 3
4struct dentry * __init xen_init_debugfs(void); 4struct dentry * __init xen_init_debugfs(void);
5 5
6struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, 6struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode,
7 struct dentry *parent, 7 struct dentry *parent,
8 u32 *array, unsigned elements); 8 u32 *array, unsigned elements);
9 9
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 1f928659c338..12eb07bfb267 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1215,8 +1215,6 @@ asmlinkage void __init xen_start_kernel(void)
1215 local_irq_disable(); 1215 local_irq_disable();
1216 early_boot_irqs_disabled = true; 1216 early_boot_irqs_disabled = true;
1217 1217
1218 memblock_init();
1219
1220 xen_raw_console_write("mapping kernel into physical memory\n"); 1218 xen_raw_console_write("mapping kernel into physical memory\n");
1221 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); 1219 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
1222 xen_ident_map_ISA(); 1220 xen_ident_map_ISA();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 87f6673b1207..f4bf8aa574f4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1774,10 +1774,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1774 __xen_write_cr3(true, __pa(pgd)); 1774 __xen_write_cr3(true, __pa(pgd));
1775 xen_mc_issue(PARAVIRT_LAZY_CPU); 1775 xen_mc_issue(PARAVIRT_LAZY_CPU);
1776 1776
1777 memblock_x86_reserve_range(__pa(xen_start_info->pt_base), 1777 memblock_reserve(__pa(xen_start_info->pt_base),
1778 __pa(xen_start_info->pt_base + 1778 xen_start_info->nr_pt_frames * PAGE_SIZE);
1779 xen_start_info->nr_pt_frames * PAGE_SIZE),
1780 "XEN PAGETABLES");
1781 1779
1782 return pgd; 1780 return pgd;
1783} 1781}
@@ -1853,10 +1851,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1853 PFN_DOWN(__pa(initial_page_table))); 1851 PFN_DOWN(__pa(initial_page_table)));
1854 xen_write_cr3(__pa(initial_page_table)); 1852 xen_write_cr3(__pa(initial_page_table));
1855 1853
1856 memblock_x86_reserve_range(__pa(xen_start_info->pt_base), 1854 memblock_reserve(__pa(xen_start_info->pt_base),
1857 __pa(xen_start_info->pt_base + 1855 xen_start_info->nr_pt_frames * PAGE_SIZE));
1858 xen_start_info->nr_pt_frames * PAGE_SIZE),
1859 "XEN PAGETABLES");
1860 1856
1861 return initial_page_table; 1857 return initial_page_table;
1862} 1858}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 1093f80c162d..e03c63692176 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -75,7 +75,7 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
75 if (i == XEN_EXTRA_MEM_MAX_REGIONS) 75 if (i == XEN_EXTRA_MEM_MAX_REGIONS)
76 printk(KERN_WARNING "Warning: not enough extra memory regions\n"); 76 printk(KERN_WARNING "Warning: not enough extra memory regions\n");
77 77
78 memblock_x86_reserve_range(start, start + size, "XEN EXTRA"); 78 memblock_reserve(start, size);
79 79
80 xen_max_p2m_pfn = PFN_DOWN(start + size); 80 xen_max_p2m_pfn = PFN_DOWN(start + size);
81 81
@@ -173,9 +173,21 @@ static unsigned long __init xen_get_max_pages(void)
173 domid_t domid = DOMID_SELF; 173 domid_t domid = DOMID_SELF;
174 int ret; 174 int ret;
175 175
176 ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid); 176 /*
177 if (ret > 0) 177 * For the initial domain we use the maximum reservation as
178 max_pages = ret; 178 * the maximum page.
179 *
180 * For guest domains the current maximum reservation reflects
181 * the current maximum rather than the static maximum. In this
182 * case the e820 map provided to us will cover the static
183 * maximum region.
184 */
185 if (xen_initial_domain()) {
186 ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
187 if (ret > 0)
188 max_pages = ret;
189 }
190
179 return min(max_pages, MAX_DOMAIN_PAGES); 191 return min(max_pages, MAX_DOMAIN_PAGES);
180} 192}
181 193
@@ -299,9 +311,8 @@ char * __init xen_memory_setup(void)
299 * - xen_start_info 311 * - xen_start_info
300 * See comment above "struct start_info" in <xen/interface/xen.h> 312 * See comment above "struct start_info" in <xen/interface/xen.h>
301 */ 313 */
302 memblock_x86_reserve_range(__pa(xen_start_info->mfn_list), 314 memblock_reserve(__pa(xen_start_info->mfn_list),
303 __pa(xen_start_info->pt_base), 315 xen_start_info->pt_base - xen_start_info->mfn_list);
304 "XEN START INFO");
305 316
306 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 317 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
307 318