aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig18
-rw-r--r--arch/x86/ia32/ia32entry.S43
-rw-r--r--arch/x86/include/asm/alternative-asm.h4
-rw-r--r--arch/x86/include/asm/apic.h6
-rw-r--r--arch/x86/include/asm/apic_flat_64.h7
-rw-r--r--arch/x86/include/asm/apicdef.h1
-rw-r--r--arch/x86/include/asm/bitops.h76
-rw-r--r--arch/x86/include/asm/cmpxchg.h163
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h46
-rw-r--r--arch/x86/include/asm/cmpxchg_64.h43
-rw-r--r--arch/x86/include/asm/div64.h22
-rw-r--r--arch/x86/include/asm/e820.h2
-rw-r--r--arch/x86/include/asm/hardirq.h1
-rw-r--r--arch/x86/include/asm/i387.h2
-rw-r--r--arch/x86/include/asm/insn.h7
-rw-r--r--arch/x86/include/asm/mach_timer.h2
-rw-r--r--arch/x86/include/asm/mc146818rtc.h4
-rw-r--r--arch/x86/include/asm/memblock.h23
-rw-r--r--arch/x86/include/asm/numachip/numachip_csr.h167
-rw-r--r--arch/x86/include/asm/percpu.h53
-rw-r--r--arch/x86/include/asm/perf_event.h44
-rw-r--r--arch/x86/include/asm/pgtable.h2
-rw-r--r--arch/x86/include/asm/processor-flags.h1
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/spinlock.h15
-rw-r--r--arch/x86/include/asm/thread_info.h9
-rw-r--r--arch/x86/include/asm/topology.h2
-rw-r--r--arch/x86/include/asm/tsc.h2
-rw-r--r--arch/x86/include/asm/uaccess.h2
-rw-r--r--arch/x86/include/asm/x86_init.h3
-rw-r--r--arch/x86/kernel/acpi/boot.c10
-rw-r--r--arch/x86/kernel/amd_nb.c8
-rw-r--r--arch/x86/kernel/aperture_64.c4
-rw-r--r--arch/x86/kernel/apic/Makefile1
-rw-r--r--arch/x86/kernel/apic/apic.c113
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c9
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c294
-rw-r--r--arch/x86/kernel/apic/io_apic.c6
-rw-r--r--arch/x86/kernel/check.c34
-rw-r--r--arch/x86/kernel/cpu/amd.c9
-rw-r--r--arch/x86/kernel/cpu/centaur.c2
-rw-r--r--arch/x86/kernel/cpu/common.c14
-rw-r--r--arch/x86/kernel/cpu/cpu.h5
-rw-r--r--arch/x86/kernel/cpu/intel.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c7
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/threshold.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event.c262
-rw-r--r--arch/x86/kernel/cpu/perf_event.h51
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c90
-rw-r--r--arch/x86/kernel/cpu/powerflags.c3
-rw-r--r--arch/x86/kernel/cpu/proc.c4
-rw-r--r--arch/x86/kernel/e820.c58
-rw-r--r--arch/x86/kernel/entry_32.S4
-rw-r--r--arch/x86/kernel/entry_64.S31
-rw-r--r--arch/x86/kernel/head.c2
-rw-r--r--arch/x86/kernel/head32.c7
-rw-r--r--arch/x86/kernel/head64.c7
-rw-r--r--arch/x86/kernel/hpet.c7
-rw-r--r--arch/x86/kernel/irq.c11
-rw-r--r--arch/x86/kernel/jump_label.c2
-rw-r--r--arch/x86/kernel/mpparse.c12
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/process_32.c6
-rw-r--r--arch/x86/kernel/process_64.c15
-rw-r--r--arch/x86/kernel/ptrace.c3
-rw-r--r--arch/x86/kernel/setup.c21
-rw-r--r--arch/x86/kernel/smpboot.c3
-rw-r--r--arch/x86/kernel/trampoline.c4
-rw-r--r--arch/x86/kernel/traps.c7
-rw-r--r--arch/x86/kernel/tsc.c6
-rw-r--r--arch/x86/kernel/tsc_sync.c4
-rw-r--r--arch/x86/kernel/vsyscall_64.c77
-rw-r--r--arch/x86/kernel/x86_init.c1
-rw-r--r--arch/x86/kvm/i8254.c10
-rw-r--r--arch/x86/kvm/x86.c19
-rw-r--r--arch/x86/lib/inat.c9
-rw-r--r--arch/x86/lib/insn.c4
-rw-r--r--arch/x86/lib/string_32.c8
-rw-r--r--arch/x86/lib/x86-opcode-map.txt606
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/extable.c2
-rw-r--r--arch/x86/mm/fault.c22
-rw-r--r--arch/x86/mm/init.c8
-rw-r--r--arch/x86/mm/init_32.c36
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/mm/memblock.c348
-rw-r--r--arch/x86/mm/memtest.c33
-rw-r--r--arch/x86/mm/numa.c37
-rw-r--r--arch/x86/mm/numa_32.c10
-rw-r--r--arch/x86/mm/numa_64.c2
-rw-r--r--arch/x86/mm/numa_emulation.c36
-rw-r--r--arch/x86/mm/pageattr.c2
-rw-r--r--arch/x86/mm/srat.c7
-rw-r--r--arch/x86/oprofile/Makefile3
-rw-r--r--arch/x86/oprofile/init.c30
-rw-r--r--arch/x86/oprofile/nmi_int.c27
-rw-r--r--arch/x86/oprofile/nmi_timer_int.c50
-rw-r--r--arch/x86/platform/efi/efi.c12
-rw-r--r--arch/x86/tools/Makefile11
-rw-r--r--arch/x86/tools/gen-insn-attr-x86.awk21
-rw-r--r--arch/x86/tools/insn_sanity.c275
-rw-r--r--arch/x86/xen/enlighten.c2
-rw-r--r--arch/x86/xen/mmu.c12
-rw-r--r--arch/x86/xen/setup.c7
107 files changed, 2213 insertions, 1438 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index efb42949cc09..5731eb70e0a0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,6 +26,8 @@ config X86
26 select HAVE_IOREMAP_PROT 26 select HAVE_IOREMAP_PROT
27 select HAVE_KPROBES 27 select HAVE_KPROBES
28 select HAVE_MEMBLOCK 28 select HAVE_MEMBLOCK
29 select HAVE_MEMBLOCK_NODE_MAP
30 select ARCH_DISCARD_MEMBLOCK
29 select ARCH_WANT_OPTIONAL_GPIOLIB 31 select ARCH_WANT_OPTIONAL_GPIOLIB
30 select ARCH_WANT_FRAME_POINTERS 32 select ARCH_WANT_FRAME_POINTERS
31 select HAVE_DMA_ATTRS 33 select HAVE_DMA_ATTRS
@@ -204,9 +206,6 @@ config ZONE_DMA32
204 bool 206 bool
205 default X86_64 207 default X86_64
206 208
207config ARCH_POPULATES_NODE_MAP
208 def_bool y
209
210config AUDIT_ARCH 209config AUDIT_ARCH
211 bool 210 bool
212 default X86_64 211 default X86_64
@@ -343,6 +342,7 @@ config X86_EXTENDED_PLATFORM
343 342
344 If you enable this option then you'll be able to select support 343 If you enable this option then you'll be able to select support
345 for the following (non-PC) 64 bit x86 platforms: 344 for the following (non-PC) 64 bit x86 platforms:
345 Numascale NumaChip
346 ScaleMP vSMP 346 ScaleMP vSMP
347 SGI Ultraviolet 347 SGI Ultraviolet
348 348
@@ -351,6 +351,18 @@ config X86_EXTENDED_PLATFORM
351endif 351endif
352# This is an alphabetically sorted list of 64 bit extended platforms 352# This is an alphabetically sorted list of 64 bit extended platforms
353# Please maintain the alphabetic order if and when there are additions 353# Please maintain the alphabetic order if and when there are additions
354config X86_NUMACHIP
355 bool "Numascale NumaChip"
356 depends on X86_64
357 depends on X86_EXTENDED_PLATFORM
358 depends on NUMA
359 depends on SMP
360 depends on X86_X2APIC
361 depends on !EDAC_AMD64
362 ---help---
363 Adds support for Numascale NumaChip large-SMP systems. Needed to
364 enable more than ~168 cores.
365 If you don't have one of these, you should say N here.
354 366
355config X86_VSMP 367config X86_VSMP
356 bool "ScaleMP vSMP" 368 bool "ScaleMP vSMP"
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a6253ec1b284..3e274564f6bf 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -134,7 +134,7 @@ ENTRY(ia32_sysenter_target)
134 CFI_REL_OFFSET rsp,0 134 CFI_REL_OFFSET rsp,0
135 pushfq_cfi 135 pushfq_cfi
136 /*CFI_REL_OFFSET rflags,0*/ 136 /*CFI_REL_OFFSET rflags,0*/
137 movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d 137 movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
138 CFI_REGISTER rip,r10 138 CFI_REGISTER rip,r10
139 pushq_cfi $__USER32_CS 139 pushq_cfi $__USER32_CS
140 /*CFI_REL_OFFSET cs,0*/ 140 /*CFI_REL_OFFSET cs,0*/
@@ -150,9 +150,8 @@ ENTRY(ia32_sysenter_target)
150 .section __ex_table,"a" 150 .section __ex_table,"a"
151 .quad 1b,ia32_badarg 151 .quad 1b,ia32_badarg
152 .previous 152 .previous
153 GET_THREAD_INFO(%r10) 153 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
154 orl $TS_COMPAT,TI_status(%r10) 154 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
155 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
156 CFI_REMEMBER_STATE 155 CFI_REMEMBER_STATE
157 jnz sysenter_tracesys 156 jnz sysenter_tracesys
158 cmpq $(IA32_NR_syscalls-1),%rax 157 cmpq $(IA32_NR_syscalls-1),%rax
@@ -162,13 +161,12 @@ sysenter_do_call:
162sysenter_dispatch: 161sysenter_dispatch:
163 call *ia32_sys_call_table(,%rax,8) 162 call *ia32_sys_call_table(,%rax,8)
164 movq %rax,RAX-ARGOFFSET(%rsp) 163 movq %rax,RAX-ARGOFFSET(%rsp)
165 GET_THREAD_INFO(%r10)
166 DISABLE_INTERRUPTS(CLBR_NONE) 164 DISABLE_INTERRUPTS(CLBR_NONE)
167 TRACE_IRQS_OFF 165 TRACE_IRQS_OFF
168 testl $_TIF_ALLWORK_MASK,TI_flags(%r10) 166 testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
169 jnz sysexit_audit 167 jnz sysexit_audit
170sysexit_from_sys_call: 168sysexit_from_sys_call:
171 andl $~TS_COMPAT,TI_status(%r10) 169 andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
172 /* clear IF, that popfq doesn't enable interrupts early */ 170 /* clear IF, that popfq doesn't enable interrupts early */
173 andl $~0x200,EFLAGS-R11(%rsp) 171 andl $~0x200,EFLAGS-R11(%rsp)
174 movl RIP-R11(%rsp),%edx /* User %eip */ 172 movl RIP-R11(%rsp),%edx /* User %eip */
@@ -205,7 +203,7 @@ sysexit_from_sys_call:
205 .endm 203 .endm
206 204
207 .macro auditsys_exit exit 205 .macro auditsys_exit exit
208 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) 206 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
209 jnz ia32_ret_from_sys_call 207 jnz ia32_ret_from_sys_call
210 TRACE_IRQS_ON 208 TRACE_IRQS_ON
211 sti 209 sti
@@ -215,12 +213,11 @@ sysexit_from_sys_call:
215 movzbl %al,%edi /* zero-extend that into %edi */ 213 movzbl %al,%edi /* zero-extend that into %edi */
216 inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ 214 inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
217 call audit_syscall_exit 215 call audit_syscall_exit
218 GET_THREAD_INFO(%r10)
219 movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ 216 movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */
220 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi 217 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
221 cli 218 cli
222 TRACE_IRQS_OFF 219 TRACE_IRQS_OFF
223 testl %edi,TI_flags(%r10) 220 testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
224 jz \exit 221 jz \exit
225 CLEAR_RREGS -ARGOFFSET 222 CLEAR_RREGS -ARGOFFSET
226 jmp int_with_check 223 jmp int_with_check
@@ -238,7 +235,7 @@ sysexit_audit:
238 235
239sysenter_tracesys: 236sysenter_tracesys:
240#ifdef CONFIG_AUDITSYSCALL 237#ifdef CONFIG_AUDITSYSCALL
241 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) 238 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
242 jz sysenter_auditsys 239 jz sysenter_auditsys
243#endif 240#endif
244 SAVE_REST 241 SAVE_REST
@@ -309,9 +306,8 @@ ENTRY(ia32_cstar_target)
309 .section __ex_table,"a" 306 .section __ex_table,"a"
310 .quad 1b,ia32_badarg 307 .quad 1b,ia32_badarg
311 .previous 308 .previous
312 GET_THREAD_INFO(%r10) 309 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
313 orl $TS_COMPAT,TI_status(%r10) 310 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
314 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
315 CFI_REMEMBER_STATE 311 CFI_REMEMBER_STATE
316 jnz cstar_tracesys 312 jnz cstar_tracesys
317 cmpq $IA32_NR_syscalls-1,%rax 313 cmpq $IA32_NR_syscalls-1,%rax
@@ -321,13 +317,12 @@ cstar_do_call:
321cstar_dispatch: 317cstar_dispatch:
322 call *ia32_sys_call_table(,%rax,8) 318 call *ia32_sys_call_table(,%rax,8)
323 movq %rax,RAX-ARGOFFSET(%rsp) 319 movq %rax,RAX-ARGOFFSET(%rsp)
324 GET_THREAD_INFO(%r10)
325 DISABLE_INTERRUPTS(CLBR_NONE) 320 DISABLE_INTERRUPTS(CLBR_NONE)
326 TRACE_IRQS_OFF 321 TRACE_IRQS_OFF
327 testl $_TIF_ALLWORK_MASK,TI_flags(%r10) 322 testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
328 jnz sysretl_audit 323 jnz sysretl_audit
329sysretl_from_sys_call: 324sysretl_from_sys_call:
330 andl $~TS_COMPAT,TI_status(%r10) 325 andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
331 RESTORE_ARGS 0,-ARG_SKIP,0,0,0 326 RESTORE_ARGS 0,-ARG_SKIP,0,0,0
332 movl RIP-ARGOFFSET(%rsp),%ecx 327 movl RIP-ARGOFFSET(%rsp),%ecx
333 CFI_REGISTER rip,rcx 328 CFI_REGISTER rip,rcx
@@ -355,7 +350,7 @@ sysretl_audit:
355 350
356cstar_tracesys: 351cstar_tracesys:
357#ifdef CONFIG_AUDITSYSCALL 352#ifdef CONFIG_AUDITSYSCALL
358 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) 353 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
359 jz cstar_auditsys 354 jz cstar_auditsys
360#endif 355#endif
361 xchgl %r9d,%ebp 356 xchgl %r9d,%ebp
@@ -420,9 +415,8 @@ ENTRY(ia32_syscall)
420 /* note the registers are not zero extended to the sf. 415 /* note the registers are not zero extended to the sf.
421 this could be a problem. */ 416 this could be a problem. */
422 SAVE_ARGS 0,1,0 417 SAVE_ARGS 0,1,0
423 GET_THREAD_INFO(%r10) 418 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
424 orl $TS_COMPAT,TI_status(%r10) 419 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
425 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
426 jnz ia32_tracesys 420 jnz ia32_tracesys
427 cmpq $(IA32_NR_syscalls-1),%rax 421 cmpq $(IA32_NR_syscalls-1),%rax
428 ja ia32_badsys 422 ja ia32_badsys
@@ -459,8 +453,8 @@ quiet_ni_syscall:
459 CFI_ENDPROC 453 CFI_ENDPROC
460 454
461 .macro PTREGSCALL label, func, arg 455 .macro PTREGSCALL label, func, arg
462 .globl \label 456 ALIGN
463\label: 457GLOBAL(\label)
464 leaq \func(%rip),%rax 458 leaq \func(%rip),%rax
465 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ 459 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
466 jmp ia32_ptregs_common 460 jmp ia32_ptregs_common
@@ -477,7 +471,8 @@ quiet_ni_syscall:
477 PTREGSCALL stub32_vfork, sys_vfork, %rdi 471 PTREGSCALL stub32_vfork, sys_vfork, %rdi
478 PTREGSCALL stub32_iopl, sys_iopl, %rsi 472 PTREGSCALL stub32_iopl, sys_iopl, %rsi
479 473
480ENTRY(ia32_ptregs_common) 474 ALIGN
475ia32_ptregs_common:
481 popq %r11 476 popq %r11
482 CFI_ENDPROC 477 CFI_ENDPROC
483 CFI_STARTPROC32 simple 478 CFI_STARTPROC32 simple
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 091508b533b4..952bd0100c5c 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -4,10 +4,10 @@
4 4
5#ifdef CONFIG_SMP 5#ifdef CONFIG_SMP
6 .macro LOCK_PREFIX 6 .macro LOCK_PREFIX
71: lock 7672: lock
8 .section .smp_locks,"a" 8 .section .smp_locks,"a"
9 .balign 4 9 .balign 4
10 .long 1b - . 10 .long 672b - .
11 .previous 11 .previous
12 .endm 12 .endm
13#else 13#else
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 1a6c09af048f..3ab9bdd87e79 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -176,6 +176,7 @@ static inline u64 native_x2apic_icr_read(void)
176} 176}
177 177
178extern int x2apic_phys; 178extern int x2apic_phys;
179extern int x2apic_preenabled;
179extern void check_x2apic(void); 180extern void check_x2apic(void);
180extern void enable_x2apic(void); 181extern void enable_x2apic(void);
181extern void x2apic_icr_write(u32 low, u32 id); 182extern void x2apic_icr_write(u32 low, u32 id);
@@ -198,6 +199,9 @@ static inline void x2apic_force_phys(void)
198 x2apic_phys = 1; 199 x2apic_phys = 1;
199} 200}
200#else 201#else
202static inline void disable_x2apic(void)
203{
204}
201static inline void check_x2apic(void) 205static inline void check_x2apic(void)
202{ 206{
203} 207}
@@ -212,6 +216,7 @@ static inline void x2apic_force_phys(void)
212{ 216{
213} 217}
214 218
219#define nox2apic 0
215#define x2apic_preenabled 0 220#define x2apic_preenabled 0
216#define x2apic_supported() 0 221#define x2apic_supported() 0
217#endif 222#endif
@@ -410,6 +415,7 @@ extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
410#endif 415#endif
411 416
412#ifdef CONFIG_X86_LOCAL_APIC 417#ifdef CONFIG_X86_LOCAL_APIC
418
413static inline u32 apic_read(u32 reg) 419static inline u32 apic_read(u32 reg)
414{ 420{
415 return apic->read(reg); 421 return apic->read(reg);
diff --git a/arch/x86/include/asm/apic_flat_64.h b/arch/x86/include/asm/apic_flat_64.h
new file mode 100644
index 000000000000..a2d312796440
--- /dev/null
+++ b/arch/x86/include/asm/apic_flat_64.h
@@ -0,0 +1,7 @@
1#ifndef _ASM_X86_APIC_FLAT_64_H
2#define _ASM_X86_APIC_FLAT_64_H
3
4extern void flat_init_apic_ldr(void);
5
6#endif
7
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 3925d8007864..134bba00df09 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -144,6 +144,7 @@
144 144
145#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) 145#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
146#define APIC_BASE_MSR 0x800 146#define APIC_BASE_MSR 0x800
147#define XAPIC_ENABLE (1UL << 11)
147#define X2APIC_ENABLE (1UL << 10) 148#define X2APIC_ENABLE (1UL << 10)
148 149
149#ifdef CONFIG_X86_32 150#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 1775d6e5920e..b97596e2b68c 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -380,6 +380,8 @@ static inline unsigned long __fls(unsigned long word)
380 return word; 380 return word;
381} 381}
382 382
383#undef ADDR
384
383#ifdef __KERNEL__ 385#ifdef __KERNEL__
384/** 386/**
385 * ffs - find first set bit in word 387 * ffs - find first set bit in word
@@ -395,10 +397,25 @@ static inline unsigned long __fls(unsigned long word)
395static inline int ffs(int x) 397static inline int ffs(int x)
396{ 398{
397 int r; 399 int r;
398#ifdef CONFIG_X86_CMOV 400
401#ifdef CONFIG_X86_64
402 /*
403 * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
404 * dest reg is undefined if x==0, but their CPU architect says its
405 * value is written to set it to the same as before, except that the
406 * top 32 bits will be cleared.
407 *
408 * We cannot do this on 32 bits because at the very least some
409 * 486 CPUs did not behave this way.
410 */
411 long tmp = -1;
412 asm("bsfl %1,%0"
413 : "=r" (r)
414 : "rm" (x), "0" (tmp));
415#elif defined(CONFIG_X86_CMOV)
399 asm("bsfl %1,%0\n\t" 416 asm("bsfl %1,%0\n\t"
400 "cmovzl %2,%0" 417 "cmovzl %2,%0"
401 : "=r" (r) : "rm" (x), "r" (-1)); 418 : "=&r" (r) : "rm" (x), "r" (-1));
402#else 419#else
403 asm("bsfl %1,%0\n\t" 420 asm("bsfl %1,%0\n\t"
404 "jnz 1f\n\t" 421 "jnz 1f\n\t"
@@ -422,7 +439,22 @@ static inline int ffs(int x)
422static inline int fls(int x) 439static inline int fls(int x)
423{ 440{
424 int r; 441 int r;
425#ifdef CONFIG_X86_CMOV 442
443#ifdef CONFIG_X86_64
444 /*
445 * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
446 * dest reg is undefined if x==0, but their CPU architect says its
447 * value is written to set it to the same as before, except that the
448 * top 32 bits will be cleared.
449 *
450 * We cannot do this on 32 bits because at the very least some
451 * 486 CPUs did not behave this way.
452 */
453 long tmp = -1;
454 asm("bsrl %1,%0"
455 : "=r" (r)
456 : "rm" (x), "0" (tmp));
457#elif defined(CONFIG_X86_CMOV)
426 asm("bsrl %1,%0\n\t" 458 asm("bsrl %1,%0\n\t"
427 "cmovzl %2,%0" 459 "cmovzl %2,%0"
428 : "=&r" (r) : "rm" (x), "rm" (-1)); 460 : "=&r" (r) : "rm" (x), "rm" (-1));
@@ -434,11 +466,35 @@ static inline int fls(int x)
434#endif 466#endif
435 return r + 1; 467 return r + 1;
436} 468}
437#endif /* __KERNEL__ */
438
439#undef ADDR
440 469
441#ifdef __KERNEL__ 470/**
471 * fls64 - find last set bit in a 64-bit word
472 * @x: the word to search
473 *
474 * This is defined in a similar way as the libc and compiler builtin
475 * ffsll, but returns the position of the most significant set bit.
476 *
477 * fls64(value) returns 0 if value is 0 or the position of the last
478 * set bit if value is nonzero. The last (most significant) bit is
479 * at position 64.
480 */
481#ifdef CONFIG_X86_64
482static __always_inline int fls64(__u64 x)
483{
484 long bitpos = -1;
485 /*
486 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
487 * dest reg is undefined if x==0, but their CPU architect says its
488 * value is written to set it to the same as before.
489 */
490 asm("bsrq %1,%0"
491 : "+r" (bitpos)
492 : "rm" (x));
493 return bitpos + 1;
494}
495#else
496#include <asm-generic/bitops/fls64.h>
497#endif
442 498
443#include <asm-generic/bitops/find.h> 499#include <asm-generic/bitops/find.h>
444 500
@@ -450,12 +506,6 @@ static inline int fls(int x)
450 506
451#include <asm-generic/bitops/const_hweight.h> 507#include <asm-generic/bitops/const_hweight.h>
452 508
453#endif /* __KERNEL__ */
454
455#include <asm-generic/bitops/fls64.h>
456
457#ifdef __KERNEL__
458
459#include <asm-generic/bitops/le.h> 509#include <asm-generic/bitops/le.h>
460 510
461#include <asm-generic/bitops/ext2-atomic-setbit.h> 511#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 5d3acdf5a7a6..0c9fa2745f13 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -14,6 +14,8 @@ extern void __cmpxchg_wrong_size(void)
14 __compiletime_error("Bad argument size for cmpxchg"); 14 __compiletime_error("Bad argument size for cmpxchg");
15extern void __xadd_wrong_size(void) 15extern void __xadd_wrong_size(void)
16 __compiletime_error("Bad argument size for xadd"); 16 __compiletime_error("Bad argument size for xadd");
17extern void __add_wrong_size(void)
18 __compiletime_error("Bad argument size for add");
17 19
18/* 20/*
19 * Constants for operation sizes. On 32-bit, the 64-bit size it set to 21 * Constants for operation sizes. On 32-bit, the 64-bit size it set to
@@ -31,60 +33,47 @@ extern void __xadd_wrong_size(void)
31#define __X86_CASE_Q -1 /* sizeof will never return -1 */ 33#define __X86_CASE_Q -1 /* sizeof will never return -1 */
32#endif 34#endif
33 35
36/*
37 * An exchange-type operation, which takes a value and a pointer, and
38 * returns a the old value.
39 */
40#define __xchg_op(ptr, arg, op, lock) \
41 ({ \
42 __typeof__ (*(ptr)) __ret = (arg); \
43 switch (sizeof(*(ptr))) { \
44 case __X86_CASE_B: \
45 asm volatile (lock #op "b %b0, %1\n" \
46 : "+r" (__ret), "+m" (*(ptr)) \
47 : : "memory", "cc"); \
48 break; \
49 case __X86_CASE_W: \
50 asm volatile (lock #op "w %w0, %1\n" \
51 : "+r" (__ret), "+m" (*(ptr)) \
52 : : "memory", "cc"); \
53 break; \
54 case __X86_CASE_L: \
55 asm volatile (lock #op "l %0, %1\n" \
56 : "+r" (__ret), "+m" (*(ptr)) \
57 : : "memory", "cc"); \
58 break; \
59 case __X86_CASE_Q: \
60 asm volatile (lock #op "q %q0, %1\n" \
61 : "+r" (__ret), "+m" (*(ptr)) \
62 : : "memory", "cc"); \
63 break; \
64 default: \
65 __ ## op ## _wrong_size(); \
66 } \
67 __ret; \
68 })
69
34/* 70/*
35 * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. 71 * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
36 * Since this is generally used to protect other memory information, we 72 * Since this is generally used to protect other memory information, we
37 * use "asm volatile" and "memory" clobbers to prevent gcc from moving 73 * use "asm volatile" and "memory" clobbers to prevent gcc from moving
38 * information around. 74 * information around.
39 */ 75 */
40#define __xchg(x, ptr, size) \ 76#define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "")
41({ \
42 __typeof(*(ptr)) __x = (x); \
43 switch (size) { \
44 case __X86_CASE_B: \
45 { \
46 volatile u8 *__ptr = (volatile u8 *)(ptr); \
47 asm volatile("xchgb %0,%1" \
48 : "=q" (__x), "+m" (*__ptr) \
49 : "0" (__x) \
50 : "memory"); \
51 break; \
52 } \
53 case __X86_CASE_W: \
54 { \
55 volatile u16 *__ptr = (volatile u16 *)(ptr); \
56 asm volatile("xchgw %0,%1" \
57 : "=r" (__x), "+m" (*__ptr) \
58 : "0" (__x) \
59 : "memory"); \
60 break; \
61 } \
62 case __X86_CASE_L: \
63 { \
64 volatile u32 *__ptr = (volatile u32 *)(ptr); \
65 asm volatile("xchgl %0,%1" \
66 : "=r" (__x), "+m" (*__ptr) \
67 : "0" (__x) \
68 : "memory"); \
69 break; \
70 } \
71 case __X86_CASE_Q: \
72 { \
73 volatile u64 *__ptr = (volatile u64 *)(ptr); \
74 asm volatile("xchgq %0,%1" \
75 : "=r" (__x), "+m" (*__ptr) \
76 : "0" (__x) \
77 : "memory"); \
78 break; \
79 } \
80 default: \
81 __xchg_wrong_size(); \
82 } \
83 __x; \
84})
85
86#define xchg(ptr, v) \
87 __xchg((v), (ptr), sizeof(*ptr))
88 77
89/* 78/*
90 * Atomic compare and exchange. Compare OLD with MEM, if identical, 79 * Atomic compare and exchange. Compare OLD with MEM, if identical,
@@ -165,46 +154,80 @@ extern void __xadd_wrong_size(void)
165 __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) 154 __cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
166#endif 155#endif
167 156
168#define __xadd(ptr, inc, lock) \ 157/*
158 * xadd() adds "inc" to "*ptr" and atomically returns the previous
159 * value of "*ptr".
160 *
161 * xadd() is locked when multiple CPUs are online
162 * xadd_sync() is always locked
163 * xadd_local() is never locked
164 */
165#define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock)
166#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX)
167#define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ")
168#define xadd_local(ptr, inc) __xadd((ptr), (inc), "")
169
170#define __add(ptr, inc, lock) \
169 ({ \ 171 ({ \
170 __typeof__ (*(ptr)) __ret = (inc); \ 172 __typeof__ (*(ptr)) __ret = (inc); \
171 switch (sizeof(*(ptr))) { \ 173 switch (sizeof(*(ptr))) { \
172 case __X86_CASE_B: \ 174 case __X86_CASE_B: \
173 asm volatile (lock "xaddb %b0, %1\n" \ 175 asm volatile (lock "addb %b1, %0\n" \
174 : "+r" (__ret), "+m" (*(ptr)) \ 176 : "+m" (*(ptr)) : "ri" (inc) \
175 : : "memory", "cc"); \ 177 : "memory", "cc"); \
176 break; \ 178 break; \
177 case __X86_CASE_W: \ 179 case __X86_CASE_W: \
178 asm volatile (lock "xaddw %w0, %1\n" \ 180 asm volatile (lock "addw %w1, %0\n" \
179 : "+r" (__ret), "+m" (*(ptr)) \ 181 : "+m" (*(ptr)) : "ri" (inc) \
180 : : "memory", "cc"); \ 182 : "memory", "cc"); \
181 break; \ 183 break; \
182 case __X86_CASE_L: \ 184 case __X86_CASE_L: \
183 asm volatile (lock "xaddl %0, %1\n" \ 185 asm volatile (lock "addl %1, %0\n" \
184 : "+r" (__ret), "+m" (*(ptr)) \ 186 : "+m" (*(ptr)) : "ri" (inc) \
185 : : "memory", "cc"); \ 187 : "memory", "cc"); \
186 break; \ 188 break; \
187 case __X86_CASE_Q: \ 189 case __X86_CASE_Q: \
188 asm volatile (lock "xaddq %q0, %1\n" \ 190 asm volatile (lock "addq %1, %0\n" \
189 : "+r" (__ret), "+m" (*(ptr)) \ 191 : "+m" (*(ptr)) : "ri" (inc) \
190 : : "memory", "cc"); \ 192 : "memory", "cc"); \
191 break; \ 193 break; \
192 default: \ 194 default: \
193 __xadd_wrong_size(); \ 195 __add_wrong_size(); \
194 } \ 196 } \
195 __ret; \ 197 __ret; \
196 }) 198 })
197 199
198/* 200/*
199 * xadd() adds "inc" to "*ptr" and atomically returns the previous 201 * add_*() adds "inc" to "*ptr"
200 * value of "*ptr".
201 * 202 *
202 * xadd() is locked when multiple CPUs are online 203 * __add() takes a lock prefix
203 * xadd_sync() is always locked 204 * add_smp() is locked when multiple CPUs are online
204 * xadd_local() is never locked 205 * add_sync() is always locked
205 */ 206 */
206#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) 207#define add_smp(ptr, inc) __add((ptr), (inc), LOCK_PREFIX)
207#define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ") 208#define add_sync(ptr, inc) __add((ptr), (inc), "lock; ")
208#define xadd_local(ptr, inc) __xadd((ptr), (inc), "") 209
210#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \
211({ \
212 bool __ret; \
213 __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \
214 __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \
215 BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
216 BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
217 VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \
218 VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \
219 asm volatile(pfx "cmpxchg%c4b %2; sete %0" \
220 : "=a" (__ret), "+d" (__old2), \
221 "+m" (*(p1)), "+m" (*(p2)) \
222 : "i" (2 * sizeof(long)), "a" (__old1), \
223 "b" (__new1), "c" (__new2)); \
224 __ret; \
225})
226
227#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
228 __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
229
230#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
231 __cmpxchg_double(, p1, p2, o1, o2, n1, n2)
209 232
210#endif /* ASM_X86_CMPXCHG_H */ 233#endif /* ASM_X86_CMPXCHG_H */
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index fbebb07dd80b..53f4b219336b 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -166,52 +166,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
166 166
167#endif 167#endif
168 168
169#define cmpxchg8b(ptr, o1, o2, n1, n2) \
170({ \
171 char __ret; \
172 __typeof__(o2) __dummy; \
173 __typeof__(*(ptr)) __old1 = (o1); \
174 __typeof__(o2) __old2 = (o2); \
175 __typeof__(*(ptr)) __new1 = (n1); \
176 __typeof__(o2) __new2 = (n2); \
177 asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1" \
178 : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\
179 : "a" (__old1), "d"(__old2), \
180 "b" (__new1), "c" (__new2) \
181 : "memory"); \
182 __ret; })
183
184
185#define cmpxchg8b_local(ptr, o1, o2, n1, n2) \
186({ \
187 char __ret; \
188 __typeof__(o2) __dummy; \
189 __typeof__(*(ptr)) __old1 = (o1); \
190 __typeof__(o2) __old2 = (o2); \
191 __typeof__(*(ptr)) __new1 = (n1); \
192 __typeof__(o2) __new2 = (n2); \
193 asm volatile("cmpxchg8b %2; setz %1" \
194 : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\
195 : "a" (__old), "d"(__old2), \
196 "b" (__new1), "c" (__new2), \
197 : "memory"); \
198 __ret; })
199
200
201#define cmpxchg_double(ptr, o1, o2, n1, n2) \
202({ \
203 BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
204 VM_BUG_ON((unsigned long)(ptr) % 8); \
205 cmpxchg8b((ptr), (o1), (o2), (n1), (n2)); \
206})
207
208#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \
209({ \
210 BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
211 VM_BUG_ON((unsigned long)(ptr) % 8); \
212 cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \
213})
214
215#define system_has_cmpxchg_double() cpu_has_cx8 169#define system_has_cmpxchg_double() cpu_has_cx8
216 170
217#endif /* _ASM_X86_CMPXCHG_32_H */ 171#endif /* _ASM_X86_CMPXCHG_32_H */
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 285da02c38fa..614be87f1a9b 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -20,49 +20,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
20 cmpxchg_local((ptr), (o), (n)); \ 20 cmpxchg_local((ptr), (o), (n)); \
21}) 21})
22 22
23#define cmpxchg16b(ptr, o1, o2, n1, n2) \
24({ \
25 char __ret; \
26 __typeof__(o2) __junk; \
27 __typeof__(*(ptr)) __old1 = (o1); \
28 __typeof__(o2) __old2 = (o2); \
29 __typeof__(*(ptr)) __new1 = (n1); \
30 __typeof__(o2) __new2 = (n2); \
31 asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1" \
32 : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
33 : "b"(__new1), "c"(__new2), \
34 "a"(__old1), "d"(__old2)); \
35 __ret; })
36
37
38#define cmpxchg16b_local(ptr, o1, o2, n1, n2) \
39({ \
40 char __ret; \
41 __typeof__(o2) __junk; \
42 __typeof__(*(ptr)) __old1 = (o1); \
43 __typeof__(o2) __old2 = (o2); \
44 __typeof__(*(ptr)) __new1 = (n1); \
45 __typeof__(o2) __new2 = (n2); \
46 asm volatile("cmpxchg16b %2;setz %1" \
47 : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
48 : "b"(__new1), "c"(__new2), \
49 "a"(__old1), "d"(__old2)); \
50 __ret; })
51
52#define cmpxchg_double(ptr, o1, o2, n1, n2) \
53({ \
54 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
55 VM_BUG_ON((unsigned long)(ptr) % 16); \
56 cmpxchg16b((ptr), (o1), (o2), (n1), (n2)); \
57})
58
59#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \
60({ \
61 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
62 VM_BUG_ON((unsigned long)(ptr) % 16); \
63 cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \
64})
65
66#define system_has_cmpxchg_double() cpu_has_cx16 23#define system_has_cmpxchg_double() cpu_has_cx16
67 24
68#endif /* _ASM_X86_CMPXCHG_64_H */ 25#endif /* _ASM_X86_CMPXCHG_64_H */
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9a2d644c08ef..ced283ac79df 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -4,6 +4,7 @@
4#ifdef CONFIG_X86_32 4#ifdef CONFIG_X86_32
5 5
6#include <linux/types.h> 6#include <linux/types.h>
7#include <linux/log2.h>
7 8
8/* 9/*
9 * do_div() is NOT a C function. It wants to return 10 * do_div() is NOT a C function. It wants to return
@@ -21,15 +22,20 @@
21({ \ 22({ \
22 unsigned long __upper, __low, __high, __mod, __base; \ 23 unsigned long __upper, __low, __high, __mod, __base; \
23 __base = (base); \ 24 __base = (base); \
24 asm("":"=a" (__low), "=d" (__high) : "A" (n)); \ 25 if (__builtin_constant_p(__base) && is_power_of_2(__base)) { \
25 __upper = __high; \ 26 __mod = n & (__base - 1); \
26 if (__high) { \ 27 n >>= ilog2(__base); \
27 __upper = __high % (__base); \ 28 } else { \
28 __high = __high / (__base); \ 29 asm("" : "=a" (__low), "=d" (__high) : "A" (n));\
30 __upper = __high; \
31 if (__high) { \
32 __upper = __high % (__base); \
33 __high = __high / (__base); \
34 } \
35 asm("divl %2" : "=a" (__low), "=d" (__mod) \
36 : "rm" (__base), "0" (__low), "1" (__upper)); \
37 asm("" : "=A" (n) : "a" (__low), "d" (__high)); \
29 } \ 38 } \
30 asm("divl %2":"=a" (__low), "=d" (__mod) \
31 : "rm" (__base), "0" (__low), "1" (__upper)); \
32 asm("":"=A" (n) : "a" (__low), "d" (__high)); \
33 __mod; \ 39 __mod; \
34}) 40})
35 41
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 908b96957d88..37782566af24 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -117,7 +117,7 @@ static inline void early_memtest(unsigned long start, unsigned long end)
117 117
118extern unsigned long e820_end_of_ram_pfn(void); 118extern unsigned long e820_end_of_ram_pfn(void);
119extern unsigned long e820_end_of_low_ram_pfn(void); 119extern unsigned long e820_end_of_low_ram_pfn(void);
120extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); 120extern u64 early_reserve_e820(u64 sizet, u64 align);
121 121
122void memblock_x86_fill(void); 122void memblock_x86_fill(void);
123void memblock_find_dma_reserve(void); 123void memblock_find_dma_reserve(void);
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 55e4de613f0e..da0b3ca815b7 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -11,6 +11,7 @@ typedef struct {
11#ifdef CONFIG_X86_LOCAL_APIC 11#ifdef CONFIG_X86_LOCAL_APIC
12 unsigned int apic_timer_irqs; /* arch dependent */ 12 unsigned int apic_timer_irqs; /* arch dependent */
13 unsigned int irq_spurious_count; 13 unsigned int irq_spurious_count;
14 unsigned int icr_read_retry_count;
14#endif 15#endif
15 unsigned int x86_platform_ipis; /* arch dependent */ 16 unsigned int x86_platform_ipis; /* arch dependent */
16 unsigned int apic_perf_irqs; 17 unsigned int apic_perf_irqs;
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index c9e09ea05644..6919e936345b 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -218,7 +218,7 @@ static inline void fpu_fxsave(struct fpu *fpu)
218#ifdef CONFIG_SMP 218#ifdef CONFIG_SMP
219#define safe_address (__per_cpu_offset[0]) 219#define safe_address (__per_cpu_offset[0])
220#else 220#else
221#define safe_address (kstat_cpu(0).cpustat.user) 221#define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER])
222#endif 222#endif
223 223
224/* 224/*
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 88c765e16410..74df3f1eddfd 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -137,6 +137,13 @@ static inline int insn_is_avx(struct insn *insn)
137 return (insn->vex_prefix.value != 0); 137 return (insn->vex_prefix.value != 0);
138} 138}
139 139
140/* Ensure this instruction is decoded completely */
141static inline int insn_complete(struct insn *insn)
142{
143 return insn->opcode.got && insn->modrm.got && insn->sib.got &&
144 insn->displacement.got && insn->immediate.got;
145}
146
140static inline insn_byte_t insn_vex_m_bits(struct insn *insn) 147static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
141{ 148{
142 if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ 149 if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
diff --git a/arch/x86/include/asm/mach_timer.h b/arch/x86/include/asm/mach_timer.h
index 853728519ae9..88d0c3c74c13 100644
--- a/arch/x86/include/asm/mach_timer.h
+++ b/arch/x86/include/asm/mach_timer.h
@@ -15,7 +15,7 @@
15 15
16#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */ 16#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */
17#define CALIBRATE_LATCH \ 17#define CALIBRATE_LATCH \
18 ((CLOCK_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000) 18 ((PIT_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000)
19 19
20static inline void mach_prepare_counter(void) 20static inline void mach_prepare_counter(void)
21{ 21{
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index 01fdf5674e24..0e8e85bb7c51 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -81,8 +81,8 @@ static inline unsigned char current_lock_cmos_reg(void)
81#else 81#else
82#define lock_cmos_prefix(reg) do {} while (0) 82#define lock_cmos_prefix(reg) do {} while (0)
83#define lock_cmos_suffix(reg) do {} while (0) 83#define lock_cmos_suffix(reg) do {} while (0)
84#define lock_cmos(reg) 84#define lock_cmos(reg) do { } while (0)
85#define unlock_cmos() 85#define unlock_cmos() do { } while (0)
86#define do_i_have_lock_cmos() 0 86#define do_i_have_lock_cmos() 0
87#define current_lock_cmos_reg() 0 87#define current_lock_cmos_reg() 0
88#endif 88#endif
diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h
deleted file mode 100644
index 0cd3800f33b9..000000000000
--- a/arch/x86/include/asm/memblock.h
+++ /dev/null
@@ -1,23 +0,0 @@
1#ifndef _X86_MEMBLOCK_H
2#define _X86_MEMBLOCK_H
3
4#define ARCH_DISCARD_MEMBLOCK
5
6u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align);
7
8void memblock_x86_reserve_range(u64 start, u64 end, char *name);
9void memblock_x86_free_range(u64 start, u64 end);
10struct range;
11int __get_free_all_memory_range(struct range **range, int nodeid,
12 unsigned long start_pfn, unsigned long end_pfn);
13int get_free_all_memory_range(struct range **rangep, int nodeid);
14
15void memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
16 unsigned long last_pfn);
17u64 memblock_x86_hole_size(u64 start, u64 end);
18u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align);
19u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit);
20u64 memblock_x86_memory_in_range(u64 addr, u64 limit);
21bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align);
22
23#endif
diff --git a/arch/x86/include/asm/numachip/numachip_csr.h b/arch/x86/include/asm/numachip/numachip_csr.h
new file mode 100644
index 000000000000..660f843df928
--- /dev/null
+++ b/arch/x86/include/asm/numachip/numachip_csr.h
@@ -0,0 +1,167 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Numascale NumaConnect-Specific Header file
7 *
8 * Copyright (C) 2011 Numascale AS. All rights reserved.
9 *
10 * Send feedback to <support@numascale.com>
11 *
12 */
13
14#ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
15#define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
16
17#include <linux/numa.h>
18#include <linux/percpu.h>
19#include <linux/io.h>
20#include <linux/swab.h>
21#include <asm/types.h>
22#include <asm/processor.h>
23
24#define CSR_NODE_SHIFT 16
25#define CSR_NODE_BITS(p) (((unsigned long)(p)) << CSR_NODE_SHIFT)
26#define CSR_NODE_MASK 0x0fff /* 4K nodes */
27
28/* 32K CSR space, b15 indicates geo/non-geo */
29#define CSR_OFFSET_MASK 0x7fffUL
30
31/* Global CSR space covers all 4K possible nodes with 64K CSR space per node */
32#define NUMACHIP_GCSR_BASE 0x3fff00000000ULL
33#define NUMACHIP_GCSR_LIM 0x3fff0fffffffULL
34#define NUMACHIP_GCSR_SIZE (NUMACHIP_GCSR_LIM - NUMACHIP_GCSR_BASE + 1)
35
36/*
37 * Local CSR space starts in global CSR space with "nodeid" = 0xfff0, however
38 * when using the direct mapping on x86_64, both start and size needs to be
39 * aligned with PMD_SIZE which is 2M
40 */
41#define NUMACHIP_LCSR_BASE 0x3ffffe000000ULL
42#define NUMACHIP_LCSR_LIM 0x3fffffffffffULL
43#define NUMACHIP_LCSR_SIZE (NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1)
44
45static inline void *gcsr_address(int node, unsigned long offset)
46{
47 return __va(NUMACHIP_GCSR_BASE | (1UL << 15) |
48 CSR_NODE_BITS(node & CSR_NODE_MASK) | (offset & CSR_OFFSET_MASK));
49}
50
51static inline void *lcsr_address(unsigned long offset)
52{
53 return __va(NUMACHIP_LCSR_BASE | (1UL << 15) |
54 CSR_NODE_BITS(0xfff0) | (offset & CSR_OFFSET_MASK));
55}
56
57static inline unsigned int read_gcsr(int node, unsigned long offset)
58{
59 return swab32(readl(gcsr_address(node, offset)));
60}
61
62static inline void write_gcsr(int node, unsigned long offset, unsigned int val)
63{
64 writel(swab32(val), gcsr_address(node, offset));
65}
66
67static inline unsigned int read_lcsr(unsigned long offset)
68{
69 return swab32(readl(lcsr_address(offset)));
70}
71
72static inline void write_lcsr(unsigned long offset, unsigned int val)
73{
74 writel(swab32(val), lcsr_address(offset));
75}
76
77/* ========================================================================= */
78/* CSR_G0_STATE_CLEAR */
79/* ========================================================================= */
80
81#define CSR_G0_STATE_CLEAR (0x000 + (0 << 12))
82union numachip_csr_g0_state_clear {
83 unsigned int v;
84 struct numachip_csr_g0_state_clear_s {
85 unsigned int _state:2;
86 unsigned int _rsvd_2_6:5;
87 unsigned int _lost:1;
88 unsigned int _rsvd_8_31:24;
89 } s;
90};
91
92/* ========================================================================= */
93/* CSR_G0_NODE_IDS */
94/* ========================================================================= */
95
96#define CSR_G0_NODE_IDS (0x008 + (0 << 12))
97union numachip_csr_g0_node_ids {
98 unsigned int v;
99 struct numachip_csr_g0_node_ids_s {
100 unsigned int _initialid:16;
101 unsigned int _nodeid:12;
102 unsigned int _rsvd_28_31:4;
103 } s;
104};
105
106/* ========================================================================= */
107/* CSR_G3_EXT_IRQ_GEN */
108/* ========================================================================= */
109
110#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12))
111union numachip_csr_g3_ext_irq_gen {
112 unsigned int v;
113 struct numachip_csr_g3_ext_irq_gen_s {
114 unsigned int _vector:8;
115 unsigned int _msgtype:3;
116 unsigned int _index:5;
117 unsigned int _destination_apic_id:16;
118 } s;
119};
120
121/* ========================================================================= */
122/* CSR_G3_EXT_IRQ_STATUS */
123/* ========================================================================= */
124
125#define CSR_G3_EXT_IRQ_STATUS (0x034 + (3 << 12))
126union numachip_csr_g3_ext_irq_status {
127 unsigned int v;
128 struct numachip_csr_g3_ext_irq_status_s {
129 unsigned int _result:32;
130 } s;
131};
132
133/* ========================================================================= */
134/* CSR_G3_EXT_IRQ_DEST */
135/* ========================================================================= */
136
137#define CSR_G3_EXT_IRQ_DEST (0x038 + (3 << 12))
138union numachip_csr_g3_ext_irq_dest {
139 unsigned int v;
140 struct numachip_csr_g3_ext_irq_dest_s {
141 unsigned int _irq:8;
142 unsigned int _rsvd_8_31:24;
143 } s;
144};
145
146/* ========================================================================= */
147/* CSR_G3_NC_ATT_MAP_SELECT */
148/* ========================================================================= */
149
150#define CSR_G3_NC_ATT_MAP_SELECT (0x7fc + (3 << 12))
151union numachip_csr_g3_nc_att_map_select {
152 unsigned int v;
153 struct numachip_csr_g3_nc_att_map_select_s {
154 unsigned int _upper_address_bits:4;
155 unsigned int _select_ram:4;
156 unsigned int _rsvd_8_31:24;
157 } s;
158};
159
160/* ========================================================================= */
161/* CSR_G3_NC_ATT_MAP_SELECT_0-255 */
162/* ========================================================================= */
163
164#define CSR_G3_NC_ATT_MAP_SELECT_0 (0x800 + (3 << 12))
165
166#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */
167
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3470c9d0ebba..529bf07e8067 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -451,23 +451,20 @@ do { \
451#endif /* !CONFIG_M386 */ 451#endif /* !CONFIG_M386 */
452 452
453#ifdef CONFIG_X86_CMPXCHG64 453#ifdef CONFIG_X86_CMPXCHG64
454#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \ 454#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
455({ \ 455({ \
456 char __ret; \ 456 bool __ret; \
457 typeof(o1) __o1 = o1; \ 457 typeof(pcp1) __o1 = (o1), __n1 = (n1); \
458 typeof(o1) __n1 = n1; \ 458 typeof(pcp2) __o2 = (o2), __n2 = (n2); \
459 typeof(o2) __o2 = o2; \
460 typeof(o2) __n2 = n2; \
461 typeof(o2) __dummy = n2; \
462 asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ 459 asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \
463 : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \ 460 : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \
464 : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \ 461 : "b" (__n1), "c" (__n2), "a" (__o1)); \
465 __ret; \ 462 __ret; \
466}) 463})
467 464
468#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) 465#define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
469#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) 466#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
470#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) 467#define irqsafe_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
471#endif /* CONFIG_X86_CMPXCHG64 */ 468#endif /* CONFIG_X86_CMPXCHG64 */
472 469
473/* 470/*
@@ -508,31 +505,23 @@ do { \
508 * it in software. The address used in the cmpxchg16 instruction must be 505 * it in software. The address used in the cmpxchg16 instruction must be
509 * aligned to a 16 byte boundary. 506 * aligned to a 16 byte boundary.
510 */ 507 */
511#ifdef CONFIG_SMP 508#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \
512#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3
513#else
514#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2
515#endif
516#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \
517({ \ 509({ \
518 char __ret; \ 510 bool __ret; \
519 typeof(o1) __o1 = o1; \ 511 typeof(pcp1) __o1 = (o1), __n1 = (n1); \
520 typeof(o1) __n1 = n1; \ 512 typeof(pcp2) __o2 = (o2), __n2 = (n2); \
521 typeof(o2) __o2 = o2; \ 513 alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
522 typeof(o2) __n2 = n2; \ 514 "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \
523 typeof(o2) __dummy; \
524 alternative_io(CMPXCHG16B_EMU_CALL, \
525 "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \
526 X86_FEATURE_CX16, \ 515 X86_FEATURE_CX16, \
527 ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ 516 ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \
528 "S" (&pcp1), "b"(__n1), "c"(__n2), \ 517 "+m" (pcp2), "+d" (__o2)), \
529 "a"(__o1), "d"(__o2) : "memory"); \ 518 "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \
530 __ret; \ 519 __ret; \
531}) 520})
532 521
533#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) 522#define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
534#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) 523#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
535#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) 524#define irqsafe_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
536 525
537#endif 526#endif
538 527
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f61c62f7d5d8..096c975e099f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -57,6 +57,7 @@
57 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) 57 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
58 58
59#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 59#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
60#define ARCH_PERFMON_EVENTS_COUNT 7
60 61
61/* 62/*
62 * Intel "Architectural Performance Monitoring" CPUID 63 * Intel "Architectural Performance Monitoring" CPUID
@@ -72,6 +73,19 @@ union cpuid10_eax {
72 unsigned int full; 73 unsigned int full;
73}; 74};
74 75
76union cpuid10_ebx {
77 struct {
78 unsigned int no_unhalted_core_cycles:1;
79 unsigned int no_instructions_retired:1;
80 unsigned int no_unhalted_reference_cycles:1;
81 unsigned int no_llc_reference:1;
82 unsigned int no_llc_misses:1;
83 unsigned int no_branch_instruction_retired:1;
84 unsigned int no_branch_misses_retired:1;
85 } split;
86 unsigned int full;
87};
88
75union cpuid10_edx { 89union cpuid10_edx {
76 struct { 90 struct {
77 unsigned int num_counters_fixed:5; 91 unsigned int num_counters_fixed:5;
@@ -81,6 +95,15 @@ union cpuid10_edx {
81 unsigned int full; 95 unsigned int full;
82}; 96};
83 97
98struct x86_pmu_capability {
99 int version;
100 int num_counters_gp;
101 int num_counters_fixed;
102 int bit_width_gp;
103 int bit_width_fixed;
104 unsigned int events_mask;
105 int events_mask_len;
106};
84 107
85/* 108/*
86 * Fixed-purpose performance events: 109 * Fixed-purpose performance events:
@@ -89,23 +112,24 @@ union cpuid10_edx {
89/* 112/*
90 * All 3 fixed-mode PMCs are configured via this single MSR: 113 * All 3 fixed-mode PMCs are configured via this single MSR:
91 */ 114 */
92#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d 115#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
93 116
94/* 117/*
95 * The counts are available in three separate MSRs: 118 * The counts are available in three separate MSRs:
96 */ 119 */
97 120
98/* Instr_Retired.Any: */ 121/* Instr_Retired.Any: */
99#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 122#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
100#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) 123#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
101 124
102/* CPU_CLK_Unhalted.Core: */ 125/* CPU_CLK_Unhalted.Core: */
103#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a 126#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
104#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) 127#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
105 128
106/* CPU_CLK_Unhalted.Ref: */ 129/* CPU_CLK_Unhalted.Ref: */
107#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b 130#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
108#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) 131#define X86_PMC_IDX_FIXED_REF_CYCLES (X86_PMC_IDX_FIXED + 2)
132#define X86_PMC_MSK_FIXED_REF_CYCLES (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES)
109 133
110/* 134/*
111 * We model BTS tracing as another fixed-mode PMC. 135 * We model BTS tracing as another fixed-mode PMC.
@@ -202,6 +226,7 @@ struct perf_guest_switch_msr {
202}; 226};
203 227
204extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); 228extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
229extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
205#else 230#else
206static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) 231static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
207{ 232{
@@ -209,6 +234,11 @@ static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
209 return NULL; 234 return NULL;
210} 235}
211 236
237static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
238{
239 memset(cap, 0, sizeof(*cap));
240}
241
212static inline void perf_events_lapic_init(void) { } 242static inline void perf_events_lapic_init(void) { }
213#endif 243#endif
214 244
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 18601c86fab1..49afb3f41eb6 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -703,7 +703,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
703 pte_update(mm, addr, ptep); 703 pte_update(mm, addr, ptep);
704} 704}
705 705
706#define flush_tlb_fix_spurious_fault(vma, address) 706#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
707 707
708#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) 708#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
709 709
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 2dddb317bb39..f8ab3eaad128 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -6,6 +6,7 @@
6 * EFLAGS bits 6 * EFLAGS bits
7 */ 7 */
8#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ 8#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
9#define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */
9#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ 10#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
10#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */ 11#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
11#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ 12#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b650435ffb53..aa9088c26931 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -99,7 +99,6 @@ struct cpuinfo_x86 {
99 u16 apicid; 99 u16 apicid;
100 u16 initial_apicid; 100 u16 initial_apicid;
101 u16 x86_clflush_size; 101 u16 x86_clflush_size;
102#ifdef CONFIG_SMP
103 /* number of cores as seen by the OS: */ 102 /* number of cores as seen by the OS: */
104 u16 booted_cores; 103 u16 booted_cores;
105 /* Physical processor id: */ 104 /* Physical processor id: */
@@ -110,7 +109,6 @@ struct cpuinfo_x86 {
110 u8 compute_unit_id; 109 u8 compute_unit_id;
111 /* Index into per_cpu list: */ 110 /* Index into per_cpu list: */
112 u16 cpu_index; 111 u16 cpu_index;
113#endif
114 u32 microcode; 112 u32 microcode;
115} __attribute__((__aligned__(SMP_CACHE_BYTES))); 113} __attribute__((__aligned__(SMP_CACHE_BYTES)));
116 114
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 972c260919a3..a82c2bf504b6 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -79,23 +79,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
79 return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; 79 return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
80} 80}
81 81
82#if (NR_CPUS < 256)
83static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) 82static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
84{ 83{
85 asm volatile(UNLOCK_LOCK_PREFIX "incb %0" 84 __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
86 : "+m" (lock->head_tail)
87 :
88 : "memory", "cc");
89} 85}
90#else
91static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
92{
93 asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
94 : "+m" (lock->head_tail)
95 :
96 : "memory", "cc");
97}
98#endif
99 86
100static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) 87static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
101{ 88{
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a1fe5c127b52..185b719ec61a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -40,7 +40,8 @@ struct thread_info {
40 */ 40 */
41 __u8 supervisor_stack[0]; 41 __u8 supervisor_stack[0];
42#endif 42#endif
43 int uaccess_err; 43 int sig_on_uaccess_error:1;
44 int uaccess_err:1; /* uaccess failed */
44}; 45};
45 46
46#define INIT_THREAD_INFO(tsk) \ 47#define INIT_THREAD_INFO(tsk) \
@@ -231,6 +232,12 @@ static inline struct thread_info *current_thread_info(void)
231 movq PER_CPU_VAR(kernel_stack),reg ; \ 232 movq PER_CPU_VAR(kernel_stack),reg ; \
232 subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg 233 subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
233 234
235/*
236 * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
237 * a certain register (to be used in assembler memory operands).
238 */
239#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
240
234#endif 241#endif
235 242
236#endif /* !X86_32 */ 243#endif /* !X86_32 */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index c00692476e9f..800f77c60051 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -130,10 +130,8 @@ extern void setup_node_to_cpumask_map(void);
130 .balance_interval = 1, \ 130 .balance_interval = 1, \
131} 131}
132 132
133#ifdef CONFIG_X86_64
134extern int __node_distance(int, int); 133extern int __node_distance(int, int);
135#define node_distance(a, b) __node_distance(a, b) 134#define node_distance(a, b) __node_distance(a, b)
136#endif
137 135
138#else /* !CONFIG_NUMA */ 136#else /* !CONFIG_NUMA */
139 137
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 83e2efd181e2..15d99153a96d 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -51,6 +51,8 @@ extern int unsynchronized_tsc(void);
51extern int check_tsc_unstable(void); 51extern int check_tsc_unstable(void);
52extern unsigned long native_calibrate_tsc(void); 52extern unsigned long native_calibrate_tsc(void);
53 53
54extern int tsc_clocksource_reliable;
55
54/* 56/*
55 * Boot-time check whether the TSCs are synchronized across 57 * Boot-time check whether the TSCs are synchronized across
56 * all CPUs/cores: 58 * all CPUs/cores:
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 36361bf6fdd1..8be5f54d9360 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -462,7 +462,7 @@ struct __large_struct { unsigned long buf[100]; };
462 barrier(); 462 barrier();
463 463
464#define uaccess_catch(err) \ 464#define uaccess_catch(err) \
465 (err) |= current_thread_info()->uaccess_err; \ 465 (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \
466 current_thread_info()->uaccess_err = prev_err; \ 466 current_thread_info()->uaccess_err = prev_err; \
467} while (0) 467} while (0)
468 468
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 1971e652d24b..1ac860a09849 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -7,6 +7,7 @@
7struct mpc_bus; 7struct mpc_bus;
8struct mpc_cpu; 8struct mpc_cpu;
9struct mpc_table; 9struct mpc_table;
10struct cpuinfo_x86;
10 11
11/** 12/**
12 * struct x86_init_mpparse - platform specific mpparse ops 13 * struct x86_init_mpparse - platform specific mpparse ops
@@ -147,6 +148,7 @@ struct x86_init_ops {
147 */ 148 */
148struct x86_cpuinit_ops { 149struct x86_cpuinit_ops {
149 void (*setup_percpu_clockev)(void); 150 void (*setup_percpu_clockev)(void);
151 void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
150}; 152};
151 153
152/** 154/**
@@ -186,5 +188,6 @@ extern struct x86_msi_ops x86_msi;
186 188
187extern void x86_init_noop(void); 189extern void x86_init_noop(void);
188extern void x86_init_uint_noop(unsigned int unused); 190extern void x86_init_uint_noop(unsigned int unused);
191extern void x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node);
189 192
190#endif 193#endif
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4558f0d0822d..ce664f33ea8e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -219,6 +219,8 @@ static int __init
219acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) 219acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
220{ 220{
221 struct acpi_madt_local_x2apic *processor = NULL; 221 struct acpi_madt_local_x2apic *processor = NULL;
222 int apic_id;
223 u8 enabled;
222 224
223 processor = (struct acpi_madt_local_x2apic *)header; 225 processor = (struct acpi_madt_local_x2apic *)header;
224 226
@@ -227,6 +229,8 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
227 229
228 acpi_table_print_madt_entry(header); 230 acpi_table_print_madt_entry(header);
229 231
232 apic_id = processor->local_apic_id;
233 enabled = processor->lapic_flags & ACPI_MADT_ENABLED;
230#ifdef CONFIG_X86_X2APIC 234#ifdef CONFIG_X86_X2APIC
231 /* 235 /*
232 * We need to register disabled CPU as well to permit 236 * We need to register disabled CPU as well to permit
@@ -235,8 +239,10 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
235 * to not preallocating memory for all NR_CPUS 239 * to not preallocating memory for all NR_CPUS
236 * when we use CPU hotplug. 240 * when we use CPU hotplug.
237 */ 241 */
238 acpi_register_lapic(processor->local_apic_id, /* APIC ID */ 242 if (!cpu_has_x2apic && (apic_id >= 0xff) && enabled)
239 processor->lapic_flags & ACPI_MADT_ENABLED); 243 printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
244 else
245 acpi_register_lapic(apic_id, enabled);
240#else 246#else
241 printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); 247 printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
242#endif 248#endif
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 4c39baa8facc..013c1810ce72 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -123,16 +123,14 @@ int amd_get_subcaches(int cpu)
123{ 123{
124 struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; 124 struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
125 unsigned int mask; 125 unsigned int mask;
126 int cuid = 0; 126 int cuid;
127 127
128 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) 128 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
129 return 0; 129 return 0;
130 130
131 pci_read_config_dword(link, 0x1d4, &mask); 131 pci_read_config_dword(link, 0x1d4, &mask);
132 132
133#ifdef CONFIG_SMP
134 cuid = cpu_data(cpu).compute_unit_id; 133 cuid = cpu_data(cpu).compute_unit_id;
135#endif
136 return (mask >> (4 * cuid)) & 0xf; 134 return (mask >> (4 * cuid)) & 0xf;
137} 135}
138 136
@@ -141,7 +139,7 @@ int amd_set_subcaches(int cpu, int mask)
141 static unsigned int reset, ban; 139 static unsigned int reset, ban;
142 struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); 140 struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
143 unsigned int reg; 141 unsigned int reg;
144 int cuid = 0; 142 int cuid;
145 143
146 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf) 144 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
147 return -EINVAL; 145 return -EINVAL;
@@ -159,9 +157,7 @@ int amd_set_subcaches(int cpu, int mask)
159 pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); 157 pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
160 } 158 }
161 159
162#ifdef CONFIG_SMP
163 cuid = cpu_data(cpu).compute_unit_id; 160 cuid = cpu_data(cpu).compute_unit_id;
164#endif
165 mask <<= 4 * cuid; 161 mask <<= 4 * cuid;
166 mask |= (0xf ^ (1 << cuid)) << 26; 162 mask |= (0xf ^ (1 << cuid)) << 26;
167 163
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 3d2661ca6542..6e76c191a835 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -88,13 +88,13 @@ static u32 __init allocate_aperture(void)
88 */ 88 */
89 addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, 89 addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
90 aper_size, aper_size); 90 aper_size, aper_size);
91 if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) { 91 if (!addr || addr + aper_size > GART_MAX_ADDR) {
92 printk(KERN_ERR 92 printk(KERN_ERR
93 "Cannot allocate aperture memory hole (%lx,%uK)\n", 93 "Cannot allocate aperture memory hole (%lx,%uK)\n",
94 addr, aper_size>>10); 94 addr, aper_size>>10);
95 return 0; 95 return 0;
96 } 96 }
97 memblock_x86_reserve_range(addr, addr + aper_size, "aperture64"); 97 memblock_reserve(addr, aper_size);
98 /* 98 /*
99 * Kmemleak should not scan this block as it may not be mapped via the 99 * Kmemleak should not scan this block as it may not be mapped via the
100 * kernel direct mapping. 100 * kernel direct mapping.
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 767fd04f2843..0ae0323b1f9c 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_SMP) += ipi.o
10 10
11ifeq ($(CONFIG_X86_64),y) 11ifeq ($(CONFIG_X86_64),y)
12# APIC probe will depend on the listing order here 12# APIC probe will depend on the listing order here
13obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o
13obj-$(CONFIG_X86_UV) += x2apic_uv_x.o 14obj-$(CONFIG_X86_UV) += x2apic_uv_x.o
14obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o 15obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o
15obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o 16obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f98d84caf94c..2eec05b6d1b8 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -146,16 +146,26 @@ __setup("apicpmtimer", setup_apicpmtimer);
146int x2apic_mode; 146int x2apic_mode;
147#ifdef CONFIG_X86_X2APIC 147#ifdef CONFIG_X86_X2APIC
148/* x2apic enabled before OS handover */ 148/* x2apic enabled before OS handover */
149static int x2apic_preenabled; 149int x2apic_preenabled;
150static int x2apic_disabled;
151static int nox2apic;
150static __init int setup_nox2apic(char *str) 152static __init int setup_nox2apic(char *str)
151{ 153{
152 if (x2apic_enabled()) { 154 if (x2apic_enabled()) {
153 pr_warning("Bios already enabled x2apic, " 155 int apicid = native_apic_msr_read(APIC_ID);
154 "can't enforce nox2apic"); 156
155 return 0; 157 if (apicid >= 255) {
156 } 158 pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
159 apicid);
160 return 0;
161 }
162
163 pr_warning("x2apic already enabled. will disable it\n");
164 } else
165 setup_clear_cpu_cap(X86_FEATURE_X2APIC);
166
167 nox2apic = 1;
157 168
158 setup_clear_cpu_cap(X86_FEATURE_X2APIC);
159 return 0; 169 return 0;
160} 170}
161early_param("nox2apic", setup_nox2apic); 171early_param("nox2apic", setup_nox2apic);
@@ -250,6 +260,7 @@ u32 native_safe_apic_wait_icr_idle(void)
250 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; 260 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
251 if (!send_status) 261 if (!send_status)
252 break; 262 break;
263 inc_irq_stat(icr_read_retry_count);
253 udelay(100); 264 udelay(100);
254 } while (timeout++ < 1000); 265 } while (timeout++ < 1000);
255 266
@@ -876,8 +887,8 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
876 * Besides, if we don't timer interrupts ignore the global 887 * Besides, if we don't timer interrupts ignore the global
877 * interrupt lock, which is the WrongThing (tm) to do. 888 * interrupt lock, which is the WrongThing (tm) to do.
878 */ 889 */
879 exit_idle();
880 irq_enter(); 890 irq_enter();
891 exit_idle();
881 local_apic_timer_interrupt(); 892 local_apic_timer_interrupt();
882 irq_exit(); 893 irq_exit();
883 894
@@ -1431,6 +1442,45 @@ void __init bsp_end_local_APIC_setup(void)
1431} 1442}
1432 1443
1433#ifdef CONFIG_X86_X2APIC 1444#ifdef CONFIG_X86_X2APIC
1445/*
1446 * Need to disable xapic and x2apic at the same time and then enable xapic mode
1447 */
1448static inline void __disable_x2apic(u64 msr)
1449{
1450 wrmsrl(MSR_IA32_APICBASE,
1451 msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
1452 wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
1453}
1454
1455static __init void disable_x2apic(void)
1456{
1457 u64 msr;
1458
1459 if (!cpu_has_x2apic)
1460 return;
1461
1462 rdmsrl(MSR_IA32_APICBASE, msr);
1463 if (msr & X2APIC_ENABLE) {
1464 u32 x2apic_id = read_apic_id();
1465
1466 if (x2apic_id >= 255)
1467 panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
1468
1469 pr_info("Disabling x2apic\n");
1470 __disable_x2apic(msr);
1471
1472 if (nox2apic) {
1473 clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC);
1474 setup_clear_cpu_cap(X86_FEATURE_X2APIC);
1475 }
1476
1477 x2apic_disabled = 1;
1478 x2apic_mode = 0;
1479
1480 register_lapic_address(mp_lapic_addr);
1481 }
1482}
1483
1434void check_x2apic(void) 1484void check_x2apic(void)
1435{ 1485{
1436 if (x2apic_enabled()) { 1486 if (x2apic_enabled()) {
@@ -1441,15 +1491,20 @@ void check_x2apic(void)
1441 1491
1442void enable_x2apic(void) 1492void enable_x2apic(void)
1443{ 1493{
1444 int msr, msr2; 1494 u64 msr;
1495
1496 rdmsrl(MSR_IA32_APICBASE, msr);
1497 if (x2apic_disabled) {
1498 __disable_x2apic(msr);
1499 return;
1500 }
1445 1501
1446 if (!x2apic_mode) 1502 if (!x2apic_mode)
1447 return; 1503 return;
1448 1504
1449 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1450 if (!(msr & X2APIC_ENABLE)) { 1505 if (!(msr & X2APIC_ENABLE)) {
1451 printk_once(KERN_INFO "Enabling x2apic\n"); 1506 printk_once(KERN_INFO "Enabling x2apic\n");
1452 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, msr2); 1507 wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
1453 } 1508 }
1454} 1509}
1455#endif /* CONFIG_X86_X2APIC */ 1510#endif /* CONFIG_X86_X2APIC */
@@ -1486,25 +1541,34 @@ void __init enable_IR_x2apic(void)
1486 ret = save_ioapic_entries(); 1541 ret = save_ioapic_entries();
1487 if (ret) { 1542 if (ret) {
1488 pr_info("Saving IO-APIC state failed: %d\n", ret); 1543 pr_info("Saving IO-APIC state failed: %d\n", ret);
1489 goto out; 1544 return;
1490 } 1545 }
1491 1546
1492 local_irq_save(flags); 1547 local_irq_save(flags);
1493 legacy_pic->mask_all(); 1548 legacy_pic->mask_all();
1494 mask_ioapic_entries(); 1549 mask_ioapic_entries();
1495 1550
1551 if (x2apic_preenabled && nox2apic)
1552 disable_x2apic();
1553
1496 if (dmar_table_init_ret) 1554 if (dmar_table_init_ret)
1497 ret = -1; 1555 ret = -1;
1498 else 1556 else
1499 ret = enable_IR(); 1557 ret = enable_IR();
1500 1558
1559 if (!x2apic_supported())
1560 goto skip_x2apic;
1561
1501 if (ret < 0) { 1562 if (ret < 0) {
1502 /* IR is required if there is APIC ID > 255 even when running 1563 /* IR is required if there is APIC ID > 255 even when running
1503 * under KVM 1564 * under KVM
1504 */ 1565 */
1505 if (max_physical_apicid > 255 || 1566 if (max_physical_apicid > 255 ||
1506 !hypervisor_x2apic_available()) 1567 !hypervisor_x2apic_available()) {
1507 goto nox2apic; 1568 if (x2apic_preenabled)
1569 disable_x2apic();
1570 goto skip_x2apic;
1571 }
1508 /* 1572 /*
1509 * without IR all CPUs can be addressed by IOAPIC/MSI 1573 * without IR all CPUs can be addressed by IOAPIC/MSI
1510 * only in physical mode 1574 * only in physical mode
@@ -1512,8 +1576,10 @@ void __init enable_IR_x2apic(void)
1512 x2apic_force_phys(); 1576 x2apic_force_phys();
1513 } 1577 }
1514 1578
1515 if (ret == IRQ_REMAP_XAPIC_MODE) 1579 if (ret == IRQ_REMAP_XAPIC_MODE) {
1516 goto nox2apic; 1580 pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
1581 goto skip_x2apic;
1582 }
1517 1583
1518 x2apic_enabled = 1; 1584 x2apic_enabled = 1;
1519 1585
@@ -1523,22 +1589,11 @@ void __init enable_IR_x2apic(void)
1523 pr_info("Enabled x2apic\n"); 1589 pr_info("Enabled x2apic\n");
1524 } 1590 }
1525 1591
1526nox2apic: 1592skip_x2apic:
1527 if (ret < 0) /* IR enabling failed */ 1593 if (ret < 0) /* IR enabling failed */
1528 restore_ioapic_entries(); 1594 restore_ioapic_entries();
1529 legacy_pic->restore_mask(); 1595 legacy_pic->restore_mask();
1530 local_irq_restore(flags); 1596 local_irq_restore(flags);
1531
1532out:
1533 if (x2apic_enabled || !x2apic_supported())
1534 return;
1535
1536 if (x2apic_preenabled)
1537 panic("x2apic: enabled by BIOS but kernel init failed.");
1538 else if (ret == IRQ_REMAP_XAPIC_MODE)
1539 pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
1540 else if (ret < 0)
1541 pr_info("x2apic not enabled, IRQ remapping init failed\n");
1542} 1597}
1543 1598
1544#ifdef CONFIG_X86_64 1599#ifdef CONFIG_X86_64
@@ -1809,8 +1864,8 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1809{ 1864{
1810 u32 v; 1865 u32 v;
1811 1866
1812 exit_idle();
1813 irq_enter(); 1867 irq_enter();
1868 exit_idle();
1814 /* 1869 /*
1815 * Check if this really is a spurious interrupt and ACK it 1870 * Check if this really is a spurious interrupt and ACK it
1816 * if it is a vectored one. Just in case... 1871 * if it is a vectored one. Just in case...
@@ -1846,8 +1901,8 @@ void smp_error_interrupt(struct pt_regs *regs)
1846 "Illegal register address", /* APIC Error Bit 7 */ 1901 "Illegal register address", /* APIC Error Bit 7 */
1847 }; 1902 };
1848 1903
1849 exit_idle();
1850 irq_enter(); 1904 irq_enter();
1905 exit_idle();
1851 /* First tickle the hardware, only then report what went on. -- REW */ 1906 /* First tickle the hardware, only then report what went on. -- REW */
1852 v0 = apic_read(APIC_ESR); 1907 v0 = apic_read(APIC_ESR);
1853 apic_write(APIC_ESR, 0); 1908 apic_write(APIC_ESR, 0);
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index f7a41e4cae47..8c3cdded6f2b 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -62,7 +62,7 @@ static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
62 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel 62 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
63 * document number 292116). So here it goes... 63 * document number 292116). So here it goes...
64 */ 64 */
65static void flat_init_apic_ldr(void) 65void flat_init_apic_ldr(void)
66{ 66{
67 unsigned long val; 67 unsigned long val;
68 unsigned long num, id; 68 unsigned long num, id;
@@ -171,9 +171,14 @@ static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
171 return initial_apic_id >> index_msb; 171 return initial_apic_id >> index_msb;
172} 172}
173 173
174static int flat_probe(void)
175{
176 return 1;
177}
178
174static struct apic apic_flat = { 179static struct apic apic_flat = {
175 .name = "flat", 180 .name = "flat",
176 .probe = NULL, 181 .probe = flat_probe,
177 .acpi_madt_oem_check = flat_acpi_madt_oem_check, 182 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
178 .apic_id_registered = flat_apic_id_registered, 183 .apic_id_registered = flat_apic_id_registered,
179 184
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
new file mode 100644
index 000000000000..09d3d8c1cd99
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -0,0 +1,294 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Numascale NumaConnect-Specific APIC Code
7 *
8 * Copyright (C) 2011 Numascale AS. All rights reserved.
9 *
10 * Send feedback to <support@numascale.com>
11 *
12 */
13
14#include <linux/errno.h>
15#include <linux/threads.h>
16#include <linux/cpumask.h>
17#include <linux/string.h>
18#include <linux/kernel.h>
19#include <linux/module.h>
20#include <linux/ctype.h>
21#include <linux/init.h>
22#include <linux/hardirq.h>
23#include <linux/delay.h>
24
25#include <asm/numachip/numachip_csr.h>
26#include <asm/smp.h>
27#include <asm/apic.h>
28#include <asm/ipi.h>
29#include <asm/apic_flat_64.h>
30
31static int numachip_system __read_mostly;
32
33static struct apic apic_numachip __read_mostly;
34
35static unsigned int get_apic_id(unsigned long x)
36{
37 unsigned long value;
38 unsigned int id;
39
40 rdmsrl(MSR_FAM10H_NODE_ID, value);
41 id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U);
42
43 return id;
44}
45
46static unsigned long set_apic_id(unsigned int id)
47{
48 unsigned long x;
49
50 x = ((id & 0xffU) << 24);
51 return x;
52}
53
54static unsigned int read_xapic_id(void)
55{
56 return get_apic_id(apic_read(APIC_ID));
57}
58
59static int numachip_apic_id_registered(void)
60{
61 return physid_isset(read_xapic_id(), phys_cpu_present_map);
62}
63
64static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
65{
66 return initial_apic_id >> index_msb;
67}
68
69static const struct cpumask *numachip_target_cpus(void)
70{
71 return cpu_online_mask;
72}
73
74static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
75{
76 cpumask_clear(retmask);
77 cpumask_set_cpu(cpu, retmask);
78}
79
80static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
81{
82 union numachip_csr_g3_ext_irq_gen int_gen;
83
84 int_gen.s._destination_apic_id = phys_apicid;
85 int_gen.s._vector = 0;
86 int_gen.s._msgtype = APIC_DM_INIT >> 8;
87 int_gen.s._index = 0;
88
89 write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
90
91 int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
92 int_gen.s._vector = start_rip >> 12;
93
94 write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
95
96 atomic_set(&init_deasserted, 1);
97 return 0;
98}
99
100static void numachip_send_IPI_one(int cpu, int vector)
101{
102 union numachip_csr_g3_ext_irq_gen int_gen;
103 int apicid = per_cpu(x86_cpu_to_apicid, cpu);
104
105 int_gen.s._destination_apic_id = apicid;
106 int_gen.s._vector = vector;
107 int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8;
108 int_gen.s._index = 0;
109
110 write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
111}
112
113static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
114{
115 unsigned int cpu;
116
117 for_each_cpu(cpu, mask)
118 numachip_send_IPI_one(cpu, vector);
119}
120
121static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask,
122 int vector)
123{
124 unsigned int this_cpu = smp_processor_id();
125 unsigned int cpu;
126
127 for_each_cpu(cpu, mask) {
128 if (cpu != this_cpu)
129 numachip_send_IPI_one(cpu, vector);
130 }
131}
132
133static void numachip_send_IPI_allbutself(int vector)
134{
135 unsigned int this_cpu = smp_processor_id();
136 unsigned int cpu;
137
138 for_each_online_cpu(cpu) {
139 if (cpu != this_cpu)
140 numachip_send_IPI_one(cpu, vector);
141 }
142}
143
144static void numachip_send_IPI_all(int vector)
145{
146 numachip_send_IPI_mask(cpu_online_mask, vector);
147}
148
149static void numachip_send_IPI_self(int vector)
150{
151 __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
152}
153
154static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
155{
156 int cpu;
157
158 /*
159 * We're using fixed IRQ delivery, can only return one phys APIC ID.
160 * May as well be the first.
161 */
162 cpu = cpumask_first(cpumask);
163 if (likely((unsigned)cpu < nr_cpu_ids))
164 return per_cpu(x86_cpu_to_apicid, cpu);
165
166 return BAD_APICID;
167}
168
169static unsigned int
170numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
171 const struct cpumask *andmask)
172{
173 int cpu;
174
175 /*
176 * We're using fixed IRQ delivery, can only return one phys APIC ID.
177 * May as well be the first.
178 */
179 for_each_cpu_and(cpu, cpumask, andmask) {
180 if (cpumask_test_cpu(cpu, cpu_online_mask))
181 break;
182 }
183 return per_cpu(x86_cpu_to_apicid, cpu);
184}
185
186static int __init numachip_probe(void)
187{
188 return apic == &apic_numachip;
189}
190
191static void __init map_csrs(void)
192{
193 printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n",
194 NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1);
195 init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
196
197 printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n",
198 NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1);
199 init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
200}
201
202static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
203{
204 c->phys_proc_id = node;
205 per_cpu(cpu_llc_id, smp_processor_id()) = node;
206}
207
208static int __init numachip_system_init(void)
209{
210 unsigned int val;
211
212 if (!numachip_system)
213 return 0;
214
215 x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
216
217 map_csrs();
218
219 val = read_lcsr(CSR_G0_NODE_IDS);
220 printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val);
221
222 return 0;
223}
224early_initcall(numachip_system_init);
225
226static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
227{
228 if (!strncmp(oem_id, "NUMASC", 6)) {
229 numachip_system = 1;
230 return 1;
231 }
232
233 return 0;
234}
235
236static struct apic apic_numachip __refconst = {
237
238 .name = "NumaConnect system",
239 .probe = numachip_probe,
240 .acpi_madt_oem_check = numachip_acpi_madt_oem_check,
241 .apic_id_registered = numachip_apic_id_registered,
242
243 .irq_delivery_mode = dest_Fixed,
244 .irq_dest_mode = 0, /* physical */
245
246 .target_cpus = numachip_target_cpus,
247 .disable_esr = 0,
248 .dest_logical = 0,
249 .check_apicid_used = NULL,
250 .check_apicid_present = NULL,
251
252 .vector_allocation_domain = numachip_vector_allocation_domain,
253 .init_apic_ldr = flat_init_apic_ldr,
254
255 .ioapic_phys_id_map = NULL,
256 .setup_apic_routing = NULL,
257 .multi_timer_check = NULL,
258 .cpu_present_to_apicid = default_cpu_present_to_apicid,
259 .apicid_to_cpu_present = NULL,
260 .setup_portio_remap = NULL,
261 .check_phys_apicid_present = default_check_phys_apicid_present,
262 .enable_apic_mode = NULL,
263 .phys_pkg_id = numachip_phys_pkg_id,
264 .mps_oem_check = NULL,
265
266 .get_apic_id = get_apic_id,
267 .set_apic_id = set_apic_id,
268 .apic_id_mask = 0xffU << 24,
269
270 .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid,
271 .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and,
272
273 .send_IPI_mask = numachip_send_IPI_mask,
274 .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
275 .send_IPI_allbutself = numachip_send_IPI_allbutself,
276 .send_IPI_all = numachip_send_IPI_all,
277 .send_IPI_self = numachip_send_IPI_self,
278
279 .wakeup_secondary_cpu = numachip_wakeup_secondary,
280 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
281 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
282 .wait_for_init_deassert = NULL,
283 .smp_callin_clear_local_apic = NULL,
284 .inquire_remote_apic = NULL, /* REMRD not supported */
285
286 .read = native_apic_mem_read,
287 .write = native_apic_mem_write,
288 .icr_read = native_apic_icr_read,
289 .icr_write = native_apic_icr_write,
290 .wait_icr_idle = native_apic_wait_icr_idle,
291 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
292};
293apic_driver(apic_numachip);
294
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6d939d7847e2..fb072754bc1d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2421,8 +2421,8 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2421 unsigned vector, me; 2421 unsigned vector, me;
2422 2422
2423 ack_APIC_irq(); 2423 ack_APIC_irq();
2424 exit_idle();
2425 irq_enter(); 2424 irq_enter();
2425 exit_idle();
2426 2426
2427 me = smp_processor_id(); 2427 me = smp_processor_id();
2428 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 2428 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
@@ -2948,6 +2948,10 @@ static inline void __init check_timer(void)
2948 } 2948 }
2949 local_irq_disable(); 2949 local_irq_disable();
2950 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); 2950 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
2951 if (x2apic_preenabled)
2952 apic_printk(APIC_QUIET, KERN_INFO
2953 "Perhaps problem with the pre-enabled x2apic mode\n"
2954 "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
2951 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " 2955 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
2952 "report. Then try booting with the 'noapic' option.\n"); 2956 "report. Then try booting with the 'noapic' option.\n");
2953out: 2957out:
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 452932d34730..5da1269e8ddc 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -62,7 +62,8 @@ early_param("memory_corruption_check_size", set_corruption_check_size);
62 62
63void __init setup_bios_corruption_check(void) 63void __init setup_bios_corruption_check(void)
64{ 64{
65 u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ 65 phys_addr_t start, end;
66 u64 i;
66 67
67 if (memory_corruption_check == -1) { 68 if (memory_corruption_check == -1) {
68 memory_corruption_check = 69 memory_corruption_check =
@@ -82,28 +83,23 @@ void __init setup_bios_corruption_check(void)
82 83
83 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); 84 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
84 85
85 while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { 86 for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
86 u64 size; 87 start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
87 addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE); 88 PAGE_SIZE, corruption_check_size);
89 end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),
90 PAGE_SIZE, corruption_check_size);
91 if (start >= end)
92 continue;
88 93
89 if (addr == MEMBLOCK_ERROR) 94 memblock_reserve(start, end - start);
90 break; 95 scan_areas[num_scan_areas].addr = start;
91 96 scan_areas[num_scan_areas].size = end - start;
92 if (addr >= corruption_check_size)
93 break;
94
95 if ((addr + size) > corruption_check_size)
96 size = corruption_check_size - addr;
97
98 memblock_x86_reserve_range(addr, addr + size, "SCAN RAM");
99 scan_areas[num_scan_areas].addr = addr;
100 scan_areas[num_scan_areas].size = size;
101 num_scan_areas++;
102 97
103 /* Assume we've already mapped this early memory */ 98 /* Assume we've already mapped this early memory */
104 memset(__va(addr), 0, size); 99 memset(__va(start), 0, end - start);
105 100
106 addr += size; 101 if (++num_scan_areas >= MAX_SCAN_AREAS)
102 break;
107 } 103 }
108 104
109 if (num_scan_areas) 105 if (num_scan_areas)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 0bab2b18bb20..f4773f4aae35 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -148,7 +148,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
148 148
149static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) 149static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
150{ 150{
151#ifdef CONFIG_SMP
152 /* calling is from identify_secondary_cpu() ? */ 151 /* calling is from identify_secondary_cpu() ? */
153 if (!c->cpu_index) 152 if (!c->cpu_index)
154 return; 153 return;
@@ -192,7 +191,6 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
192 191
193valid_k7: 192valid_k7:
194 ; 193 ;
195#endif
196} 194}
197 195
198static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) 196static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
@@ -353,6 +351,13 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
353 if (node == NUMA_NO_NODE) 351 if (node == NUMA_NO_NODE)
354 node = per_cpu(cpu_llc_id, cpu); 352 node = per_cpu(cpu_llc_id, cpu);
355 353
354 /*
355 * If core numbers are inconsistent, it's likely a multi-fabric platform,
356 * so invoke platform-specific handler
357 */
358 if (c->phys_proc_id != node)
359 x86_cpuinit.fixup_cpu_id(c, node);
360
356 if (!node_online(node)) { 361 if (!node_online(node)) {
357 /* 362 /*
358 * Two possibilities here: 363 * Two possibilities here:
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e58d978e0758..159103c0b1f4 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -278,7 +278,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
278 } 278 }
279#ifdef CONFIG_X86_32 279#ifdef CONFIG_X86_32
280 /* Cyrix III family needs CX8 & PGE explicitly enabled. */ 280 /* Cyrix III family needs CX8 & PGE explicitly enabled. */
281 if (c->x86_model >= 6 && c->x86_model <= 9) { 281 if (c->x86_model >= 6 && c->x86_model <= 13) {
282 rdmsr(MSR_VIA_FCR, lo, hi); 282 rdmsr(MSR_VIA_FCR, lo, hi);
283 lo |= (1<<1 | 1<<7); 283 lo |= (1<<1 | 1<<7);
284 wrmsr(MSR_VIA_FCR, lo, hi); 284 wrmsr(MSR_VIA_FCR, lo, hi);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index aa003b13a831..850f2963a420 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -676,9 +676,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
676 if (this_cpu->c_early_init) 676 if (this_cpu->c_early_init)
677 this_cpu->c_early_init(c); 677 this_cpu->c_early_init(c);
678 678
679#ifdef CONFIG_SMP
680 c->cpu_index = 0; 679 c->cpu_index = 0;
681#endif
682 filter_cpuid_features(c, false); 680 filter_cpuid_features(c, false);
683 681
684 setup_smep(c); 682 setup_smep(c);
@@ -764,10 +762,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
764 c->apicid = c->initial_apicid; 762 c->apicid = c->initial_apicid;
765# endif 763# endif
766#endif 764#endif
767
768#ifdef CONFIG_X86_HT
769 c->phys_proc_id = c->initial_apicid; 765 c->phys_proc_id = c->initial_apicid;
770#endif
771 } 766 }
772 767
773 setup_smep(c); 768 setup_smep(c);
@@ -1141,6 +1136,15 @@ static void dbg_restore_debug_regs(void)
1141#endif /* ! CONFIG_KGDB */ 1136#endif /* ! CONFIG_KGDB */
1142 1137
1143/* 1138/*
1139 * Prints an error where the NUMA and configured core-number mismatch and the
1140 * platform didn't override this to fix it up
1141 */
1142void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node)
1143{
1144 pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id);
1145}
1146
1147/*
1144 * cpu_init() initializes state that is per-CPU. Some data is already 1148 * cpu_init() initializes state that is per-CPU. Some data is already
1145 * initialized (naturally) in the bootstrap process, such as the GDT 1149 * initialized (naturally) in the bootstrap process, such as the GDT
1146 * and IDT. We reload them nevertheless, this function acts as a 1150 * and IDT. We reload them nevertheless, this function acts as a
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 1b22dcc51af4..8bacc7826fb3 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -1,5 +1,4 @@
1#ifndef ARCH_X86_CPU_H 1#ifndef ARCH_X86_CPU_H
2
3#define ARCH_X86_CPU_H 2#define ARCH_X86_CPU_H
4 3
5struct cpu_model_info { 4struct cpu_model_info {
@@ -35,6 +34,4 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
35 34
36extern void get_cpu_cap(struct cpuinfo_x86 *c); 35extern void get_cpu_cap(struct cpuinfo_x86 *c);
37extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); 36extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
38extern void get_cpu_cap(struct cpuinfo_x86 *c); 37#endif /* ARCH_X86_CPU_H */
39
40#endif
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 523131213f08..3e6ff6cbf42a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -181,7 +181,6 @@ static void __cpuinit trap_init_f00f_bug(void)
181 181
182static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) 182static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
183{ 183{
184#ifdef CONFIG_SMP
185 /* calling is from identify_secondary_cpu() ? */ 184 /* calling is from identify_secondary_cpu() ? */
186 if (!c->cpu_index) 185 if (!c->cpu_index)
187 return; 186 return;
@@ -198,7 +197,6 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
198 WARN_ONCE(1, "WARNING: SMP operation may be unreliable" 197 WARN_ONCE(1, "WARNING: SMP operation may be unreliable"
199 "with B stepping processors.\n"); 198 "with B stepping processors.\n");
200 } 199 }
201#endif
202} 200}
203 201
204static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) 202static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2af127d4c3d1..e9c9d0aab36a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -119,9 +119,7 @@ void mce_setup(struct mce *m)
119 m->time = get_seconds(); 119 m->time = get_seconds();
120 m->cpuvendor = boot_cpu_data.x86_vendor; 120 m->cpuvendor = boot_cpu_data.x86_vendor;
121 m->cpuid = cpuid_eax(1); 121 m->cpuid = cpuid_eax(1);
122#ifdef CONFIG_SMP
123 m->socketid = cpu_data(m->extcpu).phys_proc_id; 122 m->socketid = cpu_data(m->extcpu).phys_proc_id;
124#endif
125 m->apicid = cpu_data(m->extcpu).initial_apicid; 123 m->apicid = cpu_data(m->extcpu).initial_apicid;
126 rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); 124 rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
127} 125}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f5474218cffe..1d76872b6a45 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -64,11 +64,9 @@ struct threshold_bank {
64}; 64};
65static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); 65static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
66 66
67#ifdef CONFIG_SMP
68static unsigned char shared_bank[NR_BANKS] = { 67static unsigned char shared_bank[NR_BANKS] = {
69 0, 0, 0, 0, 1 68 0, 0, 0, 0, 1
70}; 69};
71#endif
72 70
73static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ 71static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
74 72
@@ -202,10 +200,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
202 200
203 if (!block) 201 if (!block)
204 per_cpu(bank_map, cpu) |= (1 << bank); 202 per_cpu(bank_map, cpu) |= (1 << bank);
205#ifdef CONFIG_SMP
206 if (shared_bank[bank] && c->cpu_core_id) 203 if (shared_bank[bank] && c->cpu_core_id)
207 break; 204 break;
208#endif 205
209 offset = setup_APIC_mce(offset, 206 offset = setup_APIC_mce(offset,
210 (high & MASK_LVTOFF_HI) >> 20); 207 (high & MASK_LVTOFF_HI) >> 20);
211 208
@@ -531,7 +528,6 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
531 528
532 sprintf(name, "threshold_bank%i", bank); 529 sprintf(name, "threshold_bank%i", bank);
533 530
534#ifdef CONFIG_SMP
535 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 531 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
536 i = cpumask_first(cpu_llc_shared_mask(cpu)); 532 i = cpumask_first(cpu_llc_shared_mask(cpu));
537 533
@@ -558,7 +554,6 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
558 554
559 goto out; 555 goto out;
560 } 556 }
561#endif
562 557
563 b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); 558 b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
564 if (!b) { 559 if (!b) {
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 787e06c84ea6..ce215616d5b9 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -397,8 +397,8 @@ static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
397 397
398asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) 398asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
399{ 399{
400 exit_idle();
401 irq_enter(); 400 irq_enter();
401 exit_idle();
402 inc_irq_stat(irq_thermal_count); 402 inc_irq_stat(irq_thermal_count);
403 smp_thermal_vector(); 403 smp_thermal_vector();
404 irq_exit(); 404 irq_exit();
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index d746df2909c9..aa578cadb940 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -19,8 +19,8 @@ void (*mce_threshold_vector)(void) = default_threshold_interrupt;
19 19
20asmlinkage void smp_threshold_interrupt(void) 20asmlinkage void smp_threshold_interrupt(void)
21{ 21{
22 exit_idle();
23 irq_enter(); 22 irq_enter();
23 exit_idle();
24 inc_irq_stat(irq_threshold_count); 24 inc_irq_stat(irq_threshold_count);
25 mce_threshold_vector(); 25 mce_threshold_vector();
26 irq_exit(); 26 irq_exit();
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bda212a0010..5adce1040b11 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event)
484 return event->pmu == &pmu; 484 return event->pmu == &pmu;
485} 485}
486 486
487/*
488 * Event scheduler state:
489 *
490 * Assign events iterating over all events and counters, beginning
491 * with events with least weights first. Keep the current iterator
492 * state in struct sched_state.
493 */
494struct sched_state {
495 int weight;
496 int event; /* event index */
497 int counter; /* counter index */
498 int unassigned; /* number of events to be assigned left */
499 unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
500};
501
502/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
503#define SCHED_STATES_MAX 2
504
505struct perf_sched {
506 int max_weight;
507 int max_events;
508 struct event_constraint **constraints;
509 struct sched_state state;
510 int saved_states;
511 struct sched_state saved[SCHED_STATES_MAX];
512};
513
514/*
515 * Initialize interator that runs through all events and counters.
516 */
517static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
518 int num, int wmin, int wmax)
519{
520 int idx;
521
522 memset(sched, 0, sizeof(*sched));
523 sched->max_events = num;
524 sched->max_weight = wmax;
525 sched->constraints = c;
526
527 for (idx = 0; idx < num; idx++) {
528 if (c[idx]->weight == wmin)
529 break;
530 }
531
532 sched->state.event = idx; /* start with min weight */
533 sched->state.weight = wmin;
534 sched->state.unassigned = num;
535}
536
537static void perf_sched_save_state(struct perf_sched *sched)
538{
539 if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
540 return;
541
542 sched->saved[sched->saved_states] = sched->state;
543 sched->saved_states++;
544}
545
546static bool perf_sched_restore_state(struct perf_sched *sched)
547{
548 if (!sched->saved_states)
549 return false;
550
551 sched->saved_states--;
552 sched->state = sched->saved[sched->saved_states];
553
554 /* continue with next counter: */
555 clear_bit(sched->state.counter++, sched->state.used);
556
557 return true;
558}
559
560/*
561 * Select a counter for the current event to schedule. Return true on
562 * success.
563 */
564static bool __perf_sched_find_counter(struct perf_sched *sched)
565{
566 struct event_constraint *c;
567 int idx;
568
569 if (!sched->state.unassigned)
570 return false;
571
572 if (sched->state.event >= sched->max_events)
573 return false;
574
575 c = sched->constraints[sched->state.event];
576
577 /* Prefer fixed purpose counters */
578 if (x86_pmu.num_counters_fixed) {
579 idx = X86_PMC_IDX_FIXED;
580 for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
581 if (!__test_and_set_bit(idx, sched->state.used))
582 goto done;
583 }
584 }
585 /* Grab the first unused counter starting with idx */
586 idx = sched->state.counter;
587 for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
588 if (!__test_and_set_bit(idx, sched->state.used))
589 goto done;
590 }
591
592 return false;
593
594done:
595 sched->state.counter = idx;
596
597 if (c->overlap)
598 perf_sched_save_state(sched);
599
600 return true;
601}
602
603static bool perf_sched_find_counter(struct perf_sched *sched)
604{
605 while (!__perf_sched_find_counter(sched)) {
606 if (!perf_sched_restore_state(sched))
607 return false;
608 }
609
610 return true;
611}
612
613/*
614 * Go through all unassigned events and find the next one to schedule.
615 * Take events with the least weight first. Return true on success.
616 */
617static bool perf_sched_next_event(struct perf_sched *sched)
618{
619 struct event_constraint *c;
620
621 if (!sched->state.unassigned || !--sched->state.unassigned)
622 return false;
623
624 do {
625 /* next event */
626 sched->state.event++;
627 if (sched->state.event >= sched->max_events) {
628 /* next weight */
629 sched->state.event = 0;
630 sched->state.weight++;
631 if (sched->state.weight > sched->max_weight)
632 return false;
633 }
634 c = sched->constraints[sched->state.event];
635 } while (c->weight != sched->state.weight);
636
637 sched->state.counter = 0; /* start with first counter */
638
639 return true;
640}
641
642/*
643 * Assign a counter for each event.
644 */
645static int perf_assign_events(struct event_constraint **constraints, int n,
646 int wmin, int wmax, int *assign)
647{
648 struct perf_sched sched;
649
650 perf_sched_init(&sched, constraints, n, wmin, wmax);
651
652 do {
653 if (!perf_sched_find_counter(&sched))
654 break; /* failed */
655 if (assign)
656 assign[sched.state.event] = sched.state.counter;
657 } while (perf_sched_next_event(&sched));
658
659 return sched.state.unassigned;
660}
661
487int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) 662int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
488{ 663{
489 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; 664 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
490 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 665 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
491 int i, j, w, wmax, num = 0; 666 int i, wmin, wmax, num = 0;
492 struct hw_perf_event *hwc; 667 struct hw_perf_event *hwc;
493 668
494 bitmap_zero(used_mask, X86_PMC_IDX_MAX); 669 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
495 670
496 for (i = 0; i < n; i++) { 671 for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
497 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); 672 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
498 constraints[i] = c; 673 constraints[i] = c;
674 wmin = min(wmin, c->weight);
675 wmax = max(wmax, c->weight);
499 } 676 }
500 677
501 /* 678 /*
@@ -521,60 +698,12 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
521 if (assign) 698 if (assign)
522 assign[i] = hwc->idx; 699 assign[i] = hwc->idx;
523 } 700 }
524 if (i == n)
525 goto done;
526
527 /*
528 * begin slow path
529 */
530
531 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
532 701
533 /* 702 /* slow path */
534 * weight = number of possible counters 703 if (i != n)
535 * 704 num = perf_assign_events(constraints, n, wmin, wmax, assign);
536 * 1 = most constrained, only works on one counter
537 * wmax = least constrained, works on any counter
538 *
539 * assign events to counters starting with most
540 * constrained events.
541 */
542 wmax = x86_pmu.num_counters;
543 705
544 /* 706 /*
545 * when fixed event counters are present,
546 * wmax is incremented by 1 to account
547 * for one more choice
548 */
549 if (x86_pmu.num_counters_fixed)
550 wmax++;
551
552 for (w = 1, num = n; num && w <= wmax; w++) {
553 /* for each event */
554 for (i = 0; num && i < n; i++) {
555 c = constraints[i];
556 hwc = &cpuc->event_list[i]->hw;
557
558 if (c->weight != w)
559 continue;
560
561 for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
562 if (!test_bit(j, used_mask))
563 break;
564 }
565
566 if (j == X86_PMC_IDX_MAX)
567 break;
568
569 __set_bit(j, used_mask);
570
571 if (assign)
572 assign[i] = j;
573 num--;
574 }
575 }
576done:
577 /*
578 * scheduling failed or is just a simulation, 707 * scheduling failed or is just a simulation,
579 * free resources if necessary 708 * free resources if necessary
580 */ 709 */
@@ -1119,6 +1248,7 @@ static void __init pmu_check_apic(void)
1119 1248
1120static int __init init_hw_perf_events(void) 1249static int __init init_hw_perf_events(void)
1121{ 1250{
1251 struct x86_pmu_quirk *quirk;
1122 struct event_constraint *c; 1252 struct event_constraint *c;
1123 int err; 1253 int err;
1124 1254
@@ -1147,8 +1277,8 @@ static int __init init_hw_perf_events(void)
1147 1277
1148 pr_cont("%s PMU driver.\n", x86_pmu.name); 1278 pr_cont("%s PMU driver.\n", x86_pmu.name);
1149 1279
1150 if (x86_pmu.quirks) 1280 for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
1151 x86_pmu.quirks(); 1281 quirk->func();
1152 1282
1153 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { 1283 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1154 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", 1284 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
@@ -1171,12 +1301,18 @@ static int __init init_hw_perf_events(void)
1171 1301
1172 unconstrained = (struct event_constraint) 1302 unconstrained = (struct event_constraint)
1173 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, 1303 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
1174 0, x86_pmu.num_counters); 1304 0, x86_pmu.num_counters, 0);
1175 1305
1176 if (x86_pmu.event_constraints) { 1306 if (x86_pmu.event_constraints) {
1307 /*
1308 * event on fixed counter2 (REF_CYCLES) only works on this
1309 * counter, so do not extend mask to generic counters
1310 */
1177 for_each_event_constraint(c, x86_pmu.event_constraints) { 1311 for_each_event_constraint(c, x86_pmu.event_constraints) {
1178 if (c->cmask != X86_RAW_EVENT_MASK) 1312 if (c->cmask != X86_RAW_EVENT_MASK
1313 || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
1179 continue; 1314 continue;
1315 }
1180 1316
1181 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; 1317 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
1182 c->weight += x86_pmu.num_counters; 1318 c->weight += x86_pmu.num_counters;
@@ -1566,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
1566 1702
1567 return misc; 1703 return misc;
1568} 1704}
1705
1706void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
1707{
1708 cap->version = x86_pmu.version;
1709 cap->num_counters_gp = x86_pmu.num_counters;
1710 cap->num_counters_fixed = x86_pmu.num_counters_fixed;
1711 cap->bit_width_gp = x86_pmu.cntval_bits;
1712 cap->bit_width_fixed = x86_pmu.cntval_bits;
1713 cap->events_mask = (unsigned int)x86_pmu.events_maskl;
1714 cap->events_mask_len = x86_pmu.events_mask_len;
1715}
1716EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index b9698d40ac4b..8944062f46e2 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -45,6 +45,7 @@ struct event_constraint {
45 u64 code; 45 u64 code;
46 u64 cmask; 46 u64 cmask;
47 int weight; 47 int weight;
48 int overlap;
48}; 49};
49 50
50struct amd_nb { 51struct amd_nb {
@@ -151,15 +152,40 @@ struct cpu_hw_events {
151 void *kfree_on_online; 152 void *kfree_on_online;
152}; 153};
153 154
154#define __EVENT_CONSTRAINT(c, n, m, w) {\ 155#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
155 { .idxmsk64 = (n) }, \ 156 { .idxmsk64 = (n) }, \
156 .code = (c), \ 157 .code = (c), \
157 .cmask = (m), \ 158 .cmask = (m), \
158 .weight = (w), \ 159 .weight = (w), \
160 .overlap = (o), \
159} 161}
160 162
161#define EVENT_CONSTRAINT(c, n, m) \ 163#define EVENT_CONSTRAINT(c, n, m) \
162 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) 164 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
165
166/*
167 * The overlap flag marks event constraints with overlapping counter
168 * masks. This is the case if the counter mask of such an event is not
169 * a subset of any other counter mask of a constraint with an equal or
170 * higher weight, e.g.:
171 *
172 * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
173 * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
174 * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
175 *
176 * The event scheduler may not select the correct counter in the first
177 * cycle because it needs to know which subsequent events will be
178 * scheduled. It may fail to schedule the events then. So we set the
179 * overlap flag for such constraints to give the scheduler a hint which
180 * events to select for counter rescheduling.
181 *
182 * Care must be taken as the rescheduling algorithm is O(n!) which
183 * will increase scheduling cycles for an over-commited system
184 * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
185 * and its counter masks must be kept at a minimum.
186 */
187#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
188 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
163 189
164/* 190/*
165 * Constraint on the Event code. 191 * Constraint on the Event code.
@@ -235,6 +261,11 @@ union perf_capabilities {
235 u64 capabilities; 261 u64 capabilities;
236}; 262};
237 263
264struct x86_pmu_quirk {
265 struct x86_pmu_quirk *next;
266 void (*func)(void);
267};
268
238/* 269/*
239 * struct x86_pmu - generic x86 pmu 270 * struct x86_pmu - generic x86 pmu
240 */ 271 */
@@ -259,6 +290,11 @@ struct x86_pmu {
259 int num_counters_fixed; 290 int num_counters_fixed;
260 int cntval_bits; 291 int cntval_bits;
261 u64 cntval_mask; 292 u64 cntval_mask;
293 union {
294 unsigned long events_maskl;
295 unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
296 };
297 int events_mask_len;
262 int apic; 298 int apic;
263 u64 max_period; 299 u64 max_period;
264 struct event_constraint * 300 struct event_constraint *
@@ -268,7 +304,7 @@ struct x86_pmu {
268 void (*put_event_constraints)(struct cpu_hw_events *cpuc, 304 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
269 struct perf_event *event); 305 struct perf_event *event);
270 struct event_constraint *event_constraints; 306 struct event_constraint *event_constraints;
271 void (*quirks)(void); 307 struct x86_pmu_quirk *quirks;
272 int perfctr_second_write; 308 int perfctr_second_write;
273 309
274 int (*cpu_prepare)(int cpu); 310 int (*cpu_prepare)(int cpu);
@@ -309,6 +345,15 @@ struct x86_pmu {
309 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); 345 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
310}; 346};
311 347
348#define x86_add_quirk(func_) \
349do { \
350 static struct x86_pmu_quirk __quirk __initdata = { \
351 .func = func_, \
352 }; \
353 __quirk.next = x86_pmu.quirks; \
354 x86_pmu.quirks = &__quirk; \
355} while (0)
356
312#define ERF_NO_HT_SHARING 1 357#define ERF_NO_HT_SHARING 1
313#define ERF_HAS_RSP_1 2 358#define ERF_HAS_RSP_1 2
314 359
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index aeefd45697a2..0397b23be8e9 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = {
492static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); 492static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
493static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); 493static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
494static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); 494static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
495static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); 495static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
496static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); 496static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
497static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); 497static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
498 498
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 8d601b18bf9f..3bd37bdf1b8e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
28 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 28 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
29 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 29 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
30 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, 30 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
31 [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
31}; 32};
32 33
33static struct event_constraint intel_core_event_constraints[] __read_mostly = 34static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -45,12 +46,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly =
45{ 46{
46 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 47 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
47 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 48 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
48 /* 49 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
49 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
50 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
51 * ratio between these counters.
52 */
53 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
54 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ 50 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
55 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ 51 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
56 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ 52 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -68,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
68{ 64{
69 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 65 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
70 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 66 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
71 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 67 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
72 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ 68 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
73 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ 69 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
74 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ 70 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -90,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly
90{ 86{
91 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 87 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
92 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 88 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
93 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 89 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
94 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ 90 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
95 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ 91 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
96 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ 92 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -102,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
102{ 98{
103 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 99 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
104 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 100 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
105 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 101 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
106 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ 102 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
107 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 103 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
108 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ 104 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -125,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
125{ 121{
126 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 122 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
127 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 123 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
128 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 124 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
129 EVENT_CONSTRAINT_END 125 EVENT_CONSTRAINT_END
130}; 126};
131 127
@@ -1169,7 +1165,7 @@ again:
1169 */ 1165 */
1170 c = &unconstrained; 1166 c = &unconstrained;
1171 } else if (intel_try_alt_er(event, orig_idx)) { 1167 } else if (intel_try_alt_er(event, orig_idx)) {
1172 raw_spin_unlock(&era->lock); 1168 raw_spin_unlock_irqrestore(&era->lock, flags);
1173 goto again; 1169 goto again;
1174 } 1170 }
1175 raw_spin_unlock_irqrestore(&era->lock, flags); 1171 raw_spin_unlock_irqrestore(&era->lock, flags);
@@ -1519,7 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = {
1519 .guest_get_msrs = intel_guest_get_msrs, 1515 .guest_get_msrs = intel_guest_get_msrs,
1520}; 1516};
1521 1517
1522static void intel_clovertown_quirks(void) 1518static __init void intel_clovertown_quirk(void)
1523{ 1519{
1524 /* 1520 /*
1525 * PEBS is unreliable due to: 1521 * PEBS is unreliable due to:
@@ -1545,19 +1541,60 @@ static void intel_clovertown_quirks(void)
1545 x86_pmu.pebs_constraints = NULL; 1541 x86_pmu.pebs_constraints = NULL;
1546} 1542}
1547 1543
1548static void intel_sandybridge_quirks(void) 1544static __init void intel_sandybridge_quirk(void)
1549{ 1545{
1550 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); 1546 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
1551 x86_pmu.pebs = 0; 1547 x86_pmu.pebs = 0;
1552 x86_pmu.pebs_constraints = NULL; 1548 x86_pmu.pebs_constraints = NULL;
1553} 1549}
1554 1550
1551static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
1552 { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
1553 { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
1554 { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
1555 { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
1556 { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
1557 { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
1558 { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
1559};
1560
1561static __init void intel_arch_events_quirk(void)
1562{
1563 int bit;
1564
1565 /* disable event that reported as not presend by cpuid */
1566 for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
1567 intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
1568 printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
1569 intel_arch_events_map[bit].name);
1570 }
1571}
1572
1573static __init void intel_nehalem_quirk(void)
1574{
1575 union cpuid10_ebx ebx;
1576
1577 ebx.full = x86_pmu.events_maskl;
1578 if (ebx.split.no_branch_misses_retired) {
1579 /*
1580 * Erratum AAJ80 detected, we work it around by using
1581 * the BR_MISP_EXEC.ANY event. This will over-count
1582 * branch-misses, but it's still much better than the
1583 * architectural event which is often completely bogus:
1584 */
1585 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
1586 ebx.split.no_branch_misses_retired = 0;
1587 x86_pmu.events_maskl = ebx.full;
1588 printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
1589 }
1590}
1591
1555__init int intel_pmu_init(void) 1592__init int intel_pmu_init(void)
1556{ 1593{
1557 union cpuid10_edx edx; 1594 union cpuid10_edx edx;
1558 union cpuid10_eax eax; 1595 union cpuid10_eax eax;
1596 union cpuid10_ebx ebx;
1559 unsigned int unused; 1597 unsigned int unused;
1560 unsigned int ebx;
1561 int version; 1598 int version;
1562 1599
1563 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 1600 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -1574,8 +1611,8 @@ __init int intel_pmu_init(void)
1574 * Check whether the Architectural PerfMon supports 1611 * Check whether the Architectural PerfMon supports
1575 * Branch Misses Retired hw_event or not. 1612 * Branch Misses Retired hw_event or not.
1576 */ 1613 */
1577 cpuid(10, &eax.full, &ebx, &unused, &edx.full); 1614 cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
1578 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) 1615 if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
1579 return -ENODEV; 1616 return -ENODEV;
1580 1617
1581 version = eax.split.version_id; 1618 version = eax.split.version_id;
@@ -1589,6 +1626,9 @@ __init int intel_pmu_init(void)
1589 x86_pmu.cntval_bits = eax.split.bit_width; 1626 x86_pmu.cntval_bits = eax.split.bit_width;
1590 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; 1627 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
1591 1628
1629 x86_pmu.events_maskl = ebx.full;
1630 x86_pmu.events_mask_len = eax.split.mask_length;
1631
1592 /* 1632 /*
1593 * Quirk: v2 perfmon does not report fixed-purpose events, so 1633 * Quirk: v2 perfmon does not report fixed-purpose events, so
1594 * assume at least 3 events: 1634 * assume at least 3 events:
@@ -1608,6 +1648,8 @@ __init int intel_pmu_init(void)
1608 1648
1609 intel_ds_init(); 1649 intel_ds_init();
1610 1650
1651 x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
1652
1611 /* 1653 /*
1612 * Install the hw-cache-events table: 1654 * Install the hw-cache-events table:
1613 */ 1655 */
@@ -1617,7 +1659,7 @@ __init int intel_pmu_init(void)
1617 break; 1659 break;
1618 1660
1619 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ 1661 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
1620 x86_pmu.quirks = intel_clovertown_quirks; 1662 x86_add_quirk(intel_clovertown_quirk);
1621 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ 1663 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
1622 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ 1664 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
1623 case 29: /* six-core 45 nm xeon "Dunnington" */ 1665 case 29: /* six-core 45 nm xeon "Dunnington" */
@@ -1651,17 +1693,8 @@ __init int intel_pmu_init(void)
1651 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 1693 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
1652 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; 1694 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
1653 1695
1654 if (ebx & 0x40) { 1696 x86_add_quirk(intel_nehalem_quirk);
1655 /*
1656 * Erratum AAJ80 detected, we work it around by using
1657 * the BR_MISP_EXEC.ANY event. This will over-count
1658 * branch-misses, but it's still much better than the
1659 * architectural event which is often completely bogus:
1660 */
1661 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
1662 1697
1663 pr_cont("erratum AAJ80 worked around, ");
1664 }
1665 pr_cont("Nehalem events, "); 1698 pr_cont("Nehalem events, ");
1666 break; 1699 break;
1667 1700
@@ -1701,7 +1734,7 @@ __init int intel_pmu_init(void)
1701 break; 1734 break;
1702 1735
1703 case 42: /* SandyBridge */ 1736 case 42: /* SandyBridge */
1704 x86_pmu.quirks = intel_sandybridge_quirks; 1737 x86_add_quirk(intel_sandybridge_quirk);
1705 case 45: /* SandyBridge, "Romely-EP" */ 1738 case 45: /* SandyBridge, "Romely-EP" */
1706 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 1739 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1707 sizeof(hw_cache_event_ids)); 1740 sizeof(hw_cache_event_ids));
@@ -1738,5 +1771,6 @@ __init int intel_pmu_init(void)
1738 break; 1771 break;
1739 } 1772 }
1740 } 1773 }
1774
1741 return 0; 1775 return 0;
1742} 1776}
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
index 5abbea297e0c..7b3fe56b1c21 100644
--- a/arch/x86/kernel/cpu/powerflags.c
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -16,5 +16,6 @@ const char *const x86_power_flags[32] = {
16 "100mhzsteps", 16 "100mhzsteps",
17 "hwpstate", 17 "hwpstate",
18 "", /* tsc invariant mapped to constant_tsc */ 18 "", /* tsc invariant mapped to constant_tsc */
19 /* nothing */ 19 "cpb", /* core performance boost */
20 "eff_freq_ro", /* Readonly aperf/mperf */
20}; 21};
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 14b23140e81f..8022c6681485 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -64,12 +64,10 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
64static int show_cpuinfo(struct seq_file *m, void *v) 64static int show_cpuinfo(struct seq_file *m, void *v)
65{ 65{
66 struct cpuinfo_x86 *c = v; 66 struct cpuinfo_x86 *c = v;
67 unsigned int cpu = 0; 67 unsigned int cpu;
68 int i; 68 int i;
69 69
70#ifdef CONFIG_SMP
71 cpu = c->cpu_index; 70 cpu = c->cpu_index;
72#endif
73 seq_printf(m, "processor\t: %u\n" 71 seq_printf(m, "processor\t: %u\n"
74 "vendor_id\t: %s\n" 72 "vendor_id\t: %s\n"
75 "cpu family\t: %d\n" 73 "cpu family\t: %d\n"
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 303a0e48f076..8071e2f3d6eb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -738,35 +738,17 @@ core_initcall(e820_mark_nvs_memory);
738/* 738/*
739 * pre allocated 4k and reserved it in memblock and e820_saved 739 * pre allocated 4k and reserved it in memblock and e820_saved
740 */ 740 */
741u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) 741u64 __init early_reserve_e820(u64 size, u64 align)
742{ 742{
743 u64 size = 0;
744 u64 addr; 743 u64 addr;
745 u64 start;
746 744
747 for (start = startt; ; start += size) { 745 addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
748 start = memblock_x86_find_in_range_size(start, &size, align); 746 if (addr) {
749 if (start == MEMBLOCK_ERROR) 747 e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED);
750 return 0; 748 printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
751 if (size >= sizet) 749 update_e820_saved();
752 break;
753 } 750 }
754 751
755#ifdef CONFIG_X86_32
756 if (start >= MAXMEM)
757 return 0;
758 if (start + size > MAXMEM)
759 size = MAXMEM - start;
760#endif
761
762 addr = round_down(start + size - sizet, align);
763 if (addr < start)
764 return 0;
765 memblock_x86_reserve_range(addr, addr + sizet, "new next");
766 e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
767 printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
768 update_e820_saved();
769
770 return addr; 752 return addr;
771} 753}
772 754
@@ -1090,7 +1072,7 @@ void __init memblock_x86_fill(void)
1090 * We are safe to enable resizing, beause memblock_x86_fill() 1072 * We are safe to enable resizing, beause memblock_x86_fill()
1091 * is rather later for x86 1073 * is rather later for x86
1092 */ 1074 */
1093 memblock_can_resize = 1; 1075 memblock_allow_resize();
1094 1076
1095 for (i = 0; i < e820.nr_map; i++) { 1077 for (i = 0; i < e820.nr_map; i++) {
1096 struct e820entry *ei = &e820.map[i]; 1078 struct e820entry *ei = &e820.map[i];
@@ -1105,22 +1087,36 @@ void __init memblock_x86_fill(void)
1105 memblock_add(ei->addr, ei->size); 1087 memblock_add(ei->addr, ei->size);
1106 } 1088 }
1107 1089
1108 memblock_analyze();
1109 memblock_dump_all(); 1090 memblock_dump_all();
1110} 1091}
1111 1092
1112void __init memblock_find_dma_reserve(void) 1093void __init memblock_find_dma_reserve(void)
1113{ 1094{
1114#ifdef CONFIG_X86_64 1095#ifdef CONFIG_X86_64
1115 u64 free_size_pfn; 1096 u64 nr_pages = 0, nr_free_pages = 0;
1116 u64 mem_size_pfn; 1097 unsigned long start_pfn, end_pfn;
1098 phys_addr_t start, end;
1099 int i;
1100 u64 u;
1101
1117 /* 1102 /*
1118 * need to find out used area below MAX_DMA_PFN 1103 * need to find out used area below MAX_DMA_PFN
1119 * need to use memblock to get free size in [0, MAX_DMA_PFN] 1104 * need to use memblock to get free size in [0, MAX_DMA_PFN]
1120 * at first, and assume boot_mem will not take below MAX_DMA_PFN 1105 * at first, and assume boot_mem will not take below MAX_DMA_PFN
1121 */ 1106 */
1122 mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; 1107 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
1123 free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; 1108 start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN);
1124 set_dma_reserve(mem_size_pfn - free_size_pfn); 1109 end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN);
1110 nr_pages += end_pfn - start_pfn;
1111 }
1112
1113 for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) {
1114 start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
1115 end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
1116 if (start_pfn < end_pfn)
1117 nr_free_pages += end_pfn - start_pfn;
1118 }
1119
1120 set_dma_reserve(nr_pages - nr_free_pages);
1125#endif 1121#endif
1126} 1122}
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f3f6f5344001..22d0e21b4dd7 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -625,6 +625,8 @@ work_notifysig: # deal with pending signals and
625 movl %esp, %eax 625 movl %esp, %eax
626 jne work_notifysig_v86 # returning to kernel-space or 626 jne work_notifysig_v86 # returning to kernel-space or
627 # vm86-space 627 # vm86-space
628 TRACE_IRQS_ON
629 ENABLE_INTERRUPTS(CLBR_NONE)
628 xorl %edx, %edx 630 xorl %edx, %edx
629 call do_notify_resume 631 call do_notify_resume
630 jmp resume_userspace_sig 632 jmp resume_userspace_sig
@@ -638,6 +640,8 @@ work_notifysig_v86:
638#else 640#else
639 movl %esp, %eax 641 movl %esp, %eax
640#endif 642#endif
643 TRACE_IRQS_ON
644 ENABLE_INTERRUPTS(CLBR_NONE)
641 xorl %edx, %edx 645 xorl %edx, %edx
642 call do_notify_resume 646 call do_notify_resume
643 jmp resume_userspace_sig 647 jmp resume_userspace_sig
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index faf8d5e74b0b..a20e1cb9dc87 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -221,7 +221,7 @@ ENDPROC(native_usergs_sysret64)
221 /*CFI_REL_OFFSET ss,0*/ 221 /*CFI_REL_OFFSET ss,0*/
222 pushq_cfi %rax /* rsp */ 222 pushq_cfi %rax /* rsp */
223 CFI_REL_OFFSET rsp,0 223 CFI_REL_OFFSET rsp,0
224 pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */ 224 pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
225 /*CFI_REL_OFFSET rflags,0*/ 225 /*CFI_REL_OFFSET rflags,0*/
226 pushq_cfi $__KERNEL_CS /* cs */ 226 pushq_cfi $__KERNEL_CS /* cs */
227 /*CFI_REL_OFFSET cs,0*/ 227 /*CFI_REL_OFFSET cs,0*/
@@ -411,7 +411,7 @@ ENTRY(ret_from_fork)
411 RESTORE_REST 411 RESTORE_REST
412 412
413 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? 413 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
414 je int_ret_from_sys_call 414 jz retint_restore_args
415 415
416 testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET 416 testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
417 jnz int_ret_from_sys_call 417 jnz int_ret_from_sys_call
@@ -465,7 +465,7 @@ ENTRY(system_call)
465 * after the swapgs, so that it can do the swapgs 465 * after the swapgs, so that it can do the swapgs
466 * for the guest and jump here on syscall. 466 * for the guest and jump here on syscall.
467 */ 467 */
468ENTRY(system_call_after_swapgs) 468GLOBAL(system_call_after_swapgs)
469 469
470 movq %rsp,PER_CPU_VAR(old_rsp) 470 movq %rsp,PER_CPU_VAR(old_rsp)
471 movq PER_CPU_VAR(kernel_stack),%rsp 471 movq PER_CPU_VAR(kernel_stack),%rsp
@@ -478,8 +478,7 @@ ENTRY(system_call_after_swapgs)
478 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 478 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
479 movq %rcx,RIP-ARGOFFSET(%rsp) 479 movq %rcx,RIP-ARGOFFSET(%rsp)
480 CFI_REL_OFFSET rip,RIP-ARGOFFSET 480 CFI_REL_OFFSET rip,RIP-ARGOFFSET
481 GET_THREAD_INFO(%rcx) 481 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
482 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
483 jnz tracesys 482 jnz tracesys
484system_call_fastpath: 483system_call_fastpath:
485 cmpq $__NR_syscall_max,%rax 484 cmpq $__NR_syscall_max,%rax
@@ -496,10 +495,9 @@ ret_from_sys_call:
496 /* edi: flagmask */ 495 /* edi: flagmask */
497sysret_check: 496sysret_check:
498 LOCKDEP_SYS_EXIT 497 LOCKDEP_SYS_EXIT
499 GET_THREAD_INFO(%rcx)
500 DISABLE_INTERRUPTS(CLBR_NONE) 498 DISABLE_INTERRUPTS(CLBR_NONE)
501 TRACE_IRQS_OFF 499 TRACE_IRQS_OFF
502 movl TI_flags(%rcx),%edx 500 movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
503 andl %edi,%edx 501 andl %edi,%edx
504 jnz sysret_careful 502 jnz sysret_careful
505 CFI_REMEMBER_STATE 503 CFI_REMEMBER_STATE
@@ -583,7 +581,7 @@ sysret_audit:
583 /* Do syscall tracing */ 581 /* Do syscall tracing */
584tracesys: 582tracesys:
585#ifdef CONFIG_AUDITSYSCALL 583#ifdef CONFIG_AUDITSYSCALL
586 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) 584 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
587 jz auditsys 585 jz auditsys
588#endif 586#endif
589 SAVE_REST 587 SAVE_REST
@@ -612,8 +610,6 @@ tracesys:
612GLOBAL(int_ret_from_sys_call) 610GLOBAL(int_ret_from_sys_call)
613 DISABLE_INTERRUPTS(CLBR_NONE) 611 DISABLE_INTERRUPTS(CLBR_NONE)
614 TRACE_IRQS_OFF 612 TRACE_IRQS_OFF
615 testl $3,CS-ARGOFFSET(%rsp)
616 je retint_restore_args
617 movl $_TIF_ALLWORK_MASK,%edi 613 movl $_TIF_ALLWORK_MASK,%edi
618 /* edi: mask to check */ 614 /* edi: mask to check */
619GLOBAL(int_with_check) 615GLOBAL(int_with_check)
@@ -953,6 +949,7 @@ END(common_interrupt)
953ENTRY(\sym) 949ENTRY(\sym)
954 INTR_FRAME 950 INTR_FRAME
955 pushq_cfi $~(\num) 951 pushq_cfi $~(\num)
952.Lcommon_\sym:
956 interrupt \do_sym 953 interrupt \do_sym
957 jmp ret_from_intr 954 jmp ret_from_intr
958 CFI_ENDPROC 955 CFI_ENDPROC
@@ -976,13 +973,21 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
976 x86_platform_ipi smp_x86_platform_ipi 973 x86_platform_ipi smp_x86_platform_ipi
977 974
978#ifdef CONFIG_SMP 975#ifdef CONFIG_SMP
979.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ 976 ALIGN
977 INTR_FRAME
978.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
980 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 979 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
981.if NUM_INVALIDATE_TLB_VECTORS > \idx 980.if NUM_INVALIDATE_TLB_VECTORS > \idx
982apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \ 981ENTRY(invalidate_interrupt\idx)
983 invalidate_interrupt\idx smp_invalidate_interrupt 982 pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
983 jmp .Lcommon_invalidate_interrupt0
984 CFI_ADJUST_CFA_OFFSET -8
985END(invalidate_interrupt\idx)
984.endif 986.endif
985.endr 987.endr
988 CFI_ENDPROC
989apicinterrupt INVALIDATE_TLB_VECTOR_START, \
990 invalidate_interrupt0, smp_invalidate_interrupt
986#endif 991#endif
987 992
988apicinterrupt THRESHOLD_APIC_VECTOR \ 993apicinterrupt THRESHOLD_APIC_VECTOR \
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index af0699ba48cf..48d9d4ea1020 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -52,5 +52,5 @@ void __init reserve_ebda_region(void)
52 lowmem = 0x9f000; 52 lowmem = 0x9f000;
53 53
54 /* reserve all memory between lowmem and the 1MB mark */ 54 /* reserve all memory between lowmem and the 1MB mark */
55 memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved"); 55 memblock_reserve(lowmem, 0x100000 - lowmem);
56} 56}
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 3bb08509a7a1..51ff18616d50 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -31,9 +31,8 @@ static void __init i386_default_early_setup(void)
31 31
32void __init i386_start_kernel(void) 32void __init i386_start_kernel(void)
33{ 33{
34 memblock_init(); 34 memblock_reserve(__pa_symbol(&_text),
35 35 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
36 memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
37 36
38#ifdef CONFIG_BLK_DEV_INITRD 37#ifdef CONFIG_BLK_DEV_INITRD
39 /* Reserve INITRD */ 38 /* Reserve INITRD */
@@ -42,7 +41,7 @@ void __init i386_start_kernel(void)
42 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 41 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
43 u64 ramdisk_size = boot_params.hdr.ramdisk_size; 42 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
44 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 43 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
45 memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); 44 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
46 } 45 }
47#endif 46#endif
48 47
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5655c2272adb..3a3b779f41d3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -98,9 +98,8 @@ void __init x86_64_start_reservations(char *real_mode_data)
98{ 98{
99 copy_bootdata(__va(real_mode_data)); 99 copy_bootdata(__va(real_mode_data));
100 100
101 memblock_init(); 101 memblock_reserve(__pa_symbol(&_text),
102 102 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
103 memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
104 103
105#ifdef CONFIG_BLK_DEV_INITRD 104#ifdef CONFIG_BLK_DEV_INITRD
106 /* Reserve INITRD */ 105 /* Reserve INITRD */
@@ -109,7 +108,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
109 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; 108 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
110 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; 109 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
111 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 110 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
112 memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); 111 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
113 } 112 }
114#endif 113#endif
115 114
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 1bb0bf4d92cd..07b0a56a754d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -32,8 +32,6 @@
32#define HPET_MIN_CYCLES 128 32#define HPET_MIN_CYCLES 128
33#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) 33#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
34 34
35#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
36
37/* 35/*
38 * HPET address is set in acpi/boot.c, when an ACPI entry exists 36 * HPET address is set in acpi/boot.c, when an ACPI entry exists
39 */ 37 */
@@ -55,6 +53,11 @@ struct hpet_dev {
55 char name[10]; 53 char name[10];
56}; 54};
57 55
56inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
57{
58 return container_of(evtdev, struct hpet_dev, evt);
59}
60
58inline unsigned int hpet_readl(unsigned int a) 61inline unsigned int hpet_readl(unsigned int a)
59{ 62{
60 return readl(hpet_virt_address + a); 63 return readl(hpet_virt_address + a);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 429e0c92924e..7943e0c21bde 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -74,6 +74,10 @@ int arch_show_interrupts(struct seq_file *p, int prec)
74 for_each_online_cpu(j) 74 for_each_online_cpu(j)
75 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); 75 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
76 seq_printf(p, " IRQ work interrupts\n"); 76 seq_printf(p, " IRQ work interrupts\n");
77 seq_printf(p, "%*s: ", prec, "RTR");
78 for_each_online_cpu(j)
79 seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
80 seq_printf(p, " APIC ICR read retries\n");
77#endif 81#endif
78 if (x86_platform_ipi_callback) { 82 if (x86_platform_ipi_callback) {
79 seq_printf(p, "%*s: ", prec, "PLT"); 83 seq_printf(p, "%*s: ", prec, "PLT");
@@ -136,6 +140,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
136 sum += irq_stats(cpu)->irq_spurious_count; 140 sum += irq_stats(cpu)->irq_spurious_count;
137 sum += irq_stats(cpu)->apic_perf_irqs; 141 sum += irq_stats(cpu)->apic_perf_irqs;
138 sum += irq_stats(cpu)->apic_irq_work_irqs; 142 sum += irq_stats(cpu)->apic_irq_work_irqs;
143 sum += irq_stats(cpu)->icr_read_retry_count;
139#endif 144#endif
140 if (x86_platform_ipi_callback) 145 if (x86_platform_ipi_callback)
141 sum += irq_stats(cpu)->x86_platform_ipis; 146 sum += irq_stats(cpu)->x86_platform_ipis;
@@ -181,8 +186,8 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
181 unsigned vector = ~regs->orig_ax; 186 unsigned vector = ~regs->orig_ax;
182 unsigned irq; 187 unsigned irq;
183 188
184 exit_idle();
185 irq_enter(); 189 irq_enter();
190 exit_idle();
186 191
187 irq = __this_cpu_read(vector_irq[vector]); 192 irq = __this_cpu_read(vector_irq[vector]);
188 193
@@ -209,10 +214,10 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
209 214
210 ack_APIC_irq(); 215 ack_APIC_irq();
211 216
212 exit_idle();
213
214 irq_enter(); 217 irq_enter();
215 218
219 exit_idle();
220
216 inc_irq_stat(x86_platform_ipis); 221 inc_irq_stat(x86_platform_ipis);
217 222
218 if (x86_platform_ipi_callback) 223 if (x86_platform_ipi_callback)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index ea9d5f2f13ef..2889b3d43882 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
50 put_online_cpus(); 50 put_online_cpus();
51} 51}
52 52
53void arch_jump_label_transform_static(struct jump_entry *entry, 53__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
54 enum jump_label_type type) 54 enum jump_label_type type)
55{ 55{
56 __jump_label_transform(entry, type, text_poke_early); 56 __jump_label_transform(entry, type, text_poke_early);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 0741b062a304..ca470e4c92dc 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -564,9 +564,7 @@ void __init default_get_smp_config(unsigned int early)
564 564
565static void __init smp_reserve_memory(struct mpf_intel *mpf) 565static void __init smp_reserve_memory(struct mpf_intel *mpf)
566{ 566{
567 unsigned long size = get_mpc_size(mpf->physptr); 567 memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr));
568
569 memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
570} 568}
571 569
572static int __init smp_scan_config(unsigned long base, unsigned long length) 570static int __init smp_scan_config(unsigned long base, unsigned long length)
@@ -595,7 +593,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
595 mpf, (u64)virt_to_phys(mpf)); 593 mpf, (u64)virt_to_phys(mpf));
596 594
597 mem = virt_to_phys(mpf); 595 mem = virt_to_phys(mpf);
598 memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf"); 596 memblock_reserve(mem, sizeof(*mpf));
599 if (mpf->physptr) 597 if (mpf->physptr)
600 smp_reserve_memory(mpf); 598 smp_reserve_memory(mpf);
601 599
@@ -836,10 +834,8 @@ early_param("alloc_mptable", parse_alloc_mptable_opt);
836 834
837void __init early_reserve_e820_mpc_new(void) 835void __init early_reserve_e820_mpc_new(void)
838{ 836{
839 if (enable_update_mptable && alloc_mptable) { 837 if (enable_update_mptable && alloc_mptable)
840 u64 startt = 0; 838 mpc_new_phys = early_reserve_e820(mpc_new_length, 4);
841 mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
842 }
843} 839}
844 840
845static int __init update_mp_table(void) 841static int __init update_mp_table(void)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ee5d4fbd53b4..15763af7bfe3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -293,7 +293,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
293 regs.orig_ax = -1; 293 regs.orig_ax = -1;
294 regs.ip = (unsigned long) kernel_thread_helper; 294 regs.ip = (unsigned long) kernel_thread_helper;
295 regs.cs = __KERNEL_CS | get_kernel_rpl(); 295 regs.cs = __KERNEL_CS | get_kernel_rpl();
296 regs.flags = X86_EFLAGS_IF | 0x2; 296 regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
297 297
298 /* Ok, create the new process.. */ 298 /* Ok, create the new process.. */
299 return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); 299 return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 795b79f984c2..485204f58cda 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -99,7 +99,8 @@ void cpu_idle(void)
99 99
100 /* endless idle loop with no priority at all */ 100 /* endless idle loop with no priority at all */
101 while (1) { 101 while (1) {
102 tick_nohz_stop_sched_tick(1); 102 tick_nohz_idle_enter();
103 rcu_idle_enter();
103 while (!need_resched()) { 104 while (!need_resched()) {
104 105
105 check_pgt_cache(); 106 check_pgt_cache();
@@ -116,7 +117,8 @@ void cpu_idle(void)
116 pm_idle(); 117 pm_idle();
117 start_critical_timings(); 118 start_critical_timings();
118 } 119 }
119 tick_nohz_restart_sched_tick(); 120 rcu_idle_exit();
121 tick_nohz_idle_exit();
120 preempt_enable_no_resched(); 122 preempt_enable_no_resched();
121 schedule(); 123 schedule();
122 preempt_disable(); 124 preempt_disable();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3bd7e6eebf31..9b9fe4a85c87 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -122,7 +122,7 @@ void cpu_idle(void)
122 122
123 /* endless idle loop with no priority at all */ 123 /* endless idle loop with no priority at all */
124 while (1) { 124 while (1) {
125 tick_nohz_stop_sched_tick(1); 125 tick_nohz_idle_enter();
126 while (!need_resched()) { 126 while (!need_resched()) {
127 127
128 rmb(); 128 rmb();
@@ -139,8 +139,14 @@ void cpu_idle(void)
139 enter_idle(); 139 enter_idle();
140 /* Don't trace irqs off for idle */ 140 /* Don't trace irqs off for idle */
141 stop_critical_timings(); 141 stop_critical_timings();
142
143 /* enter_idle() needs rcu for notifiers */
144 rcu_idle_enter();
145
142 if (cpuidle_idle_call()) 146 if (cpuidle_idle_call())
143 pm_idle(); 147 pm_idle();
148
149 rcu_idle_exit();
144 start_critical_timings(); 150 start_critical_timings();
145 151
146 /* In many cases the interrupt that ended idle 152 /* In many cases the interrupt that ended idle
@@ -149,7 +155,7 @@ void cpu_idle(void)
149 __exit_idle(); 155 __exit_idle();
150 } 156 }
151 157
152 tick_nohz_restart_sched_tick(); 158 tick_nohz_idle_exit();
153 preempt_enable_no_resched(); 159 preempt_enable_no_resched();
154 schedule(); 160 schedule();
155 preempt_disable(); 161 preempt_disable();
@@ -293,13 +299,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
293 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); 299 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
294 300
295 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { 301 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
296 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); 302 p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
303 IO_BITMAP_BYTES, GFP_KERNEL);
297 if (!p->thread.io_bitmap_ptr) { 304 if (!p->thread.io_bitmap_ptr) {
298 p->thread.io_bitmap_max = 0; 305 p->thread.io_bitmap_max = 0;
299 return -ENOMEM; 306 return -ENOMEM;
300 } 307 }
301 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
302 IO_BITMAP_BYTES);
303 set_tsk_thread_flag(p, TIF_IO_BITMAP); 308 set_tsk_thread_flag(p, TIF_IO_BITMAP);
304 } 309 }
305 310
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 82528799c5de..89a04c7b5bb6 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -749,7 +749,8 @@ put:
749/* 749/*
750 * Handle PTRACE_POKEUSR calls for the debug register area. 750 * Handle PTRACE_POKEUSR calls for the debug register area.
751 */ 751 */
752int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) 752static int ptrace_set_debugreg(struct task_struct *tsk, int n,
753 unsigned long val)
753{ 754{
754 struct thread_struct *thread = &(tsk->thread); 755 struct thread_struct *thread = &(tsk->thread);
755 int rc = 0; 756 int rc = 0;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cf0ef986cb6d..d05444ac2aea 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -306,7 +306,8 @@ static void __init cleanup_highmap(void)
306static void __init reserve_brk(void) 306static void __init reserve_brk(void)
307{ 307{
308 if (_brk_end > _brk_start) 308 if (_brk_end > _brk_start)
309 memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK"); 309 memblock_reserve(__pa(_brk_start),
310 __pa(_brk_end) - __pa(_brk_start));
310 311
311 /* Mark brk area as locked down and no longer taking any 312 /* Mark brk area as locked down and no longer taking any
312 new allocations */ 313 new allocations */
@@ -331,13 +332,13 @@ static void __init relocate_initrd(void)
331 ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, 332 ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
332 PAGE_SIZE); 333 PAGE_SIZE);
333 334
334 if (ramdisk_here == MEMBLOCK_ERROR) 335 if (!ramdisk_here)
335 panic("Cannot find place for new RAMDISK of size %lld\n", 336 panic("Cannot find place for new RAMDISK of size %lld\n",
336 ramdisk_size); 337 ramdisk_size);
337 338
338 /* Note: this includes all the lowmem currently occupied by 339 /* Note: this includes all the lowmem currently occupied by
339 the initrd, we rely on that fact to keep the data intact. */ 340 the initrd, we rely on that fact to keep the data intact. */
340 memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK"); 341 memblock_reserve(ramdisk_here, area_size);
341 initrd_start = ramdisk_here + PAGE_OFFSET; 342 initrd_start = ramdisk_here + PAGE_OFFSET;
342 initrd_end = initrd_start + ramdisk_size; 343 initrd_end = initrd_start + ramdisk_size;
343 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", 344 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -393,7 +394,7 @@ static void __init reserve_initrd(void)
393 initrd_start = 0; 394 initrd_start = 0;
394 395
395 if (ramdisk_size >= (end_of_lowmem>>1)) { 396 if (ramdisk_size >= (end_of_lowmem>>1)) {
396 memblock_x86_free_range(ramdisk_image, ramdisk_end); 397 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
397 printk(KERN_ERR "initrd too large to handle, " 398 printk(KERN_ERR "initrd too large to handle, "
398 "disabling initrd\n"); 399 "disabling initrd\n");
399 return; 400 return;
@@ -416,7 +417,7 @@ static void __init reserve_initrd(void)
416 417
417 relocate_initrd(); 418 relocate_initrd();
418 419
419 memblock_x86_free_range(ramdisk_image, ramdisk_end); 420 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
420} 421}
421#else 422#else
422static void __init reserve_initrd(void) 423static void __init reserve_initrd(void)
@@ -490,15 +491,13 @@ static void __init memblock_x86_reserve_range_setup_data(void)
490{ 491{
491 struct setup_data *data; 492 struct setup_data *data;
492 u64 pa_data; 493 u64 pa_data;
493 char buf[32];
494 494
495 if (boot_params.hdr.version < 0x0209) 495 if (boot_params.hdr.version < 0x0209)
496 return; 496 return;
497 pa_data = boot_params.hdr.setup_data; 497 pa_data = boot_params.hdr.setup_data;
498 while (pa_data) { 498 while (pa_data) {
499 data = early_memremap(pa_data, sizeof(*data)); 499 data = early_memremap(pa_data, sizeof(*data));
500 sprintf(buf, "setup data %x", data->type); 500 memblock_reserve(pa_data, sizeof(*data) + data->len);
501 memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
502 pa_data = data->next; 501 pa_data = data->next;
503 early_iounmap(data, sizeof(*data)); 502 early_iounmap(data, sizeof(*data));
504 } 503 }
@@ -554,7 +553,7 @@ static void __init reserve_crashkernel(void)
554 crash_base = memblock_find_in_range(alignment, 553 crash_base = memblock_find_in_range(alignment,
555 CRASH_KERNEL_ADDR_MAX, crash_size, alignment); 554 CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
556 555
557 if (crash_base == MEMBLOCK_ERROR) { 556 if (!crash_base) {
558 pr_info("crashkernel reservation failed - No suitable area found.\n"); 557 pr_info("crashkernel reservation failed - No suitable area found.\n");
559 return; 558 return;
560 } 559 }
@@ -568,7 +567,7 @@ static void __init reserve_crashkernel(void)
568 return; 567 return;
569 } 568 }
570 } 569 }
571 memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL"); 570 memblock_reserve(crash_base, crash_size);
572 571
573 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " 572 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
574 "for crashkernel (System RAM: %ldMB)\n", 573 "for crashkernel (System RAM: %ldMB)\n",
@@ -626,7 +625,7 @@ static __init void reserve_ibft_region(void)
626 addr = find_ibft_region(&size); 625 addr = find_ibft_region(&size);
627 626
628 if (size) 627 if (size)
629 memblock_x86_reserve_range(addr, addr + size, "* ibft"); 628 memblock_reserve(addr, size);
630} 629}
631 630
632static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; 631static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9f548cb4a958..e38e21754eea 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -840,7 +840,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
840 pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); 840 pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
841 841
842 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || 842 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
843 !physid_isset(apicid, phys_cpu_present_map)) { 843 !physid_isset(apicid, phys_cpu_present_map) ||
844 (!x2apic_mode && apicid >= 255)) {
844 printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); 845 printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
845 return -EINVAL; 846 return -EINVAL;
846 } 847 }
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index a91ae7709b49..a73b61055ad6 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -14,11 +14,11 @@ void __init setup_trampolines(void)
14 14
15 /* Has to be in very low memory so we can execute real-mode AP code. */ 15 /* Has to be in very low memory so we can execute real-mode AP code. */
16 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); 16 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
17 if (mem == MEMBLOCK_ERROR) 17 if (!mem)
18 panic("Cannot allocate trampoline\n"); 18 panic("Cannot allocate trampoline\n");
19 19
20 x86_trampoline_base = __va(mem); 20 x86_trampoline_base = __va(mem);
21 memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE"); 21 memblock_reserve(mem, size);
22 22
23 printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", 23 printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
24 x86_trampoline_base, (unsigned long long)mem, size); 24 x86_trampoline_base, (unsigned long long)mem, size);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a8e3eb83466c..fa1191fb679d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -306,15 +306,10 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
306 == NOTIFY_STOP) 306 == NOTIFY_STOP)
307 return; 307 return;
308#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ 308#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
309#ifdef CONFIG_KPROBES 309
310 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) 310 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
311 == NOTIFY_STOP) 311 == NOTIFY_STOP)
312 return; 312 return;
313#else
314 if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
315 == NOTIFY_STOP)
316 return;
317#endif
318 313
319 preempt_conditional_sti(regs); 314 preempt_conditional_sti(regs);
320 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); 315 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index db483369f10b..2c9cf0fd78f5 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -35,7 +35,7 @@ static int __read_mostly tsc_unstable;
35 erroneous rdtsc usage on !cpu_has_tsc processors */ 35 erroneous rdtsc usage on !cpu_has_tsc processors */
36static int __read_mostly tsc_disabled = -1; 36static int __read_mostly tsc_disabled = -1;
37 37
38static int tsc_clocksource_reliable; 38int tsc_clocksource_reliable;
39/* 39/*
40 * Scheduler clock - returns current time in nanosec units. 40 * Scheduler clock - returns current time in nanosec units.
41 */ 41 */
@@ -178,11 +178,11 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
178} 178}
179 179
180#define CAL_MS 10 180#define CAL_MS 10
181#define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) 181#define CAL_LATCH (PIT_TICK_RATE / (1000 / CAL_MS))
182#define CAL_PIT_LOOPS 1000 182#define CAL_PIT_LOOPS 1000
183 183
184#define CAL2_MS 50 184#define CAL2_MS 50
185#define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS)) 185#define CAL2_LATCH (PIT_TICK_RATE / (1000 / CAL2_MS))
186#define CAL2_PIT_LOOPS 5000 186#define CAL2_PIT_LOOPS 5000
187 187
188 188
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 0aa5fed8b9e6..9eba29b46cb7 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -113,7 +113,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
113 if (unsynchronized_tsc()) 113 if (unsynchronized_tsc())
114 return; 114 return;
115 115
116 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { 116 if (tsc_clocksource_reliable) {
117 if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) 117 if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
118 pr_info( 118 pr_info(
119 "Skipped synchronization checks as TSC is reliable.\n"); 119 "Skipped synchronization checks as TSC is reliable.\n");
@@ -172,7 +172,7 @@ void __cpuinit check_tsc_sync_target(void)
172{ 172{
173 int cpus = 2; 173 int cpus = 2;
174 174
175 if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) 175 if (unsynchronized_tsc() || tsc_clocksource_reliable)
176 return; 176 return;
177 177
178 /* 178 /*
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index e4d4a22e8b94..b07ba9393564 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
57 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), 57 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
58}; 58};
59 59
60static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE; 60static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
61 61
62static int __init vsyscall_setup(char *str) 62static int __init vsyscall_setup(char *str)
63{ 63{
@@ -140,11 +140,40 @@ static int addr_to_vsyscall_nr(unsigned long addr)
140 return nr; 140 return nr;
141} 141}
142 142
143static bool write_ok_or_segv(unsigned long ptr, size_t size)
144{
145 /*
146 * XXX: if access_ok, get_user, and put_user handled
147 * sig_on_uaccess_error, this could go away.
148 */
149
150 if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
151 siginfo_t info;
152 struct thread_struct *thread = &current->thread;
153
154 thread->error_code = 6; /* user fault, no page, write */
155 thread->cr2 = ptr;
156 thread->trap_no = 14;
157
158 memset(&info, 0, sizeof(info));
159 info.si_signo = SIGSEGV;
160 info.si_errno = 0;
161 info.si_code = SEGV_MAPERR;
162 info.si_addr = (void __user *)ptr;
163
164 force_sig_info(SIGSEGV, &info, current);
165 return false;
166 } else {
167 return true;
168 }
169}
170
143bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) 171bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
144{ 172{
145 struct task_struct *tsk; 173 struct task_struct *tsk;
146 unsigned long caller; 174 unsigned long caller;
147 int vsyscall_nr; 175 int vsyscall_nr;
176 int prev_sig_on_uaccess_error;
148 long ret; 177 long ret;
149 178
150 /* 179 /*
@@ -180,35 +209,65 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
180 if (seccomp_mode(&tsk->seccomp)) 209 if (seccomp_mode(&tsk->seccomp))
181 do_exit(SIGKILL); 210 do_exit(SIGKILL);
182 211
212 /*
213 * With a real vsyscall, page faults cause SIGSEGV. We want to
214 * preserve that behavior to make writing exploits harder.
215 */
216 prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
217 current_thread_info()->sig_on_uaccess_error = 1;
218
219 /*
220 * 0 is a valid user pointer (in the access_ok sense) on 32-bit and
221 * 64-bit, so we don't need to special-case it here. For all the
222 * vsyscalls, 0 means "don't write anything" not "write it at
223 * address 0".
224 */
225 ret = -EFAULT;
183 switch (vsyscall_nr) { 226 switch (vsyscall_nr) {
184 case 0: 227 case 0:
228 if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
229 !write_ok_or_segv(regs->si, sizeof(struct timezone)))
230 break;
231
185 ret = sys_gettimeofday( 232 ret = sys_gettimeofday(
186 (struct timeval __user *)regs->di, 233 (struct timeval __user *)regs->di,
187 (struct timezone __user *)regs->si); 234 (struct timezone __user *)regs->si);
188 break; 235 break;
189 236
190 case 1: 237 case 1:
238 if (!write_ok_or_segv(regs->di, sizeof(time_t)))
239 break;
240
191 ret = sys_time((time_t __user *)regs->di); 241 ret = sys_time((time_t __user *)regs->di);
192 break; 242 break;
193 243
194 case 2: 244 case 2:
245 if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
246 !write_ok_or_segv(regs->si, sizeof(unsigned)))
247 break;
248
195 ret = sys_getcpu((unsigned __user *)regs->di, 249 ret = sys_getcpu((unsigned __user *)regs->di,
196 (unsigned __user *)regs->si, 250 (unsigned __user *)regs->si,
197 0); 251 0);
198 break; 252 break;
199 } 253 }
200 254
255 current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
256
201 if (ret == -EFAULT) { 257 if (ret == -EFAULT) {
202 /* 258 /* Bad news -- userspace fed a bad pointer to a vsyscall. */
203 * Bad news -- userspace fed a bad pointer to a vsyscall.
204 *
205 * With a real vsyscall, that would have caused SIGSEGV.
206 * To make writing reliable exploits using the emulated
207 * vsyscalls harder, generate SIGSEGV here as well.
208 */
209 warn_bad_vsyscall(KERN_INFO, regs, 259 warn_bad_vsyscall(KERN_INFO, regs,
210 "vsyscall fault (exploit attempt?)"); 260 "vsyscall fault (exploit attempt?)");
211 goto sigsegv; 261
262 /*
263 * If we failed to generate a signal for any reason,
264 * generate one here. (This should be impossible.)
265 */
266 if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
267 !sigismember(&tsk->pending.signal, SIGSEGV)))
268 goto sigsegv;
269
270 return true; /* Don't emulate the ret. */
212 } 271 }
213 272
214 regs->ax = ret; 273 regs->ax = ret;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c1d6cd549397..91f83e21b989 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -92,6 +92,7 @@ struct x86_init_ops x86_init __initdata = {
92 92
93struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { 93struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
94 .setup_percpu_clockev = setup_secondary_APIC_clock, 94 .setup_percpu_clockev = setup_secondary_APIC_clock,
95 .fixup_cpu_id = x86_default_fixup_cpu_id,
95}; 96};
96 97
97static void default_nmi_init(void) { }; 98static void default_nmi_init(void) { };
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 76e3f1cd0369..405f2620392f 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -338,11 +338,15 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
338 return HRTIMER_NORESTART; 338 return HRTIMER_NORESTART;
339} 339}
340 340
341static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) 341static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
342{ 342{
343 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
343 struct kvm_timer *pt = &ps->pit_timer; 344 struct kvm_timer *pt = &ps->pit_timer;
344 s64 interval; 345 s64 interval;
345 346
347 if (!irqchip_in_kernel(kvm))
348 return;
349
346 interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); 350 interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
347 351
348 pr_debug("create pit timer, interval is %llu nsec\n", interval); 352 pr_debug("create pit timer, interval is %llu nsec\n", interval);
@@ -394,13 +398,13 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
394 /* FIXME: enhance mode 4 precision */ 398 /* FIXME: enhance mode 4 precision */
395 case 4: 399 case 4:
396 if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) { 400 if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) {
397 create_pit_timer(ps, val, 0); 401 create_pit_timer(kvm, val, 0);
398 } 402 }
399 break; 403 break;
400 case 2: 404 case 2:
401 case 3: 405 case 3:
402 if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){ 406 if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){
403 create_pit_timer(ps, val, 1); 407 create_pit_timer(kvm, val, 1);
404 } 408 }
405 break; 409 break;
406 default: 410 default:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c38efd7b792e..4c938da2ba00 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -602,7 +602,6 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
602{ 602{
603 struct kvm_cpuid_entry2 *best; 603 struct kvm_cpuid_entry2 *best;
604 struct kvm_lapic *apic = vcpu->arch.apic; 604 struct kvm_lapic *apic = vcpu->arch.apic;
605 u32 timer_mode_mask;
606 605
607 best = kvm_find_cpuid_entry(vcpu, 1, 0); 606 best = kvm_find_cpuid_entry(vcpu, 1, 0);
608 if (!best) 607 if (!best)
@@ -615,15 +614,12 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
615 best->ecx |= bit(X86_FEATURE_OSXSAVE); 614 best->ecx |= bit(X86_FEATURE_OSXSAVE);
616 } 615 }
617 616
618 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 617 if (apic) {
619 best->function == 0x1) { 618 if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
620 best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER); 619 apic->lapic_timer.timer_mode_mask = 3 << 17;
621 timer_mode_mask = 3 << 17; 620 else
622 } else 621 apic->lapic_timer.timer_mode_mask = 1 << 17;
623 timer_mode_mask = 1 << 17; 622 }
624
625 if (apic)
626 apic->lapic_timer.timer_mode_mask = timer_mode_mask;
627} 623}
628 624
629int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 625int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -2135,6 +2131,9 @@ int kvm_dev_ioctl_check_extension(long ext)
2135 case KVM_CAP_TSC_CONTROL: 2131 case KVM_CAP_TSC_CONTROL:
2136 r = kvm_has_tsc_control; 2132 r = kvm_has_tsc_control;
2137 break; 2133 break;
2134 case KVM_CAP_TSC_DEADLINE_TIMER:
2135 r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
2136 break;
2138 default: 2137 default:
2139 r = 0; 2138 r = 0;
2140 break; 2139 break;
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
index 46fc4ee09fc4..88ad5fbda6e1 100644
--- a/arch/x86/lib/inat.c
+++ b/arch/x86/lib/inat.c
@@ -82,9 +82,16 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
82 const insn_attr_t *table; 82 const insn_attr_t *table;
83 if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) 83 if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
84 return 0; 84 return 0;
85 table = inat_avx_tables[vex_m][vex_p]; 85 /* At first, this checks the master table */
86 table = inat_avx_tables[vex_m][0];
86 if (!table) 87 if (!table)
87 return 0; 88 return 0;
89 if (!inat_is_group(table[opcode]) && vex_p) {
90 /* If this is not a group, get attribute directly */
91 table = inat_avx_tables[vex_m][vex_p];
92 if (!table)
93 return 0;
94 }
88 return table[opcode]; 95 return table[opcode];
89} 96}
90 97
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 374562ed6704..5a1f9f3e3fbb 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -202,7 +202,7 @@ void insn_get_opcode(struct insn *insn)
202 m = insn_vex_m_bits(insn); 202 m = insn_vex_m_bits(insn);
203 p = insn_vex_p_bits(insn); 203 p = insn_vex_p_bits(insn);
204 insn->attr = inat_get_avx_attribute(op, m, p); 204 insn->attr = inat_get_avx_attribute(op, m, p);
205 if (!inat_accept_vex(insn->attr)) 205 if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
206 insn->attr = 0; /* This instruction is bad */ 206 insn->attr = 0; /* This instruction is bad */
207 goto end; /* VEX has only 1 byte for opcode */ 207 goto end; /* VEX has only 1 byte for opcode */
208 } 208 }
@@ -249,6 +249,8 @@ void insn_get_modrm(struct insn *insn)
249 pfx = insn_last_prefix(insn); 249 pfx = insn_last_prefix(insn);
250 insn->attr = inat_get_group_attribute(mod, pfx, 250 insn->attr = inat_get_group_attribute(mod, pfx,
251 insn->attr); 251 insn->attr);
252 if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
253 insn->attr = 0; /* This is bad */
252 } 254 }
253 } 255 }
254 256
diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c
index 82004d2bf05e..bd59090825db 100644
--- a/arch/x86/lib/string_32.c
+++ b/arch/x86/lib/string_32.c
@@ -164,15 +164,13 @@ EXPORT_SYMBOL(strchr);
164size_t strlen(const char *s) 164size_t strlen(const char *s)
165{ 165{
166 int d0; 166 int d0;
167 int res; 167 size_t res;
168 asm volatile("repne\n\t" 168 asm volatile("repne\n\t"
169 "scasb\n\t" 169 "scasb"
170 "notl %0\n\t"
171 "decl %0"
172 : "=c" (res), "=&D" (d0) 170 : "=c" (res), "=&D" (d0)
173 : "1" (s), "a" (0), "0" (0xffffffffu) 171 : "1" (s), "a" (0), "0" (0xffffffffu)
174 : "memory"); 172 : "memory");
175 return res; 173 return ~res - 1;
176} 174}
177EXPORT_SYMBOL(strlen); 175EXPORT_SYMBOL(strlen);
178#endif 176#endif
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index a793da5e560e..5b83c51c12e0 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -1,5 +1,11 @@
1# x86 Opcode Maps 1# x86 Opcode Maps
2# 2#
3# This is (mostly) based on following documentations.
4# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2
5# (#325383-040US, October 2011)
6# - Intel(R) Advanced Vector Extensions Programming Reference
7# (#319433-011,JUNE 2011).
8#
3#<Opcode maps> 9#<Opcode maps>
4# Table: table-name 10# Table: table-name
5# Referrer: escaped-name 11# Referrer: escaped-name
@@ -15,10 +21,13 @@
15# EndTable 21# EndTable
16# 22#
17# AVX Superscripts 23# AVX Superscripts
18# (VEX): this opcode can accept VEX prefix. 24# (v): this opcode requires VEX prefix.
19# (oVEX): this opcode requires VEX prefix. 25# (v1): this opcode only supports 128bit VEX.
20# (o128): this opcode only supports 128bit VEX. 26#
21# (o256): this opcode only supports 256bit VEX. 27# Last Prefix Superscripts
28# - (66): the last prefix is 0x66
29# - (F3): the last prefix is 0xF3
30# - (F2): the last prefix is 0xF2
22# 31#
23 32
24Table: one byte opcode 33Table: one byte opcode
@@ -199,8 +208,8 @@ a0: MOV AL,Ob
199a1: MOV rAX,Ov 208a1: MOV rAX,Ov
200a2: MOV Ob,AL 209a2: MOV Ob,AL
201a3: MOV Ov,rAX 210a3: MOV Ov,rAX
202a4: MOVS/B Xb,Yb 211a4: MOVS/B Yb,Xb
203a5: MOVS/W/D/Q Xv,Yv 212a5: MOVS/W/D/Q Yv,Xv
204a6: CMPS/B Xb,Yb 213a6: CMPS/B Xb,Yb
205a7: CMPS/W/D Xv,Yv 214a7: CMPS/W/D Xv,Yv
206a8: TEST AL,Ib 215a8: TEST AL,Ib
@@ -233,8 +242,8 @@ c0: Grp2 Eb,Ib (1A)
233c1: Grp2 Ev,Ib (1A) 242c1: Grp2 Ev,Ib (1A)
234c2: RETN Iw (f64) 243c2: RETN Iw (f64)
235c3: RETN 244c3: RETN
236c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) 245c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
237c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) 246c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
238c6: Grp11 Eb,Ib (1A) 247c6: Grp11 Eb,Ib (1A)
239c7: Grp11 Ev,Iz (1A) 248c7: Grp11 Ev,Iz (1A)
240c8: ENTER Iw,Ib 249c8: ENTER Iw,Ib
@@ -320,14 +329,19 @@ AVXcode: 1
320# 3DNow! uses the last imm byte as opcode extension. 329# 3DNow! uses the last imm byte as opcode extension.
3210f: 3DNow! Pq,Qq,Ib 3300f: 3DNow! Pq,Qq,Ib
322# 0x0f 0x10-0x1f 331# 0x0f 0x10-0x1f
32310: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) 332# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands
32411: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) 333# but it actually has operands. And also, vmovss and vmovsd only accept 128bit.
32512: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) 334# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form.
32613: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) 335# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming
32714: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) 336# Reference A.1
32815: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) 33710: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
32916: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) 33811: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
33017: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) 33912: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
34013: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
34114: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
34215: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
34316: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
34417: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
33118: Grp16 (1A) 34518: Grp16 (1A)
33219: 34619:
3331a: 3471a:
@@ -345,14 +359,14 @@ AVXcode: 1
34525: 35925:
34626: 36026:
34727: 36127:
34828: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) 36228: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
34929: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) 36329: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
3502a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) 3642a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
3512b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) 3652b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
3522c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) 3662c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
3532d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) 3672d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
3542e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) 3682e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1)
3552f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) 3692f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1)
356# 0x0f 0x30-0x3f 370# 0x0f 0x30-0x3f
35730: WRMSR 37130: WRMSR
35831: RDTSC 37231: RDTSC
@@ -388,65 +402,66 @@ AVXcode: 1
3884e: CMOVLE/NG Gv,Ev 4024e: CMOVLE/NG Gv,Ev
3894f: CMOVNLE/G Gv,Ev 4034f: CMOVNLE/G Gv,Ev
390# 0x0f 0x50-0x5f 404# 0x0f 0x50-0x5f
39150: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) 40550: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
39251: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) 40651: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
39352: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) 40752: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
39453: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) 40853: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
39554: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) 40954: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
39655: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) 41055: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
39756: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) 41156: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
39857: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) 41257: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
39958: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) 41358: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
40059: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) 41459: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
4015a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) 4155a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
4025b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) 4165b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
4035c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) 4175c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
4045d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) 4185d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
4055e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) 4195e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
4065f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) 4205f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
407# 0x0f 0x60-0x6f 421# 0x0f 0x60-0x6f
40860: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) 42260: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
40961: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) 42361: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
41062: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) 42462: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
41163: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) 42563: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
41264: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) 42664: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
41365: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) 42765: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
41466: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) 42866: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
41567: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) 42967: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
41668: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) 43068: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
41769: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) 43169: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
4186a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) 4326a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
4196b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) 4336b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
4206c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) 4346c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
4216d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) 4356d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
4226e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) 4366e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
4236f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) 4376f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
424# 0x0f 0x70-0x7f 438# 0x0f 0x70-0x7f
42570: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) 43970: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
42671: Grp12 (1A) 44071: Grp12 (1A)
42772: Grp13 (1A) 44172: Grp13 (1A)
42873: Grp14 (1A) 44273: Grp14 (1A)
42974: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) 44374: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
43075: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) 44475: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
43176: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) 44576: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
43277: emms/vzeroupper/vzeroall (VEX) 446# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX.
43378: VMREAD Ed/q,Gd/q 44777: emms | vzeroupper | vzeroall
43479: VMWRITE Gd/q,Ed/q 44878: VMREAD Ey,Gy
44979: VMWRITE Gy,Ey
4357a: 4507a:
4367b: 4517b:
4377c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) 4527c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
4387d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) 4537d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
4397e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) 4547e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
4407f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) 4557f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
441# 0x0f 0x80-0x8f 456# 0x0f 0x80-0x8f
44280: JO Jz (f64) 45780: JO Jz (f64)
44381: JNO Jz (f64) 45881: JNO Jz (f64)
44482: JB/JNAE/JC Jz (f64) 45982: JB/JC/JNAE Jz (f64)
44583: JNB/JAE/JNC Jz (f64) 46083: JAE/JNB/JNC Jz (f64)
44684: JZ/JE Jz (f64) 46184: JE/JZ Jz (f64)
44785: JNZ/JNE Jz (f64) 46285: JNE/JNZ Jz (f64)
44886: JBE/JNA Jz (f64) 46386: JBE/JNA Jz (f64)
44987: JNBE/JA Jz (f64) 46487: JA/JNBE Jz (f64)
45088: JS Jz (f64) 46588: JS Jz (f64)
45189: JNS Jz (f64) 46689: JNS Jz (f64)
4528a: JP/JPE Jz (f64) 4678a: JP/JPE Jz (f64)
@@ -502,18 +517,18 @@ b8: JMPE | POPCNT Gv,Ev (F3)
502b9: Grp10 (1A) 517b9: Grp10 (1A)
503ba: Grp8 Ev,Ib (1A) 518ba: Grp8 Ev,Ib (1A)
504bb: BTC Ev,Gv 519bb: BTC Ev,Gv
505bc: BSF Gv,Ev 520bc: BSF Gv,Ev | TZCNT Gv,Ev (F3)
506bd: BSR Gv,Ev 521bd: BSR Gv,Ev | LZCNT Gv,Ev (F3)
507be: MOVSX Gv,Eb 522be: MOVSX Gv,Eb
508bf: MOVSX Gv,Ew 523bf: MOVSX Gv,Ew
509# 0x0f 0xc0-0xcf 524# 0x0f 0xc0-0xcf
510c0: XADD Eb,Gb 525c0: XADD Eb,Gb
511c1: XADD Ev,Gv 526c1: XADD Ev,Gv
512c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) 527c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
513c3: movnti Md/q,Gd/q 528c3: movnti My,Gy
514c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) 529c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
515c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) 530c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
516c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) 531c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
517c7: Grp9 (1A) 532c7: Grp9 (1A)
518c8: BSWAP RAX/EAX/R8/R8D 533c8: BSWAP RAX/EAX/R8/R8D
519c9: BSWAP RCX/ECX/R9/R9D 534c9: BSWAP RCX/ECX/R9/R9D
@@ -524,55 +539,55 @@ cd: BSWAP RBP/EBP/R13/R13D
524ce: BSWAP RSI/ESI/R14/R14D 539ce: BSWAP RSI/ESI/R14/R14D
525cf: BSWAP RDI/EDI/R15/R15D 540cf: BSWAP RDI/EDI/R15/R15D
526# 0x0f 0xd0-0xdf 541# 0x0f 0xd0-0xdf
527d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) 542d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
528d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) 543d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
529d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) 544d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
530d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) 545d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
531d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) 546d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
532d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) 547d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
533d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) 548d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
534d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) 549d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
535d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) 550d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
536d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) 551d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
537da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) 552da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
538db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) 553db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
539dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) 554dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
540dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) 555dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
541de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) 556de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
542df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) 557df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
543# 0x0f 0xe0-0xef 558# 0x0f 0xe0-0xef
544e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) 559e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
545e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) 560e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
546e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) 561e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
547e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) 562e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
548e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) 563e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
549e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) 564e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
550e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) 565e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
551e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) 566e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
552e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) 567e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
553e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) 568e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
554ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) 569ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
555eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) 570eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
556ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) 571ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
557ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) 572ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
558ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) 573ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
559ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) 574ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
560# 0x0f 0xf0-0xff 575# 0x0f 0xf0-0xff
561f0: lddqu Vdq,Mdq (F2),(VEX) 576f0: vlddqu Vx,Mx (F2)
562f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) 577f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
563f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) 578f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
564f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) 579f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
565f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) 580f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
566f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) 581f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
567f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) 582f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
568f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) 583f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
569f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) 584f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
570f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) 585f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
571fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) 586fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
572fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) 587fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
573fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) 588fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
574fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) 589fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
575fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) 590fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
576ff: 591ff:
577EndTable 592EndTable
578 593
@@ -580,155 +595,193 @@ Table: 3-byte opcode 1 (0x0f 0x38)
580Referrer: 3-byte escape 1 595Referrer: 3-byte escape 1
581AVXcode: 2 596AVXcode: 2
582# 0x0f 0x38 0x00-0x0f 597# 0x0f 0x38 0x00-0x0f
58300: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) 59800: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
58401: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) 59901: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
58502: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) 60002: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
58603: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) 60103: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
58704: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) 60204: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
58805: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) 60305: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
58906: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) 60406: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
59007: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) 60507: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
59108: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) 60608: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
59209: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) 60709: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
5930a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) 6080a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
5940b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) 6090b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
5950c: Vpermilps /r (66),(oVEX) 6100c: vpermilps Vx,Hx,Wx (66),(v)
5960d: Vpermilpd /r (66),(oVEX) 6110d: vpermilpd Vx,Hx,Wx (66),(v)
5970e: vtestps /r (66),(oVEX) 6120e: vtestps Vx,Wx (66),(v)
5980f: vtestpd /r (66),(oVEX) 6130f: vtestpd Vx,Wx (66),(v)
599# 0x0f 0x38 0x10-0x1f 614# 0x0f 0x38 0x10-0x1f
60010: pblendvb Vdq,Wdq (66) 61510: pblendvb Vdq,Wdq (66)
60111: 61611:
60212: 61712:
60313: 61813: vcvtph2ps Vx,Wx,Ib (66),(v)
60414: blendvps Vdq,Wdq (66) 61914: blendvps Vdq,Wdq (66)
60515: blendvpd Vdq,Wdq (66) 62015: blendvpd Vdq,Wdq (66)
60616: 62116: vpermps Vqq,Hqq,Wqq (66),(v)
60717: ptest Vdq,Wdq (66),(VEX) 62217: vptest Vx,Wx (66)
60818: vbroadcastss /r (66),(oVEX) 62318: vbroadcastss Vx,Wd (66),(v)
60919: vbroadcastsd /r (66),(oVEX),(o256) 62419: vbroadcastsd Vqq,Wq (66),(v)
6101a: vbroadcastf128 /r (66),(oVEX),(o256) 6251a: vbroadcastf128 Vqq,Mdq (66),(v)
6111b: 6261b:
6121c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) 6271c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
6131d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) 6281d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
6141e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) 6291e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
6151f: 6301f:
616# 0x0f 0x38 0x20-0x2f 631# 0x0f 0x38 0x20-0x2f
61720: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) 63220: vpmovsxbw Vx,Ux/Mq (66),(v1)
61821: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) 63321: vpmovsxbd Vx,Ux/Md (66),(v1)
61922: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) 63422: vpmovsxbq Vx,Ux/Mw (66),(v1)
62023: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) 63523: vpmovsxwd Vx,Ux/Mq (66),(v1)
62124: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) 63624: vpmovsxwq Vx,Ux/Md (66),(v1)
62225: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) 63725: vpmovsxdq Vx,Ux/Mq (66),(v1)
62326: 63826:
62427: 63927:
62528: pmuldq Vdq,Wdq (66),(VEX),(o128) 64028: vpmuldq Vx,Hx,Wx (66),(v1)
62629: pcmpeqq Vdq,Wdq (66),(VEX),(o128) 64129: vpcmpeqq Vx,Hx,Wx (66),(v1)
6272a: movntdqa Vdq,Mdq (66),(VEX),(o128) 6422a: vmovntdqa Vx,Mx (66),(v1)
6282b: packusdw Vdq,Wdq (66),(VEX),(o128) 6432b: vpackusdw Vx,Hx,Wx (66),(v1)
6292c: vmaskmovps(ld) /r (66),(oVEX) 6442c: vmaskmovps Vx,Hx,Mx (66),(v)
6302d: vmaskmovpd(ld) /r (66),(oVEX) 6452d: vmaskmovpd Vx,Hx,Mx (66),(v)
6312e: vmaskmovps(st) /r (66),(oVEX) 6462e: vmaskmovps Mx,Hx,Vx (66),(v)
6322f: vmaskmovpd(st) /r (66),(oVEX) 6472f: vmaskmovpd Mx,Hx,Vx (66),(v)
633# 0x0f 0x38 0x30-0x3f 648# 0x0f 0x38 0x30-0x3f
63430: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) 64930: vpmovzxbw Vx,Ux/Mq (66),(v1)
63531: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) 65031: vpmovzxbd Vx,Ux/Md (66),(v1)
63632: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) 65132: vpmovzxbq Vx,Ux/Mw (66),(v1)
63733: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) 65233: vpmovzxwd Vx,Ux/Mq (66),(v1)
63834: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) 65334: vpmovzxwq Vx,Ux/Md (66),(v1)
63935: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) 65435: vpmovzxdq Vx,Ux/Mq (66),(v1)
64036: 65536: vpermd Vqq,Hqq,Wqq (66),(v)
64137: pcmpgtq Vdq,Wdq (66),(VEX),(o128) 65637: vpcmpgtq Vx,Hx,Wx (66),(v1)
64238: pminsb Vdq,Wdq (66),(VEX),(o128) 65738: vpminsb Vx,Hx,Wx (66),(v1)
64339: pminsd Vdq,Wdq (66),(VEX),(o128) 65839: vpminsd Vx,Hx,Wx (66),(v1)
6443a: pminuw Vdq,Wdq (66),(VEX),(o128) 6593a: vpminuw Vx,Hx,Wx (66),(v1)
6453b: pminud Vdq,Wdq (66),(VEX),(o128) 6603b: vpminud Vx,Hx,Wx (66),(v1)
6463c: pmaxsb Vdq,Wdq (66),(VEX),(o128) 6613c: vpmaxsb Vx,Hx,Wx (66),(v1)
6473d: pmaxsd Vdq,Wdq (66),(VEX),(o128) 6623d: vpmaxsd Vx,Hx,Wx (66),(v1)
6483e: pmaxuw Vdq,Wdq (66),(VEX),(o128) 6633e: vpmaxuw Vx,Hx,Wx (66),(v1)
6493f: pmaxud Vdq,Wdq (66),(VEX),(o128) 6643f: vpmaxud Vx,Hx,Wx (66),(v1)
650# 0x0f 0x38 0x40-0x8f 665# 0x0f 0x38 0x40-0x8f
65140: pmulld Vdq,Wdq (66),(VEX),(o128) 66640: vpmulld Vx,Hx,Wx (66),(v1)
65241: phminposuw Vdq,Wdq (66),(VEX),(o128) 66741: vphminposuw Vdq,Wdq (66),(v1)
65380: INVEPT Gd/q,Mdq (66) 66842:
65481: INVPID Gd/q,Mdq (66) 66943:
67044:
67145: vpsrlvd/q Vx,Hx,Wx (66),(v)
67246: vpsravd Vx,Hx,Wx (66),(v)
67347: vpsllvd/q Vx,Hx,Wx (66),(v)
674# Skip 0x48-0x57
67558: vpbroadcastd Vx,Wx (66),(v)
67659: vpbroadcastq Vx,Wx (66),(v)
6775a: vbroadcasti128 Vqq,Mdq (66),(v)
678# Skip 0x5b-0x77
67978: vpbroadcastb Vx,Wx (66),(v)
68079: vpbroadcastw Vx,Wx (66),(v)
681# Skip 0x7a-0x7f
68280: INVEPT Gy,Mdq (66)
68381: INVPID Gy,Mdq (66)
68482: INVPCID Gy,Mdq (66)
6858c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
6868e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
655# 0x0f 0x38 0x90-0xbf (FMA) 687# 0x0f 0x38 0x90-0xbf (FMA)
65696: vfmaddsub132pd/ps /r (66),(VEX) 68890: vgatherdd/q Vx,Hx,Wx (66),(v)
65797: vfmsubadd132pd/ps /r (66),(VEX) 68991: vgatherqd/q Vx,Hx,Wx (66),(v)
65898: vfmadd132pd/ps /r (66),(VEX) 69092: vgatherdps/d Vx,Hx,Wx (66),(v)
65999: vfmadd132sd/ss /r (66),(VEX),(o128) 69193: vgatherqps/d Vx,Hx,Wx (66),(v)
6609a: vfmsub132pd/ps /r (66),(VEX) 69294:
6619b: vfmsub132sd/ss /r (66),(VEX),(o128) 69395:
6629c: vfnmadd132pd/ps /r (66),(VEX) 69496: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
6639d: vfnmadd132sd/ss /r (66),(VEX),(o128) 69597: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
6649e: vfnmsub132pd/ps /r (66),(VEX) 69698: vfmadd132ps/d Vx,Hx,Wx (66),(v)
6659f: vfnmsub132sd/ss /r (66),(VEX),(o128) 69799: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
666a6: vfmaddsub213pd/ps /r (66),(VEX) 6989a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
667a7: vfmsubadd213pd/ps /r (66),(VEX) 6999b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
668a8: vfmadd213pd/ps /r (66),(VEX) 7009c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
669a9: vfmadd213sd/ss /r (66),(VEX),(o128) 7019d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
670aa: vfmsub213pd/ps /r (66),(VEX) 7029e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
671ab: vfmsub213sd/ss /r (66),(VEX),(o128) 7039f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
672ac: vfnmadd213pd/ps /r (66),(VEX) 704a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
673ad: vfnmadd213sd/ss /r (66),(VEX),(o128) 705a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
674ae: vfnmsub213pd/ps /r (66),(VEX) 706a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
675af: vfnmsub213sd/ss /r (66),(VEX),(o128) 707a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
676b6: vfmaddsub231pd/ps /r (66),(VEX) 708aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
677b7: vfmsubadd231pd/ps /r (66),(VEX) 709ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
678b8: vfmadd231pd/ps /r (66),(VEX) 710ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
679b9: vfmadd231sd/ss /r (66),(VEX),(o128) 711ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
680ba: vfmsub231pd/ps /r (66),(VEX) 712ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
681bb: vfmsub231sd/ss /r (66),(VEX),(o128) 713af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
682bc: vfnmadd231pd/ps /r (66),(VEX) 714b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
683bd: vfnmadd231sd/ss /r (66),(VEX),(o128) 715b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
684be: vfnmsub231pd/ps /r (66),(VEX) 716b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
685bf: vfnmsub231sd/ss /r (66),(VEX),(o128) 717b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
718ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
719bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
720bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
721bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
722be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
723bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
686# 0x0f 0x38 0xc0-0xff 724# 0x0f 0x38 0xc0-0xff
687db: aesimc Vdq,Wdq (66),(VEX),(o128) 725db: VAESIMC Vdq,Wdq (66),(v1)
688dc: aesenc Vdq,Wdq (66),(VEX),(o128) 726dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
689dd: aesenclast Vdq,Wdq (66),(VEX),(o128) 727dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
690de: aesdec Vdq,Wdq (66),(VEX),(o128) 728de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
691df: aesdeclast Vdq,Wdq (66),(VEX),(o128) 729df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
692f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) 730f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2)
693f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) 731f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2)
732f3: ANDN Gy,By,Ey (v)
733f4: Grp17 (1A)
734f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
735f6: MULX By,Gy,rDX,Ey (F2),(v)
736f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
694EndTable 737EndTable
695 738
696Table: 3-byte opcode 2 (0x0f 0x3a) 739Table: 3-byte opcode 2 (0x0f 0x3a)
697Referrer: 3-byte escape 2 740Referrer: 3-byte escape 2
698AVXcode: 3 741AVXcode: 3
699# 0x0f 0x3a 0x00-0xff 742# 0x0f 0x3a 0x00-0xff
70004: vpermilps /r,Ib (66),(oVEX) 74300: vpermq Vqq,Wqq,Ib (66),(v)
70105: vpermilpd /r,Ib (66),(oVEX) 74401: vpermpd Vqq,Wqq,Ib (66),(v)
70206: vperm2f128 /r,Ib (66),(oVEX),(o256) 74502: vpblendd Vx,Hx,Wx,Ib (66),(v)
70308: roundps Vdq,Wdq,Ib (66),(VEX) 74603:
70409: roundpd Vdq,Wdq,Ib (66),(VEX) 74704: vpermilps Vx,Wx,Ib (66),(v)
7050a: roundss Vss,Wss,Ib (66),(VEX),(o128) 74805: vpermilpd Vx,Wx,Ib (66),(v)
7060b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) 74906: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
7070c: blendps Vdq,Wdq,Ib (66),(VEX) 75007:
7080d: blendpd Vdq,Wdq,Ib (66),(VEX) 75108: vroundps Vx,Wx,Ib (66)
7090e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) 75209: vroundpd Vx,Wx,Ib (66)
7100f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) 7530a: vroundss Vss,Wss,Ib (66),(v1)
71114: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) 7540b: vroundsd Vsd,Wsd,Ib (66),(v1)
71215: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) 7550c: vblendps Vx,Hx,Wx,Ib (66)
71316: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) 7560d: vblendpd Vx,Hx,Wx,Ib (66)
71417: extractps Ed,Vdq,Ib (66),(VEX),(o128) 7570e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
71518: vinsertf128 /r,Ib (66),(oVEX),(o256) 7580f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
71619: vextractf128 /r,Ib (66),(oVEX),(o256) 75914: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
71720: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) 76015: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
71821: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) 76116: vpextrd/q Ey,Vdq,Ib (66),(v1)
71922: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) 76217: vextractps Ed,Vdq,Ib (66),(v1)
72040: dpps Vdq,Wdq,Ib (66),(VEX) 76318: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
72141: dppd Vdq,Wdq,Ib (66),(VEX),(o128) 76419: vextractf128 Wdq,Vqq,Ib (66),(v)
72242: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) 7651d: vcvtps2ph Wx,Vx,Ib (66),(v)
72344: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) 76620: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
7244a: vblendvps /r,Ib (66),(oVEX) 76721: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
7254b: vblendvpd /r,Ib (66),(oVEX) 76822: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
7264c: vpblendvb /r,Ib (66),(oVEX),(o128) 76938: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
72760: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) 77039: vextracti128 Wdq,Vqq,Ib (66),(v)
72861: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) 77140: vdpps Vx,Hx,Wx,Ib (66)
72962: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) 77241: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
73063: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) 77342: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
731df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) 77444: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
77546: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
7764a: vblendvps Vx,Hx,Wx,Lx (66),(v)
7774b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
7784c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
77960: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
78061: vpcmpestri Vdq,Wdq,Ib (66),(v1)
78162: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
78263: vpcmpistri Vdq,Wdq,Ib (66),(v1)
783df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
784f0: RORX Gy,Ey,Ib (F2),(v)
732EndTable 785EndTable
733 786
734GrpTable: Grp1 787GrpTable: Grp1
@@ -790,7 +843,7 @@ GrpTable: Grp5
7902: CALLN Ev (f64) 8432: CALLN Ev (f64)
7913: CALLF Ep 8443: CALLF Ep
7924: JMPN Ev (f64) 8454: JMPN Ev (f64)
7935: JMPF Ep 8465: JMPF Mp
7946: PUSH Ev (d64) 8476: PUSH Ev (d64)
7957: 8487:
796EndTable 849EndTable
@@ -807,7 +860,7 @@ EndTable
807GrpTable: Grp7 860GrpTable: Grp7
8080: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) 8610: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
8091: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) 8621: SIDT Ms | MONITOR (000),(11B) | MWAIT (001)
8102: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) 8632: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B)
8113: LIDT Ms 8643: LIDT Ms
8124: SMSW Mw/Rv 8654: SMSW Mw/Rv
8135: 8665:
@@ -824,44 +877,45 @@ EndTable
824 877
825GrpTable: Grp9 878GrpTable: Grp9
8261: CMPXCHG8B/16B Mq/Mdq 8791: CMPXCHG8B/16B Mq/Mdq
8276: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) 8806: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
8287: VMPTRST Mq 8817: VMPTRST Mq | VMPTRST Mq (F3)
829EndTable 882EndTable
830 883
831GrpTable: Grp10 884GrpTable: Grp10
832EndTable 885EndTable
833 886
834GrpTable: Grp11 887GrpTable: Grp11
888# Note: the operands are given by group opcode
8350: MOV 8890: MOV
836EndTable 890EndTable
837 891
838GrpTable: Grp12 892GrpTable: Grp12
8392: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) 8932: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
8404: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) 8944: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
8416: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) 8956: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
842EndTable 896EndTable
843 897
844GrpTable: Grp13 898GrpTable: Grp13
8452: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) 8992: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
8464: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) 9004: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
8476: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) 9016: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
848EndTable 902EndTable
849 903
850GrpTable: Grp14 904GrpTable: Grp14
8512: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) 9052: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
8523: psrldq Udq,Ib (66),(11B),(VEX),(o128) 9063: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
8536: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) 9076: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
8547: pslldq Udq,Ib (66),(11B),(VEX),(o128) 9087: vpslldq Hx,Ux,Ib (66),(11B),(v1)
855EndTable 909EndTable
856 910
857GrpTable: Grp15 911GrpTable: Grp15
8580: fxsave 9120: fxsave | RDFSBASE Ry (F3),(11B)
8591: fxstor 9131: fxstor | RDGSBASE Ry (F3),(11B)
8602: ldmxcsr (VEX) 9142: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
8613: stmxcsr (VEX) 9153: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
8624: XSAVE 9164: XSAVE
8635: XRSTOR | lfence (11B) 9175: XRSTOR | lfence (11B)
8646: mfence (11B) 9186: XSAVEOPT | mfence (11B)
8657: clflush | sfence (11B) 9197: clflush | sfence (11B)
866EndTable 920EndTable
867 921
@@ -872,6 +926,12 @@ GrpTable: Grp16
8723: prefetch T2 9263: prefetch T2
873EndTable 927EndTable
874 928
929GrpTable: Grp17
9301: BLSR By,Ey (v)
9312: BLSMSK By,Ey (v)
9323: BLSI By,Ey (v)
933EndTable
934
875# AMD's Prefetch Group 935# AMD's Prefetch Group
876GrpTable: GrpP 936GrpTable: GrpP
8770: PREFETCH 9370: PREFETCH
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3d11327c9ab4..23d8e5fecf76 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -27,6 +27,4 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o
27obj-$(CONFIG_ACPI_NUMA) += srat.o 27obj-$(CONFIG_ACPI_NUMA) += srat.o
28obj-$(CONFIG_NUMA_EMU) += numa_emulation.o 28obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
29 29
30obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
31
32obj-$(CONFIG_MEMTEST) += memtest.o 30obj-$(CONFIG_MEMTEST) += memtest.o
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index d0474ad2a6e5..1fb85dbe390a 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -25,7 +25,7 @@ int fixup_exception(struct pt_regs *regs)
25 if (fixup) { 25 if (fixup) {
26 /* If fixup is less than 16, it means uaccess error */ 26 /* If fixup is less than 16, it means uaccess error */
27 if (fixup->fixup < 16) { 27 if (fixup->fixup < 16) {
28 current_thread_info()->uaccess_err = -EFAULT; 28 current_thread_info()->uaccess_err = 1;
29 regs->ip += fixup->fixup; 29 regs->ip += fixup->fixup;
30 return 1; 30 return 1;
31 } 31 }
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 5db0490deb07..9d74824a708d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -626,7 +626,7 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code,
626 626
627static noinline void 627static noinline void
628no_context(struct pt_regs *regs, unsigned long error_code, 628no_context(struct pt_regs *regs, unsigned long error_code,
629 unsigned long address) 629 unsigned long address, int signal, int si_code)
630{ 630{
631 struct task_struct *tsk = current; 631 struct task_struct *tsk = current;
632 unsigned long *stackend; 632 unsigned long *stackend;
@@ -634,8 +634,17 @@ no_context(struct pt_regs *regs, unsigned long error_code,
634 int sig; 634 int sig;
635 635
636 /* Are we prepared to handle this kernel fault? */ 636 /* Are we prepared to handle this kernel fault? */
637 if (fixup_exception(regs)) 637 if (fixup_exception(regs)) {
638 if (current_thread_info()->sig_on_uaccess_error && signal) {
639 tsk->thread.trap_no = 14;
640 tsk->thread.error_code = error_code | PF_USER;
641 tsk->thread.cr2 = address;
642
643 /* XXX: hwpoison faults will set the wrong code. */
644 force_sig_info_fault(signal, si_code, address, tsk, 0);
645 }
638 return; 646 return;
647 }
639 648
640 /* 649 /*
641 * 32-bit: 650 * 32-bit:
@@ -755,7 +764,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
755 if (is_f00f_bug(regs, address)) 764 if (is_f00f_bug(regs, address))
756 return; 765 return;
757 766
758 no_context(regs, error_code, address); 767 no_context(regs, error_code, address, SIGSEGV, si_code);
759} 768}
760 769
761static noinline void 770static noinline void
@@ -819,7 +828,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
819 828
820 /* Kernel mode? Handle exceptions or die: */ 829 /* Kernel mode? Handle exceptions or die: */
821 if (!(error_code & PF_USER)) { 830 if (!(error_code & PF_USER)) {
822 no_context(regs, error_code, address); 831 no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
823 return; 832 return;
824 } 833 }
825 834
@@ -854,7 +863,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
854 if (!(fault & VM_FAULT_RETRY)) 863 if (!(fault & VM_FAULT_RETRY))
855 up_read(&current->mm->mmap_sem); 864 up_read(&current->mm->mmap_sem);
856 if (!(error_code & PF_USER)) 865 if (!(error_code & PF_USER))
857 no_context(regs, error_code, address); 866 no_context(regs, error_code, address, 0, 0);
858 return 1; 867 return 1;
859 } 868 }
860 if (!(fault & VM_FAULT_ERROR)) 869 if (!(fault & VM_FAULT_ERROR))
@@ -864,7 +873,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
864 /* Kernel mode? Handle exceptions or die: */ 873 /* Kernel mode? Handle exceptions or die: */
865 if (!(error_code & PF_USER)) { 874 if (!(error_code & PF_USER)) {
866 up_read(&current->mm->mmap_sem); 875 up_read(&current->mm->mmap_sem);
867 no_context(regs, error_code, address); 876 no_context(regs, error_code, address,
877 SIGSEGV, SEGV_MAPERR);
868 return 1; 878 return 1;
869 } 879 }
870 880
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 87488b93a65c..a298914058f9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -67,7 +67,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
67 good_end = max_pfn_mapped << PAGE_SHIFT; 67 good_end = max_pfn_mapped << PAGE_SHIFT;
68 68
69 base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); 69 base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
70 if (base == MEMBLOCK_ERROR) 70 if (!base)
71 panic("Cannot find space for the kernel page tables"); 71 panic("Cannot find space for the kernel page tables");
72 72
73 pgt_buf_start = base >> PAGE_SHIFT; 73 pgt_buf_start = base >> PAGE_SHIFT;
@@ -80,7 +80,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
80 80
81void __init native_pagetable_reserve(u64 start, u64 end) 81void __init native_pagetable_reserve(u64 start, u64 end)
82{ 82{
83 memblock_x86_reserve_range(start, end, "PGTABLE"); 83 memblock_reserve(start, end - start);
84} 84}
85 85
86struct map_range { 86struct map_range {
@@ -279,8 +279,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
279 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) 279 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
280 * so that they can be reused for other purposes. 280 * so that they can be reused for other purposes.
281 * 281 *
282 * On native it just means calling memblock_x86_reserve_range, on Xen it 282 * On native it just means calling memblock_reserve, on Xen it also
283 * also means marking RW the pagetable pages that we allocated before 283 * means marking RW the pagetable pages that we allocated before
284 * but that haven't been used. 284 * but that haven't been used.
285 * 285 *
286 * In fact on xen we mark RO the whole range pgt_buf_start - 286 * In fact on xen we mark RO the whole range pgt_buf_start -
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 29f7c6d98179..0c1da394a634 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -427,23 +427,17 @@ static void __init add_one_highpage_init(struct page *page)
427void __init add_highpages_with_active_regions(int nid, 427void __init add_highpages_with_active_regions(int nid,
428 unsigned long start_pfn, unsigned long end_pfn) 428 unsigned long start_pfn, unsigned long end_pfn)
429{ 429{
430 struct range *range; 430 phys_addr_t start, end;
431 int nr_range; 431 u64 i;
432 int i; 432
433 433 for_each_free_mem_range(i, nid, &start, &end, NULL) {
434 nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn); 434 unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
435 435 start_pfn, end_pfn);
436 for (i = 0; i < nr_range; i++) { 436 unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
437 struct page *page; 437 start_pfn, end_pfn);
438 int node_pfn; 438 for ( ; pfn < e_pfn; pfn++)
439 439 if (pfn_valid(pfn))
440 for (node_pfn = range[i].start; node_pfn < range[i].end; 440 add_one_highpage_init(pfn_to_page(pfn));
441 node_pfn++) {
442 if (!pfn_valid(node_pfn))
443 continue;
444 page = pfn_to_page(node_pfn);
445 add_one_highpage_init(page);
446 }
447 } 441 }
448} 442}
449#else 443#else
@@ -650,18 +644,18 @@ void __init initmem_init(void)
650 highstart_pfn = highend_pfn = max_pfn; 644 highstart_pfn = highend_pfn = max_pfn;
651 if (max_pfn > max_low_pfn) 645 if (max_pfn > max_low_pfn)
652 highstart_pfn = max_low_pfn; 646 highstart_pfn = max_low_pfn;
653 memblock_x86_register_active_regions(0, 0, highend_pfn);
654 sparse_memory_present_with_active_regions(0);
655 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 647 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
656 pages_to_mb(highend_pfn - highstart_pfn)); 648 pages_to_mb(highend_pfn - highstart_pfn));
657 num_physpages = highend_pfn; 649 num_physpages = highend_pfn;
658 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; 650 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
659#else 651#else
660 memblock_x86_register_active_regions(0, 0, max_low_pfn);
661 sparse_memory_present_with_active_regions(0);
662 num_physpages = max_low_pfn; 652 num_physpages = max_low_pfn;
663 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; 653 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
664#endif 654#endif
655
656 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
657 sparse_memory_present_with_active_regions(0);
658
665#ifdef CONFIG_FLATMEM 659#ifdef CONFIG_FLATMEM
666 max_mapnr = num_physpages; 660 max_mapnr = num_physpages;
667#endif 661#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bbaaa005bf0e..a8a56ce3a962 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -608,7 +608,7 @@ kernel_physical_mapping_init(unsigned long start,
608#ifndef CONFIG_NUMA 608#ifndef CONFIG_NUMA
609void __init initmem_init(void) 609void __init initmem_init(void)
610{ 610{
611 memblock_x86_register_active_regions(0, 0, max_pfn); 611 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
612} 612}
613#endif 613#endif
614 614
diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c
deleted file mode 100644
index 992da5ec5a64..000000000000
--- a/arch/x86/mm/memblock.c
+++ /dev/null
@@ -1,348 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/types.h>
3#include <linux/init.h>
4#include <linux/bitops.h>
5#include <linux/memblock.h>
6#include <linux/bootmem.h>
7#include <linux/mm.h>
8#include <linux/range.h>
9
10/* Check for already reserved areas */
11bool __init memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align)
12{
13 struct memblock_region *r;
14 u64 addr = *addrp, last;
15 u64 size = *sizep;
16 bool changed = false;
17
18again:
19 last = addr + size;
20 for_each_memblock(reserved, r) {
21 if (last > r->base && addr < r->base) {
22 size = r->base - addr;
23 changed = true;
24 goto again;
25 }
26 if (last > (r->base + r->size) && addr < (r->base + r->size)) {
27 addr = round_up(r->base + r->size, align);
28 size = last - addr;
29 changed = true;
30 goto again;
31 }
32 if (last <= (r->base + r->size) && addr >= r->base) {
33 *sizep = 0;
34 return false;
35 }
36 }
37 if (changed) {
38 *addrp = addr;
39 *sizep = size;
40 }
41 return changed;
42}
43
44/*
45 * Find next free range after start, and size is returned in *sizep
46 */
47u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align)
48{
49 struct memblock_region *r;
50
51 for_each_memblock(memory, r) {
52 u64 ei_start = r->base;
53 u64 ei_last = ei_start + r->size;
54 u64 addr;
55
56 addr = round_up(ei_start, align);
57 if (addr < start)
58 addr = round_up(start, align);
59 if (addr >= ei_last)
60 continue;
61 *sizep = ei_last - addr;
62 while (memblock_x86_check_reserved_size(&addr, sizep, align))
63 ;
64
65 if (*sizep)
66 return addr;
67 }
68
69 return MEMBLOCK_ERROR;
70}
71
72static __init struct range *find_range_array(int count)
73{
74 u64 end, size, mem;
75 struct range *range;
76
77 size = sizeof(struct range) * count;
78 end = memblock.current_limit;
79
80 mem = memblock_find_in_range(0, end, size, sizeof(struct range));
81 if (mem == MEMBLOCK_ERROR)
82 panic("can not find more space for range array");
83
84 /*
85 * This range is tempoaray, so don't reserve it, it will not be
86 * overlapped because We will not alloccate new buffer before
87 * We discard this one
88 */
89 range = __va(mem);
90 memset(range, 0, size);
91
92 return range;
93}
94
95static void __init memblock_x86_subtract_reserved(struct range *range, int az)
96{
97 u64 final_start, final_end;
98 struct memblock_region *r;
99
100 /* Take out region array itself at first*/
101 memblock_free_reserved_regions();
102
103 memblock_dbg("Subtract (%ld early reservations)\n", memblock.reserved.cnt);
104
105 for_each_memblock(reserved, r) {
106 memblock_dbg(" [%010llx-%010llx]\n", (u64)r->base, (u64)r->base + r->size - 1);
107 final_start = PFN_DOWN(r->base);
108 final_end = PFN_UP(r->base + r->size);
109 if (final_start >= final_end)
110 continue;
111 subtract_range(range, az, final_start, final_end);
112 }
113
114 /* Put region array back ? */
115 memblock_reserve_reserved_regions();
116}
117
118struct count_data {
119 int nr;
120};
121
122static int __init count_work_fn(unsigned long start_pfn,
123 unsigned long end_pfn, void *datax)
124{
125 struct count_data *data = datax;
126
127 data->nr++;
128
129 return 0;
130}
131
132static int __init count_early_node_map(int nodeid)
133{
134 struct count_data data;
135
136 data.nr = 0;
137 work_with_active_regions(nodeid, count_work_fn, &data);
138
139 return data.nr;
140}
141
142int __init __get_free_all_memory_range(struct range **rangep, int nodeid,
143 unsigned long start_pfn, unsigned long end_pfn)
144{
145 int count;
146 struct range *range;
147 int nr_range;
148
149 count = (memblock.reserved.cnt + count_early_node_map(nodeid)) * 2;
150
151 range = find_range_array(count);
152 nr_range = 0;
153
154 /*
155 * Use early_node_map[] and memblock.reserved.region to get range array
156 * at first
157 */
158 nr_range = add_from_early_node_map(range, count, nr_range, nodeid);
159 subtract_range(range, count, 0, start_pfn);
160 subtract_range(range, count, end_pfn, -1ULL);
161
162 memblock_x86_subtract_reserved(range, count);
163 nr_range = clean_sort_range(range, count);
164
165 *rangep = range;
166 return nr_range;
167}
168
169int __init get_free_all_memory_range(struct range **rangep, int nodeid)
170{
171 unsigned long end_pfn = -1UL;
172
173#ifdef CONFIG_X86_32
174 end_pfn = max_low_pfn;
175#endif
176 return __get_free_all_memory_range(rangep, nodeid, 0, end_pfn);
177}
178
179static u64 __init __memblock_x86_memory_in_range(u64 addr, u64 limit, bool get_free)
180{
181 int i, count;
182 struct range *range;
183 int nr_range;
184 u64 final_start, final_end;
185 u64 free_size;
186 struct memblock_region *r;
187
188 count = (memblock.reserved.cnt + memblock.memory.cnt) * 2;
189
190 range = find_range_array(count);
191 nr_range = 0;
192
193 addr = PFN_UP(addr);
194 limit = PFN_DOWN(limit);
195
196 for_each_memblock(memory, r) {
197 final_start = PFN_UP(r->base);
198 final_end = PFN_DOWN(r->base + r->size);
199 if (final_start >= final_end)
200 continue;
201 if (final_start >= limit || final_end <= addr)
202 continue;
203
204 nr_range = add_range(range, count, nr_range, final_start, final_end);
205 }
206 subtract_range(range, count, 0, addr);
207 subtract_range(range, count, limit, -1ULL);
208
209 /* Subtract memblock.reserved.region in range ? */
210 if (!get_free)
211 goto sort_and_count_them;
212 for_each_memblock(reserved, r) {
213 final_start = PFN_DOWN(r->base);
214 final_end = PFN_UP(r->base + r->size);
215 if (final_start >= final_end)
216 continue;
217 if (final_start >= limit || final_end <= addr)
218 continue;
219
220 subtract_range(range, count, final_start, final_end);
221 }
222
223sort_and_count_them:
224 nr_range = clean_sort_range(range, count);
225
226 free_size = 0;
227 for (i = 0; i < nr_range; i++)
228 free_size += range[i].end - range[i].start;
229
230 return free_size << PAGE_SHIFT;
231}
232
233u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit)
234{
235 return __memblock_x86_memory_in_range(addr, limit, true);
236}
237
238u64 __init memblock_x86_memory_in_range(u64 addr, u64 limit)
239{
240 return __memblock_x86_memory_in_range(addr, limit, false);
241}
242
243void __init memblock_x86_reserve_range(u64 start, u64 end, char *name)
244{
245 if (start == end)
246 return;
247
248 if (WARN_ONCE(start > end, "memblock_x86_reserve_range: wrong range [%#llx, %#llx)\n", start, end))
249 return;
250
251 memblock_dbg(" memblock_x86_reserve_range: [%#010llx-%#010llx] %16s\n", start, end - 1, name);
252
253 memblock_reserve(start, end - start);
254}
255
256void __init memblock_x86_free_range(u64 start, u64 end)
257{
258 if (start == end)
259 return;
260
261 if (WARN_ONCE(start > end, "memblock_x86_free_range: wrong range [%#llx, %#llx)\n", start, end))
262 return;
263
264 memblock_dbg(" memblock_x86_free_range: [%#010llx-%#010llx]\n", start, end - 1);
265
266 memblock_free(start, end - start);
267}
268
269/*
270 * Need to call this function after memblock_x86_register_active_regions,
271 * so early_node_map[] is filled already.
272 */
273u64 __init memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align)
274{
275 u64 addr;
276 addr = find_memory_core_early(nid, size, align, start, end);
277 if (addr != MEMBLOCK_ERROR)
278 return addr;
279
280 /* Fallback, should already have start end within node range */
281 return memblock_find_in_range(start, end, size, align);
282}
283
284/*
285 * Finds an active region in the address range from start_pfn to last_pfn and
286 * returns its range in ei_startpfn and ei_endpfn for the memblock entry.
287 */
288static int __init memblock_x86_find_active_region(const struct memblock_region *ei,
289 unsigned long start_pfn,
290 unsigned long last_pfn,
291 unsigned long *ei_startpfn,
292 unsigned long *ei_endpfn)
293{
294 u64 align = PAGE_SIZE;
295
296 *ei_startpfn = round_up(ei->base, align) >> PAGE_SHIFT;
297 *ei_endpfn = round_down(ei->base + ei->size, align) >> PAGE_SHIFT;
298
299 /* Skip map entries smaller than a page */
300 if (*ei_startpfn >= *ei_endpfn)
301 return 0;
302
303 /* Skip if map is outside the node */
304 if (*ei_endpfn <= start_pfn || *ei_startpfn >= last_pfn)
305 return 0;
306
307 /* Check for overlaps */
308 if (*ei_startpfn < start_pfn)
309 *ei_startpfn = start_pfn;
310 if (*ei_endpfn > last_pfn)
311 *ei_endpfn = last_pfn;
312
313 return 1;
314}
315
316/* Walk the memblock.memory map and register active regions within a node */
317void __init memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
318 unsigned long last_pfn)
319{
320 unsigned long ei_startpfn;
321 unsigned long ei_endpfn;
322 struct memblock_region *r;
323
324 for_each_memblock(memory, r)
325 if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
326 &ei_startpfn, &ei_endpfn))
327 add_active_range(nid, ei_startpfn, ei_endpfn);
328}
329
330/*
331 * Find the hole size (in bytes) in the memory range.
332 * @start: starting address of the memory range to scan
333 * @end: ending address of the memory range to scan
334 */
335u64 __init memblock_x86_hole_size(u64 start, u64 end)
336{
337 unsigned long start_pfn = start >> PAGE_SHIFT;
338 unsigned long last_pfn = end >> PAGE_SHIFT;
339 unsigned long ei_startpfn, ei_endpfn, ram = 0;
340 struct memblock_region *r;
341
342 for_each_memblock(memory, r)
343 if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
344 &ei_startpfn, &ei_endpfn))
345 ram += ei_endpfn - ei_startpfn;
346
347 return end - start - ((u64)ram << PAGE_SHIFT);
348}
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 92faf3a1c53e..c80b9fb95734 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -34,7 +34,7 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
34 (unsigned long long) pattern, 34 (unsigned long long) pattern,
35 (unsigned long long) start_bad, 35 (unsigned long long) start_bad,
36 (unsigned long long) end_bad); 36 (unsigned long long) end_bad);
37 memblock_x86_reserve_range(start_bad, end_bad, "BAD RAM"); 37 memblock_reserve(start_bad, end_bad - start_bad);
38} 38}
39 39
40static void __init memtest(u64 pattern, u64 start_phys, u64 size) 40static void __init memtest(u64 pattern, u64 start_phys, u64 size)
@@ -70,24 +70,19 @@ static void __init memtest(u64 pattern, u64 start_phys, u64 size)
70 70
71static void __init do_one_pass(u64 pattern, u64 start, u64 end) 71static void __init do_one_pass(u64 pattern, u64 start, u64 end)
72{ 72{
73 u64 size = 0; 73 u64 i;
74 74 phys_addr_t this_start, this_end;
75 while (start < end) { 75
76 start = memblock_x86_find_in_range_size(start, &size, 1); 76 for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) {
77 77 this_start = clamp_t(phys_addr_t, this_start, start, end);
78 /* done ? */ 78 this_end = clamp_t(phys_addr_t, this_end, start, end);
79 if (start >= end) 79 if (this_start < this_end) {
80 break; 80 printk(KERN_INFO " %010llx - %010llx pattern %016llx\n",
81 if (start + size > end) 81 (unsigned long long)this_start,
82 size = end - start; 82 (unsigned long long)this_end,
83 83 (unsigned long long)cpu_to_be64(pattern));
84 printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", 84 memtest(pattern, this_start, this_end - this_start);
85 (unsigned long long) start, 85 }
86 (unsigned long long) start + size,
87 (unsigned long long) cpu_to_be64(pattern));
88 memtest(pattern, start, size);
89
90 start += size;
91 } 86 }
92} 87}
93 88
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index fbeaaf416610..496f494593bf 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -192,8 +192,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
192/* Initialize NODE_DATA for a node on the local memory */ 192/* Initialize NODE_DATA for a node on the local memory */
193static void __init setup_node_data(int nid, u64 start, u64 end) 193static void __init setup_node_data(int nid, u64 start, u64 end)
194{ 194{
195 const u64 nd_low = PFN_PHYS(MAX_DMA_PFN);
196 const u64 nd_high = PFN_PHYS(max_pfn_mapped);
197 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 195 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
198 bool remapped = false; 196 bool remapped = false;
199 u64 nd_pa; 197 u64 nd_pa;
@@ -224,17 +222,12 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
224 nd_pa = __pa(nd); 222 nd_pa = __pa(nd);
225 remapped = true; 223 remapped = true;
226 } else { 224 } else {
227 nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high, 225 nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
228 nd_size, SMP_CACHE_BYTES); 226 if (!nd_pa) {
229 if (nd_pa == MEMBLOCK_ERROR)
230 nd_pa = memblock_find_in_range(nd_low, nd_high,
231 nd_size, SMP_CACHE_BYTES);
232 if (nd_pa == MEMBLOCK_ERROR) {
233 pr_err("Cannot find %zu bytes in node %d\n", 227 pr_err("Cannot find %zu bytes in node %d\n",
234 nd_size, nid); 228 nd_size, nid);
235 return; 229 return;
236 } 230 }
237 memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
238 nd = __va(nd_pa); 231 nd = __va(nd_pa);
239 } 232 }
240 233
@@ -371,8 +364,7 @@ void __init numa_reset_distance(void)
371 364
372 /* numa_distance could be 1LU marking allocation failure, test cnt */ 365 /* numa_distance could be 1LU marking allocation failure, test cnt */
373 if (numa_distance_cnt) 366 if (numa_distance_cnt)
374 memblock_x86_free_range(__pa(numa_distance), 367 memblock_free(__pa(numa_distance), size);
375 __pa(numa_distance) + size);
376 numa_distance_cnt = 0; 368 numa_distance_cnt = 0;
377 numa_distance = NULL; /* enable table creation */ 369 numa_distance = NULL; /* enable table creation */
378} 370}
@@ -395,13 +387,13 @@ static int __init numa_alloc_distance(void)
395 387
396 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), 388 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
397 size, PAGE_SIZE); 389 size, PAGE_SIZE);
398 if (phys == MEMBLOCK_ERROR) { 390 if (!phys) {
399 pr_warning("NUMA: Warning: can't allocate distance table!\n"); 391 pr_warning("NUMA: Warning: can't allocate distance table!\n");
400 /* don't retry until explicitly reset */ 392 /* don't retry until explicitly reset */
401 numa_distance = (void *)1LU; 393 numa_distance = (void *)1LU;
402 return -ENOMEM; 394 return -ENOMEM;
403 } 395 }
404 memblock_x86_reserve_range(phys, phys + size, "NUMA DIST"); 396 memblock_reserve(phys, size);
405 397
406 numa_distance = __va(phys); 398 numa_distance = __va(phys);
407 numa_distance_cnt = cnt; 399 numa_distance_cnt = cnt;
@@ -482,8 +474,8 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
482 numaram = 0; 474 numaram = 0;
483 } 475 }
484 476
485 e820ram = max_pfn - (memblock_x86_hole_size(0, 477 e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
486 PFN_PHYS(max_pfn)) >> PAGE_SHIFT); 478
487 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ 479 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
488 if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { 480 if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
489 printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", 481 printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
@@ -505,13 +497,10 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
505 if (WARN_ON(nodes_empty(node_possible_map))) 497 if (WARN_ON(nodes_empty(node_possible_map)))
506 return -EINVAL; 498 return -EINVAL;
507 499
508 for (i = 0; i < mi->nr_blks; i++) 500 for (i = 0; i < mi->nr_blks; i++) {
509 memblock_x86_register_active_regions(mi->blk[i].nid, 501 struct numa_memblk *mb = &mi->blk[i];
510 mi->blk[i].start >> PAGE_SHIFT, 502 memblock_set_node(mb->start, mb->end - mb->start, mb->nid);
511 mi->blk[i].end >> PAGE_SHIFT); 503 }
512
513 /* for out of order entries */
514 sort_node_map();
515 504
516 /* 505 /*
517 * If sections array is gonna be used for pfn -> nid mapping, check 506 * If sections array is gonna be used for pfn -> nid mapping, check
@@ -545,6 +534,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
545 setup_node_data(nid, start, end); 534 setup_node_data(nid, start, end);
546 } 535 }
547 536
537 /* Dump memblock with node info and return. */
538 memblock_dump_all();
548 return 0; 539 return 0;
549} 540}
550 541
@@ -582,7 +573,7 @@ static int __init numa_init(int (*init_func)(void))
582 nodes_clear(node_possible_map); 573 nodes_clear(node_possible_map);
583 nodes_clear(node_online_map); 574 nodes_clear(node_online_map);
584 memset(&numa_meminfo, 0, sizeof(numa_meminfo)); 575 memset(&numa_meminfo, 0, sizeof(numa_meminfo));
585 remove_all_active_ranges(); 576 WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
586 numa_reset_distance(); 577 numa_reset_distance();
587 578
588 ret = init_func(); 579 ret = init_func();
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 3adebe7e536a..534255a36b6b 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -199,23 +199,23 @@ void __init init_alloc_remap(int nid, u64 start, u64 end)
199 199
200 /* allocate node memory and the lowmem remap area */ 200 /* allocate node memory and the lowmem remap area */
201 node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); 201 node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
202 if (node_pa == MEMBLOCK_ERROR) { 202 if (!node_pa) {
203 pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", 203 pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
204 size, nid); 204 size, nid);
205 return; 205 return;
206 } 206 }
207 memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); 207 memblock_reserve(node_pa, size);
208 208
209 remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, 209 remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
210 max_low_pfn << PAGE_SHIFT, 210 max_low_pfn << PAGE_SHIFT,
211 size, LARGE_PAGE_BYTES); 211 size, LARGE_PAGE_BYTES);
212 if (remap_pa == MEMBLOCK_ERROR) { 212 if (!remap_pa) {
213 pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", 213 pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
214 size, nid); 214 size, nid);
215 memblock_x86_free_range(node_pa, node_pa + size); 215 memblock_free(node_pa, size);
216 return; 216 return;
217 } 217 }
218 memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG"); 218 memblock_reserve(remap_pa, size);
219 remap_va = phys_to_virt(remap_pa); 219 remap_va = phys_to_virt(remap_pa);
220 220
221 /* perform actual remap */ 221 /* perform actual remap */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index dd27f401f0a0..92e27119ee1a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -19,7 +19,7 @@ unsigned long __init numa_free_all_bootmem(void)
19 for_each_online_node(i) 19 for_each_online_node(i)
20 pages += free_all_bootmem_node(NODE_DATA(i)); 20 pages += free_all_bootmem_node(NODE_DATA(i));
21 21
22 pages += free_all_memory_core_early(MAX_NUMNODES); 22 pages += free_low_memory_core_early(MAX_NUMNODES);
23 23
24 return pages; 24 return pages;
25} 25}
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index d0ed086b6247..46db56845f18 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -28,6 +28,16 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
28 return -ENOENT; 28 return -ENOENT;
29} 29}
30 30
31static u64 mem_hole_size(u64 start, u64 end)
32{
33 unsigned long start_pfn = PFN_UP(start);
34 unsigned long end_pfn = PFN_DOWN(end);
35
36 if (start_pfn < end_pfn)
37 return PFN_PHYS(absent_pages_in_range(start_pfn, end_pfn));
38 return 0;
39}
40
31/* 41/*
32 * Sets up nid to range from @start to @end. The return value is -errno if 42 * Sets up nid to range from @start to @end. The return value is -errno if
33 * something went wrong, 0 otherwise. 43 * something went wrong, 0 otherwise.
@@ -89,7 +99,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
89 * Calculate target node size. x86_32 freaks on __udivdi3() so do 99 * Calculate target node size. x86_32 freaks on __udivdi3() so do
90 * the division in ulong number of pages and convert back. 100 * the division in ulong number of pages and convert back.
91 */ 101 */
92 size = max_addr - addr - memblock_x86_hole_size(addr, max_addr); 102 size = max_addr - addr - mem_hole_size(addr, max_addr);
93 size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes); 103 size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes);
94 104
95 /* 105 /*
@@ -135,8 +145,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
135 * Continue to add memory to this fake node if its 145 * Continue to add memory to this fake node if its
136 * non-reserved memory is less than the per-node size. 146 * non-reserved memory is less than the per-node size.
137 */ 147 */
138 while (end - start - 148 while (end - start - mem_hole_size(start, end) < size) {
139 memblock_x86_hole_size(start, end) < size) {
140 end += FAKE_NODE_MIN_SIZE; 149 end += FAKE_NODE_MIN_SIZE;
141 if (end > limit) { 150 if (end > limit) {
142 end = limit; 151 end = limit;
@@ -150,7 +159,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
150 * this one must extend to the boundary. 159 * this one must extend to the boundary.
151 */ 160 */
152 if (end < dma32_end && dma32_end - end - 161 if (end < dma32_end && dma32_end - end -
153 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) 162 mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
154 end = dma32_end; 163 end = dma32_end;
155 164
156 /* 165 /*
@@ -158,8 +167,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
158 * next node, this one must extend to the end of the 167 * next node, this one must extend to the end of the
159 * physical node. 168 * physical node.
160 */ 169 */
161 if (limit - end - 170 if (limit - end - mem_hole_size(end, limit) < size)
162 memblock_x86_hole_size(end, limit) < size)
163 end = limit; 171 end = limit;
164 172
165 ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes, 173 ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
@@ -180,7 +188,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
180{ 188{
181 u64 end = start + size; 189 u64 end = start + size;
182 190
183 while (end - start - memblock_x86_hole_size(start, end) < size) { 191 while (end - start - mem_hole_size(start, end) < size) {
184 end += FAKE_NODE_MIN_SIZE; 192 end += FAKE_NODE_MIN_SIZE;
185 if (end > max_addr) { 193 if (end > max_addr) {
186 end = max_addr; 194 end = max_addr;
@@ -211,8 +219,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
211 * creates a uniform distribution of node sizes across the entire 219 * creates a uniform distribution of node sizes across the entire
212 * machine (but not necessarily over physical nodes). 220 * machine (but not necessarily over physical nodes).
213 */ 221 */
214 min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / 222 min_size = (max_addr - addr - mem_hole_size(addr, max_addr)) / MAX_NUMNODES;
215 MAX_NUMNODES;
216 min_size = max(min_size, FAKE_NODE_MIN_SIZE); 223 min_size = max(min_size, FAKE_NODE_MIN_SIZE);
217 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) 224 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
218 min_size = (min_size + FAKE_NODE_MIN_SIZE) & 225 min_size = (min_size + FAKE_NODE_MIN_SIZE) &
@@ -252,7 +259,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
252 * this one must extend to the boundary. 259 * this one must extend to the boundary.
253 */ 260 */
254 if (end < dma32_end && dma32_end - end - 261 if (end < dma32_end && dma32_end - end -
255 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) 262 mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
256 end = dma32_end; 263 end = dma32_end;
257 264
258 /* 265 /*
@@ -260,8 +267,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
260 * next node, this one must extend to the end of the 267 * next node, this one must extend to the end of the
261 * physical node. 268 * physical node.
262 */ 269 */
263 if (limit - end - 270 if (limit - end - mem_hole_size(end, limit) < size)
264 memblock_x86_hole_size(end, limit) < size)
265 end = limit; 271 end = limit;
266 272
267 ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES, 273 ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
@@ -351,11 +357,11 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
351 357
352 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), 358 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
353 phys_size, PAGE_SIZE); 359 phys_size, PAGE_SIZE);
354 if (phys == MEMBLOCK_ERROR) { 360 if (!phys) {
355 pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); 361 pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
356 goto no_emu; 362 goto no_emu;
357 } 363 }
358 memblock_x86_reserve_range(phys, phys + phys_size, "TMP NUMA DIST"); 364 memblock_reserve(phys, phys_size);
359 phys_dist = __va(phys); 365 phys_dist = __va(phys);
360 366
361 for (i = 0; i < numa_dist_cnt; i++) 367 for (i = 0; i < numa_dist_cnt; i++)
@@ -424,7 +430,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
424 430
425 /* free the copied physical distance table */ 431 /* free the copied physical distance table */
426 if (phys_dist) 432 if (phys_dist)
427 memblock_x86_free_range(__pa(phys_dist), __pa(phys_dist) + phys_size); 433 memblock_free(__pa(phys_dist), phys_size);
428 return; 434 return;
429 435
430no_emu: 436no_emu:
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f9e526742fa1..eda2acbb6e81 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -998,7 +998,7 @@ out_err:
998} 998}
999EXPORT_SYMBOL(set_memory_uc); 999EXPORT_SYMBOL(set_memory_uc);
1000 1000
1001int _set_memory_array(unsigned long *addr, int addrinarray, 1001static int _set_memory_array(unsigned long *addr, int addrinarray,
1002 unsigned long new_type) 1002 unsigned long new_type)
1003{ 1003{
1004 int i, j; 1004 int i, j;
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 81dbfdeb080d..fd61b3fb7341 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -69,6 +69,12 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
69 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) 69 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
70 return; 70 return;
71 pxm = pa->proximity_domain; 71 pxm = pa->proximity_domain;
72 apic_id = pa->apic_id;
73 if (!cpu_has_x2apic && (apic_id >= 0xff)) {
74 printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n",
75 pxm, apic_id);
76 return;
77 }
72 node = setup_node(pxm); 78 node = setup_node(pxm);
73 if (node < 0) { 79 if (node < 0) {
74 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); 80 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
@@ -76,7 +82,6 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
76 return; 82 return;
77 } 83 }
78 84
79 apic_id = pa->apic_id;
80 if (apic_id >= MAX_LOCAL_APIC) { 85 if (apic_id >= MAX_LOCAL_APIC) {
81 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); 86 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
82 return; 87 return;
diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile
index 446902b2a6b6..1599f568f0e2 100644
--- a/arch/x86/oprofile/Makefile
+++ b/arch/x86/oprofile/Makefile
@@ -4,9 +4,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
4 oprof.o cpu_buffer.o buffer_sync.o \ 4 oprof.o cpu_buffer.o buffer_sync.o \
5 event_buffer.o oprofile_files.o \ 5 event_buffer.o oprofile_files.o \
6 oprofilefs.o oprofile_stats.o \ 6 oprofilefs.o oprofile_stats.o \
7 timer_int.o ) 7 timer_int.o nmi_timer_int.o )
8 8
9oprofile-y := $(DRIVER_OBJS) init.o backtrace.o 9oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
10oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ 10oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \
11 op_model_ppro.o op_model_p4.o 11 op_model_ppro.o op_model_p4.o
12oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c
index f148cf652678..9e138d00ad36 100644
--- a/arch/x86/oprofile/init.c
+++ b/arch/x86/oprofile/init.c
@@ -16,37 +16,23 @@
16 * with the NMI mode driver. 16 * with the NMI mode driver.
17 */ 17 */
18 18
19#ifdef CONFIG_X86_LOCAL_APIC
19extern int op_nmi_init(struct oprofile_operations *ops); 20extern int op_nmi_init(struct oprofile_operations *ops);
20extern int op_nmi_timer_init(struct oprofile_operations *ops);
21extern void op_nmi_exit(void); 21extern void op_nmi_exit(void);
22extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); 22#else
23static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; }
24static void op_nmi_exit(void) { }
25#endif
23 26
24static int nmi_timer; 27extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
25 28
26int __init oprofile_arch_init(struct oprofile_operations *ops) 29int __init oprofile_arch_init(struct oprofile_operations *ops)
27{ 30{
28 int ret;
29
30 ret = -ENODEV;
31
32#ifdef CONFIG_X86_LOCAL_APIC
33 ret = op_nmi_init(ops);
34#endif
35 nmi_timer = (ret != 0);
36#ifdef CONFIG_X86_IO_APIC
37 if (nmi_timer)
38 ret = op_nmi_timer_init(ops);
39#endif
40 ops->backtrace = x86_backtrace; 31 ops->backtrace = x86_backtrace;
41 32 return op_nmi_init(ops);
42 return ret;
43} 33}
44 34
45
46void oprofile_arch_exit(void) 35void oprofile_arch_exit(void)
47{ 36{
48#ifdef CONFIG_X86_LOCAL_APIC 37 op_nmi_exit();
49 if (!nmi_timer)
50 op_nmi_exit();
51#endif
52} 38}
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 75f9528e0372..26b8a8514ee5 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -595,24 +595,36 @@ static int __init p4_init(char **cpu_type)
595 return 0; 595 return 0;
596} 596}
597 597
598static int force_arch_perfmon; 598enum __force_cpu_type {
599static int force_cpu_type(const char *str, struct kernel_param *kp) 599 reserved = 0, /* do not force */
600 timer,
601 arch_perfmon,
602};
603
604static int force_cpu_type;
605
606static int set_cpu_type(const char *str, struct kernel_param *kp)
600{ 607{
601 if (!strcmp(str, "arch_perfmon")) { 608 if (!strcmp(str, "timer")) {
602 force_arch_perfmon = 1; 609 force_cpu_type = timer;
610 printk(KERN_INFO "oprofile: forcing NMI timer mode\n");
611 } else if (!strcmp(str, "arch_perfmon")) {
612 force_cpu_type = arch_perfmon;
603 printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); 613 printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
614 } else {
615 force_cpu_type = 0;
604 } 616 }
605 617
606 return 0; 618 return 0;
607} 619}
608module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); 620module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
609 621
610static int __init ppro_init(char **cpu_type) 622static int __init ppro_init(char **cpu_type)
611{ 623{
612 __u8 cpu_model = boot_cpu_data.x86_model; 624 __u8 cpu_model = boot_cpu_data.x86_model;
613 struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ 625 struct op_x86_model_spec *spec = &op_ppro_spec; /* default */
614 626
615 if (force_arch_perfmon && cpu_has_arch_perfmon) 627 if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon)
616 return 0; 628 return 0;
617 629
618 /* 630 /*
@@ -679,6 +691,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
679 if (!cpu_has_apic) 691 if (!cpu_has_apic)
680 return -ENODEV; 692 return -ENODEV;
681 693
694 if (force_cpu_type == timer)
695 return -ENODEV;
696
682 switch (vendor) { 697 switch (vendor) {
683 case X86_VENDOR_AMD: 698 case X86_VENDOR_AMD:
684 /* Needs to be at least an Athlon (or hammer in 32bit mode) */ 699 /* Needs to be at least an Athlon (or hammer in 32bit mode) */
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
deleted file mode 100644
index 7f8052cd6620..000000000000
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ /dev/null
@@ -1,50 +0,0 @@
1/**
2 * @file nmi_timer_int.c
3 *
4 * @remark Copyright 2003 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author Zwane Mwaikambo <zwane@linuxpower.ca>
8 */
9
10#include <linux/init.h>
11#include <linux/smp.h>
12#include <linux/errno.h>
13#include <linux/oprofile.h>
14#include <linux/rcupdate.h>
15#include <linux/kdebug.h>
16
17#include <asm/nmi.h>
18#include <asm/apic.h>
19#include <asm/ptrace.h>
20
21static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs)
22{
23 oprofile_add_sample(regs, 0);
24 return NMI_HANDLED;
25}
26
27static int timer_start(void)
28{
29 if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify,
30 0, "oprofile-timer"))
31 return 1;
32 return 0;
33}
34
35
36static void timer_stop(void)
37{
38 unregister_nmi_handler(NMI_LOCAL, "oprofile-timer");
39 synchronize_sched(); /* Allow already-started NMIs to complete. */
40}
41
42
43int __init op_nmi_timer_init(struct oprofile_operations *ops)
44{
45 ops->start = timer_start;
46 ops->stop = timer_stop;
47 ops->cpu_type = "timer";
48 printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
49 return 0;
50}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 37718f0f053d..4cf9bd0a1653 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -238,7 +238,8 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
238 238
239 spin_lock_irqsave(&rtc_lock, flags); 239 spin_lock_irqsave(&rtc_lock, flags);
240 efi_call_phys_prelog(); 240 efi_call_phys_prelog();
241 status = efi_call_phys2(efi_phys.get_time, tm, tc); 241 status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
242 virt_to_phys(tc));
242 efi_call_phys_epilog(); 243 efi_call_phys_epilog();
243 spin_unlock_irqrestore(&rtc_lock, flags); 244 spin_unlock_irqrestore(&rtc_lock, flags);
244 return status; 245 return status;
@@ -352,8 +353,7 @@ void __init efi_memblock_x86_reserve_range(void)
352 boot_params.efi_info.efi_memdesc_size; 353 boot_params.efi_info.efi_memdesc_size;
353 memmap.desc_version = boot_params.efi_info.efi_memdesc_version; 354 memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
354 memmap.desc_size = boot_params.efi_info.efi_memdesc_size; 355 memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
355 memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size, 356 memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
356 "EFI memmap");
357} 357}
358 358
359#if EFI_DEBUG 359#if EFI_DEBUG
@@ -397,16 +397,14 @@ void __init efi_reserve_boot_services(void)
397 if ((start+size >= virt_to_phys(_text) 397 if ((start+size >= virt_to_phys(_text)
398 && start <= virt_to_phys(_end)) || 398 && start <= virt_to_phys(_end)) ||
399 !e820_all_mapped(start, start+size, E820_RAM) || 399 !e820_all_mapped(start, start+size, E820_RAM) ||
400 memblock_x86_check_reserved_size(&start, &size, 400 memblock_is_region_reserved(start, size)) {
401 1<<EFI_PAGE_SHIFT)) {
402 /* Could not reserve, skip it */ 401 /* Could not reserve, skip it */
403 md->num_pages = 0; 402 md->num_pages = 0;
404 memblock_dbg(PFX "Could not reserve boot range " 403 memblock_dbg(PFX "Could not reserve boot range "
405 "[0x%010llx-0x%010llx]\n", 404 "[0x%010llx-0x%010llx]\n",
406 start, start+size-1); 405 start, start+size-1);
407 } else 406 } else
408 memblock_x86_reserve_range(start, start+size, 407 memblock_reserve(start, size);
409 "EFI Boot");
410 } 408 }
411} 409}
412 410
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index f82082677337..d511aa97533a 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -18,14 +18,21 @@ chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk
18quiet_cmd_posttest = TEST $@ 18quiet_cmd_posttest = TEST $@
19 cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) 19 cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose)
20 20
21posttest: $(obj)/test_get_len vmlinux 21quiet_cmd_sanitytest = TEST $@
22 cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000
23
24posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity
22 $(call cmd,posttest) 25 $(call cmd,posttest)
26 $(call cmd,sanitytest)
23 27
24hostprogs-y := test_get_len 28hostprogs-y += test_get_len insn_sanity
25 29
26# -I needed for generated C source and C source which in the kernel tree. 30# -I needed for generated C source and C source which in the kernel tree.
27HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ 31HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
28 32
33HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
34
29# Dependencies are also needed. 35# Dependencies are also needed.
30$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c 36$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
31 37
38$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index eaf11f52fc0b..5f6a5b6c3a15 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -47,7 +47,7 @@ BEGIN {
47 sep_expr = "^\\|$" 47 sep_expr = "^\\|$"
48 group_expr = "^Grp[0-9A-Za-z]+" 48 group_expr = "^Grp[0-9A-Za-z]+"
49 49
50 imm_expr = "^[IJAO][a-z]" 50 imm_expr = "^[IJAOL][a-z]"
51 imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" 51 imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
52 imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" 52 imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
53 imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" 53 imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
@@ -59,6 +59,7 @@ BEGIN {
59 imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" 59 imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
60 imm_flag["Ob"] = "INAT_MOFFSET" 60 imm_flag["Ob"] = "INAT_MOFFSET"
61 imm_flag["Ov"] = "INAT_MOFFSET" 61 imm_flag["Ov"] = "INAT_MOFFSET"
62 imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
62 63
63 modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" 64 modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
64 force64_expr = "\\([df]64\\)" 65 force64_expr = "\\([df]64\\)"
@@ -70,8 +71,12 @@ BEGIN {
70 lprefix3_expr = "\\(F2\\)" 71 lprefix3_expr = "\\(F2\\)"
71 max_lprefix = 4 72 max_lprefix = 4
72 73
73 vexok_expr = "\\(VEX\\)" 74 # All opcodes starting with lower-case 'v' or with (v1) superscript
74 vexonly_expr = "\\(oVEX\\)" 75 # accepts VEX prefix
76 vexok_opcode_expr = "^v.*"
77 vexok_expr = "\\(v1\\)"
78 # All opcodes with (v) superscript supports *only* VEX prefix
79 vexonly_expr = "\\(v\\)"
75 80
76 prefix_expr = "\\(Prefix\\)" 81 prefix_expr = "\\(Prefix\\)"
77 prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" 82 prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@@ -85,8 +90,8 @@ BEGIN {
85 prefix_num["SEG=GS"] = "INAT_PFX_GS" 90 prefix_num["SEG=GS"] = "INAT_PFX_GS"
86 prefix_num["SEG=SS"] = "INAT_PFX_SS" 91 prefix_num["SEG=SS"] = "INAT_PFX_SS"
87 prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" 92 prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
88 prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" 93 prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
89 prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" 94 prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
90 95
91 clear_vars() 96 clear_vars()
92} 97}
@@ -310,12 +315,10 @@ function convert_operands(count,opnd, i,j,imm,mod)
310 if (match(opcode, fpu_expr)) 315 if (match(opcode, fpu_expr))
311 flags = add_flags(flags, "INAT_MODRM") 316 flags = add_flags(flags, "INAT_MODRM")
312 317
313 # check VEX only code 318 # check VEX codes
314 if (match(ext, vexonly_expr)) 319 if (match(ext, vexonly_expr))
315 flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") 320 flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
316 321 else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
317 # check VEX only code
318 if (match(ext, vexok_expr))
319 flags = add_flags(flags, "INAT_VEXOK") 322 flags = add_flags(flags, "INAT_VEXOK")
320 323
321 # check prefixes 324 # check prefixes
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
new file mode 100644
index 000000000000..cc2f8c131286
--- /dev/null
+++ b/arch/x86/tools/insn_sanity.c
@@ -0,0 +1,275 @@
1/*
2 * x86 decoder sanity test - based on test_get_insn.c
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 * Copyright (C) Hitachi, Ltd., 2011
20 */
21
22#include <stdlib.h>
23#include <stdio.h>
24#include <string.h>
25#include <assert.h>
26#include <unistd.h>
27#include <sys/types.h>
28#include <sys/stat.h>
29#include <fcntl.h>
30
31#define unlikely(cond) (cond)
32#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
33
34#include <asm/insn.h>
35#include <inat.c>
36#include <insn.c>
37
38/*
39 * Test of instruction analysis against tampering.
40 * Feed random binary to instruction decoder and ensure not to
41 * access out-of-instruction-buffer.
42 */
43
44#define DEFAULT_MAX_ITER 10000
45#define INSN_NOP 0x90
46
47static const char *prog; /* Program name */
48static int verbose; /* Verbosity */
49static int x86_64; /* x86-64 bit mode flag */
50static unsigned int seed; /* Random seed */
51static unsigned long iter_start; /* Start of iteration number */
52static unsigned long iter_end = DEFAULT_MAX_ITER; /* End of iteration number */
53static FILE *input_file; /* Input file name */
54
55static void usage(const char *err)
56{
57 if (err)
58 fprintf(stderr, "Error: %s\n\n", err);
59 fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog);
60 fprintf(stderr, "\t-y 64bit mode\n");
61 fprintf(stderr, "\t-n 32bit mode\n");
62 fprintf(stderr, "\t-v Verbosity(-vv dumps any decoded result)\n");
63 fprintf(stderr, "\t-s Give a random seed (and iteration number)\n");
64 fprintf(stderr, "\t-m Give a maximum iteration number\n");
65 fprintf(stderr, "\t-i Give an input file with decoded binary\n");
66 exit(1);
67}
68
69static void dump_field(FILE *fp, const char *name, const char *indent,
70 struct insn_field *field)
71{
72 fprintf(fp, "%s.%s = {\n", indent, name);
73 fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n",
74 indent, field->value, field->bytes[0], field->bytes[1],
75 field->bytes[2], field->bytes[3]);
76 fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent,
77 field->got, field->nbytes);
78}
79
80static void dump_insn(FILE *fp, struct insn *insn)
81{
82 fprintf(fp, "Instruction = {\n");
83 dump_field(fp, "prefixes", "\t", &insn->prefixes);
84 dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix);
85 dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix);
86 dump_field(fp, "opcode", "\t", &insn->opcode);
87 dump_field(fp, "modrm", "\t", &insn->modrm);
88 dump_field(fp, "sib", "\t", &insn->sib);
89 dump_field(fp, "displacement", "\t", &insn->displacement);
90 dump_field(fp, "immediate1", "\t", &insn->immediate1);
91 dump_field(fp, "immediate2", "\t", &insn->immediate2);
92 fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n",
93 insn->attr, insn->opnd_bytes, insn->addr_bytes);
94 fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n",
95 insn->length, insn->x86_64, insn->kaddr);
96}
97
98static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter,
99 unsigned char *insn_buf, struct insn *insn)
100{
101 int i;
102
103 fprintf(fp, "%s:\n", msg);
104
105 dump_insn(fp, insn);
106
107 fprintf(fp, "You can reproduce this with below command(s);\n");
108
109 /* Input a decoded instruction sequence directly */
110 fprintf(fp, " $ echo ");
111 for (i = 0; i < MAX_INSN_SIZE; i++)
112 fprintf(fp, " %02x", insn_buf[i]);
113 fprintf(fp, " | %s -i -\n", prog);
114
115 if (!input_file) {
116 fprintf(fp, "Or \n");
117 /* Give a seed and iteration number */
118 fprintf(fp, " $ %s -s 0x%x,%lu\n", prog, seed, nr_iter);
119 }
120}
121
122static void init_random_seed(void)
123{
124 int fd;
125
126 fd = open("/dev/urandom", O_RDONLY);
127 if (fd < 0)
128 goto fail;
129
130 if (read(fd, &seed, sizeof(seed)) != sizeof(seed))
131 goto fail;
132
133 close(fd);
134 return;
135fail:
136 usage("Failed to open /dev/urandom");
137}
138
139/* Read given instruction sequence from the input file */
140static int read_next_insn(unsigned char *insn_buf)
141{
142 char buf[256] = "", *tmp;
143 int i;
144
145 tmp = fgets(buf, ARRAY_SIZE(buf), input_file);
146 if (tmp == NULL || feof(input_file))
147 return 0;
148
149 for (i = 0; i < MAX_INSN_SIZE; i++) {
150 insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16);
151 if (*tmp != ' ')
152 break;
153 }
154
155 return i;
156}
157
158static int generate_insn(unsigned char *insn_buf)
159{
160 int i;
161
162 if (input_file)
163 return read_next_insn(insn_buf);
164
165 /* Fills buffer with random binary up to MAX_INSN_SIZE */
166 for (i = 0; i < MAX_INSN_SIZE - 1; i += 2)
167 *(unsigned short *)(&insn_buf[i]) = random() & 0xffff;
168
169 while (i < MAX_INSN_SIZE)
170 insn_buf[i++] = random() & 0xff;
171
172 return i;
173}
174
175static void parse_args(int argc, char **argv)
176{
177 int c;
178 char *tmp = NULL;
179 int set_seed = 0;
180
181 prog = argv[0];
182 while ((c = getopt(argc, argv, "ynvs:m:i:")) != -1) {
183 switch (c) {
184 case 'y':
185 x86_64 = 1;
186 break;
187 case 'n':
188 x86_64 = 0;
189 break;
190 case 'v':
191 verbose++;
192 break;
193 case 'i':
194 if (strcmp("-", optarg) == 0)
195 input_file = stdin;
196 else
197 input_file = fopen(optarg, "r");
198 if (!input_file)
199 usage("Failed to open input file");
200 break;
201 case 's':
202 seed = (unsigned int)strtoul(optarg, &tmp, 0);
203 if (*tmp == ',') {
204 optarg = tmp + 1;
205 iter_start = strtoul(optarg, &tmp, 0);
206 }
207 if (*tmp != '\0' || tmp == optarg)
208 usage("Failed to parse seed");
209 set_seed = 1;
210 break;
211 case 'm':
212 iter_end = strtoul(optarg, &tmp, 0);
213 if (*tmp != '\0' || tmp == optarg)
214 usage("Failed to parse max_iter");
215 break;
216 default:
217 usage(NULL);
218 }
219 }
220
221 /* Check errors */
222 if (iter_end < iter_start)
223 usage("Max iteration number must be bigger than iter-num");
224
225 if (set_seed && input_file)
226 usage("Don't use input file (-i) with random seed (-s)");
227
228 /* Initialize random seed */
229 if (!input_file) {
230 if (!set_seed) /* No seed is given */
231 init_random_seed();
232 srand(seed);
233 }
234}
235
236int main(int argc, char **argv)
237{
238 struct insn insn;
239 int insns = 0;
240 int errors = 0;
241 unsigned long i;
242 unsigned char insn_buf[MAX_INSN_SIZE * 2];
243
244 parse_args(argc, argv);
245
246 /* Prepare stop bytes with NOPs */
247 memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
248
249 for (i = 0; i < iter_end; i++) {
250 if (generate_insn(insn_buf) <= 0)
251 break;
252
253 if (i < iter_start) /* Skip to given iteration number */
254 continue;
255
256 /* Decode an instruction */
257 insn_init(&insn, insn_buf, x86_64);
258 insn_get_length(&insn);
259
260 if (insn.next_byte <= insn.kaddr ||
261 insn.kaddr + MAX_INSN_SIZE < insn.next_byte) {
262 /* Access out-of-range memory */
263 dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn);
264 errors++;
265 } else if (verbose && !insn_complete(&insn))
266 dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn);
267 else if (verbose >= 2)
268 dump_insn(stdout, &insn);
269 insns++;
270 }
271
272 fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed);
273
274 return errors ? 1 : 0;
275}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 1f928659c338..12eb07bfb267 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1215,8 +1215,6 @@ asmlinkage void __init xen_start_kernel(void)
1215 local_irq_disable(); 1215 local_irq_disable();
1216 early_boot_irqs_disabled = true; 1216 early_boot_irqs_disabled = true;
1217 1217
1218 memblock_init();
1219
1220 xen_raw_console_write("mapping kernel into physical memory\n"); 1218 xen_raw_console_write("mapping kernel into physical memory\n");
1221 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); 1219 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
1222 xen_ident_map_ISA(); 1220 xen_ident_map_ISA();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 87f6673b1207..f4bf8aa574f4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1774,10 +1774,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1774 __xen_write_cr3(true, __pa(pgd)); 1774 __xen_write_cr3(true, __pa(pgd));
1775 xen_mc_issue(PARAVIRT_LAZY_CPU); 1775 xen_mc_issue(PARAVIRT_LAZY_CPU);
1776 1776
1777 memblock_x86_reserve_range(__pa(xen_start_info->pt_base), 1777 memblock_reserve(__pa(xen_start_info->pt_base),
1778 __pa(xen_start_info->pt_base + 1778 xen_start_info->nr_pt_frames * PAGE_SIZE);
1779 xen_start_info->nr_pt_frames * PAGE_SIZE),
1780 "XEN PAGETABLES");
1781 1779
1782 return pgd; 1780 return pgd;
1783} 1781}
@@ -1853,10 +1851,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1853 PFN_DOWN(__pa(initial_page_table))); 1851 PFN_DOWN(__pa(initial_page_table)));
1854 xen_write_cr3(__pa(initial_page_table)); 1852 xen_write_cr3(__pa(initial_page_table));
1855 1853
1856 memblock_x86_reserve_range(__pa(xen_start_info->pt_base), 1854 memblock_reserve(__pa(xen_start_info->pt_base),
1857 __pa(xen_start_info->pt_base + 1855 xen_start_info->nr_pt_frames * PAGE_SIZE));
1858 xen_start_info->nr_pt_frames * PAGE_SIZE),
1859 "XEN PAGETABLES");
1860 1856
1861 return initial_page_table; 1857 return initial_page_table;
1862} 1858}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index b2c7179fa263..e03c63692176 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -75,7 +75,7 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
75 if (i == XEN_EXTRA_MEM_MAX_REGIONS) 75 if (i == XEN_EXTRA_MEM_MAX_REGIONS)
76 printk(KERN_WARNING "Warning: not enough extra memory regions\n"); 76 printk(KERN_WARNING "Warning: not enough extra memory regions\n");
77 77
78 memblock_x86_reserve_range(start, start + size, "XEN EXTRA"); 78 memblock_reserve(start, size);
79 79
80 xen_max_p2m_pfn = PFN_DOWN(start + size); 80 xen_max_p2m_pfn = PFN_DOWN(start + size);
81 81
@@ -311,9 +311,8 @@ char * __init xen_memory_setup(void)
311 * - xen_start_info 311 * - xen_start_info
312 * See comment above "struct start_info" in <xen/interface/xen.h> 312 * See comment above "struct start_info" in <xen/interface/xen.h>
313 */ 313 */
314 memblock_x86_reserve_range(__pa(xen_start_info->mfn_list), 314 memblock_reserve(__pa(xen_start_info->mfn_list),
315 __pa(xen_start_info->pt_base), 315 xen_start_info->pt_base - xen_start_info->mfn_list);
316 "XEN START INFO");
317 316
318 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 317 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
319 318