aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig6
-rw-r--r--arch/x86/Kconfig.cpu29
-rw-r--r--arch/x86/Makefile_32.cpu1
-rw-r--r--arch/x86/configs/i386_defconfig1
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/ia32/ia32entry.S26
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/alternative.c2
-rw-r--r--arch/x86/kernel/apic_32.c4
-rw-r--r--arch/x86/kernel/apic_64.c4
-rw-r--r--arch/x86/kernel/cpu/common.c45
-rw-r--r--arch/x86/kernel/doublefault_32.c2
-rw-r--r--arch/x86/kernel/dumpstack_32.c447
-rw-r--r--arch/x86/kernel/dumpstack_64.c573
-rw-r--r--arch/x86/kernel/entry_32.S22
-rw-r--r--arch/x86/kernel/entry_64.S15
-rw-r--r--arch/x86/kernel/es7000_32.c28
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c23
-rw-r--r--arch/x86/kernel/head.c1
-rw-r--r--arch/x86/kernel/hpet.c6
-rw-r--r--arch/x86/kernel/irqinit_64.c43
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c7
-rw-r--r--arch/x86/kernel/quirks.c41
-rw-r--r--arch/x86/kernel/setup.c10
-rw-r--r--arch/x86/kernel/smpboot.c87
-rw-r--r--arch/x86/kernel/time_32.c7
-rw-r--r--arch/x86/kernel/time_64.c23
-rw-r--r--arch/x86/kernel/traps.c (renamed from arch/x86/kernel/traps_32.c)893
-rw-r--r--arch/x86/kernel/traps_64.c1214
-rw-r--r--arch/x86/mach-generic/es7000.c20
-rw-r--r--arch/x86/mm/Makefile6
-rw-r--r--arch/x86/mm/fault.c5
-rw-r--r--arch/x86/mm/gup.c10
-rw-r--r--arch/x86/mm/init_32.c2
-rw-r--r--arch/x86/mm/init_64.c9
-rw-r--r--arch/x86/mm/ioremap.c143
-rw-r--r--arch/x86/mm/numa_32.c (renamed from arch/x86/mm/discontig_32.c)0
-rw-r--r--arch/x86/mm/srat_64.c2
-rw-r--r--arch/x86/oprofile/Makefile2
-rw-r--r--arch/x86/oprofile/nmi_int.c27
-rw-r--r--arch/x86/oprofile/op_model_amd.c543
-rw-r--r--arch/x86/oprofile/op_model_athlon.c190
-rw-r--r--arch/x86/oprofile/op_x86_model.h4
-rw-r--r--arch/x86/pci/fixup.c28
45 files changed, 2352 insertions, 2206 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fc8351f374fd..f65c2744d573 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -18,6 +18,7 @@ config X86_64
18### Arch settings 18### Arch settings
19config X86 19config X86
20 def_bool y 20 def_bool y
21 select HAVE_AOUT if X86_32
21 select HAVE_UNSTABLE_SCHED_CLOCK 22 select HAVE_UNSTABLE_SCHED_CLOCK
22 select HAVE_IDE 23 select HAVE_IDE
23 select HAVE_OPROFILE 24 select HAVE_OPROFILE
@@ -152,9 +153,6 @@ config AUDIT_ARCH
152 bool 153 bool
153 default X86_64 154 default X86_64
154 155
155config ARCH_SUPPORTS_AOUT
156 def_bool y
157
158config ARCH_SUPPORTS_OPTIMIZED_INLINING 156config ARCH_SUPPORTS_OPTIMIZED_INLINING
159 def_bool y 157 def_bool y
160 158
@@ -1885,7 +1883,7 @@ config IA32_EMULATION
1885 1883
1886config IA32_AOUT 1884config IA32_AOUT
1887 tristate "IA32 a.out support" 1885 tristate "IA32 a.out support"
1888 depends on IA32_EMULATION && ARCH_SUPPORTS_AOUT 1886 depends on IA32_EMULATION
1889 help 1887 help
1890 Support old a.out binaries in the 32bit emulation. 1888 Support old a.out binaries in the 32bit emulation.
1891 1889
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index c5f101360520..0b7c4a3f0651 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -38,8 +38,7 @@ config M386
38 - "Crusoe" for the Transmeta Crusoe series. 38 - "Crusoe" for the Transmeta Crusoe series.
39 - "Efficeon" for the Transmeta Efficeon series. 39 - "Efficeon" for the Transmeta Efficeon series.
40 - "Winchip-C6" for original IDT Winchip. 40 - "Winchip-C6" for original IDT Winchip.
41 - "Winchip-2" for IDT Winchip 2. 41 - "Winchip-2" for IDT Winchips with 3dNow! capabilities.
42 - "Winchip-2A" for IDT Winchips with 3dNow! capabilities.
43 - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). 42 - "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
44 - "Geode GX/LX" For AMD Geode GX and LX processors. 43 - "Geode GX/LX" For AMD Geode GX and LX processors.
45 - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. 44 - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
@@ -194,19 +193,11 @@ config MWINCHIPC6
194 treat this chip as a 586TSC with some extended instructions 193 treat this chip as a 586TSC with some extended instructions
195 and alignment requirements. 194 and alignment requirements.
196 195
197config MWINCHIP2
198 bool "Winchip-2"
199 depends on X86_32
200 help
201 Select this for an IDT Winchip-2. Linux and GCC
202 treat this chip as a 586TSC with some extended instructions
203 and alignment requirements.
204
205config MWINCHIP3D 196config MWINCHIP3D
206 bool "Winchip-2A/Winchip-3" 197 bool "Winchip-2/Winchip-2A/Winchip-3"
207 depends on X86_32 198 depends on X86_32
208 help 199 help
209 Select this for an IDT Winchip-2A or 3. Linux and GCC 200 Select this for an IDT Winchip-2, 2A or 3. Linux and GCC
210 treat this chip as a 586TSC with some extended instructions 201 treat this chip as a 586TSC with some extended instructions
211 and alignment requirements. Also enable out of order memory 202 and alignment requirements. Also enable out of order memory
212 stores for this CPU, which can increase performance of some 203 stores for this CPU, which can increase performance of some
@@ -318,7 +309,7 @@ config X86_L1_CACHE_SHIFT
318 int 309 int
319 default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC 310 default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC
320 default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 311 default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
321 default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX 312 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
322 default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 313 default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7
323 314
324config X86_XADD 315config X86_XADD
@@ -360,7 +351,7 @@ config X86_POPAD_OK
360 351
361config X86_ALIGNMENT_16 352config X86_ALIGNMENT_16
362 def_bool y 353 def_bool y
363 depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 354 depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
364 355
365config X86_INTEL_USERCOPY 356config X86_INTEL_USERCOPY
366 def_bool y 357 def_bool y
@@ -368,7 +359,7 @@ config X86_INTEL_USERCOPY
368 359
369config X86_USE_PPRO_CHECKSUM 360config X86_USE_PPRO_CHECKSUM
370 def_bool y 361 def_bool y
371 depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 362 depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2
372 363
373config X86_USE_3DNOW 364config X86_USE_3DNOW
374 def_bool y 365 def_bool y
@@ -376,7 +367,7 @@ config X86_USE_3DNOW
376 367
377config X86_OOSTORE 368config X86_OOSTORE
378 def_bool y 369 def_bool y
379 depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR 370 depends on (MWINCHIP3D || MWINCHIPC6) && MTRR
380 371
381# 372#
382# P6_NOPs are a relatively minor optimization that require a family >= 373# P6_NOPs are a relatively minor optimization that require a family >=
@@ -396,7 +387,7 @@ config X86_P6_NOP
396 387
397config X86_TSC 388config X86_TSC
398 def_bool y 389 def_bool y
399 depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 390 depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
400 391
401config X86_CMPXCHG64 392config X86_CMPXCHG64
402 def_bool y 393 def_bool y
@@ -406,7 +397,7 @@ config X86_CMPXCHG64
406# generates cmov. 397# generates cmov.
407config X86_CMOV 398config X86_CMOV
408 def_bool y 399 def_bool y
409 depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || X86_64) 400 depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64)
410 401
411config X86_MINIMUM_CPU_FAMILY 402config X86_MINIMUM_CPU_FAMILY
412 int 403 int
@@ -417,7 +408,7 @@ config X86_MINIMUM_CPU_FAMILY
417 408
418config X86_DEBUGCTLMSR 409config X86_DEBUGCTLMSR
419 def_bool y 410 def_bool y
420 depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) 411 depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
421 412
422menuconfig PROCESSOR_SELECT 413menuconfig PROCESSOR_SELECT
423 bool "Supported processor vendors" if EMBEDDED 414 bool "Supported processor vendors" if EMBEDDED
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index b72b4f753113..80177ec052f0 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -28,7 +28,6 @@ cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
28cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 28cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
29cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 29cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
30cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) 30cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
31cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586)
32cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) 31cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586)
33cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 32cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
34cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) 33cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index ca226ca31288..52d0359719d7 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -213,7 +213,6 @@ CONFIG_M686=y
213# CONFIG_MCRUSOE is not set 213# CONFIG_MCRUSOE is not set
214# CONFIG_MEFFICEON is not set 214# CONFIG_MEFFICEON is not set
215# CONFIG_MWINCHIPC6 is not set 215# CONFIG_MWINCHIPC6 is not set
216# CONFIG_MWINCHIP2 is not set
217# CONFIG_MWINCHIP3D is not set 216# CONFIG_MWINCHIP3D is not set
218# CONFIG_MGEODEGX1 is not set 217# CONFIG_MGEODEGX1 is not set
219# CONFIG_MGEODE_LX is not set 218# CONFIG_MGEODE_LX is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 2c4b1c771e28..f0a03d7a7d63 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -210,7 +210,6 @@ CONFIG_X86_PC=y
210# CONFIG_MCRUSOE is not set 210# CONFIG_MCRUSOE is not set
211# CONFIG_MEFFICEON is not set 211# CONFIG_MEFFICEON is not set
212# CONFIG_MWINCHIPC6 is not set 212# CONFIG_MWINCHIPC6 is not set
213# CONFIG_MWINCHIP2 is not set
214# CONFIG_MWINCHIP3D is not set 213# CONFIG_MWINCHIP3D is not set
215# CONFIG_MGEODEGX1 is not set 214# CONFIG_MGEODEGX1 is not set
216# CONFIG_MGEODE_LX is not set 215# CONFIG_MGEODE_LX is not set
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index ffc1bb4fed7d..eb4314768bf7 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -39,11 +39,11 @@
39 .endm 39 .endm
40 40
41 /* clobbers %eax */ 41 /* clobbers %eax */
42 .macro CLEAR_RREGS 42 .macro CLEAR_RREGS _r9=rax
43 xorl %eax,%eax 43 xorl %eax,%eax
44 movq %rax,R11(%rsp) 44 movq %rax,R11(%rsp)
45 movq %rax,R10(%rsp) 45 movq %rax,R10(%rsp)
46 movq %rax,R9(%rsp) 46 movq %\_r9,R9(%rsp)
47 movq %rax,R8(%rsp) 47 movq %rax,R8(%rsp)
48 .endm 48 .endm
49 49
@@ -52,11 +52,10 @@
52 * We don't reload %eax because syscall_trace_enter() returned 52 * We don't reload %eax because syscall_trace_enter() returned
53 * the value it wants us to use in the table lookup. 53 * the value it wants us to use in the table lookup.
54 */ 54 */
55 .macro LOAD_ARGS32 offset 55 .macro LOAD_ARGS32 offset, _r9=0
56 movl \offset(%rsp),%r11d 56 .if \_r9
57 movl \offset+8(%rsp),%r10d
58 movl \offset+16(%rsp),%r9d 57 movl \offset+16(%rsp),%r9d
59 movl \offset+24(%rsp),%r8d 58 .endif
60 movl \offset+40(%rsp),%ecx 59 movl \offset+40(%rsp),%ecx
61 movl \offset+48(%rsp),%edx 60 movl \offset+48(%rsp),%edx
62 movl \offset+56(%rsp),%esi 61 movl \offset+56(%rsp),%esi
@@ -145,7 +144,7 @@ ENTRY(ia32_sysenter_target)
145 SAVE_ARGS 0,0,1 144 SAVE_ARGS 0,0,1
146 /* no need to do an access_ok check here because rbp has been 145 /* no need to do an access_ok check here because rbp has been
147 32bit zero extended */ 146 32bit zero extended */
1481: movl (%rbp),%r9d 1471: movl (%rbp),%ebp
149 .section __ex_table,"a" 148 .section __ex_table,"a"
150 .quad 1b,ia32_badarg 149 .quad 1b,ia32_badarg
151 .previous 150 .previous
@@ -157,7 +156,7 @@ ENTRY(ia32_sysenter_target)
157 cmpl $(IA32_NR_syscalls-1),%eax 156 cmpl $(IA32_NR_syscalls-1),%eax
158 ja ia32_badsys 157 ja ia32_badsys
159sysenter_do_call: 158sysenter_do_call:
160 IA32_ARG_FIXUP 1 159 IA32_ARG_FIXUP
161sysenter_dispatch: 160sysenter_dispatch:
162 call *ia32_sys_call_table(,%rax,8) 161 call *ia32_sys_call_table(,%rax,8)
163 movq %rax,RAX-ARGOFFSET(%rsp) 162 movq %rax,RAX-ARGOFFSET(%rsp)
@@ -234,20 +233,17 @@ sysexit_audit:
234#endif 233#endif
235 234
236sysenter_tracesys: 235sysenter_tracesys:
237 xchgl %r9d,%ebp
238#ifdef CONFIG_AUDITSYSCALL 236#ifdef CONFIG_AUDITSYSCALL
239 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) 237 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
240 jz sysenter_auditsys 238 jz sysenter_auditsys
241#endif 239#endif
242 SAVE_REST 240 SAVE_REST
243 CLEAR_RREGS 241 CLEAR_RREGS
244 movq %r9,R9(%rsp)
245 movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ 242 movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
246 movq %rsp,%rdi /* &pt_regs -> arg1 */ 243 movq %rsp,%rdi /* &pt_regs -> arg1 */
247 call syscall_trace_enter 244 call syscall_trace_enter
248 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ 245 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
249 RESTORE_REST 246 RESTORE_REST
250 xchgl %ebp,%r9d
251 cmpl $(IA32_NR_syscalls-1),%eax 247 cmpl $(IA32_NR_syscalls-1),%eax
252 ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ 248 ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
253 jmp sysenter_do_call 249 jmp sysenter_do_call
@@ -314,9 +310,9 @@ ENTRY(ia32_cstar_target)
314 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) 310 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
315 CFI_REMEMBER_STATE 311 CFI_REMEMBER_STATE
316 jnz cstar_tracesys 312 jnz cstar_tracesys
317cstar_do_call:
318 cmpl $IA32_NR_syscalls-1,%eax 313 cmpl $IA32_NR_syscalls-1,%eax
319 ja ia32_badsys 314 ja ia32_badsys
315cstar_do_call:
320 IA32_ARG_FIXUP 1 316 IA32_ARG_FIXUP 1
321cstar_dispatch: 317cstar_dispatch:
322 call *ia32_sys_call_table(,%rax,8) 318 call *ia32_sys_call_table(,%rax,8)
@@ -357,15 +353,13 @@ cstar_tracesys:
357#endif 353#endif
358 xchgl %r9d,%ebp 354 xchgl %r9d,%ebp
359 SAVE_REST 355 SAVE_REST
360 CLEAR_RREGS 356 CLEAR_RREGS r9
361 movq %r9,R9(%rsp)
362 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ 357 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
363 movq %rsp,%rdi /* &pt_regs -> arg1 */ 358 movq %rsp,%rdi /* &pt_regs -> arg1 */
364 call syscall_trace_enter 359 call syscall_trace_enter
365 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ 360 LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
366 RESTORE_REST 361 RESTORE_REST
367 xchgl %ebp,%r9d 362 xchgl %ebp,%r9d
368 movl RSP-ARGOFFSET(%rsp), %r8d
369 cmpl $(IA32_NR_syscalls-1),%eax 363 cmpl $(IA32_NR_syscalls-1),%eax
370 ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ 364 ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
371 jmp cstar_do_call 365 jmp cstar_do_call
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5098585f87ce..0d41f0343dc0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ CFLAGS_hpet.o := $(nostackp)
23CFLAGS_tsc.o := $(nostackp) 23CFLAGS_tsc.o := $(nostackp)
24 24
25obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o 25obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
26obj-y += traps_$(BITS).o irq_$(BITS).o 26obj-y += traps.o irq_$(BITS).o dumpstack_$(BITS).o
27obj-y += time_$(BITS).o ioport.o ldt.o 27obj-y += time_$(BITS).o ioport.o ldt.o
28obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o 28obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
29obj-$(CONFIG_X86_VISWS) += visws_quirks.o 29obj-$(CONFIG_X86_VISWS) += visws_quirks.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index fb04e49776ba..a84ac7b570e6 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -444,7 +444,7 @@ void __init alternative_instructions(void)
444 _text, _etext); 444 _text, _etext);
445 445
446 /* Only switch to UP mode if we don't immediately boot others */ 446 /* Only switch to UP mode if we don't immediately boot others */
447 if (num_possible_cpus() == 1 || setup_max_cpus <= 1) 447 if (num_present_cpus() == 1 || setup_max_cpus <= 1)
448 alternatives_smp_switch(0); 448 alternatives_smp_switch(0);
449 } 449 }
450#endif 450#endif
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index a91c57cb666a..21c831d96af3 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -295,6 +295,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
295 * 295 *
296 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and 296 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
297 * MCE interrupts are supported. Thus MCE offset must be set to 0. 297 * MCE interrupts are supported. Thus MCE offset must be set to 0.
298 *
299 * If mask=1, the LVT entry does not generate interrupts while mask=0
300 * enables the vector. See also the BKDGs.
298 */ 301 */
299 302
300#define APIC_EILVT_LVTOFF_MCE 0 303#define APIC_EILVT_LVTOFF_MCE 0
@@ -319,6 +322,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
319 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); 322 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
320 return APIC_EILVT_LVTOFF_IBS; 323 return APIC_EILVT_LVTOFF_IBS;
321} 324}
325EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
322 326
323/* 327/*
324 * Program the next event, relative to now 328 * Program the next event, relative to now
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 53898b65a6ae..94ddb69ae15e 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -307,6 +307,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
307 * 307 *
308 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and 308 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
309 * MCE interrupts are supported. Thus MCE offset must be set to 0. 309 * MCE interrupts are supported. Thus MCE offset must be set to 0.
310 *
311 * If mask=1, the LVT entry does not generate interrupts while mask=0
312 * enables the vector. See also the BKDGs.
310 */ 313 */
311 314
312#define APIC_EILVT_LVTOFF_MCE 0 315#define APIC_EILVT_LVTOFF_MCE 0
@@ -331,6 +334,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
331 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); 334 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
332 return APIC_EILVT_LVTOFF_IBS; 335 return APIC_EILVT_LVTOFF_IBS;
333} 336}
337EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
334 338
335/* 339/*
336 * Program the next event, relative to now 340 * Program the next event, relative to now
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fb789dd9e691..25581dcb280e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -124,18 +124,25 @@ static inline int flag_is_changeable_p(u32 flag)
124{ 124{
125 u32 f1, f2; 125 u32 f1, f2;
126 126
127 asm("pushfl\n\t" 127 /*
128 "pushfl\n\t" 128 * Cyrix and IDT cpus allow disabling of CPUID
129 "popl %0\n\t" 129 * so the code below may return different results
130 "movl %0,%1\n\t" 130 * when it is executed before and after enabling
131 "xorl %2,%0\n\t" 131 * the CPUID. Add "volatile" to not allow gcc to
132 "pushl %0\n\t" 132 * optimize the subsequent calls to this function.
133 "popfl\n\t" 133 */
134 "pushfl\n\t" 134 asm volatile ("pushfl\n\t"
135 "popl %0\n\t" 135 "pushfl\n\t"
136 "popfl\n\t" 136 "popl %0\n\t"
137 : "=&r" (f1), "=&r" (f2) 137 "movl %0,%1\n\t"
138 : "ir" (flag)); 138 "xorl %2,%0\n\t"
139 "pushl %0\n\t"
140 "popfl\n\t"
141 "pushfl\n\t"
142 "popl %0\n\t"
143 "popfl\n\t"
144 : "=&r" (f1), "=&r" (f2)
145 : "ir" (flag));
139 146
140 return ((f1^f2) & flag) != 0; 147 return ((f1^f2) & flag) != 0;
141} 148}
@@ -719,12 +726,24 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
719#endif 726#endif
720} 727}
721 728
729#ifdef CONFIG_X86_64
730static void vgetcpu_set_mode(void)
731{
732 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
733 vgetcpu_mode = VGETCPU_RDTSCP;
734 else
735 vgetcpu_mode = VGETCPU_LSL;
736}
737#endif
738
722void __init identify_boot_cpu(void) 739void __init identify_boot_cpu(void)
723{ 740{
724 identify_cpu(&boot_cpu_data); 741 identify_cpu(&boot_cpu_data);
725#ifdef CONFIG_X86_32 742#ifdef CONFIG_X86_32
726 sysenter_setup(); 743 sysenter_setup();
727 enable_sep_cpu(); 744 enable_sep_cpu();
745#else
746 vgetcpu_set_mode();
728#endif 747#endif
729} 748}
730 749
@@ -797,7 +816,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
797 else if (c->cpuid_level >= 0) 816 else if (c->cpuid_level >= 0)
798 vendor = c->x86_vendor_id; 817 vendor = c->x86_vendor_id;
799 818
800 if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) 819 if (vendor && !strstr(c->x86_model_id, vendor))
801 printk(KERN_CONT "%s ", vendor); 820 printk(KERN_CONT "%s ", vendor);
802 821
803 if (c->x86_model_id[0]) 822 if (c->x86_model_id[0])
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
index 395acb12b0d1..b4f14c6c09d9 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault_32.c
@@ -66,6 +66,6 @@ struct tss_struct doublefault_tss __cacheline_aligned = {
66 .ds = __USER_DS, 66 .ds = __USER_DS,
67 .fs = __KERNEL_PERCPU, 67 .fs = __KERNEL_PERCPU,
68 68
69 .__cr3 = __phys_addr_const((unsigned long)swapper_pg_dir) 69 .__cr3 = __pa_nodebug(swapper_pg_dir),
70 } 70 }
71}; 71};
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
new file mode 100644
index 000000000000..201ee359a1a9
--- /dev/null
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -0,0 +1,447 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5#include <linux/kallsyms.h>
6#include <linux/kprobes.h>
7#include <linux/uaccess.h>
8#include <linux/utsname.h>
9#include <linux/hardirq.h>
10#include <linux/kdebug.h>
11#include <linux/module.h>
12#include <linux/ptrace.h>
13#include <linux/kexec.h>
14#include <linux/bug.h>
15#include <linux/nmi.h>
16
17#include <asm/stacktrace.h>
18
19#define STACKSLOTS_PER_LINE 8
20#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
21
22int panic_on_unrecovered_nmi;
23int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
24static unsigned int code_bytes = 64;
25static int die_counter;
26
27void printk_address(unsigned long address, int reliable)
28{
29 printk(" [<%p>] %s%pS\n", (void *) address,
30 reliable ? "" : "? ", (void *) address);
31}
32
33static inline int valid_stack_ptr(struct thread_info *tinfo,
34 void *p, unsigned int size, void *end)
35{
36 void *t = tinfo;
37 if (end) {
38 if (p < end && p >= (end-THREAD_SIZE))
39 return 1;
40 else
41 return 0;
42 }
43 return p > t && p < t + THREAD_SIZE - size;
44}
45
46/* The form of the top of the frame on the stack */
47struct stack_frame {
48 struct stack_frame *next_frame;
49 unsigned long return_address;
50};
51
52static inline unsigned long
53print_context_stack(struct thread_info *tinfo,
54 unsigned long *stack, unsigned long bp,
55 const struct stacktrace_ops *ops, void *data,
56 unsigned long *end)
57{
58 struct stack_frame *frame = (struct stack_frame *)bp;
59
60 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
61 unsigned long addr;
62
63 addr = *stack;
64 if (__kernel_text_address(addr)) {
65 if ((unsigned long) stack == bp + sizeof(long)) {
66 ops->address(data, addr, 1);
67 frame = frame->next_frame;
68 bp = (unsigned long) frame;
69 } else {
70 ops->address(data, addr, bp == 0);
71 }
72 }
73 stack++;
74 }
75 return bp;
76}
77
78void dump_trace(struct task_struct *task, struct pt_regs *regs,
79 unsigned long *stack, unsigned long bp,
80 const struct stacktrace_ops *ops, void *data)
81{
82 if (!task)
83 task = current;
84
85 if (!stack) {
86 unsigned long dummy;
87 stack = &dummy;
88 if (task && task != current)
89 stack = (unsigned long *)task->thread.sp;
90 }
91
92#ifdef CONFIG_FRAME_POINTER
93 if (!bp) {
94 if (task == current) {
95 /* Grab bp right from our regs */
96 get_bp(bp);
97 } else {
98 /* bp is the last reg pushed by switch_to */
99 bp = *(unsigned long *) task->thread.sp;
100 }
101 }
102#endif
103
104 for (;;) {
105 struct thread_info *context;
106
107 context = (struct thread_info *)
108 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
109 bp = print_context_stack(context, stack, bp, ops, data, NULL);
110
111 stack = (unsigned long *)context->previous_esp;
112 if (!stack)
113 break;
114 if (ops->stack(data, "IRQ") < 0)
115 break;
116 touch_nmi_watchdog();
117 }
118}
119EXPORT_SYMBOL(dump_trace);
120
121static void
122print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
123{
124 printk(data);
125 print_symbol(msg, symbol);
126 printk("\n");
127}
128
129static void print_trace_warning(void *data, char *msg)
130{
131 printk("%s%s\n", (char *)data, msg);
132}
133
134static int print_trace_stack(void *data, char *name)
135{
136 printk("%s <%s> ", (char *)data, name);
137 return 0;
138}
139
140/*
141 * Print one address/symbol entries per line.
142 */
143static void print_trace_address(void *data, unsigned long addr, int reliable)
144{
145 touch_nmi_watchdog();
146 printk(data);
147 printk_address(addr, reliable);
148}
149
150static const struct stacktrace_ops print_trace_ops = {
151 .warning = print_trace_warning,
152 .warning_symbol = print_trace_warning_symbol,
153 .stack = print_trace_stack,
154 .address = print_trace_address,
155};
156
157static void
158show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
159 unsigned long *stack, unsigned long bp, char *log_lvl)
160{
161 printk("%sCall Trace:\n", log_lvl);
162 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
163}
164
165void show_trace(struct task_struct *task, struct pt_regs *regs,
166 unsigned long *stack, unsigned long bp)
167{
168 show_trace_log_lvl(task, regs, stack, bp, "");
169}
170
171static void
172show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
173 unsigned long *sp, unsigned long bp, char *log_lvl)
174{
175 unsigned long *stack;
176 int i;
177
178 if (sp == NULL) {
179 if (task)
180 sp = (unsigned long *)task->thread.sp;
181 else
182 sp = (unsigned long *)&sp;
183 }
184
185 stack = sp;
186 for (i = 0; i < kstack_depth_to_print; i++) {
187 if (kstack_end(stack))
188 break;
189 if (i && ((i % STACKSLOTS_PER_LINE) == 0))
190 printk("\n%s", log_lvl);
191 printk(" %08lx", *stack++);
192 touch_nmi_watchdog();
193 }
194 printk("\n");
195 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
196}
197
198void show_stack(struct task_struct *task, unsigned long *sp)
199{
200 show_stack_log_lvl(task, NULL, sp, 0, "");
201}
202
203/*
204 * The architecture-independent dump_stack generator
205 */
206void dump_stack(void)
207{
208 unsigned long bp = 0;
209 unsigned long stack;
210
211#ifdef CONFIG_FRAME_POINTER
212 if (!bp)
213 get_bp(bp);
214#endif
215
216 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
217 current->pid, current->comm, print_tainted(),
218 init_utsname()->release,
219 (int)strcspn(init_utsname()->version, " "),
220 init_utsname()->version);
221 show_trace(NULL, NULL, &stack, bp);
222}
223
224EXPORT_SYMBOL(dump_stack);
225
226void show_registers(struct pt_regs *regs)
227{
228 int i;
229
230 print_modules();
231 __show_regs(regs, 0);
232
233 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
234 TASK_COMM_LEN, current->comm, task_pid_nr(current),
235 current_thread_info(), current, task_thread_info(current));
236 /*
237 * When in-kernel, we also print out the stack and code at the
238 * time of the fault..
239 */
240 if (!user_mode_vm(regs)) {
241 unsigned int code_prologue = code_bytes * 43 / 64;
242 unsigned int code_len = code_bytes;
243 unsigned char c;
244 u8 *ip;
245
246 printk(KERN_EMERG "Stack:\n");
247 show_stack_log_lvl(NULL, regs, &regs->sp,
248 0, KERN_EMERG);
249
250 printk(KERN_EMERG "Code: ");
251
252 ip = (u8 *)regs->ip - code_prologue;
253 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
254 /* try starting at IP */
255 ip = (u8 *)regs->ip;
256 code_len = code_len - code_prologue + 1;
257 }
258 for (i = 0; i < code_len; i++, ip++) {
259 if (ip < (u8 *)PAGE_OFFSET ||
260 probe_kernel_address(ip, c)) {
261 printk(" Bad EIP value.");
262 break;
263 }
264 if (ip == (u8 *)regs->ip)
265 printk("<%02x> ", c);
266 else
267 printk("%02x ", c);
268 }
269 }
270 printk("\n");
271}
272
273int is_valid_bugaddr(unsigned long ip)
274{
275 unsigned short ud2;
276
277 if (ip < PAGE_OFFSET)
278 return 0;
279 if (probe_kernel_address((unsigned short *)ip, ud2))
280 return 0;
281
282 return ud2 == 0x0b0f;
283}
284
285static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
286static int die_owner = -1;
287static unsigned int die_nest_count;
288
289unsigned __kprobes long oops_begin(void)
290{
291 unsigned long flags;
292
293 oops_enter();
294
295 if (die_owner != raw_smp_processor_id()) {
296 console_verbose();
297 raw_local_irq_save(flags);
298 __raw_spin_lock(&die_lock);
299 die_owner = smp_processor_id();
300 die_nest_count = 0;
301 bust_spinlocks(1);
302 } else {
303 raw_local_irq_save(flags);
304 }
305 die_nest_count++;
306 return flags;
307}
308
309void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
310{
311 bust_spinlocks(0);
312 die_owner = -1;
313 add_taint(TAINT_DIE);
314 __raw_spin_unlock(&die_lock);
315 raw_local_irq_restore(flags);
316
317 if (!regs)
318 return;
319
320 if (kexec_should_crash(current))
321 crash_kexec(regs);
322 if (in_interrupt())
323 panic("Fatal exception in interrupt");
324 if (panic_on_oops)
325 panic("Fatal exception");
326 oops_exit();
327 do_exit(signr);
328}
329
330int __kprobes __die(const char *str, struct pt_regs *regs, long err)
331{
332 unsigned short ss;
333 unsigned long sp;
334
335 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
336#ifdef CONFIG_PREEMPT
337 printk("PREEMPT ");
338#endif
339#ifdef CONFIG_SMP
340 printk("SMP ");
341#endif
342#ifdef CONFIG_DEBUG_PAGEALLOC
343 printk("DEBUG_PAGEALLOC");
344#endif
345 printk("\n");
346 if (notify_die(DIE_OOPS, str, regs, err,
347 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
348 return 1;
349
350 show_registers(regs);
351 /* Executive summary in case the oops scrolled away */
352 sp = (unsigned long) (&regs->sp);
353 savesegment(ss, ss);
354 if (user_mode(regs)) {
355 sp = regs->sp;
356 ss = regs->ss & 0xffff;
357 }
358 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
359 print_symbol("%s", regs->ip);
360 printk(" SS:ESP %04x:%08lx\n", ss, sp);
361 return 0;
362}
363
364/*
365 * This is gone through when something in the kernel has done something bad
366 * and is about to be terminated:
367 */
368void die(const char *str, struct pt_regs *regs, long err)
369{
370 unsigned long flags = oops_begin();
371
372 if (die_nest_count < 3) {
373 report_bug(regs->ip, regs);
374
375 if (__die(str, regs, err))
376 regs = NULL;
377 } else {
378 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
379 }
380
381 oops_end(flags, regs, SIGSEGV);
382}
383
384static DEFINE_SPINLOCK(nmi_print_lock);
385
386void notrace __kprobes
387die_nmi(char *str, struct pt_regs *regs, int do_panic)
388{
389 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
390 return;
391
392 spin_lock(&nmi_print_lock);
393 /*
394 * We are in trouble anyway, lets at least try
395 * to get a message out:
396 */
397 bust_spinlocks(1);
398 printk(KERN_EMERG "%s", str);
399 printk(" on CPU%d, ip %08lx, registers:\n",
400 smp_processor_id(), regs->ip);
401 show_registers(regs);
402 if (do_panic)
403 panic("Non maskable interrupt");
404 console_silent();
405 spin_unlock(&nmi_print_lock);
406 bust_spinlocks(0);
407
408 /*
409 * If we are in kernel we are probably nested up pretty bad
410 * and might aswell get out now while we still can:
411 */
412 if (!user_mode_vm(regs)) {
413 current->thread.trap_no = 2;
414 crash_kexec(regs);
415 }
416
417 do_exit(SIGSEGV);
418}
419
420static int __init oops_setup(char *s)
421{
422 if (!s)
423 return -EINVAL;
424 if (!strcmp(s, "panic"))
425 panic_on_oops = 1;
426 return 0;
427}
428early_param("oops", oops_setup);
429
430static int __init kstack_setup(char *s)
431{
432 if (!s)
433 return -EINVAL;
434 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
435 return 0;
436}
437early_param("kstack", kstack_setup);
438
439static int __init code_bytes_setup(char *s)
440{
441 code_bytes = simple_strtoul(s, NULL, 0);
442 if (code_bytes > 8192)
443 code_bytes = 8192;
444
445 return 1;
446}
447__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
new file mode 100644
index 000000000000..086cc8118e39
--- /dev/null
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -0,0 +1,573 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5#include <linux/kallsyms.h>
6#include <linux/kprobes.h>
7#include <linux/uaccess.h>
8#include <linux/utsname.h>
9#include <linux/hardirq.h>
10#include <linux/kdebug.h>
11#include <linux/module.h>
12#include <linux/ptrace.h>
13#include <linux/kexec.h>
14#include <linux/bug.h>
15#include <linux/nmi.h>
16
17#include <asm/stacktrace.h>
18
19#define STACKSLOTS_PER_LINE 4
20#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
21
22int panic_on_unrecovered_nmi;
23int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
24static unsigned int code_bytes = 64;
25static int die_counter;
26
27void printk_address(unsigned long address, int reliable)
28{
29 printk(" [<%p>] %s%pS\n", (void *) address,
30 reliable ? "" : "? ", (void *) address);
31}
32
33static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
34 unsigned *usedp, char **idp)
35{
36 static char ids[][8] = {
37 [DEBUG_STACK - 1] = "#DB",
38 [NMI_STACK - 1] = "NMI",
39 [DOUBLEFAULT_STACK - 1] = "#DF",
40 [STACKFAULT_STACK - 1] = "#SS",
41 [MCE_STACK - 1] = "#MC",
42#if DEBUG_STKSZ > EXCEPTION_STKSZ
43 [N_EXCEPTION_STACKS ...
44 N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
45#endif
46 };
47 unsigned k;
48
49 /*
50 * Iterate over all exception stacks, and figure out whether
51 * 'stack' is in one of them:
52 */
53 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
54 unsigned long end = per_cpu(orig_ist, cpu).ist[k];
55 /*
56 * Is 'stack' above this exception frame's end?
57 * If yes then skip to the next frame.
58 */
59 if (stack >= end)
60 continue;
61 /*
62 * Is 'stack' above this exception frame's start address?
63 * If yes then we found the right frame.
64 */
65 if (stack >= end - EXCEPTION_STKSZ) {
66 /*
67 * Make sure we only iterate through an exception
68 * stack once. If it comes up for the second time
69 * then there's something wrong going on - just
70 * break out and return NULL:
71 */
72 if (*usedp & (1U << k))
73 break;
74 *usedp |= 1U << k;
75 *idp = ids[k];
76 return (unsigned long *)end;
77 }
78 /*
79 * If this is a debug stack, and if it has a larger size than
80 * the usual exception stacks, then 'stack' might still
81 * be within the lower portion of the debug stack:
82 */
83#if DEBUG_STKSZ > EXCEPTION_STKSZ
84 if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
85 unsigned j = N_EXCEPTION_STACKS - 1;
86
87 /*
88 * Black magic. A large debug stack is composed of
89 * multiple exception stack entries, which we
90 * iterate through now. Dont look:
91 */
92 do {
93 ++j;
94 end -= EXCEPTION_STKSZ;
95 ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
96 } while (stack < end - EXCEPTION_STKSZ);
97 if (*usedp & (1U << j))
98 break;
99 *usedp |= 1U << j;
100 *idp = ids[j];
101 return (unsigned long *)end;
102 }
103#endif
104 }
105 return NULL;
106}
107
108/*
109 * x86-64 can have up to three kernel stacks:
110 * process stack
111 * interrupt stack
112 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
113 */
114
115static inline int valid_stack_ptr(struct thread_info *tinfo,
116 void *p, unsigned int size, void *end)
117{
118 void *t = tinfo;
119 if (end) {
120 if (p < end && p >= (end-THREAD_SIZE))
121 return 1;
122 else
123 return 0;
124 }
125 return p > t && p < t + THREAD_SIZE - size;
126}
127
128/* The form of the top of the frame on the stack */
129struct stack_frame {
130 struct stack_frame *next_frame;
131 unsigned long return_address;
132};
133
134static inline unsigned long
135print_context_stack(struct thread_info *tinfo,
136 unsigned long *stack, unsigned long bp,
137 const struct stacktrace_ops *ops, void *data,
138 unsigned long *end)
139{
140 struct stack_frame *frame = (struct stack_frame *)bp;
141
142 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
143 unsigned long addr;
144
145 addr = *stack;
146 if (__kernel_text_address(addr)) {
147 if ((unsigned long) stack == bp + sizeof(long)) {
148 ops->address(data, addr, 1);
149 frame = frame->next_frame;
150 bp = (unsigned long) frame;
151 } else {
152 ops->address(data, addr, bp == 0);
153 }
154 }
155 stack++;
156 }
157 return bp;
158}
159
160void dump_trace(struct task_struct *task, struct pt_regs *regs,
161 unsigned long *stack, unsigned long bp,
162 const struct stacktrace_ops *ops, void *data)
163{
164 const unsigned cpu = get_cpu();
165 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
166 unsigned used = 0;
167 struct thread_info *tinfo;
168
169 if (!task)
170 task = current;
171
172 if (!stack) {
173 unsigned long dummy;
174 stack = &dummy;
175 if (task && task != current)
176 stack = (unsigned long *)task->thread.sp;
177 }
178
179#ifdef CONFIG_FRAME_POINTER
180 if (!bp) {
181 if (task == current) {
182 /* Grab bp right from our regs */
183 get_bp(bp);
184 } else {
185 /* bp is the last reg pushed by switch_to */
186 bp = *(unsigned long *) task->thread.sp;
187 }
188 }
189#endif
190
191 /*
192 * Print function call entries in all stacks, starting at the
193 * current stack address. If the stacks consist of nested
194 * exceptions
195 */
196 tinfo = task_thread_info(task);
197 for (;;) {
198 char *id;
199 unsigned long *estack_end;
200 estack_end = in_exception_stack(cpu, (unsigned long)stack,
201 &used, &id);
202
203 if (estack_end) {
204 if (ops->stack(data, id) < 0)
205 break;
206
207 bp = print_context_stack(tinfo, stack, bp, ops,
208 data, estack_end);
209 ops->stack(data, "<EOE>");
210 /*
211 * We link to the next stack via the
212 * second-to-last pointer (index -2 to end) in the
213 * exception stack:
214 */
215 stack = (unsigned long *) estack_end[-2];
216 continue;
217 }
218 if (irqstack_end) {
219 unsigned long *irqstack;
220 irqstack = irqstack_end -
221 (IRQSTACKSIZE - 64) / sizeof(*irqstack);
222
223 if (stack >= irqstack && stack < irqstack_end) {
224 if (ops->stack(data, "IRQ") < 0)
225 break;
226 bp = print_context_stack(tinfo, stack, bp,
227 ops, data, irqstack_end);
228 /*
229 * We link to the next stack (which would be
230 * the process stack normally) the last
231 * pointer (index -1 to end) in the IRQ stack:
232 */
233 stack = (unsigned long *) (irqstack_end[-1]);
234 irqstack_end = NULL;
235 ops->stack(data, "EOI");
236 continue;
237 }
238 }
239 break;
240 }
241
242 /*
243 * This handles the process stack:
244 */
245 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL);
246 put_cpu();
247}
248EXPORT_SYMBOL(dump_trace);
249
250static void
251print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
252{
253 printk(data);
254 print_symbol(msg, symbol);
255 printk("\n");
256}
257
258static void print_trace_warning(void *data, char *msg)
259{
260 printk("%s%s\n", (char *)data, msg);
261}
262
263static int print_trace_stack(void *data, char *name)
264{
265 printk("%s <%s> ", (char *)data, name);
266 return 0;
267}
268
269/*
270 * Print one address/symbol entries per line.
271 */
272static void print_trace_address(void *data, unsigned long addr, int reliable)
273{
274 touch_nmi_watchdog();
275 printk(data);
276 printk_address(addr, reliable);
277}
278
279static const struct stacktrace_ops print_trace_ops = {
280 .warning = print_trace_warning,
281 .warning_symbol = print_trace_warning_symbol,
282 .stack = print_trace_stack,
283 .address = print_trace_address,
284};
285
286static void
287show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
288 unsigned long *stack, unsigned long bp, char *log_lvl)
289{
290 printk("%sCall Trace:\n", log_lvl);
291 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
292}
293
294void show_trace(struct task_struct *task, struct pt_regs *regs,
295 unsigned long *stack, unsigned long bp)
296{
297 show_trace_log_lvl(task, regs, stack, bp, "");
298}
299
300static void
301show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
302 unsigned long *sp, unsigned long bp, char *log_lvl)
303{
304 unsigned long *stack;
305 int i;
306 const int cpu = smp_processor_id();
307 unsigned long *irqstack_end =
308 (unsigned long *) (cpu_pda(cpu)->irqstackptr);
309 unsigned long *irqstack =
310 (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
311
312 /*
313 * debugging aid: "show_stack(NULL, NULL);" prints the
314 * back trace for this cpu.
315 */
316
317 if (sp == NULL) {
318 if (task)
319 sp = (unsigned long *)task->thread.sp;
320 else
321 sp = (unsigned long *)&sp;
322 }
323
324 stack = sp;
325 for (i = 0; i < kstack_depth_to_print; i++) {
326 if (stack >= irqstack && stack <= irqstack_end) {
327 if (stack == irqstack_end) {
328 stack = (unsigned long *) (irqstack_end[-1]);
329 printk(" <EOI> ");
330 }
331 } else {
332 if (((long) stack & (THREAD_SIZE-1)) == 0)
333 break;
334 }
335 if (i && ((i % STACKSLOTS_PER_LINE) == 0))
336 printk("\n%s", log_lvl);
337 printk(" %016lx", *stack++);
338 touch_nmi_watchdog();
339 }
340 printk("\n");
341 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
342}
343
344void show_stack(struct task_struct *task, unsigned long *sp)
345{
346 show_stack_log_lvl(task, NULL, sp, 0, "");
347}
348
349/*
350 * The architecture-independent dump_stack generator
351 */
352void dump_stack(void)
353{
354 unsigned long bp = 0;
355 unsigned long stack;
356
357#ifdef CONFIG_FRAME_POINTER
358 if (!bp)
359 get_bp(bp);
360#endif
361
362 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
363 current->pid, current->comm, print_tainted(),
364 init_utsname()->release,
365 (int)strcspn(init_utsname()->version, " "),
366 init_utsname()->version);
367 show_trace(NULL, NULL, &stack, bp);
368}
369EXPORT_SYMBOL(dump_stack);
370
371void show_registers(struct pt_regs *regs)
372{
373 int i;
374 unsigned long sp;
375 const int cpu = smp_processor_id();
376 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
377
378 sp = regs->sp;
379 printk("CPU %d ", cpu);
380 __show_regs(regs, 1);
381 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
382 cur->comm, cur->pid, task_thread_info(cur), cur);
383
384 /*
385 * When in-kernel, we also print out the stack and code at the
386 * time of the fault..
387 */
388 if (!user_mode(regs)) {
389 unsigned int code_prologue = code_bytes * 43 / 64;
390 unsigned int code_len = code_bytes;
391 unsigned char c;
392 u8 *ip;
393
394 printk(KERN_EMERG "Stack:\n");
395 show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
396 regs->bp, KERN_EMERG);
397
398 printk(KERN_EMERG "Code: ");
399
400 ip = (u8 *)regs->ip - code_prologue;
401 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
402 /* try starting at IP */
403 ip = (u8 *)regs->ip;
404 code_len = code_len - code_prologue + 1;
405 }
406 for (i = 0; i < code_len; i++, ip++) {
407 if (ip < (u8 *)PAGE_OFFSET ||
408 probe_kernel_address(ip, c)) {
409 printk(" Bad RIP value.");
410 break;
411 }
412 if (ip == (u8 *)regs->ip)
413 printk("<%02x> ", c);
414 else
415 printk("%02x ", c);
416 }
417 }
418 printk("\n");
419}
420
421int is_valid_bugaddr(unsigned long ip)
422{
423 unsigned short ud2;
424
425 if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2)))
426 return 0;
427
428 return ud2 == 0x0b0f;
429}
430
431static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
432static int die_owner = -1;
433static unsigned int die_nest_count;
434
435unsigned __kprobes long oops_begin(void)
436{
437 int cpu;
438 unsigned long flags;
439
440 oops_enter();
441
442 /* racy, but better than risking deadlock. */
443 raw_local_irq_save(flags);
444 cpu = smp_processor_id();
445 if (!__raw_spin_trylock(&die_lock)) {
446 if (cpu == die_owner)
447 /* nested oops. should stop eventually */;
448 else
449 __raw_spin_lock(&die_lock);
450 }
451 die_nest_count++;
452 die_owner = cpu;
453 console_verbose();
454 bust_spinlocks(1);
455 return flags;
456}
457
458void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
459{
460 die_owner = -1;
461 bust_spinlocks(0);
462 die_nest_count--;
463 if (!die_nest_count)
464 /* Nest count reaches zero, release the lock. */
465 __raw_spin_unlock(&die_lock);
466 raw_local_irq_restore(flags);
467 if (!regs) {
468 oops_exit();
469 return;
470 }
471 if (in_interrupt())
472 panic("Fatal exception in interrupt");
473 if (panic_on_oops)
474 panic("Fatal exception");
475 oops_exit();
476 do_exit(signr);
477}
478
479int __kprobes __die(const char *str, struct pt_regs *regs, long err)
480{
481 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
482#ifdef CONFIG_PREEMPT
483 printk("PREEMPT ");
484#endif
485#ifdef CONFIG_SMP
486 printk("SMP ");
487#endif
488#ifdef CONFIG_DEBUG_PAGEALLOC
489 printk("DEBUG_PAGEALLOC");
490#endif
491 printk("\n");
492 if (notify_die(DIE_OOPS, str, regs, err,
493 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
494 return 1;
495
496 show_registers(regs);
497 add_taint(TAINT_DIE);
498 /* Executive summary in case the oops scrolled away */
499 printk(KERN_ALERT "RIP ");
500 printk_address(regs->ip, 1);
501 printk(" RSP <%016lx>\n", regs->sp);
502 if (kexec_should_crash(current))
503 crash_kexec(regs);
504 return 0;
505}
506
507void die(const char *str, struct pt_regs *regs, long err)
508{
509 unsigned long flags = oops_begin();
510
511 if (!user_mode(regs))
512 report_bug(regs->ip, regs);
513
514 if (__die(str, regs, err))
515 regs = NULL;
516 oops_end(flags, regs, SIGSEGV);
517}
518
519notrace __kprobes void
520die_nmi(char *str, struct pt_regs *regs, int do_panic)
521{
522 unsigned long flags;
523
524 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
525 return;
526
527 flags = oops_begin();
528 /*
529 * We are in trouble anyway, lets at least try
530 * to get a message out.
531 */
532 printk(KERN_EMERG "%s", str);
533 printk(" on CPU%d, ip %08lx, registers:\n",
534 smp_processor_id(), regs->ip);
535 show_registers(regs);
536 if (kexec_should_crash(current))
537 crash_kexec(regs);
538 if (do_panic || panic_on_oops)
539 panic("Non maskable interrupt");
540 oops_end(flags, NULL, SIGBUS);
541 nmi_exit();
542 local_irq_enable();
543 do_exit(SIGBUS);
544}
545
546static int __init oops_setup(char *s)
547{
548 if (!s)
549 return -EINVAL;
550 if (!strcmp(s, "panic"))
551 panic_on_oops = 1;
552 return 0;
553}
554early_param("oops", oops_setup);
555
556static int __init kstack_setup(char *s)
557{
558 if (!s)
559 return -EINVAL;
560 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
561 return 0;
562}
563early_param("kstack", kstack_setup);
564
565static int __init code_bytes_setup(char *s)
566{
567 code_bytes = simple_strtoul(s, NULL, 0);
568 if (code_bytes > 8192)
569 code_bytes = 8192;
570
571 return 1;
572}
573__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 109792bc7cfa..b21fbfaffe39 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -730,6 +730,7 @@ error_code:
730 movl $(__USER_DS), %ecx 730 movl $(__USER_DS), %ecx
731 movl %ecx, %ds 731 movl %ecx, %ds
732 movl %ecx, %es 732 movl %ecx, %es
733 TRACE_IRQS_OFF
733 movl %esp,%eax # pt_regs pointer 734 movl %esp,%eax # pt_regs pointer
734 call *%edi 735 call *%edi
735 jmp ret_from_exception 736 jmp ret_from_exception
@@ -760,20 +761,9 @@ ENTRY(device_not_available)
760 RING0_INT_FRAME 761 RING0_INT_FRAME
761 pushl $-1 # mark this as an int 762 pushl $-1 # mark this as an int
762 CFI_ADJUST_CFA_OFFSET 4 763 CFI_ADJUST_CFA_OFFSET 4
763 SAVE_ALL 764 pushl $do_device_not_available
764 GET_CR0_INTO_EAX
765 testl $0x4, %eax # EM (math emulation bit)
766 jne device_not_available_emulate
767 preempt_stop(CLBR_ANY)
768 call math_state_restore
769 jmp ret_from_exception
770device_not_available_emulate:
771 pushl $0 # temporary storage for ORIG_EIP
772 CFI_ADJUST_CFA_OFFSET 4 765 CFI_ADJUST_CFA_OFFSET 4
773 call math_emulate 766 jmp error_code
774 addl $4, %esp
775 CFI_ADJUST_CFA_OFFSET -4
776 jmp ret_from_exception
777 CFI_ENDPROC 767 CFI_ENDPROC
778END(device_not_available) 768END(device_not_available)
779 769
@@ -814,6 +804,7 @@ debug_stack_correct:
814 pushl $-1 # mark this as an int 804 pushl $-1 # mark this as an int
815 CFI_ADJUST_CFA_OFFSET 4 805 CFI_ADJUST_CFA_OFFSET 4
816 SAVE_ALL 806 SAVE_ALL
807 TRACE_IRQS_OFF
817 xorl %edx,%edx # error code 0 808 xorl %edx,%edx # error code 0
818 movl %esp,%eax # pt_regs pointer 809 movl %esp,%eax # pt_regs pointer
819 call do_debug 810 call do_debug
@@ -858,6 +849,7 @@ nmi_stack_correct:
858 pushl %eax 849 pushl %eax
859 CFI_ADJUST_CFA_OFFSET 4 850 CFI_ADJUST_CFA_OFFSET 4
860 SAVE_ALL 851 SAVE_ALL
852 TRACE_IRQS_OFF
861 xorl %edx,%edx # zero error code 853 xorl %edx,%edx # zero error code
862 movl %esp,%eax # pt_regs pointer 854 movl %esp,%eax # pt_regs pointer
863 call do_nmi 855 call do_nmi
@@ -898,6 +890,7 @@ nmi_espfix_stack:
898 pushl %eax 890 pushl %eax
899 CFI_ADJUST_CFA_OFFSET 4 891 CFI_ADJUST_CFA_OFFSET 4
900 SAVE_ALL 892 SAVE_ALL
893 TRACE_IRQS_OFF
901 FIXUP_ESPFIX_STACK # %eax == %esp 894 FIXUP_ESPFIX_STACK # %eax == %esp
902 xorl %edx,%edx # zero error code 895 xorl %edx,%edx # zero error code
903 call do_nmi 896 call do_nmi
@@ -928,6 +921,7 @@ KPROBE_ENTRY(int3)
928 pushl $-1 # mark this as an int 921 pushl $-1 # mark this as an int
929 CFI_ADJUST_CFA_OFFSET 4 922 CFI_ADJUST_CFA_OFFSET 4
930 SAVE_ALL 923 SAVE_ALL
924 TRACE_IRQS_OFF
931 xorl %edx,%edx # zero error code 925 xorl %edx,%edx # zero error code
932 movl %esp,%eax # pt_regs pointer 926 movl %esp,%eax # pt_regs pointer
933 call do_int3 927 call do_int3
@@ -1030,7 +1024,7 @@ ENTRY(machine_check)
1030 RING0_INT_FRAME 1024 RING0_INT_FRAME
1031 pushl $0 1025 pushl $0
1032 CFI_ADJUST_CFA_OFFSET 4 1026 CFI_ADJUST_CFA_OFFSET 4
1033 pushl machine_check_vector 1027 pushl $do_machine_check
1034 CFI_ADJUST_CFA_OFFSET 4 1028 CFI_ADJUST_CFA_OFFSET 4
1035 jmp error_code 1029 jmp error_code
1036 CFI_ENDPROC 1030 CFI_ENDPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index cf3a0b2d0059..1db6ce4314e1 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -667,6 +667,13 @@ END(stub_rt_sigreturn)
667 SAVE_ARGS 667 SAVE_ARGS
668 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler 668 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
669 pushq %rbp 669 pushq %rbp
670 /*
671 * Save rbp twice: One is for marking the stack frame, as usual, and the
672 * other, to fill pt_regs properly. This is because bx comes right
673 * before the last saved register in that structure, and not bp. If the
674 * base pointer were in the place bx is today, this would not be needed.
675 */
676 movq %rbp, -8(%rsp)
670 CFI_ADJUST_CFA_OFFSET 8 677 CFI_ADJUST_CFA_OFFSET 8
671 CFI_REL_OFFSET rbp, 0 678 CFI_REL_OFFSET rbp, 0
672 movq %rsp,%rbp 679 movq %rsp,%rbp
@@ -932,6 +939,9 @@ END(spurious_interrupt)
932 .if \ist 939 .if \ist
933 movq %gs:pda_data_offset, %rbp 940 movq %gs:pda_data_offset, %rbp
934 .endif 941 .endif
942 .if \irqtrace
943 TRACE_IRQS_OFF
944 .endif
935 movq %rsp,%rdi 945 movq %rsp,%rdi
936 movq ORIG_RAX(%rsp),%rsi 946 movq ORIG_RAX(%rsp),%rsi
937 movq $-1,ORIG_RAX(%rsp) 947 movq $-1,ORIG_RAX(%rsp)
@@ -1058,7 +1068,8 @@ KPROBE_ENTRY(error_entry)
1058 je error_kernelspace 1068 je error_kernelspace
1059error_swapgs: 1069error_swapgs:
1060 SWAPGS 1070 SWAPGS
1061error_sti: 1071error_sti:
1072 TRACE_IRQS_OFF
1062 movq %rdi,RDI(%rsp) 1073 movq %rdi,RDI(%rsp)
1063 CFI_REL_OFFSET rdi,RDI 1074 CFI_REL_OFFSET rdi,RDI
1064 movq %rsp,%rdi 1075 movq %rsp,%rdi
@@ -1232,7 +1243,7 @@ ENTRY(simd_coprocessor_error)
1232END(simd_coprocessor_error) 1243END(simd_coprocessor_error)
1233 1244
1234ENTRY(device_not_available) 1245ENTRY(device_not_available)
1235 zeroentry math_state_restore 1246 zeroentry do_device_not_available
1236END(device_not_available) 1247END(device_not_available)
1237 1248
1238 /* runs on exception stack */ 1249 /* runs on exception stack */
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index 849e5cd485b8..f454c78fcef6 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -109,6 +109,7 @@ struct oem_table {
109}; 109};
110 110
111extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); 111extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
112extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr);
112#endif 113#endif
113 114
114struct mip_reg { 115struct mip_reg {
@@ -243,21 +244,38 @@ parse_unisys_oem (char *oemptr)
243} 244}
244 245
245#ifdef CONFIG_ACPI 246#ifdef CONFIG_ACPI
246int __init 247static unsigned long oem_addrX;
247find_unisys_acpi_oem_table(unsigned long *oem_addr) 248static unsigned long oem_size;
249int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
248{ 250{
249 struct acpi_table_header *header = NULL; 251 struct acpi_table_header *header = NULL;
250 int i = 0; 252 int i = 0;
251 while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { 253 acpi_size tbl_size;
254
255 while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) {
252 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { 256 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
253 struct oem_table *t = (struct oem_table *)header; 257 struct oem_table *t = (struct oem_table *)header;
254 *oem_addr = (unsigned long)__acpi_map_table(t->OEMTableAddr, 258
255 t->OEMTableSize); 259 oem_addrX = t->OEMTableAddr;
260 oem_size = t->OEMTableSize;
261 early_acpi_os_unmap_memory(header, tbl_size);
262
263 *oem_addr = (unsigned long)__acpi_map_table(oem_addrX,
264 oem_size);
256 return 0; 265 return 0;
257 } 266 }
267 early_acpi_os_unmap_memory(header, tbl_size);
258 } 268 }
259 return -1; 269 return -1;
260} 270}
271
272void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
273{
274 if (!oem_addr)
275 return;
276
277 __acpi_unmap_table((char *)oem_addr, oem_size);
278}
261#endif 279#endif
262 280
263static void 281static void
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index ae2ffc8a400c..33581d94a90e 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -114,7 +114,7 @@ static void uv_send_IPI_one(int cpu, int vector)
114 unsigned long val, apicid, lapicid; 114 unsigned long val, apicid, lapicid;
115 int pnode; 115 int pnode;
116 116
117 apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */ 117 apicid = per_cpu(x86_cpu_to_apicid, cpu);
118 lapicid = apicid & 0x3f; /* ZZZ macro needed */ 118 lapicid = apicid & 0x3f; /* ZZZ macro needed */
119 pnode = uv_apicid_to_pnode(apicid); 119 pnode = uv_apicid_to_pnode(apicid);
120 val = 120 val =
@@ -202,12 +202,10 @@ static unsigned int phys_pkg_id(int index_msb)
202 return uv_read_apic_id() >> index_msb; 202 return uv_read_apic_id() >> index_msb;
203} 203}
204 204
205#ifdef ZZZ /* Needs x2apic patch */
206static void uv_send_IPI_self(int vector) 205static void uv_send_IPI_self(int vector)
207{ 206{
208 apic_write(APIC_SELF_IPI, vector); 207 apic_write(APIC_SELF_IPI, vector);
209} 208}
210#endif
211 209
212struct genapic apic_x2apic_uv_x = { 210struct genapic apic_x2apic_uv_x = {
213 .name = "UV large system", 211 .name = "UV large system",
@@ -215,15 +213,15 @@ struct genapic apic_x2apic_uv_x = {
215 .int_delivery_mode = dest_Fixed, 213 .int_delivery_mode = dest_Fixed,
216 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 214 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
217 .target_cpus = uv_target_cpus, 215 .target_cpus = uv_target_cpus,
218 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ 216 .vector_allocation_domain = uv_vector_allocation_domain,
219 .apic_id_registered = uv_apic_id_registered, 217 .apic_id_registered = uv_apic_id_registered,
220 .init_apic_ldr = uv_init_apic_ldr, 218 .init_apic_ldr = uv_init_apic_ldr,
221 .send_IPI_all = uv_send_IPI_all, 219 .send_IPI_all = uv_send_IPI_all,
222 .send_IPI_allbutself = uv_send_IPI_allbutself, 220 .send_IPI_allbutself = uv_send_IPI_allbutself,
223 .send_IPI_mask = uv_send_IPI_mask, 221 .send_IPI_mask = uv_send_IPI_mask,
224 /* ZZZ.send_IPI_self = uv_send_IPI_self, */ 222 .send_IPI_self = uv_send_IPI_self,
225 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 223 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
226 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ 224 .phys_pkg_id = phys_pkg_id,
227 .get_apic_id = get_apic_id, 225 .get_apic_id = get_apic_id,
228 .set_apic_id = set_apic_id, 226 .set_apic_id = set_apic_id,
229 .apic_id_mask = (0xFFFFFFFFu), 227 .apic_id_mask = (0xFFFFFFFFu),
@@ -286,12 +284,13 @@ static __init void map_low_mmrs(void)
286 284
287enum map_type {map_wb, map_uc}; 285enum map_type {map_wb, map_uc};
288 286
289static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type) 287static __init void map_high(char *id, unsigned long base, int shift,
288 int max_pnode, enum map_type map_type)
290{ 289{
291 unsigned long bytes, paddr; 290 unsigned long bytes, paddr;
292 291
293 paddr = base << shift; 292 paddr = base << shift;
294 bytes = (1UL << shift); 293 bytes = (1UL << shift) * (max_pnode + 1);
295 printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, 294 printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
296 paddr + bytes); 295 paddr + bytes);
297 if (map_type == map_uc) 296 if (map_type == map_uc)
@@ -307,7 +306,7 @@ static __init void map_gru_high(int max_pnode)
307 306
308 gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); 307 gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
309 if (gru.s.enable) 308 if (gru.s.enable)
310 map_high("GRU", gru.s.base, shift, map_wb); 309 map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
311} 310}
312 311
313static __init void map_config_high(int max_pnode) 312static __init void map_config_high(int max_pnode)
@@ -317,7 +316,7 @@ static __init void map_config_high(int max_pnode)
317 316
318 cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); 317 cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
319 if (cfg.s.enable) 318 if (cfg.s.enable)
320 map_high("CONFIG", cfg.s.base, shift, map_uc); 319 map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc);
321} 320}
322 321
323static __init void map_mmr_high(int max_pnode) 322static __init void map_mmr_high(int max_pnode)
@@ -327,7 +326,7 @@ static __init void map_mmr_high(int max_pnode)
327 326
328 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); 327 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
329 if (mmr.s.enable) 328 if (mmr.s.enable)
330 map_high("MMR", mmr.s.base, shift, map_uc); 329 map_high("MMR", mmr.s.base, shift, max_pnode, map_uc);
331} 330}
332 331
333static __init void map_mmioh_high(int max_pnode) 332static __init void map_mmioh_high(int max_pnode)
@@ -337,7 +336,7 @@ static __init void map_mmioh_high(int max_pnode)
337 336
338 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); 337 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
339 if (mmioh.s.enable) 338 if (mmioh.s.enable)
340 map_high("MMIOH", mmioh.s.base, shift, map_uc); 339 map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc);
341} 340}
342 341
343static __init void uv_rtc_init(void) 342static __init void uv_rtc_init(void)
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index 3e66bd364a9d..1dcb0f13897e 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -35,6 +35,7 @@ void __init reserve_ebda_region(void)
35 35
36 /* start of EBDA area */ 36 /* start of EBDA area */
37 ebda_addr = get_bios_ebda(); 37 ebda_addr = get_bios_ebda();
38 printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem);
38 39
39 /* Fixup: bios puts an EBDA in the top 64K segment */ 40 /* Fixup: bios puts an EBDA in the top 64K segment */
40 /* of conventional memory, but does not adjust lowmem. */ 41 /* of conventional memory, but does not adjust lowmem. */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 73deaffadd03..acf62fc233da 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -115,13 +115,17 @@ static void hpet_reserve_platform_timers(unsigned long id)
115 hd.hd_phys_address = hpet_address; 115 hd.hd_phys_address = hpet_address;
116 hd.hd_address = hpet; 116 hd.hd_address = hpet;
117 hd.hd_nirqs = nrtimers; 117 hd.hd_nirqs = nrtimers;
118 hd.hd_flags = HPET_DATA_PLATFORM;
119 hpet_reserve_timer(&hd, 0); 118 hpet_reserve_timer(&hd, 0);
120 119
121#ifdef CONFIG_HPET_EMULATE_RTC 120#ifdef CONFIG_HPET_EMULATE_RTC
122 hpet_reserve_timer(&hd, 1); 121 hpet_reserve_timer(&hd, 1);
123#endif 122#endif
124 123
124 /*
125 * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254
126 * is wrong for i8259!) not the output IRQ. Many BIOS writers
127 * don't bother configuring *any* comparator interrupts.
128 */
125 hd.hd_irq[0] = HPET_LEGACY_8254; 129 hd.hd_irq[0] = HPET_LEGACY_8254;
126 hd.hd_irq[1] = HPET_LEGACY_RTC; 130 hd.hd_irq[1] = HPET_LEGACY_RTC;
127 131
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 1f26fd9ec4f4..5b5be9d43c2a 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -135,7 +135,7 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
135 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 135 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
136}; 136};
137 137
138static void __init init_ISA_irqs (void) 138void __init init_ISA_irqs(void)
139{ 139{
140 int i; 140 int i;
141 141
@@ -164,22 +164,8 @@ static void __init init_ISA_irqs (void)
164 164
165void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 165void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
166 166
167void __init native_init_IRQ(void) 167static void __init smp_intr_init(void)
168{ 168{
169 int i;
170
171 init_ISA_irqs();
172 /*
173 * Cover the whole vector space, no vector can escape
174 * us. (some of these will be overridden and become
175 * 'special' SMP interrupts)
176 */
177 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
178 int vector = FIRST_EXTERNAL_VECTOR + i;
179 if (vector != IA32_SYSCALL_VECTOR)
180 set_intr_gate(vector, interrupt[i]);
181 }
182
183#ifdef CONFIG_SMP 169#ifdef CONFIG_SMP
184 /* 170 /*
185 * The reschedule interrupt is a CPU-to-CPU reschedule-helper 171 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
@@ -207,6 +193,12 @@ void __init native_init_IRQ(void)
207 /* Low priority IPI to cleanup after moving an irq */ 193 /* Low priority IPI to cleanup after moving an irq */
208 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 194 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
209#endif 195#endif
196}
197
198static void __init apic_intr_init(void)
199{
200 smp_intr_init();
201
210 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); 202 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
211 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); 203 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
212 204
@@ -216,6 +208,25 @@ void __init native_init_IRQ(void)
216 /* IPI vectors for APIC spurious and error interrupts */ 208 /* IPI vectors for APIC spurious and error interrupts */
217 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); 209 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
218 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); 210 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
211}
212
213void __init native_init_IRQ(void)
214{
215 int i;
216
217 init_ISA_irqs();
218 /*
219 * Cover the whole vector space, no vector can escape
220 * us. (some of these will be overridden and become
221 * 'special' SMP interrupts)
222 */
223 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
224 int vector = FIRST_EXTERNAL_VECTOR + i;
225 if (vector != IA32_SYSCALL_VECTOR)
226 set_intr_gate(vector, interrupt[i]);
227 }
228
229 apic_intr_init();
219 230
220 if (!acpi_ioapic) 231 if (!acpi_ioapic)
221 setup_irq(2, &irq2); 232 setup_irq(2, &irq2);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 922c14058f97..0a1302fe6d45 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -123,7 +123,7 @@ void cpu_idle(void)
123 } 123 }
124} 124}
125 125
126void __show_registers(struct pt_regs *regs, int all) 126void __show_regs(struct pt_regs *regs, int all)
127{ 127{
128 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; 128 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
129 unsigned long d0, d1, d2, d3, d6, d7; 129 unsigned long d0, d1, d2, d3, d6, d7;
@@ -189,7 +189,7 @@ void __show_registers(struct pt_regs *regs, int all)
189 189
190void show_regs(struct pt_regs *regs) 190void show_regs(struct pt_regs *regs)
191{ 191{
192 __show_registers(regs, 1); 192 __show_regs(regs, 1);
193 show_trace(NULL, regs, &regs->sp, regs->bp); 193 show_trace(NULL, regs, &regs->sp, regs->bp);
194} 194}
195 195
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ca80394ef5b8..cd8c0ed02b7e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -136,7 +136,7 @@ void cpu_idle(void)
136} 136}
137 137
138/* Prints also some state that isn't saved in the pt_regs */ 138/* Prints also some state that isn't saved in the pt_regs */
139void __show_regs(struct pt_regs *regs) 139void __show_regs(struct pt_regs *regs, int all)
140{ 140{
141 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; 141 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
142 unsigned long d0, d1, d2, d3, d6, d7; 142 unsigned long d0, d1, d2, d3, d6, d7;
@@ -175,6 +175,9 @@ void __show_regs(struct pt_regs *regs)
175 rdmsrl(MSR_GS_BASE, gs); 175 rdmsrl(MSR_GS_BASE, gs);
176 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 176 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
177 177
178 if (!all)
179 return;
180
178 cr0 = read_cr0(); 181 cr0 = read_cr0();
179 cr2 = read_cr2(); 182 cr2 = read_cr2();
180 cr3 = read_cr3(); 183 cr3 = read_cr3();
@@ -200,7 +203,7 @@ void __show_regs(struct pt_regs *regs)
200void show_regs(struct pt_regs *regs) 203void show_regs(struct pt_regs *regs)
201{ 204{
202 printk(KERN_INFO "CPU %d:", smp_processor_id()); 205 printk(KERN_INFO "CPU %d:", smp_processor_id());
203 __show_regs(regs); 206 __show_regs(regs, 1);
204 show_trace(NULL, regs, (void *)(regs + 1), regs->bp); 207 show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
205} 208}
206 209
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index d13858818100..f6a11b9b1f98 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -354,9 +354,27 @@ static void ati_force_hpet_resume(void)
354 printk(KERN_DEBUG "Force enabled HPET at resume\n"); 354 printk(KERN_DEBUG "Force enabled HPET at resume\n");
355} 355}
356 356
357static u32 ati_ixp4x0_rev(struct pci_dev *dev)
358{
359 u32 d;
360 u8 b;
361
362 pci_read_config_byte(dev, 0xac, &b);
363 b &= ~(1<<5);
364 pci_write_config_byte(dev, 0xac, b);
365 pci_read_config_dword(dev, 0x70, &d);
366 d |= 1<<8;
367 pci_write_config_dword(dev, 0x70, d);
368 pci_read_config_dword(dev, 0x8, &d);
369 d &= 0xff;
370 dev_printk(KERN_DEBUG, &dev->dev, "SB4X0 revision 0x%x\n", d);
371 return d;
372}
373
357static void ati_force_enable_hpet(struct pci_dev *dev) 374static void ati_force_enable_hpet(struct pci_dev *dev)
358{ 375{
359 u32 uninitialized_var(val); 376 u32 d, val;
377 u8 b;
360 378
361 if (hpet_address || force_hpet_address) 379 if (hpet_address || force_hpet_address)
362 return; 380 return;
@@ -366,14 +384,33 @@ static void ati_force_enable_hpet(struct pci_dev *dev)
366 return; 384 return;
367 } 385 }
368 386
387 d = ati_ixp4x0_rev(dev);
388 if (d < 0x82)
389 return;
390
391 /* base address */
369 pci_write_config_dword(dev, 0x14, 0xfed00000); 392 pci_write_config_dword(dev, 0x14, 0xfed00000);
370 pci_read_config_dword(dev, 0x14, &val); 393 pci_read_config_dword(dev, 0x14, &val);
394
395 /* enable interrupt */
396 outb(0x72, 0xcd6); b = inb(0xcd7);
397 b |= 0x1;
398 outb(0x72, 0xcd6); outb(b, 0xcd7);
399 outb(0x72, 0xcd6); b = inb(0xcd7);
400 if (!(b & 0x1))
401 return;
402 pci_read_config_dword(dev, 0x64, &d);
403 d |= (1<<10);
404 pci_write_config_dword(dev, 0x64, d);
405 pci_read_config_dword(dev, 0x64, &d);
406 if (!(d & (1<<10)))
407 return;
408
371 force_hpet_address = val; 409 force_hpet_address = val;
372 force_hpet_resume_type = ATI_FORCE_HPET_RESUME; 410 force_hpet_resume_type = ATI_FORCE_HPET_RESUME;
373 dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", 411 dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n",
374 force_hpet_address); 412 force_hpet_address);
375 cached_dev = dev; 413 cached_dev = dev;
376 return;
377} 414}
378DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, 415DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS,
379 ati_force_enable_hpet); 416 ati_force_enable_hpet);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 21b8e0a59780..2255782e8d4b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -302,7 +302,7 @@ static void __init relocate_initrd(void)
302 if (clen > MAX_MAP_CHUNK-slop) 302 if (clen > MAX_MAP_CHUNK-slop)
303 clen = MAX_MAP_CHUNK-slop; 303 clen = MAX_MAP_CHUNK-slop;
304 mapaddr = ramdisk_image & PAGE_MASK; 304 mapaddr = ramdisk_image & PAGE_MASK;
305 p = early_ioremap(mapaddr, clen+slop); 305 p = early_memremap(mapaddr, clen+slop);
306 memcpy(q, p+slop, clen); 306 memcpy(q, p+slop, clen);
307 early_iounmap(p, clen+slop); 307 early_iounmap(p, clen+slop);
308 q += clen; 308 q += clen;
@@ -379,7 +379,7 @@ static void __init parse_setup_data(void)
379 return; 379 return;
380 pa_data = boot_params.hdr.setup_data; 380 pa_data = boot_params.hdr.setup_data;
381 while (pa_data) { 381 while (pa_data) {
382 data = early_ioremap(pa_data, PAGE_SIZE); 382 data = early_memremap(pa_data, PAGE_SIZE);
383 switch (data->type) { 383 switch (data->type) {
384 case SETUP_E820_EXT: 384 case SETUP_E820_EXT:
385 parse_e820_ext(data, pa_data); 385 parse_e820_ext(data, pa_data);
@@ -402,7 +402,7 @@ static void __init e820_reserve_setup_data(void)
402 return; 402 return;
403 pa_data = boot_params.hdr.setup_data; 403 pa_data = boot_params.hdr.setup_data;
404 while (pa_data) { 404 while (pa_data) {
405 data = early_ioremap(pa_data, sizeof(*data)); 405 data = early_memremap(pa_data, sizeof(*data));
406 e820_update_range(pa_data, sizeof(*data)+data->len, 406 e820_update_range(pa_data, sizeof(*data)+data->len,
407 E820_RAM, E820_RESERVED_KERN); 407 E820_RAM, E820_RESERVED_KERN);
408 found = 1; 408 found = 1;
@@ -428,7 +428,7 @@ static void __init reserve_early_setup_data(void)
428 return; 428 return;
429 pa_data = boot_params.hdr.setup_data; 429 pa_data = boot_params.hdr.setup_data;
430 while (pa_data) { 430 while (pa_data) {
431 data = early_ioremap(pa_data, sizeof(*data)); 431 data = early_memremap(pa_data, sizeof(*data));
432 sprintf(buf, "setup data %x", data->type); 432 sprintf(buf, "setup data %x", data->type);
433 reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); 433 reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
434 pa_data = data->next; 434 pa_data = data->next;
@@ -998,6 +998,8 @@ void __init setup_arch(char **cmdline_p)
998 */ 998 */
999 acpi_boot_table_init(); 999 acpi_boot_table_init();
1000 1000
1001 early_acpi_boot_init();
1002
1001#ifdef CONFIG_ACPI_NUMA 1003#ifdef CONFIG_ACPI_NUMA
1002 /* 1004 /*
1003 * Parse SRAT to discover nodes. 1005 * Parse SRAT to discover nodes.
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 76b6f50978f7..8c3aca7cb343 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -334,14 +334,17 @@ static void __cpuinit start_secondary(void *unused)
334 * does not change while we are assigning vectors to cpus. Holding 334 * does not change while we are assigning vectors to cpus. Holding
335 * this lock ensures we don't half assign or remove an irq from a cpu. 335 * this lock ensures we don't half assign or remove an irq from a cpu.
336 */ 336 */
337 ipi_call_lock_irq(); 337 ipi_call_lock();
338 lock_vector_lock(); 338 lock_vector_lock();
339 __setup_vector_irq(smp_processor_id()); 339 __setup_vector_irq(smp_processor_id());
340 cpu_set(smp_processor_id(), cpu_online_map); 340 cpu_set(smp_processor_id(), cpu_online_map);
341 unlock_vector_lock(); 341 unlock_vector_lock();
342 ipi_call_unlock_irq(); 342 ipi_call_unlock();
343 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 343 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
344 344
345 /* enable local interrupts */
346 local_irq_enable();
347
345 setup_secondary_clock(); 348 setup_secondary_clock();
346 349
347 wmb(); 350 wmb();
@@ -596,10 +599,12 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
596 * Give the other CPU some time to accept the IPI. 599 * Give the other CPU some time to accept the IPI.
597 */ 600 */
598 udelay(200); 601 udelay(200);
599 maxlvt = lapic_get_maxlvt(); 602 if (APIC_INTEGRATED(apic_version[phys_apicid])) {
600 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 603 maxlvt = lapic_get_maxlvt();
601 apic_write(APIC_ESR, 0); 604 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
602 accept_status = (apic_read(APIC_ESR) & 0xEF); 605 apic_write(APIC_ESR, 0);
606 accept_status = (apic_read(APIC_ESR) & 0xEF);
607 }
603 pr_debug("NMI sent.\n"); 608 pr_debug("NMI sent.\n");
604 609
605 if (send_status) 610 if (send_status)
@@ -1256,39 +1261,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1256 check_nmi_watchdog(); 1261 check_nmi_watchdog();
1257} 1262}
1258 1263
1259#ifdef CONFIG_HOTPLUG_CPU
1260
1261static void remove_siblinginfo(int cpu)
1262{
1263 int sibling;
1264 struct cpuinfo_x86 *c = &cpu_data(cpu);
1265
1266 for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) {
1267 cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
1268 /*/
1269 * last thread sibling in this cpu core going down
1270 */
1271 if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
1272 cpu_data(sibling).booted_cores--;
1273 }
1274
1275 for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu))
1276 cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
1277 cpus_clear(per_cpu(cpu_sibling_map, cpu));
1278 cpus_clear(per_cpu(cpu_core_map, cpu));
1279 c->phys_proc_id = 0;
1280 c->cpu_core_id = 0;
1281 cpu_clear(cpu, cpu_sibling_setup_map);
1282}
1283
1284static int additional_cpus __initdata = -1;
1285
1286static __init int setup_additional_cpus(char *s)
1287{
1288 return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL;
1289}
1290early_param("additional_cpus", setup_additional_cpus);
1291
1292/* 1264/*
1293 * cpu_possible_map should be static, it cannot change as cpu's 1265 * cpu_possible_map should be static, it cannot change as cpu's
1294 * are onlined, or offlined. The reason is per-cpu data-structures 1266 * are onlined, or offlined. The reason is per-cpu data-structures
@@ -1308,21 +1280,13 @@ early_param("additional_cpus", setup_additional_cpus);
1308 */ 1280 */
1309__init void prefill_possible_map(void) 1281__init void prefill_possible_map(void)
1310{ 1282{
1311 int i; 1283 int i, possible;
1312 int possible;
1313 1284
1314 /* no processor from mptable or madt */ 1285 /* no processor from mptable or madt */
1315 if (!num_processors) 1286 if (!num_processors)
1316 num_processors = 1; 1287 num_processors = 1;
1317 1288
1318 if (additional_cpus == -1) { 1289 possible = num_processors + disabled_cpus;
1319 if (disabled_cpus > 0)
1320 additional_cpus = disabled_cpus;
1321 else
1322 additional_cpus = 0;
1323 }
1324
1325 possible = num_processors + additional_cpus;
1326 if (possible > NR_CPUS) 1290 if (possible > NR_CPUS)
1327 possible = NR_CPUS; 1291 possible = NR_CPUS;
1328 1292
@@ -1335,6 +1299,31 @@ __init void prefill_possible_map(void)
1335 nr_cpu_ids = possible; 1299 nr_cpu_ids = possible;
1336} 1300}
1337 1301
1302#ifdef CONFIG_HOTPLUG_CPU
1303
1304static void remove_siblinginfo(int cpu)
1305{
1306 int sibling;
1307 struct cpuinfo_x86 *c = &cpu_data(cpu);
1308
1309 for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) {
1310 cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
1311 /*/
1312 * last thread sibling in this cpu core going down
1313 */
1314 if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
1315 cpu_data(sibling).booted_cores--;
1316 }
1317
1318 for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu))
1319 cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
1320 cpus_clear(per_cpu(cpu_sibling_map, cpu));
1321 cpus_clear(per_cpu(cpu_core_map, cpu));
1322 c->phys_proc_id = 0;
1323 c->cpu_core_id = 0;
1324 cpu_clear(cpu, cpu_sibling_setup_map);
1325}
1326
1338static void __ref remove_cpu_from_maps(int cpu) 1327static void __ref remove_cpu_from_maps(int cpu)
1339{ 1328{
1340 cpu_clear(cpu, cpu_online_map); 1329 cpu_clear(cpu, cpu_online_map);
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index bbecf8b6bf96..77b400f06ea2 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -47,10 +47,9 @@ unsigned long profile_pc(struct pt_regs *regs)
47 unsigned long pc = instruction_pointer(regs); 47 unsigned long pc = instruction_pointer(regs);
48 48
49#ifdef CONFIG_SMP 49#ifdef CONFIG_SMP
50 if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->cs) && 50 if (!user_mode_vm(regs) && in_lock_functions(pc)) {
51 in_lock_functions(pc)) {
52#ifdef CONFIG_FRAME_POINTER 51#ifdef CONFIG_FRAME_POINTER
53 return *(unsigned long *)(regs->bp + 4); 52 return *(unsigned long *)(regs->bp + sizeof(long));
54#else 53#else
55 unsigned long *sp = (unsigned long *)&regs->sp; 54 unsigned long *sp = (unsigned long *)&regs->sp;
56 55
@@ -95,6 +94,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
95 94
96 do_timer_interrupt_hook(); 95 do_timer_interrupt_hook();
97 96
97#ifdef CONFIG_MCA
98 if (MCA_bus) { 98 if (MCA_bus) {
99 /* The PS/2 uses level-triggered interrupts. You can't 99 /* The PS/2 uses level-triggered interrupts. You can't
100 turn them off, nor would you want to (any attempt to 100 turn them off, nor would you want to (any attempt to
@@ -108,6 +108,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
108 u8 irq_v = inb_p( 0x61 ); /* read the current state */ 108 u8 irq_v = inb_p( 0x61 ); /* read the current state */
109 outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ 109 outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */
110 } 110 }
111#endif
111 112
112 return IRQ_HANDLED; 113 return IRQ_HANDLED;
113} 114}
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index e3d49c553af2..cb19d650c216 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -16,6 +16,7 @@
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/time.h> 18#include <linux/time.h>
19#include <linux/mca.h>
19 20
20#include <asm/i8253.h> 21#include <asm/i8253.h>
21#include <asm/hpet.h> 22#include <asm/hpet.h>
@@ -33,23 +34,34 @@ unsigned long profile_pc(struct pt_regs *regs)
33 /* Assume the lock function has either no stack frame or a copy 34 /* Assume the lock function has either no stack frame or a copy
34 of flags from PUSHF 35 of flags from PUSHF
35 Eflags always has bits 22 and up cleared unlike kernel addresses. */ 36 Eflags always has bits 22 and up cleared unlike kernel addresses. */
36 if (!user_mode(regs) && in_lock_functions(pc)) { 37 if (!user_mode_vm(regs) && in_lock_functions(pc)) {
38#ifdef CONFIG_FRAME_POINTER
39 return *(unsigned long *)(regs->bp + sizeof(long));
40#else
37 unsigned long *sp = (unsigned long *)regs->sp; 41 unsigned long *sp = (unsigned long *)regs->sp;
38 if (sp[0] >> 22) 42 if (sp[0] >> 22)
39 return sp[0]; 43 return sp[0];
40 if (sp[1] >> 22) 44 if (sp[1] >> 22)
41 return sp[1]; 45 return sp[1];
46#endif
42 } 47 }
43 return pc; 48 return pc;
44} 49}
45EXPORT_SYMBOL(profile_pc); 50EXPORT_SYMBOL(profile_pc);
46 51
47static irqreturn_t timer_event_interrupt(int irq, void *dev_id) 52irqreturn_t timer_interrupt(int irq, void *dev_id)
48{ 53{
49 add_pda(irq0_irqs, 1); 54 add_pda(irq0_irqs, 1);
50 55
51 global_clock_event->event_handler(global_clock_event); 56 global_clock_event->event_handler(global_clock_event);
52 57
58#ifdef CONFIG_MCA
59 if (MCA_bus) {
60 u8 irq_v = inb_p(0x61); /* read the current state */
61 outb_p(irq_v|0x80, 0x61); /* reset the IRQ */
62 }
63#endif
64
53 return IRQ_HANDLED; 65 return IRQ_HANDLED;
54} 66}
55 67
@@ -100,7 +112,7 @@ unsigned long __init calibrate_cpu(void)
100} 112}
101 113
102static struct irqaction irq0 = { 114static struct irqaction irq0 = {
103 .handler = timer_event_interrupt, 115 .handler = timer_interrupt,
104 .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING, 116 .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING,
105 .mask = CPU_MASK_NONE, 117 .mask = CPU_MASK_NONE,
106 .name = "timer" 118 .name = "timer"
@@ -111,16 +123,13 @@ void __init hpet_time_init(void)
111 if (!hpet_enable()) 123 if (!hpet_enable())
112 setup_pit_timer(); 124 setup_pit_timer();
113 125
126 irq0.mask = cpumask_of_cpu(0);
114 setup_irq(0, &irq0); 127 setup_irq(0, &irq0);
115} 128}
116 129
117void __init time_init(void) 130void __init time_init(void)
118{ 131{
119 tsc_init(); 132 tsc_init();
120 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
121 vgetcpu_mode = VGETCPU_RDTSCP;
122 else
123 vgetcpu_mode = VGETCPU_LSL;
124 133
125 late_time_init = choose_time_init(); 134 late_time_init = choose_time_init();
126} 135}
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps.c
index 0429c5de5ea9..e062974cce34 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps.c
@@ -7,13 +7,11 @@
7 */ 7 */
8 8
9/* 9/*
10 * 'Traps.c' handles hardware traps and faults after we have saved some 10 * Handle hardware traps and faults.
11 * state in 'asm.s'.
12 */ 11 */
13#include <linux/interrupt.h> 12#include <linux/interrupt.h>
14#include <linux/kallsyms.h> 13#include <linux/kallsyms.h>
15#include <linux/spinlock.h> 14#include <linux/spinlock.h>
16#include <linux/highmem.h>
17#include <linux/kprobes.h> 15#include <linux/kprobes.h>
18#include <linux/uaccess.h> 16#include <linux/uaccess.h>
19#include <linux/utsname.h> 17#include <linux/utsname.h>
@@ -32,6 +30,8 @@
32#include <linux/bug.h> 30#include <linux/bug.h>
33#include <linux/nmi.h> 31#include <linux/nmi.h>
34#include <linux/mm.h> 32#include <linux/mm.h>
33#include <linux/smp.h>
34#include <linux/io.h>
35 35
36#ifdef CONFIG_EISA 36#ifdef CONFIG_EISA
37#include <linux/ioport.h> 37#include <linux/ioport.h>
@@ -46,21 +46,31 @@
46#include <linux/edac.h> 46#include <linux/edac.h>
47#endif 47#endif
48 48
49#include <asm/arch_hooks.h>
50#include <asm/stacktrace.h> 49#include <asm/stacktrace.h>
51#include <asm/processor.h> 50#include <asm/processor.h>
52#include <asm/debugreg.h> 51#include <asm/debugreg.h>
53#include <asm/atomic.h> 52#include <asm/atomic.h>
54#include <asm/system.h> 53#include <asm/system.h>
55#include <asm/unwind.h> 54#include <asm/unwind.h>
55#include <asm/traps.h>
56#include <asm/desc.h> 56#include <asm/desc.h>
57#include <asm/i387.h> 57#include <asm/i387.h>
58
59#include <mach_traps.h>
60
61#ifdef CONFIG_X86_64
62#include <asm/pgalloc.h>
63#include <asm/proto.h>
64#include <asm/pda.h>
65#else
66#include <asm/processor-flags.h>
67#include <asm/arch_hooks.h>
58#include <asm/nmi.h> 68#include <asm/nmi.h>
59#include <asm/smp.h> 69#include <asm/smp.h>
60#include <asm/io.h> 70#include <asm/io.h>
61#include <asm/traps.h> 71#include <asm/traps.h>
62 72
63#include "mach_traps.h" 73#include "cpu/mcheck/mce.h"
64 74
65DECLARE_BITMAP(used_vectors, NR_VECTORS); 75DECLARE_BITMAP(used_vectors, NR_VECTORS);
66EXPORT_SYMBOL_GPL(used_vectors); 76EXPORT_SYMBOL_GPL(used_vectors);
@@ -77,418 +87,104 @@ char ignore_fpu_irq;
77 */ 87 */
78gate_desc idt_table[256] 88gate_desc idt_table[256]
79 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; 89 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
80
81int panic_on_unrecovered_nmi;
82int kstack_depth_to_print = 24;
83static unsigned int code_bytes = 64;
84static int ignore_nmis;
85static int die_counter;
86
87void printk_address(unsigned long address, int reliable)
88{
89#ifdef CONFIG_KALLSYMS
90 unsigned long offset = 0;
91 unsigned long symsize;
92 const char *symname;
93 char *modname;
94 char *delim = ":";
95 char namebuf[KSYM_NAME_LEN];
96 char reliab[4] = "";
97
98 symname = kallsyms_lookup(address, &symsize, &offset,
99 &modname, namebuf);
100 if (!symname) {
101 printk(" [<%08lx>]\n", address);
102 return;
103 }
104 if (!reliable)
105 strcpy(reliab, "? ");
106
107 if (!modname)
108 modname = delim = "";
109 printk(" [<%08lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
110 address, reliab, delim, modname, delim, symname, offset, symsize);
111#else
112 printk(" [<%08lx>]\n", address);
113#endif 90#endif
114}
115
116static inline int valid_stack_ptr(struct thread_info *tinfo,
117 void *p, unsigned int size)
118{
119 void *t = tinfo;
120 return p > t && p <= t + THREAD_SIZE - size;
121}
122
123/* The form of the top of the frame on the stack */
124struct stack_frame {
125 struct stack_frame *next_frame;
126 unsigned long return_address;
127};
128
129static inline unsigned long
130print_context_stack(struct thread_info *tinfo,
131 unsigned long *stack, unsigned long bp,
132 const struct stacktrace_ops *ops, void *data)
133{
134 struct stack_frame *frame = (struct stack_frame *)bp;
135
136 while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) {
137 unsigned long addr;
138
139 addr = *stack;
140 if (__kernel_text_address(addr)) {
141 if ((unsigned long) stack == bp + 4) {
142 ops->address(data, addr, 1);
143 frame = frame->next_frame;
144 bp = (unsigned long) frame;
145 } else {
146 ops->address(data, addr, bp == 0);
147 }
148 }
149 stack++;
150 }
151 return bp;
152}
153
154void dump_trace(struct task_struct *task, struct pt_regs *regs,
155 unsigned long *stack, unsigned long bp,
156 const struct stacktrace_ops *ops, void *data)
157{
158 if (!task)
159 task = current;
160
161 if (!stack) {
162 unsigned long dummy;
163 stack = &dummy;
164 if (task != current)
165 stack = (unsigned long *)task->thread.sp;
166 }
167
168#ifdef CONFIG_FRAME_POINTER
169 if (!bp) {
170 if (task == current) {
171 /* Grab bp right from our regs */
172 asm("movl %%ebp, %0" : "=r" (bp) :);
173 } else {
174 /* bp is the last reg pushed by switch_to */
175 bp = *(unsigned long *) task->thread.sp;
176 }
177 }
178#endif
179
180 for (;;) {
181 struct thread_info *context;
182
183 context = (struct thread_info *)
184 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
185 bp = print_context_stack(context, stack, bp, ops, data);
186 /*
187 * Should be after the line below, but somewhere
188 * in early boot context comes out corrupted and we
189 * can't reference it:
190 */
191 if (ops->stack(data, "IRQ") < 0)
192 break;
193 stack = (unsigned long *)context->previous_esp;
194 if (!stack)
195 break;
196 touch_nmi_watchdog();
197 }
198}
199EXPORT_SYMBOL(dump_trace);
200
201static void
202print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
203{
204 printk(data);
205 print_symbol(msg, symbol);
206 printk("\n");
207}
208
209static void print_trace_warning(void *data, char *msg)
210{
211 printk("%s%s\n", (char *)data, msg);
212}
213 91
214static int print_trace_stack(void *data, char *name) 92static int ignore_nmis;
215{
216 return 0;
217}
218
219/*
220 * Print one address/symbol entries per line.
221 */
222static void print_trace_address(void *data, unsigned long addr, int reliable)
223{
224 printk("%s [<%08lx>] ", (char *)data, addr);
225 if (!reliable)
226 printk("? ");
227 print_symbol("%s\n", addr);
228 touch_nmi_watchdog();
229}
230
231static const struct stacktrace_ops print_trace_ops = {
232 .warning = print_trace_warning,
233 .warning_symbol = print_trace_warning_symbol,
234 .stack = print_trace_stack,
235 .address = print_trace_address,
236};
237 93
238static void 94static inline void conditional_sti(struct pt_regs *regs)
239show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
240 unsigned long *stack, unsigned long bp, char *log_lvl)
241{ 95{
242 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); 96 if (regs->flags & X86_EFLAGS_IF)
243 printk("%s =======================\n", log_lvl); 97 local_irq_enable();
244} 98}
245 99
246void show_trace(struct task_struct *task, struct pt_regs *regs, 100static inline void preempt_conditional_sti(struct pt_regs *regs)
247 unsigned long *stack, unsigned long bp)
248{ 101{
249 show_trace_log_lvl(task, regs, stack, bp, ""); 102 inc_preempt_count();
103 if (regs->flags & X86_EFLAGS_IF)
104 local_irq_enable();
250} 105}
251 106
252static void 107static inline void preempt_conditional_cli(struct pt_regs *regs)
253show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
254 unsigned long *sp, unsigned long bp, char *log_lvl)
255{ 108{
256 unsigned long *stack; 109 if (regs->flags & X86_EFLAGS_IF)
257 int i; 110 local_irq_disable();
258 111 dec_preempt_count();
259 if (sp == NULL) {
260 if (task)
261 sp = (unsigned long *)task->thread.sp;
262 else
263 sp = (unsigned long *)&sp;
264 }
265
266 stack = sp;
267 for (i = 0; i < kstack_depth_to_print; i++) {
268 if (kstack_end(stack))
269 break;
270 if (i && ((i % 8) == 0))
271 printk("\n%s ", log_lvl);
272 printk("%08lx ", *stack++);
273 }
274 printk("\n%sCall Trace:\n", log_lvl);
275
276 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
277} 112}
278 113
279void show_stack(struct task_struct *task, unsigned long *sp) 114#ifdef CONFIG_X86_32
115static inline void
116die_if_kernel(const char *str, struct pt_regs *regs, long err)
280{ 117{
281 printk(" "); 118 if (!user_mode_vm(regs))
282 show_stack_log_lvl(task, NULL, sp, 0, ""); 119 die(str, regs, err);
283} 120}
284 121
285/* 122/*
286 * The architecture-independent dump_stack generator 123 * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
124 * invalid offset set (the LAZY one) and the faulting thread has
125 * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS,
126 * we set the offset field correctly and return 1.
287 */ 127 */
288void dump_stack(void) 128static int lazy_iobitmap_copy(void)
289{ 129{
290 unsigned long bp = 0; 130 struct thread_struct *thread;
291 unsigned long stack; 131 struct tss_struct *tss;
292 132 int cpu;
293#ifdef CONFIG_FRAME_POINTER
294 if (!bp)
295 asm("movl %%ebp, %0" : "=r" (bp):);
296#endif
297
298 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
299 current->pid, current->comm, print_tainted(),
300 init_utsname()->release,
301 (int)strcspn(init_utsname()->version, " "),
302 init_utsname()->version);
303
304 show_trace(current, NULL, &stack, bp);
305}
306
307EXPORT_SYMBOL(dump_stack);
308
309void show_registers(struct pt_regs *regs)
310{
311 int i;
312 133
313 print_modules(); 134 cpu = get_cpu();
314 __show_registers(regs, 0); 135 tss = &per_cpu(init_tss, cpu);
136 thread = &current->thread;
315 137
316 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", 138 if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
317 TASK_COMM_LEN, current->comm, task_pid_nr(current), 139 thread->io_bitmap_ptr) {
318 current_thread_info(), current, task_thread_info(current)); 140 memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
319 /* 141 thread->io_bitmap_max);
320 * When in-kernel, we also print out the stack and code at the 142 /*
321 * time of the fault.. 143 * If the previously set map was extending to higher ports
322 */ 144 * than the current one, pad extra space with 0xff (no access).
323 if (!user_mode_vm(regs)) { 145 */
324 unsigned int code_prologue = code_bytes * 43 / 64; 146 if (thread->io_bitmap_max < tss->io_bitmap_max) {
325 unsigned int code_len = code_bytes; 147 memset((char *) tss->io_bitmap +
326 unsigned char c; 148 thread->io_bitmap_max, 0xff,
327 u8 *ip; 149 tss->io_bitmap_max - thread->io_bitmap_max);
328
329 printk("\n" KERN_EMERG "Stack: ");
330 show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
331
332 printk(KERN_EMERG "Code: ");
333
334 ip = (u8 *)regs->ip - code_prologue;
335 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
336 /* try starting at EIP */
337 ip = (u8 *)regs->ip;
338 code_len = code_len - code_prologue + 1;
339 }
340 for (i = 0; i < code_len; i++, ip++) {
341 if (ip < (u8 *)PAGE_OFFSET ||
342 probe_kernel_address(ip, c)) {
343 printk(" Bad EIP value.");
344 break;
345 }
346 if (ip == (u8 *)regs->ip)
347 printk("<%02x> ", c);
348 else
349 printk("%02x ", c);
350 } 150 }
351 } 151 tss->io_bitmap_max = thread->io_bitmap_max;
352 printk("\n"); 152 tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
353} 153 tss->io_bitmap_owner = thread;
354 154 put_cpu();
355int is_valid_bugaddr(unsigned long ip)
356{
357 unsigned short ud2;
358
359 if (ip < PAGE_OFFSET)
360 return 0;
361 if (probe_kernel_address((unsigned short *)ip, ud2))
362 return 0;
363
364 return ud2 == 0x0b0f;
365}
366
367static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
368static int die_owner = -1;
369static unsigned int die_nest_count;
370
371unsigned __kprobes long oops_begin(void)
372{
373 unsigned long flags;
374
375 oops_enter();
376
377 if (die_owner != raw_smp_processor_id()) {
378 console_verbose();
379 raw_local_irq_save(flags);
380 __raw_spin_lock(&die_lock);
381 die_owner = smp_processor_id();
382 die_nest_count = 0;
383 bust_spinlocks(1);
384 } else {
385 raw_local_irq_save(flags);
386 }
387 die_nest_count++;
388 return flags;
389}
390
391void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
392{
393 bust_spinlocks(0);
394 die_owner = -1;
395 add_taint(TAINT_DIE);
396 __raw_spin_unlock(&die_lock);
397 raw_local_irq_restore(flags);
398
399 if (!regs)
400 return;
401
402 if (kexec_should_crash(current))
403 crash_kexec(regs);
404
405 if (in_interrupt())
406 panic("Fatal exception in interrupt");
407
408 if (panic_on_oops)
409 panic("Fatal exception");
410
411 oops_exit();
412 do_exit(signr);
413}
414
415int __kprobes __die(const char *str, struct pt_regs *regs, long err)
416{
417 unsigned short ss;
418 unsigned long sp;
419 155
420 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
421#ifdef CONFIG_PREEMPT
422 printk("PREEMPT ");
423#endif
424#ifdef CONFIG_SMP
425 printk("SMP ");
426#endif
427#ifdef CONFIG_DEBUG_PAGEALLOC
428 printk("DEBUG_PAGEALLOC");
429#endif
430 printk("\n");
431 if (notify_die(DIE_OOPS, str, regs, err,
432 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
433 return 1; 156 return 1;
434
435 show_registers(regs);
436 /* Executive summary in case the oops scrolled away */
437 sp = (unsigned long) (&regs->sp);
438 savesegment(ss, ss);
439 if (user_mode(regs)) {
440 sp = regs->sp;
441 ss = regs->ss & 0xffff;
442 } 157 }
443 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); 158 put_cpu();
444 print_symbol("%s", regs->ip);
445 printk(" SS:ESP %04x:%08lx\n", ss, sp);
446 return 0;
447}
448
449/*
450 * This is gone through when something in the kernel has done something bad
451 * and is about to be terminated:
452 */
453void die(const char *str, struct pt_regs *regs, long err)
454{
455 unsigned long flags = oops_begin();
456
457 if (die_nest_count < 3) {
458 report_bug(regs->ip, regs);
459
460 if (__die(str, regs, err))
461 regs = NULL;
462 } else {
463 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
464 }
465
466 oops_end(flags, regs, SIGSEGV);
467}
468 159
469static inline void 160 return 0;
470die_if_kernel(const char *str, struct pt_regs *regs, long err)
471{
472 if (!user_mode_vm(regs))
473 die(str, regs, err);
474} 161}
162#endif
475 163
476static void __kprobes 164static void __kprobes
477do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs, 165do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
478 long error_code, siginfo_t *info) 166 long error_code, siginfo_t *info)
479{ 167{
480 struct task_struct *tsk = current; 168 struct task_struct *tsk = current;
481 169
170#ifdef CONFIG_X86_32
482 if (regs->flags & X86_VM_MASK) { 171 if (regs->flags & X86_VM_MASK) {
483 if (vm86) 172 /*
173 * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
174 * On nmi (interrupt 2), do_trap should not be called.
175 */
176 if (trapnr < 6)
484 goto vm86_trap; 177 goto vm86_trap;
485 goto trap_signal; 178 goto trap_signal;
486 } 179 }
180#endif
487 181
488 if (!user_mode(regs)) 182 if (!user_mode(regs))
489 goto kernel_trap; 183 goto kernel_trap;
490 184
185#ifdef CONFIG_X86_32
491trap_signal: 186trap_signal:
187#endif
492 /* 188 /*
493 * We want error_code and trap_no set for userspace faults and 189 * We want error_code and trap_no set for userspace faults and
494 * kernelspace faults which result in die(), but not 190 * kernelspace faults which result in die(), but not
@@ -501,6 +197,18 @@ trap_signal:
501 tsk->thread.error_code = error_code; 197 tsk->thread.error_code = error_code;
502 tsk->thread.trap_no = trapnr; 198 tsk->thread.trap_no = trapnr;
503 199
200#ifdef CONFIG_X86_64
201 if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
202 printk_ratelimit()) {
203 printk(KERN_INFO
204 "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
205 tsk->comm, tsk->pid, str,
206 regs->ip, regs->sp, error_code);
207 print_vma_addr(" in ", regs->ip);
208 printk("\n");
209 }
210#endif
211
504 if (info) 212 if (info)
505 force_sig_info(signr, info, tsk); 213 force_sig_info(signr, info, tsk);
506 else 214 else
@@ -515,29 +223,29 @@ kernel_trap:
515 } 223 }
516 return; 224 return;
517 225
226#ifdef CONFIG_X86_32
518vm86_trap: 227vm86_trap:
519 if (handle_vm86_trap((struct kernel_vm86_regs *) regs, 228 if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
520 error_code, trapnr)) 229 error_code, trapnr))
521 goto trap_signal; 230 goto trap_signal;
522 return; 231 return;
232#endif
523} 233}
524 234
525#define DO_ERROR(trapnr, signr, str, name) \ 235#define DO_ERROR(trapnr, signr, str, name) \
526void do_##name(struct pt_regs *regs, long error_code) \ 236dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
527{ \ 237{ \
528 trace_hardirqs_fixup(); \
529 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 238 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
530 == NOTIFY_STOP) \ 239 == NOTIFY_STOP) \
531 return; \ 240 return; \
532 do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ 241 conditional_sti(regs); \
242 do_trap(trapnr, signr, str, regs, error_code, NULL); \
533} 243}
534 244
535#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ 245#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
536void do_##name(struct pt_regs *regs, long error_code) \ 246dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
537{ \ 247{ \
538 siginfo_t info; \ 248 siginfo_t info; \
539 if (irq) \
540 local_irq_enable(); \
541 info.si_signo = signr; \ 249 info.si_signo = signr; \
542 info.si_errno = 0; \ 250 info.si_errno = 0; \
543 info.si_code = sicode; \ 251 info.si_code = sicode; \
@@ -545,90 +253,68 @@ void do_##name(struct pt_regs *regs, long error_code) \
545 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 253 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
546 == NOTIFY_STOP) \ 254 == NOTIFY_STOP) \
547 return; \ 255 return; \
548 do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ 256 conditional_sti(regs); \
257 do_trap(trapnr, signr, str, regs, error_code, &info); \
549} 258}
550 259
551#define DO_VM86_ERROR(trapnr, signr, str, name) \ 260DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
552void do_##name(struct pt_regs *regs, long error_code) \ 261DO_ERROR(4, SIGSEGV, "overflow", overflow)
553{ \ 262DO_ERROR(5, SIGSEGV, "bounds", bounds)
554 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 263DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
555 == NOTIFY_STOP) \
556 return; \
557 do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
558}
559
560#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
561void do_##name(struct pt_regs *regs, long error_code) \
562{ \
563 siginfo_t info; \
564 info.si_signo = signr; \
565 info.si_errno = 0; \
566 info.si_code = sicode; \
567 info.si_addr = (void __user *)siaddr; \
568 trace_hardirqs_fixup(); \
569 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
570 == NOTIFY_STOP) \
571 return; \
572 do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
573}
574
575DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
576#ifndef CONFIG_KPROBES
577DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
578#endif
579DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
580DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
581DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
582DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) 264DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
583DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) 265DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
584DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) 266DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
267#ifdef CONFIG_X86_32
585DO_ERROR(12, SIGBUS, "stack segment", stack_segment) 268DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
586DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) 269#endif
587DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) 270DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
271
272#ifdef CONFIG_X86_64
273/* Runs on IST stack */
274dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
275{
276 if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
277 12, SIGBUS) == NOTIFY_STOP)
278 return;
279 preempt_conditional_sti(regs);
280 do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
281 preempt_conditional_cli(regs);
282}
283
284dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
285{
286 static const char str[] = "double fault";
287 struct task_struct *tsk = current;
288
289 /* Return not checked because double check cannot be ignored */
290 notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);
588 291
589void __kprobes 292 tsk->thread.error_code = error_code;
293 tsk->thread.trap_no = 8;
294
295 /* This is always a kernel trap and never fixable (and thus must
296 never return). */
297 for (;;)
298 die(str, regs, error_code);
299}
300#endif
301
302dotraplinkage void __kprobes
590do_general_protection(struct pt_regs *regs, long error_code) 303do_general_protection(struct pt_regs *regs, long error_code)
591{ 304{
592 struct task_struct *tsk; 305 struct task_struct *tsk;
593 struct thread_struct *thread;
594 struct tss_struct *tss;
595 int cpu;
596 306
597 cpu = get_cpu(); 307 conditional_sti(regs);
598 tss = &per_cpu(init_tss, cpu);
599 thread = &current->thread;
600
601 /*
602 * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
603 * invalid offset set (the LAZY one) and the faulting thread has
604 * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS
605 * and we set the offset field correctly. Then we let the CPU to
606 * restart the faulting instruction.
607 */
608 if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
609 thread->io_bitmap_ptr) {
610 memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
611 thread->io_bitmap_max);
612 /*
613 * If the previously set map was extending to higher ports
614 * than the current one, pad extra space with 0xff (no access).
615 */
616 if (thread->io_bitmap_max < tss->io_bitmap_max) {
617 memset((char *) tss->io_bitmap +
618 thread->io_bitmap_max, 0xff,
619 tss->io_bitmap_max - thread->io_bitmap_max);
620 }
621 tss->io_bitmap_max = thread->io_bitmap_max;
622 tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
623 tss->io_bitmap_owner = thread;
624 put_cpu();
625 308
309#ifdef CONFIG_X86_32
310 if (lazy_iobitmap_copy()) {
311 /* restart the faulting instruction */
626 return; 312 return;
627 } 313 }
628 put_cpu();
629 314
630 if (regs->flags & X86_VM_MASK) 315 if (regs->flags & X86_VM_MASK)
631 goto gp_in_vm86; 316 goto gp_in_vm86;
317#endif
632 318
633 tsk = current; 319 tsk = current;
634 if (!user_mode(regs)) 320 if (!user_mode(regs))
@@ -650,10 +336,12 @@ do_general_protection(struct pt_regs *regs, long error_code)
650 force_sig(SIGSEGV, tsk); 336 force_sig(SIGSEGV, tsk);
651 return; 337 return;
652 338
339#ifdef CONFIG_X86_32
653gp_in_vm86: 340gp_in_vm86:
654 local_irq_enable(); 341 local_irq_enable();
655 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); 342 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
656 return; 343 return;
344#endif
657 345
658gp_in_kernel: 346gp_in_kernel:
659 if (fixup_exception(regs)) 347 if (fixup_exception(regs))
@@ -690,7 +378,8 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs)
690 printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); 378 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
691 379
692 /* Clear and disable the memory parity error line. */ 380 /* Clear and disable the memory parity error line. */
693 clear_mem_error(reason); 381 reason = (reason & 0xf) | 4;
382 outb(reason, 0x61);
694} 383}
695 384
696static notrace __kprobes void 385static notrace __kprobes void
@@ -716,7 +405,8 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
716static notrace __kprobes void 405static notrace __kprobes void
717unknown_nmi_error(unsigned char reason, struct pt_regs *regs) 406unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
718{ 407{
719 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 408 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
409 NOTIFY_STOP)
720 return; 410 return;
721#ifdef CONFIG_MCA 411#ifdef CONFIG_MCA
722 /* 412 /*
@@ -739,41 +429,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
739 printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); 429 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
740} 430}
741 431
742static DEFINE_SPINLOCK(nmi_print_lock);
743
744void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
745{
746 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
747 return;
748
749 spin_lock(&nmi_print_lock);
750 /*
751 * We are in trouble anyway, lets at least try
752 * to get a message out:
753 */
754 bust_spinlocks(1);
755 printk(KERN_EMERG "%s", str);
756 printk(" on CPU%d, ip %08lx, registers:\n",
757 smp_processor_id(), regs->ip);
758 show_registers(regs);
759 if (do_panic)
760 panic("Non maskable interrupt");
761 console_silent();
762 spin_unlock(&nmi_print_lock);
763 bust_spinlocks(0);
764
765 /*
766 * If we are in kernel we are probably nested up pretty bad
767 * and might aswell get out now while we still can:
768 */
769 if (!user_mode_vm(regs)) {
770 current->thread.trap_no = 2;
771 crash_kexec(regs);
772 }
773
774 do_exit(SIGSEGV);
775}
776
777static notrace __kprobes void default_do_nmi(struct pt_regs *regs) 432static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
778{ 433{
779 unsigned char reason = 0; 434 unsigned char reason = 0;
@@ -812,22 +467,25 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
812 mem_parity_error(reason, regs); 467 mem_parity_error(reason, regs);
813 if (reason & 0x40) 468 if (reason & 0x40)
814 io_check_error(reason, regs); 469 io_check_error(reason, regs);
470#ifdef CONFIG_X86_32
815 /* 471 /*
816 * Reassert NMI in case it became active meanwhile 472 * Reassert NMI in case it became active meanwhile
817 * as it's edge-triggered: 473 * as it's edge-triggered:
818 */ 474 */
819 reassert_nmi(); 475 reassert_nmi();
476#endif
820} 477}
821 478
822notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) 479dotraplinkage notrace __kprobes void
480do_nmi(struct pt_regs *regs, long error_code)
823{ 481{
824 int cpu;
825
826 nmi_enter(); 482 nmi_enter();
827 483
828 cpu = smp_processor_id(); 484#ifdef CONFIG_X86_32
829 485 { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); }
830 ++nmi_count(cpu); 486#else
487 add_pda(__nmi_count, 1);
488#endif
831 489
832 if (!ignore_nmis) 490 if (!ignore_nmis)
833 default_do_nmi(regs); 491 default_do_nmi(regs);
@@ -847,21 +505,44 @@ void restart_nmi(void)
847 acpi_nmi_enable(); 505 acpi_nmi_enable();
848} 506}
849 507
850#ifdef CONFIG_KPROBES 508/* May run on IST stack. */
851void __kprobes do_int3(struct pt_regs *regs, long error_code) 509dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
852{ 510{
853 trace_hardirqs_fixup(); 511#ifdef CONFIG_KPROBES
854
855 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) 512 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
856 == NOTIFY_STOP) 513 == NOTIFY_STOP)
857 return; 514 return;
858 /* 515#else
859 * This is an interrupt gate, because kprobes wants interrupts 516 if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
860 * disabled. Normal trap handlers don't. 517 == NOTIFY_STOP)
861 */ 518 return;
862 restore_interrupts(regs); 519#endif
520
521 preempt_conditional_sti(regs);
522 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
523 preempt_conditional_cli(regs);
524}
863 525
864 do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); 526#ifdef CONFIG_X86_64
527/* Help handler running on IST stack to switch back to user stack
528 for scheduling or signal handling. The actual stack switch is done in
529 entry.S */
530asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
531{
532 struct pt_regs *regs = eregs;
533 /* Did already sync */
534 if (eregs == (struct pt_regs *)eregs->sp)
535 ;
536 /* Exception from user space */
537 else if (user_mode(eregs))
538 regs = task_pt_regs(current);
539 /* Exception from kernel and interrupts are enabled. Move to
540 kernel process stack. */
541 else if (eregs->flags & X86_EFLAGS_IF)
542 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
543 if (eregs != regs)
544 *regs = *eregs;
545 return regs;
865} 546}
866#endif 547#endif
867 548
@@ -886,15 +567,15 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code)
886 * about restoring all the debug state, and ptrace doesn't have to 567 * about restoring all the debug state, and ptrace doesn't have to
887 * find every occurrence of the TF bit that could be saved away even 568 * find every occurrence of the TF bit that could be saved away even
888 * by user code) 569 * by user code)
570 *
571 * May run on IST stack.
889 */ 572 */
890void __kprobes do_debug(struct pt_regs *regs, long error_code) 573dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
891{ 574{
892 struct task_struct *tsk = current; 575 struct task_struct *tsk = current;
893 unsigned int condition; 576 unsigned long condition;
894 int si_code; 577 int si_code;
895 578
896 trace_hardirqs_fixup();
897
898 get_debugreg(condition, 6); 579 get_debugreg(condition, 6);
899 580
900 /* 581 /*
@@ -906,9 +587,9 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
906 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, 587 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
907 SIGTRAP) == NOTIFY_STOP) 588 SIGTRAP) == NOTIFY_STOP)
908 return; 589 return;
590
909 /* It's safe to allow irq's after DR6 has been saved */ 591 /* It's safe to allow irq's after DR6 has been saved */
910 if (regs->flags & X86_EFLAGS_IF) 592 preempt_conditional_sti(regs);
911 local_irq_enable();
912 593
913 /* Mask out spurious debug traps due to lazy DR7 setting */ 594 /* Mask out spurious debug traps due to lazy DR7 setting */
914 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { 595 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
@@ -916,8 +597,10 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
916 goto clear_dr7; 597 goto clear_dr7;
917 } 598 }
918 599
600#ifdef CONFIG_X86_32
919 if (regs->flags & X86_VM_MASK) 601 if (regs->flags & X86_VM_MASK)
920 goto debug_vm86; 602 goto debug_vm86;
603#endif
921 604
922 /* Save debug status register where ptrace can see it */ 605 /* Save debug status register where ptrace can see it */
923 tsk->thread.debugreg6 = condition; 606 tsk->thread.debugreg6 = condition;
@@ -927,16 +610,11 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
927 * kernel space (but re-enable TF when returning to user mode). 610 * kernel space (but re-enable TF when returning to user mode).
928 */ 611 */
929 if (condition & DR_STEP) { 612 if (condition & DR_STEP) {
930 /*
931 * We already checked v86 mode above, so we can
932 * check for kernel mode by just checking the CPL
933 * of CS.
934 */
935 if (!user_mode(regs)) 613 if (!user_mode(regs))
936 goto clear_TF_reenable; 614 goto clear_TF_reenable;
937 } 615 }
938 616
939 si_code = get_si_code((unsigned long)condition); 617 si_code = get_si_code(condition);
940 /* Ok, finally something we can handle */ 618 /* Ok, finally something we can handle */
941 send_sigtrap(tsk, regs, error_code, si_code); 619 send_sigtrap(tsk, regs, error_code, si_code);
942 620
@@ -946,18 +624,37 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
946 */ 624 */
947clear_dr7: 625clear_dr7:
948 set_debugreg(0, 7); 626 set_debugreg(0, 7);
627 preempt_conditional_cli(regs);
949 return; 628 return;
950 629
630#ifdef CONFIG_X86_32
951debug_vm86: 631debug_vm86:
952 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); 632 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
633 preempt_conditional_cli(regs);
953 return; 634 return;
635#endif
954 636
955clear_TF_reenable: 637clear_TF_reenable:
956 set_tsk_thread_flag(tsk, TIF_SINGLESTEP); 638 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
957 regs->flags &= ~X86_EFLAGS_TF; 639 regs->flags &= ~X86_EFLAGS_TF;
640 preempt_conditional_cli(regs);
958 return; 641 return;
959} 642}
960 643
644#ifdef CONFIG_X86_64
645static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
646{
647 if (fixup_exception(regs))
648 return 1;
649
650 notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
651 /* Illegal floating point operation in the kernel */
652 current->thread.trap_no = trapnr;
653 die(str, regs, 0);
654 return 0;
655}
656#endif
657
961/* 658/*
962 * Note that we play around with the 'TS' bit in an attempt to get 659 * Note that we play around with the 'TS' bit in an attempt to get
963 * the correct behaviour even in the presence of the asynchronous 660 * the correct behaviour even in the presence of the asynchronous
@@ -994,7 +691,9 @@ void math_error(void __user *ip)
994 swd = get_fpu_swd(task); 691 swd = get_fpu_swd(task);
995 switch (swd & ~cwd & 0x3f) { 692 switch (swd & ~cwd & 0x3f) {
996 case 0x000: /* No unmasked exception */ 693 case 0x000: /* No unmasked exception */
694#ifdef CONFIG_X86_32
997 return; 695 return;
696#endif
998 default: /* Multiple exceptions */ 697 default: /* Multiple exceptions */
999 break; 698 break;
1000 case 0x001: /* Invalid Op */ 699 case 0x001: /* Invalid Op */
@@ -1022,9 +721,18 @@ void math_error(void __user *ip)
1022 force_sig_info(SIGFPE, &info, task); 721 force_sig_info(SIGFPE, &info, task);
1023} 722}
1024 723
1025void do_coprocessor_error(struct pt_regs *regs, long error_code) 724dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
1026{ 725{
726 conditional_sti(regs);
727
728#ifdef CONFIG_X86_32
1027 ignore_fpu_irq = 1; 729 ignore_fpu_irq = 1;
730#else
731 if (!user_mode(regs) &&
732 kernel_math_error(regs, "kernel x87 math error", 16))
733 return;
734#endif
735
1028 math_error((void __user *)regs->ip); 736 math_error((void __user *)regs->ip);
1029} 737}
1030 738
@@ -1076,8 +784,12 @@ static void simd_math_error(void __user *ip)
1076 force_sig_info(SIGFPE, &info, task); 784 force_sig_info(SIGFPE, &info, task);
1077} 785}
1078 786
1079void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) 787dotraplinkage void
788do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
1080{ 789{
790 conditional_sti(regs);
791
792#ifdef CONFIG_X86_32
1081 if (cpu_has_xmm) { 793 if (cpu_has_xmm) {
1082 /* Handle SIMD FPU exceptions on PIII+ processors. */ 794 /* Handle SIMD FPU exceptions on PIII+ processors. */
1083 ignore_fpu_irq = 1; 795 ignore_fpu_irq = 1;
@@ -1096,16 +808,25 @@ void do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
1096 current->thread.error_code = error_code; 808 current->thread.error_code = error_code;
1097 die_if_kernel("cache flush denied", regs, error_code); 809 die_if_kernel("cache flush denied", regs, error_code);
1098 force_sig(SIGSEGV, current); 810 force_sig(SIGSEGV, current);
811#else
812 if (!user_mode(regs) &&
813 kernel_math_error(regs, "kernel simd math error", 19))
814 return;
815 simd_math_error((void __user *)regs->ip);
816#endif
1099} 817}
1100 818
1101void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) 819dotraplinkage void
820do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
1102{ 821{
822 conditional_sti(regs);
1103#if 0 823#if 0
1104 /* No need to warn about this any longer. */ 824 /* No need to warn about this any longer. */
1105 printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); 825 printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
1106#endif 826#endif
1107} 827}
1108 828
829#ifdef CONFIG_X86_32
1109unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) 830unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
1110{ 831{
1111 struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); 832 struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
@@ -1124,6 +845,15 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
1124 845
1125 return new_kesp; 846 return new_kesp;
1126} 847}
848#else
849asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
850{
851}
852
853asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
854{
855}
856#endif
1127 857
1128/* 858/*
1129 * 'math_state_restore()' saves the current math information in the 859 * 'math_state_restore()' saves the current math information in the
@@ -1156,14 +886,24 @@ asmlinkage void math_state_restore(void)
1156 } 886 }
1157 887
1158 clts(); /* Allow maths ops (or we recurse) */ 888 clts(); /* Allow maths ops (or we recurse) */
889#ifdef CONFIG_X86_32
1159 restore_fpu(tsk); 890 restore_fpu(tsk);
891#else
892 /*
893 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
894 */
895 if (unlikely(restore_fpu_checking(tsk))) {
896 stts();
897 force_sig(SIGSEGV, tsk);
898 return;
899 }
900#endif
1160 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ 901 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
1161 tsk->fpu_counter++; 902 tsk->fpu_counter++;
1162} 903}
1163EXPORT_SYMBOL_GPL(math_state_restore); 904EXPORT_SYMBOL_GPL(math_state_restore);
1164 905
1165#ifndef CONFIG_MATH_EMULATION 906#ifndef CONFIG_MATH_EMULATION
1166
1167asmlinkage void math_emulate(long arg) 907asmlinkage void math_emulate(long arg)
1168{ 908{
1169 printk(KERN_EMERG 909 printk(KERN_EMERG
@@ -1172,12 +912,54 @@ asmlinkage void math_emulate(long arg)
1172 force_sig(SIGFPE, current); 912 force_sig(SIGFPE, current);
1173 schedule(); 913 schedule();
1174} 914}
1175
1176#endif /* CONFIG_MATH_EMULATION */ 915#endif /* CONFIG_MATH_EMULATION */
1177 916
917dotraplinkage void __kprobes
918do_device_not_available(struct pt_regs *regs, long error)
919{
920#ifdef CONFIG_X86_32
921 if (read_cr0() & X86_CR0_EM) {
922 conditional_sti(regs);
923 math_emulate(0);
924 } else {
925 math_state_restore(); /* interrupts still off */
926 conditional_sti(regs);
927 }
928#else
929 math_state_restore();
930#endif
931}
932
933#ifdef CONFIG_X86_32
934#ifdef CONFIG_X86_MCE
935dotraplinkage void __kprobes do_machine_check(struct pt_regs *regs, long error)
936{
937 conditional_sti(regs);
938 machine_check_vector(regs, error);
939}
940#endif
941
942dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
943{
944 siginfo_t info;
945 local_irq_enable();
946
947 info.si_signo = SIGILL;
948 info.si_errno = 0;
949 info.si_code = ILL_BADSTK;
950 info.si_addr = 0;
951 if (notify_die(DIE_TRAP, "iret exception",
952 regs, error_code, 32, SIGILL) == NOTIFY_STOP)
953 return;
954 do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
955}
956#endif
957
1178void __init trap_init(void) 958void __init trap_init(void)
1179{ 959{
960#ifdef CONFIG_X86_32
1180 int i; 961 int i;
962#endif
1181 963
1182#ifdef CONFIG_EISA 964#ifdef CONFIG_EISA
1183 void __iomem *p = early_ioremap(0x0FFFD9, 4); 965 void __iomem *p = early_ioremap(0x0FFFD9, 4);
@@ -1187,29 +969,40 @@ void __init trap_init(void)
1187 early_iounmap(p, 4); 969 early_iounmap(p, 4);
1188#endif 970#endif
1189 971
1190 set_trap_gate(0, &divide_error); 972 set_intr_gate(0, &divide_error);
1191 set_intr_gate(1, &debug); 973 set_intr_gate_ist(1, &debug, DEBUG_STACK);
1192 set_intr_gate(2, &nmi); 974 set_intr_gate_ist(2, &nmi, NMI_STACK);
1193 set_system_intr_gate(3, &int3); /* int3 can be called from all */ 975 /* int3 can be called from all */
1194 set_system_gate(4, &overflow); /* int4 can be called from all */ 976 set_system_intr_gate_ist(3, &int3, DEBUG_STACK);
1195 set_trap_gate(5, &bounds); 977 /* int4 can be called from all */
1196 set_trap_gate(6, &invalid_op); 978 set_system_intr_gate(4, &overflow);
1197 set_trap_gate(7, &device_not_available); 979 set_intr_gate(5, &bounds);
980 set_intr_gate(6, &invalid_op);
981 set_intr_gate(7, &device_not_available);
982#ifdef CONFIG_X86_32
1198 set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); 983 set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
1199 set_trap_gate(9, &coprocessor_segment_overrun); 984#else
1200 set_trap_gate(10, &invalid_TSS); 985 set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
1201 set_trap_gate(11, &segment_not_present); 986#endif
1202 set_trap_gate(12, &stack_segment); 987 set_intr_gate(9, &coprocessor_segment_overrun);
1203 set_trap_gate(13, &general_protection); 988 set_intr_gate(10, &invalid_TSS);
989 set_intr_gate(11, &segment_not_present);
990 set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
991 set_intr_gate(13, &general_protection);
1204 set_intr_gate(14, &page_fault); 992 set_intr_gate(14, &page_fault);
1205 set_trap_gate(15, &spurious_interrupt_bug); 993 set_intr_gate(15, &spurious_interrupt_bug);
1206 set_trap_gate(16, &coprocessor_error); 994 set_intr_gate(16, &coprocessor_error);
1207 set_trap_gate(17, &alignment_check); 995 set_intr_gate(17, &alignment_check);
1208#ifdef CONFIG_X86_MCE 996#ifdef CONFIG_X86_MCE
1209 set_trap_gate(18, &machine_check); 997 set_intr_gate_ist(18, &machine_check, MCE_STACK);
1210#endif 998#endif
1211 set_trap_gate(19, &simd_coprocessor_error); 999 set_intr_gate(19, &simd_coprocessor_error);
1212 1000
1001#ifdef CONFIG_IA32_EMULATION
1002 set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
1003#endif
1004
1005#ifdef CONFIG_X86_32
1213 if (cpu_has_fxsr) { 1006 if (cpu_has_fxsr) {
1214 printk(KERN_INFO "Enabling fast FPU save and restore... "); 1007 printk(KERN_INFO "Enabling fast FPU save and restore... ");
1215 set_in_cr4(X86_CR4_OSFXSR); 1008 set_in_cr4(X86_CR4_OSFXSR);
@@ -1222,36 +1015,20 @@ void __init trap_init(void)
1222 printk("done.\n"); 1015 printk("done.\n");
1223 } 1016 }
1224 1017
1225 set_system_gate(SYSCALL_VECTOR, &system_call); 1018 set_system_trap_gate(SYSCALL_VECTOR, &system_call);
1226 1019
1227 /* Reserve all the builtin and the syscall vector: */ 1020 /* Reserve all the builtin and the syscall vector: */
1228 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) 1021 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
1229 set_bit(i, used_vectors); 1022 set_bit(i, used_vectors);
1230 1023
1231 set_bit(SYSCALL_VECTOR, used_vectors); 1024 set_bit(SYSCALL_VECTOR, used_vectors);
1232 1025#endif
1233 /* 1026 /*
1234 * Should be a barrier for any external CPU state: 1027 * Should be a barrier for any external CPU state:
1235 */ 1028 */
1236 cpu_init(); 1029 cpu_init();
1237 1030
1031#ifdef CONFIG_X86_32
1238 trap_init_hook(); 1032 trap_init_hook();
1033#endif
1239} 1034}
1240
1241static int __init kstack_setup(char *s)
1242{
1243 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
1244
1245 return 1;
1246}
1247__setup("kstack=", kstack_setup);
1248
1249static int __init code_bytes_setup(char *s)
1250{
1251 code_bytes = simple_strtoul(s, NULL, 0);
1252 if (code_bytes > 8192)
1253 code_bytes = 8192;
1254
1255 return 1;
1256}
1257__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
deleted file mode 100644
index 9c0ac0cab013..000000000000
--- a/arch/x86/kernel/traps_64.c
+++ /dev/null
@@ -1,1214 +0,0 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 *
5 * Pentium III FXSR, SSE support
6 * Gareth Hughes <gareth@valinux.com>, May 2000
7 */
8
9/*
10 * 'Traps.c' handles hardware traps and faults after we have saved some
11 * state in 'entry.S'.
12 */
13#include <linux/moduleparam.h>
14#include <linux/interrupt.h>
15#include <linux/kallsyms.h>
16#include <linux/spinlock.h>
17#include <linux/kprobes.h>
18#include <linux/uaccess.h>
19#include <linux/utsname.h>
20#include <linux/kdebug.h>
21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/ptrace.h>
24#include <linux/string.h>
25#include <linux/unwind.h>
26#include <linux/delay.h>
27#include <linux/errno.h>
28#include <linux/kexec.h>
29#include <linux/sched.h>
30#include <linux/timer.h>
31#include <linux/init.h>
32#include <linux/bug.h>
33#include <linux/nmi.h>
34#include <linux/mm.h>
35#include <linux/smp.h>
36#include <linux/io.h>
37
38#if defined(CONFIG_EDAC)
39#include <linux/edac.h>
40#endif
41
42#include <asm/stacktrace.h>
43#include <asm/processor.h>
44#include <asm/debugreg.h>
45#include <asm/atomic.h>
46#include <asm/system.h>
47#include <asm/unwind.h>
48#include <asm/desc.h>
49#include <asm/i387.h>
50#include <asm/pgalloc.h>
51#include <asm/proto.h>
52#include <asm/pda.h>
53#include <asm/traps.h>
54
55#include <mach_traps.h>
56
57int panic_on_unrecovered_nmi;
58int kstack_depth_to_print = 12;
59static unsigned int code_bytes = 64;
60static int ignore_nmis;
61static int die_counter;
62
63static inline void conditional_sti(struct pt_regs *regs)
64{
65 if (regs->flags & X86_EFLAGS_IF)
66 local_irq_enable();
67}
68
69static inline void preempt_conditional_sti(struct pt_regs *regs)
70{
71 inc_preempt_count();
72 if (regs->flags & X86_EFLAGS_IF)
73 local_irq_enable();
74}
75
76static inline void preempt_conditional_cli(struct pt_regs *regs)
77{
78 if (regs->flags & X86_EFLAGS_IF)
79 local_irq_disable();
80 /* Make sure to not schedule here because we could be running
81 on an exception stack. */
82 dec_preempt_count();
83}
84
85void printk_address(unsigned long address, int reliable)
86{
87 printk(" [<%016lx>] %s%pS\n",
88 address, reliable ? "" : "? ", (void *) address);
89}
90
91static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
92 unsigned *usedp, char **idp)
93{
94 static char ids[][8] = {
95 [DEBUG_STACK - 1] = "#DB",
96 [NMI_STACK - 1] = "NMI",
97 [DOUBLEFAULT_STACK - 1] = "#DF",
98 [STACKFAULT_STACK - 1] = "#SS",
99 [MCE_STACK - 1] = "#MC",
100#if DEBUG_STKSZ > EXCEPTION_STKSZ
101 [N_EXCEPTION_STACKS ...
102 N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
103#endif
104 };
105 unsigned k;
106
107 /*
108 * Iterate over all exception stacks, and figure out whether
109 * 'stack' is in one of them:
110 */
111 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
112 unsigned long end = per_cpu(orig_ist, cpu).ist[k];
113 /*
114 * Is 'stack' above this exception frame's end?
115 * If yes then skip to the next frame.
116 */
117 if (stack >= end)
118 continue;
119 /*
120 * Is 'stack' above this exception frame's start address?
121 * If yes then we found the right frame.
122 */
123 if (stack >= end - EXCEPTION_STKSZ) {
124 /*
125 * Make sure we only iterate through an exception
126 * stack once. If it comes up for the second time
127 * then there's something wrong going on - just
128 * break out and return NULL:
129 */
130 if (*usedp & (1U << k))
131 break;
132 *usedp |= 1U << k;
133 *idp = ids[k];
134 return (unsigned long *)end;
135 }
136 /*
137 * If this is a debug stack, and if it has a larger size than
138 * the usual exception stacks, then 'stack' might still
139 * be within the lower portion of the debug stack:
140 */
141#if DEBUG_STKSZ > EXCEPTION_STKSZ
142 if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
143 unsigned j = N_EXCEPTION_STACKS - 1;
144
145 /*
146 * Black magic. A large debug stack is composed of
147 * multiple exception stack entries, which we
148 * iterate through now. Dont look:
149 */
150 do {
151 ++j;
152 end -= EXCEPTION_STKSZ;
153 ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
154 } while (stack < end - EXCEPTION_STKSZ);
155 if (*usedp & (1U << j))
156 break;
157 *usedp |= 1U << j;
158 *idp = ids[j];
159 return (unsigned long *)end;
160 }
161#endif
162 }
163 return NULL;
164}
165
166/*
167 * x86-64 can have up to three kernel stacks:
168 * process stack
169 * interrupt stack
170 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
171 */
172
173static inline int valid_stack_ptr(struct thread_info *tinfo,
174 void *p, unsigned int size, void *end)
175{
176 void *t = tinfo;
177 if (end) {
178 if (p < end && p >= (end-THREAD_SIZE))
179 return 1;
180 else
181 return 0;
182 }
183 return p > t && p < t + THREAD_SIZE - size;
184}
185
186/* The form of the top of the frame on the stack */
187struct stack_frame {
188 struct stack_frame *next_frame;
189 unsigned long return_address;
190};
191
192static inline unsigned long
193print_context_stack(struct thread_info *tinfo,
194 unsigned long *stack, unsigned long bp,
195 const struct stacktrace_ops *ops, void *data,
196 unsigned long *end)
197{
198 struct stack_frame *frame = (struct stack_frame *)bp;
199
200 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
201 unsigned long addr;
202
203 addr = *stack;
204 if (__kernel_text_address(addr)) {
205 if ((unsigned long) stack == bp + 8) {
206 ops->address(data, addr, 1);
207 frame = frame->next_frame;
208 bp = (unsigned long) frame;
209 } else {
210 ops->address(data, addr, bp == 0);
211 }
212 }
213 stack++;
214 }
215 return bp;
216}
217
218void dump_trace(struct task_struct *task, struct pt_regs *regs,
219 unsigned long *stack, unsigned long bp,
220 const struct stacktrace_ops *ops, void *data)
221{
222 const unsigned cpu = get_cpu();
223 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
224 unsigned used = 0;
225 struct thread_info *tinfo;
226
227 if (!task)
228 task = current;
229
230 if (!stack) {
231 unsigned long dummy;
232 stack = &dummy;
233 if (task && task != current)
234 stack = (unsigned long *)task->thread.sp;
235 }
236
237#ifdef CONFIG_FRAME_POINTER
238 if (!bp) {
239 if (task == current) {
240 /* Grab bp right from our regs */
241 asm("movq %%rbp, %0" : "=r" (bp) : );
242 } else {
243 /* bp is the last reg pushed by switch_to */
244 bp = *(unsigned long *) task->thread.sp;
245 }
246 }
247#endif
248
249 /*
250 * Print function call entries in all stacks, starting at the
251 * current stack address. If the stacks consist of nested
252 * exceptions
253 */
254 tinfo = task_thread_info(task);
255 for (;;) {
256 char *id;
257 unsigned long *estack_end;
258 estack_end = in_exception_stack(cpu, (unsigned long)stack,
259 &used, &id);
260
261 if (estack_end) {
262 if (ops->stack(data, id) < 0)
263 break;
264
265 bp = print_context_stack(tinfo, stack, bp, ops,
266 data, estack_end);
267 ops->stack(data, "<EOE>");
268 /*
269 * We link to the next stack via the
270 * second-to-last pointer (index -2 to end) in the
271 * exception stack:
272 */
273 stack = (unsigned long *) estack_end[-2];
274 continue;
275 }
276 if (irqstack_end) {
277 unsigned long *irqstack;
278 irqstack = irqstack_end -
279 (IRQSTACKSIZE - 64) / sizeof(*irqstack);
280
281 if (stack >= irqstack && stack < irqstack_end) {
282 if (ops->stack(data, "IRQ") < 0)
283 break;
284 bp = print_context_stack(tinfo, stack, bp,
285 ops, data, irqstack_end);
286 /*
287 * We link to the next stack (which would be
288 * the process stack normally) the last
289 * pointer (index -1 to end) in the IRQ stack:
290 */
291 stack = (unsigned long *) (irqstack_end[-1]);
292 irqstack_end = NULL;
293 ops->stack(data, "EOI");
294 continue;
295 }
296 }
297 break;
298 }
299
300 /*
301 * This handles the process stack:
302 */
303 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL);
304 put_cpu();
305}
306EXPORT_SYMBOL(dump_trace);
307
308static void
309print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
310{
311 print_symbol(msg, symbol);
312 printk("\n");
313}
314
315static void print_trace_warning(void *data, char *msg)
316{
317 printk("%s\n", msg);
318}
319
320static int print_trace_stack(void *data, char *name)
321{
322 printk(" <%s> ", name);
323 return 0;
324}
325
326static void print_trace_address(void *data, unsigned long addr, int reliable)
327{
328 touch_nmi_watchdog();
329 printk_address(addr, reliable);
330}
331
332static const struct stacktrace_ops print_trace_ops = {
333 .warning = print_trace_warning,
334 .warning_symbol = print_trace_warning_symbol,
335 .stack = print_trace_stack,
336 .address = print_trace_address,
337};
338
339static void
340show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
341 unsigned long *stack, unsigned long bp, char *log_lvl)
342{
343 printk("Call Trace:\n");
344 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
345}
346
347void show_trace(struct task_struct *task, struct pt_regs *regs,
348 unsigned long *stack, unsigned long bp)
349{
350 show_trace_log_lvl(task, regs, stack, bp, "");
351}
352
353static void
354show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
355 unsigned long *sp, unsigned long bp, char *log_lvl)
356{
357 unsigned long *stack;
358 int i;
359 const int cpu = smp_processor_id();
360 unsigned long *irqstack_end =
361 (unsigned long *) (cpu_pda(cpu)->irqstackptr);
362 unsigned long *irqstack =
363 (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
364
365 /*
366 * debugging aid: "show_stack(NULL, NULL);" prints the
367 * back trace for this cpu.
368 */
369
370 if (sp == NULL) {
371 if (task)
372 sp = (unsigned long *)task->thread.sp;
373 else
374 sp = (unsigned long *)&sp;
375 }
376
377 stack = sp;
378 for (i = 0; i < kstack_depth_to_print; i++) {
379 if (stack >= irqstack && stack <= irqstack_end) {
380 if (stack == irqstack_end) {
381 stack = (unsigned long *) (irqstack_end[-1]);
382 printk(" <EOI> ");
383 }
384 } else {
385 if (((long) stack & (THREAD_SIZE-1)) == 0)
386 break;
387 }
388 if (i && ((i % 4) == 0))
389 printk("\n");
390 printk(" %016lx", *stack++);
391 touch_nmi_watchdog();
392 }
393 printk("\n");
394 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
395}
396
397void show_stack(struct task_struct *task, unsigned long *sp)
398{
399 show_stack_log_lvl(task, NULL, sp, 0, "");
400}
401
402/*
403 * The architecture-independent dump_stack generator
404 */
405void dump_stack(void)
406{
407 unsigned long bp = 0;
408 unsigned long stack;
409
410#ifdef CONFIG_FRAME_POINTER
411 if (!bp)
412 asm("movq %%rbp, %0" : "=r" (bp) : );
413#endif
414
415 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
416 current->pid, current->comm, print_tainted(),
417 init_utsname()->release,
418 (int)strcspn(init_utsname()->version, " "),
419 init_utsname()->version);
420 show_trace(NULL, NULL, &stack, bp);
421}
422EXPORT_SYMBOL(dump_stack);
423
424void show_registers(struct pt_regs *regs)
425{
426 int i;
427 unsigned long sp;
428 const int cpu = smp_processor_id();
429 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
430
431 sp = regs->sp;
432 printk("CPU %d ", cpu);
433 __show_regs(regs);
434 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
435 cur->comm, cur->pid, task_thread_info(cur), cur);
436
437 /*
438 * When in-kernel, we also print out the stack and code at the
439 * time of the fault..
440 */
441 if (!user_mode(regs)) {
442 unsigned int code_prologue = code_bytes * 43 / 64;
443 unsigned int code_len = code_bytes;
444 unsigned char c;
445 u8 *ip;
446
447 printk("Stack: ");
448 show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
449 regs->bp, "");
450
451 printk(KERN_EMERG "Code: ");
452
453 ip = (u8 *)regs->ip - code_prologue;
454 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
455 /* try starting at RIP */
456 ip = (u8 *)regs->ip;
457 code_len = code_len - code_prologue + 1;
458 }
459 for (i = 0; i < code_len; i++, ip++) {
460 if (ip < (u8 *)PAGE_OFFSET ||
461 probe_kernel_address(ip, c)) {
462 printk(" Bad RIP value.");
463 break;
464 }
465 if (ip == (u8 *)regs->ip)
466 printk("<%02x> ", c);
467 else
468 printk("%02x ", c);
469 }
470 }
471 printk("\n");
472}
473
474int is_valid_bugaddr(unsigned long ip)
475{
476 unsigned short ud2;
477
478 if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2)))
479 return 0;
480
481 return ud2 == 0x0b0f;
482}
483
484static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
485static int die_owner = -1;
486static unsigned int die_nest_count;
487
488unsigned __kprobes long oops_begin(void)
489{
490 int cpu;
491 unsigned long flags;
492
493 oops_enter();
494
495 /* racy, but better than risking deadlock. */
496 raw_local_irq_save(flags);
497 cpu = smp_processor_id();
498 if (!__raw_spin_trylock(&die_lock)) {
499 if (cpu == die_owner)
500 /* nested oops. should stop eventually */;
501 else
502 __raw_spin_lock(&die_lock);
503 }
504 die_nest_count++;
505 die_owner = cpu;
506 console_verbose();
507 bust_spinlocks(1);
508 return flags;
509}
510
511void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
512{
513 die_owner = -1;
514 bust_spinlocks(0);
515 die_nest_count--;
516 if (!die_nest_count)
517 /* Nest count reaches zero, release the lock. */
518 __raw_spin_unlock(&die_lock);
519 raw_local_irq_restore(flags);
520 if (!regs) {
521 oops_exit();
522 return;
523 }
524 if (panic_on_oops)
525 panic("Fatal exception");
526 oops_exit();
527 do_exit(signr);
528}
529
530int __kprobes __die(const char *str, struct pt_regs *regs, long err)
531{
532 printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter);
533#ifdef CONFIG_PREEMPT
534 printk("PREEMPT ");
535#endif
536#ifdef CONFIG_SMP
537 printk("SMP ");
538#endif
539#ifdef CONFIG_DEBUG_PAGEALLOC
540 printk("DEBUG_PAGEALLOC");
541#endif
542 printk("\n");
543 if (notify_die(DIE_OOPS, str, regs, err,
544 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
545 return 1;
546
547 show_registers(regs);
548 add_taint(TAINT_DIE);
549 /* Executive summary in case the oops scrolled away */
550 printk(KERN_ALERT "RIP ");
551 printk_address(regs->ip, 1);
552 printk(" RSP <%016lx>\n", regs->sp);
553 if (kexec_should_crash(current))
554 crash_kexec(regs);
555 return 0;
556}
557
558void die(const char *str, struct pt_regs *regs, long err)
559{
560 unsigned long flags = oops_begin();
561
562 if (!user_mode(regs))
563 report_bug(regs->ip, regs);
564
565 if (__die(str, regs, err))
566 regs = NULL;
567 oops_end(flags, regs, SIGSEGV);
568}
569
570notrace __kprobes void
571die_nmi(char *str, struct pt_regs *regs, int do_panic)
572{
573 unsigned long flags;
574
575 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
576 return;
577
578 flags = oops_begin();
579 /*
580 * We are in trouble anyway, lets at least try
581 * to get a message out.
582 */
583 printk(KERN_EMERG "%s", str);
584 printk(" on CPU%d, ip %08lx, registers:\n",
585 smp_processor_id(), regs->ip);
586 show_registers(regs);
587 if (kexec_should_crash(current))
588 crash_kexec(regs);
589 if (do_panic || panic_on_oops)
590 panic("Non maskable interrupt");
591 oops_end(flags, NULL, SIGBUS);
592 nmi_exit();
593 local_irq_enable();
594 do_exit(SIGBUS);
595}
596
597static void __kprobes
598do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
599 long error_code, siginfo_t *info)
600{
601 struct task_struct *tsk = current;
602
603 if (!user_mode(regs))
604 goto kernel_trap;
605
606 /*
607 * We want error_code and trap_no set for userspace faults and
608 * kernelspace faults which result in die(), but not
609 * kernelspace faults which are fixed up. die() gives the
610 * process no chance to handle the signal and notice the
611 * kernel fault information, so that won't result in polluting
612 * the information about previously queued, but not yet
613 * delivered, faults. See also do_general_protection below.
614 */
615 tsk->thread.error_code = error_code;
616 tsk->thread.trap_no = trapnr;
617
618 if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
619 printk_ratelimit()) {
620 printk(KERN_INFO
621 "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
622 tsk->comm, tsk->pid, str,
623 regs->ip, regs->sp, error_code);
624 print_vma_addr(" in ", regs->ip);
625 printk("\n");
626 }
627
628 if (info)
629 force_sig_info(signr, info, tsk);
630 else
631 force_sig(signr, tsk);
632 return;
633
634kernel_trap:
635 if (!fixup_exception(regs)) {
636 tsk->thread.error_code = error_code;
637 tsk->thread.trap_no = trapnr;
638 die(str, regs, error_code);
639 }
640 return;
641}
642
643#define DO_ERROR(trapnr, signr, str, name) \
644asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
645{ \
646 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
647 == NOTIFY_STOP) \
648 return; \
649 conditional_sti(regs); \
650 do_trap(trapnr, signr, str, regs, error_code, NULL); \
651}
652
653#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
654asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
655{ \
656 siginfo_t info; \
657 info.si_signo = signr; \
658 info.si_errno = 0; \
659 info.si_code = sicode; \
660 info.si_addr = (void __user *)siaddr; \
661 trace_hardirqs_fixup(); \
662 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
663 == NOTIFY_STOP) \
664 return; \
665 conditional_sti(regs); \
666 do_trap(trapnr, signr, str, regs, error_code, &info); \
667}
668
669DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
670DO_ERROR(4, SIGSEGV, "overflow", overflow)
671DO_ERROR(5, SIGSEGV, "bounds", bounds)
672DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
673DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
674DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
675DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
676DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
677
678/* Runs on IST stack */
679asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
680{
681 if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
682 12, SIGBUS) == NOTIFY_STOP)
683 return;
684 preempt_conditional_sti(regs);
685 do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
686 preempt_conditional_cli(regs);
687}
688
689asmlinkage void do_double_fault(struct pt_regs *regs, long error_code)
690{
691 static const char str[] = "double fault";
692 struct task_struct *tsk = current;
693
694 /* Return not checked because double check cannot be ignored */
695 notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);
696
697 tsk->thread.error_code = error_code;
698 tsk->thread.trap_no = 8;
699
700 /* This is always a kernel trap and never fixable (and thus must
701 never return). */
702 for (;;)
703 die(str, regs, error_code);
704}
705
706asmlinkage void __kprobes
707do_general_protection(struct pt_regs *regs, long error_code)
708{
709 struct task_struct *tsk;
710
711 conditional_sti(regs);
712
713 tsk = current;
714 if (!user_mode(regs))
715 goto gp_in_kernel;
716
717 tsk->thread.error_code = error_code;
718 tsk->thread.trap_no = 13;
719
720 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
721 printk_ratelimit()) {
722 printk(KERN_INFO
723 "%s[%d] general protection ip:%lx sp:%lx error:%lx",
724 tsk->comm, tsk->pid,
725 regs->ip, regs->sp, error_code);
726 print_vma_addr(" in ", regs->ip);
727 printk("\n");
728 }
729
730 force_sig(SIGSEGV, tsk);
731 return;
732
733gp_in_kernel:
734 if (fixup_exception(regs))
735 return;
736
737 tsk->thread.error_code = error_code;
738 tsk->thread.trap_no = 13;
739 if (notify_die(DIE_GPF, "general protection fault", regs,
740 error_code, 13, SIGSEGV) == NOTIFY_STOP)
741 return;
742 die("general protection fault", regs, error_code);
743}
744
745static notrace __kprobes void
746mem_parity_error(unsigned char reason, struct pt_regs *regs)
747{
748 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
749 reason);
750 printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
751
752#if defined(CONFIG_EDAC)
753 if (edac_handler_set()) {
754 edac_atomic_assert_error();
755 return;
756 }
757#endif
758
759 if (panic_on_unrecovered_nmi)
760 panic("NMI: Not continuing");
761
762 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
763
764 /* Clear and disable the memory parity error line. */
765 reason = (reason & 0xf) | 4;
766 outb(reason, 0x61);
767}
768
769static notrace __kprobes void
770io_check_error(unsigned char reason, struct pt_regs *regs)
771{
772 printk("NMI: IOCK error (debug interrupt?)\n");
773 show_registers(regs);
774
775 /* Re-enable the IOCK line, wait for a few seconds */
776 reason = (reason & 0xf) | 8;
777 outb(reason, 0x61);
778 mdelay(2000);
779 reason &= ~8;
780 outb(reason, 0x61);
781}
782
783static notrace __kprobes void
784unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
785{
786 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
787 NOTIFY_STOP)
788 return;
789 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
790 reason);
791 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
792
793 if (panic_on_unrecovered_nmi)
794 panic("NMI: Not continuing");
795
796 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
797}
798
799/* Runs on IST stack. This code must keep interrupts off all the time.
800 Nested NMIs are prevented by the CPU. */
801asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
802{
803 unsigned char reason = 0;
804 int cpu;
805
806 cpu = smp_processor_id();
807
808 /* Only the BSP gets external NMIs from the system. */
809 if (!cpu)
810 reason = get_nmi_reason();
811
812 if (!(reason & 0xc0)) {
813 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
814 == NOTIFY_STOP)
815 return;
816 /*
817 * Ok, so this is none of the documented NMI sources,
818 * so it must be the NMI watchdog.
819 */
820 if (nmi_watchdog_tick(regs, reason))
821 return;
822 if (!do_nmi_callback(regs, cpu))
823 unknown_nmi_error(reason, regs);
824
825 return;
826 }
827 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
828 return;
829
830 /* AK: following checks seem to be broken on modern chipsets. FIXME */
831 if (reason & 0x80)
832 mem_parity_error(reason, regs);
833 if (reason & 0x40)
834 io_check_error(reason, regs);
835}
836
837asmlinkage notrace __kprobes void
838do_nmi(struct pt_regs *regs, long error_code)
839{
840 nmi_enter();
841
842 add_pda(__nmi_count, 1);
843
844 if (!ignore_nmis)
845 default_do_nmi(regs);
846
847 nmi_exit();
848}
849
850void stop_nmi(void)
851{
852 acpi_nmi_disable();
853 ignore_nmis++;
854}
855
856void restart_nmi(void)
857{
858 ignore_nmis--;
859 acpi_nmi_enable();
860}
861
862/* runs on IST stack. */
863asmlinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
864{
865 trace_hardirqs_fixup();
866
867 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
868 == NOTIFY_STOP)
869 return;
870
871 preempt_conditional_sti(regs);
872 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
873 preempt_conditional_cli(regs);
874}
875
876/* Help handler running on IST stack to switch back to user stack
877 for scheduling or signal handling. The actual stack switch is done in
878 entry.S */
879asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
880{
881 struct pt_regs *regs = eregs;
882 /* Did already sync */
883 if (eregs == (struct pt_regs *)eregs->sp)
884 ;
885 /* Exception from user space */
886 else if (user_mode(eregs))
887 regs = task_pt_regs(current);
888 /* Exception from kernel and interrupts are enabled. Move to
889 kernel process stack. */
890 else if (eregs->flags & X86_EFLAGS_IF)
891 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
892 if (eregs != regs)
893 *regs = *eregs;
894 return regs;
895}
896
897/* runs on IST stack. */
898asmlinkage void __kprobes do_debug(struct pt_regs *regs,
899 unsigned long error_code)
900{
901 struct task_struct *tsk = current;
902 unsigned long condition;
903 siginfo_t info;
904
905 trace_hardirqs_fixup();
906
907 get_debugreg(condition, 6);
908
909 /*
910 * The processor cleared BTF, so don't mark that we need it set.
911 */
912 clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
913 tsk->thread.debugctlmsr = 0;
914
915 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
916 SIGTRAP) == NOTIFY_STOP)
917 return;
918
919 preempt_conditional_sti(regs);
920
921 /* Mask out spurious debug traps due to lazy DR7 setting */
922 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
923 if (!tsk->thread.debugreg7)
924 goto clear_dr7;
925 }
926
927 tsk->thread.debugreg6 = condition;
928
929 /*
930 * Single-stepping through TF: make sure we ignore any events in
931 * kernel space (but re-enable TF when returning to user mode).
932 */
933 if (condition & DR_STEP) {
934 if (!user_mode(regs))
935 goto clear_TF_reenable;
936 }
937
938 /* Ok, finally something we can handle */
939 tsk->thread.trap_no = 1;
940 tsk->thread.error_code = error_code;
941 info.si_signo = SIGTRAP;
942 info.si_errno = 0;
943 info.si_code = get_si_code(condition);
944 info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
945 force_sig_info(SIGTRAP, &info, tsk);
946
947clear_dr7:
948 set_debugreg(0, 7);
949 preempt_conditional_cli(regs);
950 return;
951
952clear_TF_reenable:
953 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
954 regs->flags &= ~X86_EFLAGS_TF;
955 preempt_conditional_cli(regs);
956 return;
957}
958
959static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
960{
961 if (fixup_exception(regs))
962 return 1;
963
964 notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
965 /* Illegal floating point operation in the kernel */
966 current->thread.trap_no = trapnr;
967 die(str, regs, 0);
968 return 0;
969}
970
971/*
972 * Note that we play around with the 'TS' bit in an attempt to get
973 * the correct behaviour even in the presence of the asynchronous
974 * IRQ13 behaviour
975 */
976asmlinkage void do_coprocessor_error(struct pt_regs *regs)
977{
978 void __user *ip = (void __user *)(regs->ip);
979 struct task_struct *task;
980 siginfo_t info;
981 unsigned short cwd, swd;
982
983 conditional_sti(regs);
984 if (!user_mode(regs) &&
985 kernel_math_error(regs, "kernel x87 math error", 16))
986 return;
987
988 /*
989 * Save the info for the exception handler and clear the error.
990 */
991 task = current;
992 save_init_fpu(task);
993 task->thread.trap_no = 16;
994 task->thread.error_code = 0;
995 info.si_signo = SIGFPE;
996 info.si_errno = 0;
997 info.si_code = __SI_FAULT;
998 info.si_addr = ip;
999 /*
1000 * (~cwd & swd) will mask out exceptions that are not set to unmasked
1001 * status. 0x3f is the exception bits in these regs, 0x200 is the
1002 * C1 reg you need in case of a stack fault, 0x040 is the stack
1003 * fault bit. We should only be taking one exception at a time,
1004 * so if this combination doesn't produce any single exception,
1005 * then we have a bad program that isn't synchronizing its FPU usage
1006 * and it will suffer the consequences since we won't be able to
1007 * fully reproduce the context of the exception
1008 */
1009 cwd = get_fpu_cwd(task);
1010 swd = get_fpu_swd(task);
1011 switch (swd & ~cwd & 0x3f) {
1012 case 0x000: /* No unmasked exception */
1013 default: /* Multiple exceptions */
1014 break;
1015 case 0x001: /* Invalid Op */
1016 /*
1017 * swd & 0x240 == 0x040: Stack Underflow
1018 * swd & 0x240 == 0x240: Stack Overflow
1019 * User must clear the SF bit (0x40) if set
1020 */
1021 info.si_code = FPE_FLTINV;
1022 break;
1023 case 0x002: /* Denormalize */
1024 case 0x010: /* Underflow */
1025 info.si_code = FPE_FLTUND;
1026 break;
1027 case 0x004: /* Zero Divide */
1028 info.si_code = FPE_FLTDIV;
1029 break;
1030 case 0x008: /* Overflow */
1031 info.si_code = FPE_FLTOVF;
1032 break;
1033 case 0x020: /* Precision */
1034 info.si_code = FPE_FLTRES;
1035 break;
1036 }
1037 force_sig_info(SIGFPE, &info, task);
1038}
1039
1040asmlinkage void bad_intr(void)
1041{
1042 printk("bad interrupt");
1043}
1044
1045asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
1046{
1047 void __user *ip = (void __user *)(regs->ip);
1048 struct task_struct *task;
1049 siginfo_t info;
1050 unsigned short mxcsr;
1051
1052 conditional_sti(regs);
1053 if (!user_mode(regs) &&
1054 kernel_math_error(regs, "kernel simd math error", 19))
1055 return;
1056
1057 /*
1058 * Save the info for the exception handler and clear the error.
1059 */
1060 task = current;
1061 save_init_fpu(task);
1062 task->thread.trap_no = 19;
1063 task->thread.error_code = 0;
1064 info.si_signo = SIGFPE;
1065 info.si_errno = 0;
1066 info.si_code = __SI_FAULT;
1067 info.si_addr = ip;
1068 /*
1069 * The SIMD FPU exceptions are handled a little differently, as there
1070 * is only a single status/control register. Thus, to determine which
1071 * unmasked exception was caught we must mask the exception mask bits
1072 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
1073 */
1074 mxcsr = get_fpu_mxcsr(task);
1075 switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
1076 case 0x000:
1077 default:
1078 break;
1079 case 0x001: /* Invalid Op */
1080 info.si_code = FPE_FLTINV;
1081 break;
1082 case 0x002: /* Denormalize */
1083 case 0x010: /* Underflow */
1084 info.si_code = FPE_FLTUND;
1085 break;
1086 case 0x004: /* Zero Divide */
1087 info.si_code = FPE_FLTDIV;
1088 break;
1089 case 0x008: /* Overflow */
1090 info.si_code = FPE_FLTOVF;
1091 break;
1092 case 0x020: /* Precision */
1093 info.si_code = FPE_FLTRES;
1094 break;
1095 }
1096 force_sig_info(SIGFPE, &info, task);
1097}
1098
1099asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs)
1100{
1101}
1102
1103asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
1104{
1105}
1106
1107asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
1108{
1109}
1110
1111/*
1112 * 'math_state_restore()' saves the current math information in the
1113 * old math state array, and gets the new ones from the current task
1114 *
1115 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
1116 * Don't touch unless you *really* know how it works.
1117 */
1118asmlinkage void math_state_restore(void)
1119{
1120 struct task_struct *me = current;
1121
1122 if (!used_math()) {
1123 local_irq_enable();
1124 /*
1125 * does a slab alloc which can sleep
1126 */
1127 if (init_fpu(me)) {
1128 /*
1129 * ran out of memory!
1130 */
1131 do_group_exit(SIGKILL);
1132 return;
1133 }
1134 local_irq_disable();
1135 }
1136
1137 clts(); /* Allow maths ops (or we recurse) */
1138 /*
1139 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
1140 */
1141 if (unlikely(restore_fpu_checking(me))) {
1142 stts();
1143 force_sig(SIGSEGV, me);
1144 return;
1145 }
1146 task_thread_info(me)->status |= TS_USEDFPU;
1147 me->fpu_counter++;
1148}
1149EXPORT_SYMBOL_GPL(math_state_restore);
1150
1151void __init trap_init(void)
1152{
1153 set_intr_gate(0, &divide_error);
1154 set_intr_gate_ist(1, &debug, DEBUG_STACK);
1155 set_intr_gate_ist(2, &nmi, NMI_STACK);
1156 /* int3 can be called from all */
1157 set_system_gate_ist(3, &int3, DEBUG_STACK);
1158 /* int4 can be called from all */
1159 set_system_gate(4, &overflow);
1160 set_intr_gate(5, &bounds);
1161 set_intr_gate(6, &invalid_op);
1162 set_intr_gate(7, &device_not_available);
1163 set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
1164 set_intr_gate(9, &coprocessor_segment_overrun);
1165 set_intr_gate(10, &invalid_TSS);
1166 set_intr_gate(11, &segment_not_present);
1167 set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
1168 set_intr_gate(13, &general_protection);
1169 set_intr_gate(14, &page_fault);
1170 set_intr_gate(15, &spurious_interrupt_bug);
1171 set_intr_gate(16, &coprocessor_error);
1172 set_intr_gate(17, &alignment_check);
1173#ifdef CONFIG_X86_MCE
1174 set_intr_gate_ist(18, &machine_check, MCE_STACK);
1175#endif
1176 set_intr_gate(19, &simd_coprocessor_error);
1177
1178#ifdef CONFIG_IA32_EMULATION
1179 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
1180#endif
1181 /*
1182 * Should be a barrier for any external CPU state:
1183 */
1184 cpu_init();
1185}
1186
1187static int __init oops_setup(char *s)
1188{
1189 if (!s)
1190 return -EINVAL;
1191 if (!strcmp(s, "panic"))
1192 panic_on_oops = 1;
1193 return 0;
1194}
1195early_param("oops", oops_setup);
1196
1197static int __init kstack_setup(char *s)
1198{
1199 if (!s)
1200 return -EINVAL;
1201 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
1202 return 0;
1203}
1204early_param("kstack", kstack_setup);
1205
1206static int __init code_bytes_setup(char *s)
1207{
1208 code_bytes = simple_strtoul(s, NULL, 0);
1209 if (code_bytes > 8192)
1210 code_bytes = 8192;
1211
1212 return 1;
1213}
1214__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 520cca0ee04e..6513d41ea21e 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -47,16 +47,26 @@ static __init int mps_oem_check(struct mp_config_table *mpc, char *oem,
47/* Hook from generic ACPI tables.c */ 47/* Hook from generic ACPI tables.c */
48static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) 48static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
49{ 49{
50 unsigned long oem_addr; 50 unsigned long oem_addr = 0;
51 int check_dsdt;
52 int ret = 0;
53
54 /* check dsdt at first to avoid clear fix_map for oem_addr */
55 check_dsdt = es7000_check_dsdt();
56
51 if (!find_unisys_acpi_oem_table(&oem_addr)) { 57 if (!find_unisys_acpi_oem_table(&oem_addr)) {
52 if (es7000_check_dsdt()) 58 if (check_dsdt)
53 return parse_unisys_oem((char *)oem_addr); 59 ret = parse_unisys_oem((char *)oem_addr);
54 else { 60 else {
55 setup_unisys(); 61 setup_unisys();
56 return 1; 62 ret = 1;
57 } 63 }
64 /*
65 * we need to unmap it
66 */
67 unmap_unisys_acpi_oem_table(oem_addr);
58 } 68 }
59 return 0; 69 return ret;
60} 70}
61#else 71#else
62static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) 72static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index dfb932dcf136..59f89b434b45 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -13,12 +13,8 @@ obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
13mmiotrace-y := pf_in.o mmio-mod.o 13mmiotrace-y := pf_in.o mmio-mod.o
14obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 14obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
15 15
16ifeq ($(CONFIG_X86_32),y) 16obj-$(CONFIG_NUMA) += numa_$(BITS).o
17obj-$(CONFIG_NUMA) += discontig_32.o
18else
19obj-$(CONFIG_NUMA) += numa_64.o
20obj-$(CONFIG_K8_NUMA) += k8topology_64.o 17obj-$(CONFIG_K8_NUMA) += k8topology_64.o
21endif
22obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o 18obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
23 19
24obj-$(CONFIG_MEMTEST) += memtest.o 20obj-$(CONFIG_MEMTEST) += memtest.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a742d753d5b0..3f2b8962cbd0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -592,11 +592,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
592 unsigned long flags; 592 unsigned long flags;
593#endif 593#endif
594 594
595 /*
596 * We can fault from pretty much anywhere, with unknown IRQ state.
597 */
598 trace_hardirqs_fixup();
599
600 tsk = current; 595 tsk = current;
601 mm = tsk->mm; 596 mm = tsk->mm;
602 prefetchw(&mm->mmap_sem); 597 prefetchw(&mm->mmap_sem);
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 007bb06c7504..4ba373c5b8c8 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -82,7 +82,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
82 pte_t pte = gup_get_pte(ptep); 82 pte_t pte = gup_get_pte(ptep);
83 struct page *page; 83 struct page *page;
84 84
85 if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) { 85 if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) {
86 pte_unmap(ptep); 86 pte_unmap(ptep);
87 return 0; 87 return 0;
88 } 88 }
@@ -116,10 +116,10 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
116 mask = _PAGE_PRESENT|_PAGE_USER; 116 mask = _PAGE_PRESENT|_PAGE_USER;
117 if (write) 117 if (write)
118 mask |= _PAGE_RW; 118 mask |= _PAGE_RW;
119 if ((pte_val(pte) & mask) != mask) 119 if ((pte_flags(pte) & mask) != mask)
120 return 0; 120 return 0;
121 /* hugepages are never "special" */ 121 /* hugepages are never "special" */
122 VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); 122 VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL);
123 VM_BUG_ON(!pfn_valid(pte_pfn(pte))); 123 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
124 124
125 refs = 0; 125 refs = 0;
@@ -173,10 +173,10 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
173 mask = _PAGE_PRESENT|_PAGE_USER; 173 mask = _PAGE_PRESENT|_PAGE_USER;
174 if (write) 174 if (write)
175 mask |= _PAGE_RW; 175 mask |= _PAGE_RW;
176 if ((pte_val(pte) & mask) != mask) 176 if ((pte_flags(pte) & mask) != mask)
177 return 0; 177 return 0;
178 /* hugepages are never "special" */ 178 /* hugepages are never "special" */
179 VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); 179 VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL);
180 VM_BUG_ON(!pfn_valid(pte_pfn(pte))); 180 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
181 181
182 refs = 0; 182 refs = 0;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index bbe044dbe014..8396868e82c5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -558,7 +558,7 @@ void zap_low_mappings(void)
558 558
559int nx_enabled; 559int nx_enabled;
560 560
561pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL); 561pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
562EXPORT_SYMBOL_GPL(__supported_pte_mask); 562EXPORT_SYMBOL_GPL(__supported_pte_mask);
563 563
564#ifdef CONFIG_X86_PAE 564#ifdef CONFIG_X86_PAE
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3e10054c5731..b8e461d49412 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -89,7 +89,7 @@ early_param("gbpages", parse_direct_gbpages_on);
89 89
90int after_bootmem; 90int after_bootmem;
91 91
92unsigned long __supported_pte_mask __read_mostly = ~0UL; 92pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
93EXPORT_SYMBOL_GPL(__supported_pte_mask); 93EXPORT_SYMBOL_GPL(__supported_pte_mask);
94 94
95static int do_not_nx __cpuinitdata; 95static int do_not_nx __cpuinitdata;
@@ -196,9 +196,6 @@ set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
196 } 196 }
197 197
198 pte = pte_offset_kernel(pmd, vaddr); 198 pte = pte_offset_kernel(pmd, vaddr);
199 if (!pte_none(*pte) && pte_val(new_pte) &&
200 pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
201 pte_ERROR(*pte);
202 set_pte(pte, new_pte); 199 set_pte(pte, new_pte);
203 200
204 /* 201 /*
@@ -313,7 +310,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
313 if (pfn >= table_top) 310 if (pfn >= table_top)
314 panic("alloc_low_page: ran out of memory"); 311 panic("alloc_low_page: ran out of memory");
315 312
316 adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE); 313 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
317 memset(adr, 0, PAGE_SIZE); 314 memset(adr, 0, PAGE_SIZE);
318 *phys = pfn * PAGE_SIZE; 315 *phys = pfn * PAGE_SIZE;
319 return adr; 316 return adr;
@@ -749,7 +746,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
749 old_start = mr[i].start; 746 old_start = mr[i].start;
750 memmove(&mr[i], &mr[i+1], 747 memmove(&mr[i], &mr[i+1],
751 (nr_range - 1 - i) * sizeof (struct map_range)); 748 (nr_range - 1 - i) * sizeof (struct map_range));
752 mr[i].start = old_start; 749 mr[i--].start = old_start;
753 nr_range--; 750 nr_range--;
754 } 751 }
755 752
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 8cbeda15cd29..e4c43ec71b29 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -45,6 +45,27 @@ unsigned long __phys_addr(unsigned long x)
45} 45}
46EXPORT_SYMBOL(__phys_addr); 46EXPORT_SYMBOL(__phys_addr);
47 47
48bool __virt_addr_valid(unsigned long x)
49{
50 if (x >= __START_KERNEL_map) {
51 x -= __START_KERNEL_map;
52 if (x >= KERNEL_IMAGE_SIZE)
53 return false;
54 x += phys_base;
55 } else {
56 if (x < PAGE_OFFSET)
57 return false;
58 x -= PAGE_OFFSET;
59 if (system_state == SYSTEM_BOOTING ?
60 x > MAXMEM : !phys_addr_valid(x)) {
61 return false;
62 }
63 }
64
65 return pfn_valid(x >> PAGE_SHIFT);
66}
67EXPORT_SYMBOL(__virt_addr_valid);
68
48#else 69#else
49 70
50static inline int phys_addr_valid(unsigned long addr) 71static inline int phys_addr_valid(unsigned long addr)
@@ -56,13 +77,24 @@ static inline int phys_addr_valid(unsigned long addr)
56unsigned long __phys_addr(unsigned long x) 77unsigned long __phys_addr(unsigned long x)
57{ 78{
58 /* VMALLOC_* aren't constants; not available at the boot time */ 79 /* VMALLOC_* aren't constants; not available at the boot time */
59 VIRTUAL_BUG_ON(x < PAGE_OFFSET || (system_state != SYSTEM_BOOTING && 80 VIRTUAL_BUG_ON(x < PAGE_OFFSET);
60 is_vmalloc_addr((void *)x))); 81 VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING &&
82 is_vmalloc_addr((void *) x));
61 return x - PAGE_OFFSET; 83 return x - PAGE_OFFSET;
62} 84}
63EXPORT_SYMBOL(__phys_addr); 85EXPORT_SYMBOL(__phys_addr);
64#endif 86#endif
65 87
88bool __virt_addr_valid(unsigned long x)
89{
90 if (x < PAGE_OFFSET)
91 return false;
92 if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x))
93 return false;
94 return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
95}
96EXPORT_SYMBOL(__virt_addr_valid);
97
66#endif 98#endif
67 99
68int page_is_ram(unsigned long pagenr) 100int page_is_ram(unsigned long pagenr)
@@ -242,16 +274,16 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
242 switch (prot_val) { 274 switch (prot_val) {
243 case _PAGE_CACHE_UC: 275 case _PAGE_CACHE_UC:
244 default: 276 default:
245 prot = PAGE_KERNEL_NOCACHE; 277 prot = PAGE_KERNEL_IO_NOCACHE;
246 break; 278 break;
247 case _PAGE_CACHE_UC_MINUS: 279 case _PAGE_CACHE_UC_MINUS:
248 prot = PAGE_KERNEL_UC_MINUS; 280 prot = PAGE_KERNEL_IO_UC_MINUS;
249 break; 281 break;
250 case _PAGE_CACHE_WC: 282 case _PAGE_CACHE_WC:
251 prot = PAGE_KERNEL_WC; 283 prot = PAGE_KERNEL_IO_WC;
252 break; 284 break;
253 case _PAGE_CACHE_WB: 285 case _PAGE_CACHE_WB:
254 prot = PAGE_KERNEL; 286 prot = PAGE_KERNEL_IO;
255 break; 287 break;
256 } 288 }
257 289
@@ -568,12 +600,12 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
568} 600}
569 601
570static inline void __init early_set_fixmap(enum fixed_addresses idx, 602static inline void __init early_set_fixmap(enum fixed_addresses idx,
571 unsigned long phys) 603 unsigned long phys, pgprot_t prot)
572{ 604{
573 if (after_paging_init) 605 if (after_paging_init)
574 set_fixmap(idx, phys); 606 __set_fixmap(idx, phys, prot);
575 else 607 else
576 __early_set_fixmap(idx, phys, PAGE_KERNEL); 608 __early_set_fixmap(idx, phys, prot);
577} 609}
578 610
579static inline void __init early_clear_fixmap(enum fixed_addresses idx) 611static inline void __init early_clear_fixmap(enum fixed_addresses idx)
@@ -584,16 +616,22 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
584 __early_set_fixmap(idx, 0, __pgprot(0)); 616 __early_set_fixmap(idx, 0, __pgprot(0));
585} 617}
586 618
587 619static void *prev_map[FIX_BTMAPS_SLOTS] __initdata;
588static int __initdata early_ioremap_nested; 620static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
589
590static int __init check_early_ioremap_leak(void) 621static int __init check_early_ioremap_leak(void)
591{ 622{
592 if (!early_ioremap_nested) 623 int count = 0;
624 int i;
625
626 for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
627 if (prev_map[i])
628 count++;
629
630 if (!count)
593 return 0; 631 return 0;
594 WARN(1, KERN_WARNING 632 WARN(1, KERN_WARNING
595 "Debug warning: early ioremap leak of %d areas detected.\n", 633 "Debug warning: early ioremap leak of %d areas detected.\n",
596 early_ioremap_nested); 634 count);
597 printk(KERN_WARNING 635 printk(KERN_WARNING
598 "please boot with early_ioremap_debug and report the dmesg.\n"); 636 "please boot with early_ioremap_debug and report the dmesg.\n");
599 637
@@ -601,18 +639,33 @@ static int __init check_early_ioremap_leak(void)
601} 639}
602late_initcall(check_early_ioremap_leak); 640late_initcall(check_early_ioremap_leak);
603 641
604void __init *early_ioremap(unsigned long phys_addr, unsigned long size) 642static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
605{ 643{
606 unsigned long offset, last_addr; 644 unsigned long offset, last_addr;
607 unsigned int nrpages, nesting; 645 unsigned int nrpages;
608 enum fixed_addresses idx0, idx; 646 enum fixed_addresses idx0, idx;
647 int i, slot;
609 648
610 WARN_ON(system_state != SYSTEM_BOOTING); 649 WARN_ON(system_state != SYSTEM_BOOTING);
611 650
612 nesting = early_ioremap_nested; 651 slot = -1;
652 for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
653 if (!prev_map[i]) {
654 slot = i;
655 break;
656 }
657 }
658
659 if (slot < 0) {
660 printk(KERN_INFO "early_iomap(%08lx, %08lx) not found slot\n",
661 phys_addr, size);
662 WARN_ON(1);
663 return NULL;
664 }
665
613 if (early_ioremap_debug) { 666 if (early_ioremap_debug) {
614 printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ", 667 printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ",
615 phys_addr, size, nesting); 668 phys_addr, size, slot);
616 dump_stack(); 669 dump_stack();
617 } 670 }
618 671
@@ -623,11 +676,7 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
623 return NULL; 676 return NULL;
624 } 677 }
625 678
626 if (nesting >= FIX_BTMAPS_NESTING) { 679 prev_size[slot] = size;
627 WARN_ON(1);
628 return NULL;
629 }
630 early_ioremap_nested++;
631 /* 680 /*
632 * Mappings have to be page-aligned 681 * Mappings have to be page-aligned
633 */ 682 */
@@ -647,10 +696,10 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
647 /* 696 /*
648 * Ok, go for it.. 697 * Ok, go for it..
649 */ 698 */
650 idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting; 699 idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
651 idx = idx0; 700 idx = idx0;
652 while (nrpages > 0) { 701 while (nrpages > 0) {
653 early_set_fixmap(idx, phys_addr); 702 early_set_fixmap(idx, phys_addr, prot);
654 phys_addr += PAGE_SIZE; 703 phys_addr += PAGE_SIZE;
655 --idx; 704 --idx;
656 --nrpages; 705 --nrpages;
@@ -658,7 +707,20 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
658 if (early_ioremap_debug) 707 if (early_ioremap_debug)
659 printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); 708 printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));
660 709
661 return (void *) (offset + fix_to_virt(idx0)); 710 prev_map[slot] = (void *) (offset + fix_to_virt(idx0));
711 return prev_map[slot];
712}
713
714/* Remap an IO device */
715void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
716{
717 return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO);
718}
719
720/* Remap memory */
721void __init *early_memremap(unsigned long phys_addr, unsigned long size)
722{
723 return __early_ioremap(phys_addr, size, PAGE_KERNEL);
662} 724}
663 725
664void __init early_iounmap(void *addr, unsigned long size) 726void __init early_iounmap(void *addr, unsigned long size)
@@ -667,15 +729,33 @@ void __init early_iounmap(void *addr, unsigned long size)
667 unsigned long offset; 729 unsigned long offset;
668 unsigned int nrpages; 730 unsigned int nrpages;
669 enum fixed_addresses idx; 731 enum fixed_addresses idx;
670 int nesting; 732 int i, slot;
733
734 slot = -1;
735 for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
736 if (prev_map[i] == addr) {
737 slot = i;
738 break;
739 }
740 }
671 741
672 nesting = --early_ioremap_nested; 742 if (slot < 0) {
673 if (WARN_ON(nesting < 0)) 743 printk(KERN_INFO "early_iounmap(%p, %08lx) not found slot\n",
744 addr, size);
745 WARN_ON(1);
746 return;
747 }
748
749 if (prev_size[slot] != size) {
750 printk(KERN_INFO "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n",
751 addr, size, slot, prev_size[slot]);
752 WARN_ON(1);
674 return; 753 return;
754 }
675 755
676 if (early_ioremap_debug) { 756 if (early_ioremap_debug) {
677 printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, 757 printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
678 size, nesting); 758 size, slot);
679 dump_stack(); 759 dump_stack();
680 } 760 }
681 761
@@ -687,12 +767,13 @@ void __init early_iounmap(void *addr, unsigned long size)
687 offset = virt_addr & ~PAGE_MASK; 767 offset = virt_addr & ~PAGE_MASK;
688 nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; 768 nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
689 769
690 idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting; 770 idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
691 while (nrpages > 0) { 771 while (nrpages > 0) {
692 early_clear_fixmap(idx); 772 early_clear_fixmap(idx);
693 --idx; 773 --idx;
694 --nrpages; 774 --nrpages;
695 } 775 }
776 prev_map[slot] = 0;
696} 777}
697 778
698void __this_fixmap_does_not_exist(void) 779void __this_fixmap_does_not_exist(void)
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/numa_32.c
index 847c164725f4..847c164725f4 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/numa_32.c
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 1b4763e26ea9..51c0a2fc14fe 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -138,7 +138,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
138 return; 138 return;
139 } 139 }
140 140
141 if (is_uv_system()) 141 if (get_uv_system_type() >= UV_X2APIC)
142 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; 142 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
143 else 143 else
144 apic_id = pa->apic_id; 144 apic_id = pa->apic_id;
diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile
index 30f3eb366667..446902b2a6b6 100644
--- a/arch/x86/oprofile/Makefile
+++ b/arch/x86/oprofile/Makefile
@@ -7,6 +7,6 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
7 timer_int.o ) 7 timer_int.o )
8 8
9oprofile-y := $(DRIVER_OBJS) init.o backtrace.o 9oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
10oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \ 10oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \
11 op_model_ppro.o op_model_p4.o 11 op_model_ppro.o op_model_p4.o
12oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o 12oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 8a5f1614a3d5..57f6c9088081 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -1,10 +1,11 @@
1/** 1/**
2 * @file nmi_int.c 2 * @file nmi_int.c
3 * 3 *
4 * @remark Copyright 2002 OProfile authors 4 * @remark Copyright 2002-2008 OProfile authors
5 * @remark Read the file COPYING 5 * @remark Read the file COPYING
6 * 6 *
7 * @author John Levon <levon@movementarian.org> 7 * @author John Levon <levon@movementarian.org>
8 * @author Robert Richter <robert.richter@amd.com>
8 */ 9 */
9 10
10#include <linux/init.h> 11#include <linux/init.h>
@@ -439,6 +440,7 @@ int __init op_nmi_init(struct oprofile_operations *ops)
439 __u8 vendor = boot_cpu_data.x86_vendor; 440 __u8 vendor = boot_cpu_data.x86_vendor;
440 __u8 family = boot_cpu_data.x86; 441 __u8 family = boot_cpu_data.x86;
441 char *cpu_type; 442 char *cpu_type;
443 int ret = 0;
442 444
443 if (!cpu_has_apic) 445 if (!cpu_has_apic)
444 return -ENODEV; 446 return -ENODEV;
@@ -451,19 +453,23 @@ int __init op_nmi_init(struct oprofile_operations *ops)
451 default: 453 default:
452 return -ENODEV; 454 return -ENODEV;
453 case 6: 455 case 6:
454 model = &op_athlon_spec; 456 model = &op_amd_spec;
455 cpu_type = "i386/athlon"; 457 cpu_type = "i386/athlon";
456 break; 458 break;
457 case 0xf: 459 case 0xf:
458 model = &op_athlon_spec; 460 model = &op_amd_spec;
459 /* Actually it could be i386/hammer too, but give 461 /* Actually it could be i386/hammer too, but give
460 user space an consistent name. */ 462 user space an consistent name. */
461 cpu_type = "x86-64/hammer"; 463 cpu_type = "x86-64/hammer";
462 break; 464 break;
463 case 0x10: 465 case 0x10:
464 model = &op_athlon_spec; 466 model = &op_amd_spec;
465 cpu_type = "x86-64/family10"; 467 cpu_type = "x86-64/family10";
466 break; 468 break;
469 case 0x11:
470 model = &op_amd_spec;
471 cpu_type = "x86-64/family11h";
472 break;
467 } 473 }
468 break; 474 break;
469 475
@@ -490,17 +496,24 @@ int __init op_nmi_init(struct oprofile_operations *ops)
490 return -ENODEV; 496 return -ENODEV;
491 } 497 }
492 498
493 init_sysfs();
494#ifdef CONFIG_SMP 499#ifdef CONFIG_SMP
495 register_cpu_notifier(&oprofile_cpu_nb); 500 register_cpu_notifier(&oprofile_cpu_nb);
496#endif 501#endif
497 using_nmi = 1; 502 /* default values, can be overwritten by model */
498 ops->create_files = nmi_create_files; 503 ops->create_files = nmi_create_files;
499 ops->setup = nmi_setup; 504 ops->setup = nmi_setup;
500 ops->shutdown = nmi_shutdown; 505 ops->shutdown = nmi_shutdown;
501 ops->start = nmi_start; 506 ops->start = nmi_start;
502 ops->stop = nmi_stop; 507 ops->stop = nmi_stop;
503 ops->cpu_type = cpu_type; 508 ops->cpu_type = cpu_type;
509
510 if (model->init)
511 ret = model->init(ops);
512 if (ret)
513 return ret;
514
515 init_sysfs();
516 using_nmi = 1;
504 printk(KERN_INFO "oprofile: using NMI interrupt.\n"); 517 printk(KERN_INFO "oprofile: using NMI interrupt.\n");
505 return 0; 518 return 0;
506} 519}
@@ -513,4 +526,6 @@ void op_nmi_exit(void)
513 unregister_cpu_notifier(&oprofile_cpu_nb); 526 unregister_cpu_notifier(&oprofile_cpu_nb);
514#endif 527#endif
515 } 528 }
529 if (model->exit)
530 model->exit();
516} 531}
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
new file mode 100644
index 000000000000..d9faf607b3a6
--- /dev/null
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -0,0 +1,543 @@
1/*
2 * @file op_model_amd.c
3 * athlon / K7 / K8 / Family 10h model-specific MSR operations
4 *
5 * @remark Copyright 2002-2008 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author John Levon
9 * @author Philippe Elie
10 * @author Graydon Hoare
11 * @author Robert Richter <robert.richter@amd.com>
12 * @author Barry Kasindorf
13*/
14
15#include <linux/oprofile.h>
16#include <linux/device.h>
17#include <linux/pci.h>
18
19#include <asm/ptrace.h>
20#include <asm/msr.h>
21#include <asm/nmi.h>
22
23#include "op_x86_model.h"
24#include "op_counter.h"
25
26#define NUM_COUNTERS 4
27#define NUM_CONTROLS 4
28
29#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
30#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
31#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0)
32#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
33
34#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
35#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
36#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
37#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
38#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
39#define CTRL_CLEAR_LO(x) (x &= (1<<21))
40#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
41#define CTRL_SET_ENABLE(val) (val |= 1<<20)
42#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
43#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
44#define CTRL_SET_UM(val, m) (val |= (m << 8))
45#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
46#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf))
47#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9))
48#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8))
49
50static unsigned long reset_value[NUM_COUNTERS];
51
52#ifdef CONFIG_OPROFILE_IBS
53
54/* IbsFetchCtl bits/masks */
55#define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */
56#define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */
57#define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */
58
59/*IbsOpCtl bits */
60#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */
61#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */
62
63/* Codes used in cpu_buffer.c */
64/* This produces duplicate code, need to be fixed */
65#define IBS_FETCH_BEGIN 3
66#define IBS_OP_BEGIN 4
67
68/* The function interface needs to be fixed, something like add
69 data. Should then be added to linux/oprofile.h. */
70extern void oprofile_add_ibs_sample(struct pt_regs *const regs,
71 unsigned int * const ibs_sample, u8 code);
72
73struct ibs_fetch_sample {
74 /* MSRC001_1031 IBS Fetch Linear Address Register */
75 unsigned int ibs_fetch_lin_addr_low;
76 unsigned int ibs_fetch_lin_addr_high;
77 /* MSRC001_1030 IBS Fetch Control Register */
78 unsigned int ibs_fetch_ctl_low;
79 unsigned int ibs_fetch_ctl_high;
80 /* MSRC001_1032 IBS Fetch Physical Address Register */
81 unsigned int ibs_fetch_phys_addr_low;
82 unsigned int ibs_fetch_phys_addr_high;
83};
84
85struct ibs_op_sample {
86 /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */
87 unsigned int ibs_op_rip_low;
88 unsigned int ibs_op_rip_high;
89 /* MSRC001_1035 IBS Op Data Register */
90 unsigned int ibs_op_data1_low;
91 unsigned int ibs_op_data1_high;
92 /* MSRC001_1036 IBS Op Data 2 Register */
93 unsigned int ibs_op_data2_low;
94 unsigned int ibs_op_data2_high;
95 /* MSRC001_1037 IBS Op Data 3 Register */
96 unsigned int ibs_op_data3_low;
97 unsigned int ibs_op_data3_high;
98 /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */
99 unsigned int ibs_dc_linear_low;
100 unsigned int ibs_dc_linear_high;
101 /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */
102 unsigned int ibs_dc_phys_low;
103 unsigned int ibs_dc_phys_high;
104};
105
106/*
107 * unitialize the APIC for the IBS interrupts if needed on AMD Family10h+
108*/
109static void clear_ibs_nmi(void);
110
111static int ibs_allowed; /* AMD Family10h and later */
112
113struct op_ibs_config {
114 unsigned long op_enabled;
115 unsigned long fetch_enabled;
116 unsigned long max_cnt_fetch;
117 unsigned long max_cnt_op;
118 unsigned long rand_en;
119 unsigned long dispatched_ops;
120};
121
122static struct op_ibs_config ibs_config;
123
124#endif
125
126/* functions for op_amd_spec */
127
128static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
129{
130 int i;
131
132 for (i = 0; i < NUM_COUNTERS; i++) {
133 if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
134 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
135 else
136 msrs->counters[i].addr = 0;
137 }
138
139 for (i = 0; i < NUM_CONTROLS; i++) {
140 if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
141 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
142 else
143 msrs->controls[i].addr = 0;
144 }
145}
146
147
148static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
149{
150 unsigned int low, high;
151 int i;
152
153 /* clear all counters */
154 for (i = 0 ; i < NUM_CONTROLS; ++i) {
155 if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
156 continue;
157 CTRL_READ(low, high, msrs, i);
158 CTRL_CLEAR_LO(low);
159 CTRL_CLEAR_HI(high);
160 CTRL_WRITE(low, high, msrs, i);
161 }
162
163 /* avoid a false detection of ctr overflows in NMI handler */
164 for (i = 0; i < NUM_COUNTERS; ++i) {
165 if (unlikely(!CTR_IS_RESERVED(msrs, i)))
166 continue;
167 CTR_WRITE(1, msrs, i);
168 }
169
170 /* enable active counters */
171 for (i = 0; i < NUM_COUNTERS; ++i) {
172 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
173 reset_value[i] = counter_config[i].count;
174
175 CTR_WRITE(counter_config[i].count, msrs, i);
176
177 CTRL_READ(low, high, msrs, i);
178 CTRL_CLEAR_LO(low);
179 CTRL_CLEAR_HI(high);
180 CTRL_SET_ENABLE(low);
181 CTRL_SET_USR(low, counter_config[i].user);
182 CTRL_SET_KERN(low, counter_config[i].kernel);
183 CTRL_SET_UM(low, counter_config[i].unit_mask);
184 CTRL_SET_EVENT_LOW(low, counter_config[i].event);
185 CTRL_SET_EVENT_HIGH(high, counter_config[i].event);
186 CTRL_SET_HOST_ONLY(high, 0);
187 CTRL_SET_GUEST_ONLY(high, 0);
188
189 CTRL_WRITE(low, high, msrs, i);
190 } else {
191 reset_value[i] = 0;
192 }
193 }
194}
195
196#ifdef CONFIG_OPROFILE_IBS
197
198static inline int
199op_amd_handle_ibs(struct pt_regs * const regs,
200 struct op_msrs const * const msrs)
201{
202 unsigned int low, high;
203 struct ibs_fetch_sample ibs_fetch;
204 struct ibs_op_sample ibs_op;
205
206 if (!ibs_allowed)
207 return 1;
208
209 if (ibs_config.fetch_enabled) {
210 rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
211 if (high & IBS_FETCH_HIGH_VALID_BIT) {
212 ibs_fetch.ibs_fetch_ctl_high = high;
213 ibs_fetch.ibs_fetch_ctl_low = low;
214 rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high);
215 ibs_fetch.ibs_fetch_lin_addr_high = high;
216 ibs_fetch.ibs_fetch_lin_addr_low = low;
217 rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high);
218 ibs_fetch.ibs_fetch_phys_addr_high = high;
219 ibs_fetch.ibs_fetch_phys_addr_low = low;
220
221 oprofile_add_ibs_sample(regs,
222 (unsigned int *)&ibs_fetch,
223 IBS_FETCH_BEGIN);
224
225 /*reenable the IRQ */
226 rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
227 high &= ~IBS_FETCH_HIGH_VALID_BIT;
228 high |= IBS_FETCH_HIGH_ENABLE;
229 low &= IBS_FETCH_LOW_MAX_CNT_MASK;
230 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
231 }
232 }
233
234 if (ibs_config.op_enabled) {
235 rdmsr(MSR_AMD64_IBSOPCTL, low, high);
236 if (low & IBS_OP_LOW_VALID_BIT) {
237 rdmsr(MSR_AMD64_IBSOPRIP, low, high);
238 ibs_op.ibs_op_rip_low = low;
239 ibs_op.ibs_op_rip_high = high;
240 rdmsr(MSR_AMD64_IBSOPDATA, low, high);
241 ibs_op.ibs_op_data1_low = low;
242 ibs_op.ibs_op_data1_high = high;
243 rdmsr(MSR_AMD64_IBSOPDATA2, low, high);
244 ibs_op.ibs_op_data2_low = low;
245 ibs_op.ibs_op_data2_high = high;
246 rdmsr(MSR_AMD64_IBSOPDATA3, low, high);
247 ibs_op.ibs_op_data3_low = low;
248 ibs_op.ibs_op_data3_high = high;
249 rdmsr(MSR_AMD64_IBSDCLINAD, low, high);
250 ibs_op.ibs_dc_linear_low = low;
251 ibs_op.ibs_dc_linear_high = high;
252 rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high);
253 ibs_op.ibs_dc_phys_low = low;
254 ibs_op.ibs_dc_phys_high = high;
255
256 /* reenable the IRQ */
257 oprofile_add_ibs_sample(regs,
258 (unsigned int *)&ibs_op,
259 IBS_OP_BEGIN);
260 rdmsr(MSR_AMD64_IBSOPCTL, low, high);
261 high = 0;
262 low &= ~IBS_OP_LOW_VALID_BIT;
263 low |= IBS_OP_LOW_ENABLE;
264 wrmsr(MSR_AMD64_IBSOPCTL, low, high);
265 }
266 }
267
268 return 1;
269}
270
271#endif
272
273static int op_amd_check_ctrs(struct pt_regs * const regs,
274 struct op_msrs const * const msrs)
275{
276 unsigned int low, high;
277 int i;
278
279 for (i = 0 ; i < NUM_COUNTERS; ++i) {
280 if (!reset_value[i])
281 continue;
282 CTR_READ(low, high, msrs, i);
283 if (CTR_OVERFLOWED(low)) {
284 oprofile_add_sample(regs, i);
285 CTR_WRITE(reset_value[i], msrs, i);
286 }
287 }
288
289#ifdef CONFIG_OPROFILE_IBS
290 op_amd_handle_ibs(regs, msrs);
291#endif
292
293 /* See op_model_ppro.c */
294 return 1;
295}
296
297static void op_amd_start(struct op_msrs const * const msrs)
298{
299 unsigned int low, high;
300 int i;
301 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
302 if (reset_value[i]) {
303 CTRL_READ(low, high, msrs, i);
304 CTRL_SET_ACTIVE(low);
305 CTRL_WRITE(low, high, msrs, i);
306 }
307 }
308
309#ifdef CONFIG_OPROFILE_IBS
310 if (ibs_allowed && ibs_config.fetch_enabled) {
311 low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
312 high = IBS_FETCH_HIGH_ENABLE;
313 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
314 }
315
316 if (ibs_allowed && ibs_config.op_enabled) {
317 low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + IBS_OP_LOW_ENABLE;
318 high = 0;
319 wrmsr(MSR_AMD64_IBSOPCTL, low, high);
320 }
321#endif
322}
323
324
325static void op_amd_stop(struct op_msrs const * const msrs)
326{
327 unsigned int low, high;
328 int i;
329
330 /* Subtle: stop on all counters to avoid race with
331 * setting our pm callback */
332 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
333 if (!reset_value[i])
334 continue;
335 CTRL_READ(low, high, msrs, i);
336 CTRL_SET_INACTIVE(low);
337 CTRL_WRITE(low, high, msrs, i);
338 }
339
340#ifdef CONFIG_OPROFILE_IBS
341 if (ibs_allowed && ibs_config.fetch_enabled) {
342 low = 0; /* clear max count and enable */
343 high = 0;
344 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
345 }
346
347 if (ibs_allowed && ibs_config.op_enabled) {
348 low = 0; /* clear max count and enable */
349 high = 0;
350 wrmsr(MSR_AMD64_IBSOPCTL, low, high);
351 }
352#endif
353}
354
355static void op_amd_shutdown(struct op_msrs const * const msrs)
356{
357 int i;
358
359 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
360 if (CTR_IS_RESERVED(msrs, i))
361 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
362 }
363 for (i = 0 ; i < NUM_CONTROLS ; ++i) {
364 if (CTRL_IS_RESERVED(msrs, i))
365 release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
366 }
367}
368
369#ifndef CONFIG_OPROFILE_IBS
370
371/* no IBS support */
372
373static int op_amd_init(struct oprofile_operations *ops)
374{
375 return 0;
376}
377
378static void op_amd_exit(void) {}
379
380#else
381
382static u8 ibs_eilvt_off;
383
384static inline void apic_init_ibs_nmi_per_cpu(void *arg)
385{
386 ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0);
387}
388
389static inline void apic_clear_ibs_nmi_per_cpu(void *arg)
390{
391 setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1);
392}
393
394static int pfm_amd64_setup_eilvt(void)
395{
396#define IBSCTL_LVTOFFSETVAL (1 << 8)
397#define IBSCTL 0x1cc
398 struct pci_dev *cpu_cfg;
399 int nodes;
400 u32 value = 0;
401
402 /* per CPU setup */
403 on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1);
404
405 nodes = 0;
406 cpu_cfg = NULL;
407 do {
408 cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
409 PCI_DEVICE_ID_AMD_10H_NB_MISC,
410 cpu_cfg);
411 if (!cpu_cfg)
412 break;
413 ++nodes;
414 pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
415 | IBSCTL_LVTOFFSETVAL);
416 pci_read_config_dword(cpu_cfg, IBSCTL, &value);
417 if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) {
418 printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
419 "IBSCTL = 0x%08x", value);
420 return 1;
421 }
422 } while (1);
423
424 if (!nodes) {
425 printk(KERN_DEBUG "No CPU node configured for IBS");
426 return 1;
427 }
428
429#ifdef CONFIG_NUMA
430 /* Sanity check */
431 /* Works only for 64bit with proper numa implementation. */
432 if (nodes != num_possible_nodes()) {
433 printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, "
434 "found: %d, expected %d",
435 nodes, num_possible_nodes());
436 return 1;
437 }
438#endif
439 return 0;
440}
441
442/*
443 * initialize the APIC for the IBS interrupts
444 * if available (AMD Family10h rev B0 and later)
445 */
446static void setup_ibs(void)
447{
448 ibs_allowed = boot_cpu_has(X86_FEATURE_IBS);
449
450 if (!ibs_allowed)
451 return;
452
453 if (pfm_amd64_setup_eilvt()) {
454 ibs_allowed = 0;
455 return;
456 }
457
458 printk(KERN_INFO "oprofile: AMD IBS detected\n");
459}
460
461
462/*
463 * unitialize the APIC for the IBS interrupts if needed on AMD Family10h
464 * rev B0 and later */
465static void clear_ibs_nmi(void)
466{
467 if (ibs_allowed)
468 on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
469}
470
471static int (*create_arch_files)(struct super_block * sb, struct dentry * root);
472
473static int setup_ibs_files(struct super_block * sb, struct dentry * root)
474{
475 char buf[12];
476 struct dentry *dir;
477 int ret = 0;
478
479 /* architecture specific files */
480 if (create_arch_files)
481 ret = create_arch_files(sb, root);
482
483 if (ret)
484 return ret;
485
486 if (!ibs_allowed)
487 return ret;
488
489 /* model specific files */
490
491 /* setup some reasonable defaults */
492 ibs_config.max_cnt_fetch = 250000;
493 ibs_config.fetch_enabled = 0;
494 ibs_config.max_cnt_op = 250000;
495 ibs_config.op_enabled = 0;
496 ibs_config.dispatched_ops = 1;
497 snprintf(buf, sizeof(buf), "ibs_fetch");
498 dir = oprofilefs_mkdir(sb, root, buf);
499 oprofilefs_create_ulong(sb, dir, "rand_enable",
500 &ibs_config.rand_en);
501 oprofilefs_create_ulong(sb, dir, "enable",
502 &ibs_config.fetch_enabled);
503 oprofilefs_create_ulong(sb, dir, "max_count",
504 &ibs_config.max_cnt_fetch);
505 snprintf(buf, sizeof(buf), "ibs_uops");
506 dir = oprofilefs_mkdir(sb, root, buf);
507 oprofilefs_create_ulong(sb, dir, "enable",
508 &ibs_config.op_enabled);
509 oprofilefs_create_ulong(sb, dir, "max_count",
510 &ibs_config.max_cnt_op);
511 oprofilefs_create_ulong(sb, dir, "dispatched_ops",
512 &ibs_config.dispatched_ops);
513
514 return 0;
515}
516
517static int op_amd_init(struct oprofile_operations *ops)
518{
519 setup_ibs();
520 create_arch_files = ops->create_files;
521 ops->create_files = setup_ibs_files;
522 return 0;
523}
524
525static void op_amd_exit(void)
526{
527 clear_ibs_nmi();
528}
529
530#endif
531
532struct op_x86_model_spec const op_amd_spec = {
533 .init = op_amd_init,
534 .exit = op_amd_exit,
535 .num_counters = NUM_COUNTERS,
536 .num_controls = NUM_CONTROLS,
537 .fill_in_addresses = &op_amd_fill_in_addresses,
538 .setup_ctrs = &op_amd_setup_ctrs,
539 .check_ctrs = &op_amd_check_ctrs,
540 .start = &op_amd_start,
541 .stop = &op_amd_stop,
542 .shutdown = &op_amd_shutdown
543};
diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c
deleted file mode 100644
index 3d534879a9dc..000000000000
--- a/arch/x86/oprofile/op_model_athlon.c
+++ /dev/null
@@ -1,190 +0,0 @@
1/*
2 * @file op_model_athlon.h
3 * athlon / K7 / K8 / Family 10h model-specific MSR operations
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author John Levon
9 * @author Philippe Elie
10 * @author Graydon Hoare
11 */
12
13#include <linux/oprofile.h>
14#include <asm/ptrace.h>
15#include <asm/msr.h>
16#include <asm/nmi.h>
17
18#include "op_x86_model.h"
19#include "op_counter.h"
20
21#define NUM_COUNTERS 4
22#define NUM_CONTROLS 4
23
24#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
25#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
26#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0)
27#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
28
29#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
30#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
31#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
32#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
33#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
34#define CTRL_CLEAR_LO(x) (x &= (1<<21))
35#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
36#define CTRL_SET_ENABLE(val) (val |= 1<<20)
37#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
38#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
39#define CTRL_SET_UM(val, m) (val |= (m << 8))
40#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
41#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf))
42#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9))
43#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8))
44
45static unsigned long reset_value[NUM_COUNTERS];
46
47static void athlon_fill_in_addresses(struct op_msrs * const msrs)
48{
49 int i;
50
51 for (i = 0; i < NUM_COUNTERS; i++) {
52 if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
53 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
54 else
55 msrs->counters[i].addr = 0;
56 }
57
58 for (i = 0; i < NUM_CONTROLS; i++) {
59 if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
60 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
61 else
62 msrs->controls[i].addr = 0;
63 }
64}
65
66
67static void athlon_setup_ctrs(struct op_msrs const * const msrs)
68{
69 unsigned int low, high;
70 int i;
71
72 /* clear all counters */
73 for (i = 0 ; i < NUM_CONTROLS; ++i) {
74 if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
75 continue;
76 CTRL_READ(low, high, msrs, i);
77 CTRL_CLEAR_LO(low);
78 CTRL_CLEAR_HI(high);
79 CTRL_WRITE(low, high, msrs, i);
80 }
81
82 /* avoid a false detection of ctr overflows in NMI handler */
83 for (i = 0; i < NUM_COUNTERS; ++i) {
84 if (unlikely(!CTR_IS_RESERVED(msrs, i)))
85 continue;
86 CTR_WRITE(1, msrs, i);
87 }
88
89 /* enable active counters */
90 for (i = 0; i < NUM_COUNTERS; ++i) {
91 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
92 reset_value[i] = counter_config[i].count;
93
94 CTR_WRITE(counter_config[i].count, msrs, i);
95
96 CTRL_READ(low, high, msrs, i);
97 CTRL_CLEAR_LO(low);
98 CTRL_CLEAR_HI(high);
99 CTRL_SET_ENABLE(low);
100 CTRL_SET_USR(low, counter_config[i].user);
101 CTRL_SET_KERN(low, counter_config[i].kernel);
102 CTRL_SET_UM(low, counter_config[i].unit_mask);
103 CTRL_SET_EVENT_LOW(low, counter_config[i].event);
104 CTRL_SET_EVENT_HIGH(high, counter_config[i].event);
105 CTRL_SET_HOST_ONLY(high, 0);
106 CTRL_SET_GUEST_ONLY(high, 0);
107
108 CTRL_WRITE(low, high, msrs, i);
109 } else {
110 reset_value[i] = 0;
111 }
112 }
113}
114
115
116static int athlon_check_ctrs(struct pt_regs * const regs,
117 struct op_msrs const * const msrs)
118{
119 unsigned int low, high;
120 int i;
121
122 for (i = 0 ; i < NUM_COUNTERS; ++i) {
123 if (!reset_value[i])
124 continue;
125 CTR_READ(low, high, msrs, i);
126 if (CTR_OVERFLOWED(low)) {
127 oprofile_add_sample(regs, i);
128 CTR_WRITE(reset_value[i], msrs, i);
129 }
130 }
131
132 /* See op_model_ppro.c */
133 return 1;
134}
135
136
137static void athlon_start(struct op_msrs const * const msrs)
138{
139 unsigned int low, high;
140 int i;
141 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
142 if (reset_value[i]) {
143 CTRL_READ(low, high, msrs, i);
144 CTRL_SET_ACTIVE(low);
145 CTRL_WRITE(low, high, msrs, i);
146 }
147 }
148}
149
150
151static void athlon_stop(struct op_msrs const * const msrs)
152{
153 unsigned int low, high;
154 int i;
155
156 /* Subtle: stop on all counters to avoid race with
157 * setting our pm callback */
158 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
159 if (!reset_value[i])
160 continue;
161 CTRL_READ(low, high, msrs, i);
162 CTRL_SET_INACTIVE(low);
163 CTRL_WRITE(low, high, msrs, i);
164 }
165}
166
167static void athlon_shutdown(struct op_msrs const * const msrs)
168{
169 int i;
170
171 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
172 if (CTR_IS_RESERVED(msrs, i))
173 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
174 }
175 for (i = 0 ; i < NUM_CONTROLS ; ++i) {
176 if (CTRL_IS_RESERVED(msrs, i))
177 release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
178 }
179}
180
181struct op_x86_model_spec const op_athlon_spec = {
182 .num_counters = NUM_COUNTERS,
183 .num_controls = NUM_CONTROLS,
184 .fill_in_addresses = &athlon_fill_in_addresses,
185 .setup_ctrs = &athlon_setup_ctrs,
186 .check_ctrs = &athlon_check_ctrs,
187 .start = &athlon_start,
188 .stop = &athlon_stop,
189 .shutdown = &athlon_shutdown
190};
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 45b605fa71d0..05a0261ba0c3 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -32,6 +32,8 @@ struct pt_regs;
32 * various x86 CPU models' perfctr support. 32 * various x86 CPU models' perfctr support.
33 */ 33 */
34struct op_x86_model_spec { 34struct op_x86_model_spec {
35 int (*init)(struct oprofile_operations *ops);
36 void (*exit)(void);
35 unsigned int const num_counters; 37 unsigned int const num_counters;
36 unsigned int const num_controls; 38 unsigned int const num_controls;
37 void (*fill_in_addresses)(struct op_msrs * const msrs); 39 void (*fill_in_addresses)(struct op_msrs * const msrs);
@@ -46,6 +48,6 @@ struct op_x86_model_spec {
46extern struct op_x86_model_spec const op_ppro_spec; 48extern struct op_x86_model_spec const op_ppro_spec;
47extern struct op_x86_model_spec const op_p4_spec; 49extern struct op_x86_model_spec const op_p4_spec;
48extern struct op_x86_model_spec const op_p4_ht2_spec; 50extern struct op_x86_model_spec const op_p4_ht2_spec;
49extern struct op_x86_model_spec const op_athlon_spec; 51extern struct op_x86_model_spec const op_amd_spec;
50 52
51#endif /* OP_X86_MODEL_H */ 53#endif /* OP_X86_MODEL_H */
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 4bdaa590375d..3c27a809393b 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -511,3 +511,31 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size);
511DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size); 511DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size);
512DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size); 512DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size);
513DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size); 513DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size);
514
515/*
516 * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from
517 * confusing the PCI engine:
518 */
519static void sb600_disable_hpet_bar(struct pci_dev *dev)
520{
521 u8 val;
522
523 /*
524 * The SB600 and SB700 both share the same device
525 * ID, but the PM register 0x55 does something different
526 * for the SB700, so make sure we are dealing with the
527 * SB600 before touching the bit:
528 */
529
530 pci_read_config_byte(dev, 0x08, &val);
531
532 if (val < 0x2F) {
533 outb(0x55, 0xCD6);
534 val = inb(0xCD7);
535
536 /* Set bit 7 in PM register 0x55 */
537 outb(0x55, 0xCD6);
538 outb(val | 0x80, 0xCD7);
539 }
540}
541DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar);