diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-13 12:54:35 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-13 12:54:45 -0400 |
commit | e7f2f9918c0e97aa98ba147ca387e2c7238f0711 (patch) | |
tree | dd85d6d2907bffeda76b42ce55a445e3142fe738 /arch | |
parent | 11a96d1820113fde0d55c3487b7da7a9031326b8 (diff) | |
parent | c00193f9f09f9b852249a66391985f585d066084 (diff) |
Merge phase #5 (misc) of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
Merges oprofile, timers/hpet, x86/traps, x86/time, and x86/core misc items.
* 'x86-core-v4-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (132 commits)
x86: change early_ioremap to use slots instead of nesting
x86: adjust dependencies for CONFIG_X86_CMOV
dumpstack: x86: various small unification steps, fix
x86: remove additional_cpus
x86: remove additional_cpus configurability
x86: improve UP kernel when CPU-hotplug and SMP is enabled
dumpstack: x86: various small unification steps
dumpstack: i386: make kstack= an early boot-param and add oops=panic
dumpstack: x86: use log_lvl and unify trace formatting
dumptrace: x86: consistently include loglevel, print stack switch
dumpstack: x86: add "end" parameter to valid_stack_ptr and print_context_stack
dumpstack: x86: make printk_address equal
dumpstack: x86: move die_nmi to dumpstack_32.c
traps: x86: finalize unification of traps.c
traps: x86: make traps_32.c and traps_64.c equal
traps: x86: various noop-changes preparing for unification of traps_xx.c
traps: x86_64: use task_pid_nr(tsk) instead of tsk->pid in do_general_protection
traps: i386: expand clear_mem_error and remove from mach_traps.h
traps: x86_64: make io_check_error equal to the one on i386
traps: i386: use preempt_conditional_sti/cli in do_int3
...
Diffstat (limited to 'arch')
45 files changed, 2364 insertions, 2202 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 364c6dadde0a..0267babe5eb9 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
@@ -13,6 +13,20 @@ config OPROFILE | |||
13 | 13 | ||
14 | If unsure, say N. | 14 | If unsure, say N. |
15 | 15 | ||
16 | config OPROFILE_IBS | ||
17 | bool "OProfile AMD IBS support (EXPERIMENTAL)" | ||
18 | default n | ||
19 | depends on OPROFILE && SMP && X86 | ||
20 | help | ||
21 | Instruction-Based Sampling (IBS) is a new profiling | ||
22 | technique that provides rich, precise program performance | ||
23 | information. IBS is introduced by AMD Family10h processors | ||
24 | (AMD Opteron Quad-Core processor “Barcelona”) to overcome | ||
25 | the limitations of conventional performance counter | ||
26 | sampling. | ||
27 | |||
28 | If unsure, say N. | ||
29 | |||
16 | config HAVE_OPROFILE | 30 | config HAVE_OPROFILE |
17 | def_bool n | 31 | def_bool n |
18 | 32 | ||
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index c5f101360520..0b7c4a3f0651 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -38,8 +38,7 @@ config M386 | |||
38 | - "Crusoe" for the Transmeta Crusoe series. | 38 | - "Crusoe" for the Transmeta Crusoe series. |
39 | - "Efficeon" for the Transmeta Efficeon series. | 39 | - "Efficeon" for the Transmeta Efficeon series. |
40 | - "Winchip-C6" for original IDT Winchip. | 40 | - "Winchip-C6" for original IDT Winchip. |
41 | - "Winchip-2" for IDT Winchip 2. | 41 | - "Winchip-2" for IDT Winchips with 3dNow! capabilities. |
42 | - "Winchip-2A" for IDT Winchips with 3dNow! capabilities. | ||
43 | - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). | 42 | - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). |
44 | - "Geode GX/LX" For AMD Geode GX and LX processors. | 43 | - "Geode GX/LX" For AMD Geode GX and LX processors. |
45 | - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. | 44 | - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. |
@@ -194,19 +193,11 @@ config MWINCHIPC6 | |||
194 | treat this chip as a 586TSC with some extended instructions | 193 | treat this chip as a 586TSC with some extended instructions |
195 | and alignment requirements. | 194 | and alignment requirements. |
196 | 195 | ||
197 | config MWINCHIP2 | ||
198 | bool "Winchip-2" | ||
199 | depends on X86_32 | ||
200 | help | ||
201 | Select this for an IDT Winchip-2. Linux and GCC | ||
202 | treat this chip as a 586TSC with some extended instructions | ||
203 | and alignment requirements. | ||
204 | |||
205 | config MWINCHIP3D | 196 | config MWINCHIP3D |
206 | bool "Winchip-2A/Winchip-3" | 197 | bool "Winchip-2/Winchip-2A/Winchip-3" |
207 | depends on X86_32 | 198 | depends on X86_32 |
208 | help | 199 | help |
209 | Select this for an IDT Winchip-2A or 3. Linux and GCC | 200 | Select this for an IDT Winchip-2, 2A or 3. Linux and GCC |
210 | treat this chip as a 586TSC with some extended instructions | 201 | treat this chip as a 586TSC with some extended instructions |
211 | and alignment requirements. Also enable out of order memory | 202 | and alignment requirements. Also enable out of order memory |
212 | stores for this CPU, which can increase performance of some | 203 | stores for this CPU, which can increase performance of some |
@@ -318,7 +309,7 @@ config X86_L1_CACHE_SHIFT | |||
318 | int | 309 | int |
319 | default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC | 310 | default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC |
320 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 | 311 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 |
321 | default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX | 312 | default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX |
322 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 | 313 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 |
323 | 314 | ||
324 | config X86_XADD | 315 | config X86_XADD |
@@ -360,7 +351,7 @@ config X86_POPAD_OK | |||
360 | 351 | ||
361 | config X86_ALIGNMENT_16 | 352 | config X86_ALIGNMENT_16 |
362 | def_bool y | 353 | def_bool y |
363 | depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 | 354 | depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 |
364 | 355 | ||
365 | config X86_INTEL_USERCOPY | 356 | config X86_INTEL_USERCOPY |
366 | def_bool y | 357 | def_bool y |
@@ -368,7 +359,7 @@ config X86_INTEL_USERCOPY | |||
368 | 359 | ||
369 | config X86_USE_PPRO_CHECKSUM | 360 | config X86_USE_PPRO_CHECKSUM |
370 | def_bool y | 361 | def_bool y |
371 | depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 | 362 | depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 |
372 | 363 | ||
373 | config X86_USE_3DNOW | 364 | config X86_USE_3DNOW |
374 | def_bool y | 365 | def_bool y |
@@ -376,7 +367,7 @@ config X86_USE_3DNOW | |||
376 | 367 | ||
377 | config X86_OOSTORE | 368 | config X86_OOSTORE |
378 | def_bool y | 369 | def_bool y |
379 | depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR | 370 | depends on (MWINCHIP3D || MWINCHIPC6) && MTRR |
380 | 371 | ||
381 | # | 372 | # |
382 | # P6_NOPs are a relatively minor optimization that require a family >= | 373 | # P6_NOPs are a relatively minor optimization that require a family >= |
@@ -396,7 +387,7 @@ config X86_P6_NOP | |||
396 | 387 | ||
397 | config X86_TSC | 388 | config X86_TSC |
398 | def_bool y | 389 | def_bool y |
399 | depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 | 390 | depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 |
400 | 391 | ||
401 | config X86_CMPXCHG64 | 392 | config X86_CMPXCHG64 |
402 | def_bool y | 393 | def_bool y |
@@ -406,7 +397,7 @@ config X86_CMPXCHG64 | |||
406 | # generates cmov. | 397 | # generates cmov. |
407 | config X86_CMOV | 398 | config X86_CMOV |
408 | def_bool y | 399 | def_bool y |
409 | depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || X86_64) | 400 | depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64) |
410 | 401 | ||
411 | config X86_MINIMUM_CPU_FAMILY | 402 | config X86_MINIMUM_CPU_FAMILY |
412 | int | 403 | int |
@@ -417,7 +408,7 @@ config X86_MINIMUM_CPU_FAMILY | |||
417 | 408 | ||
418 | config X86_DEBUGCTLMSR | 409 | config X86_DEBUGCTLMSR |
419 | def_bool y | 410 | def_bool y |
420 | depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) | 411 | depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) |
421 | 412 | ||
422 | menuconfig PROCESSOR_SELECT | 413 | menuconfig PROCESSOR_SELECT |
423 | bool "Supported processor vendors" if EMBEDDED | 414 | bool "Supported processor vendors" if EMBEDDED |
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index b72b4f753113..80177ec052f0 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu | |||
@@ -28,7 +28,6 @@ cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) | |||
28 | cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 | 28 | cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 |
29 | cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 | 29 | cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 |
30 | cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) | 30 | cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) |
31 | cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586) | ||
32 | cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) | 31 | cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) |
33 | cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 | 32 | cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 |
34 | cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) | 33 | cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) |
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index ca226ca31288..52d0359719d7 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig | |||
@@ -213,7 +213,6 @@ CONFIG_M686=y | |||
213 | # CONFIG_MCRUSOE is not set | 213 | # CONFIG_MCRUSOE is not set |
214 | # CONFIG_MEFFICEON is not set | 214 | # CONFIG_MEFFICEON is not set |
215 | # CONFIG_MWINCHIPC6 is not set | 215 | # CONFIG_MWINCHIPC6 is not set |
216 | # CONFIG_MWINCHIP2 is not set | ||
217 | # CONFIG_MWINCHIP3D is not set | 216 | # CONFIG_MWINCHIP3D is not set |
218 | # CONFIG_MGEODEGX1 is not set | 217 | # CONFIG_MGEODEGX1 is not set |
219 | # CONFIG_MGEODE_LX is not set | 218 | # CONFIG_MGEODE_LX is not set |
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 2c4b1c771e28..f0a03d7a7d63 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig | |||
@@ -210,7 +210,6 @@ CONFIG_X86_PC=y | |||
210 | # CONFIG_MCRUSOE is not set | 210 | # CONFIG_MCRUSOE is not set |
211 | # CONFIG_MEFFICEON is not set | 211 | # CONFIG_MEFFICEON is not set |
212 | # CONFIG_MWINCHIPC6 is not set | 212 | # CONFIG_MWINCHIPC6 is not set |
213 | # CONFIG_MWINCHIP2 is not set | ||
214 | # CONFIG_MWINCHIP3D is not set | 213 | # CONFIG_MWINCHIP3D is not set |
215 | # CONFIG_MGEODEGX1 is not set | 214 | # CONFIG_MGEODEGX1 is not set |
216 | # CONFIG_MGEODE_LX is not set | 215 | # CONFIG_MGEODE_LX is not set |
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index ffc1bb4fed7d..eb4314768bf7 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -39,11 +39,11 @@ | |||
39 | .endm | 39 | .endm |
40 | 40 | ||
41 | /* clobbers %eax */ | 41 | /* clobbers %eax */ |
42 | .macro CLEAR_RREGS | 42 | .macro CLEAR_RREGS _r9=rax |
43 | xorl %eax,%eax | 43 | xorl %eax,%eax |
44 | movq %rax,R11(%rsp) | 44 | movq %rax,R11(%rsp) |
45 | movq %rax,R10(%rsp) | 45 | movq %rax,R10(%rsp) |
46 | movq %rax,R9(%rsp) | 46 | movq %\_r9,R9(%rsp) |
47 | movq %rax,R8(%rsp) | 47 | movq %rax,R8(%rsp) |
48 | .endm | 48 | .endm |
49 | 49 | ||
@@ -52,11 +52,10 @@ | |||
52 | * We don't reload %eax because syscall_trace_enter() returned | 52 | * We don't reload %eax because syscall_trace_enter() returned |
53 | * the value it wants us to use in the table lookup. | 53 | * the value it wants us to use in the table lookup. |
54 | */ | 54 | */ |
55 | .macro LOAD_ARGS32 offset | 55 | .macro LOAD_ARGS32 offset, _r9=0 |
56 | movl \offset(%rsp),%r11d | 56 | .if \_r9 |
57 | movl \offset+8(%rsp),%r10d | ||
58 | movl \offset+16(%rsp),%r9d | 57 | movl \offset+16(%rsp),%r9d |
59 | movl \offset+24(%rsp),%r8d | 58 | .endif |
60 | movl \offset+40(%rsp),%ecx | 59 | movl \offset+40(%rsp),%ecx |
61 | movl \offset+48(%rsp),%edx | 60 | movl \offset+48(%rsp),%edx |
62 | movl \offset+56(%rsp),%esi | 61 | movl \offset+56(%rsp),%esi |
@@ -145,7 +144,7 @@ ENTRY(ia32_sysenter_target) | |||
145 | SAVE_ARGS 0,0,1 | 144 | SAVE_ARGS 0,0,1 |
146 | /* no need to do an access_ok check here because rbp has been | 145 | /* no need to do an access_ok check here because rbp has been |
147 | 32bit zero extended */ | 146 | 32bit zero extended */ |
148 | 1: movl (%rbp),%r9d | 147 | 1: movl (%rbp),%ebp |
149 | .section __ex_table,"a" | 148 | .section __ex_table,"a" |
150 | .quad 1b,ia32_badarg | 149 | .quad 1b,ia32_badarg |
151 | .previous | 150 | .previous |
@@ -157,7 +156,7 @@ ENTRY(ia32_sysenter_target) | |||
157 | cmpl $(IA32_NR_syscalls-1),%eax | 156 | cmpl $(IA32_NR_syscalls-1),%eax |
158 | ja ia32_badsys | 157 | ja ia32_badsys |
159 | sysenter_do_call: | 158 | sysenter_do_call: |
160 | IA32_ARG_FIXUP 1 | 159 | IA32_ARG_FIXUP |
161 | sysenter_dispatch: | 160 | sysenter_dispatch: |
162 | call *ia32_sys_call_table(,%rax,8) | 161 | call *ia32_sys_call_table(,%rax,8) |
163 | movq %rax,RAX-ARGOFFSET(%rsp) | 162 | movq %rax,RAX-ARGOFFSET(%rsp) |
@@ -234,20 +233,17 @@ sysexit_audit: | |||
234 | #endif | 233 | #endif |
235 | 234 | ||
236 | sysenter_tracesys: | 235 | sysenter_tracesys: |
237 | xchgl %r9d,%ebp | ||
238 | #ifdef CONFIG_AUDITSYSCALL | 236 | #ifdef CONFIG_AUDITSYSCALL |
239 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) | 237 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) |
240 | jz sysenter_auditsys | 238 | jz sysenter_auditsys |
241 | #endif | 239 | #endif |
242 | SAVE_REST | 240 | SAVE_REST |
243 | CLEAR_RREGS | 241 | CLEAR_RREGS |
244 | movq %r9,R9(%rsp) | ||
245 | movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ | 242 | movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ |
246 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 243 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
247 | call syscall_trace_enter | 244 | call syscall_trace_enter |
248 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ | 245 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ |
249 | RESTORE_REST | 246 | RESTORE_REST |
250 | xchgl %ebp,%r9d | ||
251 | cmpl $(IA32_NR_syscalls-1),%eax | 247 | cmpl $(IA32_NR_syscalls-1),%eax |
252 | ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ | 248 | ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ |
253 | jmp sysenter_do_call | 249 | jmp sysenter_do_call |
@@ -314,9 +310,9 @@ ENTRY(ia32_cstar_target) | |||
314 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) | 310 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) |
315 | CFI_REMEMBER_STATE | 311 | CFI_REMEMBER_STATE |
316 | jnz cstar_tracesys | 312 | jnz cstar_tracesys |
317 | cstar_do_call: | ||
318 | cmpl $IA32_NR_syscalls-1,%eax | 313 | cmpl $IA32_NR_syscalls-1,%eax |
319 | ja ia32_badsys | 314 | ja ia32_badsys |
315 | cstar_do_call: | ||
320 | IA32_ARG_FIXUP 1 | 316 | IA32_ARG_FIXUP 1 |
321 | cstar_dispatch: | 317 | cstar_dispatch: |
322 | call *ia32_sys_call_table(,%rax,8) | 318 | call *ia32_sys_call_table(,%rax,8) |
@@ -357,15 +353,13 @@ cstar_tracesys: | |||
357 | #endif | 353 | #endif |
358 | xchgl %r9d,%ebp | 354 | xchgl %r9d,%ebp |
359 | SAVE_REST | 355 | SAVE_REST |
360 | CLEAR_RREGS | 356 | CLEAR_RREGS r9 |
361 | movq %r9,R9(%rsp) | ||
362 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 357 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
363 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 358 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
364 | call syscall_trace_enter | 359 | call syscall_trace_enter |
365 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ | 360 | LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ |
366 | RESTORE_REST | 361 | RESTORE_REST |
367 | xchgl %ebp,%r9d | 362 | xchgl %ebp,%r9d |
368 | movl RSP-ARGOFFSET(%rsp), %r8d | ||
369 | cmpl $(IA32_NR_syscalls-1),%eax | 363 | cmpl $(IA32_NR_syscalls-1),%eax |
370 | ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ | 364 | ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ |
371 | jmp cstar_do_call | 365 | jmp cstar_do_call |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5098585f87ce..0d41f0343dc0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -23,7 +23,7 @@ CFLAGS_hpet.o := $(nostackp) | |||
23 | CFLAGS_tsc.o := $(nostackp) | 23 | CFLAGS_tsc.o := $(nostackp) |
24 | 24 | ||
25 | obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o | 25 | obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o |
26 | obj-y += traps_$(BITS).o irq_$(BITS).o | 26 | obj-y += traps.o irq_$(BITS).o dumpstack_$(BITS).o |
27 | obj-y += time_$(BITS).o ioport.o ldt.o | 27 | obj-y += time_$(BITS).o ioport.o ldt.o |
28 | obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o | 28 | obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o |
29 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o | 29 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index fb04e49776ba..a84ac7b570e6 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -444,7 +444,7 @@ void __init alternative_instructions(void) | |||
444 | _text, _etext); | 444 | _text, _etext); |
445 | 445 | ||
446 | /* Only switch to UP mode if we don't immediately boot others */ | 446 | /* Only switch to UP mode if we don't immediately boot others */ |
447 | if (num_possible_cpus() == 1 || setup_max_cpus <= 1) | 447 | if (num_present_cpus() == 1 || setup_max_cpus <= 1) |
448 | alternatives_smp_switch(0); | 448 | alternatives_smp_switch(0); |
449 | } | 449 | } |
450 | #endif | 450 | #endif |
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index a91c57cb666a..21c831d96af3 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c | |||
@@ -295,6 +295,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | |||
295 | * | 295 | * |
296 | * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and | 296 | * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and |
297 | * MCE interrupts are supported. Thus MCE offset must be set to 0. | 297 | * MCE interrupts are supported. Thus MCE offset must be set to 0. |
298 | * | ||
299 | * If mask=1, the LVT entry does not generate interrupts while mask=0 | ||
300 | * enables the vector. See also the BKDGs. | ||
298 | */ | 301 | */ |
299 | 302 | ||
300 | #define APIC_EILVT_LVTOFF_MCE 0 | 303 | #define APIC_EILVT_LVTOFF_MCE 0 |
@@ -319,6 +322,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) | |||
319 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); | 322 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); |
320 | return APIC_EILVT_LVTOFF_IBS; | 323 | return APIC_EILVT_LVTOFF_IBS; |
321 | } | 324 | } |
325 | EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); | ||
322 | 326 | ||
323 | /* | 327 | /* |
324 | * Program the next event, relative to now | 328 | * Program the next event, relative to now |
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 53898b65a6ae..94ddb69ae15e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c | |||
@@ -307,6 +307,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | |||
307 | * | 307 | * |
308 | * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and | 308 | * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and |
309 | * MCE interrupts are supported. Thus MCE offset must be set to 0. | 309 | * MCE interrupts are supported. Thus MCE offset must be set to 0. |
310 | * | ||
311 | * If mask=1, the LVT entry does not generate interrupts while mask=0 | ||
312 | * enables the vector. See also the BKDGs. | ||
310 | */ | 313 | */ |
311 | 314 | ||
312 | #define APIC_EILVT_LVTOFF_MCE 0 | 315 | #define APIC_EILVT_LVTOFF_MCE 0 |
@@ -331,6 +334,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) | |||
331 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); | 334 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); |
332 | return APIC_EILVT_LVTOFF_IBS; | 335 | return APIC_EILVT_LVTOFF_IBS; |
333 | } | 336 | } |
337 | EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); | ||
334 | 338 | ||
335 | /* | 339 | /* |
336 | * Program the next event, relative to now | 340 | * Program the next event, relative to now |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fb789dd9e691..25581dcb280e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -124,18 +124,25 @@ static inline int flag_is_changeable_p(u32 flag) | |||
124 | { | 124 | { |
125 | u32 f1, f2; | 125 | u32 f1, f2; |
126 | 126 | ||
127 | asm("pushfl\n\t" | 127 | /* |
128 | "pushfl\n\t" | 128 | * Cyrix and IDT cpus allow disabling of CPUID |
129 | "popl %0\n\t" | 129 | * so the code below may return different results |
130 | "movl %0,%1\n\t" | 130 | * when it is executed before and after enabling |
131 | "xorl %2,%0\n\t" | 131 | * the CPUID. Add "volatile" to not allow gcc to |
132 | "pushl %0\n\t" | 132 | * optimize the subsequent calls to this function. |
133 | "popfl\n\t" | 133 | */ |
134 | "pushfl\n\t" | 134 | asm volatile ("pushfl\n\t" |
135 | "popl %0\n\t" | 135 | "pushfl\n\t" |
136 | "popfl\n\t" | 136 | "popl %0\n\t" |
137 | : "=&r" (f1), "=&r" (f2) | 137 | "movl %0,%1\n\t" |
138 | : "ir" (flag)); | 138 | "xorl %2,%0\n\t" |
139 | "pushl %0\n\t" | ||
140 | "popfl\n\t" | ||
141 | "pushfl\n\t" | ||
142 | "popl %0\n\t" | ||
143 | "popfl\n\t" | ||
144 | : "=&r" (f1), "=&r" (f2) | ||
145 | : "ir" (flag)); | ||
139 | 146 | ||
140 | return ((f1^f2) & flag) != 0; | 147 | return ((f1^f2) & flag) != 0; |
141 | } | 148 | } |
@@ -719,12 +726,24 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
719 | #endif | 726 | #endif |
720 | } | 727 | } |
721 | 728 | ||
729 | #ifdef CONFIG_X86_64 | ||
730 | static void vgetcpu_set_mode(void) | ||
731 | { | ||
732 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) | ||
733 | vgetcpu_mode = VGETCPU_RDTSCP; | ||
734 | else | ||
735 | vgetcpu_mode = VGETCPU_LSL; | ||
736 | } | ||
737 | #endif | ||
738 | |||
722 | void __init identify_boot_cpu(void) | 739 | void __init identify_boot_cpu(void) |
723 | { | 740 | { |
724 | identify_cpu(&boot_cpu_data); | 741 | identify_cpu(&boot_cpu_data); |
725 | #ifdef CONFIG_X86_32 | 742 | #ifdef CONFIG_X86_32 |
726 | sysenter_setup(); | 743 | sysenter_setup(); |
727 | enable_sep_cpu(); | 744 | enable_sep_cpu(); |
745 | #else | ||
746 | vgetcpu_set_mode(); | ||
728 | #endif | 747 | #endif |
729 | } | 748 | } |
730 | 749 | ||
@@ -797,7 +816,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | |||
797 | else if (c->cpuid_level >= 0) | 816 | else if (c->cpuid_level >= 0) |
798 | vendor = c->x86_vendor_id; | 817 | vendor = c->x86_vendor_id; |
799 | 818 | ||
800 | if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) | 819 | if (vendor && !strstr(c->x86_model_id, vendor)) |
801 | printk(KERN_CONT "%s ", vendor); | 820 | printk(KERN_CONT "%s ", vendor); |
802 | 821 | ||
803 | if (c->x86_model_id[0]) | 822 | if (c->x86_model_id[0]) |
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index 395acb12b0d1..b4f14c6c09d9 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c | |||
@@ -66,6 +66,6 @@ struct tss_struct doublefault_tss __cacheline_aligned = { | |||
66 | .ds = __USER_DS, | 66 | .ds = __USER_DS, |
67 | .fs = __KERNEL_PERCPU, | 67 | .fs = __KERNEL_PERCPU, |
68 | 68 | ||
69 | .__cr3 = __phys_addr_const((unsigned long)swapper_pg_dir) | 69 | .__cr3 = __pa_nodebug(swapper_pg_dir), |
70 | } | 70 | } |
71 | }; | 71 | }; |
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c new file mode 100644 index 000000000000..201ee359a1a9 --- /dev/null +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -0,0 +1,447 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | */ | ||
5 | #include <linux/kallsyms.h> | ||
6 | #include <linux/kprobes.h> | ||
7 | #include <linux/uaccess.h> | ||
8 | #include <linux/utsname.h> | ||
9 | #include <linux/hardirq.h> | ||
10 | #include <linux/kdebug.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/ptrace.h> | ||
13 | #include <linux/kexec.h> | ||
14 | #include <linux/bug.h> | ||
15 | #include <linux/nmi.h> | ||
16 | |||
17 | #include <asm/stacktrace.h> | ||
18 | |||
19 | #define STACKSLOTS_PER_LINE 8 | ||
20 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
21 | |||
22 | int panic_on_unrecovered_nmi; | ||
23 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
24 | static unsigned int code_bytes = 64; | ||
25 | static int die_counter; | ||
26 | |||
27 | void printk_address(unsigned long address, int reliable) | ||
28 | { | ||
29 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
30 | reliable ? "" : "? ", (void *) address); | ||
31 | } | ||
32 | |||
33 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
34 | void *p, unsigned int size, void *end) | ||
35 | { | ||
36 | void *t = tinfo; | ||
37 | if (end) { | ||
38 | if (p < end && p >= (end-THREAD_SIZE)) | ||
39 | return 1; | ||
40 | else | ||
41 | return 0; | ||
42 | } | ||
43 | return p > t && p < t + THREAD_SIZE - size; | ||
44 | } | ||
45 | |||
46 | /* The form of the top of the frame on the stack */ | ||
47 | struct stack_frame { | ||
48 | struct stack_frame *next_frame; | ||
49 | unsigned long return_address; | ||
50 | }; | ||
51 | |||
52 | static inline unsigned long | ||
53 | print_context_stack(struct thread_info *tinfo, | ||
54 | unsigned long *stack, unsigned long bp, | ||
55 | const struct stacktrace_ops *ops, void *data, | ||
56 | unsigned long *end) | ||
57 | { | ||
58 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
59 | |||
60 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
61 | unsigned long addr; | ||
62 | |||
63 | addr = *stack; | ||
64 | if (__kernel_text_address(addr)) { | ||
65 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
66 | ops->address(data, addr, 1); | ||
67 | frame = frame->next_frame; | ||
68 | bp = (unsigned long) frame; | ||
69 | } else { | ||
70 | ops->address(data, addr, bp == 0); | ||
71 | } | ||
72 | } | ||
73 | stack++; | ||
74 | } | ||
75 | return bp; | ||
76 | } | ||
77 | |||
78 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | ||
79 | unsigned long *stack, unsigned long bp, | ||
80 | const struct stacktrace_ops *ops, void *data) | ||
81 | { | ||
82 | if (!task) | ||
83 | task = current; | ||
84 | |||
85 | if (!stack) { | ||
86 | unsigned long dummy; | ||
87 | stack = &dummy; | ||
88 | if (task && task != current) | ||
89 | stack = (unsigned long *)task->thread.sp; | ||
90 | } | ||
91 | |||
92 | #ifdef CONFIG_FRAME_POINTER | ||
93 | if (!bp) { | ||
94 | if (task == current) { | ||
95 | /* Grab bp right from our regs */ | ||
96 | get_bp(bp); | ||
97 | } else { | ||
98 | /* bp is the last reg pushed by switch_to */ | ||
99 | bp = *(unsigned long *) task->thread.sp; | ||
100 | } | ||
101 | } | ||
102 | #endif | ||
103 | |||
104 | for (;;) { | ||
105 | struct thread_info *context; | ||
106 | |||
107 | context = (struct thread_info *) | ||
108 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | ||
109 | bp = print_context_stack(context, stack, bp, ops, data, NULL); | ||
110 | |||
111 | stack = (unsigned long *)context->previous_esp; | ||
112 | if (!stack) | ||
113 | break; | ||
114 | if (ops->stack(data, "IRQ") < 0) | ||
115 | break; | ||
116 | touch_nmi_watchdog(); | ||
117 | } | ||
118 | } | ||
119 | EXPORT_SYMBOL(dump_trace); | ||
120 | |||
121 | static void | ||
122 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
123 | { | ||
124 | printk(data); | ||
125 | print_symbol(msg, symbol); | ||
126 | printk("\n"); | ||
127 | } | ||
128 | |||
129 | static void print_trace_warning(void *data, char *msg) | ||
130 | { | ||
131 | printk("%s%s\n", (char *)data, msg); | ||
132 | } | ||
133 | |||
134 | static int print_trace_stack(void *data, char *name) | ||
135 | { | ||
136 | printk("%s <%s> ", (char *)data, name); | ||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * Print one address/symbol entries per line. | ||
142 | */ | ||
143 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
144 | { | ||
145 | touch_nmi_watchdog(); | ||
146 | printk(data); | ||
147 | printk_address(addr, reliable); | ||
148 | } | ||
149 | |||
150 | static const struct stacktrace_ops print_trace_ops = { | ||
151 | .warning = print_trace_warning, | ||
152 | .warning_symbol = print_trace_warning_symbol, | ||
153 | .stack = print_trace_stack, | ||
154 | .address = print_trace_address, | ||
155 | }; | ||
156 | |||
157 | static void | ||
158 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
159 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
160 | { | ||
161 | printk("%sCall Trace:\n", log_lvl); | ||
162 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
163 | } | ||
164 | |||
165 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
166 | unsigned long *stack, unsigned long bp) | ||
167 | { | ||
168 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
169 | } | ||
170 | |||
171 | static void | ||
172 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
173 | unsigned long *sp, unsigned long bp, char *log_lvl) | ||
174 | { | ||
175 | unsigned long *stack; | ||
176 | int i; | ||
177 | |||
178 | if (sp == NULL) { | ||
179 | if (task) | ||
180 | sp = (unsigned long *)task->thread.sp; | ||
181 | else | ||
182 | sp = (unsigned long *)&sp; | ||
183 | } | ||
184 | |||
185 | stack = sp; | ||
186 | for (i = 0; i < kstack_depth_to_print; i++) { | ||
187 | if (kstack_end(stack)) | ||
188 | break; | ||
189 | if (i && ((i % STACKSLOTS_PER_LINE) == 0)) | ||
190 | printk("\n%s", log_lvl); | ||
191 | printk(" %08lx", *stack++); | ||
192 | touch_nmi_watchdog(); | ||
193 | } | ||
194 | printk("\n"); | ||
195 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | ||
196 | } | ||
197 | |||
198 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
199 | { | ||
200 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * The architecture-independent dump_stack generator | ||
205 | */ | ||
206 | void dump_stack(void) | ||
207 | { | ||
208 | unsigned long bp = 0; | ||
209 | unsigned long stack; | ||
210 | |||
211 | #ifdef CONFIG_FRAME_POINTER | ||
212 | if (!bp) | ||
213 | get_bp(bp); | ||
214 | #endif | ||
215 | |||
216 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
217 | current->pid, current->comm, print_tainted(), | ||
218 | init_utsname()->release, | ||
219 | (int)strcspn(init_utsname()->version, " "), | ||
220 | init_utsname()->version); | ||
221 | show_trace(NULL, NULL, &stack, bp); | ||
222 | } | ||
223 | |||
224 | EXPORT_SYMBOL(dump_stack); | ||
225 | |||
226 | void show_registers(struct pt_regs *regs) | ||
227 | { | ||
228 | int i; | ||
229 | |||
230 | print_modules(); | ||
231 | __show_regs(regs, 0); | ||
232 | |||
233 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", | ||
234 | TASK_COMM_LEN, current->comm, task_pid_nr(current), | ||
235 | current_thread_info(), current, task_thread_info(current)); | ||
236 | /* | ||
237 | * When in-kernel, we also print out the stack and code at the | ||
238 | * time of the fault.. | ||
239 | */ | ||
240 | if (!user_mode_vm(regs)) { | ||
241 | unsigned int code_prologue = code_bytes * 43 / 64; | ||
242 | unsigned int code_len = code_bytes; | ||
243 | unsigned char c; | ||
244 | u8 *ip; | ||
245 | |||
246 | printk(KERN_EMERG "Stack:\n"); | ||
247 | show_stack_log_lvl(NULL, regs, ®s->sp, | ||
248 | 0, KERN_EMERG); | ||
249 | |||
250 | printk(KERN_EMERG "Code: "); | ||
251 | |||
252 | ip = (u8 *)regs->ip - code_prologue; | ||
253 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | ||
254 | /* try starting at IP */ | ||
255 | ip = (u8 *)regs->ip; | ||
256 | code_len = code_len - code_prologue + 1; | ||
257 | } | ||
258 | for (i = 0; i < code_len; i++, ip++) { | ||
259 | if (ip < (u8 *)PAGE_OFFSET || | ||
260 | probe_kernel_address(ip, c)) { | ||
261 | printk(" Bad EIP value."); | ||
262 | break; | ||
263 | } | ||
264 | if (ip == (u8 *)regs->ip) | ||
265 | printk("<%02x> ", c); | ||
266 | else | ||
267 | printk("%02x ", c); | ||
268 | } | ||
269 | } | ||
270 | printk("\n"); | ||
271 | } | ||
272 | |||
273 | int is_valid_bugaddr(unsigned long ip) | ||
274 | { | ||
275 | unsigned short ud2; | ||
276 | |||
277 | if (ip < PAGE_OFFSET) | ||
278 | return 0; | ||
279 | if (probe_kernel_address((unsigned short *)ip, ud2)) | ||
280 | return 0; | ||
281 | |||
282 | return ud2 == 0x0b0f; | ||
283 | } | ||
284 | |||
285 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
286 | static int die_owner = -1; | ||
287 | static unsigned int die_nest_count; | ||
288 | |||
289 | unsigned __kprobes long oops_begin(void) | ||
290 | { | ||
291 | unsigned long flags; | ||
292 | |||
293 | oops_enter(); | ||
294 | |||
295 | if (die_owner != raw_smp_processor_id()) { | ||
296 | console_verbose(); | ||
297 | raw_local_irq_save(flags); | ||
298 | __raw_spin_lock(&die_lock); | ||
299 | die_owner = smp_processor_id(); | ||
300 | die_nest_count = 0; | ||
301 | bust_spinlocks(1); | ||
302 | } else { | ||
303 | raw_local_irq_save(flags); | ||
304 | } | ||
305 | die_nest_count++; | ||
306 | return flags; | ||
307 | } | ||
308 | |||
309 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
310 | { | ||
311 | bust_spinlocks(0); | ||
312 | die_owner = -1; | ||
313 | add_taint(TAINT_DIE); | ||
314 | __raw_spin_unlock(&die_lock); | ||
315 | raw_local_irq_restore(flags); | ||
316 | |||
317 | if (!regs) | ||
318 | return; | ||
319 | |||
320 | if (kexec_should_crash(current)) | ||
321 | crash_kexec(regs); | ||
322 | if (in_interrupt()) | ||
323 | panic("Fatal exception in interrupt"); | ||
324 | if (panic_on_oops) | ||
325 | panic("Fatal exception"); | ||
326 | oops_exit(); | ||
327 | do_exit(signr); | ||
328 | } | ||
329 | |||
330 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
331 | { | ||
332 | unsigned short ss; | ||
333 | unsigned long sp; | ||
334 | |||
335 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
336 | #ifdef CONFIG_PREEMPT | ||
337 | printk("PREEMPT "); | ||
338 | #endif | ||
339 | #ifdef CONFIG_SMP | ||
340 | printk("SMP "); | ||
341 | #endif | ||
342 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
343 | printk("DEBUG_PAGEALLOC"); | ||
344 | #endif | ||
345 | printk("\n"); | ||
346 | if (notify_die(DIE_OOPS, str, regs, err, | ||
347 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
348 | return 1; | ||
349 | |||
350 | show_registers(regs); | ||
351 | /* Executive summary in case the oops scrolled away */ | ||
352 | sp = (unsigned long) (®s->sp); | ||
353 | savesegment(ss, ss); | ||
354 | if (user_mode(regs)) { | ||
355 | sp = regs->sp; | ||
356 | ss = regs->ss & 0xffff; | ||
357 | } | ||
358 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | ||
359 | print_symbol("%s", regs->ip); | ||
360 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
361 | return 0; | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * This is gone through when something in the kernel has done something bad | ||
366 | * and is about to be terminated: | ||
367 | */ | ||
368 | void die(const char *str, struct pt_regs *regs, long err) | ||
369 | { | ||
370 | unsigned long flags = oops_begin(); | ||
371 | |||
372 | if (die_nest_count < 3) { | ||
373 | report_bug(regs->ip, regs); | ||
374 | |||
375 | if (__die(str, regs, err)) | ||
376 | regs = NULL; | ||
377 | } else { | ||
378 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); | ||
379 | } | ||
380 | |||
381 | oops_end(flags, regs, SIGSEGV); | ||
382 | } | ||
383 | |||
384 | static DEFINE_SPINLOCK(nmi_print_lock); | ||
385 | |||
386 | void notrace __kprobes | ||
387 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
388 | { | ||
389 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
390 | return; | ||
391 | |||
392 | spin_lock(&nmi_print_lock); | ||
393 | /* | ||
394 | * We are in trouble anyway, lets at least try | ||
395 | * to get a message out: | ||
396 | */ | ||
397 | bust_spinlocks(1); | ||
398 | printk(KERN_EMERG "%s", str); | ||
399 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
400 | smp_processor_id(), regs->ip); | ||
401 | show_registers(regs); | ||
402 | if (do_panic) | ||
403 | panic("Non maskable interrupt"); | ||
404 | console_silent(); | ||
405 | spin_unlock(&nmi_print_lock); | ||
406 | bust_spinlocks(0); | ||
407 | |||
408 | /* | ||
409 | * If we are in kernel we are probably nested up pretty bad | ||
410 | * and might aswell get out now while we still can: | ||
411 | */ | ||
412 | if (!user_mode_vm(regs)) { | ||
413 | current->thread.trap_no = 2; | ||
414 | crash_kexec(regs); | ||
415 | } | ||
416 | |||
417 | do_exit(SIGSEGV); | ||
418 | } | ||
419 | |||
420 | static int __init oops_setup(char *s) | ||
421 | { | ||
422 | if (!s) | ||
423 | return -EINVAL; | ||
424 | if (!strcmp(s, "panic")) | ||
425 | panic_on_oops = 1; | ||
426 | return 0; | ||
427 | } | ||
428 | early_param("oops", oops_setup); | ||
429 | |||
430 | static int __init kstack_setup(char *s) | ||
431 | { | ||
432 | if (!s) | ||
433 | return -EINVAL; | ||
434 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
435 | return 0; | ||
436 | } | ||
437 | early_param("kstack", kstack_setup); | ||
438 | |||
439 | static int __init code_bytes_setup(char *s) | ||
440 | { | ||
441 | code_bytes = simple_strtoul(s, NULL, 0); | ||
442 | if (code_bytes > 8192) | ||
443 | code_bytes = 8192; | ||
444 | |||
445 | return 1; | ||
446 | } | ||
447 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c new file mode 100644 index 000000000000..086cc8118e39 --- /dev/null +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -0,0 +1,573 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | */ | ||
5 | #include <linux/kallsyms.h> | ||
6 | #include <linux/kprobes.h> | ||
7 | #include <linux/uaccess.h> | ||
8 | #include <linux/utsname.h> | ||
9 | #include <linux/hardirq.h> | ||
10 | #include <linux/kdebug.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/ptrace.h> | ||
13 | #include <linux/kexec.h> | ||
14 | #include <linux/bug.h> | ||
15 | #include <linux/nmi.h> | ||
16 | |||
17 | #include <asm/stacktrace.h> | ||
18 | |||
19 | #define STACKSLOTS_PER_LINE 4 | ||
20 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
21 | |||
22 | int panic_on_unrecovered_nmi; | ||
23 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
24 | static unsigned int code_bytes = 64; | ||
25 | static int die_counter; | ||
26 | |||
27 | void printk_address(unsigned long address, int reliable) | ||
28 | { | ||
29 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
30 | reliable ? "" : "? ", (void *) address); | ||
31 | } | ||
32 | |||
33 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | ||
34 | unsigned *usedp, char **idp) | ||
35 | { | ||
36 | static char ids[][8] = { | ||
37 | [DEBUG_STACK - 1] = "#DB", | ||
38 | [NMI_STACK - 1] = "NMI", | ||
39 | [DOUBLEFAULT_STACK - 1] = "#DF", | ||
40 | [STACKFAULT_STACK - 1] = "#SS", | ||
41 | [MCE_STACK - 1] = "#MC", | ||
42 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
43 | [N_EXCEPTION_STACKS ... | ||
44 | N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | ||
45 | #endif | ||
46 | }; | ||
47 | unsigned k; | ||
48 | |||
49 | /* | ||
50 | * Iterate over all exception stacks, and figure out whether | ||
51 | * 'stack' is in one of them: | ||
52 | */ | ||
53 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { | ||
54 | unsigned long end = per_cpu(orig_ist, cpu).ist[k]; | ||
55 | /* | ||
56 | * Is 'stack' above this exception frame's end? | ||
57 | * If yes then skip to the next frame. | ||
58 | */ | ||
59 | if (stack >= end) | ||
60 | continue; | ||
61 | /* | ||
62 | * Is 'stack' above this exception frame's start address? | ||
63 | * If yes then we found the right frame. | ||
64 | */ | ||
65 | if (stack >= end - EXCEPTION_STKSZ) { | ||
66 | /* | ||
67 | * Make sure we only iterate through an exception | ||
68 | * stack once. If it comes up for the second time | ||
69 | * then there's something wrong going on - just | ||
70 | * break out and return NULL: | ||
71 | */ | ||
72 | if (*usedp & (1U << k)) | ||
73 | break; | ||
74 | *usedp |= 1U << k; | ||
75 | *idp = ids[k]; | ||
76 | return (unsigned long *)end; | ||
77 | } | ||
78 | /* | ||
79 | * If this is a debug stack, and if it has a larger size than | ||
80 | * the usual exception stacks, then 'stack' might still | ||
81 | * be within the lower portion of the debug stack: | ||
82 | */ | ||
83 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
84 | if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { | ||
85 | unsigned j = N_EXCEPTION_STACKS - 1; | ||
86 | |||
87 | /* | ||
88 | * Black magic. A large debug stack is composed of | ||
89 | * multiple exception stack entries, which we | ||
90 | * iterate through now. Dont look: | ||
91 | */ | ||
92 | do { | ||
93 | ++j; | ||
94 | end -= EXCEPTION_STKSZ; | ||
95 | ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); | ||
96 | } while (stack < end - EXCEPTION_STKSZ); | ||
97 | if (*usedp & (1U << j)) | ||
98 | break; | ||
99 | *usedp |= 1U << j; | ||
100 | *idp = ids[j]; | ||
101 | return (unsigned long *)end; | ||
102 | } | ||
103 | #endif | ||
104 | } | ||
105 | return NULL; | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * x86-64 can have up to three kernel stacks: | ||
110 | * process stack | ||
111 | * interrupt stack | ||
112 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | ||
113 | */ | ||
114 | |||
115 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
116 | void *p, unsigned int size, void *end) | ||
117 | { | ||
118 | void *t = tinfo; | ||
119 | if (end) { | ||
120 | if (p < end && p >= (end-THREAD_SIZE)) | ||
121 | return 1; | ||
122 | else | ||
123 | return 0; | ||
124 | } | ||
125 | return p > t && p < t + THREAD_SIZE - size; | ||
126 | } | ||
127 | |||
128 | /* The form of the top of the frame on the stack */ | ||
129 | struct stack_frame { | ||
130 | struct stack_frame *next_frame; | ||
131 | unsigned long return_address; | ||
132 | }; | ||
133 | |||
134 | static inline unsigned long | ||
135 | print_context_stack(struct thread_info *tinfo, | ||
136 | unsigned long *stack, unsigned long bp, | ||
137 | const struct stacktrace_ops *ops, void *data, | ||
138 | unsigned long *end) | ||
139 | { | ||
140 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
141 | |||
142 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
143 | unsigned long addr; | ||
144 | |||
145 | addr = *stack; | ||
146 | if (__kernel_text_address(addr)) { | ||
147 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
148 | ops->address(data, addr, 1); | ||
149 | frame = frame->next_frame; | ||
150 | bp = (unsigned long) frame; | ||
151 | } else { | ||
152 | ops->address(data, addr, bp == 0); | ||
153 | } | ||
154 | } | ||
155 | stack++; | ||
156 | } | ||
157 | return bp; | ||
158 | } | ||
159 | |||
160 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | ||
161 | unsigned long *stack, unsigned long bp, | ||
162 | const struct stacktrace_ops *ops, void *data) | ||
163 | { | ||
164 | const unsigned cpu = get_cpu(); | ||
165 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; | ||
166 | unsigned used = 0; | ||
167 | struct thread_info *tinfo; | ||
168 | |||
169 | if (!task) | ||
170 | task = current; | ||
171 | |||
172 | if (!stack) { | ||
173 | unsigned long dummy; | ||
174 | stack = &dummy; | ||
175 | if (task && task != current) | ||
176 | stack = (unsigned long *)task->thread.sp; | ||
177 | } | ||
178 | |||
179 | #ifdef CONFIG_FRAME_POINTER | ||
180 | if (!bp) { | ||
181 | if (task == current) { | ||
182 | /* Grab bp right from our regs */ | ||
183 | get_bp(bp); | ||
184 | } else { | ||
185 | /* bp is the last reg pushed by switch_to */ | ||
186 | bp = *(unsigned long *) task->thread.sp; | ||
187 | } | ||
188 | } | ||
189 | #endif | ||
190 | |||
191 | /* | ||
192 | * Print function call entries in all stacks, starting at the | ||
193 | * current stack address. If the stacks consist of nested | ||
194 | * exceptions | ||
195 | */ | ||
196 | tinfo = task_thread_info(task); | ||
197 | for (;;) { | ||
198 | char *id; | ||
199 | unsigned long *estack_end; | ||
200 | estack_end = in_exception_stack(cpu, (unsigned long)stack, | ||
201 | &used, &id); | ||
202 | |||
203 | if (estack_end) { | ||
204 | if (ops->stack(data, id) < 0) | ||
205 | break; | ||
206 | |||
207 | bp = print_context_stack(tinfo, stack, bp, ops, | ||
208 | data, estack_end); | ||
209 | ops->stack(data, "<EOE>"); | ||
210 | /* | ||
211 | * We link to the next stack via the | ||
212 | * second-to-last pointer (index -2 to end) in the | ||
213 | * exception stack: | ||
214 | */ | ||
215 | stack = (unsigned long *) estack_end[-2]; | ||
216 | continue; | ||
217 | } | ||
218 | if (irqstack_end) { | ||
219 | unsigned long *irqstack; | ||
220 | irqstack = irqstack_end - | ||
221 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); | ||
222 | |||
223 | if (stack >= irqstack && stack < irqstack_end) { | ||
224 | if (ops->stack(data, "IRQ") < 0) | ||
225 | break; | ||
226 | bp = print_context_stack(tinfo, stack, bp, | ||
227 | ops, data, irqstack_end); | ||
228 | /* | ||
229 | * We link to the next stack (which would be | ||
230 | * the process stack normally) the last | ||
231 | * pointer (index -1 to end) in the IRQ stack: | ||
232 | */ | ||
233 | stack = (unsigned long *) (irqstack_end[-1]); | ||
234 | irqstack_end = NULL; | ||
235 | ops->stack(data, "EOI"); | ||
236 | continue; | ||
237 | } | ||
238 | } | ||
239 | break; | ||
240 | } | ||
241 | |||
242 | /* | ||
243 | * This handles the process stack: | ||
244 | */ | ||
245 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); | ||
246 | put_cpu(); | ||
247 | } | ||
248 | EXPORT_SYMBOL(dump_trace); | ||
249 | |||
250 | static void | ||
251 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
252 | { | ||
253 | printk(data); | ||
254 | print_symbol(msg, symbol); | ||
255 | printk("\n"); | ||
256 | } | ||
257 | |||
258 | static void print_trace_warning(void *data, char *msg) | ||
259 | { | ||
260 | printk("%s%s\n", (char *)data, msg); | ||
261 | } | ||
262 | |||
263 | static int print_trace_stack(void *data, char *name) | ||
264 | { | ||
265 | printk("%s <%s> ", (char *)data, name); | ||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * Print one address/symbol entries per line. | ||
271 | */ | ||
272 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
273 | { | ||
274 | touch_nmi_watchdog(); | ||
275 | printk(data); | ||
276 | printk_address(addr, reliable); | ||
277 | } | ||
278 | |||
279 | static const struct stacktrace_ops print_trace_ops = { | ||
280 | .warning = print_trace_warning, | ||
281 | .warning_symbol = print_trace_warning_symbol, | ||
282 | .stack = print_trace_stack, | ||
283 | .address = print_trace_address, | ||
284 | }; | ||
285 | |||
286 | static void | ||
287 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
288 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
289 | { | ||
290 | printk("%sCall Trace:\n", log_lvl); | ||
291 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
292 | } | ||
293 | |||
294 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
295 | unsigned long *stack, unsigned long bp) | ||
296 | { | ||
297 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
298 | } | ||
299 | |||
300 | static void | ||
301 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
302 | unsigned long *sp, unsigned long bp, char *log_lvl) | ||
303 | { | ||
304 | unsigned long *stack; | ||
305 | int i; | ||
306 | const int cpu = smp_processor_id(); | ||
307 | unsigned long *irqstack_end = | ||
308 | (unsigned long *) (cpu_pda(cpu)->irqstackptr); | ||
309 | unsigned long *irqstack = | ||
310 | (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); | ||
311 | |||
312 | /* | ||
313 | * debugging aid: "show_stack(NULL, NULL);" prints the | ||
314 | * back trace for this cpu. | ||
315 | */ | ||
316 | |||
317 | if (sp == NULL) { | ||
318 | if (task) | ||
319 | sp = (unsigned long *)task->thread.sp; | ||
320 | else | ||
321 | sp = (unsigned long *)&sp; | ||
322 | } | ||
323 | |||
324 | stack = sp; | ||
325 | for (i = 0; i < kstack_depth_to_print; i++) { | ||
326 | if (stack >= irqstack && stack <= irqstack_end) { | ||
327 | if (stack == irqstack_end) { | ||
328 | stack = (unsigned long *) (irqstack_end[-1]); | ||
329 | printk(" <EOI> "); | ||
330 | } | ||
331 | } else { | ||
332 | if (((long) stack & (THREAD_SIZE-1)) == 0) | ||
333 | break; | ||
334 | } | ||
335 | if (i && ((i % STACKSLOTS_PER_LINE) == 0)) | ||
336 | printk("\n%s", log_lvl); | ||
337 | printk(" %016lx", *stack++); | ||
338 | touch_nmi_watchdog(); | ||
339 | } | ||
340 | printk("\n"); | ||
341 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | ||
342 | } | ||
343 | |||
344 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
345 | { | ||
346 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
347 | } | ||
348 | |||
349 | /* | ||
350 | * The architecture-independent dump_stack generator | ||
351 | */ | ||
352 | void dump_stack(void) | ||
353 | { | ||
354 | unsigned long bp = 0; | ||
355 | unsigned long stack; | ||
356 | |||
357 | #ifdef CONFIG_FRAME_POINTER | ||
358 | if (!bp) | ||
359 | get_bp(bp); | ||
360 | #endif | ||
361 | |||
362 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
363 | current->pid, current->comm, print_tainted(), | ||
364 | init_utsname()->release, | ||
365 | (int)strcspn(init_utsname()->version, " "), | ||
366 | init_utsname()->version); | ||
367 | show_trace(NULL, NULL, &stack, bp); | ||
368 | } | ||
369 | EXPORT_SYMBOL(dump_stack); | ||
370 | |||
371 | void show_registers(struct pt_regs *regs) | ||
372 | { | ||
373 | int i; | ||
374 | unsigned long sp; | ||
375 | const int cpu = smp_processor_id(); | ||
376 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; | ||
377 | |||
378 | sp = regs->sp; | ||
379 | printk("CPU %d ", cpu); | ||
380 | __show_regs(regs, 1); | ||
381 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | ||
382 | cur->comm, cur->pid, task_thread_info(cur), cur); | ||
383 | |||
384 | /* | ||
385 | * When in-kernel, we also print out the stack and code at the | ||
386 | * time of the fault.. | ||
387 | */ | ||
388 | if (!user_mode(regs)) { | ||
389 | unsigned int code_prologue = code_bytes * 43 / 64; | ||
390 | unsigned int code_len = code_bytes; | ||
391 | unsigned char c; | ||
392 | u8 *ip; | ||
393 | |||
394 | printk(KERN_EMERG "Stack:\n"); | ||
395 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, | ||
396 | regs->bp, KERN_EMERG); | ||
397 | |||
398 | printk(KERN_EMERG "Code: "); | ||
399 | |||
400 | ip = (u8 *)regs->ip - code_prologue; | ||
401 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | ||
402 | /* try starting at IP */ | ||
403 | ip = (u8 *)regs->ip; | ||
404 | code_len = code_len - code_prologue + 1; | ||
405 | } | ||
406 | for (i = 0; i < code_len; i++, ip++) { | ||
407 | if (ip < (u8 *)PAGE_OFFSET || | ||
408 | probe_kernel_address(ip, c)) { | ||
409 | printk(" Bad RIP value."); | ||
410 | break; | ||
411 | } | ||
412 | if (ip == (u8 *)regs->ip) | ||
413 | printk("<%02x> ", c); | ||
414 | else | ||
415 | printk("%02x ", c); | ||
416 | } | ||
417 | } | ||
418 | printk("\n"); | ||
419 | } | ||
420 | |||
421 | int is_valid_bugaddr(unsigned long ip) | ||
422 | { | ||
423 | unsigned short ud2; | ||
424 | |||
425 | if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2))) | ||
426 | return 0; | ||
427 | |||
428 | return ud2 == 0x0b0f; | ||
429 | } | ||
430 | |||
431 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
432 | static int die_owner = -1; | ||
433 | static unsigned int die_nest_count; | ||
434 | |||
435 | unsigned __kprobes long oops_begin(void) | ||
436 | { | ||
437 | int cpu; | ||
438 | unsigned long flags; | ||
439 | |||
440 | oops_enter(); | ||
441 | |||
442 | /* racy, but better than risking deadlock. */ | ||
443 | raw_local_irq_save(flags); | ||
444 | cpu = smp_processor_id(); | ||
445 | if (!__raw_spin_trylock(&die_lock)) { | ||
446 | if (cpu == die_owner) | ||
447 | /* nested oops. should stop eventually */; | ||
448 | else | ||
449 | __raw_spin_lock(&die_lock); | ||
450 | } | ||
451 | die_nest_count++; | ||
452 | die_owner = cpu; | ||
453 | console_verbose(); | ||
454 | bust_spinlocks(1); | ||
455 | return flags; | ||
456 | } | ||
457 | |||
458 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
459 | { | ||
460 | die_owner = -1; | ||
461 | bust_spinlocks(0); | ||
462 | die_nest_count--; | ||
463 | if (!die_nest_count) | ||
464 | /* Nest count reaches zero, release the lock. */ | ||
465 | __raw_spin_unlock(&die_lock); | ||
466 | raw_local_irq_restore(flags); | ||
467 | if (!regs) { | ||
468 | oops_exit(); | ||
469 | return; | ||
470 | } | ||
471 | if (in_interrupt()) | ||
472 | panic("Fatal exception in interrupt"); | ||
473 | if (panic_on_oops) | ||
474 | panic("Fatal exception"); | ||
475 | oops_exit(); | ||
476 | do_exit(signr); | ||
477 | } | ||
478 | |||
479 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
480 | { | ||
481 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
482 | #ifdef CONFIG_PREEMPT | ||
483 | printk("PREEMPT "); | ||
484 | #endif | ||
485 | #ifdef CONFIG_SMP | ||
486 | printk("SMP "); | ||
487 | #endif | ||
488 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
489 | printk("DEBUG_PAGEALLOC"); | ||
490 | #endif | ||
491 | printk("\n"); | ||
492 | if (notify_die(DIE_OOPS, str, regs, err, | ||
493 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
494 | return 1; | ||
495 | |||
496 | show_registers(regs); | ||
497 | add_taint(TAINT_DIE); | ||
498 | /* Executive summary in case the oops scrolled away */ | ||
499 | printk(KERN_ALERT "RIP "); | ||
500 | printk_address(regs->ip, 1); | ||
501 | printk(" RSP <%016lx>\n", regs->sp); | ||
502 | if (kexec_should_crash(current)) | ||
503 | crash_kexec(regs); | ||
504 | return 0; | ||
505 | } | ||
506 | |||
507 | void die(const char *str, struct pt_regs *regs, long err) | ||
508 | { | ||
509 | unsigned long flags = oops_begin(); | ||
510 | |||
511 | if (!user_mode(regs)) | ||
512 | report_bug(regs->ip, regs); | ||
513 | |||
514 | if (__die(str, regs, err)) | ||
515 | regs = NULL; | ||
516 | oops_end(flags, regs, SIGSEGV); | ||
517 | } | ||
518 | |||
519 | notrace __kprobes void | ||
520 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
521 | { | ||
522 | unsigned long flags; | ||
523 | |||
524 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
525 | return; | ||
526 | |||
527 | flags = oops_begin(); | ||
528 | /* | ||
529 | * We are in trouble anyway, lets at least try | ||
530 | * to get a message out. | ||
531 | */ | ||
532 | printk(KERN_EMERG "%s", str); | ||
533 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
534 | smp_processor_id(), regs->ip); | ||
535 | show_registers(regs); | ||
536 | if (kexec_should_crash(current)) | ||
537 | crash_kexec(regs); | ||
538 | if (do_panic || panic_on_oops) | ||
539 | panic("Non maskable interrupt"); | ||
540 | oops_end(flags, NULL, SIGBUS); | ||
541 | nmi_exit(); | ||
542 | local_irq_enable(); | ||
543 | do_exit(SIGBUS); | ||
544 | } | ||
545 | |||
546 | static int __init oops_setup(char *s) | ||
547 | { | ||
548 | if (!s) | ||
549 | return -EINVAL; | ||
550 | if (!strcmp(s, "panic")) | ||
551 | panic_on_oops = 1; | ||
552 | return 0; | ||
553 | } | ||
554 | early_param("oops", oops_setup); | ||
555 | |||
556 | static int __init kstack_setup(char *s) | ||
557 | { | ||
558 | if (!s) | ||
559 | return -EINVAL; | ||
560 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
561 | return 0; | ||
562 | } | ||
563 | early_param("kstack", kstack_setup); | ||
564 | |||
565 | static int __init code_bytes_setup(char *s) | ||
566 | { | ||
567 | code_bytes = simple_strtoul(s, NULL, 0); | ||
568 | if (code_bytes > 8192) | ||
569 | code_bytes = 8192; | ||
570 | |||
571 | return 1; | ||
572 | } | ||
573 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 109792bc7cfa..b21fbfaffe39 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -730,6 +730,7 @@ error_code: | |||
730 | movl $(__USER_DS), %ecx | 730 | movl $(__USER_DS), %ecx |
731 | movl %ecx, %ds | 731 | movl %ecx, %ds |
732 | movl %ecx, %es | 732 | movl %ecx, %es |
733 | TRACE_IRQS_OFF | ||
733 | movl %esp,%eax # pt_regs pointer | 734 | movl %esp,%eax # pt_regs pointer |
734 | call *%edi | 735 | call *%edi |
735 | jmp ret_from_exception | 736 | jmp ret_from_exception |
@@ -760,20 +761,9 @@ ENTRY(device_not_available) | |||
760 | RING0_INT_FRAME | 761 | RING0_INT_FRAME |
761 | pushl $-1 # mark this as an int | 762 | pushl $-1 # mark this as an int |
762 | CFI_ADJUST_CFA_OFFSET 4 | 763 | CFI_ADJUST_CFA_OFFSET 4 |
763 | SAVE_ALL | 764 | pushl $do_device_not_available |
764 | GET_CR0_INTO_EAX | ||
765 | testl $0x4, %eax # EM (math emulation bit) | ||
766 | jne device_not_available_emulate | ||
767 | preempt_stop(CLBR_ANY) | ||
768 | call math_state_restore | ||
769 | jmp ret_from_exception | ||
770 | device_not_available_emulate: | ||
771 | pushl $0 # temporary storage for ORIG_EIP | ||
772 | CFI_ADJUST_CFA_OFFSET 4 | 765 | CFI_ADJUST_CFA_OFFSET 4 |
773 | call math_emulate | 766 | jmp error_code |
774 | addl $4, %esp | ||
775 | CFI_ADJUST_CFA_OFFSET -4 | ||
776 | jmp ret_from_exception | ||
777 | CFI_ENDPROC | 767 | CFI_ENDPROC |
778 | END(device_not_available) | 768 | END(device_not_available) |
779 | 769 | ||
@@ -814,6 +804,7 @@ debug_stack_correct: | |||
814 | pushl $-1 # mark this as an int | 804 | pushl $-1 # mark this as an int |
815 | CFI_ADJUST_CFA_OFFSET 4 | 805 | CFI_ADJUST_CFA_OFFSET 4 |
816 | SAVE_ALL | 806 | SAVE_ALL |
807 | TRACE_IRQS_OFF | ||
817 | xorl %edx,%edx # error code 0 | 808 | xorl %edx,%edx # error code 0 |
818 | movl %esp,%eax # pt_regs pointer | 809 | movl %esp,%eax # pt_regs pointer |
819 | call do_debug | 810 | call do_debug |
@@ -858,6 +849,7 @@ nmi_stack_correct: | |||
858 | pushl %eax | 849 | pushl %eax |
859 | CFI_ADJUST_CFA_OFFSET 4 | 850 | CFI_ADJUST_CFA_OFFSET 4 |
860 | SAVE_ALL | 851 | SAVE_ALL |
852 | TRACE_IRQS_OFF | ||
861 | xorl %edx,%edx # zero error code | 853 | xorl %edx,%edx # zero error code |
862 | movl %esp,%eax # pt_regs pointer | 854 | movl %esp,%eax # pt_regs pointer |
863 | call do_nmi | 855 | call do_nmi |
@@ -898,6 +890,7 @@ nmi_espfix_stack: | |||
898 | pushl %eax | 890 | pushl %eax |
899 | CFI_ADJUST_CFA_OFFSET 4 | 891 | CFI_ADJUST_CFA_OFFSET 4 |
900 | SAVE_ALL | 892 | SAVE_ALL |
893 | TRACE_IRQS_OFF | ||
901 | FIXUP_ESPFIX_STACK # %eax == %esp | 894 | FIXUP_ESPFIX_STACK # %eax == %esp |
902 | xorl %edx,%edx # zero error code | 895 | xorl %edx,%edx # zero error code |
903 | call do_nmi | 896 | call do_nmi |
@@ -928,6 +921,7 @@ KPROBE_ENTRY(int3) | |||
928 | pushl $-1 # mark this as an int | 921 | pushl $-1 # mark this as an int |
929 | CFI_ADJUST_CFA_OFFSET 4 | 922 | CFI_ADJUST_CFA_OFFSET 4 |
930 | SAVE_ALL | 923 | SAVE_ALL |
924 | TRACE_IRQS_OFF | ||
931 | xorl %edx,%edx # zero error code | 925 | xorl %edx,%edx # zero error code |
932 | movl %esp,%eax # pt_regs pointer | 926 | movl %esp,%eax # pt_regs pointer |
933 | call do_int3 | 927 | call do_int3 |
@@ -1030,7 +1024,7 @@ ENTRY(machine_check) | |||
1030 | RING0_INT_FRAME | 1024 | RING0_INT_FRAME |
1031 | pushl $0 | 1025 | pushl $0 |
1032 | CFI_ADJUST_CFA_OFFSET 4 | 1026 | CFI_ADJUST_CFA_OFFSET 4 |
1033 | pushl machine_check_vector | 1027 | pushl $do_machine_check |
1034 | CFI_ADJUST_CFA_OFFSET 4 | 1028 | CFI_ADJUST_CFA_OFFSET 4 |
1035 | jmp error_code | 1029 | jmp error_code |
1036 | CFI_ENDPROC | 1030 | CFI_ENDPROC |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index cf3a0b2d0059..1db6ce4314e1 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -667,6 +667,13 @@ END(stub_rt_sigreturn) | |||
667 | SAVE_ARGS | 667 | SAVE_ARGS |
668 | leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler | 668 | leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler |
669 | pushq %rbp | 669 | pushq %rbp |
670 | /* | ||
671 | * Save rbp twice: One is for marking the stack frame, as usual, and the | ||
672 | * other, to fill pt_regs properly. This is because bx comes right | ||
673 | * before the last saved register in that structure, and not bp. If the | ||
674 | * base pointer were in the place bx is today, this would not be needed. | ||
675 | */ | ||
676 | movq %rbp, -8(%rsp) | ||
670 | CFI_ADJUST_CFA_OFFSET 8 | 677 | CFI_ADJUST_CFA_OFFSET 8 |
671 | CFI_REL_OFFSET rbp, 0 | 678 | CFI_REL_OFFSET rbp, 0 |
672 | movq %rsp,%rbp | 679 | movq %rsp,%rbp |
@@ -932,6 +939,9 @@ END(spurious_interrupt) | |||
932 | .if \ist | 939 | .if \ist |
933 | movq %gs:pda_data_offset, %rbp | 940 | movq %gs:pda_data_offset, %rbp |
934 | .endif | 941 | .endif |
942 | .if \irqtrace | ||
943 | TRACE_IRQS_OFF | ||
944 | .endif | ||
935 | movq %rsp,%rdi | 945 | movq %rsp,%rdi |
936 | movq ORIG_RAX(%rsp),%rsi | 946 | movq ORIG_RAX(%rsp),%rsi |
937 | movq $-1,ORIG_RAX(%rsp) | 947 | movq $-1,ORIG_RAX(%rsp) |
@@ -1058,7 +1068,8 @@ KPROBE_ENTRY(error_entry) | |||
1058 | je error_kernelspace | 1068 | je error_kernelspace |
1059 | error_swapgs: | 1069 | error_swapgs: |
1060 | SWAPGS | 1070 | SWAPGS |
1061 | error_sti: | 1071 | error_sti: |
1072 | TRACE_IRQS_OFF | ||
1062 | movq %rdi,RDI(%rsp) | 1073 | movq %rdi,RDI(%rsp) |
1063 | CFI_REL_OFFSET rdi,RDI | 1074 | CFI_REL_OFFSET rdi,RDI |
1064 | movq %rsp,%rdi | 1075 | movq %rsp,%rdi |
@@ -1232,7 +1243,7 @@ ENTRY(simd_coprocessor_error) | |||
1232 | END(simd_coprocessor_error) | 1243 | END(simd_coprocessor_error) |
1233 | 1244 | ||
1234 | ENTRY(device_not_available) | 1245 | ENTRY(device_not_available) |
1235 | zeroentry math_state_restore | 1246 | zeroentry do_device_not_available |
1236 | END(device_not_available) | 1247 | END(device_not_available) |
1237 | 1248 | ||
1238 | /* runs on exception stack */ | 1249 | /* runs on exception stack */ |
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 849e5cd485b8..f454c78fcef6 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c | |||
@@ -109,6 +109,7 @@ struct oem_table { | |||
109 | }; | 109 | }; |
110 | 110 | ||
111 | extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); | 111 | extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); |
112 | extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); | ||
112 | #endif | 113 | #endif |
113 | 114 | ||
114 | struct mip_reg { | 115 | struct mip_reg { |
@@ -243,21 +244,38 @@ parse_unisys_oem (char *oemptr) | |||
243 | } | 244 | } |
244 | 245 | ||
245 | #ifdef CONFIG_ACPI | 246 | #ifdef CONFIG_ACPI |
246 | int __init | 247 | static unsigned long oem_addrX; |
247 | find_unisys_acpi_oem_table(unsigned long *oem_addr) | 248 | static unsigned long oem_size; |
249 | int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) | ||
248 | { | 250 | { |
249 | struct acpi_table_header *header = NULL; | 251 | struct acpi_table_header *header = NULL; |
250 | int i = 0; | 252 | int i = 0; |
251 | while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { | 253 | acpi_size tbl_size; |
254 | |||
255 | while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) { | ||
252 | if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { | 256 | if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { |
253 | struct oem_table *t = (struct oem_table *)header; | 257 | struct oem_table *t = (struct oem_table *)header; |
254 | *oem_addr = (unsigned long)__acpi_map_table(t->OEMTableAddr, | 258 | |
255 | t->OEMTableSize); | 259 | oem_addrX = t->OEMTableAddr; |
260 | oem_size = t->OEMTableSize; | ||
261 | early_acpi_os_unmap_memory(header, tbl_size); | ||
262 | |||
263 | *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, | ||
264 | oem_size); | ||
256 | return 0; | 265 | return 0; |
257 | } | 266 | } |
267 | early_acpi_os_unmap_memory(header, tbl_size); | ||
258 | } | 268 | } |
259 | return -1; | 269 | return -1; |
260 | } | 270 | } |
271 | |||
272 | void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) | ||
273 | { | ||
274 | if (!oem_addr) | ||
275 | return; | ||
276 | |||
277 | __acpi_unmap_table((char *)oem_addr, oem_size); | ||
278 | } | ||
261 | #endif | 279 | #endif |
262 | 280 | ||
263 | static void | 281 | static void |
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index ae2ffc8a400c..33581d94a90e 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c | |||
@@ -114,7 +114,7 @@ static void uv_send_IPI_one(int cpu, int vector) | |||
114 | unsigned long val, apicid, lapicid; | 114 | unsigned long val, apicid, lapicid; |
115 | int pnode; | 115 | int pnode; |
116 | 116 | ||
117 | apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */ | 117 | apicid = per_cpu(x86_cpu_to_apicid, cpu); |
118 | lapicid = apicid & 0x3f; /* ZZZ macro needed */ | 118 | lapicid = apicid & 0x3f; /* ZZZ macro needed */ |
119 | pnode = uv_apicid_to_pnode(apicid); | 119 | pnode = uv_apicid_to_pnode(apicid); |
120 | val = | 120 | val = |
@@ -202,12 +202,10 @@ static unsigned int phys_pkg_id(int index_msb) | |||
202 | return uv_read_apic_id() >> index_msb; | 202 | return uv_read_apic_id() >> index_msb; |
203 | } | 203 | } |
204 | 204 | ||
205 | #ifdef ZZZ /* Needs x2apic patch */ | ||
206 | static void uv_send_IPI_self(int vector) | 205 | static void uv_send_IPI_self(int vector) |
207 | { | 206 | { |
208 | apic_write(APIC_SELF_IPI, vector); | 207 | apic_write(APIC_SELF_IPI, vector); |
209 | } | 208 | } |
210 | #endif | ||
211 | 209 | ||
212 | struct genapic apic_x2apic_uv_x = { | 210 | struct genapic apic_x2apic_uv_x = { |
213 | .name = "UV large system", | 211 | .name = "UV large system", |
@@ -215,15 +213,15 @@ struct genapic apic_x2apic_uv_x = { | |||
215 | .int_delivery_mode = dest_Fixed, | 213 | .int_delivery_mode = dest_Fixed, |
216 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), | 214 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), |
217 | .target_cpus = uv_target_cpus, | 215 | .target_cpus = uv_target_cpus, |
218 | .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ | 216 | .vector_allocation_domain = uv_vector_allocation_domain, |
219 | .apic_id_registered = uv_apic_id_registered, | 217 | .apic_id_registered = uv_apic_id_registered, |
220 | .init_apic_ldr = uv_init_apic_ldr, | 218 | .init_apic_ldr = uv_init_apic_ldr, |
221 | .send_IPI_all = uv_send_IPI_all, | 219 | .send_IPI_all = uv_send_IPI_all, |
222 | .send_IPI_allbutself = uv_send_IPI_allbutself, | 220 | .send_IPI_allbutself = uv_send_IPI_allbutself, |
223 | .send_IPI_mask = uv_send_IPI_mask, | 221 | .send_IPI_mask = uv_send_IPI_mask, |
224 | /* ZZZ.send_IPI_self = uv_send_IPI_self, */ | 222 | .send_IPI_self = uv_send_IPI_self, |
225 | .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, | 223 | .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, |
226 | .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ | 224 | .phys_pkg_id = phys_pkg_id, |
227 | .get_apic_id = get_apic_id, | 225 | .get_apic_id = get_apic_id, |
228 | .set_apic_id = set_apic_id, | 226 | .set_apic_id = set_apic_id, |
229 | .apic_id_mask = (0xFFFFFFFFu), | 227 | .apic_id_mask = (0xFFFFFFFFu), |
@@ -286,12 +284,13 @@ static __init void map_low_mmrs(void) | |||
286 | 284 | ||
287 | enum map_type {map_wb, map_uc}; | 285 | enum map_type {map_wb, map_uc}; |
288 | 286 | ||
289 | static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type) | 287 | static __init void map_high(char *id, unsigned long base, int shift, |
288 | int max_pnode, enum map_type map_type) | ||
290 | { | 289 | { |
291 | unsigned long bytes, paddr; | 290 | unsigned long bytes, paddr; |
292 | 291 | ||
293 | paddr = base << shift; | 292 | paddr = base << shift; |
294 | bytes = (1UL << shift); | 293 | bytes = (1UL << shift) * (max_pnode + 1); |
295 | printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, | 294 | printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, |
296 | paddr + bytes); | 295 | paddr + bytes); |
297 | if (map_type == map_uc) | 296 | if (map_type == map_uc) |
@@ -307,7 +306,7 @@ static __init void map_gru_high(int max_pnode) | |||
307 | 306 | ||
308 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); | 307 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); |
309 | if (gru.s.enable) | 308 | if (gru.s.enable) |
310 | map_high("GRU", gru.s.base, shift, map_wb); | 309 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); |
311 | } | 310 | } |
312 | 311 | ||
313 | static __init void map_config_high(int max_pnode) | 312 | static __init void map_config_high(int max_pnode) |
@@ -317,7 +316,7 @@ static __init void map_config_high(int max_pnode) | |||
317 | 316 | ||
318 | cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); | 317 | cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); |
319 | if (cfg.s.enable) | 318 | if (cfg.s.enable) |
320 | map_high("CONFIG", cfg.s.base, shift, map_uc); | 319 | map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc); |
321 | } | 320 | } |
322 | 321 | ||
323 | static __init void map_mmr_high(int max_pnode) | 322 | static __init void map_mmr_high(int max_pnode) |
@@ -327,7 +326,7 @@ static __init void map_mmr_high(int max_pnode) | |||
327 | 326 | ||
328 | mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); | 327 | mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); |
329 | if (mmr.s.enable) | 328 | if (mmr.s.enable) |
330 | map_high("MMR", mmr.s.base, shift, map_uc); | 329 | map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); |
331 | } | 330 | } |
332 | 331 | ||
333 | static __init void map_mmioh_high(int max_pnode) | 332 | static __init void map_mmioh_high(int max_pnode) |
@@ -337,7 +336,7 @@ static __init void map_mmioh_high(int max_pnode) | |||
337 | 336 | ||
338 | mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); | 337 | mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); |
339 | if (mmioh.s.enable) | 338 | if (mmioh.s.enable) |
340 | map_high("MMIOH", mmioh.s.base, shift, map_uc); | 339 | map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); |
341 | } | 340 | } |
342 | 341 | ||
343 | static __init void uv_rtc_init(void) | 342 | static __init void uv_rtc_init(void) |
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index 3e66bd364a9d..1dcb0f13897e 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c | |||
@@ -35,6 +35,7 @@ void __init reserve_ebda_region(void) | |||
35 | 35 | ||
36 | /* start of EBDA area */ | 36 | /* start of EBDA area */ |
37 | ebda_addr = get_bios_ebda(); | 37 | ebda_addr = get_bios_ebda(); |
38 | printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem); | ||
38 | 39 | ||
39 | /* Fixup: bios puts an EBDA in the top 64K segment */ | 40 | /* Fixup: bios puts an EBDA in the top 64K segment */ |
40 | /* of conventional memory, but does not adjust lowmem. */ | 41 | /* of conventional memory, but does not adjust lowmem. */ |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 73deaffadd03..acf62fc233da 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -115,13 +115,17 @@ static void hpet_reserve_platform_timers(unsigned long id) | |||
115 | hd.hd_phys_address = hpet_address; | 115 | hd.hd_phys_address = hpet_address; |
116 | hd.hd_address = hpet; | 116 | hd.hd_address = hpet; |
117 | hd.hd_nirqs = nrtimers; | 117 | hd.hd_nirqs = nrtimers; |
118 | hd.hd_flags = HPET_DATA_PLATFORM; | ||
119 | hpet_reserve_timer(&hd, 0); | 118 | hpet_reserve_timer(&hd, 0); |
120 | 119 | ||
121 | #ifdef CONFIG_HPET_EMULATE_RTC | 120 | #ifdef CONFIG_HPET_EMULATE_RTC |
122 | hpet_reserve_timer(&hd, 1); | 121 | hpet_reserve_timer(&hd, 1); |
123 | #endif | 122 | #endif |
124 | 123 | ||
124 | /* | ||
125 | * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254 | ||
126 | * is wrong for i8259!) not the output IRQ. Many BIOS writers | ||
127 | * don't bother configuring *any* comparator interrupts. | ||
128 | */ | ||
125 | hd.hd_irq[0] = HPET_LEGACY_8254; | 129 | hd.hd_irq[0] = HPET_LEGACY_8254; |
126 | hd.hd_irq[1] = HPET_LEGACY_RTC; | 130 | hd.hd_irq[1] = HPET_LEGACY_RTC; |
127 | 131 | ||
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 1f26fd9ec4f4..5b5be9d43c2a 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c | |||
@@ -135,7 +135,7 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | |||
135 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | 135 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 |
136 | }; | 136 | }; |
137 | 137 | ||
138 | static void __init init_ISA_irqs (void) | 138 | void __init init_ISA_irqs(void) |
139 | { | 139 | { |
140 | int i; | 140 | int i; |
141 | 141 | ||
@@ -164,22 +164,8 @@ static void __init init_ISA_irqs (void) | |||
164 | 164 | ||
165 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | 165 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); |
166 | 166 | ||
167 | void __init native_init_IRQ(void) | 167 | static void __init smp_intr_init(void) |
168 | { | 168 | { |
169 | int i; | ||
170 | |||
171 | init_ISA_irqs(); | ||
172 | /* | ||
173 | * Cover the whole vector space, no vector can escape | ||
174 | * us. (some of these will be overridden and become | ||
175 | * 'special' SMP interrupts) | ||
176 | */ | ||
177 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | ||
178 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
179 | if (vector != IA32_SYSCALL_VECTOR) | ||
180 | set_intr_gate(vector, interrupt[i]); | ||
181 | } | ||
182 | |||
183 | #ifdef CONFIG_SMP | 169 | #ifdef CONFIG_SMP |
184 | /* | 170 | /* |
185 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper | 171 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper |
@@ -207,6 +193,12 @@ void __init native_init_IRQ(void) | |||
207 | /* Low priority IPI to cleanup after moving an irq */ | 193 | /* Low priority IPI to cleanup after moving an irq */ |
208 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 194 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
209 | #endif | 195 | #endif |
196 | } | ||
197 | |||
198 | static void __init apic_intr_init(void) | ||
199 | { | ||
200 | smp_intr_init(); | ||
201 | |||
210 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | 202 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); |
211 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); | 203 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); |
212 | 204 | ||
@@ -216,6 +208,25 @@ void __init native_init_IRQ(void) | |||
216 | /* IPI vectors for APIC spurious and error interrupts */ | 208 | /* IPI vectors for APIC spurious and error interrupts */ |
217 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 209 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
218 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 210 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); |
211 | } | ||
212 | |||
213 | void __init native_init_IRQ(void) | ||
214 | { | ||
215 | int i; | ||
216 | |||
217 | init_ISA_irqs(); | ||
218 | /* | ||
219 | * Cover the whole vector space, no vector can escape | ||
220 | * us. (some of these will be overridden and become | ||
221 | * 'special' SMP interrupts) | ||
222 | */ | ||
223 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | ||
224 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
225 | if (vector != IA32_SYSCALL_VECTOR) | ||
226 | set_intr_gate(vector, interrupt[i]); | ||
227 | } | ||
228 | |||
229 | apic_intr_init(); | ||
219 | 230 | ||
220 | if (!acpi_ioapic) | 231 | if (!acpi_ioapic) |
221 | setup_irq(2, &irq2); | 232 | setup_irq(2, &irq2); |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 922c14058f97..0a1302fe6d45 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -123,7 +123,7 @@ void cpu_idle(void) | |||
123 | } | 123 | } |
124 | } | 124 | } |
125 | 125 | ||
126 | void __show_registers(struct pt_regs *regs, int all) | 126 | void __show_regs(struct pt_regs *regs, int all) |
127 | { | 127 | { |
128 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; | 128 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; |
129 | unsigned long d0, d1, d2, d3, d6, d7; | 129 | unsigned long d0, d1, d2, d3, d6, d7; |
@@ -189,7 +189,7 @@ void __show_registers(struct pt_regs *regs, int all) | |||
189 | 189 | ||
190 | void show_regs(struct pt_regs *regs) | 190 | void show_regs(struct pt_regs *regs) |
191 | { | 191 | { |
192 | __show_registers(regs, 1); | 192 | __show_regs(regs, 1); |
193 | show_trace(NULL, regs, ®s->sp, regs->bp); | 193 | show_trace(NULL, regs, ®s->sp, regs->bp); |
194 | } | 194 | } |
195 | 195 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ca80394ef5b8..cd8c0ed02b7e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -136,7 +136,7 @@ void cpu_idle(void) | |||
136 | } | 136 | } |
137 | 137 | ||
138 | /* Prints also some state that isn't saved in the pt_regs */ | 138 | /* Prints also some state that isn't saved in the pt_regs */ |
139 | void __show_regs(struct pt_regs *regs) | 139 | void __show_regs(struct pt_regs *regs, int all) |
140 | { | 140 | { |
141 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; | 141 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; |
142 | unsigned long d0, d1, d2, d3, d6, d7; | 142 | unsigned long d0, d1, d2, d3, d6, d7; |
@@ -175,6 +175,9 @@ void __show_regs(struct pt_regs *regs) | |||
175 | rdmsrl(MSR_GS_BASE, gs); | 175 | rdmsrl(MSR_GS_BASE, gs); |
176 | rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); | 176 | rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); |
177 | 177 | ||
178 | if (!all) | ||
179 | return; | ||
180 | |||
178 | cr0 = read_cr0(); | 181 | cr0 = read_cr0(); |
179 | cr2 = read_cr2(); | 182 | cr2 = read_cr2(); |
180 | cr3 = read_cr3(); | 183 | cr3 = read_cr3(); |
@@ -200,7 +203,7 @@ void __show_regs(struct pt_regs *regs) | |||
200 | void show_regs(struct pt_regs *regs) | 203 | void show_regs(struct pt_regs *regs) |
201 | { | 204 | { |
202 | printk(KERN_INFO "CPU %d:", smp_processor_id()); | 205 | printk(KERN_INFO "CPU %d:", smp_processor_id()); |
203 | __show_regs(regs); | 206 | __show_regs(regs, 1); |
204 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); | 207 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); |
205 | } | 208 | } |
206 | 209 | ||
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index d13858818100..f6a11b9b1f98 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -354,9 +354,27 @@ static void ati_force_hpet_resume(void) | |||
354 | printk(KERN_DEBUG "Force enabled HPET at resume\n"); | 354 | printk(KERN_DEBUG "Force enabled HPET at resume\n"); |
355 | } | 355 | } |
356 | 356 | ||
357 | static u32 ati_ixp4x0_rev(struct pci_dev *dev) | ||
358 | { | ||
359 | u32 d; | ||
360 | u8 b; | ||
361 | |||
362 | pci_read_config_byte(dev, 0xac, &b); | ||
363 | b &= ~(1<<5); | ||
364 | pci_write_config_byte(dev, 0xac, b); | ||
365 | pci_read_config_dword(dev, 0x70, &d); | ||
366 | d |= 1<<8; | ||
367 | pci_write_config_dword(dev, 0x70, d); | ||
368 | pci_read_config_dword(dev, 0x8, &d); | ||
369 | d &= 0xff; | ||
370 | dev_printk(KERN_DEBUG, &dev->dev, "SB4X0 revision 0x%x\n", d); | ||
371 | return d; | ||
372 | } | ||
373 | |||
357 | static void ati_force_enable_hpet(struct pci_dev *dev) | 374 | static void ati_force_enable_hpet(struct pci_dev *dev) |
358 | { | 375 | { |
359 | u32 uninitialized_var(val); | 376 | u32 d, val; |
377 | u8 b; | ||
360 | 378 | ||
361 | if (hpet_address || force_hpet_address) | 379 | if (hpet_address || force_hpet_address) |
362 | return; | 380 | return; |
@@ -366,14 +384,33 @@ static void ati_force_enable_hpet(struct pci_dev *dev) | |||
366 | return; | 384 | return; |
367 | } | 385 | } |
368 | 386 | ||
387 | d = ati_ixp4x0_rev(dev); | ||
388 | if (d < 0x82) | ||
389 | return; | ||
390 | |||
391 | /* base address */ | ||
369 | pci_write_config_dword(dev, 0x14, 0xfed00000); | 392 | pci_write_config_dword(dev, 0x14, 0xfed00000); |
370 | pci_read_config_dword(dev, 0x14, &val); | 393 | pci_read_config_dword(dev, 0x14, &val); |
394 | |||
395 | /* enable interrupt */ | ||
396 | outb(0x72, 0xcd6); b = inb(0xcd7); | ||
397 | b |= 0x1; | ||
398 | outb(0x72, 0xcd6); outb(b, 0xcd7); | ||
399 | outb(0x72, 0xcd6); b = inb(0xcd7); | ||
400 | if (!(b & 0x1)) | ||
401 | return; | ||
402 | pci_read_config_dword(dev, 0x64, &d); | ||
403 | d |= (1<<10); | ||
404 | pci_write_config_dword(dev, 0x64, d); | ||
405 | pci_read_config_dword(dev, 0x64, &d); | ||
406 | if (!(d & (1<<10))) | ||
407 | return; | ||
408 | |||
371 | force_hpet_address = val; | 409 | force_hpet_address = val; |
372 | force_hpet_resume_type = ATI_FORCE_HPET_RESUME; | 410 | force_hpet_resume_type = ATI_FORCE_HPET_RESUME; |
373 | dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", | 411 | dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", |
374 | force_hpet_address); | 412 | force_hpet_address); |
375 | cached_dev = dev; | 413 | cached_dev = dev; |
376 | return; | ||
377 | } | 414 | } |
378 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, | 415 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, |
379 | ati_force_enable_hpet); | 416 | ati_force_enable_hpet); |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 21b8e0a59780..2255782e8d4b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -302,7 +302,7 @@ static void __init relocate_initrd(void) | |||
302 | if (clen > MAX_MAP_CHUNK-slop) | 302 | if (clen > MAX_MAP_CHUNK-slop) |
303 | clen = MAX_MAP_CHUNK-slop; | 303 | clen = MAX_MAP_CHUNK-slop; |
304 | mapaddr = ramdisk_image & PAGE_MASK; | 304 | mapaddr = ramdisk_image & PAGE_MASK; |
305 | p = early_ioremap(mapaddr, clen+slop); | 305 | p = early_memremap(mapaddr, clen+slop); |
306 | memcpy(q, p+slop, clen); | 306 | memcpy(q, p+slop, clen); |
307 | early_iounmap(p, clen+slop); | 307 | early_iounmap(p, clen+slop); |
308 | q += clen; | 308 | q += clen; |
@@ -379,7 +379,7 @@ static void __init parse_setup_data(void) | |||
379 | return; | 379 | return; |
380 | pa_data = boot_params.hdr.setup_data; | 380 | pa_data = boot_params.hdr.setup_data; |
381 | while (pa_data) { | 381 | while (pa_data) { |
382 | data = early_ioremap(pa_data, PAGE_SIZE); | 382 | data = early_memremap(pa_data, PAGE_SIZE); |
383 | switch (data->type) { | 383 | switch (data->type) { |
384 | case SETUP_E820_EXT: | 384 | case SETUP_E820_EXT: |
385 | parse_e820_ext(data, pa_data); | 385 | parse_e820_ext(data, pa_data); |
@@ -402,7 +402,7 @@ static void __init e820_reserve_setup_data(void) | |||
402 | return; | 402 | return; |
403 | pa_data = boot_params.hdr.setup_data; | 403 | pa_data = boot_params.hdr.setup_data; |
404 | while (pa_data) { | 404 | while (pa_data) { |
405 | data = early_ioremap(pa_data, sizeof(*data)); | 405 | data = early_memremap(pa_data, sizeof(*data)); |
406 | e820_update_range(pa_data, sizeof(*data)+data->len, | 406 | e820_update_range(pa_data, sizeof(*data)+data->len, |
407 | E820_RAM, E820_RESERVED_KERN); | 407 | E820_RAM, E820_RESERVED_KERN); |
408 | found = 1; | 408 | found = 1; |
@@ -428,7 +428,7 @@ static void __init reserve_early_setup_data(void) | |||
428 | return; | 428 | return; |
429 | pa_data = boot_params.hdr.setup_data; | 429 | pa_data = boot_params.hdr.setup_data; |
430 | while (pa_data) { | 430 | while (pa_data) { |
431 | data = early_ioremap(pa_data, sizeof(*data)); | 431 | data = early_memremap(pa_data, sizeof(*data)); |
432 | sprintf(buf, "setup data %x", data->type); | 432 | sprintf(buf, "setup data %x", data->type); |
433 | reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); | 433 | reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); |
434 | pa_data = data->next; | 434 | pa_data = data->next; |
@@ -998,6 +998,8 @@ void __init setup_arch(char **cmdline_p) | |||
998 | */ | 998 | */ |
999 | acpi_boot_table_init(); | 999 | acpi_boot_table_init(); |
1000 | 1000 | ||
1001 | early_acpi_boot_init(); | ||
1002 | |||
1001 | #ifdef CONFIG_ACPI_NUMA | 1003 | #ifdef CONFIG_ACPI_NUMA |
1002 | /* | 1004 | /* |
1003 | * Parse SRAT to discover nodes. | 1005 | * Parse SRAT to discover nodes. |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 76b6f50978f7..8c3aca7cb343 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -334,14 +334,17 @@ static void __cpuinit start_secondary(void *unused) | |||
334 | * does not change while we are assigning vectors to cpus. Holding | 334 | * does not change while we are assigning vectors to cpus. Holding |
335 | * this lock ensures we don't half assign or remove an irq from a cpu. | 335 | * this lock ensures we don't half assign or remove an irq from a cpu. |
336 | */ | 336 | */ |
337 | ipi_call_lock_irq(); | 337 | ipi_call_lock(); |
338 | lock_vector_lock(); | 338 | lock_vector_lock(); |
339 | __setup_vector_irq(smp_processor_id()); | 339 | __setup_vector_irq(smp_processor_id()); |
340 | cpu_set(smp_processor_id(), cpu_online_map); | 340 | cpu_set(smp_processor_id(), cpu_online_map); |
341 | unlock_vector_lock(); | 341 | unlock_vector_lock(); |
342 | ipi_call_unlock_irq(); | 342 | ipi_call_unlock(); |
343 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 343 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; |
344 | 344 | ||
345 | /* enable local interrupts */ | ||
346 | local_irq_enable(); | ||
347 | |||
345 | setup_secondary_clock(); | 348 | setup_secondary_clock(); |
346 | 349 | ||
347 | wmb(); | 350 | wmb(); |
@@ -596,10 +599,12 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
596 | * Give the other CPU some time to accept the IPI. | 599 | * Give the other CPU some time to accept the IPI. |
597 | */ | 600 | */ |
598 | udelay(200); | 601 | udelay(200); |
599 | maxlvt = lapic_get_maxlvt(); | 602 | if (APIC_INTEGRATED(apic_version[phys_apicid])) { |
600 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | 603 | maxlvt = lapic_get_maxlvt(); |
601 | apic_write(APIC_ESR, 0); | 604 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
602 | accept_status = (apic_read(APIC_ESR) & 0xEF); | 605 | apic_write(APIC_ESR, 0); |
606 | accept_status = (apic_read(APIC_ESR) & 0xEF); | ||
607 | } | ||
603 | pr_debug("NMI sent.\n"); | 608 | pr_debug("NMI sent.\n"); |
604 | 609 | ||
605 | if (send_status) | 610 | if (send_status) |
@@ -1256,39 +1261,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1256 | check_nmi_watchdog(); | 1261 | check_nmi_watchdog(); |
1257 | } | 1262 | } |
1258 | 1263 | ||
1259 | #ifdef CONFIG_HOTPLUG_CPU | ||
1260 | |||
1261 | static void remove_siblinginfo(int cpu) | ||
1262 | { | ||
1263 | int sibling; | ||
1264 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
1265 | |||
1266 | for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) { | ||
1267 | cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); | ||
1268 | /*/ | ||
1269 | * last thread sibling in this cpu core going down | ||
1270 | */ | ||
1271 | if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) | ||
1272 | cpu_data(sibling).booted_cores--; | ||
1273 | } | ||
1274 | |||
1275 | for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu)) | ||
1276 | cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); | ||
1277 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); | ||
1278 | cpus_clear(per_cpu(cpu_core_map, cpu)); | ||
1279 | c->phys_proc_id = 0; | ||
1280 | c->cpu_core_id = 0; | ||
1281 | cpu_clear(cpu, cpu_sibling_setup_map); | ||
1282 | } | ||
1283 | |||
1284 | static int additional_cpus __initdata = -1; | ||
1285 | |||
1286 | static __init int setup_additional_cpus(char *s) | ||
1287 | { | ||
1288 | return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL; | ||
1289 | } | ||
1290 | early_param("additional_cpus", setup_additional_cpus); | ||
1291 | |||
1292 | /* | 1264 | /* |
1293 | * cpu_possible_map should be static, it cannot change as cpu's | 1265 | * cpu_possible_map should be static, it cannot change as cpu's |
1294 | * are onlined, or offlined. The reason is per-cpu data-structures | 1266 | * are onlined, or offlined. The reason is per-cpu data-structures |
@@ -1308,21 +1280,13 @@ early_param("additional_cpus", setup_additional_cpus); | |||
1308 | */ | 1280 | */ |
1309 | __init void prefill_possible_map(void) | 1281 | __init void prefill_possible_map(void) |
1310 | { | 1282 | { |
1311 | int i; | 1283 | int i, possible; |
1312 | int possible; | ||
1313 | 1284 | ||
1314 | /* no processor from mptable or madt */ | 1285 | /* no processor from mptable or madt */ |
1315 | if (!num_processors) | 1286 | if (!num_processors) |
1316 | num_processors = 1; | 1287 | num_processors = 1; |
1317 | 1288 | ||
1318 | if (additional_cpus == -1) { | 1289 | possible = num_processors + disabled_cpus; |
1319 | if (disabled_cpus > 0) | ||
1320 | additional_cpus = disabled_cpus; | ||
1321 | else | ||
1322 | additional_cpus = 0; | ||
1323 | } | ||
1324 | |||
1325 | possible = num_processors + additional_cpus; | ||
1326 | if (possible > NR_CPUS) | 1290 | if (possible > NR_CPUS) |
1327 | possible = NR_CPUS; | 1291 | possible = NR_CPUS; |
1328 | 1292 | ||
@@ -1335,6 +1299,31 @@ __init void prefill_possible_map(void) | |||
1335 | nr_cpu_ids = possible; | 1299 | nr_cpu_ids = possible; |
1336 | } | 1300 | } |
1337 | 1301 | ||
1302 | #ifdef CONFIG_HOTPLUG_CPU | ||
1303 | |||
1304 | static void remove_siblinginfo(int cpu) | ||
1305 | { | ||
1306 | int sibling; | ||
1307 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
1308 | |||
1309 | for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) { | ||
1310 | cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); | ||
1311 | /*/ | ||
1312 | * last thread sibling in this cpu core going down | ||
1313 | */ | ||
1314 | if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) | ||
1315 | cpu_data(sibling).booted_cores--; | ||
1316 | } | ||
1317 | |||
1318 | for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu)) | ||
1319 | cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); | ||
1320 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); | ||
1321 | cpus_clear(per_cpu(cpu_core_map, cpu)); | ||
1322 | c->phys_proc_id = 0; | ||
1323 | c->cpu_core_id = 0; | ||
1324 | cpu_clear(cpu, cpu_sibling_setup_map); | ||
1325 | } | ||
1326 | |||
1338 | static void __ref remove_cpu_from_maps(int cpu) | 1327 | static void __ref remove_cpu_from_maps(int cpu) |
1339 | { | 1328 | { |
1340 | cpu_clear(cpu, cpu_online_map); | 1329 | cpu_clear(cpu, cpu_online_map); |
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index bbecf8b6bf96..77b400f06ea2 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c | |||
@@ -47,10 +47,9 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
47 | unsigned long pc = instruction_pointer(regs); | 47 | unsigned long pc = instruction_pointer(regs); |
48 | 48 | ||
49 | #ifdef CONFIG_SMP | 49 | #ifdef CONFIG_SMP |
50 | if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->cs) && | 50 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { |
51 | in_lock_functions(pc)) { | ||
52 | #ifdef CONFIG_FRAME_POINTER | 51 | #ifdef CONFIG_FRAME_POINTER |
53 | return *(unsigned long *)(regs->bp + 4); | 52 | return *(unsigned long *)(regs->bp + sizeof(long)); |
54 | #else | 53 | #else |
55 | unsigned long *sp = (unsigned long *)®s->sp; | 54 | unsigned long *sp = (unsigned long *)®s->sp; |
56 | 55 | ||
@@ -95,6 +94,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
95 | 94 | ||
96 | do_timer_interrupt_hook(); | 95 | do_timer_interrupt_hook(); |
97 | 96 | ||
97 | #ifdef CONFIG_MCA | ||
98 | if (MCA_bus) { | 98 | if (MCA_bus) { |
99 | /* The PS/2 uses level-triggered interrupts. You can't | 99 | /* The PS/2 uses level-triggered interrupts. You can't |
100 | turn them off, nor would you want to (any attempt to | 100 | turn them off, nor would you want to (any attempt to |
@@ -108,6 +108,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
108 | u8 irq_v = inb_p( 0x61 ); /* read the current state */ | 108 | u8 irq_v = inb_p( 0x61 ); /* read the current state */ |
109 | outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ | 109 | outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ |
110 | } | 110 | } |
111 | #endif | ||
111 | 112 | ||
112 | return IRQ_HANDLED; | 113 | return IRQ_HANDLED; |
113 | } | 114 | } |
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index e3d49c553af2..cb19d650c216 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/time.h> | 18 | #include <linux/time.h> |
19 | #include <linux/mca.h> | ||
19 | 20 | ||
20 | #include <asm/i8253.h> | 21 | #include <asm/i8253.h> |
21 | #include <asm/hpet.h> | 22 | #include <asm/hpet.h> |
@@ -33,23 +34,34 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
33 | /* Assume the lock function has either no stack frame or a copy | 34 | /* Assume the lock function has either no stack frame or a copy |
34 | of flags from PUSHF | 35 | of flags from PUSHF |
35 | Eflags always has bits 22 and up cleared unlike kernel addresses. */ | 36 | Eflags always has bits 22 and up cleared unlike kernel addresses. */ |
36 | if (!user_mode(regs) && in_lock_functions(pc)) { | 37 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { |
38 | #ifdef CONFIG_FRAME_POINTER | ||
39 | return *(unsigned long *)(regs->bp + sizeof(long)); | ||
40 | #else | ||
37 | unsigned long *sp = (unsigned long *)regs->sp; | 41 | unsigned long *sp = (unsigned long *)regs->sp; |
38 | if (sp[0] >> 22) | 42 | if (sp[0] >> 22) |
39 | return sp[0]; | 43 | return sp[0]; |
40 | if (sp[1] >> 22) | 44 | if (sp[1] >> 22) |
41 | return sp[1]; | 45 | return sp[1]; |
46 | #endif | ||
42 | } | 47 | } |
43 | return pc; | 48 | return pc; |
44 | } | 49 | } |
45 | EXPORT_SYMBOL(profile_pc); | 50 | EXPORT_SYMBOL(profile_pc); |
46 | 51 | ||
47 | static irqreturn_t timer_event_interrupt(int irq, void *dev_id) | 52 | irqreturn_t timer_interrupt(int irq, void *dev_id) |
48 | { | 53 | { |
49 | add_pda(irq0_irqs, 1); | 54 | add_pda(irq0_irqs, 1); |
50 | 55 | ||
51 | global_clock_event->event_handler(global_clock_event); | 56 | global_clock_event->event_handler(global_clock_event); |
52 | 57 | ||
58 | #ifdef CONFIG_MCA | ||
59 | if (MCA_bus) { | ||
60 | u8 irq_v = inb_p(0x61); /* read the current state */ | ||
61 | outb_p(irq_v|0x80, 0x61); /* reset the IRQ */ | ||
62 | } | ||
63 | #endif | ||
64 | |||
53 | return IRQ_HANDLED; | 65 | return IRQ_HANDLED; |
54 | } | 66 | } |
55 | 67 | ||
@@ -100,7 +112,7 @@ unsigned long __init calibrate_cpu(void) | |||
100 | } | 112 | } |
101 | 113 | ||
102 | static struct irqaction irq0 = { | 114 | static struct irqaction irq0 = { |
103 | .handler = timer_event_interrupt, | 115 | .handler = timer_interrupt, |
104 | .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING, | 116 | .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING, |
105 | .mask = CPU_MASK_NONE, | 117 | .mask = CPU_MASK_NONE, |
106 | .name = "timer" | 118 | .name = "timer" |
@@ -111,16 +123,13 @@ void __init hpet_time_init(void) | |||
111 | if (!hpet_enable()) | 123 | if (!hpet_enable()) |
112 | setup_pit_timer(); | 124 | setup_pit_timer(); |
113 | 125 | ||
126 | irq0.mask = cpumask_of_cpu(0); | ||
114 | setup_irq(0, &irq0); | 127 | setup_irq(0, &irq0); |
115 | } | 128 | } |
116 | 129 | ||
117 | void __init time_init(void) | 130 | void __init time_init(void) |
118 | { | 131 | { |
119 | tsc_init(); | 132 | tsc_init(); |
120 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) | ||
121 | vgetcpu_mode = VGETCPU_RDTSCP; | ||
122 | else | ||
123 | vgetcpu_mode = VGETCPU_LSL; | ||
124 | 133 | ||
125 | late_time_init = choose_time_init(); | 134 | late_time_init = choose_time_init(); |
126 | } | 135 | } |
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps.c index 0429c5de5ea9..e062974cce34 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps.c | |||
@@ -7,13 +7,11 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | /* | 9 | /* |
10 | * 'Traps.c' handles hardware traps and faults after we have saved some | 10 | * Handle hardware traps and faults. |
11 | * state in 'asm.s'. | ||
12 | */ | 11 | */ |
13 | #include <linux/interrupt.h> | 12 | #include <linux/interrupt.h> |
14 | #include <linux/kallsyms.h> | 13 | #include <linux/kallsyms.h> |
15 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
16 | #include <linux/highmem.h> | ||
17 | #include <linux/kprobes.h> | 15 | #include <linux/kprobes.h> |
18 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
19 | #include <linux/utsname.h> | 17 | #include <linux/utsname.h> |
@@ -32,6 +30,8 @@ | |||
32 | #include <linux/bug.h> | 30 | #include <linux/bug.h> |
33 | #include <linux/nmi.h> | 31 | #include <linux/nmi.h> |
34 | #include <linux/mm.h> | 32 | #include <linux/mm.h> |
33 | #include <linux/smp.h> | ||
34 | #include <linux/io.h> | ||
35 | 35 | ||
36 | #ifdef CONFIG_EISA | 36 | #ifdef CONFIG_EISA |
37 | #include <linux/ioport.h> | 37 | #include <linux/ioport.h> |
@@ -46,21 +46,31 @@ | |||
46 | #include <linux/edac.h> | 46 | #include <linux/edac.h> |
47 | #endif | 47 | #endif |
48 | 48 | ||
49 | #include <asm/arch_hooks.h> | ||
50 | #include <asm/stacktrace.h> | 49 | #include <asm/stacktrace.h> |
51 | #include <asm/processor.h> | 50 | #include <asm/processor.h> |
52 | #include <asm/debugreg.h> | 51 | #include <asm/debugreg.h> |
53 | #include <asm/atomic.h> | 52 | #include <asm/atomic.h> |
54 | #include <asm/system.h> | 53 | #include <asm/system.h> |
55 | #include <asm/unwind.h> | 54 | #include <asm/unwind.h> |
55 | #include <asm/traps.h> | ||
56 | #include <asm/desc.h> | 56 | #include <asm/desc.h> |
57 | #include <asm/i387.h> | 57 | #include <asm/i387.h> |
58 | |||
59 | #include <mach_traps.h> | ||
60 | |||
61 | #ifdef CONFIG_X86_64 | ||
62 | #include <asm/pgalloc.h> | ||
63 | #include <asm/proto.h> | ||
64 | #include <asm/pda.h> | ||
65 | #else | ||
66 | #include <asm/processor-flags.h> | ||
67 | #include <asm/arch_hooks.h> | ||
58 | #include <asm/nmi.h> | 68 | #include <asm/nmi.h> |
59 | #include <asm/smp.h> | 69 | #include <asm/smp.h> |
60 | #include <asm/io.h> | 70 | #include <asm/io.h> |
61 | #include <asm/traps.h> | 71 | #include <asm/traps.h> |
62 | 72 | ||
63 | #include "mach_traps.h" | 73 | #include "cpu/mcheck/mce.h" |
64 | 74 | ||
65 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | 75 | DECLARE_BITMAP(used_vectors, NR_VECTORS); |
66 | EXPORT_SYMBOL_GPL(used_vectors); | 76 | EXPORT_SYMBOL_GPL(used_vectors); |
@@ -77,418 +87,104 @@ char ignore_fpu_irq; | |||
77 | */ | 87 | */ |
78 | gate_desc idt_table[256] | 88 | gate_desc idt_table[256] |
79 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | 89 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; |
80 | |||
81 | int panic_on_unrecovered_nmi; | ||
82 | int kstack_depth_to_print = 24; | ||
83 | static unsigned int code_bytes = 64; | ||
84 | static int ignore_nmis; | ||
85 | static int die_counter; | ||
86 | |||
87 | void printk_address(unsigned long address, int reliable) | ||
88 | { | ||
89 | #ifdef CONFIG_KALLSYMS | ||
90 | unsigned long offset = 0; | ||
91 | unsigned long symsize; | ||
92 | const char *symname; | ||
93 | char *modname; | ||
94 | char *delim = ":"; | ||
95 | char namebuf[KSYM_NAME_LEN]; | ||
96 | char reliab[4] = ""; | ||
97 | |||
98 | symname = kallsyms_lookup(address, &symsize, &offset, | ||
99 | &modname, namebuf); | ||
100 | if (!symname) { | ||
101 | printk(" [<%08lx>]\n", address); | ||
102 | return; | ||
103 | } | ||
104 | if (!reliable) | ||
105 | strcpy(reliab, "? "); | ||
106 | |||
107 | if (!modname) | ||
108 | modname = delim = ""; | ||
109 | printk(" [<%08lx>] %s%s%s%s%s+0x%lx/0x%lx\n", | ||
110 | address, reliab, delim, modname, delim, symname, offset, symsize); | ||
111 | #else | ||
112 | printk(" [<%08lx>]\n", address); | ||
113 | #endif | 90 | #endif |
114 | } | ||
115 | |||
116 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
117 | void *p, unsigned int size) | ||
118 | { | ||
119 | void *t = tinfo; | ||
120 | return p > t && p <= t + THREAD_SIZE - size; | ||
121 | } | ||
122 | |||
123 | /* The form of the top of the frame on the stack */ | ||
124 | struct stack_frame { | ||
125 | struct stack_frame *next_frame; | ||
126 | unsigned long return_address; | ||
127 | }; | ||
128 | |||
129 | static inline unsigned long | ||
130 | print_context_stack(struct thread_info *tinfo, | ||
131 | unsigned long *stack, unsigned long bp, | ||
132 | const struct stacktrace_ops *ops, void *data) | ||
133 | { | ||
134 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
135 | |||
136 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) { | ||
137 | unsigned long addr; | ||
138 | |||
139 | addr = *stack; | ||
140 | if (__kernel_text_address(addr)) { | ||
141 | if ((unsigned long) stack == bp + 4) { | ||
142 | ops->address(data, addr, 1); | ||
143 | frame = frame->next_frame; | ||
144 | bp = (unsigned long) frame; | ||
145 | } else { | ||
146 | ops->address(data, addr, bp == 0); | ||
147 | } | ||
148 | } | ||
149 | stack++; | ||
150 | } | ||
151 | return bp; | ||
152 | } | ||
153 | |||
154 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | ||
155 | unsigned long *stack, unsigned long bp, | ||
156 | const struct stacktrace_ops *ops, void *data) | ||
157 | { | ||
158 | if (!task) | ||
159 | task = current; | ||
160 | |||
161 | if (!stack) { | ||
162 | unsigned long dummy; | ||
163 | stack = &dummy; | ||
164 | if (task != current) | ||
165 | stack = (unsigned long *)task->thread.sp; | ||
166 | } | ||
167 | |||
168 | #ifdef CONFIG_FRAME_POINTER | ||
169 | if (!bp) { | ||
170 | if (task == current) { | ||
171 | /* Grab bp right from our regs */ | ||
172 | asm("movl %%ebp, %0" : "=r" (bp) :); | ||
173 | } else { | ||
174 | /* bp is the last reg pushed by switch_to */ | ||
175 | bp = *(unsigned long *) task->thread.sp; | ||
176 | } | ||
177 | } | ||
178 | #endif | ||
179 | |||
180 | for (;;) { | ||
181 | struct thread_info *context; | ||
182 | |||
183 | context = (struct thread_info *) | ||
184 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | ||
185 | bp = print_context_stack(context, stack, bp, ops, data); | ||
186 | /* | ||
187 | * Should be after the line below, but somewhere | ||
188 | * in early boot context comes out corrupted and we | ||
189 | * can't reference it: | ||
190 | */ | ||
191 | if (ops->stack(data, "IRQ") < 0) | ||
192 | break; | ||
193 | stack = (unsigned long *)context->previous_esp; | ||
194 | if (!stack) | ||
195 | break; | ||
196 | touch_nmi_watchdog(); | ||
197 | } | ||
198 | } | ||
199 | EXPORT_SYMBOL(dump_trace); | ||
200 | |||
201 | static void | ||
202 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
203 | { | ||
204 | printk(data); | ||
205 | print_symbol(msg, symbol); | ||
206 | printk("\n"); | ||
207 | } | ||
208 | |||
209 | static void print_trace_warning(void *data, char *msg) | ||
210 | { | ||
211 | printk("%s%s\n", (char *)data, msg); | ||
212 | } | ||
213 | 91 | ||
214 | static int print_trace_stack(void *data, char *name) | 92 | static int ignore_nmis; |
215 | { | ||
216 | return 0; | ||
217 | } | ||
218 | |||
219 | /* | ||
220 | * Print one address/symbol entries per line. | ||
221 | */ | ||
222 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
223 | { | ||
224 | printk("%s [<%08lx>] ", (char *)data, addr); | ||
225 | if (!reliable) | ||
226 | printk("? "); | ||
227 | print_symbol("%s\n", addr); | ||
228 | touch_nmi_watchdog(); | ||
229 | } | ||
230 | |||
231 | static const struct stacktrace_ops print_trace_ops = { | ||
232 | .warning = print_trace_warning, | ||
233 | .warning_symbol = print_trace_warning_symbol, | ||
234 | .stack = print_trace_stack, | ||
235 | .address = print_trace_address, | ||
236 | }; | ||
237 | 93 | ||
238 | static void | 94 | static inline void conditional_sti(struct pt_regs *regs) |
239 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
240 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
241 | { | 95 | { |
242 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | 96 | if (regs->flags & X86_EFLAGS_IF) |
243 | printk("%s =======================\n", log_lvl); | 97 | local_irq_enable(); |
244 | } | 98 | } |
245 | 99 | ||
246 | void show_trace(struct task_struct *task, struct pt_regs *regs, | 100 | static inline void preempt_conditional_sti(struct pt_regs *regs) |
247 | unsigned long *stack, unsigned long bp) | ||
248 | { | 101 | { |
249 | show_trace_log_lvl(task, regs, stack, bp, ""); | 102 | inc_preempt_count(); |
103 | if (regs->flags & X86_EFLAGS_IF) | ||
104 | local_irq_enable(); | ||
250 | } | 105 | } |
251 | 106 | ||
252 | static void | 107 | static inline void preempt_conditional_cli(struct pt_regs *regs) |
253 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
254 | unsigned long *sp, unsigned long bp, char *log_lvl) | ||
255 | { | 108 | { |
256 | unsigned long *stack; | 109 | if (regs->flags & X86_EFLAGS_IF) |
257 | int i; | 110 | local_irq_disable(); |
258 | 111 | dec_preempt_count(); | |
259 | if (sp == NULL) { | ||
260 | if (task) | ||
261 | sp = (unsigned long *)task->thread.sp; | ||
262 | else | ||
263 | sp = (unsigned long *)&sp; | ||
264 | } | ||
265 | |||
266 | stack = sp; | ||
267 | for (i = 0; i < kstack_depth_to_print; i++) { | ||
268 | if (kstack_end(stack)) | ||
269 | break; | ||
270 | if (i && ((i % 8) == 0)) | ||
271 | printk("\n%s ", log_lvl); | ||
272 | printk("%08lx ", *stack++); | ||
273 | } | ||
274 | printk("\n%sCall Trace:\n", log_lvl); | ||
275 | |||
276 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | ||
277 | } | 112 | } |
278 | 113 | ||
279 | void show_stack(struct task_struct *task, unsigned long *sp) | 114 | #ifdef CONFIG_X86_32 |
115 | static inline void | ||
116 | die_if_kernel(const char *str, struct pt_regs *regs, long err) | ||
280 | { | 117 | { |
281 | printk(" "); | 118 | if (!user_mode_vm(regs)) |
282 | show_stack_log_lvl(task, NULL, sp, 0, ""); | 119 | die(str, regs, err); |
283 | } | 120 | } |
284 | 121 | ||
285 | /* | 122 | /* |
286 | * The architecture-independent dump_stack generator | 123 | * Perform the lazy TSS's I/O bitmap copy. If the TSS has an |
124 | * invalid offset set (the LAZY one) and the faulting thread has | ||
125 | * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, | ||
126 | * we set the offset field correctly and return 1. | ||
287 | */ | 127 | */ |
288 | void dump_stack(void) | 128 | static int lazy_iobitmap_copy(void) |
289 | { | 129 | { |
290 | unsigned long bp = 0; | 130 | struct thread_struct *thread; |
291 | unsigned long stack; | 131 | struct tss_struct *tss; |
292 | 132 | int cpu; | |
293 | #ifdef CONFIG_FRAME_POINTER | ||
294 | if (!bp) | ||
295 | asm("movl %%ebp, %0" : "=r" (bp):); | ||
296 | #endif | ||
297 | |||
298 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
299 | current->pid, current->comm, print_tainted(), | ||
300 | init_utsname()->release, | ||
301 | (int)strcspn(init_utsname()->version, " "), | ||
302 | init_utsname()->version); | ||
303 | |||
304 | show_trace(current, NULL, &stack, bp); | ||
305 | } | ||
306 | |||
307 | EXPORT_SYMBOL(dump_stack); | ||
308 | |||
309 | void show_registers(struct pt_regs *regs) | ||
310 | { | ||
311 | int i; | ||
312 | 133 | ||
313 | print_modules(); | 134 | cpu = get_cpu(); |
314 | __show_registers(regs, 0); | 135 | tss = &per_cpu(init_tss, cpu); |
136 | thread = ¤t->thread; | ||
315 | 137 | ||
316 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", | 138 | if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && |
317 | TASK_COMM_LEN, current->comm, task_pid_nr(current), | 139 | thread->io_bitmap_ptr) { |
318 | current_thread_info(), current, task_thread_info(current)); | 140 | memcpy(tss->io_bitmap, thread->io_bitmap_ptr, |
319 | /* | 141 | thread->io_bitmap_max); |
320 | * When in-kernel, we also print out the stack and code at the | 142 | /* |
321 | * time of the fault.. | 143 | * If the previously set map was extending to higher ports |
322 | */ | 144 | * than the current one, pad extra space with 0xff (no access). |
323 | if (!user_mode_vm(regs)) { | 145 | */ |
324 | unsigned int code_prologue = code_bytes * 43 / 64; | 146 | if (thread->io_bitmap_max < tss->io_bitmap_max) { |
325 | unsigned int code_len = code_bytes; | 147 | memset((char *) tss->io_bitmap + |
326 | unsigned char c; | 148 | thread->io_bitmap_max, 0xff, |
327 | u8 *ip; | 149 | tss->io_bitmap_max - thread->io_bitmap_max); |
328 | |||
329 | printk("\n" KERN_EMERG "Stack: "); | ||
330 | show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); | ||
331 | |||
332 | printk(KERN_EMERG "Code: "); | ||
333 | |||
334 | ip = (u8 *)regs->ip - code_prologue; | ||
335 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | ||
336 | /* try starting at EIP */ | ||
337 | ip = (u8 *)regs->ip; | ||
338 | code_len = code_len - code_prologue + 1; | ||
339 | } | ||
340 | for (i = 0; i < code_len; i++, ip++) { | ||
341 | if (ip < (u8 *)PAGE_OFFSET || | ||
342 | probe_kernel_address(ip, c)) { | ||
343 | printk(" Bad EIP value."); | ||
344 | break; | ||
345 | } | ||
346 | if (ip == (u8 *)regs->ip) | ||
347 | printk("<%02x> ", c); | ||
348 | else | ||
349 | printk("%02x ", c); | ||
350 | } | 150 | } |
351 | } | 151 | tss->io_bitmap_max = thread->io_bitmap_max; |
352 | printk("\n"); | 152 | tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; |
353 | } | 153 | tss->io_bitmap_owner = thread; |
354 | 154 | put_cpu(); | |
355 | int is_valid_bugaddr(unsigned long ip) | ||
356 | { | ||
357 | unsigned short ud2; | ||
358 | |||
359 | if (ip < PAGE_OFFSET) | ||
360 | return 0; | ||
361 | if (probe_kernel_address((unsigned short *)ip, ud2)) | ||
362 | return 0; | ||
363 | |||
364 | return ud2 == 0x0b0f; | ||
365 | } | ||
366 | |||
367 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
368 | static int die_owner = -1; | ||
369 | static unsigned int die_nest_count; | ||
370 | |||
371 | unsigned __kprobes long oops_begin(void) | ||
372 | { | ||
373 | unsigned long flags; | ||
374 | |||
375 | oops_enter(); | ||
376 | |||
377 | if (die_owner != raw_smp_processor_id()) { | ||
378 | console_verbose(); | ||
379 | raw_local_irq_save(flags); | ||
380 | __raw_spin_lock(&die_lock); | ||
381 | die_owner = smp_processor_id(); | ||
382 | die_nest_count = 0; | ||
383 | bust_spinlocks(1); | ||
384 | } else { | ||
385 | raw_local_irq_save(flags); | ||
386 | } | ||
387 | die_nest_count++; | ||
388 | return flags; | ||
389 | } | ||
390 | |||
391 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
392 | { | ||
393 | bust_spinlocks(0); | ||
394 | die_owner = -1; | ||
395 | add_taint(TAINT_DIE); | ||
396 | __raw_spin_unlock(&die_lock); | ||
397 | raw_local_irq_restore(flags); | ||
398 | |||
399 | if (!regs) | ||
400 | return; | ||
401 | |||
402 | if (kexec_should_crash(current)) | ||
403 | crash_kexec(regs); | ||
404 | |||
405 | if (in_interrupt()) | ||
406 | panic("Fatal exception in interrupt"); | ||
407 | |||
408 | if (panic_on_oops) | ||
409 | panic("Fatal exception"); | ||
410 | |||
411 | oops_exit(); | ||
412 | do_exit(signr); | ||
413 | } | ||
414 | |||
415 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
416 | { | ||
417 | unsigned short ss; | ||
418 | unsigned long sp; | ||
419 | 155 | ||
420 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
421 | #ifdef CONFIG_PREEMPT | ||
422 | printk("PREEMPT "); | ||
423 | #endif | ||
424 | #ifdef CONFIG_SMP | ||
425 | printk("SMP "); | ||
426 | #endif | ||
427 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
428 | printk("DEBUG_PAGEALLOC"); | ||
429 | #endif | ||
430 | printk("\n"); | ||
431 | if (notify_die(DIE_OOPS, str, regs, err, | ||
432 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
433 | return 1; | 156 | return 1; |
434 | |||
435 | show_registers(regs); | ||
436 | /* Executive summary in case the oops scrolled away */ | ||
437 | sp = (unsigned long) (®s->sp); | ||
438 | savesegment(ss, ss); | ||
439 | if (user_mode(regs)) { | ||
440 | sp = regs->sp; | ||
441 | ss = regs->ss & 0xffff; | ||
442 | } | 157 | } |
443 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | 158 | put_cpu(); |
444 | print_symbol("%s", regs->ip); | ||
445 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
446 | return 0; | ||
447 | } | ||
448 | |||
449 | /* | ||
450 | * This is gone through when something in the kernel has done something bad | ||
451 | * and is about to be terminated: | ||
452 | */ | ||
453 | void die(const char *str, struct pt_regs *regs, long err) | ||
454 | { | ||
455 | unsigned long flags = oops_begin(); | ||
456 | |||
457 | if (die_nest_count < 3) { | ||
458 | report_bug(regs->ip, regs); | ||
459 | |||
460 | if (__die(str, regs, err)) | ||
461 | regs = NULL; | ||
462 | } else { | ||
463 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); | ||
464 | } | ||
465 | |||
466 | oops_end(flags, regs, SIGSEGV); | ||
467 | } | ||
468 | 159 | ||
469 | static inline void | 160 | return 0; |
470 | die_if_kernel(const char *str, struct pt_regs *regs, long err) | ||
471 | { | ||
472 | if (!user_mode_vm(regs)) | ||
473 | die(str, regs, err); | ||
474 | } | 161 | } |
162 | #endif | ||
475 | 163 | ||
476 | static void __kprobes | 164 | static void __kprobes |
477 | do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs, | 165 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, |
478 | long error_code, siginfo_t *info) | 166 | long error_code, siginfo_t *info) |
479 | { | 167 | { |
480 | struct task_struct *tsk = current; | 168 | struct task_struct *tsk = current; |
481 | 169 | ||
170 | #ifdef CONFIG_X86_32 | ||
482 | if (regs->flags & X86_VM_MASK) { | 171 | if (regs->flags & X86_VM_MASK) { |
483 | if (vm86) | 172 | /* |
173 | * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. | ||
174 | * On nmi (interrupt 2), do_trap should not be called. | ||
175 | */ | ||
176 | if (trapnr < 6) | ||
484 | goto vm86_trap; | 177 | goto vm86_trap; |
485 | goto trap_signal; | 178 | goto trap_signal; |
486 | } | 179 | } |
180 | #endif | ||
487 | 181 | ||
488 | if (!user_mode(regs)) | 182 | if (!user_mode(regs)) |
489 | goto kernel_trap; | 183 | goto kernel_trap; |
490 | 184 | ||
185 | #ifdef CONFIG_X86_32 | ||
491 | trap_signal: | 186 | trap_signal: |
187 | #endif | ||
492 | /* | 188 | /* |
493 | * We want error_code and trap_no set for userspace faults and | 189 | * We want error_code and trap_no set for userspace faults and |
494 | * kernelspace faults which result in die(), but not | 190 | * kernelspace faults which result in die(), but not |
@@ -501,6 +197,18 @@ trap_signal: | |||
501 | tsk->thread.error_code = error_code; | 197 | tsk->thread.error_code = error_code; |
502 | tsk->thread.trap_no = trapnr; | 198 | tsk->thread.trap_no = trapnr; |
503 | 199 | ||
200 | #ifdef CONFIG_X86_64 | ||
201 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && | ||
202 | printk_ratelimit()) { | ||
203 | printk(KERN_INFO | ||
204 | "%s[%d] trap %s ip:%lx sp:%lx error:%lx", | ||
205 | tsk->comm, tsk->pid, str, | ||
206 | regs->ip, regs->sp, error_code); | ||
207 | print_vma_addr(" in ", regs->ip); | ||
208 | printk("\n"); | ||
209 | } | ||
210 | #endif | ||
211 | |||
504 | if (info) | 212 | if (info) |
505 | force_sig_info(signr, info, tsk); | 213 | force_sig_info(signr, info, tsk); |
506 | else | 214 | else |
@@ -515,29 +223,29 @@ kernel_trap: | |||
515 | } | 223 | } |
516 | return; | 224 | return; |
517 | 225 | ||
226 | #ifdef CONFIG_X86_32 | ||
518 | vm86_trap: | 227 | vm86_trap: |
519 | if (handle_vm86_trap((struct kernel_vm86_regs *) regs, | 228 | if (handle_vm86_trap((struct kernel_vm86_regs *) regs, |
520 | error_code, trapnr)) | 229 | error_code, trapnr)) |
521 | goto trap_signal; | 230 | goto trap_signal; |
522 | return; | 231 | return; |
232 | #endif | ||
523 | } | 233 | } |
524 | 234 | ||
525 | #define DO_ERROR(trapnr, signr, str, name) \ | 235 | #define DO_ERROR(trapnr, signr, str, name) \ |
526 | void do_##name(struct pt_regs *regs, long error_code) \ | 236 | dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ |
527 | { \ | 237 | { \ |
528 | trace_hardirqs_fixup(); \ | ||
529 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 238 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
530 | == NOTIFY_STOP) \ | 239 | == NOTIFY_STOP) \ |
531 | return; \ | 240 | return; \ |
532 | do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ | 241 | conditional_sti(regs); \ |
242 | do_trap(trapnr, signr, str, regs, error_code, NULL); \ | ||
533 | } | 243 | } |
534 | 244 | ||
535 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ | 245 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ |
536 | void do_##name(struct pt_regs *regs, long error_code) \ | 246 | dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ |
537 | { \ | 247 | { \ |
538 | siginfo_t info; \ | 248 | siginfo_t info; \ |
539 | if (irq) \ | ||
540 | local_irq_enable(); \ | ||
541 | info.si_signo = signr; \ | 249 | info.si_signo = signr; \ |
542 | info.si_errno = 0; \ | 250 | info.si_errno = 0; \ |
543 | info.si_code = sicode; \ | 251 | info.si_code = sicode; \ |
@@ -545,90 +253,68 @@ void do_##name(struct pt_regs *regs, long error_code) \ | |||
545 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 253 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
546 | == NOTIFY_STOP) \ | 254 | == NOTIFY_STOP) \ |
547 | return; \ | 255 | return; \ |
548 | do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ | 256 | conditional_sti(regs); \ |
257 | do_trap(trapnr, signr, str, regs, error_code, &info); \ | ||
549 | } | 258 | } |
550 | 259 | ||
551 | #define DO_VM86_ERROR(trapnr, signr, str, name) \ | 260 | DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) |
552 | void do_##name(struct pt_regs *regs, long error_code) \ | 261 | DO_ERROR(4, SIGSEGV, "overflow", overflow) |
553 | { \ | 262 | DO_ERROR(5, SIGSEGV, "bounds", bounds) |
554 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 263 | DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) |
555 | == NOTIFY_STOP) \ | ||
556 | return; \ | ||
557 | do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ | ||
558 | } | ||
559 | |||
560 | #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | ||
561 | void do_##name(struct pt_regs *regs, long error_code) \ | ||
562 | { \ | ||
563 | siginfo_t info; \ | ||
564 | info.si_signo = signr; \ | ||
565 | info.si_errno = 0; \ | ||
566 | info.si_code = sicode; \ | ||
567 | info.si_addr = (void __user *)siaddr; \ | ||
568 | trace_hardirqs_fixup(); \ | ||
569 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | ||
570 | == NOTIFY_STOP) \ | ||
571 | return; \ | ||
572 | do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ | ||
573 | } | ||
574 | |||
575 | DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | ||
576 | #ifndef CONFIG_KPROBES | ||
577 | DO_VM86_ERROR(3, SIGTRAP, "int3", int3) | ||
578 | #endif | ||
579 | DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) | ||
580 | DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) | ||
581 | DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) | ||
582 | DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | 264 | DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) |
583 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | 265 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) |
584 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | 266 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) |
267 | #ifdef CONFIG_X86_32 | ||
585 | DO_ERROR(12, SIGBUS, "stack segment", stack_segment) | 268 | DO_ERROR(12, SIGBUS, "stack segment", stack_segment) |
586 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) | 269 | #endif |
587 | DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) | 270 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) |
271 | |||
272 | #ifdef CONFIG_X86_64 | ||
273 | /* Runs on IST stack */ | ||
274 | dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) | ||
275 | { | ||
276 | if (notify_die(DIE_TRAP, "stack segment", regs, error_code, | ||
277 | 12, SIGBUS) == NOTIFY_STOP) | ||
278 | return; | ||
279 | preempt_conditional_sti(regs); | ||
280 | do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); | ||
281 | preempt_conditional_cli(regs); | ||
282 | } | ||
283 | |||
284 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | ||
285 | { | ||
286 | static const char str[] = "double fault"; | ||
287 | struct task_struct *tsk = current; | ||
288 | |||
289 | /* Return not checked because double check cannot be ignored */ | ||
290 | notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); | ||
588 | 291 | ||
589 | void __kprobes | 292 | tsk->thread.error_code = error_code; |
293 | tsk->thread.trap_no = 8; | ||
294 | |||
295 | /* This is always a kernel trap and never fixable (and thus must | ||
296 | never return). */ | ||
297 | for (;;) | ||
298 | die(str, regs, error_code); | ||
299 | } | ||
300 | #endif | ||
301 | |||
302 | dotraplinkage void __kprobes | ||
590 | do_general_protection(struct pt_regs *regs, long error_code) | 303 | do_general_protection(struct pt_regs *regs, long error_code) |
591 | { | 304 | { |
592 | struct task_struct *tsk; | 305 | struct task_struct *tsk; |
593 | struct thread_struct *thread; | ||
594 | struct tss_struct *tss; | ||
595 | int cpu; | ||
596 | 306 | ||
597 | cpu = get_cpu(); | 307 | conditional_sti(regs); |
598 | tss = &per_cpu(init_tss, cpu); | ||
599 | thread = ¤t->thread; | ||
600 | |||
601 | /* | ||
602 | * Perform the lazy TSS's I/O bitmap copy. If the TSS has an | ||
603 | * invalid offset set (the LAZY one) and the faulting thread has | ||
604 | * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS | ||
605 | * and we set the offset field correctly. Then we let the CPU to | ||
606 | * restart the faulting instruction. | ||
607 | */ | ||
608 | if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && | ||
609 | thread->io_bitmap_ptr) { | ||
610 | memcpy(tss->io_bitmap, thread->io_bitmap_ptr, | ||
611 | thread->io_bitmap_max); | ||
612 | /* | ||
613 | * If the previously set map was extending to higher ports | ||
614 | * than the current one, pad extra space with 0xff (no access). | ||
615 | */ | ||
616 | if (thread->io_bitmap_max < tss->io_bitmap_max) { | ||
617 | memset((char *) tss->io_bitmap + | ||
618 | thread->io_bitmap_max, 0xff, | ||
619 | tss->io_bitmap_max - thread->io_bitmap_max); | ||
620 | } | ||
621 | tss->io_bitmap_max = thread->io_bitmap_max; | ||
622 | tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; | ||
623 | tss->io_bitmap_owner = thread; | ||
624 | put_cpu(); | ||
625 | 308 | ||
309 | #ifdef CONFIG_X86_32 | ||
310 | if (lazy_iobitmap_copy()) { | ||
311 | /* restart the faulting instruction */ | ||
626 | return; | 312 | return; |
627 | } | 313 | } |
628 | put_cpu(); | ||
629 | 314 | ||
630 | if (regs->flags & X86_VM_MASK) | 315 | if (regs->flags & X86_VM_MASK) |
631 | goto gp_in_vm86; | 316 | goto gp_in_vm86; |
317 | #endif | ||
632 | 318 | ||
633 | tsk = current; | 319 | tsk = current; |
634 | if (!user_mode(regs)) | 320 | if (!user_mode(regs)) |
@@ -650,10 +336,12 @@ do_general_protection(struct pt_regs *regs, long error_code) | |||
650 | force_sig(SIGSEGV, tsk); | 336 | force_sig(SIGSEGV, tsk); |
651 | return; | 337 | return; |
652 | 338 | ||
339 | #ifdef CONFIG_X86_32 | ||
653 | gp_in_vm86: | 340 | gp_in_vm86: |
654 | local_irq_enable(); | 341 | local_irq_enable(); |
655 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); | 342 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); |
656 | return; | 343 | return; |
344 | #endif | ||
657 | 345 | ||
658 | gp_in_kernel: | 346 | gp_in_kernel: |
659 | if (fixup_exception(regs)) | 347 | if (fixup_exception(regs)) |
@@ -690,7 +378,8 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs) | |||
690 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | 378 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); |
691 | 379 | ||
692 | /* Clear and disable the memory parity error line. */ | 380 | /* Clear and disable the memory parity error line. */ |
693 | clear_mem_error(reason); | 381 | reason = (reason & 0xf) | 4; |
382 | outb(reason, 0x61); | ||
694 | } | 383 | } |
695 | 384 | ||
696 | static notrace __kprobes void | 385 | static notrace __kprobes void |
@@ -716,7 +405,8 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |||
716 | static notrace __kprobes void | 405 | static notrace __kprobes void |
717 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | 406 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) |
718 | { | 407 | { |
719 | if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | 408 | if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == |
409 | NOTIFY_STOP) | ||
720 | return; | 410 | return; |
721 | #ifdef CONFIG_MCA | 411 | #ifdef CONFIG_MCA |
722 | /* | 412 | /* |
@@ -739,41 +429,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | |||
739 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | 429 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); |
740 | } | 430 | } |
741 | 431 | ||
742 | static DEFINE_SPINLOCK(nmi_print_lock); | ||
743 | |||
744 | void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
745 | { | ||
746 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
747 | return; | ||
748 | |||
749 | spin_lock(&nmi_print_lock); | ||
750 | /* | ||
751 | * We are in trouble anyway, lets at least try | ||
752 | * to get a message out: | ||
753 | */ | ||
754 | bust_spinlocks(1); | ||
755 | printk(KERN_EMERG "%s", str); | ||
756 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
757 | smp_processor_id(), regs->ip); | ||
758 | show_registers(regs); | ||
759 | if (do_panic) | ||
760 | panic("Non maskable interrupt"); | ||
761 | console_silent(); | ||
762 | spin_unlock(&nmi_print_lock); | ||
763 | bust_spinlocks(0); | ||
764 | |||
765 | /* | ||
766 | * If we are in kernel we are probably nested up pretty bad | ||
767 | * and might aswell get out now while we still can: | ||
768 | */ | ||
769 | if (!user_mode_vm(regs)) { | ||
770 | current->thread.trap_no = 2; | ||
771 | crash_kexec(regs); | ||
772 | } | ||
773 | |||
774 | do_exit(SIGSEGV); | ||
775 | } | ||
776 | |||
777 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | 432 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) |
778 | { | 433 | { |
779 | unsigned char reason = 0; | 434 | unsigned char reason = 0; |
@@ -812,22 +467,25 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
812 | mem_parity_error(reason, regs); | 467 | mem_parity_error(reason, regs); |
813 | if (reason & 0x40) | 468 | if (reason & 0x40) |
814 | io_check_error(reason, regs); | 469 | io_check_error(reason, regs); |
470 | #ifdef CONFIG_X86_32 | ||
815 | /* | 471 | /* |
816 | * Reassert NMI in case it became active meanwhile | 472 | * Reassert NMI in case it became active meanwhile |
817 | * as it's edge-triggered: | 473 | * as it's edge-triggered: |
818 | */ | 474 | */ |
819 | reassert_nmi(); | 475 | reassert_nmi(); |
476 | #endif | ||
820 | } | 477 | } |
821 | 478 | ||
822 | notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) | 479 | dotraplinkage notrace __kprobes void |
480 | do_nmi(struct pt_regs *regs, long error_code) | ||
823 | { | 481 | { |
824 | int cpu; | ||
825 | |||
826 | nmi_enter(); | 482 | nmi_enter(); |
827 | 483 | ||
828 | cpu = smp_processor_id(); | 484 | #ifdef CONFIG_X86_32 |
829 | 485 | { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } | |
830 | ++nmi_count(cpu); | 486 | #else |
487 | add_pda(__nmi_count, 1); | ||
488 | #endif | ||
831 | 489 | ||
832 | if (!ignore_nmis) | 490 | if (!ignore_nmis) |
833 | default_do_nmi(regs); | 491 | default_do_nmi(regs); |
@@ -847,21 +505,44 @@ void restart_nmi(void) | |||
847 | acpi_nmi_enable(); | 505 | acpi_nmi_enable(); |
848 | } | 506 | } |
849 | 507 | ||
850 | #ifdef CONFIG_KPROBES | 508 | /* May run on IST stack. */ |
851 | void __kprobes do_int3(struct pt_regs *regs, long error_code) | 509 | dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) |
852 | { | 510 | { |
853 | trace_hardirqs_fixup(); | 511 | #ifdef CONFIG_KPROBES |
854 | |||
855 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | 512 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) |
856 | == NOTIFY_STOP) | 513 | == NOTIFY_STOP) |
857 | return; | 514 | return; |
858 | /* | 515 | #else |
859 | * This is an interrupt gate, because kprobes wants interrupts | 516 | if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) |
860 | * disabled. Normal trap handlers don't. | 517 | == NOTIFY_STOP) |
861 | */ | 518 | return; |
862 | restore_interrupts(regs); | 519 | #endif |
520 | |||
521 | preempt_conditional_sti(regs); | ||
522 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); | ||
523 | preempt_conditional_cli(regs); | ||
524 | } | ||
863 | 525 | ||
864 | do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); | 526 | #ifdef CONFIG_X86_64 |
527 | /* Help handler running on IST stack to switch back to user stack | ||
528 | for scheduling or signal handling. The actual stack switch is done in | ||
529 | entry.S */ | ||
530 | asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | ||
531 | { | ||
532 | struct pt_regs *regs = eregs; | ||
533 | /* Did already sync */ | ||
534 | if (eregs == (struct pt_regs *)eregs->sp) | ||
535 | ; | ||
536 | /* Exception from user space */ | ||
537 | else if (user_mode(eregs)) | ||
538 | regs = task_pt_regs(current); | ||
539 | /* Exception from kernel and interrupts are enabled. Move to | ||
540 | kernel process stack. */ | ||
541 | else if (eregs->flags & X86_EFLAGS_IF) | ||
542 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); | ||
543 | if (eregs != regs) | ||
544 | *regs = *eregs; | ||
545 | return regs; | ||
865 | } | 546 | } |
866 | #endif | 547 | #endif |
867 | 548 | ||
@@ -886,15 +567,15 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code) | |||
886 | * about restoring all the debug state, and ptrace doesn't have to | 567 | * about restoring all the debug state, and ptrace doesn't have to |
887 | * find every occurrence of the TF bit that could be saved away even | 568 | * find every occurrence of the TF bit that could be saved away even |
888 | * by user code) | 569 | * by user code) |
570 | * | ||
571 | * May run on IST stack. | ||
889 | */ | 572 | */ |
890 | void __kprobes do_debug(struct pt_regs *regs, long error_code) | 573 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) |
891 | { | 574 | { |
892 | struct task_struct *tsk = current; | 575 | struct task_struct *tsk = current; |
893 | unsigned int condition; | 576 | unsigned long condition; |
894 | int si_code; | 577 | int si_code; |
895 | 578 | ||
896 | trace_hardirqs_fixup(); | ||
897 | |||
898 | get_debugreg(condition, 6); | 579 | get_debugreg(condition, 6); |
899 | 580 | ||
900 | /* | 581 | /* |
@@ -906,9 +587,9 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
906 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 587 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, |
907 | SIGTRAP) == NOTIFY_STOP) | 588 | SIGTRAP) == NOTIFY_STOP) |
908 | return; | 589 | return; |
590 | |||
909 | /* It's safe to allow irq's after DR6 has been saved */ | 591 | /* It's safe to allow irq's after DR6 has been saved */ |
910 | if (regs->flags & X86_EFLAGS_IF) | 592 | preempt_conditional_sti(regs); |
911 | local_irq_enable(); | ||
912 | 593 | ||
913 | /* Mask out spurious debug traps due to lazy DR7 setting */ | 594 | /* Mask out spurious debug traps due to lazy DR7 setting */ |
914 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | 595 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { |
@@ -916,8 +597,10 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
916 | goto clear_dr7; | 597 | goto clear_dr7; |
917 | } | 598 | } |
918 | 599 | ||
600 | #ifdef CONFIG_X86_32 | ||
919 | if (regs->flags & X86_VM_MASK) | 601 | if (regs->flags & X86_VM_MASK) |
920 | goto debug_vm86; | 602 | goto debug_vm86; |
603 | #endif | ||
921 | 604 | ||
922 | /* Save debug status register where ptrace can see it */ | 605 | /* Save debug status register where ptrace can see it */ |
923 | tsk->thread.debugreg6 = condition; | 606 | tsk->thread.debugreg6 = condition; |
@@ -927,16 +610,11 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
927 | * kernel space (but re-enable TF when returning to user mode). | 610 | * kernel space (but re-enable TF when returning to user mode). |
928 | */ | 611 | */ |
929 | if (condition & DR_STEP) { | 612 | if (condition & DR_STEP) { |
930 | /* | ||
931 | * We already checked v86 mode above, so we can | ||
932 | * check for kernel mode by just checking the CPL | ||
933 | * of CS. | ||
934 | */ | ||
935 | if (!user_mode(regs)) | 613 | if (!user_mode(regs)) |
936 | goto clear_TF_reenable; | 614 | goto clear_TF_reenable; |
937 | } | 615 | } |
938 | 616 | ||
939 | si_code = get_si_code((unsigned long)condition); | 617 | si_code = get_si_code(condition); |
940 | /* Ok, finally something we can handle */ | 618 | /* Ok, finally something we can handle */ |
941 | send_sigtrap(tsk, regs, error_code, si_code); | 619 | send_sigtrap(tsk, regs, error_code, si_code); |
942 | 620 | ||
@@ -946,18 +624,37 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
946 | */ | 624 | */ |
947 | clear_dr7: | 625 | clear_dr7: |
948 | set_debugreg(0, 7); | 626 | set_debugreg(0, 7); |
627 | preempt_conditional_cli(regs); | ||
949 | return; | 628 | return; |
950 | 629 | ||
630 | #ifdef CONFIG_X86_32 | ||
951 | debug_vm86: | 631 | debug_vm86: |
952 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); | 632 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); |
633 | preempt_conditional_cli(regs); | ||
953 | return; | 634 | return; |
635 | #endif | ||
954 | 636 | ||
955 | clear_TF_reenable: | 637 | clear_TF_reenable: |
956 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | 638 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); |
957 | regs->flags &= ~X86_EFLAGS_TF; | 639 | regs->flags &= ~X86_EFLAGS_TF; |
640 | preempt_conditional_cli(regs); | ||
958 | return; | 641 | return; |
959 | } | 642 | } |
960 | 643 | ||
644 | #ifdef CONFIG_X86_64 | ||
645 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | ||
646 | { | ||
647 | if (fixup_exception(regs)) | ||
648 | return 1; | ||
649 | |||
650 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); | ||
651 | /* Illegal floating point operation in the kernel */ | ||
652 | current->thread.trap_no = trapnr; | ||
653 | die(str, regs, 0); | ||
654 | return 0; | ||
655 | } | ||
656 | #endif | ||
657 | |||
961 | /* | 658 | /* |
962 | * Note that we play around with the 'TS' bit in an attempt to get | 659 | * Note that we play around with the 'TS' bit in an attempt to get |
963 | * the correct behaviour even in the presence of the asynchronous | 660 | * the correct behaviour even in the presence of the asynchronous |
@@ -994,7 +691,9 @@ void math_error(void __user *ip) | |||
994 | swd = get_fpu_swd(task); | 691 | swd = get_fpu_swd(task); |
995 | switch (swd & ~cwd & 0x3f) { | 692 | switch (swd & ~cwd & 0x3f) { |
996 | case 0x000: /* No unmasked exception */ | 693 | case 0x000: /* No unmasked exception */ |
694 | #ifdef CONFIG_X86_32 | ||
997 | return; | 695 | return; |
696 | #endif | ||
998 | default: /* Multiple exceptions */ | 697 | default: /* Multiple exceptions */ |
999 | break; | 698 | break; |
1000 | case 0x001: /* Invalid Op */ | 699 | case 0x001: /* Invalid Op */ |
@@ -1022,9 +721,18 @@ void math_error(void __user *ip) | |||
1022 | force_sig_info(SIGFPE, &info, task); | 721 | force_sig_info(SIGFPE, &info, task); |
1023 | } | 722 | } |
1024 | 723 | ||
1025 | void do_coprocessor_error(struct pt_regs *regs, long error_code) | 724 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) |
1026 | { | 725 | { |
726 | conditional_sti(regs); | ||
727 | |||
728 | #ifdef CONFIG_X86_32 | ||
1027 | ignore_fpu_irq = 1; | 729 | ignore_fpu_irq = 1; |
730 | #else | ||
731 | if (!user_mode(regs) && | ||
732 | kernel_math_error(regs, "kernel x87 math error", 16)) | ||
733 | return; | ||
734 | #endif | ||
735 | |||
1028 | math_error((void __user *)regs->ip); | 736 | math_error((void __user *)regs->ip); |
1029 | } | 737 | } |
1030 | 738 | ||
@@ -1076,8 +784,12 @@ static void simd_math_error(void __user *ip) | |||
1076 | force_sig_info(SIGFPE, &info, task); | 784 | force_sig_info(SIGFPE, &info, task); |
1077 | } | 785 | } |
1078 | 786 | ||
1079 | void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | 787 | dotraplinkage void |
788 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | ||
1080 | { | 789 | { |
790 | conditional_sti(regs); | ||
791 | |||
792 | #ifdef CONFIG_X86_32 | ||
1081 | if (cpu_has_xmm) { | 793 | if (cpu_has_xmm) { |
1082 | /* Handle SIMD FPU exceptions on PIII+ processors. */ | 794 | /* Handle SIMD FPU exceptions on PIII+ processors. */ |
1083 | ignore_fpu_irq = 1; | 795 | ignore_fpu_irq = 1; |
@@ -1096,16 +808,25 @@ void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | |||
1096 | current->thread.error_code = error_code; | 808 | current->thread.error_code = error_code; |
1097 | die_if_kernel("cache flush denied", regs, error_code); | 809 | die_if_kernel("cache flush denied", regs, error_code); |
1098 | force_sig(SIGSEGV, current); | 810 | force_sig(SIGSEGV, current); |
811 | #else | ||
812 | if (!user_mode(regs) && | ||
813 | kernel_math_error(regs, "kernel simd math error", 19)) | ||
814 | return; | ||
815 | simd_math_error((void __user *)regs->ip); | ||
816 | #endif | ||
1099 | } | 817 | } |
1100 | 818 | ||
1101 | void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) | 819 | dotraplinkage void |
820 | do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) | ||
1102 | { | 821 | { |
822 | conditional_sti(regs); | ||
1103 | #if 0 | 823 | #if 0 |
1104 | /* No need to warn about this any longer. */ | 824 | /* No need to warn about this any longer. */ |
1105 | printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); | 825 | printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); |
1106 | #endif | 826 | #endif |
1107 | } | 827 | } |
1108 | 828 | ||
829 | #ifdef CONFIG_X86_32 | ||
1109 | unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) | 830 | unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) |
1110 | { | 831 | { |
1111 | struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); | 832 | struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); |
@@ -1124,6 +845,15 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) | |||
1124 | 845 | ||
1125 | return new_kesp; | 846 | return new_kesp; |
1126 | } | 847 | } |
848 | #else | ||
849 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) | ||
850 | { | ||
851 | } | ||
852 | |||
853 | asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) | ||
854 | { | ||
855 | } | ||
856 | #endif | ||
1127 | 857 | ||
1128 | /* | 858 | /* |
1129 | * 'math_state_restore()' saves the current math information in the | 859 | * 'math_state_restore()' saves the current math information in the |
@@ -1156,14 +886,24 @@ asmlinkage void math_state_restore(void) | |||
1156 | } | 886 | } |
1157 | 887 | ||
1158 | clts(); /* Allow maths ops (or we recurse) */ | 888 | clts(); /* Allow maths ops (or we recurse) */ |
889 | #ifdef CONFIG_X86_32 | ||
1159 | restore_fpu(tsk); | 890 | restore_fpu(tsk); |
891 | #else | ||
892 | /* | ||
893 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | ||
894 | */ | ||
895 | if (unlikely(restore_fpu_checking(tsk))) { | ||
896 | stts(); | ||
897 | force_sig(SIGSEGV, tsk); | ||
898 | return; | ||
899 | } | ||
900 | #endif | ||
1160 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | 901 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ |
1161 | tsk->fpu_counter++; | 902 | tsk->fpu_counter++; |
1162 | } | 903 | } |
1163 | EXPORT_SYMBOL_GPL(math_state_restore); | 904 | EXPORT_SYMBOL_GPL(math_state_restore); |
1164 | 905 | ||
1165 | #ifndef CONFIG_MATH_EMULATION | 906 | #ifndef CONFIG_MATH_EMULATION |
1166 | |||
1167 | asmlinkage void math_emulate(long arg) | 907 | asmlinkage void math_emulate(long arg) |
1168 | { | 908 | { |
1169 | printk(KERN_EMERG | 909 | printk(KERN_EMERG |
@@ -1172,12 +912,54 @@ asmlinkage void math_emulate(long arg) | |||
1172 | force_sig(SIGFPE, current); | 912 | force_sig(SIGFPE, current); |
1173 | schedule(); | 913 | schedule(); |
1174 | } | 914 | } |
1175 | |||
1176 | #endif /* CONFIG_MATH_EMULATION */ | 915 | #endif /* CONFIG_MATH_EMULATION */ |
1177 | 916 | ||
917 | dotraplinkage void __kprobes | ||
918 | do_device_not_available(struct pt_regs *regs, long error) | ||
919 | { | ||
920 | #ifdef CONFIG_X86_32 | ||
921 | if (read_cr0() & X86_CR0_EM) { | ||
922 | conditional_sti(regs); | ||
923 | math_emulate(0); | ||
924 | } else { | ||
925 | math_state_restore(); /* interrupts still off */ | ||
926 | conditional_sti(regs); | ||
927 | } | ||
928 | #else | ||
929 | math_state_restore(); | ||
930 | #endif | ||
931 | } | ||
932 | |||
933 | #ifdef CONFIG_X86_32 | ||
934 | #ifdef CONFIG_X86_MCE | ||
935 | dotraplinkage void __kprobes do_machine_check(struct pt_regs *regs, long error) | ||
936 | { | ||
937 | conditional_sti(regs); | ||
938 | machine_check_vector(regs, error); | ||
939 | } | ||
940 | #endif | ||
941 | |||
942 | dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | ||
943 | { | ||
944 | siginfo_t info; | ||
945 | local_irq_enable(); | ||
946 | |||
947 | info.si_signo = SIGILL; | ||
948 | info.si_errno = 0; | ||
949 | info.si_code = ILL_BADSTK; | ||
950 | info.si_addr = 0; | ||
951 | if (notify_die(DIE_TRAP, "iret exception", | ||
952 | regs, error_code, 32, SIGILL) == NOTIFY_STOP) | ||
953 | return; | ||
954 | do_trap(32, SIGILL, "iret exception", regs, error_code, &info); | ||
955 | } | ||
956 | #endif | ||
957 | |||
1178 | void __init trap_init(void) | 958 | void __init trap_init(void) |
1179 | { | 959 | { |
960 | #ifdef CONFIG_X86_32 | ||
1180 | int i; | 961 | int i; |
962 | #endif | ||
1181 | 963 | ||
1182 | #ifdef CONFIG_EISA | 964 | #ifdef CONFIG_EISA |
1183 | void __iomem *p = early_ioremap(0x0FFFD9, 4); | 965 | void __iomem *p = early_ioremap(0x0FFFD9, 4); |
@@ -1187,29 +969,40 @@ void __init trap_init(void) | |||
1187 | early_iounmap(p, 4); | 969 | early_iounmap(p, 4); |
1188 | #endif | 970 | #endif |
1189 | 971 | ||
1190 | set_trap_gate(0, ÷_error); | 972 | set_intr_gate(0, ÷_error); |
1191 | set_intr_gate(1, &debug); | 973 | set_intr_gate_ist(1, &debug, DEBUG_STACK); |
1192 | set_intr_gate(2, &nmi); | 974 | set_intr_gate_ist(2, &nmi, NMI_STACK); |
1193 | set_system_intr_gate(3, &int3); /* int3 can be called from all */ | 975 | /* int3 can be called from all */ |
1194 | set_system_gate(4, &overflow); /* int4 can be called from all */ | 976 | set_system_intr_gate_ist(3, &int3, DEBUG_STACK); |
1195 | set_trap_gate(5, &bounds); | 977 | /* int4 can be called from all */ |
1196 | set_trap_gate(6, &invalid_op); | 978 | set_system_intr_gate(4, &overflow); |
1197 | set_trap_gate(7, &device_not_available); | 979 | set_intr_gate(5, &bounds); |
980 | set_intr_gate(6, &invalid_op); | ||
981 | set_intr_gate(7, &device_not_available); | ||
982 | #ifdef CONFIG_X86_32 | ||
1198 | set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); | 983 | set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); |
1199 | set_trap_gate(9, &coprocessor_segment_overrun); | 984 | #else |
1200 | set_trap_gate(10, &invalid_TSS); | 985 | set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); |
1201 | set_trap_gate(11, &segment_not_present); | 986 | #endif |
1202 | set_trap_gate(12, &stack_segment); | 987 | set_intr_gate(9, &coprocessor_segment_overrun); |
1203 | set_trap_gate(13, &general_protection); | 988 | set_intr_gate(10, &invalid_TSS); |
989 | set_intr_gate(11, &segment_not_present); | ||
990 | set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); | ||
991 | set_intr_gate(13, &general_protection); | ||
1204 | set_intr_gate(14, &page_fault); | 992 | set_intr_gate(14, &page_fault); |
1205 | set_trap_gate(15, &spurious_interrupt_bug); | 993 | set_intr_gate(15, &spurious_interrupt_bug); |
1206 | set_trap_gate(16, &coprocessor_error); | 994 | set_intr_gate(16, &coprocessor_error); |
1207 | set_trap_gate(17, &alignment_check); | 995 | set_intr_gate(17, &alignment_check); |
1208 | #ifdef CONFIG_X86_MCE | 996 | #ifdef CONFIG_X86_MCE |
1209 | set_trap_gate(18, &machine_check); | 997 | set_intr_gate_ist(18, &machine_check, MCE_STACK); |
1210 | #endif | 998 | #endif |
1211 | set_trap_gate(19, &simd_coprocessor_error); | 999 | set_intr_gate(19, &simd_coprocessor_error); |
1212 | 1000 | ||
1001 | #ifdef CONFIG_IA32_EMULATION | ||
1002 | set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); | ||
1003 | #endif | ||
1004 | |||
1005 | #ifdef CONFIG_X86_32 | ||
1213 | if (cpu_has_fxsr) { | 1006 | if (cpu_has_fxsr) { |
1214 | printk(KERN_INFO "Enabling fast FPU save and restore... "); | 1007 | printk(KERN_INFO "Enabling fast FPU save and restore... "); |
1215 | set_in_cr4(X86_CR4_OSFXSR); | 1008 | set_in_cr4(X86_CR4_OSFXSR); |
@@ -1222,36 +1015,20 @@ void __init trap_init(void) | |||
1222 | printk("done.\n"); | 1015 | printk("done.\n"); |
1223 | } | 1016 | } |
1224 | 1017 | ||
1225 | set_system_gate(SYSCALL_VECTOR, &system_call); | 1018 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); |
1226 | 1019 | ||
1227 | /* Reserve all the builtin and the syscall vector: */ | 1020 | /* Reserve all the builtin and the syscall vector: */ |
1228 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | 1021 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) |
1229 | set_bit(i, used_vectors); | 1022 | set_bit(i, used_vectors); |
1230 | 1023 | ||
1231 | set_bit(SYSCALL_VECTOR, used_vectors); | 1024 | set_bit(SYSCALL_VECTOR, used_vectors); |
1232 | 1025 | #endif | |
1233 | /* | 1026 | /* |
1234 | * Should be a barrier for any external CPU state: | 1027 | * Should be a barrier for any external CPU state: |
1235 | */ | 1028 | */ |
1236 | cpu_init(); | 1029 | cpu_init(); |
1237 | 1030 | ||
1031 | #ifdef CONFIG_X86_32 | ||
1238 | trap_init_hook(); | 1032 | trap_init_hook(); |
1033 | #endif | ||
1239 | } | 1034 | } |
1240 | |||
1241 | static int __init kstack_setup(char *s) | ||
1242 | { | ||
1243 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
1244 | |||
1245 | return 1; | ||
1246 | } | ||
1247 | __setup("kstack=", kstack_setup); | ||
1248 | |||
1249 | static int __init code_bytes_setup(char *s) | ||
1250 | { | ||
1251 | code_bytes = simple_strtoul(s, NULL, 0); | ||
1252 | if (code_bytes > 8192) | ||
1253 | code_bytes = 8192; | ||
1254 | |||
1255 | return 1; | ||
1256 | } | ||
1257 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c deleted file mode 100644 index 9c0ac0cab013..000000000000 --- a/arch/x86/kernel/traps_64.c +++ /dev/null | |||
@@ -1,1214 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | * | ||
5 | * Pentium III FXSR, SSE support | ||
6 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
7 | */ | ||
8 | |||
9 | /* | ||
10 | * 'Traps.c' handles hardware traps and faults after we have saved some | ||
11 | * state in 'entry.S'. | ||
12 | */ | ||
13 | #include <linux/moduleparam.h> | ||
14 | #include <linux/interrupt.h> | ||
15 | #include <linux/kallsyms.h> | ||
16 | #include <linux/spinlock.h> | ||
17 | #include <linux/kprobes.h> | ||
18 | #include <linux/uaccess.h> | ||
19 | #include <linux/utsname.h> | ||
20 | #include <linux/kdebug.h> | ||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/ptrace.h> | ||
24 | #include <linux/string.h> | ||
25 | #include <linux/unwind.h> | ||
26 | #include <linux/delay.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/kexec.h> | ||
29 | #include <linux/sched.h> | ||
30 | #include <linux/timer.h> | ||
31 | #include <linux/init.h> | ||
32 | #include <linux/bug.h> | ||
33 | #include <linux/nmi.h> | ||
34 | #include <linux/mm.h> | ||
35 | #include <linux/smp.h> | ||
36 | #include <linux/io.h> | ||
37 | |||
38 | #if defined(CONFIG_EDAC) | ||
39 | #include <linux/edac.h> | ||
40 | #endif | ||
41 | |||
42 | #include <asm/stacktrace.h> | ||
43 | #include <asm/processor.h> | ||
44 | #include <asm/debugreg.h> | ||
45 | #include <asm/atomic.h> | ||
46 | #include <asm/system.h> | ||
47 | #include <asm/unwind.h> | ||
48 | #include <asm/desc.h> | ||
49 | #include <asm/i387.h> | ||
50 | #include <asm/pgalloc.h> | ||
51 | #include <asm/proto.h> | ||
52 | #include <asm/pda.h> | ||
53 | #include <asm/traps.h> | ||
54 | |||
55 | #include <mach_traps.h> | ||
56 | |||
57 | int panic_on_unrecovered_nmi; | ||
58 | int kstack_depth_to_print = 12; | ||
59 | static unsigned int code_bytes = 64; | ||
60 | static int ignore_nmis; | ||
61 | static int die_counter; | ||
62 | |||
63 | static inline void conditional_sti(struct pt_regs *regs) | ||
64 | { | ||
65 | if (regs->flags & X86_EFLAGS_IF) | ||
66 | local_irq_enable(); | ||
67 | } | ||
68 | |||
69 | static inline void preempt_conditional_sti(struct pt_regs *regs) | ||
70 | { | ||
71 | inc_preempt_count(); | ||
72 | if (regs->flags & X86_EFLAGS_IF) | ||
73 | local_irq_enable(); | ||
74 | } | ||
75 | |||
76 | static inline void preempt_conditional_cli(struct pt_regs *regs) | ||
77 | { | ||
78 | if (regs->flags & X86_EFLAGS_IF) | ||
79 | local_irq_disable(); | ||
80 | /* Make sure to not schedule here because we could be running | ||
81 | on an exception stack. */ | ||
82 | dec_preempt_count(); | ||
83 | } | ||
84 | |||
85 | void printk_address(unsigned long address, int reliable) | ||
86 | { | ||
87 | printk(" [<%016lx>] %s%pS\n", | ||
88 | address, reliable ? "" : "? ", (void *) address); | ||
89 | } | ||
90 | |||
91 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | ||
92 | unsigned *usedp, char **idp) | ||
93 | { | ||
94 | static char ids[][8] = { | ||
95 | [DEBUG_STACK - 1] = "#DB", | ||
96 | [NMI_STACK - 1] = "NMI", | ||
97 | [DOUBLEFAULT_STACK - 1] = "#DF", | ||
98 | [STACKFAULT_STACK - 1] = "#SS", | ||
99 | [MCE_STACK - 1] = "#MC", | ||
100 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
101 | [N_EXCEPTION_STACKS ... | ||
102 | N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | ||
103 | #endif | ||
104 | }; | ||
105 | unsigned k; | ||
106 | |||
107 | /* | ||
108 | * Iterate over all exception stacks, and figure out whether | ||
109 | * 'stack' is in one of them: | ||
110 | */ | ||
111 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { | ||
112 | unsigned long end = per_cpu(orig_ist, cpu).ist[k]; | ||
113 | /* | ||
114 | * Is 'stack' above this exception frame's end? | ||
115 | * If yes then skip to the next frame. | ||
116 | */ | ||
117 | if (stack >= end) | ||
118 | continue; | ||
119 | /* | ||
120 | * Is 'stack' above this exception frame's start address? | ||
121 | * If yes then we found the right frame. | ||
122 | */ | ||
123 | if (stack >= end - EXCEPTION_STKSZ) { | ||
124 | /* | ||
125 | * Make sure we only iterate through an exception | ||
126 | * stack once. If it comes up for the second time | ||
127 | * then there's something wrong going on - just | ||
128 | * break out and return NULL: | ||
129 | */ | ||
130 | if (*usedp & (1U << k)) | ||
131 | break; | ||
132 | *usedp |= 1U << k; | ||
133 | *idp = ids[k]; | ||
134 | return (unsigned long *)end; | ||
135 | } | ||
136 | /* | ||
137 | * If this is a debug stack, and if it has a larger size than | ||
138 | * the usual exception stacks, then 'stack' might still | ||
139 | * be within the lower portion of the debug stack: | ||
140 | */ | ||
141 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
142 | if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { | ||
143 | unsigned j = N_EXCEPTION_STACKS - 1; | ||
144 | |||
145 | /* | ||
146 | * Black magic. A large debug stack is composed of | ||
147 | * multiple exception stack entries, which we | ||
148 | * iterate through now. Dont look: | ||
149 | */ | ||
150 | do { | ||
151 | ++j; | ||
152 | end -= EXCEPTION_STKSZ; | ||
153 | ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); | ||
154 | } while (stack < end - EXCEPTION_STKSZ); | ||
155 | if (*usedp & (1U << j)) | ||
156 | break; | ||
157 | *usedp |= 1U << j; | ||
158 | *idp = ids[j]; | ||
159 | return (unsigned long *)end; | ||
160 | } | ||
161 | #endif | ||
162 | } | ||
163 | return NULL; | ||
164 | } | ||
165 | |||
166 | /* | ||
167 | * x86-64 can have up to three kernel stacks: | ||
168 | * process stack | ||
169 | * interrupt stack | ||
170 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | ||
171 | */ | ||
172 | |||
173 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
174 | void *p, unsigned int size, void *end) | ||
175 | { | ||
176 | void *t = tinfo; | ||
177 | if (end) { | ||
178 | if (p < end && p >= (end-THREAD_SIZE)) | ||
179 | return 1; | ||
180 | else | ||
181 | return 0; | ||
182 | } | ||
183 | return p > t && p < t + THREAD_SIZE - size; | ||
184 | } | ||
185 | |||
186 | /* The form of the top of the frame on the stack */ | ||
187 | struct stack_frame { | ||
188 | struct stack_frame *next_frame; | ||
189 | unsigned long return_address; | ||
190 | }; | ||
191 | |||
192 | static inline unsigned long | ||
193 | print_context_stack(struct thread_info *tinfo, | ||
194 | unsigned long *stack, unsigned long bp, | ||
195 | const struct stacktrace_ops *ops, void *data, | ||
196 | unsigned long *end) | ||
197 | { | ||
198 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
199 | |||
200 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
201 | unsigned long addr; | ||
202 | |||
203 | addr = *stack; | ||
204 | if (__kernel_text_address(addr)) { | ||
205 | if ((unsigned long) stack == bp + 8) { | ||
206 | ops->address(data, addr, 1); | ||
207 | frame = frame->next_frame; | ||
208 | bp = (unsigned long) frame; | ||
209 | } else { | ||
210 | ops->address(data, addr, bp == 0); | ||
211 | } | ||
212 | } | ||
213 | stack++; | ||
214 | } | ||
215 | return bp; | ||
216 | } | ||
217 | |||
218 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | ||
219 | unsigned long *stack, unsigned long bp, | ||
220 | const struct stacktrace_ops *ops, void *data) | ||
221 | { | ||
222 | const unsigned cpu = get_cpu(); | ||
223 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; | ||
224 | unsigned used = 0; | ||
225 | struct thread_info *tinfo; | ||
226 | |||
227 | if (!task) | ||
228 | task = current; | ||
229 | |||
230 | if (!stack) { | ||
231 | unsigned long dummy; | ||
232 | stack = &dummy; | ||
233 | if (task && task != current) | ||
234 | stack = (unsigned long *)task->thread.sp; | ||
235 | } | ||
236 | |||
237 | #ifdef CONFIG_FRAME_POINTER | ||
238 | if (!bp) { | ||
239 | if (task == current) { | ||
240 | /* Grab bp right from our regs */ | ||
241 | asm("movq %%rbp, %0" : "=r" (bp) : ); | ||
242 | } else { | ||
243 | /* bp is the last reg pushed by switch_to */ | ||
244 | bp = *(unsigned long *) task->thread.sp; | ||
245 | } | ||
246 | } | ||
247 | #endif | ||
248 | |||
249 | /* | ||
250 | * Print function call entries in all stacks, starting at the | ||
251 | * current stack address. If the stacks consist of nested | ||
252 | * exceptions | ||
253 | */ | ||
254 | tinfo = task_thread_info(task); | ||
255 | for (;;) { | ||
256 | char *id; | ||
257 | unsigned long *estack_end; | ||
258 | estack_end = in_exception_stack(cpu, (unsigned long)stack, | ||
259 | &used, &id); | ||
260 | |||
261 | if (estack_end) { | ||
262 | if (ops->stack(data, id) < 0) | ||
263 | break; | ||
264 | |||
265 | bp = print_context_stack(tinfo, stack, bp, ops, | ||
266 | data, estack_end); | ||
267 | ops->stack(data, "<EOE>"); | ||
268 | /* | ||
269 | * We link to the next stack via the | ||
270 | * second-to-last pointer (index -2 to end) in the | ||
271 | * exception stack: | ||
272 | */ | ||
273 | stack = (unsigned long *) estack_end[-2]; | ||
274 | continue; | ||
275 | } | ||
276 | if (irqstack_end) { | ||
277 | unsigned long *irqstack; | ||
278 | irqstack = irqstack_end - | ||
279 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); | ||
280 | |||
281 | if (stack >= irqstack && stack < irqstack_end) { | ||
282 | if (ops->stack(data, "IRQ") < 0) | ||
283 | break; | ||
284 | bp = print_context_stack(tinfo, stack, bp, | ||
285 | ops, data, irqstack_end); | ||
286 | /* | ||
287 | * We link to the next stack (which would be | ||
288 | * the process stack normally) the last | ||
289 | * pointer (index -1 to end) in the IRQ stack: | ||
290 | */ | ||
291 | stack = (unsigned long *) (irqstack_end[-1]); | ||
292 | irqstack_end = NULL; | ||
293 | ops->stack(data, "EOI"); | ||
294 | continue; | ||
295 | } | ||
296 | } | ||
297 | break; | ||
298 | } | ||
299 | |||
300 | /* | ||
301 | * This handles the process stack: | ||
302 | */ | ||
303 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); | ||
304 | put_cpu(); | ||
305 | } | ||
306 | EXPORT_SYMBOL(dump_trace); | ||
307 | |||
308 | static void | ||
309 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
310 | { | ||
311 | print_symbol(msg, symbol); | ||
312 | printk("\n"); | ||
313 | } | ||
314 | |||
315 | static void print_trace_warning(void *data, char *msg) | ||
316 | { | ||
317 | printk("%s\n", msg); | ||
318 | } | ||
319 | |||
320 | static int print_trace_stack(void *data, char *name) | ||
321 | { | ||
322 | printk(" <%s> ", name); | ||
323 | return 0; | ||
324 | } | ||
325 | |||
326 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
327 | { | ||
328 | touch_nmi_watchdog(); | ||
329 | printk_address(addr, reliable); | ||
330 | } | ||
331 | |||
332 | static const struct stacktrace_ops print_trace_ops = { | ||
333 | .warning = print_trace_warning, | ||
334 | .warning_symbol = print_trace_warning_symbol, | ||
335 | .stack = print_trace_stack, | ||
336 | .address = print_trace_address, | ||
337 | }; | ||
338 | |||
339 | static void | ||
340 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
341 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
342 | { | ||
343 | printk("Call Trace:\n"); | ||
344 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
345 | } | ||
346 | |||
347 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
348 | unsigned long *stack, unsigned long bp) | ||
349 | { | ||
350 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
351 | } | ||
352 | |||
353 | static void | ||
354 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
355 | unsigned long *sp, unsigned long bp, char *log_lvl) | ||
356 | { | ||
357 | unsigned long *stack; | ||
358 | int i; | ||
359 | const int cpu = smp_processor_id(); | ||
360 | unsigned long *irqstack_end = | ||
361 | (unsigned long *) (cpu_pda(cpu)->irqstackptr); | ||
362 | unsigned long *irqstack = | ||
363 | (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); | ||
364 | |||
365 | /* | ||
366 | * debugging aid: "show_stack(NULL, NULL);" prints the | ||
367 | * back trace for this cpu. | ||
368 | */ | ||
369 | |||
370 | if (sp == NULL) { | ||
371 | if (task) | ||
372 | sp = (unsigned long *)task->thread.sp; | ||
373 | else | ||
374 | sp = (unsigned long *)&sp; | ||
375 | } | ||
376 | |||
377 | stack = sp; | ||
378 | for (i = 0; i < kstack_depth_to_print; i++) { | ||
379 | if (stack >= irqstack && stack <= irqstack_end) { | ||
380 | if (stack == irqstack_end) { | ||
381 | stack = (unsigned long *) (irqstack_end[-1]); | ||
382 | printk(" <EOI> "); | ||
383 | } | ||
384 | } else { | ||
385 | if (((long) stack & (THREAD_SIZE-1)) == 0) | ||
386 | break; | ||
387 | } | ||
388 | if (i && ((i % 4) == 0)) | ||
389 | printk("\n"); | ||
390 | printk(" %016lx", *stack++); | ||
391 | touch_nmi_watchdog(); | ||
392 | } | ||
393 | printk("\n"); | ||
394 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | ||
395 | } | ||
396 | |||
397 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
398 | { | ||
399 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
400 | } | ||
401 | |||
402 | /* | ||
403 | * The architecture-independent dump_stack generator | ||
404 | */ | ||
405 | void dump_stack(void) | ||
406 | { | ||
407 | unsigned long bp = 0; | ||
408 | unsigned long stack; | ||
409 | |||
410 | #ifdef CONFIG_FRAME_POINTER | ||
411 | if (!bp) | ||
412 | asm("movq %%rbp, %0" : "=r" (bp) : ); | ||
413 | #endif | ||
414 | |||
415 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
416 | current->pid, current->comm, print_tainted(), | ||
417 | init_utsname()->release, | ||
418 | (int)strcspn(init_utsname()->version, " "), | ||
419 | init_utsname()->version); | ||
420 | show_trace(NULL, NULL, &stack, bp); | ||
421 | } | ||
422 | EXPORT_SYMBOL(dump_stack); | ||
423 | |||
424 | void show_registers(struct pt_regs *regs) | ||
425 | { | ||
426 | int i; | ||
427 | unsigned long sp; | ||
428 | const int cpu = smp_processor_id(); | ||
429 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; | ||
430 | |||
431 | sp = regs->sp; | ||
432 | printk("CPU %d ", cpu); | ||
433 | __show_regs(regs); | ||
434 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | ||
435 | cur->comm, cur->pid, task_thread_info(cur), cur); | ||
436 | |||
437 | /* | ||
438 | * When in-kernel, we also print out the stack and code at the | ||
439 | * time of the fault.. | ||
440 | */ | ||
441 | if (!user_mode(regs)) { | ||
442 | unsigned int code_prologue = code_bytes * 43 / 64; | ||
443 | unsigned int code_len = code_bytes; | ||
444 | unsigned char c; | ||
445 | u8 *ip; | ||
446 | |||
447 | printk("Stack: "); | ||
448 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, | ||
449 | regs->bp, ""); | ||
450 | |||
451 | printk(KERN_EMERG "Code: "); | ||
452 | |||
453 | ip = (u8 *)regs->ip - code_prologue; | ||
454 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | ||
455 | /* try starting at RIP */ | ||
456 | ip = (u8 *)regs->ip; | ||
457 | code_len = code_len - code_prologue + 1; | ||
458 | } | ||
459 | for (i = 0; i < code_len; i++, ip++) { | ||
460 | if (ip < (u8 *)PAGE_OFFSET || | ||
461 | probe_kernel_address(ip, c)) { | ||
462 | printk(" Bad RIP value."); | ||
463 | break; | ||
464 | } | ||
465 | if (ip == (u8 *)regs->ip) | ||
466 | printk("<%02x> ", c); | ||
467 | else | ||
468 | printk("%02x ", c); | ||
469 | } | ||
470 | } | ||
471 | printk("\n"); | ||
472 | } | ||
473 | |||
474 | int is_valid_bugaddr(unsigned long ip) | ||
475 | { | ||
476 | unsigned short ud2; | ||
477 | |||
478 | if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2))) | ||
479 | return 0; | ||
480 | |||
481 | return ud2 == 0x0b0f; | ||
482 | } | ||
483 | |||
484 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
485 | static int die_owner = -1; | ||
486 | static unsigned int die_nest_count; | ||
487 | |||
488 | unsigned __kprobes long oops_begin(void) | ||
489 | { | ||
490 | int cpu; | ||
491 | unsigned long flags; | ||
492 | |||
493 | oops_enter(); | ||
494 | |||
495 | /* racy, but better than risking deadlock. */ | ||
496 | raw_local_irq_save(flags); | ||
497 | cpu = smp_processor_id(); | ||
498 | if (!__raw_spin_trylock(&die_lock)) { | ||
499 | if (cpu == die_owner) | ||
500 | /* nested oops. should stop eventually */; | ||
501 | else | ||
502 | __raw_spin_lock(&die_lock); | ||
503 | } | ||
504 | die_nest_count++; | ||
505 | die_owner = cpu; | ||
506 | console_verbose(); | ||
507 | bust_spinlocks(1); | ||
508 | return flags; | ||
509 | } | ||
510 | |||
511 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
512 | { | ||
513 | die_owner = -1; | ||
514 | bust_spinlocks(0); | ||
515 | die_nest_count--; | ||
516 | if (!die_nest_count) | ||
517 | /* Nest count reaches zero, release the lock. */ | ||
518 | __raw_spin_unlock(&die_lock); | ||
519 | raw_local_irq_restore(flags); | ||
520 | if (!regs) { | ||
521 | oops_exit(); | ||
522 | return; | ||
523 | } | ||
524 | if (panic_on_oops) | ||
525 | panic("Fatal exception"); | ||
526 | oops_exit(); | ||
527 | do_exit(signr); | ||
528 | } | ||
529 | |||
530 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
531 | { | ||
532 | printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter); | ||
533 | #ifdef CONFIG_PREEMPT | ||
534 | printk("PREEMPT "); | ||
535 | #endif | ||
536 | #ifdef CONFIG_SMP | ||
537 | printk("SMP "); | ||
538 | #endif | ||
539 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
540 | printk("DEBUG_PAGEALLOC"); | ||
541 | #endif | ||
542 | printk("\n"); | ||
543 | if (notify_die(DIE_OOPS, str, regs, err, | ||
544 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
545 | return 1; | ||
546 | |||
547 | show_registers(regs); | ||
548 | add_taint(TAINT_DIE); | ||
549 | /* Executive summary in case the oops scrolled away */ | ||
550 | printk(KERN_ALERT "RIP "); | ||
551 | printk_address(regs->ip, 1); | ||
552 | printk(" RSP <%016lx>\n", regs->sp); | ||
553 | if (kexec_should_crash(current)) | ||
554 | crash_kexec(regs); | ||
555 | return 0; | ||
556 | } | ||
557 | |||
558 | void die(const char *str, struct pt_regs *regs, long err) | ||
559 | { | ||
560 | unsigned long flags = oops_begin(); | ||
561 | |||
562 | if (!user_mode(regs)) | ||
563 | report_bug(regs->ip, regs); | ||
564 | |||
565 | if (__die(str, regs, err)) | ||
566 | regs = NULL; | ||
567 | oops_end(flags, regs, SIGSEGV); | ||
568 | } | ||
569 | |||
570 | notrace __kprobes void | ||
571 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
572 | { | ||
573 | unsigned long flags; | ||
574 | |||
575 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
576 | return; | ||
577 | |||
578 | flags = oops_begin(); | ||
579 | /* | ||
580 | * We are in trouble anyway, lets at least try | ||
581 | * to get a message out. | ||
582 | */ | ||
583 | printk(KERN_EMERG "%s", str); | ||
584 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
585 | smp_processor_id(), regs->ip); | ||
586 | show_registers(regs); | ||
587 | if (kexec_should_crash(current)) | ||
588 | crash_kexec(regs); | ||
589 | if (do_panic || panic_on_oops) | ||
590 | panic("Non maskable interrupt"); | ||
591 | oops_end(flags, NULL, SIGBUS); | ||
592 | nmi_exit(); | ||
593 | local_irq_enable(); | ||
594 | do_exit(SIGBUS); | ||
595 | } | ||
596 | |||
597 | static void __kprobes | ||
598 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, | ||
599 | long error_code, siginfo_t *info) | ||
600 | { | ||
601 | struct task_struct *tsk = current; | ||
602 | |||
603 | if (!user_mode(regs)) | ||
604 | goto kernel_trap; | ||
605 | |||
606 | /* | ||
607 | * We want error_code and trap_no set for userspace faults and | ||
608 | * kernelspace faults which result in die(), but not | ||
609 | * kernelspace faults which are fixed up. die() gives the | ||
610 | * process no chance to handle the signal and notice the | ||
611 | * kernel fault information, so that won't result in polluting | ||
612 | * the information about previously queued, but not yet | ||
613 | * delivered, faults. See also do_general_protection below. | ||
614 | */ | ||
615 | tsk->thread.error_code = error_code; | ||
616 | tsk->thread.trap_no = trapnr; | ||
617 | |||
618 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && | ||
619 | printk_ratelimit()) { | ||
620 | printk(KERN_INFO | ||
621 | "%s[%d] trap %s ip:%lx sp:%lx error:%lx", | ||
622 | tsk->comm, tsk->pid, str, | ||
623 | regs->ip, regs->sp, error_code); | ||
624 | print_vma_addr(" in ", regs->ip); | ||
625 | printk("\n"); | ||
626 | } | ||
627 | |||
628 | if (info) | ||
629 | force_sig_info(signr, info, tsk); | ||
630 | else | ||
631 | force_sig(signr, tsk); | ||
632 | return; | ||
633 | |||
634 | kernel_trap: | ||
635 | if (!fixup_exception(regs)) { | ||
636 | tsk->thread.error_code = error_code; | ||
637 | tsk->thread.trap_no = trapnr; | ||
638 | die(str, regs, error_code); | ||
639 | } | ||
640 | return; | ||
641 | } | ||
642 | |||
643 | #define DO_ERROR(trapnr, signr, str, name) \ | ||
644 | asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ | ||
645 | { \ | ||
646 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | ||
647 | == NOTIFY_STOP) \ | ||
648 | return; \ | ||
649 | conditional_sti(regs); \ | ||
650 | do_trap(trapnr, signr, str, regs, error_code, NULL); \ | ||
651 | } | ||
652 | |||
653 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | ||
654 | asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ | ||
655 | { \ | ||
656 | siginfo_t info; \ | ||
657 | info.si_signo = signr; \ | ||
658 | info.si_errno = 0; \ | ||
659 | info.si_code = sicode; \ | ||
660 | info.si_addr = (void __user *)siaddr; \ | ||
661 | trace_hardirqs_fixup(); \ | ||
662 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | ||
663 | == NOTIFY_STOP) \ | ||
664 | return; \ | ||
665 | conditional_sti(regs); \ | ||
666 | do_trap(trapnr, signr, str, regs, error_code, &info); \ | ||
667 | } | ||
668 | |||
669 | DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | ||
670 | DO_ERROR(4, SIGSEGV, "overflow", overflow) | ||
671 | DO_ERROR(5, SIGSEGV, "bounds", bounds) | ||
672 | DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) | ||
673 | DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | ||
674 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | ||
675 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | ||
676 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) | ||
677 | |||
678 | /* Runs on IST stack */ | ||
679 | asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) | ||
680 | { | ||
681 | if (notify_die(DIE_TRAP, "stack segment", regs, error_code, | ||
682 | 12, SIGBUS) == NOTIFY_STOP) | ||
683 | return; | ||
684 | preempt_conditional_sti(regs); | ||
685 | do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); | ||
686 | preempt_conditional_cli(regs); | ||
687 | } | ||
688 | |||
689 | asmlinkage void do_double_fault(struct pt_regs *regs, long error_code) | ||
690 | { | ||
691 | static const char str[] = "double fault"; | ||
692 | struct task_struct *tsk = current; | ||
693 | |||
694 | /* Return not checked because double check cannot be ignored */ | ||
695 | notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); | ||
696 | |||
697 | tsk->thread.error_code = error_code; | ||
698 | tsk->thread.trap_no = 8; | ||
699 | |||
700 | /* This is always a kernel trap and never fixable (and thus must | ||
701 | never return). */ | ||
702 | for (;;) | ||
703 | die(str, regs, error_code); | ||
704 | } | ||
705 | |||
706 | asmlinkage void __kprobes | ||
707 | do_general_protection(struct pt_regs *regs, long error_code) | ||
708 | { | ||
709 | struct task_struct *tsk; | ||
710 | |||
711 | conditional_sti(regs); | ||
712 | |||
713 | tsk = current; | ||
714 | if (!user_mode(regs)) | ||
715 | goto gp_in_kernel; | ||
716 | |||
717 | tsk->thread.error_code = error_code; | ||
718 | tsk->thread.trap_no = 13; | ||
719 | |||
720 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | ||
721 | printk_ratelimit()) { | ||
722 | printk(KERN_INFO | ||
723 | "%s[%d] general protection ip:%lx sp:%lx error:%lx", | ||
724 | tsk->comm, tsk->pid, | ||
725 | regs->ip, regs->sp, error_code); | ||
726 | print_vma_addr(" in ", regs->ip); | ||
727 | printk("\n"); | ||
728 | } | ||
729 | |||
730 | force_sig(SIGSEGV, tsk); | ||
731 | return; | ||
732 | |||
733 | gp_in_kernel: | ||
734 | if (fixup_exception(regs)) | ||
735 | return; | ||
736 | |||
737 | tsk->thread.error_code = error_code; | ||
738 | tsk->thread.trap_no = 13; | ||
739 | if (notify_die(DIE_GPF, "general protection fault", regs, | ||
740 | error_code, 13, SIGSEGV) == NOTIFY_STOP) | ||
741 | return; | ||
742 | die("general protection fault", regs, error_code); | ||
743 | } | ||
744 | |||
745 | static notrace __kprobes void | ||
746 | mem_parity_error(unsigned char reason, struct pt_regs *regs) | ||
747 | { | ||
748 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", | ||
749 | reason); | ||
750 | printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); | ||
751 | |||
752 | #if defined(CONFIG_EDAC) | ||
753 | if (edac_handler_set()) { | ||
754 | edac_atomic_assert_error(); | ||
755 | return; | ||
756 | } | ||
757 | #endif | ||
758 | |||
759 | if (panic_on_unrecovered_nmi) | ||
760 | panic("NMI: Not continuing"); | ||
761 | |||
762 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | ||
763 | |||
764 | /* Clear and disable the memory parity error line. */ | ||
765 | reason = (reason & 0xf) | 4; | ||
766 | outb(reason, 0x61); | ||
767 | } | ||
768 | |||
769 | static notrace __kprobes void | ||
770 | io_check_error(unsigned char reason, struct pt_regs *regs) | ||
771 | { | ||
772 | printk("NMI: IOCK error (debug interrupt?)\n"); | ||
773 | show_registers(regs); | ||
774 | |||
775 | /* Re-enable the IOCK line, wait for a few seconds */ | ||
776 | reason = (reason & 0xf) | 8; | ||
777 | outb(reason, 0x61); | ||
778 | mdelay(2000); | ||
779 | reason &= ~8; | ||
780 | outb(reason, 0x61); | ||
781 | } | ||
782 | |||
783 | static notrace __kprobes void | ||
784 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | ||
785 | { | ||
786 | if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == | ||
787 | NOTIFY_STOP) | ||
788 | return; | ||
789 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", | ||
790 | reason); | ||
791 | printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); | ||
792 | |||
793 | if (panic_on_unrecovered_nmi) | ||
794 | panic("NMI: Not continuing"); | ||
795 | |||
796 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | ||
797 | } | ||
798 | |||
799 | /* Runs on IST stack. This code must keep interrupts off all the time. | ||
800 | Nested NMIs are prevented by the CPU. */ | ||
801 | asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) | ||
802 | { | ||
803 | unsigned char reason = 0; | ||
804 | int cpu; | ||
805 | |||
806 | cpu = smp_processor_id(); | ||
807 | |||
808 | /* Only the BSP gets external NMIs from the system. */ | ||
809 | if (!cpu) | ||
810 | reason = get_nmi_reason(); | ||
811 | |||
812 | if (!(reason & 0xc0)) { | ||
813 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) | ||
814 | == NOTIFY_STOP) | ||
815 | return; | ||
816 | /* | ||
817 | * Ok, so this is none of the documented NMI sources, | ||
818 | * so it must be the NMI watchdog. | ||
819 | */ | ||
820 | if (nmi_watchdog_tick(regs, reason)) | ||
821 | return; | ||
822 | if (!do_nmi_callback(regs, cpu)) | ||
823 | unknown_nmi_error(reason, regs); | ||
824 | |||
825 | return; | ||
826 | } | ||
827 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | ||
828 | return; | ||
829 | |||
830 | /* AK: following checks seem to be broken on modern chipsets. FIXME */ | ||
831 | if (reason & 0x80) | ||
832 | mem_parity_error(reason, regs); | ||
833 | if (reason & 0x40) | ||
834 | io_check_error(reason, regs); | ||
835 | } | ||
836 | |||
837 | asmlinkage notrace __kprobes void | ||
838 | do_nmi(struct pt_regs *regs, long error_code) | ||
839 | { | ||
840 | nmi_enter(); | ||
841 | |||
842 | add_pda(__nmi_count, 1); | ||
843 | |||
844 | if (!ignore_nmis) | ||
845 | default_do_nmi(regs); | ||
846 | |||
847 | nmi_exit(); | ||
848 | } | ||
849 | |||
850 | void stop_nmi(void) | ||
851 | { | ||
852 | acpi_nmi_disable(); | ||
853 | ignore_nmis++; | ||
854 | } | ||
855 | |||
856 | void restart_nmi(void) | ||
857 | { | ||
858 | ignore_nmis--; | ||
859 | acpi_nmi_enable(); | ||
860 | } | ||
861 | |||
862 | /* runs on IST stack. */ | ||
863 | asmlinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | ||
864 | { | ||
865 | trace_hardirqs_fixup(); | ||
866 | |||
867 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | ||
868 | == NOTIFY_STOP) | ||
869 | return; | ||
870 | |||
871 | preempt_conditional_sti(regs); | ||
872 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); | ||
873 | preempt_conditional_cli(regs); | ||
874 | } | ||
875 | |||
876 | /* Help handler running on IST stack to switch back to user stack | ||
877 | for scheduling or signal handling. The actual stack switch is done in | ||
878 | entry.S */ | ||
879 | asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | ||
880 | { | ||
881 | struct pt_regs *regs = eregs; | ||
882 | /* Did already sync */ | ||
883 | if (eregs == (struct pt_regs *)eregs->sp) | ||
884 | ; | ||
885 | /* Exception from user space */ | ||
886 | else if (user_mode(eregs)) | ||
887 | regs = task_pt_regs(current); | ||
888 | /* Exception from kernel and interrupts are enabled. Move to | ||
889 | kernel process stack. */ | ||
890 | else if (eregs->flags & X86_EFLAGS_IF) | ||
891 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); | ||
892 | if (eregs != regs) | ||
893 | *regs = *eregs; | ||
894 | return regs; | ||
895 | } | ||
896 | |||
897 | /* runs on IST stack. */ | ||
898 | asmlinkage void __kprobes do_debug(struct pt_regs *regs, | ||
899 | unsigned long error_code) | ||
900 | { | ||
901 | struct task_struct *tsk = current; | ||
902 | unsigned long condition; | ||
903 | siginfo_t info; | ||
904 | |||
905 | trace_hardirqs_fixup(); | ||
906 | |||
907 | get_debugreg(condition, 6); | ||
908 | |||
909 | /* | ||
910 | * The processor cleared BTF, so don't mark that we need it set. | ||
911 | */ | ||
912 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | ||
913 | tsk->thread.debugctlmsr = 0; | ||
914 | |||
915 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | ||
916 | SIGTRAP) == NOTIFY_STOP) | ||
917 | return; | ||
918 | |||
919 | preempt_conditional_sti(regs); | ||
920 | |||
921 | /* Mask out spurious debug traps due to lazy DR7 setting */ | ||
922 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | ||
923 | if (!tsk->thread.debugreg7) | ||
924 | goto clear_dr7; | ||
925 | } | ||
926 | |||
927 | tsk->thread.debugreg6 = condition; | ||
928 | |||
929 | /* | ||
930 | * Single-stepping through TF: make sure we ignore any events in | ||
931 | * kernel space (but re-enable TF when returning to user mode). | ||
932 | */ | ||
933 | if (condition & DR_STEP) { | ||
934 | if (!user_mode(regs)) | ||
935 | goto clear_TF_reenable; | ||
936 | } | ||
937 | |||
938 | /* Ok, finally something we can handle */ | ||
939 | tsk->thread.trap_no = 1; | ||
940 | tsk->thread.error_code = error_code; | ||
941 | info.si_signo = SIGTRAP; | ||
942 | info.si_errno = 0; | ||
943 | info.si_code = get_si_code(condition); | ||
944 | info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL; | ||
945 | force_sig_info(SIGTRAP, &info, tsk); | ||
946 | |||
947 | clear_dr7: | ||
948 | set_debugreg(0, 7); | ||
949 | preempt_conditional_cli(regs); | ||
950 | return; | ||
951 | |||
952 | clear_TF_reenable: | ||
953 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | ||
954 | regs->flags &= ~X86_EFLAGS_TF; | ||
955 | preempt_conditional_cli(regs); | ||
956 | return; | ||
957 | } | ||
958 | |||
959 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | ||
960 | { | ||
961 | if (fixup_exception(regs)) | ||
962 | return 1; | ||
963 | |||
964 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); | ||
965 | /* Illegal floating point operation in the kernel */ | ||
966 | current->thread.trap_no = trapnr; | ||
967 | die(str, regs, 0); | ||
968 | return 0; | ||
969 | } | ||
970 | |||
971 | /* | ||
972 | * Note that we play around with the 'TS' bit in an attempt to get | ||
973 | * the correct behaviour even in the presence of the asynchronous | ||
974 | * IRQ13 behaviour | ||
975 | */ | ||
976 | asmlinkage void do_coprocessor_error(struct pt_regs *regs) | ||
977 | { | ||
978 | void __user *ip = (void __user *)(regs->ip); | ||
979 | struct task_struct *task; | ||
980 | siginfo_t info; | ||
981 | unsigned short cwd, swd; | ||
982 | |||
983 | conditional_sti(regs); | ||
984 | if (!user_mode(regs) && | ||
985 | kernel_math_error(regs, "kernel x87 math error", 16)) | ||
986 | return; | ||
987 | |||
988 | /* | ||
989 | * Save the info for the exception handler and clear the error. | ||
990 | */ | ||
991 | task = current; | ||
992 | save_init_fpu(task); | ||
993 | task->thread.trap_no = 16; | ||
994 | task->thread.error_code = 0; | ||
995 | info.si_signo = SIGFPE; | ||
996 | info.si_errno = 0; | ||
997 | info.si_code = __SI_FAULT; | ||
998 | info.si_addr = ip; | ||
999 | /* | ||
1000 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | ||
1001 | * status. 0x3f is the exception bits in these regs, 0x200 is the | ||
1002 | * C1 reg you need in case of a stack fault, 0x040 is the stack | ||
1003 | * fault bit. We should only be taking one exception at a time, | ||
1004 | * so if this combination doesn't produce any single exception, | ||
1005 | * then we have a bad program that isn't synchronizing its FPU usage | ||
1006 | * and it will suffer the consequences since we won't be able to | ||
1007 | * fully reproduce the context of the exception | ||
1008 | */ | ||
1009 | cwd = get_fpu_cwd(task); | ||
1010 | swd = get_fpu_swd(task); | ||
1011 | switch (swd & ~cwd & 0x3f) { | ||
1012 | case 0x000: /* No unmasked exception */ | ||
1013 | default: /* Multiple exceptions */ | ||
1014 | break; | ||
1015 | case 0x001: /* Invalid Op */ | ||
1016 | /* | ||
1017 | * swd & 0x240 == 0x040: Stack Underflow | ||
1018 | * swd & 0x240 == 0x240: Stack Overflow | ||
1019 | * User must clear the SF bit (0x40) if set | ||
1020 | */ | ||
1021 | info.si_code = FPE_FLTINV; | ||
1022 | break; | ||
1023 | case 0x002: /* Denormalize */ | ||
1024 | case 0x010: /* Underflow */ | ||
1025 | info.si_code = FPE_FLTUND; | ||
1026 | break; | ||
1027 | case 0x004: /* Zero Divide */ | ||
1028 | info.si_code = FPE_FLTDIV; | ||
1029 | break; | ||
1030 | case 0x008: /* Overflow */ | ||
1031 | info.si_code = FPE_FLTOVF; | ||
1032 | break; | ||
1033 | case 0x020: /* Precision */ | ||
1034 | info.si_code = FPE_FLTRES; | ||
1035 | break; | ||
1036 | } | ||
1037 | force_sig_info(SIGFPE, &info, task); | ||
1038 | } | ||
1039 | |||
1040 | asmlinkage void bad_intr(void) | ||
1041 | { | ||
1042 | printk("bad interrupt"); | ||
1043 | } | ||
1044 | |||
1045 | asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | ||
1046 | { | ||
1047 | void __user *ip = (void __user *)(regs->ip); | ||
1048 | struct task_struct *task; | ||
1049 | siginfo_t info; | ||
1050 | unsigned short mxcsr; | ||
1051 | |||
1052 | conditional_sti(regs); | ||
1053 | if (!user_mode(regs) && | ||
1054 | kernel_math_error(regs, "kernel simd math error", 19)) | ||
1055 | return; | ||
1056 | |||
1057 | /* | ||
1058 | * Save the info for the exception handler and clear the error. | ||
1059 | */ | ||
1060 | task = current; | ||
1061 | save_init_fpu(task); | ||
1062 | task->thread.trap_no = 19; | ||
1063 | task->thread.error_code = 0; | ||
1064 | info.si_signo = SIGFPE; | ||
1065 | info.si_errno = 0; | ||
1066 | info.si_code = __SI_FAULT; | ||
1067 | info.si_addr = ip; | ||
1068 | /* | ||
1069 | * The SIMD FPU exceptions are handled a little differently, as there | ||
1070 | * is only a single status/control register. Thus, to determine which | ||
1071 | * unmasked exception was caught we must mask the exception mask bits | ||
1072 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
1073 | */ | ||
1074 | mxcsr = get_fpu_mxcsr(task); | ||
1075 | switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { | ||
1076 | case 0x000: | ||
1077 | default: | ||
1078 | break; | ||
1079 | case 0x001: /* Invalid Op */ | ||
1080 | info.si_code = FPE_FLTINV; | ||
1081 | break; | ||
1082 | case 0x002: /* Denormalize */ | ||
1083 | case 0x010: /* Underflow */ | ||
1084 | info.si_code = FPE_FLTUND; | ||
1085 | break; | ||
1086 | case 0x004: /* Zero Divide */ | ||
1087 | info.si_code = FPE_FLTDIV; | ||
1088 | break; | ||
1089 | case 0x008: /* Overflow */ | ||
1090 | info.si_code = FPE_FLTOVF; | ||
1091 | break; | ||
1092 | case 0x020: /* Precision */ | ||
1093 | info.si_code = FPE_FLTRES; | ||
1094 | break; | ||
1095 | } | ||
1096 | force_sig_info(SIGFPE, &info, task); | ||
1097 | } | ||
1098 | |||
1099 | asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs) | ||
1100 | { | ||
1101 | } | ||
1102 | |||
1103 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) | ||
1104 | { | ||
1105 | } | ||
1106 | |||
1107 | asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) | ||
1108 | { | ||
1109 | } | ||
1110 | |||
1111 | /* | ||
1112 | * 'math_state_restore()' saves the current math information in the | ||
1113 | * old math state array, and gets the new ones from the current task | ||
1114 | * | ||
1115 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. | ||
1116 | * Don't touch unless you *really* know how it works. | ||
1117 | */ | ||
1118 | asmlinkage void math_state_restore(void) | ||
1119 | { | ||
1120 | struct task_struct *me = current; | ||
1121 | |||
1122 | if (!used_math()) { | ||
1123 | local_irq_enable(); | ||
1124 | /* | ||
1125 | * does a slab alloc which can sleep | ||
1126 | */ | ||
1127 | if (init_fpu(me)) { | ||
1128 | /* | ||
1129 | * ran out of memory! | ||
1130 | */ | ||
1131 | do_group_exit(SIGKILL); | ||
1132 | return; | ||
1133 | } | ||
1134 | local_irq_disable(); | ||
1135 | } | ||
1136 | |||
1137 | clts(); /* Allow maths ops (or we recurse) */ | ||
1138 | /* | ||
1139 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | ||
1140 | */ | ||
1141 | if (unlikely(restore_fpu_checking(me))) { | ||
1142 | stts(); | ||
1143 | force_sig(SIGSEGV, me); | ||
1144 | return; | ||
1145 | } | ||
1146 | task_thread_info(me)->status |= TS_USEDFPU; | ||
1147 | me->fpu_counter++; | ||
1148 | } | ||
1149 | EXPORT_SYMBOL_GPL(math_state_restore); | ||
1150 | |||
1151 | void __init trap_init(void) | ||
1152 | { | ||
1153 | set_intr_gate(0, ÷_error); | ||
1154 | set_intr_gate_ist(1, &debug, DEBUG_STACK); | ||
1155 | set_intr_gate_ist(2, &nmi, NMI_STACK); | ||
1156 | /* int3 can be called from all */ | ||
1157 | set_system_gate_ist(3, &int3, DEBUG_STACK); | ||
1158 | /* int4 can be called from all */ | ||
1159 | set_system_gate(4, &overflow); | ||
1160 | set_intr_gate(5, &bounds); | ||
1161 | set_intr_gate(6, &invalid_op); | ||
1162 | set_intr_gate(7, &device_not_available); | ||
1163 | set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); | ||
1164 | set_intr_gate(9, &coprocessor_segment_overrun); | ||
1165 | set_intr_gate(10, &invalid_TSS); | ||
1166 | set_intr_gate(11, &segment_not_present); | ||
1167 | set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); | ||
1168 | set_intr_gate(13, &general_protection); | ||
1169 | set_intr_gate(14, &page_fault); | ||
1170 | set_intr_gate(15, &spurious_interrupt_bug); | ||
1171 | set_intr_gate(16, &coprocessor_error); | ||
1172 | set_intr_gate(17, &alignment_check); | ||
1173 | #ifdef CONFIG_X86_MCE | ||
1174 | set_intr_gate_ist(18, &machine_check, MCE_STACK); | ||
1175 | #endif | ||
1176 | set_intr_gate(19, &simd_coprocessor_error); | ||
1177 | |||
1178 | #ifdef CONFIG_IA32_EMULATION | ||
1179 | set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); | ||
1180 | #endif | ||
1181 | /* | ||
1182 | * Should be a barrier for any external CPU state: | ||
1183 | */ | ||
1184 | cpu_init(); | ||
1185 | } | ||
1186 | |||
1187 | static int __init oops_setup(char *s) | ||
1188 | { | ||
1189 | if (!s) | ||
1190 | return -EINVAL; | ||
1191 | if (!strcmp(s, "panic")) | ||
1192 | panic_on_oops = 1; | ||
1193 | return 0; | ||
1194 | } | ||
1195 | early_param("oops", oops_setup); | ||
1196 | |||
1197 | static int __init kstack_setup(char *s) | ||
1198 | { | ||
1199 | if (!s) | ||
1200 | return -EINVAL; | ||
1201 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
1202 | return 0; | ||
1203 | } | ||
1204 | early_param("kstack", kstack_setup); | ||
1205 | |||
1206 | static int __init code_bytes_setup(char *s) | ||
1207 | { | ||
1208 | code_bytes = simple_strtoul(s, NULL, 0); | ||
1209 | if (code_bytes > 8192) | ||
1210 | code_bytes = 8192; | ||
1211 | |||
1212 | return 1; | ||
1213 | } | ||
1214 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 520cca0ee04e..6513d41ea21e 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c | |||
@@ -47,16 +47,26 @@ static __init int mps_oem_check(struct mp_config_table *mpc, char *oem, | |||
47 | /* Hook from generic ACPI tables.c */ | 47 | /* Hook from generic ACPI tables.c */ |
48 | static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 48 | static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
49 | { | 49 | { |
50 | unsigned long oem_addr; | 50 | unsigned long oem_addr = 0; |
51 | int check_dsdt; | ||
52 | int ret = 0; | ||
53 | |||
54 | /* check dsdt at first to avoid clear fix_map for oem_addr */ | ||
55 | check_dsdt = es7000_check_dsdt(); | ||
56 | |||
51 | if (!find_unisys_acpi_oem_table(&oem_addr)) { | 57 | if (!find_unisys_acpi_oem_table(&oem_addr)) { |
52 | if (es7000_check_dsdt()) | 58 | if (check_dsdt) |
53 | return parse_unisys_oem((char *)oem_addr); | 59 | ret = parse_unisys_oem((char *)oem_addr); |
54 | else { | 60 | else { |
55 | setup_unisys(); | 61 | setup_unisys(); |
56 | return 1; | 62 | ret = 1; |
57 | } | 63 | } |
64 | /* | ||
65 | * we need to unmap it | ||
66 | */ | ||
67 | unmap_unisys_acpi_oem_table(oem_addr); | ||
58 | } | 68 | } |
59 | return 0; | 69 | return ret; |
60 | } | 70 | } |
61 | #else | 71 | #else |
62 | static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 72 | static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index dfb932dcf136..59f89b434b45 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -13,12 +13,8 @@ obj-$(CONFIG_MMIOTRACE) += mmiotrace.o | |||
13 | mmiotrace-y := pf_in.o mmio-mod.o | 13 | mmiotrace-y := pf_in.o mmio-mod.o |
14 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o | 14 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o |
15 | 15 | ||
16 | ifeq ($(CONFIG_X86_32),y) | 16 | obj-$(CONFIG_NUMA) += numa_$(BITS).o |
17 | obj-$(CONFIG_NUMA) += discontig_32.o | ||
18 | else | ||
19 | obj-$(CONFIG_NUMA) += numa_64.o | ||
20 | obj-$(CONFIG_K8_NUMA) += k8topology_64.o | 17 | obj-$(CONFIG_K8_NUMA) += k8topology_64.o |
21 | endif | ||
22 | obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o | 18 | obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o |
23 | 19 | ||
24 | obj-$(CONFIG_MEMTEST) += memtest.o | 20 | obj-$(CONFIG_MEMTEST) += memtest.o |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a742d753d5b0..3f2b8962cbd0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -592,11 +592,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
592 | unsigned long flags; | 592 | unsigned long flags; |
593 | #endif | 593 | #endif |
594 | 594 | ||
595 | /* | ||
596 | * We can fault from pretty much anywhere, with unknown IRQ state. | ||
597 | */ | ||
598 | trace_hardirqs_fixup(); | ||
599 | |||
600 | tsk = current; | 595 | tsk = current; |
601 | mm = tsk->mm; | 596 | mm = tsk->mm; |
602 | prefetchw(&mm->mmap_sem); | 597 | prefetchw(&mm->mmap_sem); |
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 007bb06c7504..4ba373c5b8c8 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c | |||
@@ -82,7 +82,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, | |||
82 | pte_t pte = gup_get_pte(ptep); | 82 | pte_t pte = gup_get_pte(ptep); |
83 | struct page *page; | 83 | struct page *page; |
84 | 84 | ||
85 | if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) { | 85 | if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) { |
86 | pte_unmap(ptep); | 86 | pte_unmap(ptep); |
87 | return 0; | 87 | return 0; |
88 | } | 88 | } |
@@ -116,10 +116,10 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, | |||
116 | mask = _PAGE_PRESENT|_PAGE_USER; | 116 | mask = _PAGE_PRESENT|_PAGE_USER; |
117 | if (write) | 117 | if (write) |
118 | mask |= _PAGE_RW; | 118 | mask |= _PAGE_RW; |
119 | if ((pte_val(pte) & mask) != mask) | 119 | if ((pte_flags(pte) & mask) != mask) |
120 | return 0; | 120 | return 0; |
121 | /* hugepages are never "special" */ | 121 | /* hugepages are never "special" */ |
122 | VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); | 122 | VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL); |
123 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | 123 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); |
124 | 124 | ||
125 | refs = 0; | 125 | refs = 0; |
@@ -173,10 +173,10 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, | |||
173 | mask = _PAGE_PRESENT|_PAGE_USER; | 173 | mask = _PAGE_PRESENT|_PAGE_USER; |
174 | if (write) | 174 | if (write) |
175 | mask |= _PAGE_RW; | 175 | mask |= _PAGE_RW; |
176 | if ((pte_val(pte) & mask) != mask) | 176 | if ((pte_flags(pte) & mask) != mask) |
177 | return 0; | 177 | return 0; |
178 | /* hugepages are never "special" */ | 178 | /* hugepages are never "special" */ |
179 | VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); | 179 | VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL); |
180 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | 180 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); |
181 | 181 | ||
182 | refs = 0; | 182 | refs = 0; |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index bbe044dbe014..8396868e82c5 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -558,7 +558,7 @@ void zap_low_mappings(void) | |||
558 | 558 | ||
559 | int nx_enabled; | 559 | int nx_enabled; |
560 | 560 | ||
561 | pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL); | 561 | pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); |
562 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | 562 | EXPORT_SYMBOL_GPL(__supported_pte_mask); |
563 | 563 | ||
564 | #ifdef CONFIG_X86_PAE | 564 | #ifdef CONFIG_X86_PAE |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3e10054c5731..b8e461d49412 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -89,7 +89,7 @@ early_param("gbpages", parse_direct_gbpages_on); | |||
89 | 89 | ||
90 | int after_bootmem; | 90 | int after_bootmem; |
91 | 91 | ||
92 | unsigned long __supported_pte_mask __read_mostly = ~0UL; | 92 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; |
93 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | 93 | EXPORT_SYMBOL_GPL(__supported_pte_mask); |
94 | 94 | ||
95 | static int do_not_nx __cpuinitdata; | 95 | static int do_not_nx __cpuinitdata; |
@@ -196,9 +196,6 @@ set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) | |||
196 | } | 196 | } |
197 | 197 | ||
198 | pte = pte_offset_kernel(pmd, vaddr); | 198 | pte = pte_offset_kernel(pmd, vaddr); |
199 | if (!pte_none(*pte) && pte_val(new_pte) && | ||
200 | pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask)) | ||
201 | pte_ERROR(*pte); | ||
202 | set_pte(pte, new_pte); | 199 | set_pte(pte, new_pte); |
203 | 200 | ||
204 | /* | 201 | /* |
@@ -313,7 +310,7 @@ static __ref void *alloc_low_page(unsigned long *phys) | |||
313 | if (pfn >= table_top) | 310 | if (pfn >= table_top) |
314 | panic("alloc_low_page: ran out of memory"); | 311 | panic("alloc_low_page: ran out of memory"); |
315 | 312 | ||
316 | adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE); | 313 | adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); |
317 | memset(adr, 0, PAGE_SIZE); | 314 | memset(adr, 0, PAGE_SIZE); |
318 | *phys = pfn * PAGE_SIZE; | 315 | *phys = pfn * PAGE_SIZE; |
319 | return adr; | 316 | return adr; |
@@ -749,7 +746,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
749 | old_start = mr[i].start; | 746 | old_start = mr[i].start; |
750 | memmove(&mr[i], &mr[i+1], | 747 | memmove(&mr[i], &mr[i+1], |
751 | (nr_range - 1 - i) * sizeof (struct map_range)); | 748 | (nr_range - 1 - i) * sizeof (struct map_range)); |
752 | mr[i].start = old_start; | 749 | mr[i--].start = old_start; |
753 | nr_range--; | 750 | nr_range--; |
754 | } | 751 | } |
755 | 752 | ||
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 8cbeda15cd29..e4c43ec71b29 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -45,6 +45,27 @@ unsigned long __phys_addr(unsigned long x) | |||
45 | } | 45 | } |
46 | EXPORT_SYMBOL(__phys_addr); | 46 | EXPORT_SYMBOL(__phys_addr); |
47 | 47 | ||
48 | bool __virt_addr_valid(unsigned long x) | ||
49 | { | ||
50 | if (x >= __START_KERNEL_map) { | ||
51 | x -= __START_KERNEL_map; | ||
52 | if (x >= KERNEL_IMAGE_SIZE) | ||
53 | return false; | ||
54 | x += phys_base; | ||
55 | } else { | ||
56 | if (x < PAGE_OFFSET) | ||
57 | return false; | ||
58 | x -= PAGE_OFFSET; | ||
59 | if (system_state == SYSTEM_BOOTING ? | ||
60 | x > MAXMEM : !phys_addr_valid(x)) { | ||
61 | return false; | ||
62 | } | ||
63 | } | ||
64 | |||
65 | return pfn_valid(x >> PAGE_SHIFT); | ||
66 | } | ||
67 | EXPORT_SYMBOL(__virt_addr_valid); | ||
68 | |||
48 | #else | 69 | #else |
49 | 70 | ||
50 | static inline int phys_addr_valid(unsigned long addr) | 71 | static inline int phys_addr_valid(unsigned long addr) |
@@ -56,13 +77,24 @@ static inline int phys_addr_valid(unsigned long addr) | |||
56 | unsigned long __phys_addr(unsigned long x) | 77 | unsigned long __phys_addr(unsigned long x) |
57 | { | 78 | { |
58 | /* VMALLOC_* aren't constants; not available at the boot time */ | 79 | /* VMALLOC_* aren't constants; not available at the boot time */ |
59 | VIRTUAL_BUG_ON(x < PAGE_OFFSET || (system_state != SYSTEM_BOOTING && | 80 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); |
60 | is_vmalloc_addr((void *)x))); | 81 | VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING && |
82 | is_vmalloc_addr((void *) x)); | ||
61 | return x - PAGE_OFFSET; | 83 | return x - PAGE_OFFSET; |
62 | } | 84 | } |
63 | EXPORT_SYMBOL(__phys_addr); | 85 | EXPORT_SYMBOL(__phys_addr); |
64 | #endif | 86 | #endif |
65 | 87 | ||
88 | bool __virt_addr_valid(unsigned long x) | ||
89 | { | ||
90 | if (x < PAGE_OFFSET) | ||
91 | return false; | ||
92 | if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x)) | ||
93 | return false; | ||
94 | return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); | ||
95 | } | ||
96 | EXPORT_SYMBOL(__virt_addr_valid); | ||
97 | |||
66 | #endif | 98 | #endif |
67 | 99 | ||
68 | int page_is_ram(unsigned long pagenr) | 100 | int page_is_ram(unsigned long pagenr) |
@@ -242,16 +274,16 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
242 | switch (prot_val) { | 274 | switch (prot_val) { |
243 | case _PAGE_CACHE_UC: | 275 | case _PAGE_CACHE_UC: |
244 | default: | 276 | default: |
245 | prot = PAGE_KERNEL_NOCACHE; | 277 | prot = PAGE_KERNEL_IO_NOCACHE; |
246 | break; | 278 | break; |
247 | case _PAGE_CACHE_UC_MINUS: | 279 | case _PAGE_CACHE_UC_MINUS: |
248 | prot = PAGE_KERNEL_UC_MINUS; | 280 | prot = PAGE_KERNEL_IO_UC_MINUS; |
249 | break; | 281 | break; |
250 | case _PAGE_CACHE_WC: | 282 | case _PAGE_CACHE_WC: |
251 | prot = PAGE_KERNEL_WC; | 283 | prot = PAGE_KERNEL_IO_WC; |
252 | break; | 284 | break; |
253 | case _PAGE_CACHE_WB: | 285 | case _PAGE_CACHE_WB: |
254 | prot = PAGE_KERNEL; | 286 | prot = PAGE_KERNEL_IO; |
255 | break; | 287 | break; |
256 | } | 288 | } |
257 | 289 | ||
@@ -568,12 +600,12 @@ static void __init __early_set_fixmap(enum fixed_addresses idx, | |||
568 | } | 600 | } |
569 | 601 | ||
570 | static inline void __init early_set_fixmap(enum fixed_addresses idx, | 602 | static inline void __init early_set_fixmap(enum fixed_addresses idx, |
571 | unsigned long phys) | 603 | unsigned long phys, pgprot_t prot) |
572 | { | 604 | { |
573 | if (after_paging_init) | 605 | if (after_paging_init) |
574 | set_fixmap(idx, phys); | 606 | __set_fixmap(idx, phys, prot); |
575 | else | 607 | else |
576 | __early_set_fixmap(idx, phys, PAGE_KERNEL); | 608 | __early_set_fixmap(idx, phys, prot); |
577 | } | 609 | } |
578 | 610 | ||
579 | static inline void __init early_clear_fixmap(enum fixed_addresses idx) | 611 | static inline void __init early_clear_fixmap(enum fixed_addresses idx) |
@@ -584,16 +616,22 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) | |||
584 | __early_set_fixmap(idx, 0, __pgprot(0)); | 616 | __early_set_fixmap(idx, 0, __pgprot(0)); |
585 | } | 617 | } |
586 | 618 | ||
587 | 619 | static void *prev_map[FIX_BTMAPS_SLOTS] __initdata; | |
588 | static int __initdata early_ioremap_nested; | 620 | static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; |
589 | |||
590 | static int __init check_early_ioremap_leak(void) | 621 | static int __init check_early_ioremap_leak(void) |
591 | { | 622 | { |
592 | if (!early_ioremap_nested) | 623 | int count = 0; |
624 | int i; | ||
625 | |||
626 | for (i = 0; i < FIX_BTMAPS_SLOTS; i++) | ||
627 | if (prev_map[i]) | ||
628 | count++; | ||
629 | |||
630 | if (!count) | ||
593 | return 0; | 631 | return 0; |
594 | WARN(1, KERN_WARNING | 632 | WARN(1, KERN_WARNING |
595 | "Debug warning: early ioremap leak of %d areas detected.\n", | 633 | "Debug warning: early ioremap leak of %d areas detected.\n", |
596 | early_ioremap_nested); | 634 | count); |
597 | printk(KERN_WARNING | 635 | printk(KERN_WARNING |
598 | "please boot with early_ioremap_debug and report the dmesg.\n"); | 636 | "please boot with early_ioremap_debug and report the dmesg.\n"); |
599 | 637 | ||
@@ -601,18 +639,33 @@ static int __init check_early_ioremap_leak(void) | |||
601 | } | 639 | } |
602 | late_initcall(check_early_ioremap_leak); | 640 | late_initcall(check_early_ioremap_leak); |
603 | 641 | ||
604 | void __init *early_ioremap(unsigned long phys_addr, unsigned long size) | 642 | static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) |
605 | { | 643 | { |
606 | unsigned long offset, last_addr; | 644 | unsigned long offset, last_addr; |
607 | unsigned int nrpages, nesting; | 645 | unsigned int nrpages; |
608 | enum fixed_addresses idx0, idx; | 646 | enum fixed_addresses idx0, idx; |
647 | int i, slot; | ||
609 | 648 | ||
610 | WARN_ON(system_state != SYSTEM_BOOTING); | 649 | WARN_ON(system_state != SYSTEM_BOOTING); |
611 | 650 | ||
612 | nesting = early_ioremap_nested; | 651 | slot = -1; |
652 | for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { | ||
653 | if (!prev_map[i]) { | ||
654 | slot = i; | ||
655 | break; | ||
656 | } | ||
657 | } | ||
658 | |||
659 | if (slot < 0) { | ||
660 | printk(KERN_INFO "early_iomap(%08lx, %08lx) not found slot\n", | ||
661 | phys_addr, size); | ||
662 | WARN_ON(1); | ||
663 | return NULL; | ||
664 | } | ||
665 | |||
613 | if (early_ioremap_debug) { | 666 | if (early_ioremap_debug) { |
614 | printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ", | 667 | printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ", |
615 | phys_addr, size, nesting); | 668 | phys_addr, size, slot); |
616 | dump_stack(); | 669 | dump_stack(); |
617 | } | 670 | } |
618 | 671 | ||
@@ -623,11 +676,7 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size) | |||
623 | return NULL; | 676 | return NULL; |
624 | } | 677 | } |
625 | 678 | ||
626 | if (nesting >= FIX_BTMAPS_NESTING) { | 679 | prev_size[slot] = size; |
627 | WARN_ON(1); | ||
628 | return NULL; | ||
629 | } | ||
630 | early_ioremap_nested++; | ||
631 | /* | 680 | /* |
632 | * Mappings have to be page-aligned | 681 | * Mappings have to be page-aligned |
633 | */ | 682 | */ |
@@ -647,10 +696,10 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size) | |||
647 | /* | 696 | /* |
648 | * Ok, go for it.. | 697 | * Ok, go for it.. |
649 | */ | 698 | */ |
650 | idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting; | 699 | idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; |
651 | idx = idx0; | 700 | idx = idx0; |
652 | while (nrpages > 0) { | 701 | while (nrpages > 0) { |
653 | early_set_fixmap(idx, phys_addr); | 702 | early_set_fixmap(idx, phys_addr, prot); |
654 | phys_addr += PAGE_SIZE; | 703 | phys_addr += PAGE_SIZE; |
655 | --idx; | 704 | --idx; |
656 | --nrpages; | 705 | --nrpages; |
@@ -658,7 +707,20 @@ void __init *early_ioremap(unsigned long phys_addr, unsigned long size) | |||
658 | if (early_ioremap_debug) | 707 | if (early_ioremap_debug) |
659 | printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); | 708 | printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); |
660 | 709 | ||
661 | return (void *) (offset + fix_to_virt(idx0)); | 710 | prev_map[slot] = (void *) (offset + fix_to_virt(idx0)); |
711 | return prev_map[slot]; | ||
712 | } | ||
713 | |||
714 | /* Remap an IO device */ | ||
715 | void __init *early_ioremap(unsigned long phys_addr, unsigned long size) | ||
716 | { | ||
717 | return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); | ||
718 | } | ||
719 | |||
720 | /* Remap memory */ | ||
721 | void __init *early_memremap(unsigned long phys_addr, unsigned long size) | ||
722 | { | ||
723 | return __early_ioremap(phys_addr, size, PAGE_KERNEL); | ||
662 | } | 724 | } |
663 | 725 | ||
664 | void __init early_iounmap(void *addr, unsigned long size) | 726 | void __init early_iounmap(void *addr, unsigned long size) |
@@ -667,15 +729,33 @@ void __init early_iounmap(void *addr, unsigned long size) | |||
667 | unsigned long offset; | 729 | unsigned long offset; |
668 | unsigned int nrpages; | 730 | unsigned int nrpages; |
669 | enum fixed_addresses idx; | 731 | enum fixed_addresses idx; |
670 | int nesting; | 732 | int i, slot; |
733 | |||
734 | slot = -1; | ||
735 | for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { | ||
736 | if (prev_map[i] == addr) { | ||
737 | slot = i; | ||
738 | break; | ||
739 | } | ||
740 | } | ||
671 | 741 | ||
672 | nesting = --early_ioremap_nested; | 742 | if (slot < 0) { |
673 | if (WARN_ON(nesting < 0)) | 743 | printk(KERN_INFO "early_iounmap(%p, %08lx) not found slot\n", |
744 | addr, size); | ||
745 | WARN_ON(1); | ||
746 | return; | ||
747 | } | ||
748 | |||
749 | if (prev_size[slot] != size) { | ||
750 | printk(KERN_INFO "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n", | ||
751 | addr, size, slot, prev_size[slot]); | ||
752 | WARN_ON(1); | ||
674 | return; | 753 | return; |
754 | } | ||
675 | 755 | ||
676 | if (early_ioremap_debug) { | 756 | if (early_ioremap_debug) { |
677 | printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, | 757 | printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, |
678 | size, nesting); | 758 | size, slot); |
679 | dump_stack(); | 759 | dump_stack(); |
680 | } | 760 | } |
681 | 761 | ||
@@ -687,12 +767,13 @@ void __init early_iounmap(void *addr, unsigned long size) | |||
687 | offset = virt_addr & ~PAGE_MASK; | 767 | offset = virt_addr & ~PAGE_MASK; |
688 | nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; | 768 | nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; |
689 | 769 | ||
690 | idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting; | 770 | idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; |
691 | while (nrpages > 0) { | 771 | while (nrpages > 0) { |
692 | early_clear_fixmap(idx); | 772 | early_clear_fixmap(idx); |
693 | --idx; | 773 | --idx; |
694 | --nrpages; | 774 | --nrpages; |
695 | } | 775 | } |
776 | prev_map[slot] = 0; | ||
696 | } | 777 | } |
697 | 778 | ||
698 | void __this_fixmap_does_not_exist(void) | 779 | void __this_fixmap_does_not_exist(void) |
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/numa_32.c index 847c164725f4..847c164725f4 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/numa_32.c | |||
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 1b4763e26ea9..51c0a2fc14fe 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -138,7 +138,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
138 | return; | 138 | return; |
139 | } | 139 | } |
140 | 140 | ||
141 | if (is_uv_system()) | 141 | if (get_uv_system_type() >= UV_X2APIC) |
142 | apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; | 142 | apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; |
143 | else | 143 | else |
144 | apic_id = pa->apic_id; | 144 | apic_id = pa->apic_id; |
diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile index 30f3eb366667..446902b2a6b6 100644 --- a/arch/x86/oprofile/Makefile +++ b/arch/x86/oprofile/Makefile | |||
@@ -7,6 +7,6 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ | |||
7 | timer_int.o ) | 7 | timer_int.o ) |
8 | 8 | ||
9 | oprofile-y := $(DRIVER_OBJS) init.o backtrace.o | 9 | oprofile-y := $(DRIVER_OBJS) init.o backtrace.o |
10 | oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \ | 10 | oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ |
11 | op_model_ppro.o op_model_p4.o | 11 | op_model_ppro.o op_model_p4.o |
12 | oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o | 12 | oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 8a5f1614a3d5..57f6c9088081 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -1,10 +1,11 @@ | |||
1 | /** | 1 | /** |
2 | * @file nmi_int.c | 2 | * @file nmi_int.c |
3 | * | 3 | * |
4 | * @remark Copyright 2002 OProfile authors | 4 | * @remark Copyright 2002-2008 OProfile authors |
5 | * @remark Read the file COPYING | 5 | * @remark Read the file COPYING |
6 | * | 6 | * |
7 | * @author John Levon <levon@movementarian.org> | 7 | * @author John Levon <levon@movementarian.org> |
8 | * @author Robert Richter <robert.richter@amd.com> | ||
8 | */ | 9 | */ |
9 | 10 | ||
10 | #include <linux/init.h> | 11 | #include <linux/init.h> |
@@ -439,6 +440,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
439 | __u8 vendor = boot_cpu_data.x86_vendor; | 440 | __u8 vendor = boot_cpu_data.x86_vendor; |
440 | __u8 family = boot_cpu_data.x86; | 441 | __u8 family = boot_cpu_data.x86; |
441 | char *cpu_type; | 442 | char *cpu_type; |
443 | int ret = 0; | ||
442 | 444 | ||
443 | if (!cpu_has_apic) | 445 | if (!cpu_has_apic) |
444 | return -ENODEV; | 446 | return -ENODEV; |
@@ -451,19 +453,23 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
451 | default: | 453 | default: |
452 | return -ENODEV; | 454 | return -ENODEV; |
453 | case 6: | 455 | case 6: |
454 | model = &op_athlon_spec; | 456 | model = &op_amd_spec; |
455 | cpu_type = "i386/athlon"; | 457 | cpu_type = "i386/athlon"; |
456 | break; | 458 | break; |
457 | case 0xf: | 459 | case 0xf: |
458 | model = &op_athlon_spec; | 460 | model = &op_amd_spec; |
459 | /* Actually it could be i386/hammer too, but give | 461 | /* Actually it could be i386/hammer too, but give |
460 | user space an consistent name. */ | 462 | user space an consistent name. */ |
461 | cpu_type = "x86-64/hammer"; | 463 | cpu_type = "x86-64/hammer"; |
462 | break; | 464 | break; |
463 | case 0x10: | 465 | case 0x10: |
464 | model = &op_athlon_spec; | 466 | model = &op_amd_spec; |
465 | cpu_type = "x86-64/family10"; | 467 | cpu_type = "x86-64/family10"; |
466 | break; | 468 | break; |
469 | case 0x11: | ||
470 | model = &op_amd_spec; | ||
471 | cpu_type = "x86-64/family11h"; | ||
472 | break; | ||
467 | } | 473 | } |
468 | break; | 474 | break; |
469 | 475 | ||
@@ -490,17 +496,24 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
490 | return -ENODEV; | 496 | return -ENODEV; |
491 | } | 497 | } |
492 | 498 | ||
493 | init_sysfs(); | ||
494 | #ifdef CONFIG_SMP | 499 | #ifdef CONFIG_SMP |
495 | register_cpu_notifier(&oprofile_cpu_nb); | 500 | register_cpu_notifier(&oprofile_cpu_nb); |
496 | #endif | 501 | #endif |
497 | using_nmi = 1; | 502 | /* default values, can be overwritten by model */ |
498 | ops->create_files = nmi_create_files; | 503 | ops->create_files = nmi_create_files; |
499 | ops->setup = nmi_setup; | 504 | ops->setup = nmi_setup; |
500 | ops->shutdown = nmi_shutdown; | 505 | ops->shutdown = nmi_shutdown; |
501 | ops->start = nmi_start; | 506 | ops->start = nmi_start; |
502 | ops->stop = nmi_stop; | 507 | ops->stop = nmi_stop; |
503 | ops->cpu_type = cpu_type; | 508 | ops->cpu_type = cpu_type; |
509 | |||
510 | if (model->init) | ||
511 | ret = model->init(ops); | ||
512 | if (ret) | ||
513 | return ret; | ||
514 | |||
515 | init_sysfs(); | ||
516 | using_nmi = 1; | ||
504 | printk(KERN_INFO "oprofile: using NMI interrupt.\n"); | 517 | printk(KERN_INFO "oprofile: using NMI interrupt.\n"); |
505 | return 0; | 518 | return 0; |
506 | } | 519 | } |
@@ -513,4 +526,6 @@ void op_nmi_exit(void) | |||
513 | unregister_cpu_notifier(&oprofile_cpu_nb); | 526 | unregister_cpu_notifier(&oprofile_cpu_nb); |
514 | #endif | 527 | #endif |
515 | } | 528 | } |
529 | if (model->exit) | ||
530 | model->exit(); | ||
516 | } | 531 | } |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c new file mode 100644 index 000000000000..d9faf607b3a6 --- /dev/null +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -0,0 +1,543 @@ | |||
1 | /* | ||
2 | * @file op_model_amd.c | ||
3 | * athlon / K7 / K8 / Family 10h model-specific MSR operations | ||
4 | * | ||
5 | * @remark Copyright 2002-2008 OProfile authors | ||
6 | * @remark Read the file COPYING | ||
7 | * | ||
8 | * @author John Levon | ||
9 | * @author Philippe Elie | ||
10 | * @author Graydon Hoare | ||
11 | * @author Robert Richter <robert.richter@amd.com> | ||
12 | * @author Barry Kasindorf | ||
13 | */ | ||
14 | |||
15 | #include <linux/oprofile.h> | ||
16 | #include <linux/device.h> | ||
17 | #include <linux/pci.h> | ||
18 | |||
19 | #include <asm/ptrace.h> | ||
20 | #include <asm/msr.h> | ||
21 | #include <asm/nmi.h> | ||
22 | |||
23 | #include "op_x86_model.h" | ||
24 | #include "op_counter.h" | ||
25 | |||
26 | #define NUM_COUNTERS 4 | ||
27 | #define NUM_CONTROLS 4 | ||
28 | |||
29 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) | ||
30 | #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) | ||
31 | #define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0) | ||
32 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) | ||
33 | |||
34 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) | ||
35 | #define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) | ||
36 | #define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) | ||
37 | #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) | ||
38 | #define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) | ||
39 | #define CTRL_CLEAR_LO(x) (x &= (1<<21)) | ||
40 | #define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0) | ||
41 | #define CTRL_SET_ENABLE(val) (val |= 1<<20) | ||
42 | #define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) | ||
43 | #define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) | ||
44 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) | ||
45 | #define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff)) | ||
46 | #define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf)) | ||
47 | #define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) | ||
48 | #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) | ||
49 | |||
50 | static unsigned long reset_value[NUM_COUNTERS]; | ||
51 | |||
52 | #ifdef CONFIG_OPROFILE_IBS | ||
53 | |||
54 | /* IbsFetchCtl bits/masks */ | ||
55 | #define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */ | ||
56 | #define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */ | ||
57 | #define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */ | ||
58 | |||
59 | /*IbsOpCtl bits */ | ||
60 | #define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ | ||
61 | #define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ | ||
62 | |||
63 | /* Codes used in cpu_buffer.c */ | ||
64 | /* This produces duplicate code, need to be fixed */ | ||
65 | #define IBS_FETCH_BEGIN 3 | ||
66 | #define IBS_OP_BEGIN 4 | ||
67 | |||
68 | /* The function interface needs to be fixed, something like add | ||
69 | data. Should then be added to linux/oprofile.h. */ | ||
70 | extern void oprofile_add_ibs_sample(struct pt_regs *const regs, | ||
71 | unsigned int * const ibs_sample, u8 code); | ||
72 | |||
73 | struct ibs_fetch_sample { | ||
74 | /* MSRC001_1031 IBS Fetch Linear Address Register */ | ||
75 | unsigned int ibs_fetch_lin_addr_low; | ||
76 | unsigned int ibs_fetch_lin_addr_high; | ||
77 | /* MSRC001_1030 IBS Fetch Control Register */ | ||
78 | unsigned int ibs_fetch_ctl_low; | ||
79 | unsigned int ibs_fetch_ctl_high; | ||
80 | /* MSRC001_1032 IBS Fetch Physical Address Register */ | ||
81 | unsigned int ibs_fetch_phys_addr_low; | ||
82 | unsigned int ibs_fetch_phys_addr_high; | ||
83 | }; | ||
84 | |||
85 | struct ibs_op_sample { | ||
86 | /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */ | ||
87 | unsigned int ibs_op_rip_low; | ||
88 | unsigned int ibs_op_rip_high; | ||
89 | /* MSRC001_1035 IBS Op Data Register */ | ||
90 | unsigned int ibs_op_data1_low; | ||
91 | unsigned int ibs_op_data1_high; | ||
92 | /* MSRC001_1036 IBS Op Data 2 Register */ | ||
93 | unsigned int ibs_op_data2_low; | ||
94 | unsigned int ibs_op_data2_high; | ||
95 | /* MSRC001_1037 IBS Op Data 3 Register */ | ||
96 | unsigned int ibs_op_data3_low; | ||
97 | unsigned int ibs_op_data3_high; | ||
98 | /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */ | ||
99 | unsigned int ibs_dc_linear_low; | ||
100 | unsigned int ibs_dc_linear_high; | ||
101 | /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */ | ||
102 | unsigned int ibs_dc_phys_low; | ||
103 | unsigned int ibs_dc_phys_high; | ||
104 | }; | ||
105 | |||
106 | /* | ||
107 | * unitialize the APIC for the IBS interrupts if needed on AMD Family10h+ | ||
108 | */ | ||
109 | static void clear_ibs_nmi(void); | ||
110 | |||
111 | static int ibs_allowed; /* AMD Family10h and later */ | ||
112 | |||
113 | struct op_ibs_config { | ||
114 | unsigned long op_enabled; | ||
115 | unsigned long fetch_enabled; | ||
116 | unsigned long max_cnt_fetch; | ||
117 | unsigned long max_cnt_op; | ||
118 | unsigned long rand_en; | ||
119 | unsigned long dispatched_ops; | ||
120 | }; | ||
121 | |||
122 | static struct op_ibs_config ibs_config; | ||
123 | |||
124 | #endif | ||
125 | |||
126 | /* functions for op_amd_spec */ | ||
127 | |||
128 | static void op_amd_fill_in_addresses(struct op_msrs * const msrs) | ||
129 | { | ||
130 | int i; | ||
131 | |||
132 | for (i = 0; i < NUM_COUNTERS; i++) { | ||
133 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | ||
134 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | ||
135 | else | ||
136 | msrs->counters[i].addr = 0; | ||
137 | } | ||
138 | |||
139 | for (i = 0; i < NUM_CONTROLS; i++) { | ||
140 | if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) | ||
141 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | ||
142 | else | ||
143 | msrs->controls[i].addr = 0; | ||
144 | } | ||
145 | } | ||
146 | |||
147 | |||
148 | static void op_amd_setup_ctrs(struct op_msrs const * const msrs) | ||
149 | { | ||
150 | unsigned int low, high; | ||
151 | int i; | ||
152 | |||
153 | /* clear all counters */ | ||
154 | for (i = 0 ; i < NUM_CONTROLS; ++i) { | ||
155 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) | ||
156 | continue; | ||
157 | CTRL_READ(low, high, msrs, i); | ||
158 | CTRL_CLEAR_LO(low); | ||
159 | CTRL_CLEAR_HI(high); | ||
160 | CTRL_WRITE(low, high, msrs, i); | ||
161 | } | ||
162 | |||
163 | /* avoid a false detection of ctr overflows in NMI handler */ | ||
164 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
165 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) | ||
166 | continue; | ||
167 | CTR_WRITE(1, msrs, i); | ||
168 | } | ||
169 | |||
170 | /* enable active counters */ | ||
171 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
172 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { | ||
173 | reset_value[i] = counter_config[i].count; | ||
174 | |||
175 | CTR_WRITE(counter_config[i].count, msrs, i); | ||
176 | |||
177 | CTRL_READ(low, high, msrs, i); | ||
178 | CTRL_CLEAR_LO(low); | ||
179 | CTRL_CLEAR_HI(high); | ||
180 | CTRL_SET_ENABLE(low); | ||
181 | CTRL_SET_USR(low, counter_config[i].user); | ||
182 | CTRL_SET_KERN(low, counter_config[i].kernel); | ||
183 | CTRL_SET_UM(low, counter_config[i].unit_mask); | ||
184 | CTRL_SET_EVENT_LOW(low, counter_config[i].event); | ||
185 | CTRL_SET_EVENT_HIGH(high, counter_config[i].event); | ||
186 | CTRL_SET_HOST_ONLY(high, 0); | ||
187 | CTRL_SET_GUEST_ONLY(high, 0); | ||
188 | |||
189 | CTRL_WRITE(low, high, msrs, i); | ||
190 | } else { | ||
191 | reset_value[i] = 0; | ||
192 | } | ||
193 | } | ||
194 | } | ||
195 | |||
196 | #ifdef CONFIG_OPROFILE_IBS | ||
197 | |||
198 | static inline int | ||
199 | op_amd_handle_ibs(struct pt_regs * const regs, | ||
200 | struct op_msrs const * const msrs) | ||
201 | { | ||
202 | unsigned int low, high; | ||
203 | struct ibs_fetch_sample ibs_fetch; | ||
204 | struct ibs_op_sample ibs_op; | ||
205 | |||
206 | if (!ibs_allowed) | ||
207 | return 1; | ||
208 | |||
209 | if (ibs_config.fetch_enabled) { | ||
210 | rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
211 | if (high & IBS_FETCH_HIGH_VALID_BIT) { | ||
212 | ibs_fetch.ibs_fetch_ctl_high = high; | ||
213 | ibs_fetch.ibs_fetch_ctl_low = low; | ||
214 | rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high); | ||
215 | ibs_fetch.ibs_fetch_lin_addr_high = high; | ||
216 | ibs_fetch.ibs_fetch_lin_addr_low = low; | ||
217 | rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high); | ||
218 | ibs_fetch.ibs_fetch_phys_addr_high = high; | ||
219 | ibs_fetch.ibs_fetch_phys_addr_low = low; | ||
220 | |||
221 | oprofile_add_ibs_sample(regs, | ||
222 | (unsigned int *)&ibs_fetch, | ||
223 | IBS_FETCH_BEGIN); | ||
224 | |||
225 | /*reenable the IRQ */ | ||
226 | rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
227 | high &= ~IBS_FETCH_HIGH_VALID_BIT; | ||
228 | high |= IBS_FETCH_HIGH_ENABLE; | ||
229 | low &= IBS_FETCH_LOW_MAX_CNT_MASK; | ||
230 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
231 | } | ||
232 | } | ||
233 | |||
234 | if (ibs_config.op_enabled) { | ||
235 | rdmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
236 | if (low & IBS_OP_LOW_VALID_BIT) { | ||
237 | rdmsr(MSR_AMD64_IBSOPRIP, low, high); | ||
238 | ibs_op.ibs_op_rip_low = low; | ||
239 | ibs_op.ibs_op_rip_high = high; | ||
240 | rdmsr(MSR_AMD64_IBSOPDATA, low, high); | ||
241 | ibs_op.ibs_op_data1_low = low; | ||
242 | ibs_op.ibs_op_data1_high = high; | ||
243 | rdmsr(MSR_AMD64_IBSOPDATA2, low, high); | ||
244 | ibs_op.ibs_op_data2_low = low; | ||
245 | ibs_op.ibs_op_data2_high = high; | ||
246 | rdmsr(MSR_AMD64_IBSOPDATA3, low, high); | ||
247 | ibs_op.ibs_op_data3_low = low; | ||
248 | ibs_op.ibs_op_data3_high = high; | ||
249 | rdmsr(MSR_AMD64_IBSDCLINAD, low, high); | ||
250 | ibs_op.ibs_dc_linear_low = low; | ||
251 | ibs_op.ibs_dc_linear_high = high; | ||
252 | rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high); | ||
253 | ibs_op.ibs_dc_phys_low = low; | ||
254 | ibs_op.ibs_dc_phys_high = high; | ||
255 | |||
256 | /* reenable the IRQ */ | ||
257 | oprofile_add_ibs_sample(regs, | ||
258 | (unsigned int *)&ibs_op, | ||
259 | IBS_OP_BEGIN); | ||
260 | rdmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
261 | high = 0; | ||
262 | low &= ~IBS_OP_LOW_VALID_BIT; | ||
263 | low |= IBS_OP_LOW_ENABLE; | ||
264 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
265 | } | ||
266 | } | ||
267 | |||
268 | return 1; | ||
269 | } | ||
270 | |||
271 | #endif | ||
272 | |||
273 | static int op_amd_check_ctrs(struct pt_regs * const regs, | ||
274 | struct op_msrs const * const msrs) | ||
275 | { | ||
276 | unsigned int low, high; | ||
277 | int i; | ||
278 | |||
279 | for (i = 0 ; i < NUM_COUNTERS; ++i) { | ||
280 | if (!reset_value[i]) | ||
281 | continue; | ||
282 | CTR_READ(low, high, msrs, i); | ||
283 | if (CTR_OVERFLOWED(low)) { | ||
284 | oprofile_add_sample(regs, i); | ||
285 | CTR_WRITE(reset_value[i], msrs, i); | ||
286 | } | ||
287 | } | ||
288 | |||
289 | #ifdef CONFIG_OPROFILE_IBS | ||
290 | op_amd_handle_ibs(regs, msrs); | ||
291 | #endif | ||
292 | |||
293 | /* See op_model_ppro.c */ | ||
294 | return 1; | ||
295 | } | ||
296 | |||
297 | static void op_amd_start(struct op_msrs const * const msrs) | ||
298 | { | ||
299 | unsigned int low, high; | ||
300 | int i; | ||
301 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | ||
302 | if (reset_value[i]) { | ||
303 | CTRL_READ(low, high, msrs, i); | ||
304 | CTRL_SET_ACTIVE(low); | ||
305 | CTRL_WRITE(low, high, msrs, i); | ||
306 | } | ||
307 | } | ||
308 | |||
309 | #ifdef CONFIG_OPROFILE_IBS | ||
310 | if (ibs_allowed && ibs_config.fetch_enabled) { | ||
311 | low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; | ||
312 | high = IBS_FETCH_HIGH_ENABLE; | ||
313 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
314 | } | ||
315 | |||
316 | if (ibs_allowed && ibs_config.op_enabled) { | ||
317 | low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + IBS_OP_LOW_ENABLE; | ||
318 | high = 0; | ||
319 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
320 | } | ||
321 | #endif | ||
322 | } | ||
323 | |||
324 | |||
325 | static void op_amd_stop(struct op_msrs const * const msrs) | ||
326 | { | ||
327 | unsigned int low, high; | ||
328 | int i; | ||
329 | |||
330 | /* Subtle: stop on all counters to avoid race with | ||
331 | * setting our pm callback */ | ||
332 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | ||
333 | if (!reset_value[i]) | ||
334 | continue; | ||
335 | CTRL_READ(low, high, msrs, i); | ||
336 | CTRL_SET_INACTIVE(low); | ||
337 | CTRL_WRITE(low, high, msrs, i); | ||
338 | } | ||
339 | |||
340 | #ifdef CONFIG_OPROFILE_IBS | ||
341 | if (ibs_allowed && ibs_config.fetch_enabled) { | ||
342 | low = 0; /* clear max count and enable */ | ||
343 | high = 0; | ||
344 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | ||
345 | } | ||
346 | |||
347 | if (ibs_allowed && ibs_config.op_enabled) { | ||
348 | low = 0; /* clear max count and enable */ | ||
349 | high = 0; | ||
350 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | ||
351 | } | ||
352 | #endif | ||
353 | } | ||
354 | |||
355 | static void op_amd_shutdown(struct op_msrs const * const msrs) | ||
356 | { | ||
357 | int i; | ||
358 | |||
359 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | ||
360 | if (CTR_IS_RESERVED(msrs, i)) | ||
361 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
362 | } | ||
363 | for (i = 0 ; i < NUM_CONTROLS ; ++i) { | ||
364 | if (CTRL_IS_RESERVED(msrs, i)) | ||
365 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
366 | } | ||
367 | } | ||
368 | |||
369 | #ifndef CONFIG_OPROFILE_IBS | ||
370 | |||
371 | /* no IBS support */ | ||
372 | |||
373 | static int op_amd_init(struct oprofile_operations *ops) | ||
374 | { | ||
375 | return 0; | ||
376 | } | ||
377 | |||
378 | static void op_amd_exit(void) {} | ||
379 | |||
380 | #else | ||
381 | |||
382 | static u8 ibs_eilvt_off; | ||
383 | |||
384 | static inline void apic_init_ibs_nmi_per_cpu(void *arg) | ||
385 | { | ||
386 | ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); | ||
387 | } | ||
388 | |||
389 | static inline void apic_clear_ibs_nmi_per_cpu(void *arg) | ||
390 | { | ||
391 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); | ||
392 | } | ||
393 | |||
394 | static int pfm_amd64_setup_eilvt(void) | ||
395 | { | ||
396 | #define IBSCTL_LVTOFFSETVAL (1 << 8) | ||
397 | #define IBSCTL 0x1cc | ||
398 | struct pci_dev *cpu_cfg; | ||
399 | int nodes; | ||
400 | u32 value = 0; | ||
401 | |||
402 | /* per CPU setup */ | ||
403 | on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1); | ||
404 | |||
405 | nodes = 0; | ||
406 | cpu_cfg = NULL; | ||
407 | do { | ||
408 | cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, | ||
409 | PCI_DEVICE_ID_AMD_10H_NB_MISC, | ||
410 | cpu_cfg); | ||
411 | if (!cpu_cfg) | ||
412 | break; | ||
413 | ++nodes; | ||
414 | pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off | ||
415 | | IBSCTL_LVTOFFSETVAL); | ||
416 | pci_read_config_dword(cpu_cfg, IBSCTL, &value); | ||
417 | if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) { | ||
418 | printk(KERN_DEBUG "Failed to setup IBS LVT offset, " | ||
419 | "IBSCTL = 0x%08x", value); | ||
420 | return 1; | ||
421 | } | ||
422 | } while (1); | ||
423 | |||
424 | if (!nodes) { | ||
425 | printk(KERN_DEBUG "No CPU node configured for IBS"); | ||
426 | return 1; | ||
427 | } | ||
428 | |||
429 | #ifdef CONFIG_NUMA | ||
430 | /* Sanity check */ | ||
431 | /* Works only for 64bit with proper numa implementation. */ | ||
432 | if (nodes != num_possible_nodes()) { | ||
433 | printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, " | ||
434 | "found: %d, expected %d", | ||
435 | nodes, num_possible_nodes()); | ||
436 | return 1; | ||
437 | } | ||
438 | #endif | ||
439 | return 0; | ||
440 | } | ||
441 | |||
442 | /* | ||
443 | * initialize the APIC for the IBS interrupts | ||
444 | * if available (AMD Family10h rev B0 and later) | ||
445 | */ | ||
446 | static void setup_ibs(void) | ||
447 | { | ||
448 | ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); | ||
449 | |||
450 | if (!ibs_allowed) | ||
451 | return; | ||
452 | |||
453 | if (pfm_amd64_setup_eilvt()) { | ||
454 | ibs_allowed = 0; | ||
455 | return; | ||
456 | } | ||
457 | |||
458 | printk(KERN_INFO "oprofile: AMD IBS detected\n"); | ||
459 | } | ||
460 | |||
461 | |||
462 | /* | ||
463 | * unitialize the APIC for the IBS interrupts if needed on AMD Family10h | ||
464 | * rev B0 and later */ | ||
465 | static void clear_ibs_nmi(void) | ||
466 | { | ||
467 | if (ibs_allowed) | ||
468 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); | ||
469 | } | ||
470 | |||
471 | static int (*create_arch_files)(struct super_block * sb, struct dentry * root); | ||
472 | |||
473 | static int setup_ibs_files(struct super_block * sb, struct dentry * root) | ||
474 | { | ||
475 | char buf[12]; | ||
476 | struct dentry *dir; | ||
477 | int ret = 0; | ||
478 | |||
479 | /* architecture specific files */ | ||
480 | if (create_arch_files) | ||
481 | ret = create_arch_files(sb, root); | ||
482 | |||
483 | if (ret) | ||
484 | return ret; | ||
485 | |||
486 | if (!ibs_allowed) | ||
487 | return ret; | ||
488 | |||
489 | /* model specific files */ | ||
490 | |||
491 | /* setup some reasonable defaults */ | ||
492 | ibs_config.max_cnt_fetch = 250000; | ||
493 | ibs_config.fetch_enabled = 0; | ||
494 | ibs_config.max_cnt_op = 250000; | ||
495 | ibs_config.op_enabled = 0; | ||
496 | ibs_config.dispatched_ops = 1; | ||
497 | snprintf(buf, sizeof(buf), "ibs_fetch"); | ||
498 | dir = oprofilefs_mkdir(sb, root, buf); | ||
499 | oprofilefs_create_ulong(sb, dir, "rand_enable", | ||
500 | &ibs_config.rand_en); | ||
501 | oprofilefs_create_ulong(sb, dir, "enable", | ||
502 | &ibs_config.fetch_enabled); | ||
503 | oprofilefs_create_ulong(sb, dir, "max_count", | ||
504 | &ibs_config.max_cnt_fetch); | ||
505 | snprintf(buf, sizeof(buf), "ibs_uops"); | ||
506 | dir = oprofilefs_mkdir(sb, root, buf); | ||
507 | oprofilefs_create_ulong(sb, dir, "enable", | ||
508 | &ibs_config.op_enabled); | ||
509 | oprofilefs_create_ulong(sb, dir, "max_count", | ||
510 | &ibs_config.max_cnt_op); | ||
511 | oprofilefs_create_ulong(sb, dir, "dispatched_ops", | ||
512 | &ibs_config.dispatched_ops); | ||
513 | |||
514 | return 0; | ||
515 | } | ||
516 | |||
517 | static int op_amd_init(struct oprofile_operations *ops) | ||
518 | { | ||
519 | setup_ibs(); | ||
520 | create_arch_files = ops->create_files; | ||
521 | ops->create_files = setup_ibs_files; | ||
522 | return 0; | ||
523 | } | ||
524 | |||
525 | static void op_amd_exit(void) | ||
526 | { | ||
527 | clear_ibs_nmi(); | ||
528 | } | ||
529 | |||
530 | #endif | ||
531 | |||
532 | struct op_x86_model_spec const op_amd_spec = { | ||
533 | .init = op_amd_init, | ||
534 | .exit = op_amd_exit, | ||
535 | .num_counters = NUM_COUNTERS, | ||
536 | .num_controls = NUM_CONTROLS, | ||
537 | .fill_in_addresses = &op_amd_fill_in_addresses, | ||
538 | .setup_ctrs = &op_amd_setup_ctrs, | ||
539 | .check_ctrs = &op_amd_check_ctrs, | ||
540 | .start = &op_amd_start, | ||
541 | .stop = &op_amd_stop, | ||
542 | .shutdown = &op_amd_shutdown | ||
543 | }; | ||
diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c deleted file mode 100644 index 3d534879a9dc..000000000000 --- a/arch/x86/oprofile/op_model_athlon.c +++ /dev/null | |||
@@ -1,190 +0,0 @@ | |||
1 | /* | ||
2 | * @file op_model_athlon.h | ||
3 | * athlon / K7 / K8 / Family 10h model-specific MSR operations | ||
4 | * | ||
5 | * @remark Copyright 2002 OProfile authors | ||
6 | * @remark Read the file COPYING | ||
7 | * | ||
8 | * @author John Levon | ||
9 | * @author Philippe Elie | ||
10 | * @author Graydon Hoare | ||
11 | */ | ||
12 | |||
13 | #include <linux/oprofile.h> | ||
14 | #include <asm/ptrace.h> | ||
15 | #include <asm/msr.h> | ||
16 | #include <asm/nmi.h> | ||
17 | |||
18 | #include "op_x86_model.h" | ||
19 | #include "op_counter.h" | ||
20 | |||
21 | #define NUM_COUNTERS 4 | ||
22 | #define NUM_CONTROLS 4 | ||
23 | |||
24 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) | ||
25 | #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) | ||
26 | #define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0) | ||
27 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) | ||
28 | |||
29 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) | ||
30 | #define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) | ||
31 | #define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) | ||
32 | #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) | ||
33 | #define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) | ||
34 | #define CTRL_CLEAR_LO(x) (x &= (1<<21)) | ||
35 | #define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0) | ||
36 | #define CTRL_SET_ENABLE(val) (val |= 1<<20) | ||
37 | #define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) | ||
38 | #define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) | ||
39 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) | ||
40 | #define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff)) | ||
41 | #define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf)) | ||
42 | #define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) | ||
43 | #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) | ||
44 | |||
45 | static unsigned long reset_value[NUM_COUNTERS]; | ||
46 | |||
47 | static void athlon_fill_in_addresses(struct op_msrs * const msrs) | ||
48 | { | ||
49 | int i; | ||
50 | |||
51 | for (i = 0; i < NUM_COUNTERS; i++) { | ||
52 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | ||
53 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | ||
54 | else | ||
55 | msrs->counters[i].addr = 0; | ||
56 | } | ||
57 | |||
58 | for (i = 0; i < NUM_CONTROLS; i++) { | ||
59 | if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) | ||
60 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | ||
61 | else | ||
62 | msrs->controls[i].addr = 0; | ||
63 | } | ||
64 | } | ||
65 | |||
66 | |||
67 | static void athlon_setup_ctrs(struct op_msrs const * const msrs) | ||
68 | { | ||
69 | unsigned int low, high; | ||
70 | int i; | ||
71 | |||
72 | /* clear all counters */ | ||
73 | for (i = 0 ; i < NUM_CONTROLS; ++i) { | ||
74 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) | ||
75 | continue; | ||
76 | CTRL_READ(low, high, msrs, i); | ||
77 | CTRL_CLEAR_LO(low); | ||
78 | CTRL_CLEAR_HI(high); | ||
79 | CTRL_WRITE(low, high, msrs, i); | ||
80 | } | ||
81 | |||
82 | /* avoid a false detection of ctr overflows in NMI handler */ | ||
83 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
84 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) | ||
85 | continue; | ||
86 | CTR_WRITE(1, msrs, i); | ||
87 | } | ||
88 | |||
89 | /* enable active counters */ | ||
90 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
91 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { | ||
92 | reset_value[i] = counter_config[i].count; | ||
93 | |||
94 | CTR_WRITE(counter_config[i].count, msrs, i); | ||
95 | |||
96 | CTRL_READ(low, high, msrs, i); | ||
97 | CTRL_CLEAR_LO(low); | ||
98 | CTRL_CLEAR_HI(high); | ||
99 | CTRL_SET_ENABLE(low); | ||
100 | CTRL_SET_USR(low, counter_config[i].user); | ||
101 | CTRL_SET_KERN(low, counter_config[i].kernel); | ||
102 | CTRL_SET_UM(low, counter_config[i].unit_mask); | ||
103 | CTRL_SET_EVENT_LOW(low, counter_config[i].event); | ||
104 | CTRL_SET_EVENT_HIGH(high, counter_config[i].event); | ||
105 | CTRL_SET_HOST_ONLY(high, 0); | ||
106 | CTRL_SET_GUEST_ONLY(high, 0); | ||
107 | |||
108 | CTRL_WRITE(low, high, msrs, i); | ||
109 | } else { | ||
110 | reset_value[i] = 0; | ||
111 | } | ||
112 | } | ||
113 | } | ||
114 | |||
115 | |||
116 | static int athlon_check_ctrs(struct pt_regs * const regs, | ||
117 | struct op_msrs const * const msrs) | ||
118 | { | ||
119 | unsigned int low, high; | ||
120 | int i; | ||
121 | |||
122 | for (i = 0 ; i < NUM_COUNTERS; ++i) { | ||
123 | if (!reset_value[i]) | ||
124 | continue; | ||
125 | CTR_READ(low, high, msrs, i); | ||
126 | if (CTR_OVERFLOWED(low)) { | ||
127 | oprofile_add_sample(regs, i); | ||
128 | CTR_WRITE(reset_value[i], msrs, i); | ||
129 | } | ||
130 | } | ||
131 | |||
132 | /* See op_model_ppro.c */ | ||
133 | return 1; | ||
134 | } | ||
135 | |||
136 | |||
137 | static void athlon_start(struct op_msrs const * const msrs) | ||
138 | { | ||
139 | unsigned int low, high; | ||
140 | int i; | ||
141 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | ||
142 | if (reset_value[i]) { | ||
143 | CTRL_READ(low, high, msrs, i); | ||
144 | CTRL_SET_ACTIVE(low); | ||
145 | CTRL_WRITE(low, high, msrs, i); | ||
146 | } | ||
147 | } | ||
148 | } | ||
149 | |||
150 | |||
151 | static void athlon_stop(struct op_msrs const * const msrs) | ||
152 | { | ||
153 | unsigned int low, high; | ||
154 | int i; | ||
155 | |||
156 | /* Subtle: stop on all counters to avoid race with | ||
157 | * setting our pm callback */ | ||
158 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | ||
159 | if (!reset_value[i]) | ||
160 | continue; | ||
161 | CTRL_READ(low, high, msrs, i); | ||
162 | CTRL_SET_INACTIVE(low); | ||
163 | CTRL_WRITE(low, high, msrs, i); | ||
164 | } | ||
165 | } | ||
166 | |||
167 | static void athlon_shutdown(struct op_msrs const * const msrs) | ||
168 | { | ||
169 | int i; | ||
170 | |||
171 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | ||
172 | if (CTR_IS_RESERVED(msrs, i)) | ||
173 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
174 | } | ||
175 | for (i = 0 ; i < NUM_CONTROLS ; ++i) { | ||
176 | if (CTRL_IS_RESERVED(msrs, i)) | ||
177 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
178 | } | ||
179 | } | ||
180 | |||
181 | struct op_x86_model_spec const op_athlon_spec = { | ||
182 | .num_counters = NUM_COUNTERS, | ||
183 | .num_controls = NUM_CONTROLS, | ||
184 | .fill_in_addresses = &athlon_fill_in_addresses, | ||
185 | .setup_ctrs = &athlon_setup_ctrs, | ||
186 | .check_ctrs = &athlon_check_ctrs, | ||
187 | .start = &athlon_start, | ||
188 | .stop = &athlon_stop, | ||
189 | .shutdown = &athlon_shutdown | ||
190 | }; | ||
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 45b605fa71d0..05a0261ba0c3 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h | |||
@@ -32,6 +32,8 @@ struct pt_regs; | |||
32 | * various x86 CPU models' perfctr support. | 32 | * various x86 CPU models' perfctr support. |
33 | */ | 33 | */ |
34 | struct op_x86_model_spec { | 34 | struct op_x86_model_spec { |
35 | int (*init)(struct oprofile_operations *ops); | ||
36 | void (*exit)(void); | ||
35 | unsigned int const num_counters; | 37 | unsigned int const num_counters; |
36 | unsigned int const num_controls; | 38 | unsigned int const num_controls; |
37 | void (*fill_in_addresses)(struct op_msrs * const msrs); | 39 | void (*fill_in_addresses)(struct op_msrs * const msrs); |
@@ -46,6 +48,6 @@ struct op_x86_model_spec { | |||
46 | extern struct op_x86_model_spec const op_ppro_spec; | 48 | extern struct op_x86_model_spec const op_ppro_spec; |
47 | extern struct op_x86_model_spec const op_p4_spec; | 49 | extern struct op_x86_model_spec const op_p4_spec; |
48 | extern struct op_x86_model_spec const op_p4_ht2_spec; | 50 | extern struct op_x86_model_spec const op_p4_ht2_spec; |
49 | extern struct op_x86_model_spec const op_athlon_spec; | 51 | extern struct op_x86_model_spec const op_amd_spec; |
50 | 52 | ||
51 | #endif /* OP_X86_MODEL_H */ | 53 | #endif /* OP_X86_MODEL_H */ |
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 4bdaa590375d..3c27a809393b 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c | |||
@@ -511,3 +511,31 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size); | |||
511 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size); | 511 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size); |
512 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size); | 512 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size); |
513 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size); | 513 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size); |
514 | |||
515 | /* | ||
516 | * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from | ||
517 | * confusing the PCI engine: | ||
518 | */ | ||
519 | static void sb600_disable_hpet_bar(struct pci_dev *dev) | ||
520 | { | ||
521 | u8 val; | ||
522 | |||
523 | /* | ||
524 | * The SB600 and SB700 both share the same device | ||
525 | * ID, but the PM register 0x55 does something different | ||
526 | * for the SB700, so make sure we are dealing with the | ||
527 | * SB600 before touching the bit: | ||
528 | */ | ||
529 | |||
530 | pci_read_config_byte(dev, 0x08, &val); | ||
531 | |||
532 | if (val < 0x2F) { | ||
533 | outb(0x55, 0xCD6); | ||
534 | val = inb(0xCD7); | ||
535 | |||
536 | /* Set bit 7 in PM register 0x55 */ | ||
537 | outb(0x55, 0xCD6); | ||
538 | outb(val | 0x80, 0xCD7); | ||
539 | } | ||
540 | } | ||
541 | DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar); | ||