diff options
author | Roland McGrath <roland@redhat.com> | 2008-07-09 05:38:07 -0400 |
---|---|---|
committer | Roland McGrath <roland@redhat.com> | 2008-07-16 15:15:17 -0400 |
commit | d4d67150165df8bf1cc05e532f6efca96f907cab (patch) | |
tree | 390d5951231c2a1d97d6453d70c42da7af49eeae /arch/x86/kernel | |
parent | 64f097331928b01d704047c1dbc738bb6d2a9bf9 (diff) |
x86 ptrace: unify syscall tracing
This unifies and cleans up the syscall tracing code on i386 and x86_64.
Using a single function for both entry and exit tracing on 32-bit turned
do_syscall_trace() into terrible spaghetti. The logic is clear and
simple when split into separate syscall_trace_enter() and
syscall_trace_leave() functions, as on 64-bit.
The unification adds PTRACE_SYSEMU and PTRACE_SYSEMU_SINGLESTEP support
on x86_64, both for 32-bit ptrace() callers and for 64-bit ptrace() callers
tracing either 32-bit or 64-bit tasks. It behaves just as it does on 32-bit.
Changing syscall_trace_enter() to return the syscall number shortens
all the assembly paths, while adding the SYSEMU feature in a simple way.
Signed-off-by: Roland McGrath <roland@redhat.com>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/entry_32.S | 19 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 14 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 141 |
3 files changed, 62 insertions, 112 deletions
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 0ad987d02b72..cadf73f70d33 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -332,7 +332,7 @@ sysenter_past_esp: | |||
332 | GET_THREAD_INFO(%ebp) | 332 | GET_THREAD_INFO(%ebp) |
333 | 333 | ||
334 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | 334 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ |
335 | testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | 335 | testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
336 | jnz syscall_trace_entry | 336 | jnz syscall_trace_entry |
337 | cmpl $(nr_syscalls), %eax | 337 | cmpl $(nr_syscalls), %eax |
338 | jae syscall_badsys | 338 | jae syscall_badsys |
@@ -370,7 +370,7 @@ ENTRY(system_call) | |||
370 | GET_THREAD_INFO(%ebp) | 370 | GET_THREAD_INFO(%ebp) |
371 | # system call tracing in operation / emulation | 371 | # system call tracing in operation / emulation |
372 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | 372 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ |
373 | testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | 373 | testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
374 | jnz syscall_trace_entry | 374 | jnz syscall_trace_entry |
375 | cmpl $(nr_syscalls), %eax | 375 | cmpl $(nr_syscalls), %eax |
376 | jae syscall_badsys | 376 | jae syscall_badsys |
@@ -510,12 +510,8 @@ END(work_pending) | |||
510 | syscall_trace_entry: | 510 | syscall_trace_entry: |
511 | movl $-ENOSYS,PT_EAX(%esp) | 511 | movl $-ENOSYS,PT_EAX(%esp) |
512 | movl %esp, %eax | 512 | movl %esp, %eax |
513 | xorl %edx,%edx | 513 | call syscall_trace_enter |
514 | call do_syscall_trace | 514 | /* What it returned is what we'll actually use. */ |
515 | cmpl $0, %eax | ||
516 | jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, | ||
517 | # so must skip actual syscall | ||
518 | movl PT_ORIG_EAX(%esp), %eax | ||
519 | cmpl $(nr_syscalls), %eax | 515 | cmpl $(nr_syscalls), %eax |
520 | jnae syscall_call | 516 | jnae syscall_call |
521 | jmp syscall_exit | 517 | jmp syscall_exit |
@@ -524,14 +520,13 @@ END(syscall_trace_entry) | |||
524 | # perform syscall exit tracing | 520 | # perform syscall exit tracing |
525 | ALIGN | 521 | ALIGN |
526 | syscall_exit_work: | 522 | syscall_exit_work: |
527 | testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl | 523 | testb $_TIF_WORK_SYSCALL_EXIT, %cl |
528 | jz work_pending | 524 | jz work_pending |
529 | TRACE_IRQS_ON | 525 | TRACE_IRQS_ON |
530 | ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call | 526 | ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call |
531 | # schedule() instead | 527 | # schedule() instead |
532 | movl %esp, %eax | 528 | movl %esp, %eax |
533 | movl $1, %edx | 529 | call syscall_trace_leave |
534 | call do_syscall_trace | ||
535 | jmp resume_userspace | 530 | jmp resume_userspace |
536 | END(syscall_exit_work) | 531 | END(syscall_exit_work) |
537 | CFI_ENDPROC | 532 | CFI_ENDPROC |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index ae63e584c340..63001c6ecf6d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -349,8 +349,7 @@ ENTRY(system_call_after_swapgs) | |||
349 | movq %rcx,RIP-ARGOFFSET(%rsp) | 349 | movq %rcx,RIP-ARGOFFSET(%rsp) |
350 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 350 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
351 | GET_THREAD_INFO(%rcx) | 351 | GET_THREAD_INFO(%rcx) |
352 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ | 352 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) |
353 | TI_flags(%rcx) | ||
354 | jnz tracesys | 353 | jnz tracesys |
355 | cmpq $__NR_syscall_max,%rax | 354 | cmpq $__NR_syscall_max,%rax |
356 | ja badsys | 355 | ja badsys |
@@ -430,7 +429,12 @@ tracesys: | |||
430 | FIXUP_TOP_OF_STACK %rdi | 429 | FIXUP_TOP_OF_STACK %rdi |
431 | movq %rsp,%rdi | 430 | movq %rsp,%rdi |
432 | call syscall_trace_enter | 431 | call syscall_trace_enter |
433 | LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ | 432 | /* |
433 | * Reload arg registers from stack in case ptrace changed them. | ||
434 | * We don't reload %rax because syscall_trace_enter() returned | ||
435 | * the value it wants us to use in the table lookup. | ||
436 | */ | ||
437 | LOAD_ARGS ARGOFFSET, 1 | ||
434 | RESTORE_REST | 438 | RESTORE_REST |
435 | cmpq $__NR_syscall_max,%rax | 439 | cmpq $__NR_syscall_max,%rax |
436 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ | 440 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ |
@@ -483,7 +487,7 @@ int_very_careful: | |||
483 | ENABLE_INTERRUPTS(CLBR_NONE) | 487 | ENABLE_INTERRUPTS(CLBR_NONE) |
484 | SAVE_REST | 488 | SAVE_REST |
485 | /* Check for syscall exit trace */ | 489 | /* Check for syscall exit trace */ |
486 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx | 490 | testl $_TIF_WORK_SYSCALL_EXIT,%edx |
487 | jz int_signal | 491 | jz int_signal |
488 | pushq %rdi | 492 | pushq %rdi |
489 | CFI_ADJUST_CFA_OFFSET 8 | 493 | CFI_ADJUST_CFA_OFFSET 8 |
@@ -491,7 +495,7 @@ int_very_careful: | |||
491 | call syscall_trace_leave | 495 | call syscall_trace_leave |
492 | popq %rdi | 496 | popq %rdi |
493 | CFI_ADJUST_CFA_OFFSET -8 | 497 | CFI_ADJUST_CFA_OFFSET -8 |
494 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi | 498 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi |
495 | jmp int_restore_rest | 499 | jmp int_restore_rest |
496 | 500 | ||
497 | int_signal: | 501 | int_signal: |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 77040b6070e1..34e77b16a42a 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -1357,8 +1357,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) | |||
1357 | #endif | 1357 | #endif |
1358 | } | 1358 | } |
1359 | 1359 | ||
1360 | #ifdef CONFIG_X86_32 | ||
1361 | |||
1362 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) | 1360 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) |
1363 | { | 1361 | { |
1364 | struct siginfo info; | 1362 | struct siginfo info; |
@@ -1377,89 +1375,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) | |||
1377 | force_sig_info(SIGTRAP, &info, tsk); | 1375 | force_sig_info(SIGTRAP, &info, tsk); |
1378 | } | 1376 | } |
1379 | 1377 | ||
1380 | /* notification of system call entry/exit | ||
1381 | * - triggered by current->work.syscall_trace | ||
1382 | */ | ||
1383 | int do_syscall_trace(struct pt_regs *regs, int entryexit) | ||
1384 | { | ||
1385 | int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); | ||
1386 | /* | ||
1387 | * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall | ||
1388 | * interception | ||
1389 | */ | ||
1390 | int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); | ||
1391 | int ret = 0; | ||
1392 | |||
1393 | /* do the secure computing check first */ | ||
1394 | if (!entryexit) | ||
1395 | secure_computing(regs->orig_ax); | ||
1396 | |||
1397 | if (unlikely(current->audit_context)) { | ||
1398 | if (entryexit) | ||
1399 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), | ||
1400 | regs->ax); | ||
1401 | /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only | ||
1402 | * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is | ||
1403 | * not used, entry.S will call us only on syscall exit, not | ||
1404 | * entry; so when TIF_SYSCALL_AUDIT is used we must avoid | ||
1405 | * calling send_sigtrap() on syscall entry. | ||
1406 | * | ||
1407 | * Note that when PTRACE_SYSEMU_SINGLESTEP is used, | ||
1408 | * is_singlestep is false, despite his name, so we will still do | ||
1409 | * the correct thing. | ||
1410 | */ | ||
1411 | else if (is_singlestep) | ||
1412 | goto out; | ||
1413 | } | ||
1414 | |||
1415 | if (!(current->ptrace & PT_PTRACED)) | ||
1416 | goto out; | ||
1417 | |||
1418 | /* If a process stops on the 1st tracepoint with SYSCALL_TRACE | ||
1419 | * and then is resumed with SYSEMU_SINGLESTEP, it will come in | ||
1420 | * here. We have to check this and return */ | ||
1421 | if (is_sysemu && entryexit) | ||
1422 | return 0; | ||
1423 | |||
1424 | /* Fake a debug trap */ | ||
1425 | if (is_singlestep) | ||
1426 | send_sigtrap(current, regs, 0); | ||
1427 | |||
1428 | if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) | ||
1429 | goto out; | ||
1430 | |||
1431 | /* the 0x80 provides a way for the tracing parent to distinguish | ||
1432 | between a syscall stop and SIGTRAP delivery */ | ||
1433 | /* Note that the debugger could change the result of test_thread_flag!*/ | ||
1434 | ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); | ||
1435 | |||
1436 | /* | ||
1437 | * this isn't the same as continuing with a signal, but it will do | ||
1438 | * for normal use. strace only continues with a signal if the | ||
1439 | * stopping signal is not SIGTRAP. -brl | ||
1440 | */ | ||
1441 | if (current->exit_code) { | ||
1442 | send_sig(current->exit_code, current, 1); | ||
1443 | current->exit_code = 0; | ||
1444 | } | ||
1445 | ret = is_sysemu; | ||
1446 | out: | ||
1447 | if (unlikely(current->audit_context) && !entryexit) | ||
1448 | audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, | ||
1449 | regs->bx, regs->cx, regs->dx, regs->si); | ||
1450 | if (ret == 0) | ||
1451 | return 0; | ||
1452 | |||
1453 | regs->orig_ax = -1; /* force skip of syscall restarting */ | ||
1454 | if (unlikely(current->audit_context)) | ||
1455 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | ||
1456 | return 1; | ||
1457 | } | ||
1458 | |||
1459 | #else /* CONFIG_X86_64 */ | ||
1460 | |||
1461 | static void syscall_trace(struct pt_regs *regs) | 1378 | static void syscall_trace(struct pt_regs *regs) |
1462 | { | 1379 | { |
1380 | if (!(current->ptrace & PT_PTRACED)) | ||
1381 | return; | ||
1463 | 1382 | ||
1464 | #if 0 | 1383 | #if 0 |
1465 | printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", | 1384 | printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", |
@@ -1481,39 +1400,71 @@ static void syscall_trace(struct pt_regs *regs) | |||
1481 | } | 1400 | } |
1482 | } | 1401 | } |
1483 | 1402 | ||
1484 | asmlinkage void syscall_trace_enter(struct pt_regs *regs) | 1403 | #ifdef CONFIG_X86_32 |
1404 | # define IS_IA32 1 | ||
1405 | #elif defined CONFIG_IA32_EMULATION | ||
1406 | # define IS_IA32 test_thread_flag(TIF_IA32) | ||
1407 | #else | ||
1408 | # define IS_IA32 0 | ||
1409 | #endif | ||
1410 | |||
1411 | /* | ||
1412 | * We must return the syscall number to actually look up in the table. | ||
1413 | * This can be -1L to skip running any syscall at all. | ||
1414 | */ | ||
1415 | asmregparm long syscall_trace_enter(struct pt_regs *regs) | ||
1485 | { | 1416 | { |
1417 | long ret = 0; | ||
1418 | |||
1486 | /* do the secure computing check first */ | 1419 | /* do the secure computing check first */ |
1487 | secure_computing(regs->orig_ax); | 1420 | secure_computing(regs->orig_ax); |
1488 | 1421 | ||
1489 | if (test_thread_flag(TIF_SYSCALL_TRACE) | 1422 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) |
1490 | && (current->ptrace & PT_PTRACED)) | 1423 | ret = -1L; |
1424 | |||
1425 | if (ret || test_thread_flag(TIF_SYSCALL_TRACE)) | ||
1491 | syscall_trace(regs); | 1426 | syscall_trace(regs); |
1492 | 1427 | ||
1493 | if (unlikely(current->audit_context)) { | 1428 | if (unlikely(current->audit_context)) { |
1494 | if (test_thread_flag(TIF_IA32)) { | 1429 | if (IS_IA32) |
1495 | audit_syscall_entry(AUDIT_ARCH_I386, | 1430 | audit_syscall_entry(AUDIT_ARCH_I386, |
1496 | regs->orig_ax, | 1431 | regs->orig_ax, |
1497 | regs->bx, regs->cx, | 1432 | regs->bx, regs->cx, |
1498 | regs->dx, regs->si); | 1433 | regs->dx, regs->si); |
1499 | } else { | 1434 | #ifdef CONFIG_X86_64 |
1435 | else | ||
1500 | audit_syscall_entry(AUDIT_ARCH_X86_64, | 1436 | audit_syscall_entry(AUDIT_ARCH_X86_64, |
1501 | regs->orig_ax, | 1437 | regs->orig_ax, |
1502 | regs->di, regs->si, | 1438 | regs->di, regs->si, |
1503 | regs->dx, regs->r10); | 1439 | regs->dx, regs->r10); |
1504 | } | 1440 | #endif |
1505 | } | 1441 | } |
1442 | |||
1443 | return ret ?: regs->orig_ax; | ||
1506 | } | 1444 | } |
1507 | 1445 | ||
1508 | asmlinkage void syscall_trace_leave(struct pt_regs *regs) | 1446 | asmregparm void syscall_trace_leave(struct pt_regs *regs) |
1509 | { | 1447 | { |
1510 | if (unlikely(current->audit_context)) | 1448 | if (unlikely(current->audit_context)) |
1511 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | 1449 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); |
1512 | 1450 | ||
1513 | if ((test_thread_flag(TIF_SYSCALL_TRACE) | 1451 | if (test_thread_flag(TIF_SYSCALL_TRACE)) |
1514 | || test_thread_flag(TIF_SINGLESTEP)) | ||
1515 | && (current->ptrace & PT_PTRACED)) | ||
1516 | syscall_trace(regs); | 1452 | syscall_trace(regs); |
1517 | } | ||
1518 | 1453 | ||
1519 | #endif /* CONFIG_X86_32 */ | 1454 | /* |
1455 | * If TIF_SYSCALL_EMU is set, we only get here because of | ||
1456 | * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). | ||
1457 | * We already reported this syscall instruction in | ||
1458 | * syscall_trace_enter(), so don't do any more now. | ||
1459 | */ | ||
1460 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) | ||
1461 | return; | ||
1462 | |||
1463 | /* | ||
1464 | * If we are single-stepping, synthesize a trap to follow the | ||
1465 | * system call instruction. | ||
1466 | */ | ||
1467 | if (test_thread_flag(TIF_SINGLESTEP) && | ||
1468 | (current->ptrace & PT_PTRACED)) | ||
1469 | send_sigtrap(current, regs, 0); | ||
1470 | } | ||