aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorRoland McGrath <roland@redhat.com>2008-07-09 05:38:07 -0400
committerRoland McGrath <roland@redhat.com>2008-07-16 15:15:17 -0400
commitd4d67150165df8bf1cc05e532f6efca96f907cab (patch)
tree390d5951231c2a1d97d6453d70c42da7af49eeae /arch/x86/kernel
parent64f097331928b01d704047c1dbc738bb6d2a9bf9 (diff)
x86 ptrace: unify syscall tracing
This unifies and cleans up the syscall tracing code on i386 and x86_64. Using a single function for entry and exit tracing on 32-bit made the do_syscall_trace() into some terrible spaghetti. The logic is clear and simple using separate syscall_trace_enter() and syscall_trace_leave() functions as on 64-bit. The unification adds PTRACE_SYSEMU and PTRACE_SYSEMU_SINGLESTEP support on x86_64, for 32-bit ptrace() callers and for 64-bit ptrace() callers tracing either 32-bit or 64-bit tasks. It behaves just like 32-bit. Changing syscall_trace_enter() to return the syscall number shortens all the assembly paths, while adding the SYSEMU feature in a simple way. Signed-off-by: Roland McGrath <roland@redhat.com>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/entry_32.S19
-rw-r--r--arch/x86/kernel/entry_64.S14
-rw-r--r--arch/x86/kernel/ptrace.c141
3 files changed, 62 insertions, 112 deletions
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 0ad987d02b7..cadf73f70d3 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -332,7 +332,7 @@ sysenter_past_esp:
332 GET_THREAD_INFO(%ebp) 332 GET_THREAD_INFO(%ebp)
333 333
334 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ 334 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
335 testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) 335 testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
336 jnz syscall_trace_entry 336 jnz syscall_trace_entry
337 cmpl $(nr_syscalls), %eax 337 cmpl $(nr_syscalls), %eax
338 jae syscall_badsys 338 jae syscall_badsys
@@ -370,7 +370,7 @@ ENTRY(system_call)
370 GET_THREAD_INFO(%ebp) 370 GET_THREAD_INFO(%ebp)
371 # system call tracing in operation / emulation 371 # system call tracing in operation / emulation
372 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ 372 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
373 testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) 373 testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
374 jnz syscall_trace_entry 374 jnz syscall_trace_entry
375 cmpl $(nr_syscalls), %eax 375 cmpl $(nr_syscalls), %eax
376 jae syscall_badsys 376 jae syscall_badsys
@@ -510,12 +510,8 @@ END(work_pending)
510syscall_trace_entry: 510syscall_trace_entry:
511 movl $-ENOSYS,PT_EAX(%esp) 511 movl $-ENOSYS,PT_EAX(%esp)
512 movl %esp, %eax 512 movl %esp, %eax
513 xorl %edx,%edx 513 call syscall_trace_enter
514 call do_syscall_trace 514 /* What it returned is what we'll actually use. */
515 cmpl $0, %eax
516 jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
517 # so must skip actual syscall
518 movl PT_ORIG_EAX(%esp), %eax
519 cmpl $(nr_syscalls), %eax 515 cmpl $(nr_syscalls), %eax
520 jnae syscall_call 516 jnae syscall_call
521 jmp syscall_exit 517 jmp syscall_exit
@@ -524,14 +520,13 @@ END(syscall_trace_entry)
524 # perform syscall exit tracing 520 # perform syscall exit tracing
525 ALIGN 521 ALIGN
526syscall_exit_work: 522syscall_exit_work:
527 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl 523 testb $_TIF_WORK_SYSCALL_EXIT, %cl
528 jz work_pending 524 jz work_pending
529 TRACE_IRQS_ON 525 TRACE_IRQS_ON
530 ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call 526 ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
531 # schedule() instead 527 # schedule() instead
532 movl %esp, %eax 528 movl %esp, %eax
533 movl $1, %edx 529 call syscall_trace_leave
534 call do_syscall_trace
535 jmp resume_userspace 530 jmp resume_userspace
536END(syscall_exit_work) 531END(syscall_exit_work)
537 CFI_ENDPROC 532 CFI_ENDPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index ae63e584c34..63001c6ecf6 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -349,8 +349,7 @@ ENTRY(system_call_after_swapgs)
349 movq %rcx,RIP-ARGOFFSET(%rsp) 349 movq %rcx,RIP-ARGOFFSET(%rsp)
350 CFI_REL_OFFSET rip,RIP-ARGOFFSET 350 CFI_REL_OFFSET rip,RIP-ARGOFFSET
351 GET_THREAD_INFO(%rcx) 351 GET_THREAD_INFO(%rcx)
352 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ 352 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
353 TI_flags(%rcx)
354 jnz tracesys 353 jnz tracesys
355 cmpq $__NR_syscall_max,%rax 354 cmpq $__NR_syscall_max,%rax
356 ja badsys 355 ja badsys
@@ -430,7 +429,12 @@ tracesys:
430 FIXUP_TOP_OF_STACK %rdi 429 FIXUP_TOP_OF_STACK %rdi
431 movq %rsp,%rdi 430 movq %rsp,%rdi
432 call syscall_trace_enter 431 call syscall_trace_enter
433 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ 432 /*
433 * Reload arg registers from stack in case ptrace changed them.
434 * We don't reload %rax because syscall_trace_enter() returned
435 * the value it wants us to use in the table lookup.
436 */
437 LOAD_ARGS ARGOFFSET, 1
434 RESTORE_REST 438 RESTORE_REST
435 cmpq $__NR_syscall_max,%rax 439 cmpq $__NR_syscall_max,%rax
436 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ 440 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
@@ -483,7 +487,7 @@ int_very_careful:
483 ENABLE_INTERRUPTS(CLBR_NONE) 487 ENABLE_INTERRUPTS(CLBR_NONE)
484 SAVE_REST 488 SAVE_REST
485 /* Check for syscall exit trace */ 489 /* Check for syscall exit trace */
486 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx 490 testl $_TIF_WORK_SYSCALL_EXIT,%edx
487 jz int_signal 491 jz int_signal
488 pushq %rdi 492 pushq %rdi
489 CFI_ADJUST_CFA_OFFSET 8 493 CFI_ADJUST_CFA_OFFSET 8
@@ -491,7 +495,7 @@ int_very_careful:
491 call syscall_trace_leave 495 call syscall_trace_leave
492 popq %rdi 496 popq %rdi
493 CFI_ADJUST_CFA_OFFSET -8 497 CFI_ADJUST_CFA_OFFSET -8
494 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi 498 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
495 jmp int_restore_rest 499 jmp int_restore_rest
496 500
497int_signal: 501int_signal:
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 77040b6070e..34e77b16a42 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1357,8 +1357,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
1357#endif 1357#endif
1358} 1358}
1359 1359
1360#ifdef CONFIG_X86_32
1361
1362void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) 1360void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
1363{ 1361{
1364 struct siginfo info; 1362 struct siginfo info;
@@ -1377,89 +1375,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
1377 force_sig_info(SIGTRAP, &info, tsk); 1375 force_sig_info(SIGTRAP, &info, tsk);
1378} 1376}
1379 1377
1380/* notification of system call entry/exit
1381 * - triggered by current->work.syscall_trace
1382 */
1383int do_syscall_trace(struct pt_regs *regs, int entryexit)
1384{
1385 int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU);
1386 /*
1387 * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
1388 * interception
1389 */
1390 int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
1391 int ret = 0;
1392
1393 /* do the secure computing check first */
1394 if (!entryexit)
1395 secure_computing(regs->orig_ax);
1396
1397 if (unlikely(current->audit_context)) {
1398 if (entryexit)
1399 audit_syscall_exit(AUDITSC_RESULT(regs->ax),
1400 regs->ax);
1401 /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
1402 * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
1403 * not used, entry.S will call us only on syscall exit, not
1404 * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
1405 * calling send_sigtrap() on syscall entry.
1406 *
1407 * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
1408 * is_singlestep is false, despite his name, so we will still do
1409 * the correct thing.
1410 */
1411 else if (is_singlestep)
1412 goto out;
1413 }
1414
1415 if (!(current->ptrace & PT_PTRACED))
1416 goto out;
1417
1418 /* If a process stops on the 1st tracepoint with SYSCALL_TRACE
1419 * and then is resumed with SYSEMU_SINGLESTEP, it will come in
1420 * here. We have to check this and return */
1421 if (is_sysemu && entryexit)
1422 return 0;
1423
1424 /* Fake a debug trap */
1425 if (is_singlestep)
1426 send_sigtrap(current, regs, 0);
1427
1428 if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
1429 goto out;
1430
1431 /* the 0x80 provides a way for the tracing parent to distinguish
1432 between a syscall stop and SIGTRAP delivery */
1433 /* Note that the debugger could change the result of test_thread_flag!*/
1434 ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
1435
1436 /*
1437 * this isn't the same as continuing with a signal, but it will do
1438 * for normal use. strace only continues with a signal if the
1439 * stopping signal is not SIGTRAP. -brl
1440 */
1441 if (current->exit_code) {
1442 send_sig(current->exit_code, current, 1);
1443 current->exit_code = 0;
1444 }
1445 ret = is_sysemu;
1446out:
1447 if (unlikely(current->audit_context) && !entryexit)
1448 audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax,
1449 regs->bx, regs->cx, regs->dx, regs->si);
1450 if (ret == 0)
1451 return 0;
1452
1453 regs->orig_ax = -1; /* force skip of syscall restarting */
1454 if (unlikely(current->audit_context))
1455 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
1456 return 1;
1457}
1458
1459#else /* CONFIG_X86_64 */
1460
1461static void syscall_trace(struct pt_regs *regs) 1378static void syscall_trace(struct pt_regs *regs)
1462{ 1379{
1380 if (!(current->ptrace & PT_PTRACED))
1381 return;
1463 1382
1464#if 0 1383#if 0
1465 printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", 1384 printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
@@ -1481,39 +1400,71 @@ static void syscall_trace(struct pt_regs *regs)
1481 } 1400 }
1482} 1401}
1483 1402
1484asmlinkage void syscall_trace_enter(struct pt_regs *regs) 1403#ifdef CONFIG_X86_32
1404# define IS_IA32 1
1405#elif defined CONFIG_IA32_EMULATION
1406# define IS_IA32 test_thread_flag(TIF_IA32)
1407#else
1408# define IS_IA32 0
1409#endif
1410
1411/*
1412 * We must return the syscall number to actually look up in the table.
1413 * This can be -1L to skip running any syscall at all.
1414 */
1415asmregparm long syscall_trace_enter(struct pt_regs *regs)
1485{ 1416{
1417 long ret = 0;
1418
1486 /* do the secure computing check first */ 1419 /* do the secure computing check first */
1487 secure_computing(regs->orig_ax); 1420 secure_computing(regs->orig_ax);
1488 1421
1489 if (test_thread_flag(TIF_SYSCALL_TRACE) 1422 if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
1490 && (current->ptrace & PT_PTRACED)) 1423 ret = -1L;
1424
1425 if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
1491 syscall_trace(regs); 1426 syscall_trace(regs);
1492 1427
1493 if (unlikely(current->audit_context)) { 1428 if (unlikely(current->audit_context)) {
1494 if (test_thread_flag(TIF_IA32)) { 1429 if (IS_IA32)
1495 audit_syscall_entry(AUDIT_ARCH_I386, 1430 audit_syscall_entry(AUDIT_ARCH_I386,
1496 regs->orig_ax, 1431 regs->orig_ax,
1497 regs->bx, regs->cx, 1432 regs->bx, regs->cx,
1498 regs->dx, regs->si); 1433 regs->dx, regs->si);
1499 } else { 1434#ifdef CONFIG_X86_64
1435 else
1500 audit_syscall_entry(AUDIT_ARCH_X86_64, 1436 audit_syscall_entry(AUDIT_ARCH_X86_64,
1501 regs->orig_ax, 1437 regs->orig_ax,
1502 regs->di, regs->si, 1438 regs->di, regs->si,
1503 regs->dx, regs->r10); 1439 regs->dx, regs->r10);
1504 } 1440#endif
1505 } 1441 }
1442
1443 return ret ?: regs->orig_ax;
1506} 1444}
1507 1445
1508asmlinkage void syscall_trace_leave(struct pt_regs *regs) 1446asmregparm void syscall_trace_leave(struct pt_regs *regs)
1509{ 1447{
1510 if (unlikely(current->audit_context)) 1448 if (unlikely(current->audit_context))
1511 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); 1449 audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
1512 1450
1513 if ((test_thread_flag(TIF_SYSCALL_TRACE) 1451 if (test_thread_flag(TIF_SYSCALL_TRACE))
1514 || test_thread_flag(TIF_SINGLESTEP))
1515 && (current->ptrace & PT_PTRACED))
1516 syscall_trace(regs); 1452 syscall_trace(regs);
1517}
1518 1453
1519#endif /* CONFIG_X86_32 */ 1454 /*
1455 * If TIF_SYSCALL_EMU is set, we only get here because of
1456 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
1457 * We already reported this syscall instruction in
1458 * syscall_trace_enter(), so don't do any more now.
1459 */
1460 if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
1461 return;
1462
1463 /*
1464 * If we are single-stepping, synthesize a trap to follow the
1465 * system call instruction.
1466 */
1467 if (test_thread_flag(TIF_SINGLESTEP) &&
1468 (current->ptrace & PT_PTRACED))
1469 send_sigtrap(current, regs, 0);
1470}