From cd0980fc8add25e8ab12fcf1051c0f20cbc7c0c0 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com> Date: Tue, 25 Aug 2009 14:50:27 +0200 Subject: tracing: Check invalid syscall nr while tracing syscalls Most arch syscall_get_nr() implementations returns -1 if the syscall number is not valid. Accessing the bit field without a check might result in a kernel oops (at least I saw it on s390 for ftrace selftest). Before this change, this problem did not occur, because the invalid syscall number (-1) caused syscall_nr_to_meta() to return NULL. There are at least two scenarios where syscall_get_nr() can return -1: 1. For example, ptrace stores an invalid syscall number, and thus, tracing code resets it. (see do_syscall_trace_enter in arch/s390/kernel/ptrace.c) 2. The syscall_regfunc() (kernel/tracepoint.c) sets the TIF_SYSCALL_FTRACE (now: TIF_SYSCALL_TRACEPOINT) flag for all threads which include kernel threads. However, the ftrace selftest triggers a kernel oops when testing syscall trace points: - The kernel thread is started as ususal (do_fork()), - tracing code sets TIF_SYSCALL_FTRACE, - the ret_from_fork() function is triggered and starts ftrace_syscall_exit() with an invalid syscall number. To avoid these scenarios, I suggest to check the syscall_nr. For instance, the ftrace selftest fails for s390 (with config option CONFIG_FTRACE_SYSCALLS set) and produces the following kernel oops. Unable to handle kernel pointer dereference at virtual kernel address 2000000000 Oops: 0038 [#1] PREEMPT SMP Modules linked in: CPU: 0 Not tainted 2.6.31-rc6-next-20090819-dirty #18 Process kthreadd (pid: 818, task: 000000003ea207e8, ksp: 000000003e813eb8) Krnl PSW : 0704100180000000 00000000000ea54c (ftrace_syscall_exit+0x58/0xdc) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:1 PM:0 EA:3 Krnl GPRS: 0000000000000000 00000000000e0000 ffffffffffffffff 20000000008c2650 0000000000000007 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 ffffffffffffffff 000000003e813d78 000000003e813f58 0000000000505ba8 000000003e813e18 000000003e813d78 Krnl Code: 00000000000ea540: e330d0000008 ag %r3,0(%r13) 00000000000ea546: a7480007 lhi %r4,7 00000000000ea54a: 1442 nr %r4,%r2 >00000000000ea54c: e31030000090 llgc %r1,0(%r3) 00000000000ea552: 5410d008 n %r1,8(%r13) 00000000000ea556: 8a104000 sra %r1,0(%r4) 00000000000ea55a: 5410d00c n %r1,12(%r13) 00000000000ea55e: 1211 ltr %r1,%r1 Call Trace: ([<0000000000000000>] 0x0) [<000000000001fa22>] do_syscall_trace_exit+0x132/0x18c [<000000000002d0c4>] sysc_return+0x0/0x8 [<000000000001c738>] kernel_thread_starter+0x0/0xc Last Breaking-Event-Address: [<00000000000ea51e>] ftrace_syscall_exit+0x2a/0xdc Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com> Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Jason Baron <jbaron@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> Cc: Jiaying Zhang <jiayingz@google.com> Cc: Martin Bligh <mbligh@google.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> LKML-Reference: <20090825125027.GE4639@cetus.boeblingen.de.ibm.com> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> --- kernel/trace/trace_syscalls.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/trace/trace_syscalls.c') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 85291c4de406..cb7f600cb02a 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -227,6 +227,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) int syscall_nr; syscall_nr = syscall_get_nr(current, regs); + if (syscall_nr < 0) + return; if (!test_bit(syscall_nr, enabled_enter_syscalls)) return; @@ -257,6 +259,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) int syscall_nr; syscall_nr = syscall_get_nr(current, regs); + if (syscall_nr < 0) + return; if (!test_bit(syscall_nr, enabled_exit_syscalls)) return; -- cgit v1.2.2 From 57421dbbdc932d65f0e6a41ebb027a2bfe3d0669 Mon Sep 17 00:00:00 2001 From: Jason Baron <jbaron@redhat.com> Date: Mon, 24 Aug 2009 17:40:22 -0400 Subject: tracing: Convert event tracing code to use NR_syscalls Convert the syscalls event tracing code to use NR_syscalls, instead of FTRACE_SYSCALL_MAX. NR_syscalls is standard accross most arches, and reduces code confusion/complexity. Signed-off-by: Jason Baron <jbaron@redhat.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> Cc: Jiaying Zhang <jiayingz@google.com> Cc: Martin Bligh <mbligh@google.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Josh Stone <jistone@redhat.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: H. Peter Anwin <hpa@zytor.com> Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> LKML-Reference: <9b4f1a84ecae57cc6599412772efa36f0d2b815b.1251146513.git.jbaron@redhat.com> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> --- kernel/trace/trace_syscalls.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'kernel/trace/trace_syscalls.c') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index cb7f600cb02a..4f5fae6fad90 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -11,8 +11,8 @@ static DEFINE_MUTEX(syscall_trace_lock); static int sys_refcount_enter; static int sys_refcount_exit; -static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX); -static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX); +static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); +static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags) @@ -289,7 +289,7 @@ int reg_event_syscall_enter(void *ptr) name = (char *)ptr; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); if (!sys_refcount_enter) @@ -312,7 +312,7 @@ void unreg_event_syscall_enter(void *ptr) name = (char *)ptr; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); sys_refcount_enter--; @@ -330,7 +330,7 @@ int reg_event_syscall_exit(void *ptr) name = (char *)ptr; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); if (!sys_refcount_exit) @@ -353,7 +353,7 @@ void unreg_event_syscall_exit(void *ptr) name = (char *)ptr; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); sys_refcount_exit--; @@ -373,8 +373,8 @@ struct trace_event event_syscall_exit = { #ifdef CONFIG_EVENT_PROFILE -static DECLARE_BITMAP(enabled_prof_enter_syscalls, FTRACE_SYSCALL_MAX); -static DECLARE_BITMAP(enabled_prof_exit_syscalls, FTRACE_SYSCALL_MAX); +static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); +static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); static int sys_prof_refcount_enter; static int sys_prof_refcount_exit; @@ -420,7 +420,7 @@ int reg_prof_syscall_enter(char *name) int num; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); @@ -442,7 +442,7 @@ void unreg_prof_syscall_enter(char *name) int num; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); @@ -481,7 +481,7 @@ int reg_prof_syscall_exit(char *name) int num; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); @@ -503,7 +503,7 @@ void unreg_prof_syscall_exit(char *name) int num; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); -- cgit v1.2.2 From e77405ad80f53966524b5c31244e13fbbbecbd84 Mon Sep 17 00:00:00 2001 From: Steven Rostedt <srostedt@redhat.com> Date: Wed, 2 Sep 2009 14:17:06 -0400 Subject: tracing: pass around ring buffer instead of tracer The latency tracers (irqsoff and wakeup) can swap trace buffers on the fly. If an event is happening and has reserved data on one of the buffers, and the latency tracer swaps the global buffer with the max buffer, the result is that the event may commit the data to the wrong buffer. This patch changes the API to the trace recording to be recieve the buffer that was used to reserve a commit. Then this buffer can be passed in to the commit. Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- kernel/trace/trace_syscalls.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace_syscalls.c') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 4f5fae6fad90..8712ce3c6a0e 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -223,6 +223,7 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) struct syscall_trace_enter *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; + struct ring_buffer *buffer; int size; int syscall_nr; @@ -238,8 +239,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - event = trace_current_buffer_lock_reserve(sys_data->enter_id, size, - 0, 0); + event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, + size, 0, 0); if (!event) return; @@ -247,8 +248,9 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) entry->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); - if (!filter_current_check_discard(sys_data->enter_event, entry, event)) - trace_current_buffer_unlock_commit(event, 0, 0); + if (!filter_current_check_discard(buffer, sys_data->enter_event, + entry, event)) + trace_current_buffer_unlock_commit(buffer, event, 0, 0); } void ftrace_syscall_exit(struct pt_regs *regs, long ret) @@ -256,6 +258,7 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) struct syscall_trace_exit *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; + struct ring_buffer *buffer; int syscall_nr; syscall_nr = syscall_get_nr(current, regs); @@ -268,7 +271,7 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) if (!sys_data) return; - event = trace_current_buffer_lock_reserve(sys_data->exit_id, + event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, sizeof(*entry), 0, 0); if (!event) return; @@ -277,8 +280,9 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) entry->nr = syscall_nr; entry->ret = syscall_get_return_value(current, regs); - if (!filter_current_check_discard(sys_data->exit_event, entry, event)) - trace_current_buffer_unlock_commit(event, 0, 0); + if (!filter_current_check_discard(buffer, sys_data->exit_event, + entry, event)) + trace_current_buffer_unlock_commit(buffer, event, 0, 0); } int reg_event_syscall_enter(void *ptr) -- cgit v1.2.2