From fe7de49f9d4e53f24ec9ef762a503f70b562341c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 20 Oct 2010 16:01:12 -0700 Subject: sched: Make sched_param argument static in sched_setscheduler() callers Andrew Morton pointed out almost all sched_setscheduler() callers are using fixed parameters and can be converted to static. It reduces runtime memory use a little. Signed-off-by: KOSAKI Motohiro Reported-by: Andrew Morton Acked-by: James Morris Cc: Ingo Molnar Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/trace/trace_selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 155a415b3209..562c56e048fd 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr) static int trace_wakeup_test_thread(void *data) { /* Make this a RT thread, doesn't need to be too high */ - struct sched_param param = { .sched_priority = 5 }; + static struct sched_param param = { .sched_priority = 5 }; struct completion *x = data; sched_setscheduler(current, SCHED_FIFO, ¶m); -- cgit v1.2.2 From becf91f18750cf1c60828aa2ee63a36b05c2e4d0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 10 Nov 2010 10:05:56 +0100 Subject: [S390] ftrace: build without frame pointers on s390 s390 doesn't need FRAME_POINTERS in order to have a working function tracer. We don't need frame pointers in order to get strack traces since we always have valid backchains by using the -mkernel-backchain gcc option. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e04b8bcdef88..ea37e2ff4164 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -126,7 +126,7 @@ if FTRACE config FUNCTION_TRACER bool "Kernel Function Tracer" depends on HAVE_FUNCTION_TRACER - select FRAME_POINTER if (!ARM_UNWIND) + select FRAME_POINTER if !ARM_UNWIND && !S390 select KALLSYMS select GENERIC_TRACER select CONTEXT_SWITCH_TRACER -- cgit v1.2.2 From 02e031cbc843b010e72fcc05c76113c688b2860f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Nov 2010 14:54:09 +0100 Subject: block: remove REQ_HARDBARRIER REQ_HARDBARRIER is dead now, so remove the leftovers. What's left at this point is: - various checks inside the block layer. - sanity checks in bio based drivers. - now unused bio_empty_barrier helper. - Xen blockfront use of BLKIF_OP_WRITE_BARRIER - it's dead for a while, but Xen really needs to sort out it's barrier situaton. - setting of ordered tags in uas - dead code copied from old scsi drivers. - scsi different retry for barriers - it's dead and should have been removed when flushes were converted to FS requests. - blktrace handling of barriers - removed. Someone who knows blktrace better should add support for REQ_FLUSH and REQ_FUA, though. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index bc251ed66724..7b8ec0281548 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -168,7 +168,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; -#define BLK_TC_HARDBARRIER BLK_TC_BARRIER #define BLK_TC_RAHEAD BLK_TC_AHEAD /* The ilog2() calls fall out because they're constant */ @@ -196,7 +195,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, return; what |= ddir_act[rw & WRITE]; - what |= MASK_TC_BIT(rw, HARDBARRIER); what |= MASK_TC_BIT(rw, SYNC); what |= MASK_TC_BIT(rw, RAHEAD); what |= MASK_TC_BIT(rw, META); @@ -1807,8 +1805,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) if (rw & REQ_RAHEAD) rwbs[i++] = 'A'; - if (rw & REQ_HARDBARRIER) - rwbs[i++] = 'B'; if (rw & REQ_SYNC) rwbs[i++] = 'S'; if (rw & REQ_META) -- cgit v1.2.2 From 91e86e560d0b3ce4c5fc64fd2bbb99f856a30a4e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Nov 2010 12:56:12 +0100 Subject: tracing: Fix recursive user stack trace The user stack trace can fault when examining the trace. Which would call the do_page_fault handler, which would trace again, which would do the user stack trace, which would fault and call do_page_fault again ... Thus this is causing a recursive bug. We need to have a recursion detector here. [ Resubmitted by Jiri Olsa ] [ Eric Dumazet recommended using __this_cpu_* instead of __get_cpu_* ] Cc: Eric Dumazet Signed-off-by: Jiri Olsa LKML-Reference: <1289390172-9730-3-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 82d9b8106cd0..ee6a7339cf0e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1284,6 +1284,8 @@ void trace_dump_stack(void) __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); } +static DEFINE_PER_CPU(int, user_stack_count); + void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) { @@ -1302,6 +1304,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) if (unlikely(in_nmi())) return; + /* + * prevent recursion, since the user stack tracing may + * trigger other kernel events. + */ + preempt_disable(); + if (__this_cpu_read(user_stack_count)) + goto out; + + __this_cpu_inc(user_stack_count); + + + event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, sizeof(*entry), flags, pc); if (!event) @@ -1319,6 +1333,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) save_stack_trace_user(&trace); if (!filter_check_discard(call, entry, buffer, event)) ring_buffer_unlock_commit(buffer, event); + + __this_cpu_dec(user_stack_count); + + out: + preempt_enable(); } #ifdef UNUSED -- cgit v1.2.2 From 451a3c24b0135bce54542009b5fde43846c7cf67 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 Nov 2010 16:26:55 +0100 Subject: BKL: remove extraneous #include The big kernel lock has been removed from all these files at some point, leaving only the #include. Remove this too as a cleanup. Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- kernel/trace/trace.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 82d9b8106cd0..042084157980 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.2 From 61c32659b12c44e62de32fbf99f7e4ca783dc38b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 01:39:17 +0100 Subject: tracing: New flag to allow non privileged users to use a trace event This adds a new trace event internal flag that allows them to be used in perf by non privileged users in case of task bound tracing. This is desired for syscalls tracepoint because they don't leak global system informations, like some other tracepoints. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- kernel/trace/trace_event_perf.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 39c059ca670e..19a359d5e6d5 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) /* Count the events in use (per event id, not per instance) */ static int total_ref_count; +static int perf_trace_event_perm(struct ftrace_event_call *tp_event, + struct perf_event *p_event) +{ + /* No tracing, just counting, so no obvious leak */ + if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) + return 0; + + /* Some events are ok to be traced by non-root users... */ + if (p_event->attach_state == PERF_ATTACH_TASK) { + if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY) + return 0; + } + + /* + * ...otherwise raw tracepoint data can be a severe data leak, + * only allow root to have these. + */ + if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return 0; +} + static int perf_trace_event_init(struct ftrace_event_call *tp_event, struct perf_event *p_event) { struct hlist_head __percpu *list; - int ret = -ENOMEM; + int ret; int cpu; + ret = perf_trace_event_perm(tp_event, p_event); + if (ret) + return ret; + p_event->tp_event = tp_event; if (tp_event->perf_refcount++ > 0) return 0; + ret = -ENOMEM; + list = alloc_percpu(struct hlist_head); if (!list) goto fail; -- cgit v1.2.2 From 042957801626465492b9428860de39a3cb2a8219 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 12 Nov 2010 22:32:11 -0500 Subject: tracing/events: Show real number in array fields Currently we have in something like the sched_switch event: field:char prev_comm[TASK_COMM_LEN]; offset:12; size:16; signed:1; When a userspace tool such as perf tries to parse this, the TASK_COMM_LEN is meaningless. This is done because the TRACE_EVENT() macro simply uses a #len to show the string of the length. When the length is an enum, we get a string that means nothing for tools. By adding a static buffer and a mutex to protect it, we can store the string into that buffer with snprintf and show the actual number. Now we get: field:char prev_comm[16]; offset:12; size:16; signed:1; Something much more useful. Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 6 ++++++ kernel/trace/trace_export.c | 14 ++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0725eeab1937..35fde09b81de 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,6 +27,12 @@ DEFINE_MUTEX(event_mutex); +DEFINE_MUTEX(event_storage_mutex); +EXPORT_SYMBOL_GPL(event_storage_mutex); + +char event_storage[EVENT_STORAGE_SIZE]; +EXPORT_SYMBOL_GPL(event_storage); + LIST_HEAD(ftrace_events); LIST_HEAD(ftrace_common_fields); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4ba44deaac25..4b74d71705c0 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -83,13 +83,19 @@ static void __always_unused ____ftrace_check_##name(void) \ #undef __array #define __array(type, item, len) \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + do { \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + mutex_lock(&event_storage_mutex); \ + snprintf(event_storage, sizeof(event_storage), \ + "%s[%d]", #type, len); \ + ret = trace_define_field(event_call, event_storage, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ is_signed_type(type), FILTER_OTHER); \ - if (ret) \ - return ret; + mutex_unlock(&event_storage_mutex); \ + if (ret) \ + return ret; \ + } while (0); #undef __array_desc #define __array_desc(type, container, item, len) \ -- cgit v1.2.2 From 364829b1263b44aa60383824e4c1289d83d78ca7 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 24 Nov 2010 15:13:16 -0800 Subject: tracing: Fix panic when lseek() called on "trace" opened for writing The file_ops struct for the "trace" special file defined llseek as seq_lseek(). However, if the file was opened for writing only, seq_open() was not called, and the seek would dereference a null pointer, file->private_data. This patch introduces a new wrapper for seq_lseek() which checks if the file descriptor is opened for reading first. If not, it does nothing. Cc: Signed-off-by: Slava Pestov LKML-Reference: <1290640396-24179-1-git-send-email-slavapestov@google.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ee6a7339cf0e..21db0deb5c7d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2339,11 +2339,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf, return count; } +static loff_t tracing_seek(struct file *file, loff_t offset, int origin) +{ + if (file->f_mode & FMODE_READ) + return seq_lseek(file, offset, origin); + else + return 0; +} + static const struct file_operations tracing_fops = { .open = tracing_open, .read = seq_read, .write = tracing_write_stub, - .llseek = seq_lseek, + .llseek = tracing_seek, .release = tracing_release, }; -- cgit v1.2.2 From e1e359273576ee8fe27021356b064c772ed29af3 Mon Sep 17 00:00:00 2001 From: David Sharp Date: Wed, 22 Dec 2010 16:38:24 -0800 Subject: ring_buffer: Off-by-one and duplicate events in ring_buffer_read_page Fix two related problems in the event-copying loop of ring_buffer_read_page. The loop condition for copying events is off-by-one. "len" is the remaining space in the caller-supplied page. "size" is the size of the next event (or two events). If len == size, then there is just enough space for the next event. size was set to rb_event_ts_length, which may include the size of two events if the first event is a time-extend, in order to assure time- extends are kept together with the event after it. However, rb_advance_reader always advances by one event. This would result in the event after any time-extend being duplicated. Instead, get the size of a single event for the memcpy, but use rb_event_ts_length for the loop condition. Signed-off-by: David Sharp LKML-Reference: <1293064704-8101-1-git-send-email-dhsharp@google.com> LKML-Reference: Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9ed509a015d8..bd1c35a4fbcc 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3853,6 +3853,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer, /* Need to copy one event at a time */ do { + /* We need the size of one event, because + * rb_advance_reader only advances by one event, + * whereas rb_event_ts_length may include the size of + * one or two events. + * We have already ensured there's enough space if this + * is a time extend. */ + size = rb_event_length(event); memcpy(bpage->data + pos, rpage->data + rpos, size); len -= size; @@ -3867,7 +3874,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, event = rb_reader_event(cpu_buffer); /* Always keep the time extend and data together */ size = rb_event_ts_length(event); - } while (len > size); + } while (len >= size); /* update bpage */ local_set(&bpage->commit, pos); -- cgit v1.2.2 From 61a0d49c33c7fd57c14895e5b0760bd02b65ac1f Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 3 Jan 2011 17:50:43 +0100 Subject: perf: Do not export power_frequency, but power_start event power_frequency moved to drivers/cpufreq/cpufreq.c which has to be compiled in, no need to export it. intel_idle can a be module though... Signed-off-by: Thomas Renninger Signed-off-by: Ingo Molnar Acked-by: Jean Pihet Cc: Jean Pihet Cc: Arjan van de Ven Cc: rjw@sisk.pl LKML-Reference: <1294073445-14812-2-git-send-email-trenn@suse.de> Signed-off-by: Ingo Molnar LKML-Reference: <1290072314-31155-2-git-send-email-trenn@suse.de> --- kernel/trace/power-traces.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index a22582a06161..0e0497d9fade 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -13,5 +13,5 @@ #define CREATE_TRACE_POINTS #include -EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); +EXPORT_TRACEPOINT_SYMBOL_GPL(power_start); -- cgit v1.2.2 From 25e41933b58777f2d020c3b0186b430ea004ec28 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 3 Jan 2011 17:50:44 +0100 Subject: perf: Clean up power events by introducing new, more generic ones Add these new power trace events: power:cpu_idle power:cpu_frequency power:machine_suspend The old C-state/idle accounting events: power:power_start power:power_end Have now a replacement (but we are still keeping the old tracepoints for compatibility): power:cpu_idle and power:power_frequency is replaced with: power:cpu_frequency power:machine_suspend is newly introduced. Jean Pihet has a patch integrated into the generic layer (kernel/power/suspend.c) which will make use of it. the type= field got removed from both, it was never used and the type is differed by the event type itself. perf timechart userspace tool gets adjusted in a separate patch. Signed-off-by: Thomas Renninger Signed-off-by: Ingo Molnar Acked-by: Arjan van de Ven Acked-by: Jean Pihet Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Linus Torvalds Cc: rjw@sisk.pl LKML-Reference: <1294073445-14812-3-git-send-email-trenn@suse.de> Signed-off-by: Ingo Molnar LKML-Reference: <1290072314-31155-2-git-send-email-trenn@suse.de> --- kernel/trace/Kconfig | 15 +++++++++++++++ kernel/trace/power-traces.c | 3 +++ 2 files changed, 18 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index ea37e2ff4164..14674dce77a6 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -69,6 +69,21 @@ config EVENT_TRACING select CONTEXT_SWITCH_TRACER bool +config EVENT_POWER_TRACING_DEPRECATED + depends on EVENT_TRACING + bool "Deprecated power event trace API, to be removed" + default y + help + Provides old power event types: + C-state/idle accounting events: + power:power_start + power:power_end + and old cpufreq accounting event: + power:power_frequency + This is for userspace compatibility + and will vanish after 5 kernel iterations, + namely 2.6.41. + config CONTEXT_SWITCH_TRACER bool diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index 0e0497d9fade..f55fcf61b223 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -13,5 +13,8 @@ #define CREATE_TRACE_POINTS #include +#ifdef EVENT_POWER_TRACING_DEPRECATED EXPORT_TRACEPOINT_SYMBOL_GPL(power_start); +#endif +EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); -- cgit v1.2.2 From c9b5f501ef1580faa30c40c644b7691870462201 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 7 Jan 2011 13:41:40 +0100 Subject: sched: Constify function scope static struct sched_param usage Function-scope statics are discouraged because they are easily overlooked and can cause subtle bugs/races due to their global (non-SMP safe) nature. Linus noticed that we did this for sched_param - at minimum make the const. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra LKML-Reference: Message-ID: Signed-off-by: Ingo Molnar --- kernel/trace/trace_selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 562c56e048fd..659732eba07c 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr) static int trace_wakeup_test_thread(void *data) { /* Make this a RT thread, doesn't need to be too high */ - static struct sched_param param = { .sched_priority = 5 }; + static const struct sched_param param = { .sched_priority = 5 }; struct completion *x = data; sched_setscheduler(current, SCHED_FIFO, ¶m); -- cgit v1.2.2