From e36673ec5126f15a8cddf6049aede7bdcf484c26 Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Wed, 24 Mar 2010 10:57:37 +0800
Subject: tracing: Fix lockdep warning in global_clock()

 # echo 1 > events/enable
 # echo global > trace_clock

------------[ cut here ]------------
WARNING: at kernel/lockdep.c:3162 check_flags+0xb2/0x190()
...
---[ end trace 3f86734a89416623 ]---
possible reason: unannotated irqs-on.
...

There's no reason to use the raw_local_irq_save() in trace_clock_global.
The local_irq_save() version is fine, and does not cause the bug in
lockdep.

Acked-by: Peter Zijlstra
Signed-off-by: Li Zefan
LKML-Reference: <4BA97FA1.7030606@cn.fujitsu.com>
Signed-off-by: Steven Rostedt
---
 kernel/trace/trace_clock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 6fbfb8f417b..9d589d8dcd1 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -84,7 +84,7 @@ u64 notrace trace_clock_global(void)
 	int this_cpu;
 	u64 now;
 
-	raw_local_irq_save(flags);
+	local_irq_save(flags);
 
 	this_cpu = raw_smp_processor_id();
 	now = cpu_clock(this_cpu);
@@ -110,7 +110,7 @@ u64 notrace trace_clock_global(void)
 	arch_spin_unlock(&trace_clock_struct.lock);
 
  out:
-	raw_local_irq_restore(flags);
+	local_irq_restore(flags);
 
 	return now;
 }
--
cgit v1.2.2
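A minimal sketch of the distinction behind this fix (the function is
hypothetical and this assumes a kernel build context; it is not part of
the commit above): local_irq_save() goes through lockdep's
trace_hardirqs_off() annotation, while raw_local_irq_save() disables
interrupts behind lockdep's back, so the next annotated operation sees
hardirq state that contradicts lockdep's bookkeeping and check_flags()
emits the warning quoted above.

#include <linux/irqflags.h>
#include <linux/sched.h>	/* cpu_clock() */
#include <linux/smp.h>		/* raw_smp_processor_id() */
#include <linux/types.h>

/* Hypothetical clock read, mirroring the shape of trace_clock_global() */
static u64 demo_global_clock(void)
{
	unsigned long flags;
	u64 now;

	/*
	 * Annotated variant: lockdep records that IRQs are now off, so
	 * locking done in this section is checked against a consistent
	 * IRQ state.  raw_local_irq_save() would skip the annotation
	 * and later trip check_flags().
	 */
	local_irq_save(flags);

	now = cpu_clock(raw_smp_processor_id());

	local_irq_restore(flags);
	return now;
}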
From 292f60c0c4ab44aa2d589ba03c12e64a3b3c5e38 Mon Sep 17 00:00:00 2001
From: Julia Lawall
Date: Mon, 29 Mar 2010 17:37:02 +0200
Subject: ring-buffer: Add missing unlock

In some error handling cases the lock is not unlocked.  The return is
converted to a goto, to share the unlock at the end of the function.

A simplified version of the semantic patch that finds this problem is
as follows: (http://coccinelle.lip6.fr/)

// <smpl>
@r exists@
expression E1;
identifier f;
@@

f (...) { <+...
* spin_lock_irq (E1,...);
... when != E1
* return ...;
...+> }
// </smpl>

Signed-off-by: Julia Lawall
LKML-Reference:
Signed-off-by: Steven Rostedt
---
 kernel/trace/ring_buffer.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index d1187ef20ca..9a0f9bf6a37 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1209,18 +1209,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 
 	for (i = 0; i < nr_pages; i++) {
 		if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-			return;
+			goto out;
 		p = cpu_buffer->pages->next;
 		bpage = list_entry(p, struct buffer_page, list);
 		list_del_init(&bpage->list);
 		free_buffer_page(bpage);
 	}
 	if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-		return;
+		goto out;
 
 	rb_reset_cpu(cpu_buffer);
 	rb_check_pages(cpu_buffer);
 
+out:
 	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
@@ -1237,7 +1238,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
 	for (i = 0; i < nr_pages; i++) {
 		if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
-			return;
+			goto out;
 		p = pages->next;
 		bpage = list_entry(p, struct buffer_page, list);
 		list_del_init(&bpage->list);
@@ -1246,6 +1247,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
 	rb_reset_cpu(cpu_buffer);
 	rb_check_pages(cpu_buffer);
 
+out:
 	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
--
cgit v1.2.2
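A minimal sketch of the single-exit locking pattern the fix adopts
(hypothetical structure and names, not the actual ring-buffer code):
every early bail-out jumps to a label placed just before the unlock,
so no path can return with the lock held.  With Coccinelle installed,
a rule like the one above can be applied with something along the
lines of "spatch --sp-file missing-unlock.cocci --dir kernel/trace",
where the .cocci file name is made up for the example.

#include <linux/spinlock.h>

/* Hypothetical stand-in for ring_buffer_per_cpu */
struct demo_buffer {
	spinlock_t	reader_lock;
	int		nr_items;
};

static void demo_remove_items(struct demo_buffer *buf, int count)
{
	int i;

	spin_lock_irq(&buf->reader_lock);

	for (i = 0; i < count; i++) {
		/* a bare "return" here would leak reader_lock */
		if (buf->nr_items == 0)
			goto out;
		buf->nr_items--;
	}
out:
	spin_unlock_irq(&buf->reader_lock);
}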
From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Wed, 24 Mar 2010 17:04:11 +0900
Subject: include cleanup: Update gfp.h and slab.h includes to prepare for
 breaking implicit slab.h inclusion from percpu.h

percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files.  percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.

percpu.h -> slab.h dependency is about to be removed.  Prepare for
this change by updating users of gfp and slab facilities to include
those headers directly instead of assuming availability.  As this
conversion needs to touch a large number of source files, the
following script was used as the basis of conversion.

  http://userweb.kernel.org/~tj/misc/slabh-sweep.py

The script does the following:

* Scan files for gfp and slab usages and update the includes such
  that only the necessary includes are there, i.e. if only gfp is
  used, gfp.h; if slab is used, slab.h.

* When the script inserts a new include, it looks at the include
  blocks and tries to put the new include such that its order
  conforms to its surroundings.  It's put in the include block which
  contains core kernel includes, in the same order that the rest are
  ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end
  if there doesn't seem to be any matching order.

* If the script can't find a place to put a new include (mostly
  because the file doesn't have a fitting include block), it prints
  out an error message indicating which .h file needs to be added to
  the file.

The conversion was done in the following steps.

1. The initial automatic conversion of all .c files updated slightly
   over 4000 files, deleting around 700 includes and adding ~480
   gfp.h and ~3000 slab.h inclusions.  The script emitted errors for
   ~400 files.

2. Each error was manually checked.  Some didn't need the inclusion,
   some needed manual addition while adding it to implementation .h
   or embedding .c file was more appropriate for others.  This step
   added inclusions to around 150 files.

3. The script was run again and the output was compared to the edits
   from #2 to make sure no file was left behind.

4. Several build tests were done and a couple of problems were fixed,
   e.g. lib/decompress_*.c used malloc/free() wrappers around slab
   APIs, requiring slab.h to be added manually.

5. The script was run on all .h files but without automatically
   editing them, as sprinkling gfp.h and slab.h inclusions around .h
   files could easily lead to inclusion dependency hell.  Most gfp.h
   inclusion directives were ignored as stuff from gfp.h was usually
   widely available and often used in preprocessor macros.  Each
   slab.h inclusion directive was examined and added manually as
   necessary.

6. percpu.h was updated not to include slab.h.

7. Build tests were done on the following configurations and failures
   were fixed.  CONFIG_GCOV_KERNEL was turned off for all tests (as
   my distributed build env didn't work with gcov compiles) and a few
   more options had to be turned off depending on archs to make
   things build (like ipr on powerpc/64 which failed due to missing
   writeq).

   * x86 and x86_64 UP and SMP allmodconfig and a custom test config.
   * powerpc and powerpc64 SMP allmodconfig
   * sparc and sparc64 SMP allmodconfig
   * ia64 SMP allmodconfig
   * s390 SMP allmodconfig
   * alpha SMP allmodconfig
   * um on x86_64 SMP allmodconfig

8. percpu.h modifications were reverted so that it could be applied
   as a separate patch and serve as a bisection point.

Given the fact that I had only a couple of failures from tests on
step 6, I'm fairly confident about the coverage of this conversion
patch.  If there is a breakage, it's likely to be something in one of
the arch headers which should be easily discoverable on most builds
of the specific arch.

Signed-off-by: Tejun Heo
Guess-its-ok-by: Christoph Lameter
Cc: Ingo Molnar
Cc: Lee Schermerhorn
---
 kernel/trace/blktrace.c              | 1 +
 kernel/trace/ftrace.c                | 1 +
 kernel/trace/power-traces.c          | 1 -
 kernel/trace/ring_buffer.c           | 1 +
 kernel/trace/trace.c                 | 2 +-
 kernel/trace/trace_events.c          | 1 +
 kernel/trace/trace_events_filter.c   | 1 +
 kernel/trace/trace_functions_graph.c | 1 +
 kernel/trace/trace_ksym.c            | 1 +
 kernel/trace/trace_mmiotrace.c       | 1 +
 kernel/trace/trace_selftest.c        | 1 +
 kernel/trace/trace_stat.c            | 1 +
 kernel/trace/trace_syscalls.c        | 1 +
 kernel/trace/trace_workqueue.c       | 1 +
 14 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 07f945a9943..b3bc91a3f51 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include <linux/slab.h>
 #include
 #include
 #include
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index d9062f5cc0c..2404b59b309 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include <linux/slab.h>
 #include
 #include
 #include
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index 9f4f565b01e..a22582a0616 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -9,7 +9,6 @@
 #include
 #include
 #include
-#include <linux/slab.h>
 
 #define CREATE_TRACE_POINTS
 #include
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index d1187ef20ca..2c839ca5e5c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include <linux/slab.h>
 #include
 #include
 #include
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3ec2ee6f656..44f916a0406 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -33,10 +33,10 @@
 #include
 #include
 #include
+#include <linux/slab.h>
 #include
 #include
 #include
-#include <linux/gfp.h>
 #include
 
 #include "trace.h"
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index beab8bf2f31..c697c704334 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include <linux/slab.h>
 #include
 #include
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 4615f62a04f..88c0b6dbd7f 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <linux/slab.h>
 
 #include "trace.h"
 #include "trace_output.h"
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index e6989d9b44d..9aed1a5cf55 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include <linux/slab.h>
 #include
 
 #include "trace.h"
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index 94103cdcf9d..d59cd687947 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include <linux/slab.h>
 #include
 
 #include "trace_output.h"
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 0acd834659e..017fa376505 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include <linux/slab.h>
 #include
 #include
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 280fea470d6..81003b4d617 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <linux/slab.h>
 
 static inline int trace_valid_entry(struct trace_entry *entry)
 {
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index a4bb239eb98..96cffb269e7 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -10,6 +10,7 @@
 
 #include
+#include <linux/slab.h>
 #include
 #include
 #include "trace_stat.h"
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 33c2a5b769d..4d6d711717f 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,5 +1,6 @@
 #include
 #include
+#include <linux/slab.h>
 #include
 #include
 #include
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 40cafb07dff..cc2d2faa7d9 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include <linux/slab.h>
 #include
 #include "trace_stat.h"
 #include "trace.h"
--
cgit v1.2.2

From eb1e79611cc9bfe21978230e3521e77ea2d7874a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Tue, 23 Mar 2010 00:08:59 +0100
Subject: perf: Correctly align perf event tracing buffer

The trace event buffer used by perf to record raw sample events is
typed as an array of char and may therefore not be aligned to 8 by
alloc_percpu().

But we need it to be aligned to 8 on sparc64 because we cast this
buffer into a random structure type built by the TRACE_EVENT() macro
to store the traces.  So if a random 64-bit field is accessed inside,
it may not be under the expected good alignment.

Use an array of long instead to force the appropriate alignment, and
perform a compile-time check to ensure the size in bytes of the
buffer is a multiple of sizeof(long) so that its actual size doesn't
get shrunk under us.

This fixes unaligned accesses reported while using perf lock on
sparc64.
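A hedged before/after illustration of the rule the sweep enforces (the
function name is invented for the example, and the targets of the
unchanged context includes above were lost in rendering and are left
blank): any .c file that calls slab or gfp facilities should name the
header itself, instead of inheriting it through the percpu.h -> slab.h
chain that this series removes.

#include <linux/kernel.h>
/*
 * Previously a file like this may have compiled only because some
 * header it pulled in (e.g. percpu.h via sched.h) dragged slab.h into
 * scope.  Stating the dependency directly means breaking the
 * percpu.h -> slab.h link cannot break this file.
 */
#include <linux/slab.h>		/* kmalloc(), kfree() */

static int *demo_alloc_counter(void)
{
	/* GFP_KERNEL arrives via slab.h -> gfp.h */
	int *counter = kmalloc(sizeof(*counter), GFP_KERNEL);

	if (counter)
		*counter = 0;
	return counter;
}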
Suggested-by: David Miller
Suggested-by: Tejun Heo
Signed-off-by: Frederic Weisbecker
Cc: Peter Zijlstra
Cc: Arnaldo Carvalho de Melo
Cc: Paul Mackerras
Cc: Ingo Molnar
Cc: David Miller
Cc: Steven Rostedt
---
 kernel/trace/trace_event_perf.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 81f691eb3a3..0565bb42566 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -17,7 +17,12 @@ EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
 static char *perf_trace_buf;
 static char *perf_trace_buf_nmi;
 
-typedef typeof(char [PERF_MAX_TRACE_SIZE]) perf_trace_t ;
+/*
+ * Force it to be aligned to unsigned long to avoid misaligned accesses
+ * suprises
+ */
+typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
+	perf_trace_t;
 
 /* Count the events in use (per event id, not per instance) */
 static int total_ref_count;
@@ -130,6 +135,8 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
 	char *trace_buf, *raw_data;
 	int pc, cpu;
 
+	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
+
 	pc = preempt_count();
 
 	/* Protect the per cpu buffer, begin the rcu read side */
@@ -152,7 +159,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
 	raw_data = per_cpu_ptr(trace_buf, cpu);
 
 	/* zero the dead bytes from align to not leak stack to user */
-	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
 
 	entry = (struct trace_entry *)raw_data;
 	tracing_generic_entry_update(entry, *irq_flags, pc);
--
cgit v1.2.2
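A standalone sketch of the alignment idiom the patch uses, with a
made-up size macro standing in for PERF_MAX_TRACE_SIZE (which is
defined elsewhere in the tracing headers): typing the buffer as an
array of unsigned long gives it the alignment of unsigned long while
occupying the same number of bytes as the old char array, and a
compile-time check keeps the integer division from silently rounding
the size down.

/* Hypothetical stand-in for PERF_MAX_TRACE_SIZE */
#define DEMO_MAX_TRACE_SIZE 2048

/*
 * An unsigned long array is aligned to sizeof(unsigned long) (8 on
 * sparc64 and x86_64), unlike a char array, yet still spans exactly
 * DEMO_MAX_TRACE_SIZE bytes.
 */
typedef unsigned long demo_trace_t[DEMO_MAX_TRACE_SIZE / sizeof(unsigned long)];

/*
 * Compile-time guard in the spirit of the patch's BUILD_BUG_ON(): if
 * the size were not a multiple of sizeof(unsigned long), the division
 * above would shrink the buffer; here the array would get a negative
 * size and the build would fail instead.
 */
typedef char demo_size_check[(DEMO_MAX_TRACE_SIZE % sizeof(unsigned long)) ? -1 : 1];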