From b332828c39326b1dca617f387dd15d12e81cd5f0 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:43:10 +0530 Subject: hw-breakpoints: prepare the code for Hardware Breakpoint interfaces The generic hardware breakpoint interface provides an abstraction of hardware breakpoints in front of specific arch implementations for both kernel and user side breakpoints. This includes execution breakpoints and read/write breakpoints, also known as "watchpoints". This patch introduces header files containing constants, structure definitions and declaration of functions used by the hardware breakpoint core and x86 specific code. It also introduces an array based storage for the debug-register values in 'struct thread_struct', while modifying all users of debugreg member in the structure. [ Impact: add headers for new hardware breakpoint interface ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- include/asm-generic/hw_breakpoint.h | 139 ++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 include/asm-generic/hw_breakpoint.h (limited to 'include') diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h new file mode 100644 index 000000000000..9bf2d12eb74a --- /dev/null +++ b/include/asm-generic/hw_breakpoint.h @@ -0,0 +1,139 @@ +#ifndef _ASM_GENERIC_HW_BREAKPOINT_H +#define _ASM_GENERIC_HW_BREAKPOINT_H + +#ifndef __ARCH_HW_BREAKPOINT_H +#error "Please don't include this file directly" +#endif + +#ifdef __KERNEL__ +#include +#include +#include + +/** + * struct hw_breakpoint - unified kernel/user-space hardware breakpoint + * @triggered: callback invoked after target address access + * @info: arch-specific breakpoint info (address, length, and type) + * + * %hw_breakpoint structures are the kernel's way of representing + * hardware breakpoints. 
These are data breakpoints + * (also known as "watchpoints", triggered on data access), and the breakpoint's + * target address can be located in either kernel space or user space. + * + * The breakpoint's address, length, and type are highly + * architecture-specific. The values are encoded in the @info field; you + * specify them when registering the breakpoint. To examine the encoded + * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared + * below. + * + * The address is specified as a regular kernel pointer (for kernel-space + * breakpoints) or as an %__user pointer (for user-space breakpoints). + * With register_user_hw_breakpoint(), the address must refer to a + * location in user space. The breakpoint will be active only while the + * requested task is running. Conversely with + * register_kernel_hw_breakpoint(), the address must refer to a location + * in kernel space, and the breakpoint will be active on all CPUs + * regardless of the current task. + * + * The length is the breakpoint's extent in bytes, which is subject to + * certain limitations. include/asm/hw_breakpoint.h contains macros + * defining the available lengths for a specific architecture. Note that + * the address's alignment must match the length. The breakpoint will + * catch accesses to any byte in the range from address to address + + * (length - 1). + * + * The breakpoint's type indicates the sort of access that will cause it + * to trigger. Possible values may include: + * + * %HW_BREAKPOINT_RW (triggered on read or write access), + * %HW_BREAKPOINT_WRITE (triggered on write access), and + * %HW_BREAKPOINT_READ (triggered on read access). + * + * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all + * possibilities are available on all architectures. Execute breakpoints + * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
+ * + * When a breakpoint gets hit, the @triggered callback is + * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the + * processor registers. + * Data breakpoints occur after the memory access has taken place. + * Breakpoints are disabled during execution of @triggered, to avoid + * recursive traps and allow unhindered access to breakpointed memory. + * + * This sample code sets a breakpoint on pid_max and registers a callback + * function for writes to that variable. Note that it is not portable + * as written, because not all architectures support HW_BREAKPOINT_LEN_4. + * + * ---------------------------------------------------------------------- + * + * #include + * + * struct hw_breakpoint my_bp; + * + * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) + * { + * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); + * dump_stack(); + * ............... + * } + * + * static struct hw_breakpoint my_bp; + * + * static int init_module(void) + * { + * ...................... + * my_bp.info.type = HW_BREAKPOINT_WRITE; + * my_bp.info.len = HW_BREAKPOINT_LEN_4; + * + * my_bp.triggered = my_triggered; + * + * rc = register_kernel_hw_breakpoint(&my_bp); + * ...................... + * } + * + * static void cleanup_module(void) + * { + * ...................... + * unregister_kernel_hw_breakpoint(&my_bp); + * ...................... + * } + * + * ---------------------------------------------------------------------- + */ +struct hw_breakpoint { + void (*triggered)(struct hw_breakpoint *, struct pt_regs *); + struct arch_hw_breakpoint info; +}; + +/* + * len and type values are defined in include/asm/hw_breakpoint.h. + * Available values vary according to the architecture.
On i386 the + * possibilities are: + * + * HW_BREAKPOINT_LEN_1 + * HW_BREAKPOINT_LEN_2 + * HW_BREAKPOINT_LEN_4 + * HW_BREAKPOINT_RW + * HW_BREAKPOINT_READ + * + * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the + * 1-, 2-, and 4-byte lengths may be unavailable. There also may be + * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. + */ + +extern int register_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +extern int modify_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +extern void unregister_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +/* + * Kernel breakpoints are not associated with any particular thread. + */ +extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); +extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); + +extern unsigned int hbp_kernel_pos; + +#endif /* __KERNEL__ */ +#endif /* _ASM_GENERIC_HW_BREAKPOINT_H */ -- cgit v1.2.2 From bd1a5c849bdcc5c89e4a6a18216cd2b9a7a8a78f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:53 -0400 Subject: tracing: Ftrace dynamic ftrace_event_call support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add dynamic ftrace_event_call support to ftrace. Trace engines can add new ftrace_event_call to ftrace on the fly. Each operator function of the call takes an ftrace_event_call data structure as an argument, because these functions may be shared among several ftrace_event_calls. Changes from v13: - Define remove_subsystem_dir() always (revert a2ca5e03), because trace_remove_event_call() uses it. - Modify syscall tracer because of ftrace_event_call change. [fweisbec@gmail.com: Fixed conflict against latest tracing/core] Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H.
Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Przemysław Pawełczyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203453.31965.71901.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 19 ++++++++++--------- include/linux/syscalls.h | 4 ++-- include/trace/ftrace.h | 16 ++++++++-------- include/trace/syscall.h | 11 +++++++---- 4 files changed, 27 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ace2da9e0a0d..1ab3089b5c59 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -112,12 +112,12 @@ struct ftrace_event_call { struct dentry *dir; struct trace_event *event; int enabled; - int (*regfunc)(void *); - void (*unregfunc)(void *); + int (*regfunc)(struct ftrace_event_call *); + void (*unregfunc)(struct ftrace_event_call *); int id; - int (*raw_init)(void); - int (*show_format)(struct ftrace_event_call *call, - struct trace_seq *s); + int (*raw_init)(struct ftrace_event_call *); + int (*show_format)(struct ftrace_event_call *, + struct trace_seq *); int (*define_fields)(struct ftrace_event_call *); struct list_head fields; int filter_active; @@ -147,11 +147,12 @@ enum { FILTER_PTR_STRING, }; -extern int trace_define_field(struct ftrace_event_call *call, - const char *type, const char *name, - int offset, int size, int is_signed, - int filter_type); extern int trace_define_common_fields(struct ftrace_event_call *call); +extern int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size, int is_signed, + int filter_type); +extern int trace_add_event_call(struct ftrace_event_call *call); +extern void trace_remove_event_call(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) diff --git 
a/include/linux/syscalls.h b/include/linux/syscalls.h index f124c8995555..646102eeff92 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -165,7 +165,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - static int init_enter_##sname(void) \ + static int init_enter_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ num = syscall_name_to_nr("sys"#sname); \ @@ -202,7 +202,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - static int init_exit_##sname(void) \ + static int init_exit_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ num = syscall_name_to_nr("sys"#sname); \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 360a77ad79e1..f2bd7a8f8e8b 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -434,7 +434,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * event_trace_printk(_RET_IP_, ": " ); * } * - * static int ftrace_reg_event_(void) + * static int ftrace_reg_event_(struct ftrace_event_call *unused) * { * int ret; * @@ -445,7 +445,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * return ret; * } * - * static void ftrace_unreg_event_(void) + * static void ftrace_unreg_event_(struct ftrace_event_call *unused) * { * unregister_trace_(ftrace_event_); * } @@ -478,7 +478,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * trace_current_buffer_unlock_commit(event, irq_flags, pc); * } * - * static int ftrace_raw_reg_event_(void) + * static int ftrace_raw_reg_event_(struct ftrace_event_call *unused) * { * int ret; * @@ -489,7 +489,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * return ret; * } * - * 
static void ftrace_unreg_event_(void) + * static void ftrace_unreg_event_(struct ftrace_event_call *unused) * { * unregister_trace_(ftrace_raw_event_); * } @@ -498,7 +498,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * .trace = ftrace_raw_output_, <-- stage 2 * }; * - * static int ftrace_raw_init_event_(void) + * static int ftrace_raw_init_event_(struct ftrace_event_call *unused) * { * int id; * @@ -592,7 +592,7 @@ static void ftrace_raw_event_##call(proto) \ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ } \ \ -static int ftrace_raw_reg_event_##call(void *ptr) \ +static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ { \ int ret; \ \ @@ -603,7 +603,7 @@ static int ftrace_raw_reg_event_##call(void *ptr) \ return ret; \ } \ \ -static void ftrace_raw_unreg_event_##call(void *ptr) \ +static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\ { \ unregister_trace_##call(ftrace_raw_event_##call); \ } \ @@ -612,7 +612,7 @@ static struct trace_event ftrace_event_type_##call = { \ .trace = ftrace_raw_output_##call, \ }; \ \ -static int ftrace_raw_init_event_##call(void) \ +static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ { \ int id; \ \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5dc283ba5ae0..e290b86f6167 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -39,16 +39,19 @@ void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); extern struct trace_event event_syscall_enter; extern struct trace_event event_syscall_exit; -extern int reg_event_syscall_enter(void *ptr); -extern void unreg_event_syscall_enter(void *ptr); -extern int reg_event_syscall_exit(void *ptr); -extern void unreg_event_syscall_exit(void *ptr); + extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s); 
extern int syscall_enter_define_fields(struct ftrace_event_call *call); extern int syscall_exit_define_fields(struct ftrace_event_call *call); +extern int reg_event_syscall_enter(struct ftrace_event_call *call); +extern void unreg_event_syscall_enter(struct ftrace_event_call *call); +extern int reg_event_syscall_exit(struct ftrace_event_call *call); +extern void unreg_event_syscall_exit(struct ftrace_event_call *call); +extern int +ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif -- cgit v1.2.2 From 24851d2447830e6cba4c4b641cb73e713f312373 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 26 Aug 2009 23:38:30 +0200 Subject: tracing/kprobes: Dump the culprit kprobe in case of kprobe recursion Kprobes can enter into a probing recursion, i.e. a kprobe that loops endlessly because one of the core functions it uses during probing is itself probed. This patch helps pinpoint the kprobe that raised such recursion by dumping it and raising a BUG instead of a warning (we also disarm the kprobe to try to avoid recursion in BUG itself). Having a BUG instead of a warning stops the stacktrace in the right place and doesn't pollute the logs with hundreds of traces that eventually end up in a stack overflow.
Signed-off-by: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli --- include/linux/kprobes.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index bcd9c07848be..87eb79c9dd60 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -296,6 +296,8 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); int disable_kprobe(struct kprobe *kp); int enable_kprobe(struct kprobe *kp); +void dump_kprobe(struct kprobe *kp); + #else /* !CONFIG_KPROBES: */ static inline int kprobes_built_in(void) -- cgit v1.2.2 From aeaeae1187d7520f1c5559623f0a149da6a1c96e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 27 Aug 2009 05:09:51 +0200 Subject: tracing: Restore the const qualifier for field names and types definition Restore the const qualifier in field's name and type parameters of trace_define_field that was lost while solving a conflict. Field names and types are defined as builtin constant strings in static TRACE_EVENTs. But kprobes allocates these dynamically. That said, we still want to always pass these strings as const char * in trace_define_field() to avoid any further accidental writes on the pointed strings.
Reported-by: Li Zefan Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt --- include/linux/ftrace_event.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 1ab3089b5c59..73edf5a52e31 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -148,9 +148,9 @@ enum { }; extern int trace_define_common_fields(struct ftrace_event_call *call); -extern int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size, int is_signed, - int filter_type); +extern int trace_define_field(struct ftrace_event_call *call, const char *type, + const char *name, int offset, int size, + int is_signed, int filter_type); extern int trace_add_event_call(struct ftrace_event_call *call); extern void trace_remove_event_call(struct ftrace_event_call *call); -- cgit v1.2.2 From 979f693def9084a452846365dfde5dcb28366333 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Sep 2009 14:44:11 +0200 Subject: ratelimit: Use per ratelimit context locking I'd like to use printk_ratelimit() in atomic context, but that's not possible right now due to the spinlock usage this commit introduced more than a year ago: 717115e: printk ratelimiting rewrite As a first step push the lock into the ratelimit state structure. This allows us to deal with locking failures to be considered as an event related to that state being too busy. Also clean up the code a bit (without changing functionality): - tidy up the definitions - clean up the code flow This also shrinks the code a tiny bit: text data bss dec hex filename 264 0 4 268 10c ratelimit.o.before 255 0 0 255 ff ratelimit.o.after ( Whole-kernel data size got a bit larger, because we have two ratelimit-state data structures right now. ) Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: David S. 
Miller LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/ratelimit.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 00044b856453..187bc16c1f15 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -1,20 +1,30 @@ #ifndef _LINUX_RATELIMIT_H #define _LINUX_RATELIMIT_H + #include +#include -#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) -#define DEFAULT_RATELIMIT_BURST 10 +#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) +#define DEFAULT_RATELIMIT_BURST 10 struct ratelimit_state { - int interval; - int burst; - int printed; - int missed; - unsigned long begin; + spinlock_t lock; /* protect the state */ + + int interval; + int burst; + int printed; + int missed; + unsigned long begin; }; -#define DEFINE_RATELIMIT_STATE(name, interval, burst) \ - struct ratelimit_state name = {interval, burst,} +#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) \ + \ + struct ratelimit_state name = { \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + .interval = interval_init, \ + .burst = burst_init, \ + } extern int __ratelimit(struct ratelimit_state *rs); -#endif + +#endif /* _LINUX_RATELIMIT_H */ -- cgit v1.2.2 From 3fff4c42bd0a89869a0eb1e7874cc06ffa4aa0f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Sep 2009 16:18:09 +0200 Subject: printk: Remove ratelimit.h from kernel.h Decouple kernel.h from ratelimit.h: the global declaration of printk's ratelimit_state is not needed, and it leads to messy circular dependencies due to ratelimit.h's (new) adding of a spinlock_types.h include. Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: David S. 
Miller LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 2 -- include/linux/net.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b5b1e0899a8..3305f33201be 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -241,7 +240,6 @@ asmlinkage int vprintk(const char *fmt, va_list args) asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern struct ratelimit_state printk_ratelimit_state; extern int printk_ratelimit(void); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); diff --git a/include/linux/net.h b/include/linux/net.h index 9040a10584f7..df20f680f455 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -358,6 +358,7 @@ static const struct proto_ops name##_ops = { \ #ifdef CONFIG_SYSCTL #include +#include extern struct ratelimit_state net_ratelimit_state; #endif -- cgit v1.2.2 From 96a2c464de07d7c72988db851c029b204fc59108 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 1 Aug 2009 01:34:24 +0200 Subject: tracing/bkl: Add bkl ftrace events Add two events lock_kernel and unlock_kernel() to trace the bkl uses. This opens the door for userspace tools to perform statistics about the callsites that use it, dependencies with other locks (by pairing the trace with lock events), use with recursivity and so on... The {__reacquire,release}_kernel_lock() events are not traced because these are called from schedule, thus the sched events are sufficient to trace them. 
Example of a trace: hald-addon-stor-4152 [000] 165.875501: unlock_kernel: depth: 0, fs/block_dev.c:1358 __blkdev_put() hald-addon-stor-4152 [000] 167.832974: lock_kernel: depth: 0, fs/block_dev.c:1167 __blkdev_get() How to get the callsites that acquire it recursively: cd /debug/tracing/events/bkl echo "lock_depth > 0" > filter firefox-4951 [001] 206.276967: unlock_kernel: depth: 1, fs/reiserfs/super.c:575 reiserfs_dirty_inode() You can also filter by file and/or line. v2: Use of FILTER_PTR_STRING attribute for files and lines fields to make them traceable. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Li Zefan --- include/linux/smp_lock.h | 19 ++++++++++++--- include/trace/events/bkl.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 4 deletions(-) create mode 100644 include/trace/events/bkl.h (limited to 'include') diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index 813be59bf345..d48cc77ba70d 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -3,6 +3,7 @@ #ifdef CONFIG_LOCK_KERNEL #include +#include #define kernel_locked() (current->lock_depth >= 0) @@ -24,8 +25,18 @@ static inline int reacquire_kernel_lock(struct task_struct *task) return 0; } -extern void __lockfunc lock_kernel(void) __acquires(kernel_lock); -extern void __lockfunc unlock_kernel(void) __releases(kernel_lock); +extern void __lockfunc _lock_kernel(void) __acquires(kernel_lock); +extern void __lockfunc _unlock_kernel(void) __releases(kernel_lock); + +#define lock_kernel() { \ + trace_lock_kernel(__func__, __FILE__, __LINE__); \ + _lock_kernel(); \ +} + +#define unlock_kernel() { \ + trace_unlock_kernel(__func__, __FILE__, __LINE__); \ + _unlock_kernel(); \ +} /* * Various legacy drivers don't really need the BKL in a specific @@ -41,8 +52,8 @@ static inline void cycle_kernel_lock(void) #else -#define lock_kernel() do { } while(0) -#define unlock_kernel() do { } while(0) +#define lock_kernel() 
trace_lock_kernel(__func__, __FILE__, __LINE__); +#define unlock_kernel() trace_unlock_kernel(__func__, __FILE__, __LINE__); #define release_kernel_lock(task) do { } while(0) #define cycle_kernel_lock() do { } while(0) #define reacquire_kernel_lock(task) 0 diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h new file mode 100644 index 000000000000..8abd620a490e --- /dev/null +++ b/include/trace/events/bkl.h @@ -0,0 +1,61 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bkl + +#if !defined(_TRACE_BKL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BKL_H + +#include + +TRACE_EVENT(lock_kernel, + + TP_PROTO(const char *func, const char *file, int line), + + TP_ARGS(func, file, line), + + TP_STRUCT__entry( + __field( int, lock_depth ) + __field_ext( const char *, func, FILTER_PTR_STRING ) + __field_ext( const char *, file, FILTER_PTR_STRING ) + __field( int, line ) + ), + + TP_fast_assign( + /* We want to record the lock_depth after lock is acquired */ + __entry->lock_depth = current->lock_depth + 1; + __entry->func = func; + __entry->file = file; + __entry->line = line; + ), + + TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + __entry->file, __entry->line, __entry->func) +); + +TRACE_EVENT(unlock_kernel, + + TP_PROTO(const char *func, const char *file, int line), + + TP_ARGS(func, file, line), + + TP_STRUCT__entry( + __field(int, lock_depth) + __field(const char *, func) + __field(const char *, file) + __field(int, line) + ), + + TP_fast_assign( + __entry->lock_depth = current->lock_depth; + __entry->func = func; + __entry->file = file; + __entry->line = line; + ), + + TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + __entry->file, __entry->line, __entry->func) +); + +#endif /* _TRACE_BKL_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.2 From 9f0cf4adb6aa0bfccf675c938124e68f7f06349d Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 26 Sep 2009 14:33:01 +0200 Subject: x86: Use 
__builtin_object_size() to validate the buffer size for copy_from_user() gcc (4.x) supports the __builtin_object_size() builtin, which reports the size of an object that a pointer points to, when known at compile time. If the buffer size is not known at compile time, a constant -1 is returned. This patch uses this feature to add a sanity check to copy_from_user(); if the target buffer is known to be smaller than the copy size, the copy is aborted and a WARNing is emitted in memory debug mode. These extra checks compile away when the object size is not known, or if both the buffer size and the copy length are constants. Signed-off-by: Arjan van de Ven LKML-Reference: <20090926143301.2c396b94@infradead.org> Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc4.h | 2 ++ include/linux/compiler.h | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 450fa597c94d..a3aef5d55dba 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -37,3 +37,5 @@ #define __cold __attribute__((__cold__)) #endif + +#define __compiletime_object_size(obj) __builtin_object_size(obj, 0) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb5135b4e1..8e54108688f9 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -266,6 +266,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) #endif +/* Compile time object size, -1 for unknown */ +#ifndef __compiletime_object_size +# define __compiletime_object_size(obj) -1 +#endif /* * Prevent the compiler from merging or refetching accesses.
The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v1.2.2 From 925936ebf35a95c290e010b784c962164e6728f3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 28 Sep 2009 17:12:49 +0200 Subject: tracing: Pushdown the bkl tracepoints calls Currently we are calling the bkl tracepoint callbacks just before the bkl lock/unlock operations, i.e. the tracepoint call is not inside a lock_kernel() function but inside a lock_kernel() macro. Hence the bkl trace event header must be included from smp_lock.h. This raises some nasty circular header dependencies: linux/smp_lock.h -> trace/events/bkl.h -> trace/define_trace.h -> trace/ftrace.h -> linux/ftrace_event.h -> linux/hardirq.h -> linux/smp_lock.h This results in incomplete event declarations, spurious event definitions and other kinds of funny behaviours. This is hardly fixable without ugly workarounds. So instead, we push the file name, line number and function name as lock_kernel() parameters, so that we only deal with the trace event header from lib/kernel_lock.c. This adds two parameters to lock_kernel() and unlock_kernel() but it should be fine with respect to performance because this pair does not seem to be called in fast paths.
Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Ingo Molnar Cc: Li Zefan --- include/linux/smp_lock.h | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index d48cc77ba70d..2ea1dd1ba21c 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -3,7 +3,6 @@ #ifdef CONFIG_LOCK_KERNEL #include -#include #define kernel_locked() (current->lock_depth >= 0) @@ -25,18 +24,21 @@ static inline int reacquire_kernel_lock(struct task_struct *task) return 0; } -extern void __lockfunc _lock_kernel(void) __acquires(kernel_lock); -extern void __lockfunc _unlock_kernel(void) __releases(kernel_lock); +extern void __lockfunc +_lock_kernel(const char *func, const char *file, int line) +__acquires(kernel_lock); -#define lock_kernel() { \ - trace_lock_kernel(__func__, __FILE__, __LINE__); \ - _lock_kernel(); \ -} +extern void __lockfunc +_unlock_kernel(const char *func, const char *file, int line) +__releases(kernel_lock); -#define unlock_kernel() { \ - trace_unlock_kernel(__func__, __FILE__, __LINE__); \ - _unlock_kernel(); \ -} +#define lock_kernel() do { \ + _lock_kernel(__func__, __FILE__, __LINE__); \ +} while (0) + +#define unlock_kernel() do { \ + _unlock_kernel(__func__, __FILE__, __LINE__); \ +} while (0) /* * Various legacy drivers don't really need the BKL in a specific @@ -52,8 +54,8 @@ static inline void cycle_kernel_lock(void) #else -#define lock_kernel() trace_lock_kernel(__func__, __FILE__, __LINE__); -#define unlock_kernel() trace_unlock_kernel(__func__, __FILE__, __LINE__); +#define lock_kernel() +#define unlock_kernel() #define release_kernel_lock(task) do { } while(0) #define cycle_kernel_lock() do { } while(0) #define reacquire_kernel_lock(task) 0 -- cgit v1.2.2 From 4a3127693001c61a21d1ce680db6340623f52e93 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 30 Sep 2009 13:05:23 +0200 Subject: x86: Turn the 
copy_from_user check into an (optional) compile time warning A previous patch added the buffer size check to copy_from_user(). One of the things learned from analyzing the result of the previous patch is that in general, gcc is really good at proving that the code contains sufficient security checks to not need to do a runtime check. But that for those cases where gcc could not prove this, there was a relatively high percentage of real security issues. This patch turns the case of "gcc cannot prove" into a compile time warning, as long as a sufficiently new gcc is in use that supports this. The objective is that these warnings will trigger developers checking new cases out before a security hole enters a linux kernel release. Signed-off-by: Arjan van de Ven Cc: Linus Torvalds Cc: "David S. Miller" Cc: James Morris Cc: Jan Beulich LKML-Reference: <20090930130523.348ae6c4@infradead.org> Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc4.h | 3 +++ include/linux/compiler.h | 4 ++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index a3aef5d55dba..f1709c1f9eae 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -39,3 +39,6 @@ #endif #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) +#if __GNUC_MINOR__ >= 4 +#define __compiletime_warning(message) __attribute__((warning(message))) +#endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 8e54108688f9..950356311f12 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -270,6 +270,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #ifndef __compiletime_object_size # define __compiletime_object_size(obj) -1 #endif +#ifndef __compiletime_warning +# define __compiletime_warning(message) +#endif + /* * Prevent the compiler from merging or refetching accesses. 
The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v1.2.2 From 63312b6a6faae3f2e5577f2b001e3b504f10a2aa Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 2 Oct 2009 07:50:50 -0700 Subject: x86: Add a Kconfig option to turn the copy_from_user warnings into errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For automated testing it is useful to have the option to turn the warnings on copy_from_user() etc checks into errors: In function ‘copy_from_user’, inlined from ‘fd_copyin’ at drivers/block/floppy.c:3080, inlined from ‘fd_ioctl’ at drivers/block/floppy.c:3503: linux/arch/x86/include/asm/uaccess_32.h:213: error: call to ‘copy_from_user_overflow’ declared with attribute error: copy_from_user buffer size is not provably correct Signed-off-by: Arjan van de Ven Cc: Linus Torvalds Cc: Andrew Morton LKML-Reference: <20091002075050.4e9f7641@infradead.org> Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc4.h | 1 + include/linux/compiler.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index f1709c1f9eae..77542c57e20a 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -41,4 +41,5 @@ #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) #if __GNUC_MINOR__ >= 4 #define __compiletime_warning(message) __attribute__((warning(message))) +#define __compiletime_error(message) __attribute__((error(message))) #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 950356311f12..88fd4b673cb4 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -273,6 +273,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #ifndef __compiletime_warning # define __compiletime_warning(message) #endif +#ifndef __compiletime_error +# define __compiletime_error(message) +#endif /* * Prevent 
the compiler from merging or refetching accesses. The compiler -- cgit v1.2.2 From 26a50744b21fff65bd754874072857bee8967f4d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 6 Oct 2009 01:09:50 -0500 Subject: tracing/events: Add 'signed' field to format files The sign info used for filters in the kernel is also useful to applications that process the trace stream. Add it to the format files and make it available to userspace. Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: rostedt@goodmis.org Cc: lizf@cn.fujitsu.com Cc: hch@infradead.org Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1254809398-8078-2-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index cc0d9667e182..c9bbcab95fbe 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -120,9 +120,10 @@ #undef __field #define __field(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ + (unsigned int)sizeof(field.item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; @@ -132,19 +133,21 @@ #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ + (unsigned int)sizeof(field.item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; #undef __dynamic_array #define __dynamic_array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\ - "offset:%u;\tsize:%u;\n", \ + 
"offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), \ __data_loc_##item), \ - (unsigned int)sizeof(field.__data_loc_##item)); \ + (unsigned int)sizeof(field.__data_loc_##item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; -- cgit v1.2.2 From 89eda06837094ce9f34fae269b8773fcfd70f046 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 4 Oct 2009 21:49:47 +0900 Subject: LSM: Add security_path_chmod() and security_path_chown(). This patch allows pathname based LSM modules to check chmod()/chown() operations. Since notify_change() does not receive "struct vfsmount *", we add security_path_chmod() and security_path_chown() to the caller of notify_change(). These hooks are used by TOMOYO. Signed-off-by: Tetsuo Handa Signed-off-by: James Morris --- include/linux/security.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 239e40d0450b..c8a584c26f7b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -447,6 +447,18 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @new_dir contains the path structure for parent of the new link. * @new_dentry contains the dentry structure of the new link. * Return 0 if permission is granted. + * @path_chmod: + * Check for permission to change DAC's permission of a file or directory. + * @dentry contains the dentry structure. + * @mnt contains the vfsmnt structure. + * @mode contains DAC's mode. + * Return 0 if permission is granted. + * @path_chown: + * Check for permission to change owner/group of a file or directory. + * @path contains the path structure. + * @uid contains new owner's ID. + * @gid contains new group's ID. + * Return 0 if permission is granted. * @inode_readlink: * Check the permission to read the symbolic link. * @dentry contains the dentry structure for the file link. 
@@ -1488,6 +1500,9 @@ struct security_operations { struct dentry *new_dentry); int (*path_rename) (struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); + int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt, + mode_t mode); + int (*path_chown) (struct path *path, uid_t uid, gid_t gid); #endif int (*inode_alloc_security) (struct inode *inode); @@ -2952,6 +2967,9 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); int security_path_rename(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); +int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, + mode_t mode); +int security_path_chown(struct path *path, uid_t uid, gid_t gid); #else /* CONFIG_SECURITY_PATH */ static inline int security_path_unlink(struct path *dir, struct dentry *dentry) { @@ -3001,6 +3019,18 @@ static inline int security_path_rename(struct path *old_dir, { return 0; } + +static inline int security_path_chmod(struct dentry *dentry, + struct vfsmount *mnt, + mode_t mode) +{ + return 0; +} + +static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid) +{ + return 0; +} #endif /* CONFIG_SECURITY_PATH */ #ifdef CONFIG_KEYS -- cgit v1.2.2 From 8b8efb44033c7e86b3dc76f825c693ec92ae30e9 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 4 Oct 2009 21:49:48 +0900 Subject: LSM: Add security_path_chroot(). This patch allows pathname based LSM modules to check chroot() operations. This hook is used by TOMOYO. 
Signed-off-by: Tetsuo Handa Signed-off-by: James Morris --- include/linux/security.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index c8a584c26f7b..ed0faea60b82 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -459,6 +459,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @uid contains new owner's ID. * @gid contains new group's ID. * Return 0 if permission is granted. + * @path_chroot: + * Check for permission to change root directory. + * @path contains the path structure. + * Return 0 if permission is granted. * @inode_readlink: * Check the permission to read the symbolic link. * @dentry contains the dentry structure for the file link. @@ -1503,6 +1507,7 @@ struct security_operations { int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt, mode_t mode); int (*path_chown) (struct path *path, uid_t uid, gid_t gid); + int (*path_chroot) (struct path *path); #endif int (*inode_alloc_security) (struct inode *inode); @@ -2970,6 +2975,7 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry, int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, mode_t mode); int security_path_chown(struct path *path, uid_t uid, gid_t gid); +int security_path_chroot(struct path *path); #else /* CONFIG_SECURITY_PATH */ static inline int security_path_unlink(struct path *dir, struct dentry *dentry) { @@ -3031,6 +3037,11 @@ static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid) { return 0; } + +static inline int security_path_chroot(struct path *path) +{ + return 0; +} #endif /* CONFIG_SECURITY_PATH */ #ifdef CONFIG_KEYS -- cgit v1.2.2 From 8968f9d3dc23d9a1821d97c6f11e72a59382e56c Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 13 Oct 2009 16:19:41 +0900 Subject: perf_event, x86, mce: Use TRACE_EVENT() for MCE logging This approach is the first baby step towards 
solving many of the structural problems the x86 MCE logging code is having today: - It has a private ring-buffer implementation that has a number of limitations and has been historically fragile and buggy. - It is using a quirky /dev/mcelog ioctl driven ABI that is MCE specific. /dev/mcelog is not part of any larger logging framework and hence has remained on the fringes for many years. - The MCE logging code is still very unclean partly due to its ABI limitations. Fields are being reused for multiple purposes, and the whole message structure is limited and x86 specific to begin with. All in all, the x86 tree would like to move away from this private implementation of an event logging facility to a broader framework. By using perf events we gain the following advantages: - Multiple user-space agents can access MCE events. We can have an mcelog daemon running but also a system-wide tracer capturing important events in flight-recorder mode. - Sampling support: the kernel and the user-space call-chain of MCE events can be stored and analyzed as well. This way actual patterns of bad behavior can be matched to precisely what kind of activity happened in the kernel (and/or in the app) around that moment in time. - Coupling with other hardware and software events: the PMU can track a number of other anomalies - monitoring software might choose to monitor those plus the MCE events as well - in one coherent stream of events. - Discovery of MCE sources - tracepoints are enumerated and tools can act upon the existence (or non-existence) of various channels of MCE information. - Filtering support: we just subscribe to and act upon the events we are interested in. Then even on a per event source basis there are in-kernel filter expressions available that can restrict the amount of data that hits the event channel. - Arbitrarily deep per cpu buffering of events - we can buffer 32 entries or we can buffer as much as we want, as long as we have the RAM.
- An NMI-safe ring-buffer implementation - mappable to user-space. - Built-in support for timestamping of events, PID markers, CPU markers, etc. - A rich ABI accessible over the system call interface. Per cpu, per task and per workload monitoring of MCE events can be done this way. The ABI itself has a nice, meaningful structure. - Extensible ABI: new fields can be added without breaking tooling. New tracepoints can be added as the hardware side evolves. There are various parsers that can be used. - Lots of scheduling/buffering/batching modes of operation for MCE events. poll() support. mmap() support. read() support. You name it. - Rich tooling support: even without any MCE specific extensions added the 'perf' tool today offers various views of MCE data: perf report, perf stat, perf trace can all be used to view logged MCE events and perhaps correlate them to certain user-space usage patterns. But it can be used directly as well, for user-space agents and policy action in mcelog, etc. With this we hope to achieve significant code cleanup and feature improvements in the MCE code, and we hope to be able to drop the /dev/mcelog facility in the end. This patch is just a plain dumb dump of mce_log() records to the tracepoints / perf events framework - a first proof of concept step.
Signed-off-by: Hidetoshi Seto Cc: Huang Ying Cc: Andi Kleen LKML-Reference: <4AD42A0D.7050104@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/mce.h | 69 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 include/trace/events/mce.h (limited to 'include') diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h new file mode 100644 index 000000000000..7eee77895cb3 --- /dev/null +++ b/include/trace/events/mce.h @@ -0,0 +1,69 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mce + +#if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MCE_H + +#include +#include +#include + +TRACE_EVENT(mce_record, + + TP_PROTO(struct mce *m), + + TP_ARGS(m), + + TP_STRUCT__entry( + __field( u64, mcgcap ) + __field( u64, mcgstatus ) + __field( u8, bank ) + __field( u64, status ) + __field( u64, addr ) + __field( u64, misc ) + __field( u64, ip ) + __field( u8, cs ) + __field( u64, tsc ) + __field( u64, walltime ) + __field( u32, cpu ) + __field( u32, cpuid ) + __field( u32, apicid ) + __field( u32, socketid ) + __field( u8, cpuvendor ) + ), + + TP_fast_assign( + __entry->mcgcap = m->mcgcap; + __entry->mcgstatus = m->mcgstatus; + __entry->bank = m->bank; + __entry->status = m->status; + __entry->addr = m->addr; + __entry->misc = m->misc; + __entry->ip = m->ip; + __entry->cs = m->cs; + __entry->tsc = m->tsc; + __entry->walltime = m->time; + __entry->cpu = m->extcpu; + __entry->cpuid = m->cpuid; + __entry->apicid = m->apicid; + __entry->socketid = m->socketid; + __entry->cpuvendor = m->cpuvendor; + ), + + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", + __entry->cpu, + __entry->mcgcap, __entry->mcgstatus, + __entry->bank, __entry->status, + __entry->addr, __entry->misc, + __entry->cs, __entry->ip, + __entry->tsc, + __entry->cpuvendor, __entry->cpuid, + __entry->walltime, + 
__entry->socketid, + __entry->apicid) +); + +#endif /* _TRACE_MCE_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.2 From c44fc770845163f8d9e573f37f92a7b7a7ade14e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 19 Sep 2009 06:50:42 +0200 Subject: tracing: Move syscalls metadata handling from arch to core Most of the syscalls metadata processing is done from arch. But these operations are mostly generic across archs. Especially now that we have a common variable name that expresses the number of syscalls supported by an arch: NR_syscalls, the only remaining bits that need to reside in arch is the syscall nr to addr translation. v2: Compare syscalls symbols only after the "sys" prefix so that we avoid spurious mismatches with archs that have syscalls wrappers, in which case syscalls symbols have "SyS" prefixed aliases. (Reported by: Heiko Carstens) Signed-off-by: Frederic Weisbecker Acked-by: Heiko Carstens Cc: Ingo Molnar Cc: Steven Rostedt Cc: Li Zefan Cc: Masami Hiramatsu Cc: Jason Baron Cc: Lai Jiangshan Cc: Martin Schwidefsky Cc: Paul Mundt --- include/trace/syscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5dc283ba5ae0..e972f0a40f8d 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -33,7 +33,7 @@ struct syscall_metadata { }; #ifdef CONFIG_FTRACE_SYSCALLS -extern struct syscall_metadata *syscall_nr_to_meta(int nr); +extern unsigned long arch_syscall_addr(int nr); extern int syscall_name_to_nr(char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); -- cgit v1.2.2 From 6fb2915df7f0747d9044da9dbff5b46dc2e20830 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Oct 2009 11:21:42 +0800 Subject: tracing/profile: Add filter support - Add an ioctl to allocate a filter for a perf event. - Free the filter when the associated perf event is to be freed.
- Do the filtering in perf_swevent_match(). Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <4AD69546.8050401@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 11 ++++++++++- include/linux/perf_counter.h | 1 + include/linux/perf_event.h | 6 ++++++ 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4ec5e67e18cf..d11770472bc8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -144,7 +144,7 @@ extern char *trace_profile_buf_nmi; #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ extern void destroy_preds(struct ftrace_event_call *call); -extern int filter_match_preds(struct ftrace_event_call *call, void *rec); +extern int filter_match_preds(struct event_filter *filter, void *rec); extern int filter_current_check_discard(struct ring_buffer *buffer, struct ftrace_event_call *call, void *rec, @@ -186,4 +186,13 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) +#ifdef CONFIG_EVENT_PROFILE +struct perf_event; +extern int ftrace_profile_enable(int event_id); +extern void ftrace_profile_disable(int event_id); +extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, + char *filter_str); +extern void ftrace_profile_free_filter(struct perf_event *event); +#endif + #endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7b7fbf433cff..91a2b4309e7a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -225,6 +225,7 @@ struct perf_counter_attr { #define PERF_COUNTER_IOC_RESET _IO ('$', 3) #define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) #define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_COUNTER_IOC_SET_FILTER _IOW('$', 6, char *) enum perf_counter_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git 
a/include/linux/perf_event.h b/include/linux/perf_event.h index 2e6d95f97419..df9d964c15fc 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -221,6 +221,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_RESET _IO ('$', 3) #define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, @@ -633,7 +634,12 @@ struct perf_event { struct pid_namespace *ns; u64 id; + +#ifdef CONFIG_EVENT_PROFILE + struct event_filter *filter; #endif + +#endif /* CONFIG_PERF_EVENTS */ }; /** -- cgit v1.2.2 From 434a83c3fbb951908a3a52040f7f0e0b8ba00dd0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Oct 2009 11:50:39 +0200 Subject: events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. 
To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan Cc: Tom Zanussi Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/trace/events/bkl.h | 18 +++++----- include/trace/events/irq.h | 8 ++--- include/trace/events/power.h | 2 -- include/trace/events/sched.h | 44 ++++++++++++------------ include/trace/events/timer.h | 79 ++++++++++++++++++++++---------------------- 5 files changed, 74 insertions(+), 77 deletions(-) (limited to 'include') diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h index 8abd620a490e..1af72dc24278 100644 --- a/include/trace/events/bkl.h +++ b/include/trace/events/bkl.h @@ -13,7 +13,7 @@ TRACE_EVENT(lock_kernel, TP_ARGS(func, file, line), TP_STRUCT__entry( - __field( int, lock_depth ) + __field( int, depth ) __field_ext( const char *, func, FILTER_PTR_STRING ) __field_ext( const char *, file, FILTER_PTR_STRING ) __field( int, line ) @@ -21,13 +21,13 @@ TRACE_EVENT(lock_kernel, TP_fast_assign( /* We want to record the lock_depth after lock is acquired */ - __entry->lock_depth = current->lock_depth + 1; + __entry->depth = current->lock_depth + 1; __entry->func = func; __entry->file = file; __entry->line = line; ), - TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + 
TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth, __entry->file, __entry->line, __entry->func) ); @@ -38,20 +38,20 @@ TRACE_EVENT(unlock_kernel, TP_ARGS(func, file, line), TP_STRUCT__entry( - __field(int, lock_depth) - __field(const char *, func) - __field(const char *, file) - __field(int, line) + __field(int, depth ) + __field(const char *, func ) + __field(const char *, file ) + __field(int, line ) ), TP_fast_assign( - __entry->lock_depth = current->lock_depth; + __entry->depth = current->lock_depth; __entry->func = func; __entry->file = file; __entry->line = line; ), - TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth, __entry->file, __entry->line, __entry->func) ); diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index b89f9db4a404..dcfcd4407623 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -48,7 +48,7 @@ TRACE_EVENT(irq_handler_entry, __assign_str(name, action->name); ), - TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name)) + TP_printk("irq=%d name=%s", __entry->irq, __get_str(name)) ); /** @@ -78,7 +78,7 @@ TRACE_EVENT(irq_handler_exit, __entry->ret = ret; ), - TP_printk("irq=%d return=%s", + TP_printk("irq=%d ret=%s", __entry->irq, __entry->ret ? 
"handled" : "unhandled") ); @@ -107,7 +107,7 @@ TRACE_EVENT(softirq_entry, __entry->vec = (int)(h - vec); ), - TP_printk("softirq=%d action=%s", __entry->vec, + TP_printk("vec=%d [action=%s]", __entry->vec, show_softirq_name(__entry->vec)) ); @@ -136,7 +136,7 @@ TRACE_EVENT(softirq_exit, __entry->vec = (int)(h - vec); ), - TP_printk("softirq=%d action=%s", __entry->vec, + TP_printk("vec=%d [action=%s]", __entry->vec, show_softirq_name(__entry->vec)) ); diff --git a/include/trace/events/power.h b/include/trace/events/power.h index ea6d579261ad..9bb96e5a2848 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -16,8 +16,6 @@ enum { }; #endif - - TRACE_EVENT(power_start, TP_PROTO(unsigned int type, unsigned int state), diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 4069c43f4187..b50b9856c59f 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -26,7 +26,7 @@ TRACE_EVENT(sched_kthread_stop, __entry->pid = t->pid; ), - TP_printk("task %s:%d", __entry->comm, __entry->pid) + TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) ); /* @@ -46,7 +46,7 @@ TRACE_EVENT(sched_kthread_stop_ret, __entry->ret = ret; ), - TP_printk("ret %d", __entry->ret) + TP_printk("ret=%d", __entry->ret) ); /* @@ -73,7 +73,7 @@ TRACE_EVENT(sched_wait_task, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -94,7 +94,7 @@ TRACE_EVENT(sched_wakeup, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) - __field( int, cpu ) + __field( int, target_cpu ) ), TP_fast_assign( @@ -102,12 +102,12 @@ TRACE_EVENT(sched_wakeup, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; - __entry->cpu = task_cpu(p); + __entry->target_cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d [%03d]", + TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", __entry->comm, __entry->pid, 
__entry->prio, - __entry->success, __entry->cpu) + __entry->success, __entry->target_cpu) ); /* @@ -127,7 +127,7 @@ TRACE_EVENT(sched_wakeup_new, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) - __field( int, cpu ) + __field( int, target_cpu ) ), TP_fast_assign( @@ -135,12 +135,12 @@ TRACE_EVENT(sched_wakeup_new, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; - __entry->cpu = task_cpu(p); + __entry->target_cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d [%03d]", + TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->cpu) + __entry->success, __entry->target_cpu) ); /* @@ -176,7 +176,7 @@ TRACE_EVENT(sched_switch, __entry->next_prio = next->prio; ), - TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]", + TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->prev_state ? 
__print_flags(__entry->prev_state, "|", @@ -211,7 +211,7 @@ TRACE_EVENT(sched_migrate_task, __entry->dest_cpu = dest_cpu; ), - TP_printk("task %s:%d [%d] from: %d to: %d", + TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d", __entry->comm, __entry->pid, __entry->prio, __entry->orig_cpu, __entry->dest_cpu) ); @@ -237,7 +237,7 @@ TRACE_EVENT(sched_process_free, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -262,7 +262,7 @@ TRACE_EVENT(sched_process_exit, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -287,7 +287,7 @@ TRACE_EVENT(sched_process_wait, __entry->prio = current->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -314,7 +314,7 @@ TRACE_EVENT(sched_process_fork, __entry->child_pid = child->pid; ), - TP_printk("parent %s:%d child %s:%d", + TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d", __entry->parent_comm, __entry->parent_pid, __entry->child_comm, __entry->child_pid) ); @@ -340,7 +340,7 @@ TRACE_EVENT(sched_signal_send, __entry->sig = sig; ), - TP_printk("sig: %d task %s:%d", + TP_printk("sig=%d comm=%s pid=%d", __entry->sig, __entry->comm, __entry->pid) ); @@ -374,7 +374,7 @@ TRACE_EVENT(sched_stat_wait, __perf_count(delay); ), - TP_printk("task: %s:%d wait: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); @@ -406,7 +406,7 @@ TRACE_EVENT(sched_stat_runtime, __perf_count(runtime); ), - TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]", + TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->runtime, (unsigned long long)__entry->vruntime) @@ -437,7 +437,7 @@ TRACE_EVENT(sched_stat_sleep, __perf_count(delay); ), - 
TP_printk("task: %s:%d sleep: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); @@ -467,7 +467,7 @@ TRACE_EVENT(sched_stat_iowait, __perf_count(delay); ), - TP_printk("task: %s:%d iowait: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 1844c48d640e..e5ce87a0498d 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -26,7 +26,7 @@ TRACE_EVENT(timer_init, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -54,7 +54,7 @@ TRACE_EVENT(timer_start, __entry->now = jiffies; ), - TP_printk("timer %p: func %pf, expires %lu, timeout %ld", + TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]", __entry->timer, __entry->function, __entry->expires, (long)__entry->expires - __entry->now) ); @@ -81,7 +81,7 @@ TRACE_EVENT(timer_expire_entry, __entry->now = jiffies; ), - TP_printk("timer %p: now %lu", __entry->timer, __entry->now) + TP_printk("timer=%p now=%lu", __entry->timer, __entry->now) ); /** @@ -108,7 +108,7 @@ TRACE_EVENT(timer_expire_exit, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -129,7 +129,7 @@ TRACE_EVENT(timer_cancel, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -140,24 +140,24 @@ TRACE_EVENT(timer_cancel, */ TRACE_EVENT(hrtimer_init, - TP_PROTO(struct hrtimer *timer, clockid_t clockid, + TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid, enum hrtimer_mode mode), - TP_ARGS(timer, clockid, mode), + TP_ARGS(hrtimer, clockid, mode), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( clockid_t, clockid ) __field( enum hrtimer_mode, mode ) ), TP_fast_assign( - __entry->timer 
= timer; + __entry->hrtimer = hrtimer; __entry->clockid = clockid; __entry->mode = mode; ), - TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer, + TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer, __entry->clockid == CLOCK_REALTIME ? "CLOCK_REALTIME" : "CLOCK_MONOTONIC", __entry->mode == HRTIMER_MODE_ABS ? @@ -170,26 +170,26 @@ TRACE_EVENT(hrtimer_init, */ TRACE_EVENT(hrtimer_start, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( void *, function ) __field( s64, expires ) __field( s64, softexpires ) ), TP_fast_assign( - __entry->timer = timer; - __entry->function = timer->function; - __entry->expires = hrtimer_get_expires(timer).tv64; - __entry->softexpires = hrtimer_get_softexpires(timer).tv64; + __entry->hrtimer = hrtimer; + __entry->function = hrtimer->function; + __entry->expires = hrtimer_get_expires(hrtimer).tv64; + __entry->softexpires = hrtimer_get_softexpires(hrtimer).tv64; ), - TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu", - __entry->timer, __entry->function, + TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu", + __entry->hrtimer, __entry->function, (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->expires }), (unsigned long long)ktime_to_ns((ktime_t) { @@ -206,23 +206,22 @@ TRACE_EVENT(hrtimer_start, */ TRACE_EVENT(hrtimer_expire_entry, - TP_PROTO(struct hrtimer *timer, ktime_t *now), + TP_PROTO(struct hrtimer *hrtimer, ktime_t *now), - TP_ARGS(timer, now), + TP_ARGS(hrtimer, now), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( s64, now ) ), TP_fast_assign( - __entry->timer = timer; - __entry->now = now->tv64; + __entry->hrtimer = hrtimer; + __entry->now = now->tv64; ), - TP_printk("hrtimer %p, now %llu", __entry->timer, - (unsigned long long)ktime_to_ns((ktime_t) { - .tv64 = __entry->now })) + 
TP_printk("hrtimer=%p now=%llu", __entry->hrtimer, + (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) ); /** @@ -234,40 +233,40 @@ TRACE_EVENT(hrtimer_expire_entry, */ TRACE_EVENT(hrtimer_expire_exit, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; ), - TP_printk("hrtimer %p", __entry->timer) + TP_printk("hrtimer=%p", __entry->hrtimer) ); /** * hrtimer_cancel - called when the hrtimer is canceled - * @timer: pointer to struct hrtimer + * @hrtimer: pointer to struct hrtimer */ TRACE_EVENT(hrtimer_cancel, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; ), - TP_printk("hrtimer %p", __entry->timer) + TP_printk("hrtimer=%p", __entry->hrtimer) ); /** @@ -302,7 +301,7 @@ TRACE_EVENT(itimer_state, __entry->interval_usec = value->it_interval.tv_usec; ), - TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu", + TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu", __entry->which, __entry->expires, __entry->value_sec, __entry->value_usec, __entry->interval_sec, __entry->interval_usec) @@ -332,7 +331,7 @@ TRACE_EVENT(itimer_expire, __entry->pid = pid_nr(pid); ), - TP_printk("which %d, pid %d, now %lu", __entry->which, + TP_printk("which=%d pid=%d now=%lu", __entry->which, (int) __entry->pid, __entry->now) ); -- cgit v1.2.2 From 40b1f4e5113eafc5e84f2ba86822df66087fcb25 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 22 Oct 2009 14:39:28 +1100 Subject: irq: trivial: Fix typo in comment for #endif The comment suggests this #endif is CONFIG_X86 but it's really CONFIG_TRACE_IRQFLAGS_SUPPORT 
Signed-off-by: Michael Neuling Cc: michael@ellerman.id.au LKML-Reference: <18191.1256182768@neuling.org> Signed-off-by: Ingo Molnar --- include/linux/irqflags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index b02a3f1d46a0..006bf45eae30 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -124,6 +124,6 @@ typecheck(unsigned long, flags); \ raw_irqs_disabled_flags(flags); \ }) -#endif /* CONFIG_X86 */ +#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ #endif -- cgit v1.2.2 From 5c828713358cb9df8aa174371edcbbb62203a490 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 23 Oct 2009 14:58:11 +0200 Subject: ratelimit: Make suppressed output messages more useful Today I got: [39648.224782] Registered led device: iwl-phy0::TX [40676.545099] __ratelimit: 246 callbacks suppressed [40676.545103] abcdef[23675]: segfault at 0 ... as you can see the ratelimit message contains a function prefix. Since this is always __ratelimit, this won't help much. This patch changes __ratelimit and printk_ratelimit to print the function name that calls ratelimit. This will pinpoint the responsible function, as long as not several different places call ratelimit with the same ratelimit state at the same time. In that case we catch only one random function that calls ratelimit after the wait period.
Signed-off-by: Christian Borntraeger Cc: Dave Young Cc: Linus Torvalds CC: Andrew Morton LKML-Reference: <200910231458.11832.borntraeger@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 3 ++- include/linux/ratelimit.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3305f33201be..21d0d822c716 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -240,7 +240,8 @@ asmlinkage int vprintk(const char *fmt, va_list args) asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern int printk_ratelimit(void); +extern int __printk_ratelimit(const char *func); +#define printk_ratelimit() __printk_ratelimit(__func__) extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 187bc16c1f15..668cf1bef030 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -25,6 +25,7 @@ struct ratelimit_state { .burst = burst_init, \ } -extern int __ratelimit(struct ratelimit_state *rs); +extern int ___ratelimit(struct ratelimit_state *rs, const char *func); +#define __ratelimit(state) ___ratelimit(state, __func__) #endif /* _LINUX_RATELIMIT_H */ -- cgit v1.2.2 From bb015f0c85362aa767f8f00f50a40d85e489414f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 19 Oct 2009 11:43:32 +0200 Subject: pcmcia: drop already defined PCI_IDs Out of 10 PCI_IDs found in the PCMCIA subsystem, only two were not defined in pci_ids.h. Move them and drop the duplicates. Successfully build-tested. 
Signed-off-by: Wolfram Sang Cc: Jesse Barnes Signed-off-by: Dominik Brodowski --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f490e7a7307a..857cc349bf71 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1632,6 +1632,8 @@ #define PCI_DEVICE_ID_O2_6730 0x673a #define PCI_DEVICE_ID_O2_6832 0x6832 #define PCI_DEVICE_ID_O2_6836 0x6836 +#define PCI_DEVICE_ID_O2_6812 0x6872 +#define PCI_DEVICE_ID_O2_6933 0x6933 #define PCI_VENDOR_ID_3DFX 0x121a #define PCI_DEVICE_ID_3DFX_VOODOO 0x0001 -- cgit v1.2.2 From ce0e7b28fb75cb003cfc8d0238613aaf1c55e797 Mon Sep 17 00:00:00 2001 From: Ryota Ozaki Date: Sat, 24 Oct 2009 01:20:10 +0900 Subject: sched, cpuacct: Fix niced guest time accounting CPU time of a guest is always accounted in 'user' time without concern for the nice value of its counterpart process although the guest is scheduled under the nice value. This patch fixes the defect and accounts cpu time of a niced guest in 'nice' time as same as a niced process. And also the patch adds 'guest_nice' to cpuacct. The value provides niced guest cpu time which is like 'nice' to 'user'. 
The original discussions can be found here: http://www.mail-archive.com/kvm@vger.kernel.org/msg23982.html http://www.mail-archive.com/kvm@vger.kernel.org/msg23860.html Signed-off-by: Ryota Ozaki Acked-by: Avi Kivity Cc: Peter Zijlstra LKML-Reference: <1256314810-7897-1-git-send-email-ozaki.ryota@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 348fa8874b52..c059044bc6dc 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -25,6 +25,7 @@ struct cpu_usage_stat { cputime64_t iowait; cputime64_t steal; cputime64_t guest; + cputime64_t guest_nice; }; struct kernel_stat { -- cgit v1.2.2 From 9b1d82fa1611706fa7ee1505f290160a18caf95d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 25 Oct 2009 19:03:50 -0700 Subject: rcu: "Tiny RCU", The Bloatwatch Edition This patch is a version of RCU designed for !SMP provided for a small-footprint RCU implementation. In particular, the implementation of synchronize_rcu() is extremely lightweight and high performance. It passes rcutorture testing in each of the four relevant configurations (combinations of NO_HZ and PREEMPT) on x86. This saves about 1K bytes compared to old Classic RCU (which is no longer in mainline), and more than three kilobytes compared to Hierarchical RCU (updated to 2.6.30): CONFIG_TREE_RCU: text data bss dec filename 183 4 0 187 kernel/rcupdate.o 2783 520 36 3339 kernel/rcutree.o 3526 Total (vs 4565 for v7) CONFIG_TREE_PREEMPT_RCU: text data bss dec filename 263 4 0 267 kernel/rcupdate.o 4594 776 52 5422 kernel/rcutree.o 5689 Total (6155 for v7) CONFIG_TINY_RCU: text data bss dec filename 96 4 0 100 kernel/rcupdate.o 734 24 0 758 kernel/rcutiny.o 858 Total (vs 848 for v7) The above is for x86. Your mileage may vary on other platforms. Further compression is possible, but is being procrastinated. 
Changes from v7 (http://lkml.org/lkml/2009/10/9/388) o Apply Lai Jiangshan's review comments (aside from might_sleep() in synchronize_sched(), which is covered by SMP builds). o Fix up expedited primitives. Changes from v6 (http://lkml.org/lkml/2009/9/23/293). o Forward ported to put it into the 2.6.33 stream. o Added lockdep support. o Make lightweight rcu_barrier. Changes from v5 (http://lkml.org/lkml/2009/6/23/12). o Ported to latest pre-2.6.32 merge window kernel. - Renamed rcu_qsctr_inc() to rcu_sched_qs(). - Renamed rcu_bh_qsctr_inc() to rcu_bh_qs(). - Provided trivial rcu_cpu_notify(). - Provided trivial exit_rcu(). - Provided trivial rcu_needs_cpu(). - Fixed up the rcu_*_enter/exit() functions in linux/hardirq.h. o Removed the dependence on EMBEDDED, with a view to making TINY_RCU default for !SMP at some time in the future. o Added (trivial) support for expedited grace periods. Changes from v4 (http://lkml.org/lkml/2009/5/2/91) include: o Squeeze the size down a bit further by removing the ->completed field from struct rcu_ctrlblk. o This permits synchronize_rcu() to become the empty function. Previous concerns about rcutorture were unfounded, as rcutorture correctly handles a constant value from rcu_batches_completed() and rcu_batches_completed_bh(). Changes from v3 (http://lkml.org/lkml/2009/3/29/221) include: o Changed rcu_batches_completed(), rcu_batches_completed_bh() rcu_enter_nohz(), rcu_exit_nohz(), rcu_nmi_enter(), and rcu_nmi_exit(), to be static inlines, as suggested by David Howells. Doing this saves about 100 bytes from rcutiny.o. (The numbers between v3 and this v4 of the patch are not directly comparable, since they are against different versions of Linux.) Changes from v2 (http://lkml.org/lkml/2009/2/3/333) include: o Fix whitespace issues. o Change short-circuit "||" operator to instead be "+" in order to fix performance bug noted by "kraai" on LWN. 
(http://lwn.net/Articles/324348/) Changes from v1 (http://lkml.org/lkml/2009/1/13/440) include: o This version depends on EMBEDDED as well as !SMP, as suggested by Ingo. o Updated rcu_needs_cpu() to unconditionally return zero, permitting the CPU to enter dynticks-idle mode at any time. This works because callbacks can be invoked upon entry to dynticks-idle mode. o Paul is now OK with this being included, based on a poll at the Kernel Miniconf at linux.conf.au, where about ten people said that they cared about saving 900 bytes on single-CPU systems. o Applies to both mainline and tip/core/rcu. Signed-off-by: Paul E. McKenney Acked-by: David Howells Acked-by: Josh Triplett Reviewed-by: Lai Jiangshan Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: avi@redhat.com Cc: mtosatti@redhat.com LKML-Reference: <12565226351355-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 24 ++++++++++++ include/linux/rcupdate.h | 6 +++ include/linux/rcutiny.h | 97 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 include/linux/rcutiny.h (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 6d527ee82b2b..d5b387669dab 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -139,10 +139,34 @@ static inline void account_system_vtime(struct task_struct *tsk) #endif #if defined(CONFIG_NO_HZ) +#if defined(CONFIG_TINY_RCU) +extern void rcu_enter_nohz(void); +extern void rcu_exit_nohz(void); + +static inline void rcu_irq_enter(void) +{ + rcu_exit_nohz(); +} + +static inline void rcu_irq_exit(void) +{ + rcu_enter_nohz(); +} + +static inline void rcu_nmi_enter(void) +{ +} + +static inline void rcu_nmi_exit(void) +{ +} + +#else extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); extern void rcu_nmi_enter(void); extern void 
rcu_nmi_exit(void); +#endif #else # define rcu_irq_enter() do { } while (0) # define rcu_irq_exit() do { } while (0) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3ebd0b7bcb08..6dd71fa48429 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -68,11 +68,17 @@ extern int sched_expedited_torture_stats(char *page); /* Internal to kernel */ extern void rcu_init(void); extern void rcu_scheduler_starting(void); +#ifndef CONFIG_TINY_RCU extern int rcu_needs_cpu(int cpu); +#else +static inline int rcu_needs_cpu(int cpu) { return 0; } +#endif extern int rcu_scheduler_active; #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include +#elif CONFIG_TINY_RCU +#include #else #error "Unknown RCU implementation specified to kernel configuration" #endif diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h new file mode 100644 index 000000000000..891073c264dc --- /dev/null +++ b/include/linux/rcutiny.h @@ -0,0 +1,97 @@ +/* + * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Author: Paul E. 
McKenney + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ + +#ifndef __LINUX_TINY_H +#define __LINUX_TINY_H + +#include + +void rcu_sched_qs(int cpu); +void rcu_bh_qs(int cpu); + +#define __rcu_read_lock() preempt_disable() +#define __rcu_read_unlock() preempt_enable() +#define __rcu_read_lock_bh() local_bh_disable() +#define __rcu_read_unlock_bh() local_bh_enable() +#define call_rcu_sched call_rcu + +#define rcu_init_sched() do { } while (0) +extern void rcu_check_callbacks(int cpu, int user); +extern void __rcu_init(void); + +/* + * Return the number of grace periods. + */ +static inline long rcu_batches_completed(void) +{ + return 0; +} + +/* + * Return the number of bottom-half grace periods. + */ +static inline long rcu_batches_completed_bh(void) +{ + return 0; +} + +extern int rcu_expedited_torture_stats(char *page); + +static inline void synchronize_rcu_expedited(void) +{ + synchronize_sched(); +} + +static inline void synchronize_rcu_bh_expedited(void) +{ + synchronize_sched(); +} + +struct notifier_block; +extern int rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu); + +#ifdef CONFIG_NO_HZ + +extern void rcu_enter_nohz(void); +extern void rcu_exit_nohz(void); + +#else /* #ifdef CONFIG_NO_HZ */ + +static inline void rcu_enter_nohz(void) +{ +} + +static inline void rcu_exit_nohz(void) +{ +} + +#endif /* #else #ifdef CONFIG_NO_HZ */ + +static inline void exit_rcu(void) +{ +} + +#endif /* __LINUX_RCUTINY_H */ -- cgit v1.2.2 From 0cd397d33608ae6c97d2ee6c8c43462b419b7e26 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 25 Oct 2009 19:03:51 -0700 Subject: rcu: Add synchronize_srcu_expedited() This patch creates a synchronize_srcu_expedited() that uses synchronize_sched_expedited() where synchronize_srcu() uses synchronize_sched(). 
The synchronize_srcu() and synchronize_srcu_expedited() functions become one-liners that pass synchronize_sched() or synchronize_sched_expedited(), respectively, to a new __synchronize_srcu() function. While in the file, move the EXPORT_SYMBOL_GPL()s to immediately follow the corresponding functions. Requested-by: Avi Kivity Tested-by: Marcelo Tosatti Signed-off-by: Paul E. McKenney Acked-by: Josh Triplett Reviewed-by: Lai Jiangshan Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: avi@redhat.com LKML-Reference: <12565226354038-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/srcu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index aca0eee53930..4765d97dcafb 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -48,6 +48,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp); int srcu_read_lock(struct srcu_struct *sp) __acquires(sp); void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); void synchronize_srcu(struct srcu_struct *sp); +void synchronize_srcu_expedited(struct srcu_struct *sp); long srcu_batches_completed(struct srcu_struct *sp); #endif -- cgit v1.2.2 From 4ce5b90340879ce93d169b7b523c2cbbe7c45843 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 26 Oct 2009 07:55:55 +0100 Subject: rcu: Do tiny cleanups in rcutiny No change in functionality - just straighten out a few small stylistic details. Cc: Paul E.
McKenney Cc: David Howells Cc: Josh Triplett Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: avi@redhat.com Cc: mtosatti@redhat.com LKML-Reference: <12565226351355-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutiny.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 891073c264dc..2c1fe8373e71 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -20,9 +20,8 @@ * Author: Paul E. McKenney * * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU + * Documentation/RCU */ - #ifndef __LINUX_TINY_H #define __LINUX_TINY_H @@ -70,8 +69,7 @@ static inline void synchronize_rcu_bh_expedited(void) } struct notifier_block; -extern int rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu); +extern int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu); #ifdef CONFIG_NO_HZ -- cgit v1.2.2 From 2c28e2451dba2260e9f88811b29a7787db7e7616 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 26 Oct 2009 13:57:44 -0700 Subject: rcu: Fix TINY_RCU #elif condition Some compilers are happy with "#elif CONFIG_RCU_TINY", while others strongly prefer "#elif defined(CONFIG_RCU_TINY)". Change to the latter to make more compilers happy. Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12565906642768-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6dd71fa48429..2f1bc42a3b82 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -77,7 +77,7 @@ extern int rcu_scheduler_active; #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include -#elif CONFIG_TINY_RCU +#elif defined(CONFIG_TINY_RCU) #include #else #error "Unknown RCU implementation specified to kernel configuration" -- cgit v1.2.2 From d6ba452128178091dab7a04d54f7e66fdc32fb39 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Mon, 26 Oct 2009 09:26:18 -0400 Subject: tpm add default function definitions Add default tpm_pcr_read/extend function definitions required by IMA/Kconfig changes. 
Signed-off-by: Mimi Zohar Reviewed-by: Eric Paris Signed-off-by: James Morris --- include/linux/tpm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 3338b3f5c21a..8eaa8f83effb 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -31,5 +31,12 @@ extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf); extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash); +#else +static inline int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) { + return -ENODEV; +} +static inline int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) { + return -ENODEV; +} #endif #endif -- cgit v1.2.2 From f7d7986060b2890fc26db6ab5203efbd33aa2497 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 18 Oct 2009 01:09:29 +0000 Subject: perf_event: Add alignment-faults and emulation-faults software events Add two more software events that are common to many cpus. Alignment faults: When a load or store is not aligned properly. Emulation faults: When an instruction is emulated in software. Both cause a very significant slowdown (100x or worse), so identifying and fixing them is very important. 
Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras --- include/linux/perf_counter.h | 2 ++ include/linux/perf_event.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7b7fbf433cff..d6b95d1e79f0 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -106,6 +106,8 @@ enum perf_sw_ids { PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, PERF_COUNT_SW_MAX, /* non-ABI */ }; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2e6d95f97419..a33707a3a788 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -102,6 +102,8 @@ enum perf_sw_ids { PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, PERF_COUNT_SW_MAX, /* non-ABI */ }; -- cgit v1.2.2 From ff76ec18cabb12a6c8f3c65bd1d23f1a770fe908 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 28 Oct 2009 12:26:39 -0700 Subject: tpm: fix header for modular build Fix build for TCG_TPM=m. Header file doesn't handle this and incorrectly builds stubs. 
drivers/char/tpm/tpm.c:720: error: redefinition of 'tpm_pcr_read' include/linux/tpm.h:35: error:previous definition of 'tpm_pcr_read' was here drivers/char/tpm/tpm.c:752: error: redefinition of 'tpm_pcr_extend' include/linux/tpm.h:38: error:previous definition of 'tpm_pcr_extend' was here Repairs linux-next's commit d6ba452128178091dab7a04d54f7e66fdc32fb39 Author: Mimi Zohar Date: Mon Oct 26 09:26:18 2009 -0400 tpm add default function definitions Signed-off-by: Randy Dunlap Cc: Rajiv Andrade Cc: Mimi Zohar Cc: James Morris Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: James Morris --- include/linux/tpm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 8eaa8f83effb..ac5d1c1285d9 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -27,7 +27,7 @@ */ #define TPM_ANY_NUM 0xFFFF -#if defined(CONFIG_TCG_TPM) +#if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf); extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash); -- cgit v1.2.2 From 5975c725dfd6f7d36f493ab1453fbdbd35c1f0e3 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Thu, 29 Oct 2009 11:40:17 -0500 Subject: define convenient securebits masks for prctl users (v2) Hi James, would you mind taking the following into security-testing? The securebits are used by passing them to prctl with the PR_{S,G}ET_SECUREBITS commands. But the defines must be shifted to be used in prctl, which begs to be confused and misused by userspace. So define some more convenient values for userspace to specify. This way userspace does prctl(PR_SET_SECUREBITS, SECBIT_NOROOT); instead of prctl(PR_SET_SECUREBITS, 1 << SECURE_NOROOT); (Thanks to Michael for the idea) This patch also adds include/linux/securebits to the installed headers. Then perhaps it can be included by glibc's sys/prctl.h. 
Changelog: Oct 29: Stephen Rothwell points out that issecure can be under __KERNEL__. Oct 14: (Suggestions by Michael Kerrisk): 1. spell out SETUID in SECBIT_NO_SETUID* 2. SECBIT_X_LOCKED does not imply SECBIT_X 3. add definitions for keepcaps Oct 14: As suggested by Michael Kerrisk, don't use SB_* as that convention is already in use. Use SECBIT_ prefix instead. Signed-off-by: Serge E. Hallyn Acked-by: Andrew G. Morgan Acked-by: Michael Kerrisk Cc: Ulrich Drepper Cc: James Morris Signed-off-by: James Morris --- include/linux/Kbuild | 1 + include/linux/securebits.h | 24 ++++++++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index cff4a101f266..ffcdb9b509db 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -329,6 +329,7 @@ unifdef-y += scc.h unifdef-y += sched.h unifdef-y += screen_info.h unifdef-y += sdla.h +unifdef-y += securebits.h unifdef-y += selinux_netlink.h unifdef-y += sem.h unifdef-y += serial_core.h diff --git a/include/linux/securebits.h b/include/linux/securebits.h index d2c5ed845bcc..33406174cbe8 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -1,6 +1,15 @@ #ifndef _LINUX_SECUREBITS_H #define _LINUX_SECUREBITS_H 1 +/* Each securesetting is implemented using two bits. One bit specifies + whether the setting is on or off. The other bit specify whether the + setting is locked or not. A setting which is locked cannot be + changed from user-level. */ +#define issecure_mask(X) (1 << (X)) +#ifdef __KERNEL__ +#define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) +#endif + #define SECUREBITS_DEFAULT 0x00000000 /* When set UID 0 has no special privileges. 
When unset, we support @@ -12,6 +21,9 @@ #define SECURE_NOROOT 0 #define SECURE_NOROOT_LOCKED 1 /* make bit-0 immutable */ +#define SECBIT_NOROOT (issecure_mask(SECURE_NOROOT)) +#define SECBIT_NOROOT_LOCKED (issecure_mask(SECURE_NOROOT_LOCKED)) + /* When set, setuid to/from uid 0 does not trigger capability-"fixup". When unset, to provide compatiblility with old programs relying on set*uid to gain/lose privilege, transitions to/from uid 0 cause @@ -19,6 +31,10 @@ #define SECURE_NO_SETUID_FIXUP 2 #define SECURE_NO_SETUID_FIXUP_LOCKED 3 /* make bit-2 immutable */ +#define SECBIT_NO_SETUID_FIXUP (issecure_mask(SECURE_NO_SETUID_FIXUP)) +#define SECBIT_NO_SETUID_FIXUP_LOCKED \ + (issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED)) + /* When set, a process can retain its capabilities even after transitioning to a non-root user (the set-uid fixup suppressed by bit 2). Bit-4 is cleared when a process calls exec(); setting both @@ -27,12 +43,8 @@ #define SECURE_KEEP_CAPS 4 #define SECURE_KEEP_CAPS_LOCKED 5 /* make bit-4 immutable */ -/* Each securesetting is implemented using two bits. One bit specifies - whether the setting is on or off. The other bit specify whether the - setting is locked or not. A setting which is locked cannot be - changed from user-level. */ -#define issecure_mask(X) (1 << (X)) -#define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) +#define SECBIT_KEEP_CAPS (issecure_mask(SECURE_KEEP_CAPS)) +#define SECBIT_KEEP_CAPS_LOCKED (issecure_mask(SECURE_KEEP_CAPS_LOCKED)) #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ -- cgit v1.2.2 From 7b2a35132ad0a70902dcd2844c27ed64cda0ce9b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 2 Nov 2009 08:50:52 +0800 Subject: compiler: Introduce __always_unused I wrote some code which is used as compile-time checker, and the code should be elided after compile. So I need to annotate the code as "always unused", compared to "maybe unused". 
Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Linus Torvalds LKML-Reference: <4AEE2CEC.8040206@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 1 + include/linux/compiler.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index a3ed7cb8ca34..73dcf804bc94 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -79,6 +79,7 @@ #define noinline __attribute__((noinline)) #define __attribute_const__ __attribute__((__const__)) #define __maybe_unused __attribute__((unused)) +#define __always_unused __attribute__((unused)) #define __gcc_header(x) #x #define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb5135b4e1..7947f4f6fa51 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -213,6 +213,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __maybe_unused /* unimplemented */ #endif +#ifndef __always_unused +# define __always_unused /* unimplemented */ +#endif + #ifndef noinline #define noinline #endif -- cgit v1.2.2 From fb0459d75c1d0a4ba3cafdd2c754e7486968a676 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 25 Sep 2009 12:25:56 +0200 Subject: perf/core: Provide a kernel-internal interface to get to performance counters There are reasons for kernel code to ask for, and use, performance counters. For example, in CPU freq governors this tends to be a good idea, but there are other examples possible as well of course. This patch adds the needed bits to enable this functionality; they have been tested in an experimental cpufreq driver that I'm working on, and the changes are all that I needed to access counters properly.
[fweisbec@gmail.com: added pid to perf_event_create_kernel_counter so that we can profile a particular task too TODO: Have a better error reporting, don't just return NULL in fail case.] v2: Remove the wrong comment about the fact perf_event_create_kernel_counter must be called from a kernel thread. Signed-off-by: Arjan van de Ven Acked-by: Peter Zijlstra Cc: "K.Prasad" Cc: Alan Stern Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt Cc: Jan Kiszka Cc: Avi Kivity LKML-Reference: <20090925122556.2f8bd939@infradead.org> Signed-off-by: Frederic Weisbecker --- include/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index df9d964c15fc..fa151d49a2ee 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -744,6 +744,12 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, int cpu); extern void perf_event_update_userpage(struct perf_event *event); +extern int perf_event_release_kernel(struct perf_event *event); +extern struct perf_event * +perf_event_create_kernel_counter(struct perf_event_attr *attr, + int cpu, + pid_t pid); +extern u64 perf_event_read_value(struct perf_event *event); struct perf_sample_data { u64 type; -- cgit v1.2.2 From 97eaf5300b9d0cd99c310bf8c4a0f2f3296d88a3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 18 Oct 2009 15:33:50 +0200 Subject: perf/core: Add a callback to perf events A simple callback in a perf event can be used for multiple purposes. For example it is useful for triggered based events like hardware breakpoints that need a callback to dispatch a triggered breakpoint event. 
v2: Simplify a bit the callback attribution as suggested by Paul Mackerras Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: "K.Prasad" Cc: Alan Stern Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Paul Mackerras Cc: Mike Galbraith Cc: Paul Mundt --- include/linux/perf_event.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fa151d49a2ee..8d54e6d25eeb 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -544,6 +544,8 @@ struct perf_pending_entry { void (*func)(struct perf_pending_entry *); }; +typedef void (*perf_callback_t)(struct perf_event *, void *); + /** * struct perf_event - performance event kernel representation: */ @@ -639,6 +641,8 @@ struct perf_event { struct event_filter *filter; #endif + perf_callback_t callback; + #endif /* CONFIG_PERF_EVENTS */ }; @@ -748,7 +752,8 @@ extern int perf_event_release_kernel(struct perf_event *event); extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid); + pid_t pid, + perf_callback_t callback); extern u64 perf_event_read_value(struct perf_event *event); struct perf_sample_data { -- cgit v1.2.2 From 1477b6a7edd9ffa7bba4f9779ce9a76ce92761ed Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 4 Nov 2009 16:14:16 +0900 Subject: sched: Remove unused __schedule() declaration __schedule() had been removed. 
Signed-off-by: Hiroshi Shimamoto Cc: Peter Zijlstra LKML-Reference: <4AF129C8.3030008@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 75e6e60bf583..f18102c4d0b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -349,7 +349,6 @@ extern signed long schedule_timeout(signed long timeout); extern signed long schedule_timeout_interruptible(signed long timeout); extern signed long schedule_timeout_killable(signed long timeout); extern signed long schedule_timeout_uninterruptible(signed long timeout); -asmlinkage void __schedule(void); asmlinkage void schedule(void); extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); -- cgit v1.2.2 From 2a2bb3142d326bb28b03875cabfc49baaac9a14a Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 4 Nov 2009 16:16:10 +0900 Subject: sched: Remove unused time_sync_thresh declaration time_sync_thresh had been removed. Signed-off-by: Hiroshi Shimamoto Cc: Peter Zijlstra LKML-Reference: <4AF12A3A.5050200@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f18102c4d0b8..754b3deed02b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -171,8 +171,6 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) } #endif -extern unsigned long long time_sync_thresh; - /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). -- cgit v1.2.2 From 9824a2b728b63e7ff586b9fd9293c819be79f0f3 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 4 Nov 2009 16:16:54 +0900 Subject: sched: Remove unused cpu_nr_migrations() cpu_nr_migrations() is not used, remove it. 
Signed-off-by: Hiroshi Shimamoto Cc: Peter Zijlstra LKML-Reference: <4AF12A66.6020609@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 754b3deed02b..dfc21fb76bf1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -145,7 +145,6 @@ extern unsigned long this_cpu_load(void); extern void calc_global_load(void); -extern u64 cpu_nr_migrations(int cpu); extern unsigned long get_parent_ip(unsigned long addr); -- cgit v1.2.2 From acc3f5d7cabbfd6cec71f0c1f9900621fa2d6ae7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 3 Nov 2009 14:53:40 +1030 Subject: cpumask: Partition_sched_domains takes array of cpumask_var_t Currently partition_sched_domains() takes a 'struct cpumask *doms_new' which is a kmalloc'ed array of cpumask_t. You can't have such an array if 'struct cpumask' is undefined, as we plan for CONFIG_CPUMASK_OFFSTACK=y. So, we make this an array of cpumask_var_t instead: this is the same for the CONFIG_CPUMASK_OFFSTACK=n case, but requires multiple allocations for the CONFIG_CPUMASK_OFFSTACK=y case. Hence we add alloc_sched_domains() and free_sched_domains() functions. 
Signed-off-by: Rusty Russell Cc: Peter Zijlstra LKML-Reference: <200911031453.40668.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index dfc21fb76bf1..78ba664474f3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1009,9 +1009,13 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd) return to_cpumask(sd->span); } -extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new); +/* Allocate an array of sched domains, for partition_sched_domains(). */ +cpumask_var_t *alloc_sched_domains(unsigned int ndoms); +void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); + /* Test a flag in parent sched domain */ static inline int test_sd_parent(struct sched_domain *sd, int flag) { @@ -1029,7 +1033,7 @@ unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); struct sched_domain_attr; static inline void -partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new) { } -- cgit v1.2.2 From 663e69592856df53ef52969482ef413a96bc4e06 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Nov 2009 14:22:21 +0100 Subject: irq: Remove unused debug_poll_all_shared_irqs() commit 74296a8ed added this function for debug purposes, but it was never used for anything. Remove it. 
Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 7ca72b74eec7..75f3f00ac1e5 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -603,12 +603,6 @@ static inline void init_irq_proc(void) } #endif -#if defined(CONFIG_GENERIC_HARDIRQS) && defined(CONFIG_DEBUG_SHIRQ) -extern void debug_poll_all_shared_irqs(void); -#else -static inline void debug_poll_all_shared_irqs(void) { } -#endif - struct seq_file; int show_interrupts(struct seq_file *p, void *v); -- cgit v1.2.2 From 2a855dd01bc1539111adb7233f587c5c468732ac Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sun, 25 Oct 2009 15:37:58 +0100 Subject: signal: Fix alternate signal stack check All architectures in the kernel increment/decrement the stack pointer before storing values on the stack. On architectures which have the stack grow down sas_ss_sp == sp is not on the alternate signal stack while sas_ss_sp + sas_ss_size == sp is on the alternate signal stack. On architectures which have the stack grow up sas_ss_sp == sp is on the alternate signal stack while sas_ss_sp + sas_ss_size == sp is not on the alternate signal stack. The current implementation fails for architectures which have the stack grow down on the corner case where sas_ss_sp == sp. This was reported as Debian bug #544905 on AMD64. 
Simplified test case: http://download.breakpoint.cc/tc-sig-stack.c The test case creates the following stack scenario: 0xn0300 stack top 0xn0200 alt stack pointer top (when switching to alt stack) 0xn01ff alt stack end 0xn0100 alt stack start == stack pointer If the signal is sent, the stack pointer is pointing to the base address of the alt stack and the kernel erroneously decides that it has already switched to the alternate stack because of the current check for "sp - sas_ss_sp < sas_ss_size". On parisc (stack grows up) the scenario would be: 0xn0200 stack pointer 0xn01ff alt stack end 0xn0100 alt stack start = alt stack pointer base (when switching to alt stack) 0xn0000 stack base This is handled correctly by the current implementation. [ tglx: Modified for archs which have the stack grow up (parisc) which would fail with the correct implementation for stack grows down. Added a check for sp >= current->sas_ss_sp which is not strictly necessary but makes the code symmetric for both variants ] Signed-off-by: Sebastian Andrzej Siewior Cc: Oleg Nesterov Cc: Roland McGrath Cc: Kyle McMartin Cc: stable@kernel.org LKML-Reference: <20091025143758.GA6653@Chamillionaire.breakpoint.cc> Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 75e6e60bf583..0f67914a43c9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2086,11 +2086,18 @@ static inline int is_si_special(const struct siginfo *info) return info <= SEND_SIG_FORCED; } -/* True if we are on the alternate signal stack. */ - +/* + * True if we are on the alternate signal stack. 
+ */ static inline int on_sig_stack(unsigned long sp) { - return (sp - current->sas_ss_sp < current->sas_ss_size); +#ifdef CONFIG_STACK_GROWSUP + return sp >= current->sas_ss_sp && + sp - current->sas_ss_sp < current->sas_ss_size; +#else + return sp > current->sas_ss_sp && + sp - current->sas_ss_sp <= current->sas_ss_size; +#endif } static inline int sas_ss_flags(unsigned long sp) -- cgit v1.2.2 From 2da3e160cb3d226d87b907fab26850d838ed8d7c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 5 Nov 2009 23:06:50 +0100 Subject: hw-breakpoint: Move asm-generic/hw_breakpoint.h to linux/hw_breakpoint.h We plan to make the breakpoints parameters generic among architectures. For that it's better to move the asm-generic header to a generic linux header. Signed-off-by: Frederic Weisbecker --- include/asm-generic/hw_breakpoint.h | 139 ------------------------------------ include/linux/hw_breakpoint.h | 136 +++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 139 deletions(-) delete mode 100644 include/asm-generic/hw_breakpoint.h create mode 100644 include/linux/hw_breakpoint.h (limited to 'include') diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h deleted file mode 100644 index 9bf2d12eb74a..000000000000 --- a/include/asm-generic/hw_breakpoint.h +++ /dev/null @@ -1,139 +0,0 @@ -#ifndef _ASM_GENERIC_HW_BREAKPOINT_H -#define _ASM_GENERIC_HW_BREAKPOINT_H - -#ifndef __ARCH_HW_BREAKPOINT_H -#error "Please don't include this file directly" -#endif - -#ifdef __KERNEL__ -#include -#include -#include - -/** - * struct hw_breakpoint - unified kernel/user-space hardware breakpoint - * @triggered: callback invoked after target address access - * @info: arch-specific breakpoint info (address, length, and type) - * - * %hw_breakpoint structures are the kernel's way of representing - * hardware breakpoints. 
These are data breakpoints - * (also known as "watchpoints", triggered on data access), and the breakpoint's - * target address can be located in either kernel space or user space. - * - * The breakpoint's address, length, and type are highly - * architecture-specific. The values are encoded in the @info field; you - * specify them when registering the breakpoint. To examine the encoded - * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared - * below. - * - * The address is specified as a regular kernel pointer (for kernel-space - * breakponts) or as an %__user pointer (for user-space breakpoints). - * With register_user_hw_breakpoint(), the address must refer to a - * location in user space. The breakpoint will be active only while the - * requested task is running. Conversely with - * register_kernel_hw_breakpoint(), the address must refer to a location - * in kernel space, and the breakpoint will be active on all CPUs - * regardless of the current task. - * - * The length is the breakpoint's extent in bytes, which is subject to - * certain limitations. include/asm/hw_breakpoint.h contains macros - * defining the available lengths for a specific architecture. Note that - * the address's alignment must match the length. The breakpoint will - * catch accesses to any byte in the range from address to address + - * (length - 1). - * - * The breakpoint's type indicates the sort of access that will cause it - * to trigger. Possible values may include: - * - * %HW_BREAKPOINT_RW (triggered on read or write access), - * %HW_BREAKPOINT_WRITE (triggered on write access), and - * %HW_BREAKPOINT_READ (triggered on read access). - * - * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all - * possibilities are available on all architectures. Execute breakpoints - * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. 
- * - * When a breakpoint gets hit, the @triggered callback is - * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the - * processor registers. - * Data breakpoints occur after the memory access has taken place. - * Breakpoints are disabled during execution @triggered, to avoid - * recursive traps and allow unhindered access to breakpointed memory. - * - * This sample code sets a breakpoint on pid_max and registers a callback - * function for writes to that variable. Note that it is not portable - * as written, because not all architectures support HW_BREAKPOINT_LEN_4. - * - * ---------------------------------------------------------------------- - * - * #include - * - * struct hw_breakpoint my_bp; - * - * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) - * { - * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); - * dump_stack(); - * ............... - * } - * - * static struct hw_breakpoint my_bp; - * - * static int init_module(void) - * { - * ...................... - * my_bp.info.type = HW_BREAKPOINT_WRITE; - * my_bp.info.len = HW_BREAKPOINT_LEN_4; - * - * my_bp.installed = (void *)my_bp_installed; - * - * rc = register_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * static void cleanup_module(void) - * { - * ...................... - * unregister_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * ---------------------------------------------------------------------- - */ -struct hw_breakpoint { - void (*triggered)(struct hw_breakpoint *, struct pt_regs *); - struct arch_hw_breakpoint info; -}; - -/* - * len and type values are defined in include/asm/hw_breakpoint.h. - * Available values vary according to the architecture. 
On i386 the - * possibilities are: - * - * HW_BREAKPOINT_LEN_1 - * HW_BREAKPOINT_LEN_2 - * HW_BREAKPOINT_LEN_4 - * HW_BREAKPOINT_RW - * HW_BREAKPOINT_READ - * - * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the - * 1-, 2-, and 4-byte lengths may be unavailable. There also may be - * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. - */ - -extern int register_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern int modify_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern void unregister_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -/* - * Kernel breakpoints are not associated with any particular thread. - */ -extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); -extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); - -extern unsigned int hbp_kernel_pos; - -#endif /* __KERNEL__ */ -#endif /* _ASM_GENERIC_HW_BREAKPOINT_H */ diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h new file mode 100644 index 000000000000..61ccc8f17eac --- /dev/null +++ b/include/linux/hw_breakpoint.h @@ -0,0 +1,136 @@ +#ifndef _LINUX_HW_BREAKPOINT_H +#define _LINUX_HW_BREAKPOINT_H + + +#ifdef __KERNEL__ +#include +#include +#include + +/** + * struct hw_breakpoint - unified kernel/user-space hardware breakpoint + * @triggered: callback invoked after target address access + * @info: arch-specific breakpoint info (address, length, and type) + * + * %hw_breakpoint structures are the kernel's way of representing + * hardware breakpoints. These are data breakpoints + * (also known as "watchpoints", triggered on data access), and the breakpoint's + * target address can be located in either kernel space or user space. + * + * The breakpoint's address, length, and type are highly + * architecture-specific. The values are encoded in the @info field; you + * specify them when registering the breakpoint. 
To examine the encoded + * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared + * below. + * + * The address is specified as a regular kernel pointer (for kernel-space + * breakponts) or as an %__user pointer (for user-space breakpoints). + * With register_user_hw_breakpoint(), the address must refer to a + * location in user space. The breakpoint will be active only while the + * requested task is running. Conversely with + * register_kernel_hw_breakpoint(), the address must refer to a location + * in kernel space, and the breakpoint will be active on all CPUs + * regardless of the current task. + * + * The length is the breakpoint's extent in bytes, which is subject to + * certain limitations. include/asm/hw_breakpoint.h contains macros + * defining the available lengths for a specific architecture. Note that + * the address's alignment must match the length. The breakpoint will + * catch accesses to any byte in the range from address to address + + * (length - 1). + * + * The breakpoint's type indicates the sort of access that will cause it + * to trigger. Possible values may include: + * + * %HW_BREAKPOINT_RW (triggered on read or write access), + * %HW_BREAKPOINT_WRITE (triggered on write access), and + * %HW_BREAKPOINT_READ (triggered on read access). + * + * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all + * possibilities are available on all architectures. Execute breakpoints + * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. + * + * When a breakpoint gets hit, the @triggered callback is + * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the + * processor registers. + * Data breakpoints occur after the memory access has taken place. + * Breakpoints are disabled during execution @triggered, to avoid + * recursive traps and allow unhindered access to breakpointed memory. 
+ * + * This sample code sets a breakpoint on pid_max and registers a callback + * function for writes to that variable. Note that it is not portable + * as written, because not all architectures support HW_BREAKPOINT_LEN_4. + * + * ---------------------------------------------------------------------- + * + * #include + * + * struct hw_breakpoint my_bp; + * + * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) + * { + * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); + * dump_stack(); + * ............... + * } + * + * static struct hw_breakpoint my_bp; + * + * static int init_module(void) + * { + * ...................... + * my_bp.info.type = HW_BREAKPOINT_WRITE; + * my_bp.info.len = HW_BREAKPOINT_LEN_4; + * + * my_bp.installed = (void *)my_bp_installed; + * + * rc = register_kernel_hw_breakpoint(&my_bp); + * ...................... + * } + * + * static void cleanup_module(void) + * { + * ...................... + * unregister_kernel_hw_breakpoint(&my_bp); + * ...................... + * } + * + * ---------------------------------------------------------------------- + */ +struct hw_breakpoint { + void (*triggered)(struct hw_breakpoint *, struct pt_regs *); + struct arch_hw_breakpoint info; +}; + +/* + * len and type values are defined in include/asm/hw_breakpoint.h. + * Available values vary according to the architecture. On i386 the + * possibilities are: + * + * HW_BREAKPOINT_LEN_1 + * HW_BREAKPOINT_LEN_2 + * HW_BREAKPOINT_LEN_4 + * HW_BREAKPOINT_RW + * HW_BREAKPOINT_READ + * + * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the + * 1-, 2-, and 4-byte lengths may be unavailable. There also may be + * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. 
+ */ + +extern int register_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +extern int modify_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +extern void unregister_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +/* + * Kernel breakpoints are not associated with any particular thread. + */ +extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); +extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); + +extern unsigned int hbp_kernel_pos; + +#endif /* __KERNEL__ */ +#endif /* _LINUX_HW_BREAKPOINT_H */ -- cgit v1.2.2 From 444a2a3bcd6d5bed5c823136f68fcc93c0fe283f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 6 Nov 2009 04:13:05 +0100 Subject: tracing, perf_events: Protect the buffer from recursion in perf While tracing using events with perf, if one enables the lockdep:lock_acquire event, it will infect every other perf trace events. Basically, you can enable whatever set of trace events through perf but if this event is part of the set, the only result we can get is a long list of lock_acquire events of rcu read lock, and only that. This is because of a recursion inside perf. 1) When a trace event is triggered, it will fill a per cpu buffer and submit it to perf. 2) Perf will commit this event but will also protect some data using rcu_read_lock 3) A recursion appears: rcu_read_lock triggers a lock_acquire event that will fill the per cpu event and then submit the buffer to perf. 4) Perf detects a recursion and ignores it 5) Perf continues its work on the previous event, but its buffer has been overwritten by the lock_acquire event, it has then been turned into a lock_acquire event of rcu read lock Such scenario also happens with lock_release with rcu_read_unlock(). 
We could turn the rcu_read_lock() into __rcu_read_lock() to drop the lock debugging from perf fast path, but that would make us lose the rcu debugging and that doesn't prevent from other possible kind of recursion from perf in the future. This patch adds a recursion protection based on a counter on the perf trace per cpu buffers to solve the problem. -v2: Fixed lost whitespace, added reviewed-by tag Signed-off-by: Frederic Weisbecker Reviewed-by: Masami Hiramatsu Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 9 +++++++-- include/trace/ftrace.h | 39 ++++++++++++++++++++++++++++++--------- 2 files changed, 37 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index f7b47c336703..43360c1d8f70 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -137,8 +137,13 @@ struct ftrace_event_call { #define FTRACE_MAX_PROFILE_SIZE 2048 -extern char *trace_profile_buf; -extern char *trace_profile_buf_nmi; +struct perf_trace_buf { + char buf[FTRACE_MAX_PROFILE_SIZE]; + int recursion; +}; + +extern struct perf_trace_buf *perf_trace_buf; +extern struct perf_trace_buf *perf_trace_buf_nmi; #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a7f946094128..4945d1c99864 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -649,6 +649,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * struct ftrace_event_call *event_call = &event_; * extern void perf_tp_event(int, u64, u64, void *, int); * struct ftrace_raw_##call *entry; + * struct perf_trace_buf *trace_buf; * u64 __addr = 0, __count = 1; * unsigned long irq_flags; * struct 
trace_entry *ent; @@ -673,14 +674,25 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * __cpu = smp_processor_id(); * * if (in_nmi()) - * raw_data = rcu_dereference(trace_profile_buf_nmi); + * trace_buf = rcu_dereference(perf_trace_buf_nmi); * else - * raw_data = rcu_dereference(trace_profile_buf); + * trace_buf = rcu_dereference(perf_trace_buf); * - * if (!raw_data) + * if (!trace_buf) * goto end; * - * raw_data = per_cpu_ptr(raw_data, __cpu); + * trace_buf = per_cpu_ptr(trace_buf, __cpu); + * + * // Avoid recursion from perf that could mess up the buffer + * if (trace_buf->recursion++) + * goto end_recursion; + * + * raw_data = trace_buf->buf; + * + * // Make recursion update visible before entering perf_tp_event + * // so that we protect from perf recursions. + * + * barrier(); * * //zero dead bytes from alignment to avoid stack leak to userspace: * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; @@ -713,8 +725,9 @@ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_event_call *event_call = &event_##call; \ - extern void perf_tp_event(int, u64, u64, void *, int); \ + extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ + struct perf_trace_buf *trace_buf; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ struct trace_entry *ent; \ @@ -739,14 +752,20 @@ static void ftrace_profile_##call(proto) \ __cpu = smp_processor_id(); \ \ if (in_nmi()) \ - raw_data = rcu_dereference(trace_profile_buf_nmi); \ + trace_buf = rcu_dereference(perf_trace_buf_nmi); \ else \ - raw_data = rcu_dereference(trace_profile_buf); \ + trace_buf = rcu_dereference(perf_trace_buf); \ \ - if (!raw_data) \ + if (!trace_buf) \ goto end; \ \ - raw_data = per_cpu_ptr(raw_data, __cpu); \ + trace_buf = per_cpu_ptr(trace_buf, __cpu); \ + if (trace_buf->recursion++) \ + goto end_recursion; \ + \ + barrier(); \ + \ + raw_data = trace_buf->buf; \ \ *(u64 
*)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ entry = (struct ftrace_raw_##call *)raw_data; \ @@ -761,6 +780,8 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ +end_recursion: \ + trace_buf->recursion--; \ end: \ local_irq_restore(irq_flags); \ \ -- cgit v1.2.2 From 24f1e32c60c45c89a997c73395b69c8af6f0a84e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Sep 2009 19:22:48 +0200 Subject: hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf events This patch rebases the implementation of the breakpoints API on top of perf event instances. Each breakpoint is now a perf event that handles the register scheduling, thread/cpu attachment, etc. The new layering is now made as follows: ptrace kgdb ftrace perf syscall \ | / / \ | / / / Core breakpoint API / / | / | / Breakpoints perf events | | Breakpoints PMU ---- Debug Register constraints handling (Part of core breakpoint API) | | Hardware debug registers Reasons for this rewrite: - Use the centralized/optimized pmu registers scheduling, implying an easier arch integration - More powerful register handling: perf attributes (pinned/flexible events, exclusive/non-exclusive, tunable period, etc...) Impact: - New perf ABI: the hardware breakpoints counters - Ptrace breakpoints setting remains tricky and still needs some per thread breakpoints references. Todo (in order): - Support breakpoints perf counter events for perf tools (ie: implement perf_bpcounter_event()) - Support from perf tools Changes in v2: - Follow the perf "event " rename - The ptrace regression has been fixed (ptrace breakpoint perf events weren't released when a task ended) - Drop the struct hw_breakpoint and store generic fields in perf_event_attr. 
- Separate core and arch specific headers, drop asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h - Use new generic len/type for breakpoint - Handle off case: when breakpoints api is not supported by an arch Changes in v3: - Fix broken CONFIG_KVM, we need to propagate the breakpoint api changes to kvm when we exit the guest and restore the bp registers to the host. Changes in v4: - Drop the hw_breakpoint_restore() stub as it is only used by KVM - EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a module - Restore the breakpoints unconditionally on kvm guest exit: TIF_DEBUG_THREAD no longer covers every case of running breakpoints and vcpu->arch.switch_db_regs might not always be set when the guest used debug registers. (Waiting for a reliable optimization) Changes in v5: - Split-up the asm-generic/hw-breakpoint.h moving to linux/hw_breakpoint.h into a separate patch - Optimize the breakpoints restoring while switching from kvm guest to host. We only want to restore the state if we have active breakpoints to the host, otherwise we don't care about messed-up address registers. 
- Add asm/hw_breakpoint.h to Kbuild - Fix bad breakpoint type in trace_selftest.c Changes in v6: - Fix wrong header inclusion in trace.h (triggered a build error with CONFIG_FTRACE_SELFTEST Signed-off-by: Frederic Weisbecker Cc: Prasad Cc: Alan Stern Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt --- include/linux/hw_breakpoint.h | 243 +++++++++++++++++++++--------------------- include/linux/perf_event.h | 26 ++++- 2 files changed, 144 insertions(+), 125 deletions(-) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 61ccc8f17eac..7eba9b92e5f3 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -1,136 +1,131 @@ #ifndef _LINUX_HW_BREAKPOINT_H #define _LINUX_HW_BREAKPOINT_H +#include -#ifdef __KERNEL__ -#include -#include -#include - -/** - * struct hw_breakpoint - unified kernel/user-space hardware breakpoint - * @triggered: callback invoked after target address access - * @info: arch-specific breakpoint info (address, length, and type) - * - * %hw_breakpoint structures are the kernel's way of representing - * hardware breakpoints. These are data breakpoints - * (also known as "watchpoints", triggered on data access), and the breakpoint's - * target address can be located in either kernel space or user space. - * - * The breakpoint's address, length, and type are highly - * architecture-specific. The values are encoded in the @info field; you - * specify them when registering the breakpoint. To examine the encoded - * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared - * below. - * - * The address is specified as a regular kernel pointer (for kernel-space - * breakponts) or as an %__user pointer (for user-space breakpoints). 
- * With register_user_hw_breakpoint(), the address must refer to a - * location in user space. The breakpoint will be active only while the - * requested task is running. Conversely with - * register_kernel_hw_breakpoint(), the address must refer to a location - * in kernel space, and the breakpoint will be active on all CPUs - * regardless of the current task. - * - * The length is the breakpoint's extent in bytes, which is subject to - * certain limitations. include/asm/hw_breakpoint.h contains macros - * defining the available lengths for a specific architecture. Note that - * the address's alignment must match the length. The breakpoint will - * catch accesses to any byte in the range from address to address + - * (length - 1). - * - * The breakpoint's type indicates the sort of access that will cause it - * to trigger. Possible values may include: - * - * %HW_BREAKPOINT_RW (triggered on read or write access), - * %HW_BREAKPOINT_WRITE (triggered on write access), and - * %HW_BREAKPOINT_READ (triggered on read access). - * - * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all - * possibilities are available on all architectures. Execute breakpoints - * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. - * - * When a breakpoint gets hit, the @triggered callback is - * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the - * processor registers. - * Data breakpoints occur after the memory access has taken place. - * Breakpoints are disabled during execution @triggered, to avoid - * recursive traps and allow unhindered access to breakpointed memory. - * - * This sample code sets a breakpoint on pid_max and registers a callback - * function for writes to that variable. Note that it is not portable - * as written, because not all architectures support HW_BREAKPOINT_LEN_4. 
- * - * ---------------------------------------------------------------------- - * - * #include - * - * struct hw_breakpoint my_bp; - * - * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) - * { - * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); - * dump_stack(); - * ............... - * } - * - * static struct hw_breakpoint my_bp; - * - * static int init_module(void) - * { - * ...................... - * my_bp.info.type = HW_BREAKPOINT_WRITE; - * my_bp.info.len = HW_BREAKPOINT_LEN_4; - * - * my_bp.installed = (void *)my_bp_installed; - * - * rc = register_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * static void cleanup_module(void) - * { - * ...................... - * unregister_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * ---------------------------------------------------------------------- - */ -struct hw_breakpoint { - void (*triggered)(struct hw_breakpoint *, struct pt_regs *); - struct arch_hw_breakpoint info; +enum { + HW_BREAKPOINT_LEN_1 = 1, + HW_BREAKPOINT_LEN_2 = 2, + HW_BREAKPOINT_LEN_4 = 4, + HW_BREAKPOINT_LEN_8 = 8, }; -/* - * len and type values are defined in include/asm/hw_breakpoint.h. - * Available values vary according to the architecture. On i386 the - * possibilities are: - * - * HW_BREAKPOINT_LEN_1 - * HW_BREAKPOINT_LEN_2 - * HW_BREAKPOINT_LEN_4 - * HW_BREAKPOINT_RW - * HW_BREAKPOINT_READ - * - * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the - * 1-, 2-, and 4-byte lengths may be unavailable. There also may be - * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. 
- */ +enum { + HW_BREAKPOINT_R = 1, + HW_BREAKPOINT_W = 2, + HW_BREAKPOINT_X = 4, +}; + +static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) +{ + return &bp->hw.info; +} + +static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) +{ + return bp->attr.bp_addr; +} + +static inline int hw_breakpoint_type(struct perf_event *bp) +{ + return bp->attr.bp_type; +} + +static inline int hw_breakpoint_len(struct perf_event *bp) +{ + return bp->attr.bp_len; +} + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +extern struct perf_event * +register_user_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active); + +/* FIXME: only change from the attr, and don't unregister */ +extern struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, + unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active); -extern int register_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern int modify_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern void unregister_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); /* * Kernel breakpoints are not associated with any particular thread. 
*/ -extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); -extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); +extern struct perf_event * +register_wide_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + int cpu, + bool active); + +extern struct perf_event ** +register_wide_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + bool active); + +extern int register_perf_hw_breakpoint(struct perf_event *bp); +extern int __register_perf_hw_breakpoint(struct perf_event *bp); +extern void unregister_hw_breakpoint(struct perf_event *bp); +extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events); + +extern int reserve_bp_slot(struct perf_event *bp); +extern void release_bp_slot(struct perf_event *bp); + +extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); + +#else /* !CONFIG_HAVE_HW_BREAKPOINT */ + +static inline struct perf_event * +register_user_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { return NULL; } +static inline struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, + unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { return NULL; } +static inline struct perf_event * +register_wide_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + int cpu, + bool active) { return NULL; } +static inline struct perf_event ** +register_wide_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + bool active) { return NULL; } +static inline int +register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } +static inline int +__register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } +static inline void unregister_hw_breakpoint(struct perf_event *bp) { } +static inline void 
+unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { } +static inline int +reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; } +static inline void release_bp_slot(struct perf_event *bp) { } + +static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { } -extern unsigned int hbp_kernel_pos; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif /* __KERNEL__ */ -#endif /* _LINUX_HW_BREAKPOINT_H */ +#endif /* _LINUX_HW_BREAKPOINT_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8d54e6d25eeb..cead64ea6c15 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -18,6 +18,10 @@ #include #include +#ifdef CONFIG_HAVE_HW_BREAKPOINT +#include +#endif + /* * User-space ABI bits: */ @@ -31,6 +35,7 @@ enum perf_type_id { PERF_TYPE_TRACEPOINT = 2, PERF_TYPE_HW_CACHE = 3, PERF_TYPE_RAW = 4, + PERF_TYPE_BREAKPOINT = 5, PERF_TYPE_MAX, /* non-ABI */ }; @@ -207,6 +212,15 @@ struct perf_event_attr { __u32 wakeup_events; /* wakeup every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; + + union { + struct { /* Hardware breakpoint info */ + __u64 bp_addr; + __u32 bp_type; + __u32 bp_len; + }; + }; + __u32 __reserved_2; __u64 __reserved_3; @@ -476,6 +490,11 @@ struct hw_perf_event { atomic64_t count; struct hrtimer hrtimer; }; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + union { /* breakpoint */ + struct arch_hw_breakpoint info; + }; +#endif }; atomic64_t prev_count; u64 sample_period; @@ -588,7 +607,7 @@ struct perf_event { u64 tstamp_running; u64 tstamp_stopped; - struct perf_event_attr attr; + struct perf_event_attr attr; struct hw_perf_event hw; struct perf_event_context *ctx; @@ -643,6 +662,8 @@ struct perf_event { perf_callback_t callback; + perf_callback_t event_callback; + #endif /* CONFIG_PERF_EVENTS */ }; @@ -831,6 +852,7 @@ extern int sysctl_perf_event_sample_rate; extern void perf_event_init(void); extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size); 
+extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags #define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ @@ -865,6 +887,8 @@ static inline int perf_event_task_enable(void) { return -EINVAL; } static inline void perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { } +static inline void +perf_bp_event(struct perf_event *event, void *data) { } static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } -- cgit v1.2.2 From 91284224da5b15ec6c2b45e10fa5eccd1c92a204 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 18 Oct 2009 23:32:33 +0200 Subject: pcmcia: add new CIS access helpers As a replacement to pcmcia_get_{first,next}_tuple() and pcmcia_get_tuple_data(), three new -- and easier to use -- functions are added: - pcmcia_get_tuple() to get the very first CIS entry of one type. - pcmcia_loop_tuple() to loop over all CIS entries of one type. - pcmcia_get_mac_from_cis() to read out the hardware MAC address from CISTPL_FUNCE. Only a handful of drivers need these functions anyway, as most CIS access is already handled by pcmcia_loop_config(), which now shares the same backend (pccard_loop_tuple()) with pcmcia_loop_tuple(). A pcmcia_get_mac_from_cis() bug noted by Komuro has been fixed in this revision. Signed-off-by: Dominik Brodowski --- include/pcmcia/ds.h | 57 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index a2be80b9a095..2eb6e24d1a6b 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -34,6 +34,7 @@ struct pcmcia_socket; struct pcmcia_device; struct config_t; +struct net_device; /* dynamic device IDs for PCMCIA device drivers. See * Documentation/pcmcia/driver.txt for details. 
@@ -176,26 +177,39 @@ const char *pcmcia_error_ret(int ret); pcmcia_error_ret(ret)); \ } -/* CIS access. - * Use the pcmcia_* versions in PCMCIA drivers + +/* + * CIS access. + * + * Please use the following functions to access CIS tuples: + * - pcmcia_get_tuple() + * - pcmcia_loop_tuple() + * - pcmcia_get_mac_from_cis() + * + * To parse a tuple_t, pcmcia_parse_tuple() exists. Its interface + * might change in future. */ -int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse); -int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function, - tuple_t *tuple); -#define pcmcia_get_first_tuple(p_dev, tuple) \ - pccard_get_first_tuple(p_dev->socket, p_dev->func, tuple) +/* get the very first CIS entry of type @code. Note that buf is pointer + * to u8 *buf; and that you need to kfree(buf) afterwards. */ +size_t pcmcia_get_tuple(struct pcmcia_device *p_dev, cisdata_t code, + u8 **buf); -int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, - tuple_t *tuple); -#define pcmcia_get_next_tuple(p_dev, tuple) \ - pccard_get_next_tuple(p_dev->socket, p_dev->func, tuple) +/* loop over CIS entries */ +int pcmcia_loop_tuple(struct pcmcia_device *p_dev, cisdata_t code, + int (*loop_tuple) (struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data), + void *priv_data); -int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple); -#define pcmcia_get_tuple_data(p_dev, tuple) \ - pccard_get_tuple_data(p_dev->socket, tuple) +/* get the MAC address from CISTPL_FUNCE */ +int pcmcia_get_mac_from_cis(struct pcmcia_device *p_dev, + struct net_device *dev); +/* parse a tuple_t */ +int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse); + /* loop CIS entries for valid configuration */ int pcmcia_loop_config(struct pcmcia_device *p_dev, int (*conf_check) (struct pcmcia_device *p_dev, @@ -215,6 +229,21 @@ int pcmcia_reset_card(struct pcmcia_socket *skt); int pcmcia_access_configuration_register(struct pcmcia_device *p_dev, conf_reg_t *reg); +/* 
deprecated -- do not use in drivers. */ +int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function, + tuple_t *tuple); +#define pcmcia_get_first_tuple(p_dev, tuple) \ + pccard_get_first_tuple(p_dev->socket, p_dev->func, tuple) + +int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, + tuple_t *tuple); +#define pcmcia_get_next_tuple(p_dev, tuple) \ + pccard_get_next_tuple(p_dev->socket, p_dev->func, tuple) + +int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple); +#define pcmcia_get_tuple_data(p_dev, tuple) \ + pccard_get_tuple_data(p_dev->socket, tuple) + /* device configuration */ int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req); int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req); -- cgit v1.2.2 From 18a7a19b37838789452e0bd2855a51475628b971 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 19 Oct 2009 00:07:39 +0200 Subject: pcmcia: remove pcmcia_get_{first,next}_tuple() Remove the pcmcia_get_{first,next}_tuple() calls no longer needed by (current) pcmcia device drivers. Signed-off-by: Dominik Brodowski --- include/pcmcia/ds.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 2eb6e24d1a6b..6c37d4ed7832 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -229,21 +229,6 @@ int pcmcia_reset_card(struct pcmcia_socket *skt); int pcmcia_access_configuration_register(struct pcmcia_device *p_dev, conf_reg_t *reg); -/* deprecated -- do not use in drivers. 
*/ -int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function, - tuple_t *tuple); -#define pcmcia_get_first_tuple(p_dev, tuple) \ - pccard_get_first_tuple(p_dev->socket, p_dev->func, tuple) - -int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, - tuple_t *tuple); -#define pcmcia_get_next_tuple(p_dev, tuple) \ - pccard_get_next_tuple(p_dev->socket, p_dev->func, tuple) - -int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple); -#define pcmcia_get_tuple_data(p_dev, tuple) \ - pccard_get_tuple_data(p_dev->socket, tuple) - /* device configuration */ int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req); int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req); -- cgit v1.2.2 From 9cb495bb41f07a3ebfc60d3b9d26017a1fd7050c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 15:57:22 +0200 Subject: pcmcia: remove now-defunct cs_error, pcmcia_error_{func,ret} As all in-tree drivers have been converted to not use cs_error() any more, drop these functions and definitions, and update the Documentation. Signed-off-by: Dominik Brodowski --- include/pcmcia/ds.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'include') diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 6c37d4ed7832..d82392de4e92 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -142,42 +142,6 @@ struct pcmcia_device { #define handle_to_dev(handle) (handle->dev) -/* (deprecated) error reporting by PCMCIA devices. Use dev_printk() - * or dev_dbg() directly in the driver, without referring to pcmcia_error_func() - * and/or pcmcia_error_ret() for those functions will go away soon. 
- */ -enum service { - AccessConfigurationRegister, AddSocketServices, - AdjustResourceInfo, CheckEraseQueue, CloseMemory, CopyMemory, - DeregisterClient, DeregisterEraseQueue, GetCardServicesInfo, - GetClientInfo, GetConfigurationInfo, GetEventMask, - GetFirstClient, GetFirstPartion, GetFirstRegion, GetFirstTuple, - GetNextClient, GetNextPartition, GetNextRegion, GetNextTuple, - GetStatus, GetTupleData, MapLogSocket, MapLogWindow, MapMemPage, - MapPhySocket, MapPhyWindow, ModifyConfiguration, ModifyWindow, - OpenMemory, ParseTuple, ReadMemory, RegisterClient, - RegisterEraseQueue, RegisterMTD, RegisterTimer, - ReleaseConfiguration, ReleaseExclusive, ReleaseIO, ReleaseIRQ, - ReleaseSocketMask, ReleaseWindow, ReplaceSocketServices, - RequestConfiguration, RequestExclusive, RequestIO, RequestIRQ, - RequestSocketMask, RequestWindow, ResetCard, ReturnSSEntry, - SetEventMask, SetRegion, ValidateCIS, VendorSpecific, - WriteMemory, BindDevice, BindMTD, ReportError, - SuspendCard, ResumeCard, EjectCard, InsertCard, ReplaceCIS, - GetFirstWindow, GetNextWindow, GetMemPage -}; -const char *pcmcia_error_func(int func); -const char *pcmcia_error_ret(int ret); - -#define cs_error(p_dev, func, ret) \ - { \ - dev_printk(KERN_NOTICE, &p_dev->dev, \ - "%s : %s\n", \ - pcmcia_error_func(func), \ - pcmcia_error_ret(ret)); \ - } - - /* * CIS access. 
* -- cgit v1.2.2 From 1689164a272a962572a1f31af715dfe462cf7910 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sun, 29 Mar 2009 22:43:43 +0100 Subject: PCMCIA: ss: allow PCI IRQs > 255 Signed-off-by: Russell King Signed-off-by: Dominik Brodowski --- include/pcmcia/ss.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index d696a692d94a..753da9b087d3 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -172,7 +172,7 @@ struct pcmcia_socket { u_int irq_mask; u_int map_size; u_int io_offset; - u_char pci_irq; + u_int pci_irq; struct pci_dev * cb_dev; -- cgit v1.2.2 From dd8dbf2e6880e30c00b18600c962d0cb5a03c555 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 3 Nov 2009 16:35:32 +1100 Subject: security: report the module name to security_module_request For SELinux to do better filtering in userspace we send the name of the module along with the AVC denial when a program is denied module_request. 
Example output: type=SYSCALL msg=audit(11/03/2009 10:59:43.510:9) : arch=x86_64 syscall=write success=yes exit=2 a0=3 a1=7fc28c0d56c0 a2=2 a3=7fffca0d7440 items=0 ppid=1727 pid=1729 auid=unset uid=root gid=root euid=root suid=root fsuid=root egid=root sgid=root fsgid=root tty=(none) ses=unset comm=rpc.nfsd exe=/usr/sbin/rpc.nfsd subj=system_u:system_r:nfsd_t:s0 key=(null) type=AVC msg=audit(11/03/2009 10:59:43.510:9) : avc: denied { module_request } for pid=1729 comm=rpc.nfsd kmod="net-pf-10" scontext=system_u:system_r:nfsd_t:s0 tcontext=system_u:system_r:kernel_t:s0 tclass=system Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/lsm_audit.h | 18 ++++++++++-------- include/linux/security.h | 7 ++++--- 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 190c37854870..f78f83d7663f 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -26,14 +26,15 @@ /* Auxiliary data to use in generating the audit record. 
*/ struct common_audit_data { - char type; -#define LSM_AUDIT_DATA_FS 1 -#define LSM_AUDIT_DATA_NET 2 -#define LSM_AUDIT_DATA_CAP 3 -#define LSM_AUDIT_DATA_IPC 4 -#define LSM_AUDIT_DATA_TASK 5 -#define LSM_AUDIT_DATA_KEY 6 -#define LSM_AUDIT_NO_AUDIT 7 + char type; +#define LSM_AUDIT_DATA_FS 1 +#define LSM_AUDIT_DATA_NET 2 +#define LSM_AUDIT_DATA_CAP 3 +#define LSM_AUDIT_DATA_IPC 4 +#define LSM_AUDIT_DATA_TASK 5 +#define LSM_AUDIT_DATA_KEY 6 +#define LSM_AUDIT_NO_AUDIT 7 +#define LSM_AUDIT_DATA_KMOD 8 struct task_struct *tsk; union { struct { @@ -66,6 +67,7 @@ struct common_audit_data { char *key_desc; } key_struct; #endif + char *kmod_name; } u; /* this union contains LSM specific data */ union { diff --git a/include/linux/security.h b/include/linux/security.h index ed0faea60b82..466cbadbd1ef 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -706,6 +706,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @kernel_module_request: * Ability to trigger the kernel to automatically upcall to userspace for * userspace to load a kernel module with the given name. + * @kmod_name name of the module requested by the kernel * Return 0 if successful. 
* @task_setuid: * Check permission before setting one or more of the user identity @@ -1577,7 +1578,7 @@ struct security_operations { void (*cred_transfer)(struct cred *new, const struct cred *old); int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); - int (*kernel_module_request)(void); + int (*kernel_module_request)(char *kmod_name); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); int (*task_fix_setuid) (struct cred *new, const struct cred *old, int flags); @@ -1842,7 +1843,7 @@ void security_commit_creds(struct cred *new, const struct cred *old); void security_transfer_creds(struct cred *new, const struct cred *old); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); -int security_kernel_module_request(void); +int security_kernel_module_request(char *kmod_name); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); @@ -2407,7 +2408,7 @@ static inline int security_kernel_create_files_as(struct cred *cred, return 0; } -static inline int security_kernel_module_request(void) +static inline int security_kernel_module_request(char *kmod_name) { return 0; } -- cgit v1.2.2 From 9d5ce73a64be2be8112147a3e0b551ad9cd1247b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:16 +0900 Subject: x86: intel-iommu: Convert detect_intel_iommu to use iommu_init hook This changes detect_intel_iommu() to set intel_iommu_init() to iommu_init hook if detect_intel_iommu() finds the IOMMU. 
Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-6-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: build fix for the !CONFIG_DMAR case ] Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 4a2b162c256a..5de4c9e5856d 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -208,16 +208,9 @@ struct dmar_atsr_unit { u8 include_all:1; /* include all ports */ }; -/* Intel DMAR initialization functions */ extern int intel_iommu_init(void); -#else -static inline int intel_iommu_init(void) -{ -#ifdef CONFIG_INTR_REMAP - return dmar_dev_scope_init(); -#else - return -ENODEV; -#endif -} -#endif /* !CONFIG_DMAR */ +#else /* !CONFIG_DMAR: */ +static inline int intel_iommu_init(void) { return -ENODEV; } +#endif /* CONFIG_DMAR */ + #endif /* __DMAR_H__ */ -- cgit v1.2.2 From 9f993ac3f708b661207ed7de521f245586217a68 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:17 +0900 Subject: bootmem: Add free_bootmem_late() Add a new function for freeing bootmem after the bootmem allocator has been released and the unreserved pages given to the page allocator. This allows us to reserve bootmem and then release it if we later discover it was not needed. ( This new API will be used by the swiotlb code to recover a significant amount of RAM (64MB). 
) Signed-off-by: FUJITA Tomonori Acked-by: Pekka Enberg Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com Cc: hannes@cmpxchg.org Cc: tj@kernel.org Cc: akpm@linux-foundation.org Cc: Linus Torvalds LKML-Reference: <1257849980-22640-7-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- include/linux/bootmem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index dd97fb8408a8..b10ec49ee2dd 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -53,6 +53,7 @@ extern void free_bootmem_node(pg_data_t *pgdat, unsigned long addr, unsigned long size); extern void free_bootmem(unsigned long addr, unsigned long size); +extern void free_bootmem_late(unsigned long addr, unsigned long size); /* * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE, -- cgit v1.2.2 From 5740afdb68abadc473fd5392df733558a58c1254 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:18 +0900 Subject: swiotlb: Add swiotlb_free() function swiotlb_free() function frees all allocated memory for swiotlb. We need to initialize swiotlb before IOMMU initialization (x86 and powerpc need to allocate memory from bootmem allocator). If IOMMU initialization is successful, we need to free swiotlb resource (don't want to waste 64MB). 
Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-8-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: build fix for the !CONFIG_SWIOTLB case ] Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 73b1f1cec423..59bafa690290 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -88,4 +88,10 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); +#ifdef CONFIG_SWIOTLB +extern void __init swiotlb_free(void); +#else +static inline void swiotlb_free(void) { } +#endif + #endif /* __LINUX_SWIOTLB_H */ -- cgit v1.2.2 From ad32e8cb86e7894aac51c8963eaa9f36bb8a4e14 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:19 +0900 Subject: swiotlb: Defer swiotlb init printing, export swiotlb_print_info() This enables us to avoid printing swiotlb memory info when we initialize swiotlb. After swiotlb initialization, we could find that we don't need swiotlb. This patch removes the code to print swiotlb memory info in swiotlb_init() and exports the function to do that. 
Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com Cc: tony.luck@intel.com Cc: benh@kernel.crashing.org LKML-Reference: <1257849980-22640-9-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: merge up conflict ] Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 59bafa690290..eb9bdb4d4854 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -20,8 +20,7 @@ struct scatterlist; */ #define IO_TLB_SHIFT 11 -extern void -swiotlb_init(void); +extern void swiotlb_init(int verbose); extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, @@ -94,4 +93,5 @@ extern void __init swiotlb_free(void); static inline void swiotlb_free(void) { } #endif +extern void swiotlb_print_info(void); #endif /* __LINUX_SWIOTLB_H */ -- cgit v1.2.2 From 67178767b936fb47a3a5e88097cff41ccbda7acb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 13 Nov 2009 10:06:34 +0100 Subject: tracing: Rename 'lockdep' event subsystem into 'lock' Lockdep events subsystem gathers various locking related events such as a request, release, contention or acquisition of a lock. The name of this event subsystem is a bit of a misnomer since these events are not quite related to lockdep but more generally to locking, ie: these events are not reporting lock dependencies or possible deadlock scenario but pure locking events. Hence this rename. 
Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Acked-by: Hitoshi Mitake Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Steven Rostedt Cc: Li Zefan LKML-Reference: <1258103194-843-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/trace/events/lock.h | 96 ++++++++++++++++++++++++++++++++++++++++++ include/trace/events/lockdep.h | 96 ------------------------------------------ 2 files changed, 96 insertions(+), 96 deletions(-) create mode 100644 include/trace/events/lock.h delete mode 100644 include/trace/events/lockdep.h (limited to 'include') diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h new file mode 100644 index 000000000000..a870ba125aa8 --- /dev/null +++ b/include/trace/events/lock.h @@ -0,0 +1,96 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lock + +#if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCK_H + +#include +#include + +#ifdef CONFIG_LOCKDEP + +TRACE_EVENT(lock_acquire, + + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + + TP_STRUCT__entry( + __field(unsigned int, flags) + __string(name, lock->name) + ), + + TP_fast_assign( + __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); + __assign_str(name, lock->name); + ), + + TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", + (__entry->flags & 2) ? 
"read " : "", + __get_str(name)) +); + +TRACE_EVENT(lock_release, + + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + + TP_ARGS(lock, nested, ip), + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); + +#ifdef CONFIG_LOCK_STAT + +TRACE_EVENT(lock_contended, + + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + + TP_ARGS(lock, ip), + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __string(name, lock->name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __assign_str(name, lock->name); + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, + __entry->wait_nsec_rem) +); + +#endif +#endif + +#endif /* _TRACE_LOCK_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h deleted file mode 100644 index bcf1d209a00d..000000000000 --- a/include/trace/events/lockdep.h +++ /dev/null @@ -1,96 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lockdep - -#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_LOCKDEP_H - -#include -#include - -#ifdef CONFIG_LOCKDEP - -TRACE_EVENT(lock_acquire, - - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - - TP_STRUCT__entry( - __field(unsigned int, flags) - __string(name, lock->name) - ), - - 
TP_fast_assign( - __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); - __assign_str(name, lock->name); - ), - - TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", - (__entry->flags & 2) ? "read " : "", - __get_str(name)) -); - -TRACE_EVENT(lock_release, - - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - - TP_ARGS(lock, nested, ip), - - TP_STRUCT__entry( - __string(name, lock->name) - ), - - TP_fast_assign( - __assign_str(name, lock->name); - ), - - TP_printk("%s", __get_str(name)) -); - -#ifdef CONFIG_LOCK_STAT - -TRACE_EVENT(lock_contended, - - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - - TP_ARGS(lock, ip), - - TP_STRUCT__entry( - __string(name, lock->name) - ), - - TP_fast_assign( - __assign_str(name, lock->name); - ), - - TP_printk("%s", __get_str(name)) -); - -TRACE_EVENT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), - - TP_ARGS(lock, ip, waittime), - - TP_STRUCT__entry( - __string(name, lock->name) - __field(unsigned long, wait_usec) - __field(unsigned long, wait_nsec_rem) - ), - TP_fast_assign( - __assign_str(name, lock->name); - __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); - __entry->wait_usec = (unsigned long) waittime; - ), - TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, - __entry->wait_nsec_rem) -); - -#endif -#endif - -#endif /* _TRACE_LOCKDEP_H */ - -/* This part must be outside protection */ -#include -- cgit v1.2.2 From 6beb000923882f6204ea2cfcd932e568e900803f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Nov 2009 15:21:34 +0000 Subject: locking: Make inlining decision Kconfig based commit 892a7c67 (locking: Allow arch-inlined spinlocks) implements the selection of which lock functions are inlined based on defines in arch/.../spinlock.h: #define __always_inline__LOCK_FUNCTION Despite of the name __always_inline__* the lock functions can be built out of line depending on config options. 
Also if the arch does not set some inline defines the generic code might set them; again depending on config options. This makes it unnecessary hard to figure out when and which lock functions are inlined. Aside of that it makes it way harder and messier for -rt to manipulate the lock functions. Convert the inlining decision to CONFIG switches. Each lock function is inlined depending on CONFIG_INLINE_*. The configs implement the existing dependencies. The architecture code can select ARCH_INLINE_* to signal that it wants the corresponding lock function inlined. ARCH_INLINE_* is necessary as Kconfig ignores "depends on" restrictions when a config element is selected. No functional change. Signed-off-by: Thomas Gleixner LKML-Reference: <20091109151428.504477141@linutronix.de> Acked-by: Heiko Carstens Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra --- include/linux/spinlock_api_smp.h | 75 +++++++++++++++------------------------- 1 file changed, 28 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 7a7e18fc2415..8264a7f459bc 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -60,137 +60,118 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); -/* - * We inline the unlock functions in the nondebug case: - */ -#if !defined(CONFIG_DEBUG_SPINLOCK) && !defined(CONFIG_PREEMPT) -#define __always_inline__spin_unlock -#define __always_inline__read_unlock -#define __always_inline__write_unlock -#define __always_inline__spin_unlock_irq -#define __always_inline__read_unlock_irq -#define __always_inline__write_unlock_irq -#endif - -#ifndef CONFIG_DEBUG_SPINLOCK -#ifndef CONFIG_GENERIC_LOCKBREAK - -#ifdef __always_inline__spin_lock +#ifdef CONFIG_INLINE_SPIN_LOCK #define _spin_lock(lock) __spin_lock(lock) #endif -#ifdef 
__always_inline__read_lock +#ifdef CONFIG_INLINE_READ_LOCK #define _read_lock(lock) __read_lock(lock) #endif -#ifdef __always_inline__write_lock +#ifdef CONFIG_INLINE_WRITE_LOCK #define _write_lock(lock) __write_lock(lock) #endif -#ifdef __always_inline__spin_lock_bh +#ifdef CONFIG_INLINE_SPIN_LOCK_BH #define _spin_lock_bh(lock) __spin_lock_bh(lock) #endif -#ifdef __always_inline__read_lock_bh +#ifdef CONFIG_INLINE_READ_LOCK_BH #define _read_lock_bh(lock) __read_lock_bh(lock) #endif -#ifdef __always_inline__write_lock_bh +#ifdef CONFIG_INLINE_WRITE_LOCK_BH #define _write_lock_bh(lock) __write_lock_bh(lock) #endif -#ifdef __always_inline__spin_lock_irq +#ifdef CONFIG_INLINE_SPIN_LOCK_IRQ #define _spin_lock_irq(lock) __spin_lock_irq(lock) #endif -#ifdef __always_inline__read_lock_irq +#ifdef CONFIG_INLINE_READ_LOCK_IRQ #define _read_lock_irq(lock) __read_lock_irq(lock) #endif -#ifdef __always_inline__write_lock_irq +#ifdef CONFIG_INLINE_WRITE_LOCK_IRQ #define _write_lock_irq(lock) __write_lock_irq(lock) #endif -#ifdef __always_inline__spin_lock_irqsave +#ifdef CONFIG_INLINE_SPIN_LOCK_IRQSAVE #define _spin_lock_irqsave(lock) __spin_lock_irqsave(lock) #endif -#ifdef __always_inline__read_lock_irqsave +#ifdef CONFIG_INLINE_READ_LOCK_IRQSAVE #define _read_lock_irqsave(lock) __read_lock_irqsave(lock) #endif -#ifdef __always_inline__write_lock_irqsave +#ifdef CONFIG_INLINE_WRITE_LOCK_IRQSAVE #define _write_lock_irqsave(lock) __write_lock_irqsave(lock) #endif -#endif /* !CONFIG_GENERIC_LOCKBREAK */ - -#ifdef __always_inline__spin_trylock +#ifdef CONFIG_INLINE_SPIN_TRYLOCK #define _spin_trylock(lock) __spin_trylock(lock) #endif -#ifdef __always_inline__read_trylock +#ifdef CONFIG_INLINE_READ_TRYLOCK #define _read_trylock(lock) __read_trylock(lock) #endif -#ifdef __always_inline__write_trylock +#ifdef CONFIG_INLINE_WRITE_TRYLOCK #define _write_trylock(lock) __write_trylock(lock) #endif -#ifdef __always_inline__spin_trylock_bh +#ifdef CONFIG_INLINE_SPIN_TRYLOCK_BH #define 
_spin_trylock_bh(lock) __spin_trylock_bh(lock) #endif -#ifdef __always_inline__spin_unlock +#ifdef CONFIG_INLINE_SPIN_UNLOCK #define _spin_unlock(lock) __spin_unlock(lock) #endif -#ifdef __always_inline__read_unlock +#ifdef CONFIG_INLINE_READ_UNLOCK #define _read_unlock(lock) __read_unlock(lock) #endif -#ifdef __always_inline__write_unlock +#ifdef CONFIG_INLINE_WRITE_UNLOCK #define _write_unlock(lock) __write_unlock(lock) #endif -#ifdef __always_inline__spin_unlock_bh +#ifdef CONFIG_INLINE_SPIN_UNLOCK_BH #define _spin_unlock_bh(lock) __spin_unlock_bh(lock) #endif -#ifdef __always_inline__read_unlock_bh +#ifdef CONFIG_INLINE_READ_UNLOCK_BH #define _read_unlock_bh(lock) __read_unlock_bh(lock) #endif -#ifdef __always_inline__write_unlock_bh +#ifdef CONFIG_INLINE_WRITE_UNLOCK_BH #define _write_unlock_bh(lock) __write_unlock_bh(lock) #endif -#ifdef __always_inline__spin_unlock_irq +#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQ #define _spin_unlock_irq(lock) __spin_unlock_irq(lock) #endif -#ifdef __always_inline__read_unlock_irq +#ifdef CONFIG_INLINE_READ_UNLOCK_IRQ #define _read_unlock_irq(lock) __read_unlock_irq(lock) #endif -#ifdef __always_inline__write_unlock_irq +#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQ #define _write_unlock_irq(lock) __write_unlock_irq(lock) #endif -#ifdef __always_inline__spin_unlock_irqrestore +#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE #define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags) #endif -#ifdef __always_inline__read_unlock_irqrestore +#ifdef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE #define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags) #endif -#ifdef __always_inline__write_unlock_irqrestore +#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE #define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags) #endif -#endif /* CONFIG_DEBUG_SPINLOCK */ - static inline int __spin_trylock(spinlock_t *lock) { preempt_disable(); -- cgit v1.2.2 From 687b16fb617bd446439425a368ad7c7bbd202c73 
Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 13 Nov 2009 13:16:15 +0100 Subject: hw-breakpoints: Provide an off-case for counter_arch_bp() If an arch doesn't support the hw breakpoints, counter_arch_bp() has no off case to cover the missing breakpoint info structure from the perf event. The result is a build error in non-x86 configs. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Prasad LKML-Reference: <1258114575-32655-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar Cc: Prasad --- include/linux/hw_breakpoint.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 7eba9b92e5f3..18710e0c84bd 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -16,11 +16,6 @@ enum { HW_BREAKPOINT_X = 4, }; -static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) -{ - return &bp->hw.info; -} - static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; @@ -83,6 +78,11 @@ extern void release_bp_slot(struct perf_event *bp); extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); +static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) +{ + return &bp->hw.info; +} + #else /* !CONFIG_HAVE_HW_BREAKPOINT */ static inline struct perf_event * @@ -126,6 +126,11 @@ static inline void release_bp_slot(struct perf_event *bp) { } static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { } +static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) +{ + return NULL; +} + #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif /* _LINUX_HW_BREAKPOINT_H */ -- cgit v1.2.2 From 688bcaff291cf2fe2734e43f2793d4d05b850518 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Nov 2009 01:12:47 +0100 Subject: hw-breakpoints: Fix build on !perf architectures the arch/alpha build fails with: In 
file included from tip/kernel/exit.c:52: tip/include/linux/hw_breakpoint.h: In function 'hw_breakpoint_addr': tip/include/linux/hw_breakpoint.h:21: error: 'struct perf_event' has no member named 'attr' [...] Move these helper inlines inside the CONFIG_HAVE_HW_BREAKPOINT ifdef. Cc: Frederic Weisbecker Cc: Prasad LKML-Reference: <1258114575-32655-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/hw_breakpoint.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 18710e0c84bd..0b98cbf76da7 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -16,6 +16,8 @@ enum { HW_BREAKPOINT_X = 4, }; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; @@ -31,7 +33,6 @@ static inline int hw_breakpoint_len(struct perf_event *bp) return bp->attr.bp_len; } -#ifdef CONFIG_HAVE_HW_BREAKPOINT extern struct perf_event * register_user_hw_breakpoint(unsigned long addr, int len, -- cgit v1.2.2 From 811cb50baf63461ce0bdb234927046131fc7fa8b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Nov 2009 23:40:09 +0100 Subject: tracing: Fix event format export For some reason the export of the event print format to userspace uses '#fmt' which breaks if the format string is anything but a plain string, for example if it is built with macros then the macro names are exported instead of their contents. Use "\"%s\"", fmt instead of "%s", #fmt to export the string and not the way it is built. 
For example, in net/mac80211/driver-trace.h for the trace event drv_start there is: TP_printk( LOCAL_PR_FMT, LOCAL_PR_ARG ) Which used to produce: print fmt: LOCAL_PR_FMT, REC->wiphy_name Now produces: print fmt: "%s", REC->wiphy_name Signed-off-by: Johannes Berg LKML-Reference: <20091113224009.GB23942@elte.hu> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index cc0d9667e182..dacb8ef67000 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -159,7 +159,7 @@ #undef __get_str #undef TP_printk -#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) +#define TP_printk(fmt, args...) "\"%s\", %s\n", fmt, __stringify(args) #undef TP_fast_assign #define TP_fast_assign(args...) args -- cgit v1.2.2 From 4c49b12853fbb5eff4849b7b6a1e895776f027a1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 13 Nov 2009 21:47:33 -0800 Subject: perf_event: Fix invalid type in ioctl definition u64 is invalid in userspace headers, including ioctl definitions; use __u64 instead Signed-off-by: Arjan van de Ven Cc: LKML-Reference: <20091113214733.7cd76be9@infradead.org> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 45b56faf5cdc..ec3768a81058 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -219,7 +219,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_DISABLE _IO ('$', 1) #define PERF_EVENT_IOC_REFRESH _IO ('$', 2) #define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) +#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -- cgit v1.2.2 From 6959450e567c1f17d3ce8489099fc56c3721d577 Mon Sep 17
00:00:00 2001 From: FUJITA Tomonori Date: Sat, 14 Nov 2009 20:46:38 +0900 Subject: swiotlb: Remove duplicate swiotlb_force extern declarations Signed-off-by: FUJITA Tomonori Cc: tony.luck@intel.com LKML-Reference: <1258199198-16657-4-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index eb9bdb4d4854..febedcf67c7e 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -7,6 +7,8 @@ struct device; struct dma_attrs; struct scatterlist; +extern int swiotlb_force; + /* * Maximum allowable number of contiguous slabs to map, * must be a power of 2. What is the appropriate value ? -- cgit v1.2.2 From 559fdc3c1b624edb1933a875022fe7e27934d11c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 16 Nov 2009 12:45:14 +0100 Subject: perf_event: Optimize perf_output_lock() The purpose of perf_output_{un,}lock() is to: 1) avoid publishing incomplete data [ possible when publishing a head that is ahead of an entry that is still being written ] 2) guarantee forward progress [ a simple refcount on pending writers doesn't need to drop to 0, making it so would end up implementing something like forced quiescent states of RCU ] To satisfy the above without undue complexity it serializes between CPUs, this means that a pending writer can only be the same cpu in a nested context, and since (under normal operation) a cpu always makes progress we're good -- if the head is only published when the bottom most writer completes. Now we don't need to disable IRQs in order to serialize between CPUs, disabling preemption ought to be sufficient, especially since we already deal with nesting due to NMIs. This avoids potentially expensive (and needless) local IRQ disable/enable ops.
Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <1258373161.26714.254.camel@laptop> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index df4e73e33774..7f87563c8485 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -714,7 +714,6 @@ struct perf_output_handle { int nmi; int sample; int locked; - unsigned long flags; }; #ifdef CONFIG_PERF_EVENTS -- cgit v1.2.2 From 2ea6dec4a22a6f66f6633876212fd4d195cf8277 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Nov 2009 14:27:27 -0800 Subject: generic-ipi: Add smp_call_function_any() Andrew points out that acpi-cpufreq uses cpumask_any, when it really would prefer to use the same CPU if possible (to avoid an IPI). In general, this seems a good idea to offer. [ tglx: Documented selection preference and Inlined the UP case to avoid the copy of smp_call_function_single() and the extra EXPORT ] Signed-off-by: Rusty Russell Cc: Ingo Molnar Cc: Venkatesh Pallipadi Cc: Len Brown Cc: Zhao Yakui Cc: Dave Jones Cc: Thomas Gleixner Cc: Mike Galbraith Cc: "Zhang, Yanmin" Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/smp.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index 39c64bae776d..7a0570e6a596 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -76,6 +76,9 @@ void smp_call_function_many(const struct cpumask *mask, void __smp_call_function_single(int cpuid, struct call_single_data *data, int wait); +int smp_call_function_any(const struct cpumask *mask, + void (*func)(void *info), void *info, int wait); + /* * Generic and arch helpers */ @@ -137,9 +140,15 @@ static inline void smp_send_reschedule(int cpu) { } #define smp_prepare_boot_cpu() do {} while (0) #define 
smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) -static inline void init_call_single_data(void) +static inline void init_call_single_data(void) { } + +static inline int +smp_call_function_any(const struct cpumask *mask, void (*func)(void *info), + void *info, int wait) { + return smp_call_function_single(0, func, info, wait); } + #endif /* !SMP */ /* -- cgit v1.2.2 From 192dcf1d1775736627280a5dd4cb0f605b21857a Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Wed, 18 Nov 2009 13:06:55 -0800 Subject: tracing: Remove the stale include/trace/power.h Commit 6161352 moved the power tracing to include/trace/events/, but left the old header behind. No one is using the old header, and its declarations are now incorrect, so it should be removed. Signed-off-by: Josh Stone Acked-by: Arjan van de Ven Cc: Frank Ch. Eigler Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1258578415-14752-1-git-send-email-jistone@redhat.com> Signed-off-by: Ingo Molnar --- include/trace/power.h | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 include/trace/power.h (limited to 'include') diff --git a/include/trace/power.h b/include/trace/power.h deleted file mode 100644 index ef204666e983..000000000000 --- a/include/trace/power.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _TRACE_POWER_H -#define _TRACE_POWER_H - -#include -#include - -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; - -struct power_trace { - ktime_t stamp; - ktime_t end; - int type; - int state; -}; - -DECLARE_TRACE(power_start, - TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), - TP_ARGS(it, type, state)); - -DECLARE_TRACE(power_mark, - TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), - TP_ARGS(it, type, state)); - -DECLARE_TRACE(power_end, - TP_PROTO(struct power_trace *it), - TP_ARGS(it)); - -#endif /* _TRACE_POWER_H */ -- cgit v1.2.2 From 
453f19eea7dbad837425e9b07d84568d14898794 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:43 +0100 Subject: perf: Allow for custom overflow handlers in-kernel perf users might wish to have custom actions on the sample interrupt. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212508.222339539@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b5cdac0de370..a430ac3074af 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -567,6 +567,8 @@ struct perf_pending_entry { typedef void (*perf_callback_t)(struct perf_event *, void *); +struct perf_sample_data; + /** * struct perf_event - performance event kernel representation: */ @@ -658,6 +660,10 @@ struct perf_event { struct pid_namespace *ns; u64 id; + void (*overflow_handler)(struct perf_event *event, + int nmi, struct perf_sample_data *data, + struct pt_regs *regs); + #ifdef CONFIG_EVENT_PROFILE struct event_filter *filter; #endif -- cgit v1.2.2 From 59ed446f792cc07d37b1536b9c4664d14e25e425 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:55 +0100 Subject: perf: Fix event scaling for inherited counters Properly account the full hierarchy of counters for both the count (we already did so) and the scale times (new). 
Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212509.153379276@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a430ac3074af..36fe89f72641 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -782,7 +782,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, pid_t pid, perf_callback_t callback); -extern u64 perf_event_read_value(struct perf_event *event); +extern u64 perf_event_read_value(struct perf_event *event, + u64 *enabled, u64 *running); struct perf_sample_data { u64 type; -- cgit v1.2.2 From ce71b9df8893ec954e56c5979df6da274f20f65e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Nov 2009 05:26:55 +0100 Subject: tracing: Use the perf recursion protection from trace event When we commit a trace to perf, we first check if we are recursing in the same buffer so that we don't mess-up the buffer with a recursing trace. But later on, we do the same check from perf to avoid commit recursion. The recursion check is desired early before we touch the buffer but we want to do this check only once. Then export the recursion protection from perf and use it from the trace events before submitting a trace. 
v2: Put appropriate Reported-by tag Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Steven Rostedt Cc: Masami Hiramatsu Cc: Jason Baron LKML-Reference: <1258864015-10579-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 9 ++------- include/linux/perf_event.h | 4 ++++ include/trace/ftrace.h | 23 ++++++++++++----------- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 43360c1d8f70..47bbdf9c38d0 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -137,13 +137,8 @@ struct ftrace_event_call { #define FTRACE_MAX_PROFILE_SIZE 2048 -struct perf_trace_buf { - char buf[FTRACE_MAX_PROFILE_SIZE]; - int recursion; -}; - -extern struct perf_trace_buf *perf_trace_buf; -extern struct perf_trace_buf *perf_trace_buf_nmi; +extern char *perf_trace_buf; +extern char *perf_trace_buf_nmi; #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 36fe89f72641..74e98b1d3391 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -874,6 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); +extern int perf_swevent_get_recursion_context(int **recursion); +extern void perf_swevent_put_recursion_context(int *recursion); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -902,6 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void 
perf_event_init(void) { } +static int perf_swevent_get_recursion_context(int **recursion) { return -1; } +static void perf_swevent_put_recursion_context(int *recursion) { } #endif diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 4945d1c99864..c222ef5238bf 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -724,16 +724,19 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ + extern int perf_swevent_get_recursion_context(int **recursion); \ + extern void perf_swevent_put_recursion_context(int *recursion); \ struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ - struct perf_trace_buf *trace_buf; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ struct trace_entry *ent; \ int __entry_size; \ int __data_size; \ + char *trace_buf; \ char *raw_data; \ + int *recursion; \ int __cpu; \ int pc; \ \ @@ -749,6 +752,10 @@ static void ftrace_profile_##call(proto) \ return; \ \ local_irq_save(irq_flags); \ + \ + if (perf_swevent_get_recursion_context(&recursion)) \ + goto end_recursion; \ + \ __cpu = smp_processor_id(); \ \ if (in_nmi()) \ @@ -759,13 +766,7 @@ static void ftrace_profile_##call(proto) \ if (!trace_buf) \ goto end; \ \ - trace_buf = per_cpu_ptr(trace_buf, __cpu); \ - if (trace_buf->recursion++) \ - goto end_recursion; \ - \ - barrier(); \ - \ - raw_data = trace_buf->buf; \ + raw_data = per_cpu_ptr(trace_buf, __cpu); \ \ *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ entry = (struct ftrace_raw_##call *)raw_data; \ @@ -780,9 +781,9 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ -end_recursion: \ - trace_buf->recursion--; \ -end: \ +end: \ + perf_swevent_put_recursion_context(recursion); \ +end_recursion: \ 
local_irq_restore(irq_flags); \ \ } -- cgit v1.2.2 From 5093ebad5f2348076fdc3dac7d2358b1ad7f85f7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Nov 2009 05:21:35 +0100 Subject: hw-breakpoints: Separate the kernel part from breakpoint headers So that we can include this header from userspace tools, like perf tools, to get the breakpoint types and len definitions. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Prasad LKML-Reference: <1258863695-10464-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/hw_breakpoint.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 0b98cbf76da7..4659e0c55ea6 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -16,6 +16,7 @@ enum { HW_BREAKPOINT_X = 4, }; +#ifdef __KERNEL__ #ifdef CONFIG_HAVE_HW_BREAKPOINT static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) @@ -133,5 +134,6 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ +#endif /* __KERNEL__ */ #endif /* _LINUX_HW_BREAKPOINT_H */ -- cgit v1.2.2 From 9f680ab41485edfdc96331b70afa7513aa0a7720 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 22 Nov 2009 08:53:49 -0800 Subject: rcu: Eliminate unneeded function wrapping The function rcu_init() is a wrapper for __rcu_init(), and also sets up the CPU-hotplug notifier for rcu_barrier_cpu_hotplug(). But TINY_RCU doesn't need CPU-hotplug notification, and rcu_barrier_cpu_hotplug() is a simple wrapper for rcu_cpu_notify(). So push rcu_init() out to kernel/rcutree.c and kernel/rcutiny.c and get rid of the wrapper function rcu_barrier_cpu_hotplug(). Signed-off-by: Paul E.
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12589088302320-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutiny.h | 2 -- include/linux/rcutree.h | 3 --- 2 files changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 2c1fe8373e71..a3b6272af2dd 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -38,7 +38,6 @@ void rcu_bh_qs(int cpu); #define rcu_init_sched() do { } while (0) extern void rcu_check_callbacks(int cpu, int user); -extern void __rcu_init(void); /* * Return the number of grace periods. @@ -69,7 +68,6 @@ static inline void synchronize_rcu_bh_expedited(void) } struct notifier_block; -extern int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu); #ifdef CONFIG_NO_HZ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 9642c6bcb399..111a65257350 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -34,8 +34,6 @@ struct notifier_block; extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); -extern int rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu); extern int rcu_needs_cpu(int cpu); extern int rcu_expedited_torture_stats(char *page); @@ -83,7 +81,6 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched_expedited(); } -extern void __rcu_init(void); extern void rcu_check_callbacks(int cpu, int user); extern long rcu_batches_completed(void); -- cgit v1.2.2 From 6ebb237bece23275d1da149b61a342f0d4d06a08 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Sun, 22 Nov 2009 08:53:50 -0800 Subject: rcu: Re-arrange code to reduce #ifdef pain Remove #ifdefs from kernel/rcupdate.c and include/linux/rcupdate.h by moving code to include/linux/rcutiny.h, include/linux/rcutree.h, and kernel/rcutree.c. Also remove some definitions that are no longer used. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1258908830885-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 12 ------------ include/linux/rcutiny.h | 11 +++++++++++ include/linux/rcutree.h | 4 +++- 3 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 2f1bc42a3b82..24440f4bf476 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -52,11 +52,6 @@ struct rcu_head { }; /* Exported common interfaces */ -#ifdef CONFIG_TREE_PREEMPT_RCU -extern void synchronize_rcu(void); -#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -#define synchronize_rcu synchronize_sched -#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ extern void synchronize_rcu_bh(void); extern void synchronize_sched(void); extern void rcu_barrier(void); @@ -67,13 +62,6 @@ extern int sched_expedited_torture_stats(char *page); /* Internal to kernel */ extern void rcu_init(void); -extern void rcu_scheduler_starting(void); -#ifndef CONFIG_TINY_RCU -extern int rcu_needs_cpu(int cpu); -#else -static inline int rcu_needs_cpu(int cpu) { return 0; } -#endif -extern int rcu_scheduler_active; #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index a3b6272af2dd..c4ba9a78721e 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -39,6 
+39,11 @@ void rcu_bh_qs(int cpu); #define rcu_init_sched() do { } while (0) extern void rcu_check_callbacks(int cpu, int user); +static inline int rcu_needs_cpu(int cpu) +{ + return 0; +} + /* * Return the number of grace periods. */ @@ -57,6 +62,8 @@ static inline long rcu_batches_completed_bh(void) extern int rcu_expedited_torture_stats(char *page); +#define synchronize_rcu synchronize_sched + static inline void synchronize_rcu_expedited(void) { synchronize_sched(); @@ -86,6 +93,10 @@ static inline void rcu_exit_nohz(void) #endif /* #else #ifdef CONFIG_NO_HZ */ +static inline void rcu_scheduler_starting(void) +{ +} + static inline void exit_rcu(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 111a65257350..c93eee5911b0 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -35,12 +35,14 @@ struct notifier_block; extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); extern int rcu_needs_cpu(int cpu); +extern void rcu_scheduler_starting(void); extern int rcu_expedited_torture_stats(char *page); #ifdef CONFIG_TREE_PREEMPT_RCU extern void __rcu_read_lock(void); extern void __rcu_read_unlock(void); +extern void synchronize_rcu(void); extern void exit_rcu(void); #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ @@ -55,7 +57,7 @@ static inline void __rcu_read_unlock(void) preempt_enable(); } -#define __synchronize_sched() synchronize_rcu() +#define synchronize_rcu synchronize_sched static inline void exit_rcu(void) { -- cgit v1.2.2 From 4ed7c92d68a5387ba5f7030dc76eab03558e27f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Nov 2009 11:37:29 +0100 Subject: perf_events: Undo some recursion damage Make perf_swevent_get_recursion_context return a context number and disable preemption. This could be used to remove the IRQ disable from the trace bit and index the per-cpu buffer with. 
Signed-off-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Paul Mackerras LKML-Reference: <20091123103819.993226816@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 8 ++++---- include/trace/ftrace.h | 17 +++++++++-------- 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 74e98b1d3391..43adbd7f0010 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -874,8 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); -extern int perf_swevent_get_recursion_context(int **recursion); -extern void perf_swevent_put_recursion_context(int *recursion); +extern int perf_swevent_get_recursion_context(void); +extern void perf_swevent_put_recursion_context(int rctx); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -904,8 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } -static int perf_swevent_get_recursion_context(int **recursion) { return -1; } -static void perf_swevent_put_recursion_context(int *recursion) { } +static inline int perf_swevent_get_recursion_context(void) { return -1; } +static inline void perf_swevent_put_recursion_context(int rctx) { } #endif diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c222ef5238bf..c3417c13e3ed 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -724,8 +724,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - extern int 
perf_swevent_get_recursion_context(int **recursion); \ - extern void perf_swevent_put_recursion_context(int *recursion); \ + extern int perf_swevent_get_recursion_context(void); \ + extern void perf_swevent_put_recursion_context(int rctx); \ struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ @@ -736,8 +736,8 @@ static void ftrace_profile_##call(proto) \ int __data_size; \ char *trace_buf; \ char *raw_data; \ - int *recursion; \ int __cpu; \ + int rctx; \ int pc; \ \ pc = preempt_count(); \ @@ -753,8 +753,9 @@ static void ftrace_profile_##call(proto) \ \ local_irq_save(irq_flags); \ \ - if (perf_swevent_get_recursion_context(&recursion)) \ - goto end_recursion; \ + rctx = perf_swevent_get_recursion_context(); \ + if (rctx < 0) \ + goto end_recursion; \ \ __cpu = smp_processor_id(); \ \ @@ -781,9 +782,9 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ -end: \ - perf_swevent_put_recursion_context(recursion); \ -end_recursion: \ +end: \ + perf_swevent_put_recursion_context(rctx); \ +end_recursion: \ local_irq_restore(irq_flags); \ \ } -- cgit v1.2.2 From e6db4876575f3fdd5b1df2cbff826df95ab9af6a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 23 Nov 2009 15:42:32 +0100 Subject: hw-breakpoints: Include only linux/perf_event.h from kernel part of bp headers As userspace only needs the breakpoints enum types from the breakpoints headers. 
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Prasad LKML-Reference: <1258987355-8751-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/hw_breakpoint.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 4659e0c55ea6..76a48ab9a81e 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -1,8 +1,6 @@ #ifndef _LINUX_HW_BREAKPOINT_H #define _LINUX_HW_BREAKPOINT_H -#include - enum { HW_BREAKPOINT_LEN_1 = 1, HW_BREAKPOINT_LEN_2 = 2, @@ -19,6 +17,8 @@ enum { #ifdef __KERNEL__ #ifdef CONFIG_HAVE_HW_BREAKPOINT +#include + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; -- cgit v1.2.2 From fa7c27ee9394fc0d52404b2a89882e95868a60b9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 23 Nov 2009 22:30:12 +0100 Subject: hw-breakpoints: Fix misordered ifdef Fix a misplaced ifdef. We need the perf event headers also in off-case to avoid the following build error: include/linux/hw_breakpoint.h:94: error: expected declaration specifiers or '...' before 'perf_callback_t' include/linux/hw_breakpoint.h:102: error: expected declaration specifiers or '...' before 'perf_callback_t' include/linux/hw_breakpoint.h:109: error: expected declaration specifiers or '...' before 'perf_callback_t' include/linux/hw_breakpoint.h:116: error: expected declaration specifiers or '...' 
before 'perf_callback_t' Reported-by: Kisskb-bot by Michael Ellerman Signed-off-by: Frederic Weisbecker Cc: Prasad LKML-Reference: <1259011812-8093-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/hw_breakpoint.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 76a48ab9a81e..c9f7f7c7b0e0 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -15,10 +15,11 @@ enum { }; #ifdef __KERNEL__ -#ifdef CONFIG_HAVE_HW_BREAKPOINT #include +#ifdef CONFIG_HAVE_HW_BREAKPOINT + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; -- cgit v1.2.2 From b3a222e52e4d4be77cc4520a57af1a4a0d8222d1 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 23 Nov 2009 16:21:30 -0600 Subject: remove CONFIG_SECURITY_FILE_CAPABILITIES compile option As far as I know, all distros currently ship kernels with default CONFIG_SECURITY_FILE_CAPABILITIES=y. Since having the option on leaves a 'no_file_caps' option to boot without file capabilities, the main reason to keep the option is that turning it off saves you (on my s390x partition) 5k. In particular, vmlinux sizes came to: without patch fscaps=n: 53598392 without patch fscaps=y: 53603406 with this patch applied: 53603342 with the security-next tree. Against this we must weigh the fact that there is no simple way for userspace to figure out whether file capabilities are supported, while things like per-process securebits, capability bounding sets, and adding bits to pI if CAP_SETPCAP is in pE are not supported with SECURITY_FILE_CAPABILITIES=n, leaving a bit of a problem for applications wanting to know whether they can use them and/or why something failed. It also adds another subtly different set of semantics which we must maintain at the risk of severe security regressions. 
So this patch removes the SECURITY_FILE_CAPABILITIES compile option. It drops the kernel size by about 50k over the stock SECURITY_FILE_CAPABILITIES=y kernel, by removing the cap_limit_ptraced_target() function. Changelog: Nov 20: remove cap_limit_ptraced_target() as its logic was ifndef'ed. Signed-off-by: Serge E. Hallyn Acked-by: Andrew G. Morgan Signed-off-by: James Morris --- include/linux/capability.h | 2 -- include/linux/init_task.h | 4 ---- 2 files changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/capability.h b/include/linux/capability.h index c8f2a5f70ed5..39e5ff512fbe 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -92,9 +92,7 @@ struct vfs_cap_data { #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 #define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES extern int file_caps_enabled; -#endif typedef struct kernel_cap_struct { __u32 cap[_KERNEL_CAPABILITY_U32S]; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 21a6f5d9af22..8d10aa7fd4c9 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -83,16 +83,12 @@ extern struct group_info init_groups; #define INIT_IDS #endif -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES /* * Because of the reduced scope of CAP_SETPCAP when filesystem * capabilities are in effect, it is safe to allow CAP_SETPCAP to * be available in the default configuration. */ # define CAP_INIT_BSET CAP_FULL_SET -#else -# define CAP_INIT_BSET CAP_INIT_EFF_SET -#endif #ifdef CONFIG_TREE_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ -- cgit v1.2.2 From c9286b7e293a1ea054e857ff3f5a23d0ad8d4f36 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Nov 2009 19:50:38 +0100 Subject: locking: Remove unused prototype commit 910067d1(remove generic__raw_read_trylock()) removed the implementation but left the prototype around. Remove it.
Signed-off-by: Thomas Gleixner --- include/linux/spinlock.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index f0ca7a7a1757..faf1482028df 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -79,8 +79,6 @@ */ #include -extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); - /* * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them): */ -- cgit v1.2.2 From a49ed0bf427a8328a3296eebedc7697fe5098dbf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Nov 2009 19:57:50 +0100 Subject: locking: Use __[SPIN|RW]_LOCK_UNLOCKED in [spin|rw]_lock_init() SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated. Replace them with the __*_LOCK_UNLOCKED variants. Signed-off-by: Thomas Gleixner --- include/linux/spinlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index faf1482028df..71dccfeb0d88 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -100,7 +100,7 @@ do { \ #else # define spin_lock_init(lock) \ - do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) + do { *(lock) = __SPIN_LOCK_UNLOCKED(lock); } while (0) #endif #ifdef CONFIG_DEBUG_SPINLOCK @@ -114,7 +114,7 @@ do { \ } while (0) #else # define rwlock_init(lock) \ - do { *(lock) = RW_LOCK_UNLOCKED; } while (0) + do { *(lock) = __RW_LOCK_UNLOCKED(lock); } while (0) #endif #define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) -- cgit v1.2.2 From ff038f5c37c2070829004a0678372766c2b32180 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:27:27 -0500 Subject: tracing: Create new TRACE_EVENT_TEMPLATE There are some places in the kernel that define several tracepoints and they are all identical besides the name. The code to enable, disable and record is created for every trace point even if most of the code is identical. 
This patch adds TRACE_EVENT_TEMPLATE that lets the developer create a template TRACE_EVENT and create trace points with DEFINE_EVENT, which is based off of a given template. Each trace point used by this will share most of the code, and bring down the size of the kernel when there are several duplicate events. Usage is: TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print); Which would be the same as defining a normal TRACE_EVENT. To create the trace events that the trace points will use: DEFINE_EVENT(template, name, proto, args) is done. The template is the name of the TRACE_EVENT_TEMPLATE to use. The name is the name of the trace point. The parameters proto and args must be the same as the proto and args of the template. If they are not the same, then a compile error will result. I tried hard to remove this duplication but the C preprocessor is not powerful enough (or my CPP magic experience points are not at a high enough level) to not need them. A lot of trace events are coming in with new XFS development. Most of the trace points are identical except for the name. The following shows the advantage of having TRACE_EVENT_TEMPLATE: $ size fs/xfs/xfs.o.* text data bss dec hex filename 452114 2788 3520 458422 6feb6 fs/xfs/xfs.o.old 638482 38116 3744 680342 a6196 fs/xfs/xfs.o.template 996954 38116 4480 1039550 fdcbe fs/xfs/xfs.o.trace xfs.o.old is without any tracepoints. xfs.o.template uses the new TRACE_EVENT_TEMPLATE. xfs.o.trace uses the current TRACE_EVENT macros.
Requested-by: Christoph Hellwig Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 4 ++ include/trace/define_trace.h | 6 ++ include/trace/ftrace.h | 149 +++++++++++++++++++++++++++++++------------ 3 files changed, 117 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 2aac8a83e89b..88a5b5a809ec 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -280,6 +280,10 @@ static inline void tracepoint_synchronize_unregister(void) * TRACE_EVENT_FN to perform any (un)registration work. */ +#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) +#define DEFINE_EVENT(template, name, proto, args) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN(name, proto, args, struct, \ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 2a4b3bf74033..244985814a43 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -31,6 +31,10 @@ assign, print, reg, unreg) \ DEFINE_TRACE_FN(name, reg, unreg) +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ + DEFINE_TRACE(name) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) @@ -63,6 +67,8 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN +#undef TRACE_EVENT_TEMPLATE +#undef DEFINE_EVENT #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c3417c13e3ed..2969f65d8002 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -18,6 +18,26 @@ #include +/* + * TRACE_EVENT_TEMPLATE can be used to add a generic function + * handlers for events. That is, if all events have the same + * parameters and just have distinct trace points. 
+ * Each tracepoint can be defined with DEFINE_EVENT and that + * will map the TRACE_EVENT_TEMPLATE to the tracepoint. + * + * TRACE_EVENT is a one to one mapping between tracepoint and template. + */ +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + TRACE_EVENT_TEMPLATE(name, \ + PARAMS(proto), \ + PARAMS(args), \ + PARAMS(tstruct), \ + PARAMS(assign), \ + PARAMS(print)); \ + DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args)); + + #undef __field #define __field(type, item) type item; @@ -36,13 +56,15 @@ #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args -#undef TRACE_EVENT -#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - struct ftrace_raw_##name { \ - struct trace_entry ent; \ - tstruct \ - char __data[0]; \ - }; \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + char __data[0]; \ + }; +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ static struct ftrace_event_call event_##name #undef __cpparg @@ -89,12 +111,15 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ struct ftrace_data_offsets_##call { \ tstruct; \ }; +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -170,8 +195,8 @@ #undef TP_perf_assign #define TP_perf_assign(args...) 
-#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ ftrace_format_##call(struct ftrace_event_call *unused, \ struct trace_seq *s) \ @@ -186,6 +211,9 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ return ret; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -255,10 +283,11 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ ftrace_print_symbols_seq(p, value, symbols); \ }) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ static enum print_line_t \ -ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +ftrace_raw_output_id_##call(int event_id, const char *name, \ + struct trace_iterator *iter, int flags) \ { \ struct trace_seq *s = &iter->seq; \ struct ftrace_raw_##call *field; \ @@ -268,7 +297,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ entry = iter->ent; \ \ - if (entry->type != event_##call.id) { \ + if (entry->type != event_id) { \ WARN_ON_ONCE(1); \ return TRACE_TYPE_UNHANDLED; \ } \ @@ -277,14 +306,25 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ p = &get_cpu_var(ftrace_event_seq); \ trace_seq_init(p); \ - ret = trace_seq_printf(s, #call ": " print); \ + ret = trace_seq_printf(s, "%s: ", name); \ + if (ret) \ + ret = trace_seq_printf(s, print); \ put_cpu(); \ if (!ret) \ return TRACE_TYPE_PARTIAL_LINE; \ \ return TRACE_TYPE_HANDLED; \ } - + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ +static enum print_line_t \ +ftrace_raw_output_##name(struct trace_iterator *iter, int flags) \ +{ \ + return ftrace_raw_output_id_##template(event_##name.id, \ + #name, iter, flags); \ +} + #include 
TRACE_INCLUDE(TRACE_INCLUDE_FILE) #undef __field_ext @@ -318,8 +358,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ @@ -335,6 +375,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ return ret; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -361,10 +404,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ __data_size += (len) * sizeof(type); #undef __string -#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) \ +#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ static inline int ftrace_get_offsets_##call( \ struct ftrace_data_offsets_##call *__data_offsets, proto) \ { \ @@ -376,6 +419,9 @@ static inline int ftrace_get_offsets_##call( \ return __data_size; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #ifdef CONFIG_EVENT_PROFILE @@ -397,19 +443,22 @@ static inline int ftrace_get_offsets_##call( \ * */ -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ \ -static void ftrace_profile_##call(proto); \ +static void ftrace_profile_##name(proto); \ \ -static int 
ftrace_profile_enable_##call(struct ftrace_event_call *unused)\ +static int ftrace_profile_enable_##name(struct ftrace_event_call *unused)\ { \ - return register_trace_##call(ftrace_profile_##call); \ + return register_trace_##name(ftrace_profile_##name); \ } \ \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\ +static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ { \ - unregister_trace_##call(ftrace_profile_##call); \ + unregister_trace_##name(ftrace_profile_##name); \ } #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) @@ -550,15 +599,13 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\ #define __assign_str(dst, src) \ strcpy(__get_str(dst), src); -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ - \ -static struct ftrace_event_call event_##call; \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ \ -static void ftrace_raw_event_##call(proto) \ +static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \ + proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - struct ftrace_event_call *event_call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ struct ring_buffer *buffer; \ @@ -572,7 +619,7 @@ static void ftrace_raw_event_##call(proto) \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ \ event = trace_current_buffer_lock_reserve(&buffer, \ - event_##call.id, \ + event_call->id, \ sizeof(*entry) + __data_size, \ irq_flags, pc); \ if (!event) \ @@ -587,6 +634,14 @@ static void ftrace_raw_event_##call(proto) \ if (!filter_current_check_discard(buffer, event_call, entry, event)) \ trace_nowake_buffer_unlock_commit(buffer, \ event, irq_flags, pc); \ +} + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ + \ +static void ftrace_raw_event_##call(proto) \ +{ \ + 
ftrace_raw_event_id_##template(&event_##call, args); \ } \ \ static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ @@ -630,8 +685,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = ftrace_raw_init_event_##call, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ - .show_format = ftrace_format_##call, \ - .define_fields = ftrace_define_fields_##call, \ + .show_format = ftrace_format_##template, \ + .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ } @@ -719,14 +774,15 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef __perf_count #define __perf_count(c) __count = (c) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -static void ftrace_profile_##call(proto) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +static void \ +ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ + proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ extern int perf_swevent_get_recursion_context(void); \ extern void perf_swevent_put_recursion_context(int rctx); \ - struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ @@ -789,6 +845,15 @@ end_recursion: \ \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ +static void ftrace_profile_##call(proto) \ +{ \ + struct ftrace_event_call *event_call = &event_##call; \ + \ + ftrace_profile_templ_##template(event_call, args); \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif /* CONFIG_EVENT_PROFILE */ -- cgit v1.2.2 From e5bc9721684e9412f3e0465222f317c362a8ab47 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:36:26 -0500 Subject: tracing: Create new DEFINE_EVENT_PRINT After creating the 
TRACE_EVENT_TEMPLATE I started to look at other trace points to see what duplication was made. I noticed that there are several trace points where they are almost identical except for the name and the output format. Since TRACE_EVENT_TEMPLATE was successful in bringing down the size of trace events, I added a DEFINE_EVENT_PRINT. DEFINE_EVENT_PRINT is used just like DEFINE_EVENT is. That is, the DEFINE_EVENT_PRINT also uses a TRACE_EVENT_TEMPLATE, but it allows the developer to overwrite the print format. If there are two or more TRACE_EVENTS that are identical except for the name and print, then they can be converted to use a TRACE_EVENT_TEMPLATE. Since the TRACE_EVENT_TEMPLATE already does the print output, the first trace event would have its print format held in the TRACE_EVENT_TEMPLATE and be defined with a DEFINE_EVENT. The rest will use the DEFINE_EVENT_PRINT and override the print format. Converting the sched trace points to both DEFINE_EVENT and DEFINE_EVENT_PRINT. Five were converted to DEFINE_EVENT and two were converted to DEFINE_EVENT_PRINT. I was able to get the following: $ size kernel/sched.o-* text data bss dec hex filename 79299 6776 2520 88595 15a13 kernel/sched.o-notrace 101941 11896 2584 116421 1c6c5 kernel/sched.o-templ 104779 11896 2584 119259 1d1db kernel/sched.o-trace sched.o-notrace is the scheduler compiled with no trace points. sched.o-templ is with the use of DEFINE_EVENT and DEFINE_EVENT_PRINT sched.o-trace is the current trace events. 
Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 2 + include/trace/define_trace.h | 5 ++ include/trace/ftrace.h | 123 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 126 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 88a5b5a809ec..7063383cca13 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -283,6 +283,8 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 244985814a43..5d7d855ae21e 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -35,6 +35,10 @@ #define DEFINE_EVENT(template, name, proto, args) \ DEFINE_TRACE(name) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_TRACE(name) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) @@ -69,6 +73,7 @@ #undef TRACE_EVENT_FN #undef TRACE_EVENT_TEMPLATE #undef DEFINE_EVENT +#undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 2969f65d8002..b0461772bc8d 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -67,6 +67,10 @@ #define DEFINE_EVENT(template, name, proto, args) \ static struct ftrace_event_call event_##name +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) 
+ #undef __cpparg #define __cpparg(arg...) arg @@ -120,6 +124,10 @@ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -198,15 +206,28 @@ #undef TRACE_EVENT_TEMPLATE #define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ -ftrace_format_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ +ftrace_format_setup_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ struct ftrace_raw_##call field __attribute__((unused)); \ int ret = 0; \ \ tstruct; \ \ - trace_seq_printf(s, "\nprint fmt: " print); \ + return ret; \ +} \ + \ +static int \ +ftrace_format_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ +{ \ + int ret = 0; \ + \ + ret = ftrace_format_setup_##call(unused, s); \ + if (!ret) \ + return ret; \ + \ + ret = trace_seq_printf(s, "\nprint fmt: " print); \ \ return ret; \ } @@ -214,6 +235,23 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ +static int \ +ftrace_format_##name(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ +{ \ + int ret = 0; \ + \ + ret = ftrace_format_setup_##template(unused, s); \ + if (!ret) \ + return ret; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -325,6 +363,38 @@ ftrace_raw_output_##name(struct trace_iterator *iter, int flags) \ #name, iter, flags); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ +static enum print_line_t \ +ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +{ \ + struct trace_seq *s = 
&iter->seq; \ + struct ftrace_raw_##template *field; \ + struct trace_entry *entry; \ + struct trace_seq *p; \ + int ret; \ + \ + entry = iter->ent; \ + \ + if (entry->type != event_##call.id) { \ + WARN_ON_ONCE(1); \ + return TRACE_TYPE_UNHANDLED; \ + } \ + \ + field = (typeof(field))entry; \ + \ + p = &get_cpu_var(ftrace_event_seq); \ + trace_seq_init(p); \ + ret = trace_seq_printf(s, "%s: ", #call); \ + if (ret) \ + ret = trace_seq_printf(s, print); \ + put_cpu(); \ + if (!ret) \ + return TRACE_TYPE_PARTIAL_LINE; \ + \ + return TRACE_TYPE_HANDLED; \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #undef __field_ext @@ -378,6 +448,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -422,6 +496,10 @@ static inline int ftrace_get_offsets_##call( \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #ifdef CONFIG_EVENT_PROFILE @@ -461,6 +539,10 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ unregister_trace_##name(ftrace_profile_##name); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif @@ -674,7 +756,19 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ event_##call.id = id; \ INIT_LIST_HEAD(&event_##call.fields); \ return 0; \ -} \ +} + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + 
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -690,6 +784,23 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ _TRACE_PROFILE_INIT(call) \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ + \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = __stringify(TRACE_SYSTEM), \ + .event = &ftrace_event_type_##call, \ + .raw_init = ftrace_raw_init_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ + .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##template, \ + _TRACE_PROFILE_INIT(call) \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -854,6 +965,10 @@ static void ftrace_profile_##call(proto) \ ftrace_profile_templ_##template(event_call, args); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif /* CONFIG_EVENT_PROFILE */ -- cgit v1.2.2 From 75ec29ab848a7e92a41aaafaeb33d1afbc839be4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:48:08 -0500 Subject: tracing: Convert some sched trace events to DEFINE_EVENT and _PRINT Converting some of the scheduler trace events to use the TRACE_EVENT_TEMPLATE, DEFINE_EVENT and DEFINE_EVENT_PRINT helped to save some space: $ size kernel/sched.o-* text data bss dec hex filename 79299 6776 2520 88595 15a13 kernel/sched.o-notrace 101941 11896 2584 116421 1c6c5 kernel/sched.o-templ 
104779 11896 2584 119259 1d1db kernel/sched.o-trace sched.o-notrace is without any tracepoints compiled sched.o-templ is with this patch sched.o-trace is the tracepoints before this patch The trace events converted to DEFINE_EVENT: sched_wakeup, sched_wakeup_new, sched_process_free, sched_process_exit, and sched_stat_wait. The trace events converted to DEFINE_EVENT_PRINT: sched_stat_sleep and sched_stat_iowait. Note, since the TRACE_EVENT_TEMPLATE always uses a print, the sched_stat_wait print format is defined in the template and this template is used by sched_stat_sleep and sched_stat_iowait. But the latter two override the print format. Signed-off-by: Steven Rostedt --- include/trace/events/sched.h | 170 +++++++++++++------------------------------ 1 file changed, 52 insertions(+), 118 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index b50b9856c59f..238f74b58486 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task, * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT(sched_wakeup, +TRACE_EVENT_TEMPLATE(sched_wakeup_template, TP_PROTO(struct rq *rq, struct task_struct *p, int success), @@ -110,38 +110,19 @@ TRACE_EVENT(sched_wakeup, __entry->success, __entry->target_cpu) ); +DEFINE_EVENT(sched_wakeup_template, sched_wakeup, + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_ARGS(rq, p, success)); + /* * Tracepoint for waking up a new task: * * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin.
) */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - __field( int, target_cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - __entry->target_cpu = task_cpu(p); - ), - - TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->target_cpu) -); +DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_ARGS(rq, p, success)); /* * Tracepoint for task switches, performed by the scheduler: @@ -216,10 +197,7 @@ TRACE_EVENT(sched_migrate_task, __entry->orig_cpu, __entry->dest_cpu) ); -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, +TRACE_EVENT_TEMPLATE(sched_process_template, TP_PROTO(struct task_struct *p), @@ -242,29 +220,19 @@ TRACE_EVENT(sched_process_free, ); /* - * Tracepoint for a task exiting: + * Tracepoint for freeing a task: */ -TRACE_EVENT(sched_process_exit, - - TP_PROTO(struct task_struct *p), +DEFINE_EVENT(sched_process_template, sched_process_free, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); + - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("comm=%s pid=%d prio=%d", - __entry->comm, __entry->pid, __entry->prio) -); +/* + * Tracepoint for a task exiting: + */ +DEFINE_EVENT(sched_process_template, sched_process_exit, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); /* * Tracepoint for a waiting task: @@ -348,12 +316,7 @@ 
TRACE_EVENT(sched_signal_send, * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. */ - -/* - * Tracepoint for accounting wait time (time the task is runnable - * but not actually running due to scheduler contention). - */ -TRACE_EVENT(sched_stat_wait, +TRACE_EVENT_TEMPLATE(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), @@ -379,6 +342,37 @@ TRACE_EVENT(sched_stat_wait, (unsigned long long)__entry->delay) ); + +/* + * Tracepoint for accounting wait time (time the task is runnable + * but not actually running due to scheduler contention). + */ +DEFINE_EVENT(sched_stat_template, sched_stat_wait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); + +/* + * Tracepoint for accounting sleep time (time the task is not runnable, + * including iowait, see below). + */ +DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_sleep, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay), + TP_printk("task: %s:%d sleep: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay)); + +/* + * Tracepoint for accounting iowait time (time the task is not runnable + * due to waiting on IO to complete). + */ +DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_iowait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay), + TP_printk("task: %s:%d iowait: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay)); + /* * Tracepoint for accounting runtime (time the task is executing * on a CPU). @@ -412,66 +406,6 @@ TRACE_EVENT(sched_stat_runtime, (unsigned long long)__entry->vruntime) ); -/* - * Tracepoint for accounting sleep time (time the task is not runnable, - * including iowait, see below). 
- */ -TRACE_EVENT(sched_stat_sleep, - - TP_PROTO(struct task_struct *tsk, u64 delay), - - TP_ARGS(tsk, delay), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, delay ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); - ), - - TP_printk("comm=%s pid=%d delay=%Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay) -); - -/* - * Tracepoint for accounting iowait time (time the task is not runnable - * due to waiting on IO to complete). - */ -TRACE_EVENT(sched_stat_iowait, - - TP_PROTO(struct task_struct *tsk, u64 delay), - - TP_ARGS(tsk, delay), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, delay ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); - ), - - TP_printk("comm=%s pid=%d delay=%Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay) -); - #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ -- cgit v1.2.2 From b8007ef7422270864eae523cb38d7522a53a94d3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 3 Nov 2009 13:45:32 +0800 Subject: tracing: Separate raw syscall from syscall tracer The current syscall tracer mixes raw syscalls and real syscalls. echo 1 > events/syscalls/enable And we get these from the output: (XXXX stands in for " grep-20914 [001] 588211.446347" .. etc) XXXX: sys_read(fd: 3, buf: 80609a8, count: 7000) XXXX: sys_enter: NR 3 (3, 80609a8, 7000, a, 1000, bfce8ef8) XXXX: sys_read -> 0x138 XXXX: sys_exit: NR 3 = 312 XXXX: sys_read(fd: 3, buf: 8060ae0, count: 7000) XXXX: sys_enter: NR 3 (3, 8060ae0, 7000, a, 1000, bfce8ef8) XXXX: sys_read -> 0x138 XXXX: sys_exit: NR 3 = 312 There are 2 drawbacks here. 
A) two almost identical records are saved in ringbuffer when a syscall enters or exits. (4 records for every syscall) This wastes precious space in the ring buffer. B) the lines including "sys_enter/sys_exit" produces hardly any useful information for the output (no labels). The user can use this method to prevent these drawbacks: echo 1 > events/syscalls/enable echo 0 > events/syscalls/sys_enter/enable echo 0 > events/syscalls/sys_exit/enable But this is not user friendly. So we separate raw syscall from syscall tracer. After this fix applied: syscall tracer's output (echo 1 > events/syscalls/enable): XXXX: sys_read(fd: 3, buf: bfe87d88, count: 200) XXXX: sys_read -> 0x200 XXXX: sys_fstat64(fd: 3, statbuf: bfe87c98) XXXX: sys_fstat64 -> 0x0 XXXX: sys_close(fd: 3) raw syscall tracer's output (echo 1 > events/raw_syscalls/enable): XXXX: sys_enter: NR 175 (0, bf92bf18, bf92bf98, 8, b748cff4, bf92bef8) XXXX: sys_exit: NR 175 = 0 XXXX: sys_enter: NR 175 (2, bf92bf98, 0, 8, b748cff4, bf92bef8) XXXX: sys_exit: NR 175 = 0 XXXX: sys_enter: NR 3 (9, bf927f9c, 4000, b77e2518, b77dce60, bf92bff8) Signed-off-by: Lai Jiangshan LKML-Reference: <4AEFC37C.5080609@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/events/syscalls.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 397dff2dbd5a..fb726ac7caee 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -1,5 +1,6 @@ #undef TRACE_SYSTEM -#define TRACE_SYSTEM syscalls +#define TRACE_SYSTEM raw_syscalls +#define TRACE_INCLUDE_FILE syscalls #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_EVENTS_SYSCALLS_H -- cgit v1.2.2 From 091ad3658e3c76c5fb05f65bfb64a0246f8f31b5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Nov 2009 09:04:55 +0100 Subject: events: Rename TRACE_EVENT_TEMPLATE() to DECLARE_EVENT_CLASS() It is not quite 
obvious at first sight what TRACE_EVENT_TEMPLATE does: does it define an event as well beyond defining a template? To clarify this, rename it to DECLARE_EVENT_CLASS, which follows the various 'DECLARE_*()' idioms we already have in the kernel: DECLARE_EVENT_CLASS(class) DEFINE_EVENT(class, event1) DEFINE_EVENT(class, event2) DEFINE_EVENT(class, event3) To complete this logic we should also rename TRACE_EVENT() to: DEFINE_SINGLE_EVENT(single_event) ... but in a more quiet moment of the kernel cycle. Cc: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E286A.2000405@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 2 +- include/trace/define_trace.h | 2 +- include/trace/events/sched.h | 6 +++--- include/trace/ftrace.h | 46 ++++++++++++++++++++++---------------------- 4 files changed, 28 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 7063383cca13..f59604ed0ec6 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -280,7 +280,7 @@ static inline void tracepoint_synchronize_unregister(void) * TRACE_EVENT_FN to perform any (un)registration work. 
*/ -#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) +#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 5d7d855ae21e..5acfb1eb4df9 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -71,7 +71,7 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN -#undef TRACE_EVENT_TEMPLATE +#undef DECLARE_EVENT_CLASS #undef DEFINE_EVENT #undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 238f74b58486..5ce795021851 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task, * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT_TEMPLATE(sched_wakeup_template, +DECLARE_EVENT_CLASS(sched_wakeup_template, TP_PROTO(struct rq *rq, struct task_struct *p, int success), @@ -197,7 +197,7 @@ TRACE_EVENT(sched_migrate_task, __entry->orig_cpu, __entry->dest_cpu) ); -TRACE_EVENT_TEMPLATE(sched_process_template, +DECLARE_EVENT_CLASS(sched_process_template, TP_PROTO(struct task_struct *p), @@ -316,7 +316,7 @@ TRACE_EVENT(sched_signal_send, * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. 
*/ -TRACE_EVENT_TEMPLATE(sched_stat_template, +DECLARE_EVENT_CLASS(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index b0461772bc8d..2c9c073e45ad 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -19,17 +19,17 @@ #include /* - * TRACE_EVENT_TEMPLATE can be used to add a generic function + * DECLARE_EVENT_CLASS can be used to add a generic function * handlers for events. That is, if all events have the same * parameters and just have distinct trace points. * Each tracepoint can be defined with DEFINE_EVENT and that - * will map the TRACE_EVENT_TEMPLATE to the tracepoint. + * will map the DECLARE_EVENT_CLASS to the tracepoint. * * TRACE_EVENT is a one to one mapping between tracepoint and template. */ #undef TRACE_EVENT #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - TRACE_EVENT_TEMPLATE(name, \ + DECLARE_EVENT_CLASS(name, \ PARAMS(proto), \ PARAMS(args), \ PARAMS(tstruct), \ @@ -56,8 +56,8 @@ #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ struct ftrace_raw_##name { \ struct trace_entry ent; \ tstruct \ @@ -115,8 +115,8 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ struct ftrace_data_offsets_##call { \ tstruct; \ }; @@ -203,8 +203,8 @@ #undef TP_perf_assign #define TP_perf_assign(args...) 
-#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static int \ ftrace_format_setup_##call(struct ftrace_event_call *unused, \ struct trace_seq *s) \ @@ -321,8 +321,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ ftrace_print_symbols_seq(p, value, symbols); \ }) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static enum print_line_t \ ftrace_raw_output_id_##call(int event_id, const char *name, \ struct trace_iterator *iter, int flags) \ @@ -428,8 +428,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static int \ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ @@ -480,8 +480,8 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef __string #define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static inline int ftrace_get_offsets_##call( \ struct ftrace_data_offsets_##call *__data_offsets, proto) \ { \ @@ -521,8 +521,8 @@ static inline int ftrace_get_offsets_##call( \ * */ -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) #undef 
DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) \ @@ -681,8 +681,8 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #define __assign_str(dst, src) \ strcpy(__get_str(dst), src); -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ \ static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \ proto) \ @@ -764,8 +764,8 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ @@ -885,8 +885,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef __perf_count #define __perf_count(c) __count = (c) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static void \ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ proto) \ -- cgit v1.2.2 From 925684d6d589e40e41007edf47c69e729d911263 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:03:23 +0800 Subject: tracing: Convert module refcnt events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 29854 1980 128 31962 7cda kernel/module.o.old 28750 1980 128 30858 788a kernel/module.o Two events are converted: module_refcnt: module_get, module_put No change in functionality. 
Signed-off-by: Li Zefan Cc: Rusty Russell Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E283B.3010508@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/module.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 84160fb18478..4b0f48ba16a6 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -51,7 +51,7 @@ TRACE_EVENT(module_free, TP_printk("%s", __get_str(name)) ); -TRACE_EVENT(module_get, +DECLARE_EVENT_CLASS(module_refcnt, TP_PROTO(struct module *mod, unsigned long ip, int refcnt), @@ -73,26 +73,18 @@ TRACE_EVENT(module_get, __get_str(name), (void *)__entry->ip, __entry->refcnt) ); -TRACE_EVENT(module_put, +DEFINE_EVENT(module_refcnt, module_get, TP_PROTO(struct module *mod, unsigned long ip, int refcnt), - TP_ARGS(mod, ip, refcnt), + TP_ARGS(mod, ip, refcnt) +); - TP_STRUCT__entry( - __field( unsigned long, ip ) - __field( int, refcnt ) - __string( name, mod->name ) - ), +DEFINE_EVENT(module_refcnt, module_put, - TP_fast_assign( - __entry->ip = ip; - __entry->refcnt = refcnt; - __assign_str(name, mod->name); - ), + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), - TP_printk("%s call_site=%pf refcnt=%d", - __get_str(name), (void *)__entry->ip, __entry->refcnt) + TP_ARGS(mod, ip, refcnt) ); TRACE_EVENT(module_request, -- cgit v1.2.2 From 53d0422c2d10808fddb2c30859193bfea164c7e3 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:04:10 +0800 Subject: tracing: Convert some kmem events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 333987 69800 27228 431015 693a7 mm/built-in.o.old 330030 69800 27228 427058 68432 mm/built-in.o 8 events are converted: kmem_alloc: kmalloc, kmem_cache_alloc kmem_alloc_node: kmalloc_node, kmem_cache_alloc_node kmem_free: kfree, kmem_cache_free mm_page: 
mm_page_alloc_zone_locked, mm_page_pcpu_drain No change in functionality. Signed-off-by: Li Zefan Acked-by: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mel Gorman LKML-Reference: <4B0E286A.2000405@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/kmem.h | 130 ++++++++++++++------------------------------ 1 file changed, 40 insertions(+), 90 deletions(-) (limited to 'include') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index eaf46bdd18a5..3adca0ca9dbe 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -44,7 +44,7 @@ {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \ ) : "GFP_NOWAIT" -TRACE_EVENT(kmalloc, +DECLARE_EVENT_CLASS(kmem_alloc, TP_PROTO(unsigned long call_site, const void *ptr, @@ -78,41 +78,23 @@ TRACE_EVENT(kmalloc, show_gfp_flags(__entry->gfp_flags)) ); -TRACE_EVENT(kmem_cache_alloc, +DEFINE_EVENT(kmem_alloc, kmalloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), +DEFINE_EVENT(kmem_alloc, kmem_cache_alloc, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - 
show_gfp_flags(__entry->gfp_flags)) + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags) ); -TRACE_EVENT(kmalloc_node, +DECLARE_EVENT_CLASS(kmem_alloc_node, TP_PROTO(unsigned long call_site, const void *ptr, @@ -150,45 +132,25 @@ TRACE_EVENT(kmalloc_node, __entry->node) ); -TRACE_EVENT(kmem_cache_alloc_node, +DEFINE_EVENT(kmem_alloc_node, kmalloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, + gfp_t gfp_flags, int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), +DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, + gfp_t gfp_flags, int node), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - show_gfp_flags(__entry->gfp_flags), - __entry->node) + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) ); -TRACE_EVENT(kfree, +DECLARE_EVENT_CLASS(kmem_free, TP_PROTO(unsigned long call_site, const void *ptr), @@ -207,23 +169,18 @@ TRACE_EVENT(kfree, TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) ); -TRACE_EVENT(kmem_cache_free, +DEFINE_EVENT(kmem_free, kfree, TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, 
ptr), + TP_ARGS(call_site, ptr) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), +DEFINE_EVENT(kmem_free, kmem_cache_free, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), + TP_PROTO(unsigned long call_site, const void *ptr), - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) + TP_ARGS(call_site, ptr) ); TRACE_EVENT(mm_page_free_direct, @@ -299,7 +256,7 @@ TRACE_EVENT(mm_page_alloc, show_gfp_flags(__entry->gfp_flags)) ); -TRACE_EVENT(mm_page_alloc_zone_locked, +DECLARE_EVENT_CLASS(mm_page, TP_PROTO(struct page *page, unsigned int order, int migratetype), @@ -325,29 +282,22 @@ TRACE_EVENT(mm_page_alloc_zone_locked, __entry->order == 0) ); -TRACE_EVENT(mm_page_pcpu_drain, +DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, - TP_PROTO(struct page *page, int order, int migratetype), + TP_PROTO(struct page *page, unsigned int order, int migratetype), - TP_ARGS(page, order, migratetype), + TP_ARGS(page, order, migratetype) +); - TP_STRUCT__entry( - __field( struct page *, page ) - __field( int, order ) - __field( int, migratetype ) - ), +DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain, - TP_fast_assign( - __entry->page = page; - __entry->order = order; - __entry->migratetype = migratetype; - ), + TP_PROTO(struct page *page, unsigned int order, int migratetype), + + TP_ARGS(page, order, migratetype), TP_printk("page=%p pfn=%lu order=%d migratetype=%d", - __entry->page, - page_to_pfn(__entry->page), - __entry->order, - __entry->migratetype) + __entry->page, page_to_pfn(__entry->page), + __entry->order, __entry->migratetype) ); TRACE_EVENT(mm_page_alloc_extfrag, -- cgit v1.2.2 From c467307c1a812c3150b27a68c2b2d3397bb40a4f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:04:31 +0800 Subject: tracing: Convert softirq events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 12781 952 36 13769 35c9 kernel/softirq.o.old 
11981 952 32 12965 32a5 kernel/softirq.o Two events are converted: softirq: softirq_entry, softirq_exit No change in functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E287F.4030708@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/irq.h | 46 +++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index dcfcd4407623..0e4cfb694fe7 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -82,18 +82,7 @@ TRACE_EVENT(irq_handler_exit, __entry->irq, __entry->ret ? "handled" : "unhandled") ); -/** - * softirq_entry - called immediately before the softirq handler - * @h: pointer to struct softirq_action - * @vec: pointer to first struct softirq_action in softirq_vec array - * - * The @h parameter, contains a pointer to the struct softirq_action - * which has a pointer to the action handler that is called. By subtracting - * the @vec pointer from the @h pointer, we can determine the softirq - * number. Also, when used in combination with the softirq_exit tracepoint - * we can determine the softirq latency. - */ -TRACE_EVENT(softirq_entry, +DECLARE_EVENT_CLASS(softirq, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), @@ -111,6 +100,24 @@ TRACE_EVENT(softirq_entry, show_softirq_name(__entry->vec)) ); +/** + * softirq_entry - called immediately before the softirq handler + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter, contains a pointer to the struct softirq_action + * which has a pointer to the action handler that is called. By subtracting + * the @vec pointer from the @h pointer, we can determine the softirq + * number. Also, when used in combination with the softirq_exit tracepoint + * we can determine the softirq latency. 
+ */ +DEFINE_EVENT(softirq, softirq_entry, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec) +); + /** * softirq_exit - called immediately after the softirq handler returns * @h: pointer to struct softirq_action @@ -122,22 +129,11 @@ TRACE_EVENT(softirq_entry, * combination with the softirq_entry tracepoint we can determine the softirq * latency. */ -TRACE_EVENT(softirq_exit, +DEFINE_EVENT(softirq, softirq_exit, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - - TP_STRUCT__entry( - __field( int, vec ) - ), - - TP_fast_assign( - __entry->vec = (int)(h - vec); - ), - - TP_printk("vec=%d [action=%s]", __entry->vec, - show_softirq_name(__entry->vec)) + TP_ARGS(h, vec) ); #endif /* _TRACE_IRQ_H */ -- cgit v1.2.2 From 382ece710bf88b08440b598731361e5a47582b62 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:05:03 +0800 Subject: tracing: Convert some workqueue events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 13171 800 72 14043 36db kernel/workqueue.o.old 12243 800 68 13111 3337 kernel/workqueue.o Two events are converted: workqueue: workqueue_insertion, workqueue_execution No change in functionality. 
Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E289F.5010104@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/workqueue.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index e4612dbd7ba6..d6c974474e70 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -8,7 +8,7 @@ #include #include -TRACE_EVENT(workqueue_insertion, +DECLARE_EVENT_CLASS(workqueue, TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), @@ -30,26 +30,18 @@ TRACE_EVENT(workqueue_insertion, __entry->thread_pid, __entry->func) ); -TRACE_EVENT(workqueue_execution, +DEFINE_EVENT(workqueue, workqueue_insertion, TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work), + TP_ARGS(wq_thread, work) +); - TP_STRUCT__entry( - __array(char, thread_comm, TASK_COMM_LEN) - __field(pid_t, thread_pid) - __field(work_func_t, func) - ), +DEFINE_EVENT(workqueue, workqueue_execution, - TP_fast_assign( - memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); - __entry->thread_pid = wq_thread->pid; - __entry->func = work->func; - ), + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_printk("thread=%s:%d func=%pf", __entry->thread_comm, - __entry->thread_pid, __entry->func) + TP_ARGS(wq_thread, work) ); /* Trace the creation of one workqueue thread on a cpu */ -- cgit v1.2.2 From 7703466b4c0a21b88d701882bef0d45bcb0a0281 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:05:38 +0800 Subject: tracing: Convert some power events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 4312 524 12 4848 12f0 kernel/trace/power-traces.o.old 3455 524 8 3987 f93 kernel/trace/power-traces.o Two events are converted: power: power_start, power_frequency No change in 
functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Arjan van de Ven LKML-Reference: <4B0E28C2.1090906@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/power.h | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 9bb96e5a2848..c4efe9b8280d 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -16,7 +16,7 @@ enum { }; #endif -TRACE_EVENT(power_start, +DECLARE_EVENT_CLASS(power, TP_PROTO(unsigned int type, unsigned int state), @@ -35,42 +35,36 @@ TRACE_EVENT(power_start, TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state) ); -TRACE_EVENT(power_end, - - TP_PROTO(int dummy), +DEFINE_EVENT(power, power_start, - TP_ARGS(dummy), + TP_PROTO(unsigned int type, unsigned int state), - TP_STRUCT__entry( - __field( u64, dummy ) - ), + TP_ARGS(type, state) +); - TP_fast_assign( - __entry->dummy = 0xffff; - ), +DEFINE_EVENT(power, power_frequency, - TP_printk("dummy=%lu", (unsigned long)__entry->dummy) + TP_PROTO(unsigned int type, unsigned int state), + TP_ARGS(type, state) ); +TRACE_EVENT(power_end, -TRACE_EVENT(power_frequency, - - TP_PROTO(unsigned int type, unsigned int state), + TP_PROTO(int dummy), - TP_ARGS(type, state), + TP_ARGS(dummy), TP_STRUCT__entry( - __field( u64, type ) - __field( u64, state ) + __field( u64, dummy ) ), TP_fast_assign( - __entry->type = type; - __entry->state = state; + __entry->dummy = 0xffff; ), - TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long) __entry->state) + TP_printk("dummy=%lu", (unsigned long)__entry->dummy) + ); #endif /* _TRACE_POWER_H */ -- cgit v1.2.2 From 77ca1e0294f25fc26053ba14353e703158acef26 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:06:14 +0800 Subject: tracing: Convert some block events to DEFINE_EVENT use 
DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 53570 3284 184 57038 dece block/blk-core.o.old 43702 3284 144 47130 b81a block/blk-core.o 12 events are converted: block_rq: block_rq_insert, block_rq_issue block_rq_with_error: block_rq_{abort, requeue, complete} block_bio: block_bio_{backmerge, frontmerge, queue} block_get_rq: block_getrq, block_sleeprq block_unplug: block_unplug_timer, block_unplug_io No change in functionality. Signed-off-by: Li Zefan Cc: Jens Axboe Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E28E6.7060609@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/block.h | 202 +++++++++---------------------------------- 1 file changed, 42 insertions(+), 160 deletions(-) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 00405b5f624a..5fb72733331e 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -8,7 +8,7 @@ #include #include -TRACE_EVENT(block_rq_abort, +DECLARE_EVENT_CLASS(block_rq_with_error, TP_PROTO(struct request_queue *q, struct request *rq), @@ -40,41 +40,28 @@ TRACE_EVENT(block_rq_abort, __entry->nr_sector, __entry->errors) ); -TRACE_EVENT(block_rq_insert, +DEFINE_EVENT(block_rq_with_error, block_rq_abort, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), + TP_ARGS(q, rq) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( unsigned int, bytes ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), +DEFINE_EVENT(block_rq_with_error, block_rq_requeue, - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->bytes = blk_pc_request(rq) ? 
blk_rq_bytes(rq) : 0; + TP_PROTO(struct request_queue *q, struct request *rq), - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_ARGS(q, rq) +); - TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __entry->bytes, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) +DEFINE_EVENT(block_rq_with_error, block_rq_complete, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq) ); -TRACE_EVENT(block_rq_issue, +DECLARE_EVENT_CLASS(block_rq, TP_PROTO(struct request_queue *q, struct request *rq), @@ -86,7 +73,7 @@ TRACE_EVENT(block_rq_issue, __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) + __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) ), @@ -108,68 +95,18 @@ TRACE_EVENT(block_rq_issue, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_rq_requeue, +DEFINE_EVENT(block_rq, block_rq_insert, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( int, errors ) - __array( char, rwbs, 6 ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), - - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 
0 : blk_rq_sectors(rq); - __entry->errors = rq->errors; - - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - ), - - TP_printk("%d,%d %s (%s) %llu + %u [%d]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->errors) + TP_ARGS(q, rq) ); -TRACE_EVENT(block_rq_complete, +DEFINE_EVENT(block_rq, block_rq_issue, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( int, errors ) - __array( char, rwbs, 6 ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), - - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->errors = rq->errors; - - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - ), - - TP_printk("%d,%d %s (%s) %llu + %u [%d]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->errors) + TP_ARGS(q, rq) ); TRACE_EVENT(block_bio_bounce, @@ -228,7 +165,7 @@ TRACE_EVENT(block_bio_complete, __entry->nr_sector, __entry->error) ); -TRACE_EVENT(block_bio_backmerge, +DECLARE_EVENT_CLASS(block_bio, TP_PROTO(struct request_queue *q, struct bio *bio), @@ -256,63 +193,28 @@ TRACE_EVENT(block_bio_backmerge, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_bio_frontmerge, +DEFINE_EVENT(block_bio, block_bio_backmerge, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - 
__entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio) ); -TRACE_EVENT(block_bio_queue, +DEFINE_EVENT(block_bio, block_bio_frontmerge, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio), + TP_ARGS(q, bio) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_bio, block_bio_queue, - TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q, struct bio *bio), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio) ); -TRACE_EVENT(block_getrq, +DECLARE_EVENT_CLASS(block_get_rq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), @@ -341,33 +243,18 @@ TRACE_EVENT(block_getrq, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_sleeprq, +DEFINE_EVENT(block_get_rq, block_getrq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw), + TP_ARGS(q, bio, rw) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_get_rq, block_sleeprq, - TP_fast_assign( - __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; - __entry->sector = bio ? 
bio->bi_sector : 0; - __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; - blk_fill_rwbs(__entry->rwbs, - bio ? bio->bi_rw : 0, __entry->nr_sector); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio, rw) ); TRACE_EVENT(block_plug, @@ -387,7 +274,7 @@ TRACE_EVENT(block_plug, TP_printk("[%s]", __entry->comm) ); -TRACE_EVENT(block_unplug_timer, +DECLARE_EVENT_CLASS(block_unplug, TP_PROTO(struct request_queue *q), @@ -406,23 +293,18 @@ TRACE_EVENT(block_unplug_timer, TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); -TRACE_EVENT(block_unplug_io, +DEFINE_EVENT(block_unplug, block_unplug_timer, TP_PROTO(struct request_queue *q), - TP_ARGS(q), + TP_ARGS(q) +); - TP_STRUCT__entry( - __field( int, nr_rq ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_unplug, block_unplug_io, - TP_fast_assign( - __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q), - TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) + TP_ARGS(q) ); TRACE_EVENT(block_split, -- cgit v1.2.2 From 071688f36e7eba3e37b2fc48e35bfdab99b80b4d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:06:55 +0800 Subject: tracing: Convert some jbd2 events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 34903 1693 448 37044 90b4 fs/jbd2/journal.o.old 31931 1693 416 34040 84f8 fs/jbd2/journal.o Four events are converted: jbd2_commit: jbd2_start_commit, jbd2_commit_{locking, flushing, logging} No change in functionality. 
Signed-off-by: Li Zefan Cc: Theodore Ts'o Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E290F.7030909@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/jbd2.h | 63 ++++++++------------------------------------- 1 file changed, 11 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 3c60b75adb9e..96b370a050de 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -30,7 +30,7 @@ TRACE_EVENT(jbd2_checkpoint, jbd2_dev_to_name(__entry->dev), __entry->result) ); -TRACE_EVENT(jbd2_start_commit, +DECLARE_EVENT_CLASS(jbd2_commit, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), @@ -53,73 +53,32 @@ TRACE_EVENT(jbd2_start_commit, __entry->sync_commit) ); -TRACE_EVENT(jbd2_commit_locking, +DEFINE_EVENT(jbd2_commit, jbd2_start_commit, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); -TRACE_EVENT(jbd2_commit_flushing, +DEFINE_EVENT(jbd2_commit, jbd2_commit_locking, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %s transaction %d sync %d", - 
jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); -TRACE_EVENT(jbd2_commit_logging, +DEFINE_EVENT(jbd2_commit, jbd2_commit_flushing, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), + TP_ARGS(journal, commit_transaction) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), +DEFINE_EVENT(jbd2_commit, jbd2_commit_logging, - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), + TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); TRACE_EVENT(jbd2_end_commit, -- cgit v1.2.2 From b5eb34c3592545c756e50d882c08417eb60740a7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:07:36 +0800 Subject: tracing: Convert some ext4 events to DEFINE_TRACE Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 294695 6104 340 301139 49853 fs/ext4/ext4.o.old 289983 6104 324 296411 485db fs/ext4/ext4.o 5 events are convertd: ext4__write_begin: ext4_write_begin, ext4_da_write_begin ext4__write_end: ext4_{ordered, writeback, journalled}_write_end No change in functionality. 
Signed-off-by: Li Zefan Cc: Theodore Ts'o Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E2938.2040708@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/ext4.h | 129 ++++++++++++-------------------------------- 1 file changed, 35 insertions(+), 94 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index d09550bf3f95..318f76535bd4 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -90,7 +90,7 @@ TRACE_EVENT(ext4_allocate_inode, (unsigned long) __entry->dir, __entry->mode) ); -TRACE_EVENT(ext4_write_begin, +DECLARE_EVENT_CLASS(ext4__write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int flags), @@ -118,7 +118,23 @@ TRACE_EVENT(ext4_write_begin, __entry->pos, __entry->len, __entry->flags) ); -TRACE_EVENT(ext4_ordered_write_end, +DEFINE_EVENT(ext4__write_begin, ext4_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags) +); + +DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags) +); + +DECLARE_EVENT_CLASS(ext4__write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), @@ -145,57 +161,36 @@ TRACE_EVENT(ext4_ordered_write_end, __entry->pos, __entry->len, __entry->copied) ); -TRACE_EVENT(ext4_writeback_write_end, +DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), - TP_ARGS(inode, pos, len, copied), + TP_ARGS(inode, pos, len, copied) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), +DEFINE_EVENT(ext4__write_end, ext4_writeback_write_end, - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; 
- __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) + TP_ARGS(inode, pos, len, copied) ); -TRACE_EVENT(ext4_journalled_write_end, +DEFINE_EVENT(ext4__write_end, ext4_journalled_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), - TP_ARGS(inode, pos, len, copied), - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), + TP_ARGS(inode, pos, len, copied) +); - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), +DEFINE_EVENT(ext4__write_end, ext4_da_write_end, - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), + + TP_ARGS(inode, pos, len, copied) ); TRACE_EVENT(ext4_writepage, @@ -337,60 +332,6 @@ TRACE_EVENT(ext4_da_writepages_result, (unsigned long) __entry->writeback_index) ); -TRACE_EVENT(ext4_da_write_begin, - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int flags), - - TP_ARGS(inode, pos, len, flags), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, flags ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->flags = flags; - ), - - TP_printk("dev %s ino %lu pos %llu len %u flags %u", - 
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->flags) -); - -TRACE_EVENT(ext4_da_write_end, - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int copied), - - TP_ARGS(inode, pos, len, copied), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), - - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) -); - TRACE_EVENT(ext4_discard_blocks, TP_PROTO(struct super_block *sb, unsigned long long blk, unsigned long long count), -- cgit v1.2.2 From 470dda7417f284b9cfc96560b2acd98df63798a2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:08:01 +0800 Subject: tracing: Restore original format of sched events The original format for sched_stat_iowait and sched_stat_sleep: $ cat events/sched/sched_stat_iowait/format ... print fmt: "comm=%s pid=%d delay=%Lu [ns]", ... $ cat events/sched/sched_stat_sleep/format ... print fmt: "comm=%s pid=%d delay=%Lu [ns]", ... But commit commit 75ec29ab848a7e92a41aaafaeb33d1afbc839be4 ("tracing: Convert some sched trace events to DEFINE_EVENT and _PRINT") broke the format: $ cat events/sched/sched_stat_iowait/format print fmt: "task: %s:%d iowait: %Lu [ns]", ... $ cat events/sched/sched_stat_sleep/format print fmt: "task: %s:%d sleep: %Lu [ns]", ... No change in functionality. 
Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E2951.9050800@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 5ce795021851..9d316b22388c 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -355,23 +355,17 @@ DEFINE_EVENT(sched_stat_template, sched_stat_wait, * Tracepoint for accounting sleep time (time the task is not runnable, * including iowait, see below). */ -DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_sleep, - TP_PROTO(struct task_struct *tsk, u64 delay), - TP_ARGS(tsk, delay), - TP_printk("task: %s:%d sleep: %Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay)); +DEFINE_EVENT(sched_stat_template, sched_stat_sleep, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); /* * Tracepoint for accounting iowait time (time the task is not runnable * due to waiting on IO to complete). */ -DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_iowait, - TP_PROTO(struct task_struct *tsk, u64 delay), - TP_ARGS(tsk, delay), - TP_printk("task: %s:%d iowait: %Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay)); +DEFINE_EVENT(sched_stat_template, sched_stat_iowait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); /* * Tracepoint for accounting runtime (time the task is executing -- cgit v1.2.2 From d1eb650ff4130972fa21462fa49cd35a2865403b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:45 -0500 Subject: tracepoint: Move signal sending tracepoint to events/signal.h Move signal sending event to events/signal.h. This patch also renames sched_signal_send event to signal_generate. Changes in v4: - Fix a typo of task_struct pointer. 
Changes in v3: - Add docbook style comments Changes in v2: - Add siginfo argument - Add siginfo storing macro Signed-off-by: Masami Hiramatsu Reviewed-by: Jason Baron Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215645.30449.60208.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 25 ---------------- include/trace/events/signal.h | 66 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 25 deletions(-) create mode 100644 include/trace/events/signal.h (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9d316b22388c..cfceb0b73e20 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -287,31 +287,6 @@ TRACE_EVENT(sched_process_fork, __entry->child_comm, __entry->child_pid) ); -/* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ), - - TP_printk("sig=%d comm=%s pid=%d", - __entry->sig, __entry->comm, __entry->pid) -); - /* * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. 
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h new file mode 100644 index 000000000000..ef51756a801d --- /dev/null +++ b/include/trace/events/signal.h @@ -0,0 +1,66 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM signal + +#if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SIGNAL_H + +#include +#include +#include + +#define TP_STORE_SIGINFO(__entry, info) \ + do { \ + if (info == SEND_SIG_NOINFO) { \ + __entry->errno = 0; \ + __entry->code = SI_USER; \ + } else if (info == SEND_SIG_PRIV) { \ + __entry->errno = 0; \ + __entry->code = SI_KERNEL; \ + } else { \ + __entry->errno = info->si_errno; \ + __entry->code = info->si_code; \ + } \ + } while (0) + +/** + * signal_generate - called when a signal is generated + * @sig: signal number + * @info: pointer to struct siginfo + * @task: pointer to struct task_struct + * + * Current process sends a 'sig' signal to 'task' process with + * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, + * 'info' is not a pointer and you can't access its field. Instead, + * SEND_SIG_NOINFO means that si_code is SI_USER, and SEND_SIG_PRIV + * means that si_code is SI_KERNEL. 
+ */ +TRACE_EVENT(signal_generate, + + TP_PROTO(int sig, struct siginfo *info, struct task_struct *task), + + TP_ARGS(sig, info, task), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, errno ) + __field( int, code ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + __entry->sig = sig; + TP_STORE_SIGINFO(__entry, info); + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + __entry->pid = task->pid; + ), + + TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d", + __entry->sig, __entry->errno, __entry->code, + __entry->comm, __entry->pid) +); + +#endif /* _TRACE_SIGNAL_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.2 From f9d4257e01d266e67420cc99d456b6d4c8464f54 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:51 -0500 Subject: tracepoint: Add signal deliver event Add a tracepoint where a process gets a signal. This tracepoint shows signal-number, sa-handler and sa-flag. Changes in v3: - Add docbook style comments Changes in v2: - Add siginfo argument - Fix comment Signed-off-by: Masami Hiramatsu Reviewed-by: Jason Baron Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215651.30449.20926.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- include/trace/events/signal.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include') diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index ef51756a801d..a6d71de0dc0d 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h @@ -60,6 +60,45 @@ TRACE_EVENT(signal_generate, __entry->comm, __entry->pid) ); +/** + * signal_deliver - called when a signal is delivered + * @sig: signal number + * @info: pointer to struct siginfo + * @ka: pointer to struct k_sigaction + * + * A 'sig' signal is delivered to current process with 'info' siginfo, + * and it will be handled by 'ka'. 
ka->sa.sa_handler can be SIG_IGN or + * SIG_DFL. + * Note that some signals reported by signal_generate tracepoint can be + * lost, ignored or modified (by debugger) before hitting this tracepoint. + * This means, this can show which signals are actually delivered, but + * matching generated signals and delivered signals may not be correct. + */ +TRACE_EVENT(signal_deliver, + + TP_PROTO(int sig, struct siginfo *info, struct k_sigaction *ka), + + TP_ARGS(sig, info, ka), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, errno ) + __field( int, code ) + __field( unsigned long, sa_handler ) + __field( unsigned long, sa_flags ) + ), + + TP_fast_assign( + __entry->sig = sig; + TP_STORE_SIGINFO(__entry, info); + __entry->sa_handler = (unsigned long)ka->sa.sa_handler; + __entry->sa_flags = ka->sa.sa_flags; + ), + + TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx", + __entry->sig, __entry->errno, __entry->code, + __entry->sa_handler, __entry->sa_flags) +); #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ -- cgit v1.2.2 From ba005e1f417295d28cd1563ab82bc33af07fb16a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:58 -0500 Subject: tracepoint: Add signal loss events Add signal_overflow_fail and signal_lose_info tracepoints for signal-lost events. 
Changes in v3: - Add docbook style comments Changes in v2: - Use siginfo string macro Suggested-by: Roland McGrath Reviewed-by: Jason Baron Signed-off-by: Masami Hiramatsu Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215658.30449.9934.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- include/trace/events/signal.h | 68 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'include') diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index a6d71de0dc0d..a510b75ac304 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h @@ -99,6 +99,74 @@ TRACE_EVENT(signal_deliver, __entry->sig, __entry->errno, __entry->code, __entry->sa_handler, __entry->sa_flags) ); + +/** + * signal_overflow_fail - called when signal queue is overflow + * @sig: signal number + * @group: signal to process group or not (bool) + * @info: pointer to struct siginfo + * + * Kernel fails to generate 'sig' signal with 'info' siginfo, because + * siginfo queue is overflow, and the signal is dropped. + * 'group' is not 0 if the signal will be sent to a process group. + * 'sig' is always one of RT signals. + */ +TRACE_EVENT(signal_overflow_fail, + + TP_PROTO(int sig, int group, struct siginfo *info), + + TP_ARGS(sig, group, info), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, group ) + __field( int, errno ) + __field( int, code ) + ), + + TP_fast_assign( + __entry->sig = sig; + __entry->group = group; + TP_STORE_SIGINFO(__entry, info); + ), + + TP_printk("sig=%d group=%d errno=%d code=%d", + __entry->sig, __entry->group, __entry->errno, __entry->code) +); + +/** + * signal_lose_info - called when siginfo is lost + * @sig: signal number + * @group: signal to process group or not (bool) + * @info: pointer to struct siginfo + * + * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo + * queue is overflow. 
+ * 'group' is not 0 if the signal will be sent to a process group. + * 'sig' is always one of non-RT signals. + */ +TRACE_EVENT(signal_lose_info, + + TP_PROTO(int sig, int group, struct siginfo *info), + + TP_ARGS(sig, group, info), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, group ) + __field( int, errno ) + __field( int, code ) + ), + + TP_fast_assign( + __entry->sig = sig; + __entry->group = group; + TP_STORE_SIGINFO(__entry, info); + ), + + TP_printk("sig=%d group=%d errno=%d code=%d", + __entry->sig, __entry->group, __entry->errno, __entry->code) +); #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ -- cgit v1.2.2 From d180c5bccec02612256fd8076ff3c1fac3429553 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 26 Nov 2009 14:48:30 +0900 Subject: sched: Introduce task_times() to replace task_{u,s}time() pair Functions task_{u,s}time() are called in pair in almost all cases. However task_stime() is implemented to call task_utime() from its inside, so such paired calls run task_utime() twice. It means we do heavy divisions (div_u64 + do_div) twice to get utime and stime which can be obtained at same time by one set of divisions. This patch introduces a function task_times(*tsk, *utime, *stime) to retrieve utime and stime at once in better, optimized way. 
Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Stanislaw Gruszka Cc: Spencer Candland Cc: Oleg Nesterov Cc: Balbir Singh Cc: Americo Wang LKML-Reference: <4B0E16AE.906@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 78ba664474f3..fe6ae1516640 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1723,6 +1723,7 @@ static inline void put_task_struct(struct task_struct *t) extern cputime_t task_utime(struct task_struct *p); extern cputime_t task_stime(struct task_struct *p); extern cputime_t task_gtime(struct task_struct *p); +extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st); /* * Per process flags -- cgit v1.2.2 From d5b7c78e975302a1bab28263266c39ecb71acad4 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 26 Nov 2009 14:49:05 +0900 Subject: sched: Remove task_{u,s,g}time() Now all task_{u,s}time() pairs are replaced by task_times(). And task_gtime() is too simple to be an inline function. Cleanup them all. 
Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Stanislaw Gruszka Cc: Spencer Candland Cc: Oleg Nesterov Cc: Balbir Singh Cc: Americo Wang LKML-Reference: <4B0E16D1.70902@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index fe6ae1516640..0395b0f4df3a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1720,9 +1720,6 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } -extern cputime_t task_utime(struct task_struct *p); -extern cputime_t task_stime(struct task_struct *p); -extern cputime_t task_gtime(struct task_struct *p); extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st); /* -- cgit v1.2.2 From b7b20df91d43d5e59578b8fc16e895c0c8cbd9b5 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 26 Nov 2009 14:49:27 +0900 Subject: sched, time: Define nsecs_to_jiffies() Use of msecs_to_jiffies() for nsecs_to_cputime() have some problems: - The type of msecs_to_jiffies()'s argument is unsigned int, so it cannot convert msecs greater than UINT_MAX = about 49.7 days. - msecs_to_jiffies() returns MAX_JIFFY_OFFSET if MSB of argument is set, assuming that input was negative value. So it cannot convert msecs greater than INT_MAX = about 24.8 days too. This patch defines a new function nsecs_to_jiffies() that can deal greater values, and that can deal all incoming values as unsigned. 
Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Stanislaw Gruszka Cc: Spencer Candland Cc: Oleg Nesterov Cc: Balbir Singh Cc: Amrico Wang Cc: Thomas Gleixner Cc: John Stultz LKML-Reference: <4B0E16E7.5070307@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/jiffies.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 1a9cf78bfce5..6811f4bfc6e7 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x); extern unsigned long clock_t_to_jiffies(unsigned long x); extern u64 jiffies_64_to_clock_t(u64 x); extern u64 nsec_to_clock_t(u64 x); +extern unsigned long nsecs_to_jiffies(u64 n); #define TIMESTAMP_SIZE 30 -- cgit v1.2.2 From 5fa10b28e57f94a90535cfeafe89dcee9f47d540 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Nov 2009 04:55:53 +0100 Subject: hw-breakpoints: Use struct perf_event_attr to define user breakpoints In-kernel user breakpoints are created using functions in which we pass breakpoint parameters as individual variables: address, length and type. Although it fits well for x86, this just does not scale across archictectures that may support this api later as these may have more or different needs. Pass in a perf_event_attr structure instead because it is meant to evolve as much as possible into a generic hardware breakpoint parameter structure. 
Reported-by: K.Prasad Signed-off-by: Frederic Weisbecker LKML-Reference: <1259294154-5197-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/hw_breakpoint.h | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index c9f7f7c7b0e0..5da472e434b7 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -20,6 +20,14 @@ enum { #ifdef CONFIG_HAVE_HW_BREAKPOINT +/* As it's for in-kernel or ptrace use, we want it to be pinned */ +#define DEFINE_BREAKPOINT_ATTR(name) \ +struct perf_event_attr name = { \ + .type = PERF_TYPE_BREAKPOINT, \ + .size = sizeof(name), \ + .pinned = 1, \ +}; + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; @@ -36,22 +44,16 @@ static inline int hw_breakpoint_len(struct perf_event *bp) } extern struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, +register_user_hw_breakpoint(struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active); + struct task_struct *tsk); /* FIXME: only change from the attr, and don't unregister */ extern struct perf_event * modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, + struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active); + struct task_struct *tsk); /* * Kernel breakpoints are not associated with any particular thread. 
@@ -89,20 +91,14 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) #else /* !CONFIG_HAVE_HW_BREAKPOINT */ static inline struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, +register_user_hw_breakpoint(struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active) { return NULL; } + struct task_struct *tsk) { return NULL; } static inline struct perf_event * modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, + struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active) { return NULL; } + struct task_struct *tsk) { return NULL; } static inline struct perf_event * register_wide_hw_breakpoint_cpu(unsigned long addr, int len, -- cgit v1.2.2 From dd1853c3f493f6d22d9e5390b192a07b73d2ac0a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Nov 2009 04:55:54 +0100 Subject: hw-breakpoints: Use struct perf_event_attr to define kernel breakpoints Kernel breakpoints are created using functions in which we pass breakpoint parameters as individual variables: address, length and type. Although it fits well for x86, this just does not scale across architectures that may support this api later as these may have more or different needs. Pass in a perf_event_attr structure instead because it is meant to evolve as much as possible into a generic hardware breakpoint parameter structure. 
Reported-by: K.Prasad Signed-off-by: Frederic Weisbecker LKML-Reference: <1259294154-5197-2-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/hw_breakpoint.h | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 5da472e434b7..a03daed08c59 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -28,6 +28,13 @@ struct perf_event_attr name = { \ .pinned = 1, \ }; +static inline void hw_breakpoint_init(struct perf_event_attr *attr) +{ + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(*attr); + attr->pinned = 1; +} + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; @@ -59,19 +66,13 @@ modify_user_hw_breakpoint(struct perf_event *bp, * Kernel breakpoints are not associated with any particular thread. */ extern struct perf_event * -register_wide_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, +register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_callback_t triggered, - int cpu, - bool active); + int cpu); extern struct perf_event ** -register_wide_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - bool active); +register_wide_hw_breakpoint(struct perf_event_attr *attr, + perf_callback_t triggered); extern int register_perf_hw_breakpoint(struct perf_event *bp); extern int __register_perf_hw_breakpoint(struct perf_event *bp); @@ -100,18 +101,12 @@ modify_user_hw_breakpoint(struct perf_event *bp, perf_callback_t triggered, struct task_struct *tsk) { return NULL; } static inline struct perf_event * -register_wide_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, +register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_callback_t triggered, - int cpu, - bool active) { return NULL; } + int cpu) { return NULL; } static inline struct 
perf_event ** -register_wide_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - bool active) { return NULL; } +register_wide_hw_breakpoint(struct perf_event_attr *attr, + perf_callback_t triggered) { return NULL; } static inline int register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } static inline int -- cgit v1.2.2 From c8602edf3f9471466755329b78d309f2a01dd449 Mon Sep 17 00:00:00 2001 From: Thomas Kunze Date: Tue, 10 Feb 2009 14:54:57 +0100 Subject: move drivers/mfd/*.h to include/linux/mfd So drivers like collie_battery driver can use those files easier. --- include/linux/mfd/mcp.h | 68 ++++++++++++ include/linux/mfd/ucb1x00.h | 255 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 323 insertions(+) create mode 100644 include/linux/mfd/mcp.h create mode 100644 include/linux/mfd/ucb1x00.h (limited to 'include') diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h new file mode 100644 index 000000000000..be95e09fd746 --- /dev/null +++ b/include/linux/mfd/mcp.h @@ -0,0 +1,68 @@ +/* + * linux/drivers/mfd/mcp.h + * + * Copyright (C) 2001 Russell King, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. 
+ */ +#ifndef MCP_H +#define MCP_H + +#include + +struct mcp_ops; + +struct mcp { + struct module *owner; + struct mcp_ops *ops; + spinlock_t lock; + int use_count; + unsigned int sclk_rate; + unsigned int rw_timeout; + dma_device_t dma_audio_rd; + dma_device_t dma_audio_wr; + dma_device_t dma_telco_rd; + dma_device_t dma_telco_wr; + struct device attached_device; +}; + +struct mcp_ops { + void (*set_telecom_divisor)(struct mcp *, unsigned int); + void (*set_audio_divisor)(struct mcp *, unsigned int); + void (*reg_write)(struct mcp *, unsigned int, unsigned int); + unsigned int (*reg_read)(struct mcp *, unsigned int); + void (*enable)(struct mcp *); + void (*disable)(struct mcp *); +}; + +void mcp_set_telecom_divisor(struct mcp *, unsigned int); +void mcp_set_audio_divisor(struct mcp *, unsigned int); +void mcp_reg_write(struct mcp *, unsigned int, unsigned int); +unsigned int mcp_reg_read(struct mcp *, unsigned int); +void mcp_enable(struct mcp *); +void mcp_disable(struct mcp *); +#define mcp_get_sclk_rate(mcp) ((mcp)->sclk_rate) + +struct mcp *mcp_host_alloc(struct device *, size_t); +int mcp_host_register(struct mcp *); +void mcp_host_unregister(struct mcp *); + +struct mcp_driver { + struct device_driver drv; + int (*probe)(struct mcp *); + void (*remove)(struct mcp *); + int (*suspend)(struct mcp *, pm_message_t); + int (*resume)(struct mcp *); +}; + +int mcp_driver_register(struct mcp_driver *); +void mcp_driver_unregister(struct mcp_driver *); + +#define mcp_get_drvdata(mcp) dev_get_drvdata(&(mcp)->attached_device) +#define mcp_set_drvdata(mcp,d) dev_set_drvdata(&(mcp)->attached_device, d) + +#define mcp_priv(mcp) ((void *)((mcp)+1)) + +#endif diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h new file mode 100644 index 000000000000..eac346336382 --- /dev/null +++ b/include/linux/mfd/ucb1x00.h @@ -0,0 +1,255 @@ +/* + * linux/include/mfd/ucb1x00.h + * + * Copyright (C) 2001 Russell King, All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + */ +#ifndef UCB1200_H +#define UCB1200_H + +#include +#define UCB_IO_DATA 0x00 +#define UCB_IO_DIR 0x01 + +#define UCB_IO_0 (1 << 0) +#define UCB_IO_1 (1 << 1) +#define UCB_IO_2 (1 << 2) +#define UCB_IO_3 (1 << 3) +#define UCB_IO_4 (1 << 4) +#define UCB_IO_5 (1 << 5) +#define UCB_IO_6 (1 << 6) +#define UCB_IO_7 (1 << 7) +#define UCB_IO_8 (1 << 8) +#define UCB_IO_9 (1 << 9) + +#define UCB_IE_RIS 0x02 +#define UCB_IE_FAL 0x03 +#define UCB_IE_STATUS 0x04 +#define UCB_IE_CLEAR 0x04 +#define UCB_IE_ADC (1 << 11) +#define UCB_IE_TSPX (1 << 12) +#define UCB_IE_TSMX (1 << 13) +#define UCB_IE_TCLIP (1 << 14) +#define UCB_IE_ACLIP (1 << 15) + +#define UCB_IRQ_TSPX 12 + +#define UCB_TC_A 0x05 +#define UCB_TC_A_LOOP (1 << 7) /* UCB1200 */ +#define UCB_TC_A_AMPL (1 << 7) /* UCB1300 */ + +#define UCB_TC_B 0x06 +#define UCB_TC_B_VOICE_ENA (1 << 3) +#define UCB_TC_B_CLIP (1 << 4) +#define UCB_TC_B_ATT (1 << 6) +#define UCB_TC_B_SIDE_ENA (1 << 11) +#define UCB_TC_B_MUTE (1 << 13) +#define UCB_TC_B_IN_ENA (1 << 14) +#define UCB_TC_B_OUT_ENA (1 << 15) + +#define UCB_AC_A 0x07 +#define UCB_AC_B 0x08 +#define UCB_AC_B_LOOP (1 << 8) +#define UCB_AC_B_MUTE (1 << 13) +#define UCB_AC_B_IN_ENA (1 << 14) +#define UCB_AC_B_OUT_ENA (1 << 15) + +#define UCB_TS_CR 0x09 +#define UCB_TS_CR_TSMX_POW (1 << 0) +#define UCB_TS_CR_TSPX_POW (1 << 1) +#define UCB_TS_CR_TSMY_POW (1 << 2) +#define UCB_TS_CR_TSPY_POW (1 << 3) +#define UCB_TS_CR_TSMX_GND (1 << 4) +#define UCB_TS_CR_TSPX_GND (1 << 5) +#define UCB_TS_CR_TSMY_GND (1 << 6) +#define UCB_TS_CR_TSPY_GND (1 << 7) +#define UCB_TS_CR_MODE_INT (0 << 8) +#define UCB_TS_CR_MODE_PRES (1 << 8) +#define UCB_TS_CR_MODE_POS (2 << 8) +#define UCB_TS_CR_BIAS_ENA (1 << 11) +#define UCB_TS_CR_TSPX_LOW (1 << 12) +#define UCB_TS_CR_TSMX_LOW (1 << 13) 
+ +#define UCB_ADC_CR 0x0a +#define UCB_ADC_SYNC_ENA (1 << 0) +#define UCB_ADC_VREFBYP_CON (1 << 1) +#define UCB_ADC_INP_TSPX (0 << 2) +#define UCB_ADC_INP_TSMX (1 << 2) +#define UCB_ADC_INP_TSPY (2 << 2) +#define UCB_ADC_INP_TSMY (3 << 2) +#define UCB_ADC_INP_AD0 (4 << 2) +#define UCB_ADC_INP_AD1 (5 << 2) +#define UCB_ADC_INP_AD2 (6 << 2) +#define UCB_ADC_INP_AD3 (7 << 2) +#define UCB_ADC_EXT_REF (1 << 5) +#define UCB_ADC_START (1 << 7) +#define UCB_ADC_ENA (1 << 15) + +#define UCB_ADC_DATA 0x0b +#define UCB_ADC_DAT_VAL (1 << 15) +#define UCB_ADC_DAT(x) (((x) & 0x7fe0) >> 5) + +#define UCB_ID 0x0c +#define UCB_ID_1200 0x1004 +#define UCB_ID_1300 0x1005 +#define UCB_ID_TC35143 0x9712 + +#define UCB_MODE 0x0d +#define UCB_MODE_DYN_VFLAG_ENA (1 << 12) +#define UCB_MODE_AUD_OFF_CAN (1 << 13) + + +struct ucb1x00_irq { + void *devid; + void (*fn)(int, void *); +}; + +struct ucb1x00 { + spinlock_t lock; + struct mcp *mcp; + unsigned int irq; + struct semaphore adc_sem; + spinlock_t io_lock; + u16 id; + u16 io_dir; + u16 io_out; + u16 adc_cr; + u16 irq_fal_enbl; + u16 irq_ris_enbl; + struct ucb1x00_irq irq_handler[16]; + struct device dev; + struct list_head node; + struct list_head devs; +}; + +struct ucb1x00_driver; + +struct ucb1x00_dev { + struct list_head dev_node; + struct list_head drv_node; + struct ucb1x00 *ucb; + struct ucb1x00_driver *drv; + void *priv; +}; + +struct ucb1x00_driver { + struct list_head node; + struct list_head devs; + int (*add)(struct ucb1x00_dev *dev); + void (*remove)(struct ucb1x00_dev *dev); + int (*suspend)(struct ucb1x00_dev *dev, pm_message_t state); + int (*resume)(struct ucb1x00_dev *dev); +}; + +#define classdev_to_ucb1x00(cd) container_of(cd, struct ucb1x00, dev) + +int ucb1x00_register_driver(struct ucb1x00_driver *); +void ucb1x00_unregister_driver(struct ucb1x00_driver *); + +/** + * ucb1x00_clkrate - return the UCB1x00 SIB clock rate + * @ucb: UCB1x00 structure describing chip + * + * Return the SIB clock rate in Hz. 
+ */ +static inline unsigned int ucb1x00_clkrate(struct ucb1x00 *ucb) +{ + return mcp_get_sclk_rate(ucb->mcp); +} + +/** + * ucb1x00_enable - enable the UCB1x00 SIB clock + * @ucb: UCB1x00 structure describing chip + * + * Enable the SIB clock. This can be called multiple times. + */ +static inline void ucb1x00_enable(struct ucb1x00 *ucb) +{ + mcp_enable(ucb->mcp); +} + +/** + * ucb1x00_disable - disable the UCB1x00 SIB clock + * @ucb: UCB1x00 structure describing chip + * + * Disable the SIB clock. The SIB clock will only be disabled + * when the number of ucb1x00_enable calls match the number of + * ucb1x00_disable calls. + */ +static inline void ucb1x00_disable(struct ucb1x00 *ucb) +{ + mcp_disable(ucb->mcp); +} + +/** + * ucb1x00_reg_write - write a UCB1x00 register + * @ucb: UCB1x00 structure describing chip + * @reg: UCB1x00 4-bit register index to write + * @val: UCB1x00 16-bit value to write + * + * Write the UCB1x00 register @reg with value @val. The SIB + * clock must be running for this function to return. + */ +static inline void ucb1x00_reg_write(struct ucb1x00 *ucb, unsigned int reg, unsigned int val) +{ + mcp_reg_write(ucb->mcp, reg, val); +} + +/** + * ucb1x00_reg_read - read a UCB1x00 register + * @ucb: UCB1x00 structure describing chip + * @reg: UCB1x00 4-bit register index to write + * + * Read the UCB1x00 register @reg and return its value. The SIB + * clock must be running for this function to return. 
+ */ +static inline unsigned int ucb1x00_reg_read(struct ucb1x00 *ucb, unsigned int reg) +{ + return mcp_reg_read(ucb->mcp, reg); +} +/** + * ucb1x00_set_audio_divisor - + * @ucb: UCB1x00 structure describing chip + * @div: SIB clock divisor + */ +static inline void ucb1x00_set_audio_divisor(struct ucb1x00 *ucb, unsigned int div) +{ + mcp_set_audio_divisor(ucb->mcp, div); +} + +/** + * ucb1x00_set_telecom_divisor - + * @ucb: UCB1x00 structure describing chip + * @div: SIB clock divisor + */ +static inline void ucb1x00_set_telecom_divisor(struct ucb1x00 *ucb, unsigned int div) +{ + mcp_set_telecom_divisor(ucb->mcp, div); +} + +void ucb1x00_io_set_dir(struct ucb1x00 *ucb, unsigned int, unsigned int); +void ucb1x00_io_write(struct ucb1x00 *ucb, unsigned int, unsigned int); +unsigned int ucb1x00_io_read(struct ucb1x00 *ucb); + +#define UCB_NOSYNC (0) +#define UCB_SYNC (1) + +unsigned int ucb1x00_adc_read(struct ucb1x00 *ucb, int adc_channel, int sync); +void ucb1x00_adc_enable(struct ucb1x00 *ucb); +void ucb1x00_adc_disable(struct ucb1x00 *ucb); + +/* + * Which edges of the IRQ do you want to control today? + */ +#define UCB_RISING (1 << 0) +#define UCB_FALLING (1 << 1) + +int ucb1x00_hook_irq(struct ucb1x00 *ucb, unsigned int idx, void (*fn)(int, void *), void *devid); +void ucb1x00_enable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges); +void ucb1x00_disable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges); +int ucb1x00_free_irq(struct ucb1x00 *ucb, unsigned int idx, void *devid); + +#endif -- cgit v1.2.2 From 9ca3dc805cd0d89c44f88b9a399061946781323a Mon Sep 17 00:00:00 2001 From: Thomas Kunze Date: Tue, 10 Feb 2009 14:50:56 +0100 Subject: add gpiolib support to ucb1x00 The old access methods to the gpios will be removed when all users has been converted. 
(mainly ucb1x00-ts) --- include/linux/mfd/mcp.h | 1 + include/linux/mfd/ucb1x00.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h index be95e09fd746..ee496708e38b 100644 --- a/include/linux/mfd/mcp.h +++ b/include/linux/mfd/mcp.h @@ -26,6 +26,7 @@ struct mcp { dma_device_t dma_telco_rd; dma_device_t dma_telco_wr; struct device attached_device; + int gpio_base; }; struct mcp_ops { diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h index eac346336382..aa9c3789bed4 100644 --- a/include/linux/mfd/ucb1x00.h +++ b/include/linux/mfd/ucb1x00.h @@ -11,6 +11,8 @@ #define UCB1200_H #include +#include + #define UCB_IO_DATA 0x00 #define UCB_IO_DIR 0x01 @@ -123,6 +125,7 @@ struct ucb1x00 { struct device dev; struct list_head node; struct list_head devs; + struct gpio_chip gpio; }; struct ucb1x00_driver; -- cgit v1.2.2 From f5560da549ea2e32dd41e36548c0e7dee3d4aabb Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Dec 2006 19:46:38 +0900 Subject: pcmcia: Pass struct pcmcia_device to pcmcia_release_window() No logic changes, just pass struct pcmcia_device to pcmcia_release_window(). 
[linux@dominikbrodowski.net: update to 2.6.31] CC: netdev@vger.kernel.org CC: Jiri Kosina Signed-off-by: Magnus Damm Signed-off-by: Dominik Brodowski --- include/pcmcia/ds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index d82392de4e92..40b098d7aa41 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -201,7 +201,7 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev, int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_handle_t *wh); -int pcmcia_release_window(window_handle_t win); +int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); int pcmcia_get_mem_page(window_handle_t win, memreq_t *req); int pcmcia_map_mem_page(window_handle_t win, memreq_t *req); -- cgit v1.2.2 From 868575d1e87ff2091800aea816972ddb46de60d5 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Dec 2006 19:46:43 +0900 Subject: pcmcia: Pass struct pcmcia_device to pcmcia_map_mem_page() No logic changes, just pass struct pcmcia_device to pcmcia_map_mem_page() [linux@dominikbrodowski.net: update to 2.6.31] CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-scsi@vger.kernel.org CC: Jiri Kosina Acked-by: Karsten Keil (for ISDN) Signed-off-by: Magnus Damm Signed-off-by: Dominik Brodowski --- include/pcmcia/ds.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 40b098d7aa41..f240bfa454f8 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -204,7 +204,8 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); int pcmcia_get_mem_page(window_handle_t win, memreq_t *req); -int pcmcia_map_mem_page(window_handle_t win, memreq_t *req); +int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win, + memreq_t *req); int 
pcmcia_modify_configuration(struct pcmcia_device *p_dev, modconf_t *mod); void pcmcia_disable_device(struct pcmcia_device *p_dev); -- cgit v1.2.2 From 16456ebabfec3f8f509fc18b45f256d066a1b360 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Dec 2006 19:46:48 +0900 Subject: pcmcia: Pass struct pcmcia_socket to pcmcia_get_mem_page() No logic changes, just pass struct pcmcia_socket to pcmcia_get_mem_page() [linux@dominikbrodowski.net: update to 2.6.31] Signed-off-by: Magnus Damm Signed-off-by: Dominik Brodowski --- include/pcmcia/ds.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index f240bfa454f8..cbf5f05745f2 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -202,8 +202,6 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev, int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_handle_t *wh); int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); - -int pcmcia_get_mem_page(window_handle_t win, memreq_t *req); int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win, memreq_t *req); -- cgit v1.2.2 From 0bdf9b3dd3cfa5cbd5d55172c19f5dd166208e17 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Dec 2006 19:46:53 +0900 Subject: pcmcia: Change window_handle_t logic to unsigned long Logic changes based on top of the other patches: This set of patches changed window_handle_t from being a pointer to an unsigned long. The unsigned long is now a simple index into socket->win[]. Going from a pointer to unsigned long should leave the user space interface unchanged unless I'm mistaken. This change results in code that is less error prone and a user space interface which is much cleaner and safer. A nice side effect is that we are also able to remove all members except one from window_t. [ linux@dominikbrodowski.net: Update to 2.6.31.
Also, a plain "index" to socket->win[] does not work, as several codepaths rely on "window_handle_t" being non-zero if used. Therefore, set the window_handle_t to the socket->win[] index + 1. ] CC: netdev@vger.kernel.org Signed-off-by: Magnus Damm Signed-off-by: Dominik Brodowski --- include/pcmcia/cs_types.h | 3 +-- include/pcmcia/ss.h | 5 ----- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/pcmcia/cs_types.h b/include/pcmcia/cs_types.h index 315965a37930..f5e3b8386c8f 100644 --- a/include/pcmcia/cs_types.h +++ b/include/pcmcia/cs_types.h @@ -26,8 +26,7 @@ typedef u_int event_t; typedef u_char cisdata_t; typedef u_short page_t; -struct window_t; -typedef struct window_t *window_handle_t; +typedef unsigned long window_handle_t; struct region_t; typedef struct region_t *memory_handle_t; diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index 753da9b087d3..6301c3f4f19e 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -107,12 +107,7 @@ typedef struct io_window_t { struct resource *res; } io_window_t; -#define WINDOW_MAGIC 0xB35C typedef struct window_t { - u_short magic; - u_short index; - struct pcmcia_device *handle; - struct pcmcia_socket *sock; pccard_mem_map ctl; } window_t; -- cgit v1.2.2 From 82f88e36004162f49a9340ffbbaebe89016e4835 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 3 Nov 2009 01:16:12 +0100 Subject: pcmcia: remove unused "window_t" typedef Signed-off-by: Dominik Brodowski --- include/pcmcia/ss.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index 6301c3f4f19e..d85f725be7e3 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -107,10 +107,6 @@ typedef struct io_window_t { struct resource *res; } io_window_t; -typedef struct window_t { - pccard_mem_map ctl; -} window_t; - /* Maximum number of IO windows per socket */ #define MAX_IO_WIN 2 @@ -150,7 +146,7 @@ struct pcmcia_socket 
{ u_int Config; } irq; io_window_t io[MAX_IO_WIN]; - window_t win[MAX_WIN]; + pccard_mem_map win[MAX_WIN]; struct list_head cis_cache; size_t fake_cis_len; u8 *fake_cis; -- cgit v1.2.2 From 6838b03fc6564ea07d0cd87ea6e198d90ab1fc3e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 3 Nov 2009 01:31:52 +0100 Subject: pcmcia: pcmcia_request_window() doesn't need a pointer to a pointer pcmcia_request_window() only needs a pointer to struct pcmcia_device, not a pointer to a pointer. CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-scsi@vger.kernel.org CC: Jiri Kosina Acked-by: Karsten Keil (for ISDN) Signed-off-by: Dominik Brodowski --- include/pcmcia/ds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index cbf5f05745f2..d6c55fdf8d01 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -199,7 +199,7 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req); int pcmcia_request_configuration(struct pcmcia_device *p_dev, config_req_t *req); -int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, +int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_handle_t *wh); int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win, -- cgit v1.2.2 From dd2e5a156525f11754d9b1e0583f6bb49c253d62 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 3 Nov 2009 10:27:34 +0100 Subject: pcmcia: remove deprecated handle_to_dev() macro Update remaining users and remove deprecated handle_to_dev() macro CC: Harald Welte CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-serial@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/pcmcia/ds.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index d6c55fdf8d01..d403c12f7978 
100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -138,9 +138,6 @@ struct pcmcia_device { #define to_pcmcia_dev(n) container_of(n, struct pcmcia_device, dev) #define to_pcmcia_drv(n) container_of(n, struct pcmcia_driver, drv) -/* deprecated -- don't use! */ -#define handle_to_dev(handle) (handle->dev) - /* * CIS access. -- cgit v1.2.2 From 5fa9167a1bf5f5a4b7282f5e7ac56a4a5a1fa044 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 8 Nov 2009 17:24:46 +0100 Subject: pcmcia: rework the irq_req_t typedef Most of the irq_req_t typedef'd struct can be re-worked quite easily: (1) IRQInfo2 was unused in any case, so drop it. (2) IRQInfo1 was used write-only, so drop it. (3) Instance (private data to be passed to the IRQ handler): Most PCMCIA drivers using pcmcia_request_irq() to actually register an IRQ handler set the "dev_id" to the same pointer as the "priv" pointer in struct pcmcia_device. Modify the two exceptions (ipwireless, ibmtr_cs) to also work this way, and set the IRQ handler's "dev_id" to p_dev->priv unconditionally. (4) Handler is to be of type irq_handler_t. (5) Handler != NULL already tells whether an IRQ handler is present. Therefore, we do not need the IRQ_HANDLE_PRESENT flag in irq_req_t.Attributes.
CC: netdev@vger.kernel.org CC: linux-bluetooth@vger.kernel.org CC: linux-ide@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-scsi@vger.kernel.org CC: alsa-devel@alsa-project.org CC: Jaroslav Kysela CC: Jiri Kosina CC: Karsten Keil for the Bluetooth parts: Acked-by: Marcel Holtmann Signed-off-by: Dominik Brodowski --- include/pcmcia/cs.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h index 904468a191ef..afc2bfb9e917 100644 --- a/include/pcmcia/cs.h +++ b/include/pcmcia/cs.h @@ -15,6 +15,10 @@ #ifndef _LINUX_CS_H #define _LINUX_CS_H +#ifdef __KERNEL__ +#include +#endif + /* For AccessConfigurationRegister */ typedef struct conf_reg_t { u_char Function; @@ -111,11 +115,9 @@ typedef struct io_req_t { /* For RequestIRQ and ReleaseIRQ */ typedef struct irq_req_t { - u_int Attributes; - u_int AssignedIRQ; - u_int IRQInfo1, IRQInfo2; /* IRQInfo2 is ignored */ - void *Handler; - void *Instance; + u_int Attributes; + u_int AssignedIRQ; + irq_handler_t Handler; } irq_req_t; /* Attributes for RequestIRQ and ReleaseIRQ */ @@ -125,7 +127,7 @@ typedef struct irq_req_t { #define IRQ_TYPE_DYNAMIC_SHARING 0x02 #define IRQ_FORCED_PULSE 0x04 #define IRQ_FIRST_SHARED 0x08 -#define IRQ_HANDLE_PRESENT 0x10 +//#define IRQ_HANDLE_PRESENT 0x10 #define IRQ_PULSE_ALLOCATED 0x100 /* Bits in IRQInfo1 field */ -- cgit v1.2.2 From 7716977b6ae5a0cdd0afab5c6035c4d0ce53f599 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 30 Nov 2009 13:24:18 +0000 Subject: mfd: Correct WM831X_MAX_ISEL_VALUE There was confusion between the array size and the highest ISEL value possible. 
Reported-by: Dan Carpenter Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm831x/regulator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/wm831x/regulator.h b/include/linux/mfd/wm831x/regulator.h index f95466343fb2..955d30fc6a27 100644 --- a/include/linux/mfd/wm831x/regulator.h +++ b/include/linux/mfd/wm831x/regulator.h @@ -1212,7 +1212,7 @@ #define WM831X_LDO1_OK_SHIFT 0 /* LDO1_OK */ #define WM831X_LDO1_OK_WIDTH 1 /* LDO1_OK */ -#define WM831X_ISINK_MAX_ISEL 56 -extern int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL]; +#define WM831X_ISINK_MAX_ISEL 55 +extern int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL + 1]; #endif -- cgit v1.2.2 From f13a48bd798a159291ca583b95453171b88b7448 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Dec 2009 15:36:11 +0000 Subject: SLOW_WORK: Move slow_work's proc file to debugfs Move slow_work's debugging proc file to debugfs. Signed-off-by: David Howells Requested-and-acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/slow-work.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index 5035a2691739..13337bf6c3f5 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -20,7 +20,7 @@ #include struct slow_work; -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct seq_file; #endif @@ -42,8 +42,8 @@ struct slow_work_ops { /* execute a work item */ void (*execute)(struct slow_work *work); -#ifdef CONFIG_SLOW_WORK_PROC - /* describe a work item for /proc */ +#ifdef CONFIG_SLOW_WORK_DEBUG + /* describe a work item for debugfs */ void (*desc)(struct slow_work *work, struct seq_file *m); #endif }; @@ -64,7 +64,7 @@ struct slow_work { #define SLOW_WORK_DELAYED 5 /* item is struct delayed_slow_work with active timer */ const struct slow_work_ops *ops; /* operations table for this item */ struct 
list_head link; /* link in queue */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct timespec mark; /* jiffies at which queued or exec begun */ #endif }; -- cgit v1.2.2 From bf56a4ea9f1683c5b223fd3a5dbea23f1fa91c34 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:20 +0800 Subject: trace_syscalls: Remove unused event_syscall_enter and event_syscall_exit fix event_enter_##sname->event fix event_exit_##sname->event remove unused event_syscall_enter and event_syscall_exit Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D278.4090209@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 4 ++-- include/trace/syscall.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b50974a93af0..2f7c539ab96d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -178,7 +178,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ event_enter_##sname = { \ .name = "sys_enter"#sname, \ .system = "syscalls", \ - .event = &event_syscall_enter, \ + .event = &enter_syscall_print_##sname, \ .raw_init = init_enter_##sname, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ @@ -214,7 +214,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ event_exit_##sname = { \ .name = "sys_exit"#sname, \ .system = "syscalls", \ - .event = &event_syscall_exit, \ + .event = &exit_syscall_print_##sname, \ .raw_init = init_exit_##sname, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 51ee17d3632a..5f8827c92db7 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -37,8 +37,6 @@ extern unsigned long arch_syscall_addr(int nr); extern int 
syscall_name_to_nr(char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); -extern struct trace_event event_syscall_enter; -extern struct trace_event event_syscall_exit; extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); -- cgit v1.2.2 From 31c16b13349970b2684248c7d8608d2a96ae135d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:30 +0800 Subject: trace_syscalls: Set event_enter_##sname->data to its metadata Set event_enter_##sname->data to its metadata, it makes codes simpler. Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D282.7050709@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 6 ++++-- include/trace/syscall.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2f7c539ab96d..d3c9fd01a110 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -153,6 +153,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ #define __SC_STR_TDECL6(t, a, ...) 
#t, __SC_STR_TDECL5(__VA_ARGS__) #define SYSCALL_TRACE_ENTER_EVENT(sname) \ + static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_enter_##sname; \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ @@ -184,11 +185,12 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ - .data = "sys"#sname, \ + .data = (void *)&__syscall_meta_##sname,\ TRACE_SYS_ENTER_PROFILE_INIT(sname) \ } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ + static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_exit_##sname; \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ @@ -220,7 +222,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ - .data = "sys"#sname, \ + .data = (void *)&__syscall_meta_##sname,\ TRACE_SYS_EXIT_PROFILE_INIT(sname) \ } diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5f8827c92db7..c5265c81c4e7 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -34,7 +34,7 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); -extern int syscall_name_to_nr(char *name); +extern int syscall_name_to_nr(const char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); -- cgit v1.2.2 From fcc19438dda38dacc8c144e2db3ebc6b9fd4f8b8 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:36 +0800 Subject: trace_syscalls: Remove enter_id exit_id use ->enter_event->id instead of ->enter_id use ->exit_event->id instead of ->exit_id Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven 
Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D288.7030001@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 2 -- include/trace/syscall.h | 6 ------ 2 files changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d3c9fd01a110..b9af87560adb 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -168,7 +168,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ if (!id) \ return -ENODEV; \ event_enter_##sname.id = id; \ - set_syscall_enter_id(num, id); \ INIT_LIST_HEAD(&event_enter_##sname.fields); \ return 0; \ } \ @@ -205,7 +204,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ if (!id) \ return -ENODEV; \ event_exit_##sname.id = id; \ - set_syscall_exit_id(num, id); \ INIT_LIST_HEAD(&event_exit_##sname.fields); \ return 0; \ } \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index c5265c81c4e7..ca09561cd578 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -15,8 +15,6 @@ * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) - * @enter_id: associated ftrace enter event id - * @exit_id: associated ftrace exit event id * @enter_event: associated syscall_enter trace event * @exit_event: associated syscall_exit trace event */ @@ -25,8 +23,6 @@ struct syscall_metadata { int nb_args; const char **types; const char **args; - int enter_id; - int exit_id; struct ftrace_event_call *enter_event; struct ftrace_event_call *exit_event; @@ -35,8 +31,6 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); extern int syscall_name_to_nr(const char *name); -void set_syscall_enter_id(int num, int id); -void set_syscall_exit_id(int num, int id); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); -- cgit v1.2.2 From 
c252f65793874b56d50395ab604db465ce688665 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:47 +0800 Subject: trace_syscalls: Add syscall_nr field to struct syscall_metadata Add syscall_nr field to struct syscall_metadata, it helps us to get syscall number easier. Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D293.6090800@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 4 ++-- include/trace/syscall.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b9af87560adb..3c280d7ecb76 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -161,7 +161,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ static int init_enter_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ - num = syscall_name_to_nr("sys"#sname); \ + num = __syscall_meta_##sname.syscall_nr; \ if (num < 0) \ return -ENOSYS; \ id = register_ftrace_event(&enter_syscall_print_##sname);\ @@ -197,7 +197,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ static int init_exit_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ - num = syscall_name_to_nr("sys"#sname); \ + num = __syscall_meta_##sname.syscall_nr; \ if (num < 0) \ return -ENOSYS; \ id = register_ftrace_event(&exit_syscall_print_##sname);\ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index ca09561cd578..1531eef3071f 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -12,6 +12,7 @@ * A syscall entry in the ftrace syscalls array. 
* * @name: name of the syscall + * @syscall_nr: number of the syscall * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) @@ -20,6 +21,7 @@ */ struct syscall_metadata { const char *name; + int syscall_nr; int nb_args; const char **types; const char **args; @@ -30,7 +32,6 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); -extern int syscall_name_to_nr(const char *name); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); -- cgit v1.2.2 From a1301da0997bf73c44dbe584e9070a13adc89672 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:55 +0800 Subject: trace_syscalls: Remove duplicate init_enter_##sname() use only one init_syscall_trace instead of many init_enter_##sname()/init_exit_##sname() Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D29B.6090708@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 30 ++---------------------------- include/trace/syscall.h | 1 + 2 files changed, 3 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3c280d7ecb76..cf0d923ea40e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -158,19 +158,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - static int init_enter_##sname(struct ftrace_event_call *call) \ - { \ - int num, id; \ - num = __syscall_meta_##sname.syscall_nr; \ - if (num < 0) \ - return -ENOSYS; \ - id = register_ftrace_event(&enter_syscall_print_##sname);\ - if (!id) \ - return -ENODEV; \ - event_enter_##sname.id = id; \ - INIT_LIST_HEAD(&event_enter_##sname.fields); \ - return 0; \ - } \ TRACE_SYS_ENTER_PROFILE(sname); \ static 
struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -179,7 +166,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .name = "sys_enter"#sname, \ .system = "syscalls", \ .event = &enter_syscall_print_##sname, \ - .raw_init = init_enter_##sname, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ @@ -194,19 +181,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - static int init_exit_##sname(struct ftrace_event_call *call) \ - { \ - int num, id; \ - num = __syscall_meta_##sname.syscall_nr; \ - if (num < 0) \ - return -ENOSYS; \ - id = register_ftrace_event(&exit_syscall_print_##sname);\ - if (!id) \ - return -ENODEV; \ - event_exit_##sname.id = id; \ - INIT_LIST_HEAD(&event_exit_##sname.fields); \ - return 0; \ - } \ TRACE_SYS_EXIT_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -215,7 +189,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .name = "sys_exit"#sname, \ .system = "syscalls", \ .event = &exit_syscall_print_##sname, \ - .raw_init = init_exit_##sname, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 1531eef3071f..dff9371e5274 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -32,6 +32,7 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); +extern int init_syscall_trace(struct ftrace_event_call *call); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); -- cgit v1.2.2 From 3bbe84e9d385205d638035ee9dcc4db1b486ea08 Mon Sep 
17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:24:01 +0800 Subject: trace_syscalls: Simplify syscall profile use only one prof_sysenter_enable() instead of prof_sysenter_enable_##sname() use only one prof_sysenter_disable() instead of prof_sysenter_disable_##sname() use only one prof_sysexit_enable() instead of prof_sysexit_enable_##sname() use only one prof_sysexit_disable() instead of prof_sysexit_disable_##sname() Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D2A1.8060304@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 31 ++++--------------------------- include/trace/syscall.h | 8 ++++---- 2 files changed, 8 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index cf0d923ea40e..c2df3a593236 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -99,37 +99,16 @@ struct perf_event_attr; #define __SC_TEST6(t6, a6, ...) 
__SC_TEST(t6); __SC_TEST5(__VA_ARGS__) #ifdef CONFIG_EVENT_PROFILE -#define TRACE_SYS_ENTER_PROFILE(sname) \ -static int prof_sysenter_enable_##sname(struct ftrace_event_call *unused) \ -{ \ - return reg_prof_syscall_enter("sys"#sname); \ -} \ - \ -static void prof_sysenter_disable_##sname(struct ftrace_event_call *unused) \ -{ \ - unreg_prof_syscall_enter("sys"#sname); \ -} - -#define TRACE_SYS_EXIT_PROFILE(sname) \ -static int prof_sysexit_enable_##sname(struct ftrace_event_call *unused) \ -{ \ - return reg_prof_syscall_exit("sys"#sname); \ -} \ - \ -static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ -{ \ - unreg_prof_syscall_exit("sys"#sname); \ -} #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = prof_sysenter_enable_##sname, \ - .profile_disable = prof_sysenter_disable_##sname, + .profile_enable = prof_sysenter_enable, \ + .profile_disable = prof_sysenter_disable, #define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = prof_sysexit_enable_##sname, \ - .profile_disable = prof_sysexit_disable_##sname, + .profile_enable = prof_sysexit_enable, \ + .profile_disable = prof_sysexit_disable, #else #define TRACE_SYS_ENTER_PROFILE(sname) #define TRACE_SYS_ENTER_PROFILE_INIT(sname) @@ -158,7 +137,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - TRACE_SYS_ENTER_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -181,7 +159,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - TRACE_SYS_EXIT_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) 
\ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index dff9371e5274..961fda3556bb 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -50,10 +50,10 @@ enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif #ifdef CONFIG_EVENT_PROFILE -int reg_prof_syscall_enter(char *name); -void unreg_prof_syscall_enter(char *name); -int reg_prof_syscall_exit(char *name); -void unreg_prof_syscall_exit(char *name); +int prof_sysenter_enable(struct ftrace_event_call *call); +void prof_sysenter_disable(struct ftrace_event_call *call); +int prof_sysexit_enable(struct ftrace_event_call *call); +void prof_sysexit_disable(struct ftrace_event_call *call); #endif -- cgit v1.2.2 From 8592e6486a177a02f048567cb928bc3a1f9a86c3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 2 Dec 2009 12:56:46 +0900 Subject: sched: Revert 498657a478c60be092208422fefa9c7b248729c2 498657a478c60be092208422fefa9c7b248729c2 incorrectly assumed that preempt wasn't disabled around context_switch() and thus was fixing imaginary problem. It also broke KVM because it depended on ->sched_in() to be called with irq enabled so that it can do smp calls from there. Revert the incorrect commit and add comment describing different contexts under which the two callbacks are invoked. Avi: spotted transposed in/out in the added comment. 
Signed-off-by: Tejun Heo Acked-by: Avi Kivity Cc: peterz@infradead.org Cc: efault@gmx.de Cc: rusty@rustcorp.com.au LKML-Reference: <1259726212-30259-2-git-send-email-tj@kernel.org> Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 72b1a10a59b6..2e681d9555bd 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -105,6 +105,11 @@ struct preempt_notifier; * @sched_out: we've just been preempted * notifier: struct preempt_notifier for the task being preempted * next: the task that's kicking us out + * + * Please note that sched_in and out are called under different + * contexts. sched_out is called with rq lock held and irq disabled + * while sched_in is called without rq lock and irq enabled. This + * difference is intentional and depended upon by its users. */ struct preempt_ops { void (*sched_in)(struct preempt_notifier *notifier, int cpu); -- cgit v1.2.2 From 6b62fe019e39edfd1dbe3f224ecd0a87d9365223 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2009 07:23:10 +0100 Subject: tracing/syscalls: Make syscall events print callbacks static enter_syscall_print_##sname and exit_syscall_print_##sname don't need to have a global scope. Make them static. 
Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Jason Baron Cc: Lai Jiangshan LKML-Reference: <1259734990-9034-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c2df3a593236..e79e2f3ccc51 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -134,7 +134,7 @@ struct perf_event_attr; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_enter_##sname; \ - struct trace_event enter_syscall_print_##sname = { \ + static struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ static struct ftrace_event_call __used \ @@ -156,7 +156,7 @@ struct perf_event_attr; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_exit_##sname; \ - struct trace_event exit_syscall_print_##sname = { \ + static struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ static struct ftrace_event_call __used \ -- cgit v1.2.2 From fa1452e808732ae10e8b1267fd75fc2d028d634b Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 30 Nov 2009 14:59:44 +0900 Subject: locking, task_struct: Reduce size on TRACE_IRQFLAGS and 64bit Reorder task_struct field for TRACE_IRQFLAGS to remove padding on 64-bit. 
Signed-off-by: Hiroshi Shimamoto Cc: Peter Zijlstra LKML-Reference: <4B135F50.8070302@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 75e6e60bf583..49be8f7c05f6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1421,17 +1421,17 @@ struct task_struct { #endif #ifdef CONFIG_TRACE_IRQFLAGS unsigned int irq_events; - int hardirqs_enabled; unsigned long hardirq_enable_ip; - unsigned int hardirq_enable_event; unsigned long hardirq_disable_ip; + unsigned int hardirq_enable_event; unsigned int hardirq_disable_event; - int softirqs_enabled; + int hardirqs_enabled; + int hardirq_context; unsigned long softirq_disable_ip; - unsigned int softirq_disable_event; unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; unsigned int softirq_enable_event; - int hardirq_context; + int softirqs_enabled; int softirq_context; #endif #ifdef CONFIG_LOCKDEP -- cgit v1.2.2 From d99ca3b977fc5a93141304f571475c2af9e6c1c5 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 2 Dec 2009 17:26:47 +0900 Subject: sched, cputime: Cleanups related to task_times() - Remove if({u,s}t)s because no one call it with NULL now. - Use cputime_{add,sub}(). - Add ifndef-endif for prev_{u,s}time since they are used only when !VIRT_CPU_ACCOUNTING. 
Signed-off-by: Hidetoshi Seto Cc: Peter Zijlstra Cc: Spencer Candland Cc: Americo Wang Cc: Oleg Nesterov Cc: Balbir Singh Cc: Stanislaw Gruszka LKML-Reference: <4B1624C7.7040302@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0395b0f4df3a..dff85e58264e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1331,7 +1331,9 @@ struct task_struct { cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; +#ifndef CONFIG_VIRT_CPU_ACCOUNTING cputime_t prev_utime, prev_stime; +#endif unsigned long nvcsw, nivcsw; /* context switch counts */ struct timespec start_time; /* monotonic time */ struct timespec real_start_time; /* boot based time */ -- cgit v1.2.2 From 0cf55e1ec08bb5a22e068309e2d8ba1180ab4239 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 2 Dec 2009 17:28:07 +0900 Subject: sched, cputime: Introduce thread_group_times() This is a real fix for problem of utime/stime values decreasing described in the thread: http://lkml.org/lkml/2009/11/3/522 Now cputime is accounted in the following way: - {u,s}time in task_struct are increased every time when the thread is interrupted by a tick (timer interrupt). - When a thread exits, its {u,s}time are added to signal->{u,s}time, after adjusted by task_times(). - When all threads in a thread_group exits, accumulated {u,s}time (and also c{u,s}time) in signal struct are added to c{u,s}time in signal struct of the group's parent. So {u,s}time in task struct are "raw" tick count, while {u,s}time and c{u,s}time in signal struct are "adjusted" values. And accounted values are used by: - task_times(), to get cputime of a thread: This function returns adjusted values that originates from raw {u,s}time and scaled by sum_exec_runtime that accounted by CFS. 
- thread_group_cputime(), to get cputime of a thread group: This function returns sum of all {u,s}time of living threads in the group, plus {u,s}time in the signal struct that is sum of adjusted cputimes of all exited threads belonged to the group. The problem is the return value of thread_group_cputime(), because it is mixed sum of "raw" value and "adjusted" value: group's {u,s}time = foreach(thread){{u,s}time} + exited({u,s}time) This misbehavior can break {u,s}time monotonicity. Assume that if there is a thread that have raw values greater than adjusted values (e.g. interrupted by 1000Hz ticks 50 times but only runs 45ms) and if it exits, cputime will decrease (e.g. -5ms). To fix this, we could do: group's {u,s}time = foreach(t){task_times(t)} + exited({u,s}time) But task_times() contains hard divisions, so applying it for every thread should be avoided. This patch fixes the above problem in the following way: - Modify thread's exit (= __exit_signal()) not to use task_times(). It means {u,s}time in signal struct accumulates raw values instead of adjusted values. As the result it makes thread_group_cputime() to return pure sum of "raw" values. - Introduce a new function thread_group_times(*task, *utime, *stime) that converts "raw" values of thread_group_cputime() to "adjusted" values, in same calculation procedure as task_times(). - Modify group's exit (= wait_task_zombie()) to use this introduced thread_group_times(). It make c{u,s}time in signal struct to have adjusted values like before this patch. - Replace some thread_group_cputime() by thread_group_times(). These replacements are only applied where the code conveys the "adjusted" cputime to users, and where it already uses task_times() nearby. (i.e. sys_times(), getrusage(), and /proc/<pid>/stat.) This patch have a positive side effect: - Before this patch, if a group contains many short-life threads (e.g. 
runs 0.9ms and not interrupted by ticks), the group's cputime could be invisible since thread's cputime was accumulated after adjusted: imagine adjustment function as adj(ticks, runtime), {adj(0, 0.9) + adj(0, 0.9) + ....} = {0 + 0 + ....} = 0. After this patch it will not happen because the adjustment is applied after accumulated. v2: - remove if()s, put new variables into signal_struct. Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Spencer Candland Cc: Americo Wang Cc: Oleg Nesterov Cc: Balbir Singh Cc: Stanislaw Gruszka LKML-Reference: <4B162517.8040909@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index dff85e58264e..34238bd10ebf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -624,6 +624,9 @@ struct signal_struct { cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; +#ifndef CONFIG_VIRT_CPU_ACCOUNTING + cputime_t prev_utime, prev_stime; +#endif unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; @@ -1723,6 +1726,7 @@ static inline void put_task_struct(struct task_struct *t) } extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st); +extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st); /* * Per process flags -- cgit v1.2.2 From 7cff7ce94a7df2ccf5ac76b48ee0995fee2060df Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 9 Oct 2009 00:01:39 -0700 Subject: include/linux/compiler-gcc4.h: Fix build bug - gcc-4.0.2 doesn't understand __builtin_object_size Maybe 4.1.0 doesn't too, but this fixed it for me. 
Caused by: 4a31276: x86: Turn the copy_from_user check into an (optional) compile time warning 9f0cf4a: x86: Use __builtin_object_size() to validate the buffer size for copy_from_user() Signed-off-by: Andrew Morton Cc: Arjan van de Ven LKML-Reference: <200910090724.n997OQl6013538@imap1.linux-foundation.org> Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 77542c57e20a..e6ef279ca20c 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -38,7 +38,9 @@ #endif +#if __GNUC_MINOR__ > 0 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) +#endif #if __GNUC_MINOR__ >= 4 #define __compiletime_warning(message) __attribute__((warning(message))) #define __compiletime_error(message) __attribute__((error(message))) -- cgit v1.2.2 From 796bd9524731850967d437b7f47a86acc776ea89 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 29 Sep 2009 12:27:23 +0100 Subject: VFS: Add forget_all_cached_acls() This is required for cluster filesystems which want to use cached ACLs so that they can invalidate the cache when required. 
Signed-off-by: Steven Whitehouse Cc: Alexander Viro Cc: Christoph Hellwig --- include/linux/posix_acl.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 065a3652a3ea..67608161df6b 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -147,6 +147,20 @@ static inline void forget_cached_acl(struct inode *inode, int type) if (old != ACL_NOT_CACHED) posix_acl_release(old); } + +static inline void forget_all_cached_acls(struct inode *inode) +{ + struct posix_acl *old_access, *old_default; + spin_lock(&inode->i_lock); + old_access = inode->i_acl; + old_default = inode->i_default_acl; + inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; + spin_unlock(&inode->i_lock); + if (old_access != ACL_NOT_CACHED) + posix_acl_release(old_access); + if (old_default != ACL_NOT_CACHED) + posix_acl_release(old_default); +} #endif static inline void cache_no_acl(struct inode *inode) -- cgit v1.2.2 From 86e931a35e93d94e6e91b57cc76456e16d188ea9 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 28 Sep 2009 12:35:17 +0100 Subject: VFS: Export dquot_send_warning Sending a message to userspace in a generic format to warn of events (e.g. quota exceeded) in the quota subsystem is a generically useful feature. This patch makes some minor changes to the send_message function from dquot.c renaming it quota_send_warning, moving it to quota.c and exporting it for use by filesystems which do not use the dquot code. 
Signed-off-by: Steven Whitehouse --- include/linux/quota.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/quota.h b/include/linux/quota.h index 78c48895b12a..ce9a9b2e5cd4 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -376,6 +376,17 @@ static inline unsigned int dquot_generic_flag(unsigned int flags, int type) return flags >> _DQUOT_STATE_FLAGS; } +#ifdef CONFIG_QUOTA_NETLINK_INTERFACE +extern void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype); +#else +static inline void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype) +{ + return; +} +#endif /* CONFIG_QUOTA_NETLINK_INTERFACE */ + struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ struct mutex dqio_mutex; /* lock device while I/O in progress */ -- cgit v1.2.2 From 0ab7d13fcbd7ce1658c563e345990ba453719deb Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 6 Nov 2009 16:20:51 +0000 Subject: GFS2: Tag all metadata with jid There are two spare fields in the header common to all GFS2 metadata. One is just the right size to fit a journal id in it, and this patch updates the journal code so that each time a metadata block is modified, we tag it with the journal id of the node which is performing the modification. The reason for this is that it should make it much easier to debug issues which arise if we can tell which node was the last to modify a particular metadata block. Since the field is updated before the block is written into the journal, each journal should only contain metadata which is tagged with its own journal id. The one exception to this is the journal header block, which might have a different node's id in it, if that journal was recovered by another node in the cluster. Thus each journal will contain a record of which nodes recovered it, via the journal header. 
The other field in the metadata header could potentially be used to hold information about what kind of operation was performed, but for the time being we just zero it on each transaction so that if we use it for that in future, we'll know that the information (where it exists) is reliable. I did consider using the other field to hold the journal sequence number, however since in GFS2's journaling we write the modified data into the journal and not the original data, this gives no information as to what action caused the modification, so I think we can probably come up with a better use for those 64 bits in the future. Signed-off-by: Steven Whitehouse --- include/linux/gfs2_ondisk.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index b80c88dedbbb..81f90a59cda6 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -81,7 +81,11 @@ struct gfs2_meta_header { __be32 mh_type; __be64 __pad0; /* Was generation number in gfs1 */ __be32 mh_format; - __be32 __pad1; /* Was incarnation number in gfs1 */ + /* This union is to keep userspace happy */ + union { + __be32 mh_jid; /* Was incarnation number in gfs1 */ + __be32 __pad1; + }; }; /* -- cgit v1.2.2 From 38938c879eb0c39edf85d5164aa0cffe2874304c Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 4 Dec 2009 17:44:50 -0800 Subject: Add support for GCC-4.5's __builtin_unreachable() to compiler.h (v2) Starting with version 4.5, GCC has a new built-in function __builtin_unreachable() that can be used in places like the kernel's BUG() where inline assembly is used to transfer control flow. This eliminated the need for an endless loop in these places. The patch adds a new macro 'unreachable()' that will expand to either __builtin_unreachable() or an endless loop depending on the compiler version. Change from v1: Simplify unreachable() for non-GCC 4.5 case. 
Signed-off-by: David Daney Acked-by: Ralf Baechle Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc4.h | 14 ++++++++++++++ include/linux/compiler.h | 5 +++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 450fa597c94d..ab3af40a53c6 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -36,4 +36,18 @@ the kernel context */ #define __cold __attribute__((__cold__)) + +#if __GNUC_MINOR__ >= 5 +/* + * Mark a position in code as unreachable. This can be used to + * suppress control flow warnings after asm blocks that transfer + * control elsewhere. + * + * Early snapshots of gcc 4.5 don't support this and we can't detect + * this in the preprocessor, but we can live with this because they're + * unreleased. Really, we need to have autoconf for the kernel. + */ +#define unreachable() __builtin_unreachable() +#endif + #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb5135b4e1..59f208926d13 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -144,6 +144,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define barrier() __memory_barrier() #endif +/* Unreachable code */ +#ifndef unreachable +# define unreachable() do { } while (1) +#endif + #ifndef RELOC_HIDE # define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ -- cgit v1.2.2