/*
* Kprobes-based tracing events
*
* Created by Masami Hiramatsu <mhiramat@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/module.h>
#include <linux/uaccess.h>
#include "trace_probe.h"
#define KPROBE_EVENT_SYSTEM "kprobes"
/**
* Kprobe event core functions
*/
/*
 * One dynamic kprobe/kretprobe trace event.
 *
 * The same structure serves both probe kinds: a kprobe uses only rp.kp,
 * a kretprobe uses the whole rp. The trailing args[] array is sized at
 * allocation time, so instances must be allocated with
 * SIZEOF_TRACE_PROBE(nargs).
 */
struct trace_probe {
struct list_head list; /* Entry on probe_list once the event is registered */
struct kretprobe rp; /* Use rp.kp for kprobe use */
unsigned long nhit; /* Hit counter, reported by the profile seq_file */
unsigned int flags; /* For TP_FLAG_* */
const char *symbol; /* symbol name */
struct ftrace_event_class class; /* class.system holds the group name */
struct ftrace_event_call call; /* call.name holds the event name */
ssize_t size; /* trace entry size */
unsigned int nr_args; /* Number of initialized entries in args[] */
struct probe_arg args[]; /* Fetch arguments (flexible array member) */
};
/* Bytes needed for a struct trace_probe carrying @n trailing probe_arg slots */
#define SIZEOF_TRACE_PROBE(n)						\
	(offsetof(struct trace_probe, args) + (n) * sizeof(struct probe_arg))
/* Return 1 if @tp has a kretprobe handler installed (return probe), else 0 */
static __kprobes int trace_probe_is_return(struct trace_probe *tp)
{
	return tp->rp.handler ? 1 : 0;
}
/* Return the probed symbol name, or "unknown" for address-based probes */
static __kprobes const char *trace_probe_symbol(struct trace_probe *tp)
{
	if (!tp->symbol)
		return "unknown";

	return tp->symbol;
}
static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp)
{
return tp->rp.kp.offset;
}
/* True while at least one of TP_FLAG_TRACE / TP_FLAG_PROFILE is set on @tp */
static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp)
{
	return (tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE)) != 0;
}
/* True once __register_trace_probe() has set TP_FLAG_REGISTERED on @tp */
static __kprobes bool trace_probe_is_registered(struct trace_probe *tp)
{
	return (tp->flags & TP_FLAG_REGISTERED) != 0;
}
/* True if kprobe_gone() reports the underlying kprobe as gone */
static __kprobes bool trace_probe_has_gone(struct trace_probe *tp)
{
	return kprobe_gone(&tp->rp.kp) ? true : false;
}
/*
 * True if the symbol of @tp is written as "<mod->name>:...", i.e. the
 * module name followed directly by a colon.
 */
static __kprobes bool trace_probe_within_module(struct trace_probe *tp,
						struct module *mod)
{
	const char *sym = trace_probe_symbol(tp);
	int modlen = strlen(mod->name);

	if (strncmp(mod->name, sym, modlen) != 0)
		return false;

	return sym[modlen] == ':';
}
/* True if the symbol of @tp contains a ':' (MOD:SYM notation) */
static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
{
	const char *sym = trace_probe_symbol(tp);

	return strchr(sym, ':') != NULL;
}
/* probe_lock is held while probe_list is walked or modified */
static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

/* ftrace event registration for a probe */
static int register_probe_event(struct trace_probe *tp);
static void unregister_probe_event(struct trace_probe *tp);

/* k*probe handlers installed by alloc_trace_probe() */
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);
/*
 * Allocate a trace_probe with room for @nargs arguments and set up its
 * k*probe, event name and group name.
 *
 * The probe point is @symbol + @offs when @symbol is given, otherwise the
 * raw address @addr. @is_return selects the kretprobe handler instead of
 * the kprobe pre-handler.
 *
 * Returns the new probe, ERR_PTR(-ENOMEM) if an allocation fails, or
 * ERR_PTR(-EINVAL) if @event or @group is NULL or rejected by
 * is_good_name().
 */
static struct trace_probe *alloc_trace_probe(const char *group,
					     const char *event,
					     void *addr,
					     const char *symbol,
					     unsigned long offs,
					     int nargs, bool is_return)
{
	struct trace_probe *tp;
	int err;

	tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(-ENOMEM);

	if (!symbol) {
		tp->rp.kp.addr = addr;
	} else {
		/* Keep a private copy; the kprobe points at the same string */
		tp->symbol = kstrdup(symbol, GFP_KERNEL);
		if (!tp->symbol) {
			err = -ENOMEM;
			goto fail;
		}
		tp->rp.kp.symbol_name = tp->symbol;
		tp->rp.kp.offset = offs;
	}

	if (is_return)
		tp->rp.handler = kretprobe_dispatcher;
	else
		tp->rp.kp.pre_handler = kprobe_dispatcher;

	if (!event || !is_good_name(event)) {
		err = -EINVAL;
		goto fail;
	}

	tp->call.class = &tp->class;
	tp->call.name = kstrdup(event, GFP_KERNEL);
	if (!tp->call.name) {
		err = -ENOMEM;
		goto fail;
	}

	if (!group || !is_good_name(group)) {
		err = -EINVAL;
		goto fail;
	}

	tp->class.system = kstrdup(group, GFP_KERNEL);
	if (!tp->class.system) {
		err = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&tp->list);
	return tp;

fail:
	/* tp came from kzalloc(), so strings not yet duplicated are NULL */
	kfree(tp->call.name);
	kfree(tp->symbol);
	kfree(tp);
	return ERR_PTR(err);
}
/* Release the arguments, the duplicated strings and finally @tp itself */
static void free_trace_probe(struct trace_probe *tp)
{
	int idx = 0;

	while (idx < tp->nr_args) {
		traceprobe_free_probe_arg(&tp->args[idx]);
		idx++;
	}

	kfree(tp->call.class->system);
	kfree(tp->call.name);
	kfree(tp->symbol);
	kfree(tp);
}
static struct trace_probe *find_trace_probe(const char *event,
const char *group)
{
struct trace_probe *tp;
list_for_each_entry(tp, &probe_list, list)
if (strcmp(tp->call.name, event) == 0 &&
strcmp(tp->call.class->system, group) == 0)
return tp;
return NULL;
}
/*
 * Enable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE.
 *
 * The flag is always recorded. The k*probe itself is only enabled when the
 * probe is registered and has not gone. Returns 0 or the error from
 * enable_kprobe()/enable_kretprobe().
 */
static int enable_trace_probe(struct trace_probe *tp, int flag)
{
	tp->flags |= flag;

	if (!trace_probe_is_enabled(tp))
		return 0;
	if (!trace_probe_is_registered(tp))
		return 0;
	if (trace_probe_has_gone(tp))
		return 0;

	if (trace_probe_is_return(tp))
		return enable_kretprobe(&tp->rp);

	return enable_kprobe(&tp->rp.kp);
}
/*
 * Disable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE.
 *
 * The k*probe is disabled only when no enabling flag remains and the
 * probe is registered.
 */
static void disable_trace_probe(struct trace_probe *tp, int flag)
{
	tp->flags &= ~flag;

	/* Still enabled through the other flag, or nothing registered */
	if (trace_probe_is_enabled(tp) || !trace_probe_is_registered(tp))
		return;

	if (trace_probe_is_return(tp))
		disable_kretprobe(&tp->rp);
	else
		disable_kprobe(&tp->rp.kp);
}
/*
 * Internal register function - just handle k*probes and flags.
 *
 * Updates every argument, mirrors the enabled state of @tp into
 * KPROBE_FLAG_DISABLED and registers the kprobe or kretprobe.
 *
 * Returns:
 *   0        on success (TP_FLAG_REGISTERED is set), and also when
 *            registration failed with -ENOENT for a MOD:SYM probe; in that
 *            case the probe stays unregistered,
 *   -EINVAL  if @tp is already registered, or the k*probe layer returned
 *            -EILSEQ,
 *   other    negative error from register_kprobe()/register_kretprobe().
 */
static int __register_trace_probe(struct trace_probe *tp)
{
	int i, ret;

	if (trace_probe_is_registered(tp))
		return -EINVAL;

	for (i = 0; i < tp->nr_args; i++)
		traceprobe_update_arg(&tp->args[i]);

	/* Set/clear disabled flag according to tp->flag */
	if (trace_probe_is_enabled(tp))
		tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
	else
		tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;

	if (trace_probe_is_return(tp))
		ret = register_kretprobe(&tp->rp);
	else
		ret = register_kprobe(&tp->rp.kp);

	if (ret == 0) {
		tp->flags |= TP_FLAG_REGISTERED;
		return 0;
	}

	pr_warning("Could not insert probe at %s+%lu: %d\n",
		   trace_probe_symbol(tp), trace_probe_offset(tp), ret);

	if (ret == -ENOENT && trace_probe_is_on_module(tp)) {
		/*
		 * Adjacent string literals are concatenated verbatim, so the
		 * first one must end with a space.
		 */
		pr_warning("This probe might be able to register after "
			   "target module is loaded. Continue.\n");
		ret = 0;
	} else if (ret == -EILSEQ) {
		pr_warning("Probing address(0x%p) is not an "
			   "instruction boundary.\n",
			   tp->rp.kp.addr);
		ret = -EINVAL;
	}

	return ret;
}
/*
 * Internal unregister function - just handle k*probes and flags.
 * Does nothing if @tp is not currently registered.
 */
static void __unregister_trace_probe(struct trace_probe *tp)
{
	if (!trace_probe_is_registered(tp))
		return;

	if (trace_probe_is_return(tp))
		unregister_kretprobe(&tp->rp);
	else
		unregister_kprobe(&tp->rp.kp);

	tp->flags &= ~TP_FLAG_REGISTERED;

	/* Cleanup kprobe for reuse */
	if (tp->rp.kp.symbol_name)
		tp->rp.kp.addr = NULL;
}
/*
 * Unregister a trace_probe and probe_event: call with locking probe_lock.
 * Returns -EBUSY without touching anything while the event is enabled,
 * 0 otherwise. The caller remains responsible for freeing @tp.
 */
static int unregister_trace_probe(struct trace_probe *tp)
{
	int ret = -EBUSY;

	/* Enabled event can not be unregistered */
	if (!trace_probe_is_enabled(tp)) {
		__unregister_trace_probe(tp);
		list_del(&tp->list);
		unregister_probe_event(tp);
		ret = 0;
	}

	return ret;
}
/*
 * Register a trace_probe and probe_event.
 *
 * Takes probe_lock. An existing event with the same group/name is
 * unregistered and freed first; if that event is busy its error is
 * returned and @tp is not registered. On success @tp is appended to
 * probe_list. On failure the caller still owns @tp.
 */
static int register_trace_probe(struct trace_probe *tp)
{
	struct trace_probe *prev;
	int ret;

	mutex_lock(&probe_lock);

	/* Delete old (same name) event if exist */
	prev = find_trace_probe(tp->call.name, tp->call.class->system);
	if (prev) {
		ret = unregister_trace_probe(prev);
		if (ret < 0)
			goto unlock;
		free_trace_probe(prev);
	}

	/* Register new event */
	ret = register_probe_event(tp);
	if (ret) {
		pr_warning("Failed to register probe event(%d)\n", ret);
		goto unlock;
	}

	/* Register k*probe */
	ret = __register_trace_probe(tp);
	if (ret >= 0)
		list_add_tail(&tp->list, &probe_list);
	else
		unregister_probe_event(tp);

unlock:
	mutex_unlock(&probe_lock);
	return ret;
}
/*
 * Module notifier call back, checking event on the module.
 *
 * Only MODULE_STATE_COMING is handled: every probe on probe_list whose
 * symbol names the coming module (@data) is unregistered and registered
 * again, under probe_lock. A failed re-registration is only logged.
 * Always returns NOTIFY_DONE.
 */
static int trace_probe_module_callback(struct notifier_block *nb,
				       unsigned long val, void *data)
{
	struct module *mod = data;
	struct trace_probe *tp;
	int ret;

	if (val != MODULE_STATE_COMING)
		return NOTIFY_DONE;

	/* Update probes on coming module */
	mutex_lock(&probe_lock);
	list_for_each_entry(tp, &probe_list, list) {
		if (!trace_probe_within_module(tp, mod))
			continue;

		/* Don't need to check busy - this should have gone. */
		__unregister_trace_probe(tp);
		ret = __register_trace_probe(tp);
		if (ret)
			/*
			 * The split literal needs a space after "on",
			 * otherwise the module name is glued to it.
			 */
			pr_warning("Failed to re-register probe %s on "
				   "%s: %d\n",
				   tp->call.name, mod->name, ret);
	}
	mutex_unlock(&probe_lock);

	return NOTIFY_DONE;
}
/*
 * Notifier block that dispatches to trace_probe_module_callback().
 * NOTE(review): presumably registered on the module notifier chain
 * elsewhere in this file - confirm.
 */
static struct notifier_block trace_probe_module_nb = {
.notifier_call = trace_probe_module_callback,
.priority = 1 /* Invoked after kprobe module callback */
};
/*
 * Parse one probe definition command and act on it.
 *
 * @argc/@argv hold the whitespace-split command; argv[0] must exist.
 * A 'p' or 'r' command allocates, parses and registers a new probe,
 * a '-' command deletes the named event.
 *
 * Note that the strings in @argv are modified in place while parsing
 * (the '/' of GRP/EVENT and the '=' of NAME=FETCHARG are overwritten
 * with NUL).
 *
 * Returns 0 on success or a negative errno (-EINVAL for syntax errors,
 * -ENOENT when deleting an unknown event, -ENOMEM, or an error from the
 * helpers that parse the arguments and register the probe).
 */
static int create_trace_probe(int argc, char **argv)
{
	/*
	 * Argument syntax:
	 *  - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
	 *  - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
	 * Fetch args:
	 *  $retval	: fetch return value
	 *  $stack	: fetch stack address
	 *  $stackN	: fetch Nth of stack (N:0-)
	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
	 *  %REG	: fetch register REG
	 * Dereferencing memory fetch:
	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
	 * Alias name of args:
	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
	 * Type of args:
	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
	 */
	struct trace_probe *tp;
	int i, ret = 0;
	bool is_return = false, is_delete = false;
	char *symbol = NULL, *event = NULL, *group = NULL;
	char *arg;
	unsigned long offset = 0;
	void *addr = NULL;
	char buf[MAX_EVENT_NAME_LEN];

	/* argc must be >= 1 */
	if (argv[0][0] == 'p')
		is_return = false;
	else if (argv[0][0] == 'r')
		is_return = true;
	else if (argv[0][0] == '-')
		is_delete = true;
	else {
		pr_info("Probe definition must be started with 'p', 'r' or"
			" '-'.\n");
		return -EINVAL;
	}

	/* Optional ":[GRP/]EVENT" suffix of the command character */
	if (argv[0][1] == ':') {
		event = &argv[0][2];
		if (strchr(event, '/')) {
			group = event;
			event = strchr(group, '/') + 1;
			/* Split "GRP/EVENT" into two strings in place */
			event[-1] = '\0';
			if (strlen(group) == 0) {
				pr_info("Group name is not specified\n");
				return -EINVAL;
			}
		}
		if (strlen(event) == 0) {
			pr_info("Event name is not specified\n");
			return -EINVAL;
		}
	}
	if (!group)
		group = KPROBE_EVENT_SYSTEM;

	if (is_delete) {
		if (!event) {
			pr_info("Delete command needs an event name.\n");
			return -EINVAL;
		}
		mutex_lock(&probe_lock);
		tp = find_trace_probe(event, group);
		if (!tp) {
			mutex_unlock(&probe_lock);
			pr_info("Event %s/%s doesn't exist.\n", group, event);
			return -ENOENT;
		}
		/* delete an event */
		ret = unregister_trace_probe(tp);
		if (ret == 0)
			free_trace_probe(tp);
		mutex_unlock(&probe_lock);
		return ret;
	}

	if (argc < 2) {
		pr_info("Probe point is not specified.\n");
		return -EINVAL;
	}
	if (isdigit(argv[1][0])) {
		if (is_return) {
			pr_info("Return probe point must be a symbol.\n");
			return -EINVAL;
		}
		/* an address specified */
		ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
		if (ret) {
			pr_info("Failed to parse address.\n");
			return ret;
		}
	} else {
		/* a symbol specified */
		symbol = argv[1];
		/* TODO: support .init module functions */
		ret = traceprobe_split_symbol_offset(symbol, &offset);
		if (ret) {
			pr_info("Failed to parse symbol.\n");
			return ret;
		}
		if (offset && is_return) {
			pr_info("Return probe must be used without offset.\n");
			return -EINVAL;
		}
	}
	/* From here on argv[] holds only the fetch arguments */
	argc -= 2; argv += 2;

	/* setup a probe */
	if (!event) {
		/* Make a new event name */
		if (symbol)
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
				 is_return ? 'r' : 'p', symbol, offset);
		else
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
				 is_return ? 'r' : 'p', addr);
		event = buf;
	}
	tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
			       is_return);
	if (IS_ERR(tp)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tp));
		return PTR_ERR(tp);
	}

	/* parse arguments */
	ret = 0;
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		/* Increment count for freeing args in error case */
		tp->nr_args++;

		/* Parse argument name */
		arg = strchr(argv[i], '=');
		if (arg) {
			*arg++ = '\0';
			tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
		} else {
			arg = argv[i];
			/* If argument name is omitted, set "argN" */
			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
			tp->args[i].name = kstrdup(buf, GFP_KERNEL);
		}

		if (!tp->args[i].name) {
			pr_info("Failed to allocate argument[%d] name.\n", i);
			ret = -ENOMEM;
			goto error;
		}

		if (!is_good_name(tp->args[i].name)) {
			pr_info("Invalid argument[%d] name: %s\n",
				i, tp->args[i].name);
			ret = -EINVAL;
			goto error;
		}

		if (traceprobe_conflict_field_name(tp->args[i].name,
						   tp->args, i)) {
			/*
			 * Report the name that was actually checked: for an
			 * auto-generated "argN" name argv[i] holds the fetch
			 * expression, not the name.
			 */
			pr_info("Argument[%d] name '%s' conflicts with "
				"another field.\n", i, tp->args[i].name);
			ret = -EINVAL;
			goto error;
		}

		/* Parse fetch argument */
		ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i],
						 is_return, true);
		if (ret) {
			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
			goto error;
		}
	}

	ret = register_trace_probe(tp);
	if (ret)
		goto error;
	return 0;

error:
	free_trace_probe(tp);
	return ret;
}
static int release_all_trace_probes(void)
{
struct trace_probe *tp;
int ret = 0;
mutex_lock(&probe_lock);
/* Ensure no probe is in use. */
list_for_each_entry(tp, &probe_list, list)
if (trace_probe_is_enabled(tp)) {
ret = -EBUSY;
goto end;
}
/* TODO: Use batch unregistration */
while (!list_empty(&probe_list)) {
tp = list_entry(probe_list.next, struct trace_probe, list);
unregister_trace_probe(tp);
free_trace_probe(tp);
}
end:
mutex_unlock(&probe_lock);
return ret;
}
/* Probes listing interfaces */

/* seq_file start: take probe_lock and position on entry *pos of probe_list */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	loff_t index;

	mutex_lock(&probe_lock);
	index = *pos;

	return seq_list_start(&probe_list, index);
}
/* seq_file next: advance from @v to the following entry of probe_list */
static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	void *following = seq_list_next(v, &probe_list, pos);

	return following;
}
/* seq_file stop: drop the probe_lock taken by probes_seq_start() */
static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&probe_lock);
}
/*
 * seq_file show: print one probe as a definition line,
 * "<p|r>:<group>/<event> <location> [<name>=<fetcharg> ...]".
 */
static int probes_seq_show(struct seq_file *m, void *v)
{
	struct trace_probe *tp = v;
	int idx;

	seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p');
	seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);

	/* Location: symbol[+offset] when a symbol is known, else the address */
	if (tp->symbol) {
		if (tp->rp.kp.offset)
			seq_printf(m, " %s+%u", trace_probe_symbol(tp),
				   tp->rp.kp.offset);
		else
			seq_printf(m, " %s", trace_probe_symbol(tp));
	} else {
		seq_printf(m, " 0x%p", tp->rp.kp.addr);
	}

	for (idx = 0; idx < tp->nr_args; idx++)
		seq_printf(m, " %s=%s", tp->args[idx].name,
			   tp->args[idx].comm);
	seq_printf(m, "\n");

	return 0;
}
/*
 * seq_file iterator that lists probe definitions. The start/stop
 * callbacks hold probe_lock for the duration of a walk over probe_list.
 */
static const struct seq_operations probes_seq_op = {
.start = probes_seq_start,
.next = probes_seq_next,
.stop = probes_seq_stop,
.show = probes_seq_show
};
/*
 * Open the probe definition file. Opening it for writing with O_TRUNC
 * first removes all probes; if that fails the open fails with the same
 * error.
 */
static int probes_open(struct inode *inode, struct file *file)
{
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		int err = release_all_trace_probes();

		if (err < 0)
			return err;
	}

	return seq_open(file, &probes_seq_op);
}
/*
 * Write handler: hand the user buffer to traceprobe_probes_write(),
 * with create_trace_probe() as the command callback.
 */
static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	ssize_t done;

	done = traceprobe_probes_write(file, buffer, count, ppos,
				       create_trace_probe);
	return done;
}
/*
 * File operations of the probe definition file: reads go through the
 * seq_file set up by probes_open(), writes are handled by probes_write().
 */
static const struct file_operations kprobe_events_ops = {
.owner = THIS_MODULE,
.open = probes_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
.write = probes_write,
};
/* Probes profiling interfaces */

/* seq_file show: print event name, hit count and kprobe miss count */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_probe *tp = v;
	unsigned long hits = tp->nhit;
	unsigned long missed = tp->rp.kp.nmissed;

	seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, hits, missed);

	return 0;
}
/*
 * seq_file iterator for the profile file. It walks probe_list with the
 * same start/next/stop callbacks as the listing, but shows hit counters.
 */
static const struct seq_operations profile_seq_op = {
.start = probes_seq_start,
.next = probes_seq_next,
.stop = probes_seq_stop,
.show = probes_profile_seq_show
};
/* Open the profile file as a seq_file driven by profile_seq_op */
static int profile_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &profile_seq_op);

	return ret;
}
/* File operations of the profile file; no write handler is set */
static const struct file_operations kprobe_profile_ops = {
.owner = THIS_MODULE,
.open = profile_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
/* Sum up total data length for dynamic arraies (strings) */
static __kprobes int __get_data_size(struct trace_probe *tp,
struct pt_regs *regs)
{
int i, ret = 0;
u32 len;
for (i = 0; i < tp->nr_args; i++)
if (unlikely(tp->args[i].fetch_size.fn)) {
call_fetch(&tp->args[i].fetch_size, regs, &len);
ret += len;
}
return ret;
}
/*
 * Store the value of each argument into @data.
 *
 * Arguments without a fetch_size function are fetched straight to their
 * offset in @data. Arguments with one (dynamic data) get a u32 location
 * word at that offset; the data itself is placed after the fixed-size
 * part, starting at tp->size, and may use at most @maxlen bytes in total.
 * @ent_size is added when the relative location is converted.
 */
static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
				       struct pt_regs *regs,
				       u8 *data, int maxlen)
{
	u32 tail = tp->size;	/* where the next dynamic data goes */
	u32 *loc;		/* Data (relative) location */
	int idx;

	for (idx = 0; idx < tp->nr_args; idx++) {
		struct probe_arg *parg = &tp->args[idx];

		if (unlikely(parg->fetch_size.fn)) {
			/*
			 * First, we set the relative location and
			 * maximum data length to *loc
			 */
			loc = (u32 *)(data + parg->offset);
			*loc = make_data_rloc(maxlen, tail - parg->offset);
			/* Then try to fetch string or dynamic array data */
			call_fetch(&parg->fetch, regs, loc);
			/* Reduce maximum length */
			tail += get_rloc_len(*loc);
			maxlen -= get_rloc_len(*loc);
			/* Trick here, convert data_rloc to data_loc */
			*loc = convert_rloc_to_loc(*loc,
						   ent_size + parg->offset);
		} else {
			/* Just fetching data normally */
			call_fetch(&parg->fetch, regs, data + parg->offset);
		}
	}
}
/* Kprobe handler */
static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
{
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct kprobe_trace_entry_head *entry;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
int size, dsize, pc;
unsigned long irq_flags;
struct ftrace_event_call *call = &tp->call;
tp->nhit++;
local_save_flags(irq_flags);