/*
* Copyright (C) 2007 Mathieu Desnoyers
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/marker.h>
#include <linux/err.h>
#include <linux/slab.h>
/*
* Bounds of a struct marker array that is defined outside this file.
* NOTE(review): presumably the linker-provided start/stop symbols of the
* __markers section - confirm against the linker script.
*/
extern struct marker __start___markers[];
extern struct marker __stop___markers[];
/*
* Set to 1 to enable marker debug output. It is declared const without an
* initializer, so it is 0 unless this definition is edited.
*/
static const int marker_debug;
/*
* markers_mutex nests inside module_mutex. Markers mutex protects the builtin
* and module markers and the hash table.
*/
static DEFINE_MUTEX(markers_mutex);
/*
* Marker hash table, containing the active markers.
* Protected by markers_mutex.
*
* MARKER_HASH_BITS low bits of the name hash select one of the
* MARKER_TABLE_SIZE buckets.
*/
#define MARKER_HASH_BITS 6
#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
/*
* Note about RCU :
* It is used to make sure every handler has finished using its private data
* between two consecutive operations (add or remove) on a given marker. It is
* also used to delay the freeing of the multiple-probes array until a quiescent
* state is reached.
* Marker entry modifications are protected by the markers_mutex.
*/
/*
 * struct marker_entry - one named marker in the marker hash table.
 *
 * An entry is allocated with room for the name and the optional format
 * string directly behind the structure (see the name member).
 */
struct marker_entry {
	/* Links the entry into its marker_table bucket. */
	struct hlist_node hlist;
	/* Points into name[] just past the name's NUL, or NULL if no format. */
	char *format;
	/* Probe wrapper */
	void (*call)(const struct marker *mdata, void *call_private, ...);
	/*
	 * Closure used while ptype == 0. single.func is
	 * __mark_empty_function when no probe is connected.
	 */
	struct marker_probe_closure single;
	/* Closure array used while ptype == 1; ends at the first NULL func. */
	struct marker_probe_closure *multi;
	int refcount;	/* Number of times armed. 0 if disarmed. */
	/* Handed to free_old_closure(), which maps it back to the entry. */
	struct rcu_head rcu;
	/* Stale closure array that free_old_closure() will kfree(). */
	void *oldptr;
	/* Cleared by free_old_closure() once oldptr has been freed. */
	unsigned char rcu_pending:1;
	/* 0: the single closure is in use, 1: the multi array is in use. */
	unsigned char ptype:1;
	/*
	 * Contains name'\0'format'\0'. Declared as a C99 flexible array
	 * member; it must stay the last member of the structure.
	 */
	char name[];
};
/*
* Hash buckets holding the marker entries. get_marker() and add_marker()
* pick the bucket from the low MARKER_HASH_BITS bits of jhash(name).
*/
static struct hlist_head marker_table[MARKER_TABLE_SIZE];
/**
* __mark_empty_function - Empty probe callback
* @probe_private: probe private data
* @call_private: call site private data
* @fmt: format string
* @args: pointer to the variable argument list (never read here)
*
* Empty callback provided as a probe to the markers. By providing this to a
* disabled marker, we make sure the execution flow is always valid even
* though the function pointer change and the marker enabling are two distinct
* operations that modify the execution flow of preemptible code.
*
* add_marker() installs it as single.func of every new entry, and
* marker_entry_remove_probe() restores it when the last probe goes away.
*/
void __mark_empty_function(void *probe_private, void *call_private,
const char *fmt, va_list *args)
{
}
EXPORT_SYMBOL_GPL(__mark_empty_function);
/*
* marker_probe_cb - Callback that prepares the variable argument list for
* probes.
* @mdata: pointer of type struct marker
* @call_private: caller site private data
* @...: Variable argument list.
*
* Calls the single probe when mdata->ptype is 0; otherwise calls every entry
* of the mdata->multi array, stopping at the first entry whose func is NULL.
* Each probe receives a pointer to a freshly started va_list.
*
* Since we do not use "typical" pointer based RCU in the 1 argument case, we
* need to put a full smp_rmb() in this branch. This is why we do not use
* rcu_dereference() for the pointer read.
*/
void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
{
va_list args;
char ptype;
/*
* preempt_disable does two things : disabling preemption to make sure
* the teardown of the callbacks can be done correctly when they are in
* modules and ensuring RCU read coherency.
*/
preempt_disable();
ptype = mdata->ptype;
if (likely(!ptype)) {
marker_probe_func *func;
/* Must read the ptype before ptr. They are not data dependent,
* so we put an explicit smp_rmb() here. */
smp_rmb();
func = mdata->single.func;
/* Must read the ptr before private data. They are not data
* dependent, so we put an explicit smp_rmb() here. */
smp_rmb();
va_start(args, call_private);
func(mdata->single.probe_private, call_private, mdata->format,
&args);
va_end(args);
} else {
struct marker_probe_closure *multi;
int i;
/*
* Read mdata->ptype before mdata->multi.
*/
smp_rmb();
multi = mdata->multi;
/*
* multi points to an array, therefore accessing the array
* depends on reading multi. However, even in this case,
* we must ensure that the pointer is read _before_ the array
* data. Same as rcu_dereference, but we need a full smp_rmb()
* in the fast path, so put the explicit barrier here.
*/
smp_read_barrier_depends();
/*
* The va_list is restarted for every probe so that each one
* sees the arguments from the beginning.
*/
for (i = 0; multi[i].func; i++) {
va_start(args, call_private);
multi[i].func(multi[i].probe_private, call_private,
mdata->format, &args);
va_end(args);
}
}
preempt_enable();
}
EXPORT_SYMBOL_GPL(marker_probe_cb);
/*
* marker_probe_cb_noarg - Callback that does not prepare the variable
* argument list.
* @mdata: pointer of type struct marker
* @call_private: caller site private data
* @...: Variable argument list.
*
* Should be connected to markers "MARK_NOARGS".
*
* Same dispatch as marker_probe_cb() (single probe when mdata->ptype is 0,
* NULL-func-terminated multi array otherwise), but va_start() is never
* called: the probes get a pointer to an uninitialized va_list and must not
* read it.
*/
void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
{
va_list args; /* not initialized */
char ptype;
/* Preemption stays disabled for the whole dispatch, as in marker_probe_cb(). */
preempt_disable();
ptype = mdata->ptype;
if (likely(!ptype)) {
marker_probe_func *func;
/* Must read the ptype before ptr. They are not data dependent,
* so we put an explicit smp_rmb() here. */
smp_rmb();
func = mdata->single.func;
/* Must read the ptr before private data. They are not data
* dependent, so we put an explicit smp_rmb() here. */
smp_rmb();
func(mdata->single.probe_private, call_private, mdata->format,
&args);
} else {
struct marker_probe_closure *multi;
int i;
/*
* Read mdata->ptype before mdata->multi.
*/
smp_rmb();
multi = mdata->multi;
/*
* multi points to an array, therefore accessing the array
* depends on reading multi. However, even in this case,
* we must ensure that the pointer is read _before_ the array
* data. Same as rcu_dereference, but we need a full smp_rmb()
* in the fast path, so put the explicit barrier here.
*/
smp_read_barrier_depends();
for (i = 0; multi[i].func; i++)
multi[i].func(multi[i].probe_private, call_private,
mdata->format, &args);
}
preempt_enable();
}
EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
/*
 * free_old_closure - release the stale closure array of a marker entry.
 * @head: the rcu_head embedded in the owning struct marker_entry
 *
 * Frees entry->oldptr and only then clears entry->rcu_pending.
 */
static void free_old_closure(struct rcu_head *head)
{
	struct marker_entry *owner;

	owner = container_of(head, struct marker_entry, rcu);
	kfree(owner->oldptr);
	/* The free must be visible before the pending flag drops to 0. */
	smp_wmb();
	owner->rcu_pending = 0;
}
static void debug_print_probes(struct marker_entry *entry)
{
int i;
if (!marker_debug)
return;
if (!entry->ptype) {
printk(KERN_DEBUG "Single probe : %p %p\n",
entry->single.func,
entry->single.probe_private);
} else {
for (i = 0; entry->multi[i].func; i++)
printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
entry->multi[i].func,
entry->multi[i].probe_private);
}
}
/*
 * marker_entry_add_probe - connect one more probe closure to a marker entry.
 * @entry: marker entry to update
 * @probe: probe function to connect (a NULL value only triggers a WARN_ON)
 * @probe_private: private data paired with @probe
 *
 * Transitions handled:
 *   0 -> 1   the single closure is overwritten in place;
 *   1 -> 2   a two-element array (plus NULL terminator) replaces the single
 *            closure;
 *   N -> N+1 a new array is allocated and the old one is copied into it.
 *
 * Returns NULL when there is no previous array to dispose of (0 -> 1 and
 * 1 -> 2), the previous multi array on N -> N+1, ERR_PTR(-EBUSY) when the
 * same (probe, probe_private) pair is already connected, or
 * ERR_PTR(-ENOMEM) when the new array cannot be allocated. The entry is left
 * untouched on error.
 * NOTE(review): the returned old array is presumably freed by the caller
 * after a grace period (see free_old_closure()) - confirm against callers.
 */
static struct marker_probe_closure *
marker_entry_add_probe(struct marker_entry *entry,
		marker_probe_func *probe, void *probe_private)
{
	int nr_probes = 0;
	struct marker_probe_closure *old, *new;

	WARN_ON(!probe);

	debug_print_probes(entry);
	old = entry->multi;
	if (!entry->ptype) {
		if (entry->single.func == probe &&
		    entry->single.probe_private == probe_private)
			return ERR_PTR(-EBUSY);
		if (entry->single.func == __mark_empty_function) {
			/* 0 -> 1 probes */
			entry->single.func = probe;
			entry->single.probe_private = probe_private;
			entry->refcount = 1;
			entry->ptype = 0;
			debug_print_probes(entry);
			return NULL;
		}
		/*
		 * 1 -> 2 probes: entry->multi is not in use yet, so there is
		 * no old array to copy from or to hand back.
		 */
		nr_probes = 1;
		old = NULL;
	} else {
		/* (N -> N+1), (N != 0, 1) probes */
		for (nr_probes = 0; old[nr_probes].func; nr_probes++)
			if (old[nr_probes].func == probe &&
			    old[nr_probes].probe_private == probe_private)
				return ERR_PTR(-EBUSY);
	}

	/*
	 * + 2 : one for new probe, one for NULL func. kcalloc() zeroes the
	 * array (which provides the terminator) and checks the size
	 * multiplication for overflow.
	 */
	new = kcalloc(nr_probes + 2, sizeof(*new), GFP_KERNEL);
	if (!new)
		return ERR_PTR(-ENOMEM);
	if (!old)
		new[0] = entry->single;
	else
		memcpy(new, old, nr_probes * sizeof(*new));
	new[nr_probes].func = probe;
	new[nr_probes].probe_private = probe_private;
	entry->refcount = nr_probes + 1;
	entry->multi = new;
	entry->ptype = 1;
	debug_print_probes(entry);
	return old;
}
/*
 * marker_closure_matches - does a closure match a removal request?
 *
 * A NULL @probe acts as a wildcard on the function; the private data must
 * always be equal.
 */
static int marker_closure_matches(const struct marker_probe_closure *closure,
		marker_probe_func *probe, void *probe_private)
{
	return (!probe || closure->func == probe) &&
		closure->probe_private == probe_private;
}

/*
 * marker_entry_remove_probe - disconnect probe closure(s) from a marker entry.
 * @entry: marker entry to update
 * @probe: probe function to remove, or NULL to match on @probe_private only
 * @probe_private: private data of the closure(s) to remove
 *
 * In the single-probe case the closure is always reset to
 * __mark_empty_function; a mismatch with @probe / @probe_private only
 * triggers WARN_ON. In the multi-probe case every matching closure is
 * dropped and the entry falls back to the empty probe (0 left), to the single
 * closure (1 left) or to a freshly allocated, NULL-terminated array (more
 * than 1 left).
 *
 * Returns NULL in the single-probe case, the previous multi array in the
 * multi-probe case, or ERR_PTR(-ENOMEM) when the replacement array cannot be
 * allocated (the entry is left untouched then).
 * NOTE(review): the returned old array is presumably freed by the caller
 * after a grace period (see free_old_closure()) - confirm against callers.
 */
static struct marker_probe_closure *
marker_entry_remove_probe(struct marker_entry *entry,
		marker_probe_func *probe, void *probe_private)
{
	int nr_probes, nr_del = 0, nr_left, i, j = 0;
	struct marker_probe_closure *old, *new;

	old = entry->multi;

	debug_print_probes(entry);
	if (!entry->ptype) {
		/* 0 -> N is an error */
		WARN_ON(entry->single.func == __mark_empty_function);
		/* 1 -> 0 probes */
		WARN_ON(probe && entry->single.func != probe);
		WARN_ON(entry->single.probe_private != probe_private);
		entry->single.func = __mark_empty_function;
		entry->refcount = 0;
		entry->ptype = 0;
		debug_print_probes(entry);
		return NULL;
	}

	/* (N -> M), (N > 1, M >= 0) probes */
	for (nr_probes = 0; old[nr_probes].func; nr_probes++)
		if (marker_closure_matches(&old[nr_probes], probe,
					   probe_private))
			nr_del++;
	nr_left = nr_probes - nr_del;

	if (nr_left == 0) {
		/* N -> 0, (N > 1) */
		entry->single.func = __mark_empty_function;
		entry->refcount = 0;
		entry->ptype = 0;
	} else if (nr_left == 1) {
		/* N -> 1, (N > 1): the only survivor becomes the single probe */
		for (i = 0; old[i].func; i++)
			if (!marker_closure_matches(&old[i], probe,
						    probe_private))
				entry->single = old[i];
		entry->refcount = 1;
		entry->ptype = 0;
	} else {
		/* N -> M, (N > 1, M > 1) */
		/*
		 * + 1 for NULL. kcalloc() zeroes the array (which provides
		 * the terminator) and checks the size multiplication for
		 * overflow.
		 */
		new = kcalloc(nr_left + 1, sizeof(*new), GFP_KERNEL);
		if (!new)
			return ERR_PTR(-ENOMEM);
		for (i = 0; old[i].func; i++)
			if (!marker_closure_matches(&old[i], probe,
						    probe_private))
				new[j++] = old[i];
		entry->refcount = nr_left;
		entry->ptype = 1;
		entry->multi = new;
	}
	debug_print_probes(entry);
	return old;
}
/*
* Get marker if the marker is present in the marker hash table.
* Must be called with markers_mutex held.
* Returns NULL if not present.
*/
static struct marker_entry *get_marker(const char *name)
{
struct hlist_head *head;
struct hlist_node *node;
struct marker_entry *e;
u32 hash = jhash(name, strlen(name), 0);
head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
hlist_for_each_entry(e, node, head, hlist) {
if (!strcmp(name, e->name))
return e;
}
return NULL;
}
/*
 * add_marker - allocate a marker entry and add it to the marker hash table.
 * @name: NUL-terminated marker name; hashed to pick the bucket and copied
 *        into the entry
 * @format: format string copied right behind the name, or NULL for none
 *
 * Must be called with markers_mutex held.
 *
 * The new entry starts disarmed: single.func is __mark_empty_function,
 * ptype and refcount are 0, multi and oldptr are NULL and no RCU free is
 * pending. The call wrapper is marker_probe_cb_noarg when @format equals
 * MARK_NOARGS and marker_probe_cb otherwise.
 *
 * Returns the new entry, ERR_PTR(-EBUSY) if an entry with the same name is
 * already in the table, or ERR_PTR(-ENOMEM) if the allocation fails.
 */
static struct marker_entry *add_marker(const char *name, const char *format)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct marker_entry *e;
	size_t name_len = strlen(name) + 1;
	size_t format_len = 0;
	u32 hash = jhash(name, name_len-1, 0);

	if (format)
		format_len = strlen(format) + 1;
	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
	hlist_for_each_entry(e, node, head, hlist) {
		if (!strcmp(name, e->name)) {
			printk(KERN_NOTICE
				"Marker %s busy\n", name);
			return ERR_PTR(-EBUSY);	/* Already there */
		}
	}
	/*
	 * Using kmalloc here to allocate a variable length element. Could
	 * cause some memory fragmentation if overused.
	 */
	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
			GFP_KERNEL);
	if (!e)
		return ERR_PTR(-ENOMEM);
	memcpy(&e->name[0], name, name_len);
	if (format) {
		/* The format string lives right after the name's NUL. */
		e->format = &e->name[name_len];
		memcpy(e->format, format, format_len);
		if (strcmp(e->format, MARK_NOARGS) == 0)
			e->call = marker_probe_cb_noarg;
		else
			e->call = marker_probe_cb;
		trace_mark(core_marker_format, "name %s format %s",
				e->name, e->format);
	} else {
		e->format = NULL;
		e->call = marker_probe_cb;
	}
	e->single.func = __mark_empty_function;
	e->single.probe_private = NULL;
	e->multi = NULL;
	e->ptype = 0;
	e->refcount = 0;
	/*
	 * kmalloc() does not zero the entry. free_old_closure() kfree()s
	 * oldptr, so give it a defined value (kfree(NULL) is a no-op).
	 */
	e->oldptr = NULL;
	e->rcu_pending = 0;
	hlist_add_head(&e->hlist, head);
	return e;
}
/*
* Remove the marker from the marker hash table. Must be called with
* markers_mutex held.
*/
static int remove_marker(const char *name)
{
struct hlist_head *head;
struct hlist_node *node;
struct marker_entry *e;
int found = 0;
size_t len = strlen(name) + 1;
u32 hash =