diff options
| -rw-r--r-- | Documentation/markers.txt | 104 | ||||
| -rw-r--r-- | arch/powerpc/platforms/cell/spufs/file.c | 1 | ||||
| -rw-r--r-- | arch/powerpc/platforms/cell/spufs/sched.c | 1 | ||||
| -rw-r--r-- | include/linux/ftrace_event.h | 10 | ||||
| -rw-r--r-- | include/linux/kvm_host.h | 1 | ||||
| -rw-r--r-- | include/linux/marker.h | 221 | ||||
| -rw-r--r-- | include/linux/module.h | 11 | ||||
| -rw-r--r-- | include/linux/syscalls.h | 24 | ||||
| -rw-r--r-- | include/trace/ftrace.h | 111 | ||||
| -rw-r--r-- | init/Kconfig | 7 | ||||
| -rw-r--r-- | kernel/Makefile | 1 | ||||
| -rw-r--r-- | kernel/marker.c | 930 | ||||
| -rw-r--r-- | kernel/module.c | 18 | ||||
| -rw-r--r-- | kernel/profile.c | 45 | ||||
| -rw-r--r-- | kernel/trace/ftrace.c | 23 | ||||
| -rw-r--r-- | kernel/trace/trace.c | 49 | ||||
| -rw-r--r-- | kernel/trace/trace_event_profile.c | 82 | ||||
| -rw-r--r-- | kernel/trace/trace_events.c | 49 | ||||
| -rw-r--r-- | kernel/trace/trace_printk.c | 1 | ||||
| -rw-r--r-- | kernel/trace/trace_syscalls.c | 97 | ||||
| -rw-r--r-- | samples/Kconfig | 6 | ||||
| -rw-r--r-- | samples/Makefile | 2 | ||||
| -rw-r--r-- | samples/markers/Makefile | 4 | ||||
| -rw-r--r-- | samples/markers/marker-example.c | 53 | ||||
| -rw-r--r-- | samples/markers/probe-example.c | 92 | ||||
| -rw-r--r-- | scripts/Makefile.modpost | 12 |
26 files changed, 297 insertions, 1658 deletions
diff --git a/Documentation/markers.txt b/Documentation/markers.txt deleted file mode 100644 index d2b3d0e91b26..000000000000 --- a/Documentation/markers.txt +++ /dev/null | |||
| @@ -1,104 +0,0 @@ | |||
| 1 | Using the Linux Kernel Markers | ||
| 2 | |||
| 3 | Mathieu Desnoyers | ||
| 4 | |||
| 5 | |||
| 6 | This document introduces Linux Kernel Markers and their use. It provides | ||
| 7 | examples of how to insert markers in the kernel and connect probe functions to | ||
| 8 | them and provides some examples of probe functions. | ||
| 9 | |||
| 10 | |||
| 11 | * Purpose of markers | ||
| 12 | |||
| 13 | A marker placed in code provides a hook to call a function (probe) that you can | ||
| 14 | provide at runtime. A marker can be "on" (a probe is connected to it) or "off" | ||
| 15 | (no probe is attached). When a marker is "off" it has no effect, except for | ||
| 16 | adding a tiny time penalty (checking a condition for a branch) and space | ||
| 17 | penalty (adding a few bytes for the function call at the end of the | ||
| 18 | instrumented function and adding a data structure in a separate section). When a | ||
| 19 | marker is "on", the function you provide is called each time the marker is | ||
| 20 | executed, in the execution context of the caller. When the function provided | ||
| 21 | ends its execution, it returns to the caller (continuing from the marker site). | ||
| 22 | |||
| 23 | You can put markers at important locations in the code. Markers are | ||
| 24 | lightweight hooks that can pass an arbitrary number of parameters, | ||
| 25 | described in a printk-like format string, to the attached probe function. | ||
| 26 | |||
| 27 | They can be used for tracing and performance accounting. | ||
| 28 | |||
| 29 | |||
| 30 | * Usage | ||
| 31 | |||
| 32 | In order to use the macro trace_mark, you should include linux/marker.h. | ||
| 33 | |||
| 34 | #include <linux/marker.h> | ||
| 35 | |||
| 36 | And, | ||
| 37 | |||
| 38 | trace_mark(subsystem_event, "myint %d mystring %s", someint, somestring); | ||
| 39 | Where : | ||
| 40 | - subsystem_event is an identifier unique to your event | ||
| 41 | - subsystem is the name of your subsystem. | ||
| 42 | - event is the name of the event to mark. | ||
| 43 | - "myint %d mystring %s" is the formatted string for the serializer. "myint" and | ||
| 44 | "mystring" are respectively the field names associated with the first and | ||
| 45 | second parameter. | ||
| 46 | - someint is an integer. | ||
| 47 | - somestring is a char pointer. | ||
| 48 | |||
| 49 | Connecting a function (probe) to a marker is done by providing a probe (function | ||
| 50 | to call) for the specific marker through marker_probe_register() and can be | ||
| 51 | activated by calling marker_arm(). Marker deactivation can be done by calling | ||
| 52 | marker_disarm() as many times as marker_arm() has been called. Removing a probe | ||
| 53 | is done through marker_probe_unregister(); it will disarm the probe. | ||
| 54 | |||
| 55 | marker_synchronize_unregister() must be called between probe unregistration and | ||
| 56 | the first occurrence of | ||
| 57 | - the end of module exit function, | ||
| 58 | to make sure there is no caller left using the probe; | ||
| 59 | - the free of any resource used by the probes, | ||
| 60 | to make sure the probes won't be accessing invalid data. | ||
| 61 | This, and the fact that preemption is disabled around the probe call, make sure | ||
| 62 | that probe removal and module unload are safe. See the "Probe example" section | ||
| 63 | below for a sample probe module. | ||
| 64 | |||
| 65 | The marker mechanism supports inserting multiple instances of the same marker. | ||
| 66 | Markers can be put in inline functions, inlined static functions, and | ||
| 67 | unrolled loops as well as regular functions. | ||
| 68 | |||
| 69 | The naming scheme "subsystem_event" is suggested here as a convention intended | ||
| 70 | to limit collisions. Marker names are global to the kernel: they are considered | ||
| 71 | as being the same whether they are in the core kernel image or in modules. | ||
| 72 | If markers with the same name have conflicting format strings, the markers | ||
| 73 | detected to have a different format string will not be armed, and a printk | ||
| 74 | warning identifying the inconsistency will be output: | ||
| 75 | |||
| 76 | "Format mismatch for probe probe_name (format), marker (format)" | ||
| 77 | |||
| 78 | Another way to use markers is to simply define the marker without generating any | ||
| 79 | function call to actually call into the marker. This is useful in combination | ||
| 80 | with tracepoint probes in a scheme like this : | ||
| 81 | |||
| 82 | void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk); | ||
| 83 | |||
| 84 | DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name, | ||
| 85 | "arg1 %u pid %d"); | ||
| 86 | |||
| 87 | notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk) | ||
| 88 | { | ||
| 89 | struct marker *marker = &GET_MARKER(kernel_irq_entry); | ||
| 90 | /* write data to trace buffers ... */ | ||
| 91 | } | ||
| 92 | |||
| 93 | * Probe / marker example | ||
| 94 | |||
| 95 | See the examples provided in samples/markers/ | ||
| 96 | |||
| 97 | Compile them with your kernel. | ||
| 98 | |||
| 99 | Run, as root : | ||
| 100 | modprobe marker-example (insmod order is not important) | ||
| 101 | modprobe probe-example | ||
| 102 | cat /proc/marker-example (returns an expected error) | ||
| 103 | rmmod marker-example probe-example | ||
| 104 | dmesg | ||
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index ab8aef9bb8ea..8f079b865ad0 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c | |||
| @@ -29,7 +29,6 @@ | |||
| 29 | #include <linux/poll.h> | 29 | #include <linux/poll.h> |
| 30 | #include <linux/ptrace.h> | 30 | #include <linux/ptrace.h> |
| 31 | #include <linux/seq_file.h> | 31 | #include <linux/seq_file.h> |
| 32 | #include <linux/marker.h> | ||
| 33 | 32 | ||
| 34 | #include <asm/io.h> | 33 | #include <asm/io.h> |
| 35 | #include <asm/time.h> | 34 | #include <asm/time.h> |
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index bb5b77c66d05..4678078fede8 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c | |||
| @@ -39,7 +39,6 @@ | |||
| 39 | #include <linux/pid_namespace.h> | 39 | #include <linux/pid_namespace.h> |
| 40 | #include <linux/proc_fs.h> | 40 | #include <linux/proc_fs.h> |
| 41 | #include <linux/seq_file.h> | 41 | #include <linux/seq_file.h> |
| 42 | #include <linux/marker.h> | ||
| 43 | 42 | ||
| 44 | #include <asm/io.h> | 43 | #include <asm/io.h> |
| 45 | #include <asm/mmu_context.h> | 44 | #include <asm/mmu_context.h> |
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index bd099ba82ccc..4ec5e67e18cf 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | #include <linux/ring_buffer.h> | 4 | #include <linux/ring_buffer.h> |
| 5 | #include <linux/trace_seq.h> | 5 | #include <linux/trace_seq.h> |
| 6 | #include <linux/percpu.h> | 6 | #include <linux/percpu.h> |
| 7 | #include <linux/hardirq.h> | ||
| 7 | 8 | ||
| 8 | struct trace_array; | 9 | struct trace_array; |
| 9 | struct tracer; | 10 | struct tracer; |
| @@ -130,10 +131,15 @@ struct ftrace_event_call { | |||
| 130 | void *data; | 131 | void *data; |
| 131 | 132 | ||
| 132 | atomic_t profile_count; | 133 | atomic_t profile_count; |
| 133 | int (*profile_enable)(struct ftrace_event_call *); | 134 | int (*profile_enable)(void); |
| 134 | void (*profile_disable)(struct ftrace_event_call *); | 135 | void (*profile_disable)(void); |
| 135 | }; | 136 | }; |
| 136 | 137 | ||
| 138 | #define FTRACE_MAX_PROFILE_SIZE 2048 | ||
| 139 | |||
| 140 | extern char *trace_profile_buf; | ||
| 141 | extern char *trace_profile_buf_nmi; | ||
| 142 | |||
| 137 | #define MAX_FILTER_PRED 32 | 143 | #define MAX_FILTER_PRED 32 |
| 138 | #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ | 144 | #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ |
| 139 | 145 | ||
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4af56036a6bf..b7bbb5ddd7ae 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -15,7 +15,6 @@ | |||
| 15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
| 16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
| 17 | #include <linux/preempt.h> | 17 | #include <linux/preempt.h> |
| 18 | #include <linux/marker.h> | ||
| 19 | #include <linux/msi.h> | 18 | #include <linux/msi.h> |
| 20 | #include <asm/signal.h> | 19 | #include <asm/signal.h> |
| 21 | 20 | ||
diff --git a/include/linux/marker.h b/include/linux/marker.h deleted file mode 100644 index b85e74ca782f..000000000000 --- a/include/linux/marker.h +++ /dev/null | |||
| @@ -1,221 +0,0 @@ | |||
| 1 | #ifndef _LINUX_MARKER_H | ||
| 2 | #define _LINUX_MARKER_H | ||
| 3 | |||
| 4 | /* | ||
| 5 | * Code markup for dynamic and static tracing. | ||
| 6 | * | ||
| 7 | * See Documentation/markers.txt. | ||
| 8 | * | ||
| 9 | * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> | ||
| 10 | * | ||
| 11 | * This file is released under the GPLv2. | ||
| 12 | * See the file COPYING for more details. | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <stdarg.h> | ||
| 16 | #include <linux/types.h> | ||
| 17 | |||
| 18 | struct module; | ||
| 19 | struct marker; | ||
| 20 | |||
| 21 | /** | ||
| 22 | * marker_probe_func - Type of a marker probe function | ||
| 23 | * @probe_private: probe private data | ||
| 24 | * @call_private: call site private data | ||
| 25 | * @fmt: format string | ||
| 26 | * @args: variable argument list pointer. Use a pointer to overcome C's | ||
| 27 | * inability to pass this around as a pointer in a portable manner in | ||
| 28 | * the callee otherwise. | ||
| 29 | * | ||
| 30 | * Type of marker probe functions. They receive the mdata and need to parse the | ||
| 31 | * format string to recover the variable argument list. | ||
| 32 | */ | ||
| 33 | typedef void marker_probe_func(void *probe_private, void *call_private, | ||
| 34 | const char *fmt, va_list *args); | ||
| 35 | |||
| 36 | struct marker_probe_closure { | ||
| 37 | marker_probe_func *func; /* Callback */ | ||
| 38 | void *probe_private; /* Private probe data */ | ||
| 39 | }; | ||
| 40 | |||
| 41 | struct marker { | ||
| 42 | const char *name; /* Marker name */ | ||
| 43 | const char *format; /* Marker format string, describing the | ||
| 44 | * variable argument list. | ||
| 45 | */ | ||
| 46 | char state; /* Marker state. */ | ||
| 47 | char ptype; /* probe type : 0 : single, 1 : multi */ | ||
| 48 | /* Probe wrapper */ | ||
| 49 | void (*call)(const struct marker *mdata, void *call_private, ...); | ||
| 50 | struct marker_probe_closure single; | ||
| 51 | struct marker_probe_closure *multi; | ||
| 52 | const char *tp_name; /* Optional tracepoint name */ | ||
| 53 | void *tp_cb; /* Optional tracepoint callback */ | ||
| 54 | } __attribute__((aligned(8))); | ||
| 55 | |||
| 56 | #ifdef CONFIG_MARKERS | ||
| 57 | |||
| 58 | #define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \ | ||
| 59 | static const char __mstrtab_##name[] \ | ||
| 60 | __attribute__((section("__markers_strings"))) \ | ||
| 61 | = #name "\0" format; \ | ||
| 62 | static struct marker __mark_##name \ | ||
| 63 | __attribute__((section("__markers"), aligned(8))) = \ | ||
| 64 | { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ | ||
| 65 | 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\ | ||
| 66 | NULL, tp_name_str, tp_cb } | ||
| 67 | |||
| 68 | #define DEFINE_MARKER(name, format) \ | ||
| 69 | _DEFINE_MARKER(name, NULL, NULL, format) | ||
| 70 | |||
| 71 | #define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \ | ||
| 72 | _DEFINE_MARKER(name, #tp_name, tp_cb, format) | ||
| 73 | |||
| 74 | /* | ||
| 75 | * Note : the empty asm volatile with read constraint is used here instead of a | ||
| 76 | * "used" attribute to fix a gcc 4.1.x bug. | ||
| 77 | * Make sure the alignment of the structure in the __markers section will | ||
| 78 | * not add unwanted padding between the beginning of the section and the | ||
| 79 | * structure. Force alignment to the same alignment as the section start. | ||
| 80 | * | ||
| 81 | * The "generic" argument controls which marker enabling mechanism must be used. | ||
| 82 | * If generic is true, a variable read is used. | ||
| 83 | * If generic is false, immediate values are used. | ||
| 84 | */ | ||
| 85 | #define __trace_mark(generic, name, call_private, format, args...) \ | ||
| 86 | do { \ | ||
| 87 | DEFINE_MARKER(name, format); \ | ||
| 88 | __mark_check_format(format, ## args); \ | ||
| 89 | if (unlikely(__mark_##name.state)) { \ | ||
| 90 | (*__mark_##name.call) \ | ||
| 91 | (&__mark_##name, call_private, ## args);\ | ||
| 92 | } \ | ||
| 93 | } while (0) | ||
| 94 | |||
| 95 | #define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ | ||
| 96 | do { \ | ||
| 97 | void __check_tp_type(void) \ | ||
| 98 | { \ | ||
| 99 | register_trace_##tp_name(tp_cb); \ | ||
| 100 | } \ | ||
| 101 | DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \ | ||
| 102 | __mark_check_format(format, ## args); \ | ||
| 103 | (*__mark_##name.call)(&__mark_##name, call_private, \ | ||
| 104 | ## args); \ | ||
| 105 | } while (0) | ||
| 106 | |||
| 107 | extern void marker_update_probe_range(struct marker *begin, | ||
| 108 | struct marker *end); | ||
| 109 | |||
| 110 | #define GET_MARKER(name) (__mark_##name) | ||
| 111 | |||
| 112 | #else /* !CONFIG_MARKERS */ | ||
| 113 | #define DEFINE_MARKER(name, tp_name, tp_cb, format) | ||
| 114 | #define __trace_mark(generic, name, call_private, format, args...) \ | ||
| 115 | __mark_check_format(format, ## args) | ||
| 116 | #define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ | ||
| 117 | do { \ | ||
| 118 | void __check_tp_type(void) \ | ||
| 119 | { \ | ||
| 120 | register_trace_##tp_name(tp_cb); \ | ||
| 121 | } \ | ||
| 122 | __mark_check_format(format, ## args); \ | ||
| 123 | } while (0) | ||
| 124 | static inline void marker_update_probe_range(struct marker *begin, | ||
| 125 | struct marker *end) | ||
| 126 | { } | ||
| 127 | #define GET_MARKER(name) | ||
| 128 | #endif /* CONFIG_MARKERS */ | ||
| 129 | |||
| 130 | /** | ||
| 131 | * trace_mark - Marker using code patching | ||
| 132 | * @name: marker name, not quoted. | ||
| 133 | * @format: format string | ||
| 134 | * @args...: variable argument list | ||
| 135 | * | ||
| 136 | * Places a marker using optimized code patching technique (imv_read()) | ||
| 137 | * to be enabled when immediate values are present. | ||
| 138 | */ | ||
| 139 | #define trace_mark(name, format, args...) \ | ||
| 140 | __trace_mark(0, name, NULL, format, ## args) | ||
| 141 | |||
| 142 | /** | ||
| 143 | * _trace_mark - Marker using variable read | ||
| 144 | * @name: marker name, not quoted. | ||
| 145 | * @format: format string | ||
| 146 | * @args...: variable argument list | ||
| 147 | * | ||
| 148 | * Places a marker using a standard memory read (_imv_read()) to be | ||
| 149 | * enabled. Should be used for markers in code paths where instruction | ||
| 150 | * modification based enabling is not welcome. (__init and __exit functions, | ||
| 151 | * lockdep, some traps, printk). | ||
| 152 | */ | ||
| 153 | #define _trace_mark(name, format, args...) \ | ||
| 154 | __trace_mark(1, name, NULL, format, ## args) | ||
| 155 | |||
| 156 | /** | ||
| 157 | * trace_mark_tp - Marker in a tracepoint callback | ||
| 158 | * @name: marker name, not quoted. | ||
| 159 | * @tp_name: tracepoint name, not quoted. | ||
| 160 | * @tp_cb: tracepoint callback. Should have an associated global symbol so it | ||
| 161 | * is not optimized away by the compiler (should not be static). | ||
| 162 | * @format: format string | ||
| 163 | * @args...: variable argument list | ||
| 164 | * | ||
| 165 | * Places a marker in a tracepoint callback. | ||
| 166 | */ | ||
| 167 | #define trace_mark_tp(name, tp_name, tp_cb, format, args...) \ | ||
| 168 | __trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args) | ||
| 169 | |||
| 170 | /** | ||
| 171 | * MARK_NOARGS - Format string for a marker with no argument. | ||
| 172 | */ | ||
| 173 | #define MARK_NOARGS " " | ||
| 174 | |||
| 175 | /* To be used for string format validity checking with gcc */ | ||
| 176 | static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) | ||
| 177 | { | ||
| 178 | } | ||
| 179 | |||
| 180 | #define __mark_check_format(format, args...) \ | ||
| 181 | do { \ | ||
| 182 | if (0) \ | ||
| 183 | ___mark_check_format(format, ## args); \ | ||
| 184 | } while (0) | ||
| 185 | |||
| 186 | extern marker_probe_func __mark_empty_function; | ||
| 187 | |||
| 188 | extern void marker_probe_cb(const struct marker *mdata, | ||
| 189 | void *call_private, ...); | ||
| 190 | |||
| 191 | /* | ||
| 192 | * Connect a probe to a marker. | ||
| 193 | * private data pointer must be a valid allocated memory address, or NULL. | ||
| 194 | */ | ||
| 195 | extern int marker_probe_register(const char *name, const char *format, | ||
| 196 | marker_probe_func *probe, void *probe_private); | ||
| 197 | |||
| 198 | /* | ||
| 199 | * Disconnect a probe previously connected with marker_probe_register. | ||
| 200 | */ | ||
| 201 | extern int marker_probe_unregister(const char *name, | ||
| 202 | marker_probe_func *probe, void *probe_private); | ||
| 203 | /* | ||
| 204 | * Unregister a marker by providing the registered private data. | ||
| 205 | */ | ||
| 206 | extern int marker_probe_unregister_private_data(marker_probe_func *probe, | ||
| 207 | void *probe_private); | ||
| 208 | |||
| 209 | extern void *marker_get_private_data(const char *name, marker_probe_func *probe, | ||
| 210 | int num); | ||
| 211 | |||
| 212 | /* | ||
| 213 | * marker_synchronize_unregister must be called between the last marker probe | ||
| 214 | * unregistration and the first one of | ||
| 215 | * - the end of module exit function | ||
| 216 | * - the free of any resource used by the probes | ||
| 217 | * to ensure the code and data are valid for any possibly running probes. | ||
| 218 | */ | ||
| 219 | #define marker_synchronize_unregister() synchronize_sched() | ||
| 220 | |||
| 221 | #endif | ||
diff --git a/include/linux/module.h b/include/linux/module.h index f8f92d015efe..1c755b2f937d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h | |||
| @@ -15,7 +15,6 @@ | |||
| 15 | #include <linux/stringify.h> | 15 | #include <linux/stringify.h> |
| 16 | #include <linux/kobject.h> | 16 | #include <linux/kobject.h> |
| 17 | #include <linux/moduleparam.h> | 17 | #include <linux/moduleparam.h> |
| 18 | #include <linux/marker.h> | ||
| 19 | #include <linux/tracepoint.h> | 18 | #include <linux/tracepoint.h> |
| 20 | 19 | ||
| 21 | #include <asm/local.h> | 20 | #include <asm/local.h> |
| @@ -327,10 +326,6 @@ struct module | |||
| 327 | /* The command line arguments (may be mangled). People like | 326 | /* The command line arguments (may be mangled). People like |
| 328 | keeping pointers to this stuff */ | 327 | keeping pointers to this stuff */ |
| 329 | char *args; | 328 | char *args; |
| 330 | #ifdef CONFIG_MARKERS | ||
| 331 | struct marker *markers; | ||
| 332 | unsigned int num_markers; | ||
| 333 | #endif | ||
| 334 | #ifdef CONFIG_TRACEPOINTS | 329 | #ifdef CONFIG_TRACEPOINTS |
| 335 | struct tracepoint *tracepoints; | 330 | struct tracepoint *tracepoints; |
| 336 | unsigned int num_tracepoints; | 331 | unsigned int num_tracepoints; |
| @@ -535,8 +530,6 @@ int unregister_module_notifier(struct notifier_block * nb); | |||
| 535 | 530 | ||
| 536 | extern void print_modules(void); | 531 | extern void print_modules(void); |
| 537 | 532 | ||
| 538 | extern void module_update_markers(void); | ||
| 539 | |||
| 540 | extern void module_update_tracepoints(void); | 533 | extern void module_update_tracepoints(void); |
| 541 | extern int module_get_iter_tracepoints(struct tracepoint_iter *iter); | 534 | extern int module_get_iter_tracepoints(struct tracepoint_iter *iter); |
| 542 | 535 | ||
| @@ -651,10 +644,6 @@ static inline void print_modules(void) | |||
| 651 | { | 644 | { |
| 652 | } | 645 | } |
| 653 | 646 | ||
| 654 | static inline void module_update_markers(void) | ||
| 655 | { | ||
| 656 | } | ||
| 657 | |||
| 658 | static inline void module_update_tracepoints(void) | 647 | static inline void module_update_tracepoints(void) |
| 659 | { | 648 | { |
| 660 | } | 649 | } |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a8e37821cc60..7d9803cbb20f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
| @@ -100,33 +100,25 @@ struct perf_counter_attr; | |||
| 100 | 100 | ||
| 101 | #ifdef CONFIG_EVENT_PROFILE | 101 | #ifdef CONFIG_EVENT_PROFILE |
| 102 | #define TRACE_SYS_ENTER_PROFILE(sname) \ | 102 | #define TRACE_SYS_ENTER_PROFILE(sname) \ |
| 103 | static int prof_sysenter_enable_##sname(struct ftrace_event_call *event_call) \ | 103 | static int prof_sysenter_enable_##sname(void) \ |
| 104 | { \ | 104 | { \ |
| 105 | int ret = 0; \ | 105 | return reg_prof_syscall_enter("sys"#sname); \ |
| 106 | if (!atomic_inc_return(&event_enter_##sname.profile_count)) \ | ||
| 107 | ret = reg_prof_syscall_enter("sys"#sname); \ | ||
| 108 | return ret; \ | ||
| 109 | } \ | 106 | } \ |
| 110 | \ | 107 | \ |
| 111 | static void prof_sysenter_disable_##sname(struct ftrace_event_call *event_call)\ | 108 | static void prof_sysenter_disable_##sname(void) \ |
| 112 | { \ | 109 | { \ |
| 113 | if (atomic_add_negative(-1, &event_enter_##sname.profile_count)) \ | 110 | unreg_prof_syscall_enter("sys"#sname); \ |
| 114 | unreg_prof_syscall_enter("sys"#sname); \ | ||
| 115 | } | 111 | } |
| 116 | 112 | ||
| 117 | #define TRACE_SYS_EXIT_PROFILE(sname) \ | 113 | #define TRACE_SYS_EXIT_PROFILE(sname) \ |
| 118 | static int prof_sysexit_enable_##sname(struct ftrace_event_call *event_call) \ | 114 | static int prof_sysexit_enable_##sname(void) \ |
| 119 | { \ | 115 | { \ |
| 120 | int ret = 0; \ | 116 | return reg_prof_syscall_exit("sys"#sname); \ |
| 121 | if (!atomic_inc_return(&event_exit_##sname.profile_count)) \ | ||
| 122 | ret = reg_prof_syscall_exit("sys"#sname); \ | ||
| 123 | return ret; \ | ||
| 124 | } \ | 117 | } \ |
| 125 | \ | 118 | \ |
| 126 | static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ | 119 | static void prof_sysexit_disable_##sname(void) \ |
| 127 | { \ | 120 | { \ |
| 128 | if (atomic_add_negative(-1, &event_exit_##sname.profile_count)) \ | 121 | unreg_prof_syscall_exit("sys"#sname); \ |
| 129 | unreg_prof_syscall_exit("sys"#sname); \ | ||
| 130 | } | 122 | } |
| 131 | 123 | ||
| 132 | #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ | 124 | #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ |
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 72a3b437b829..a0361cb69769 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h | |||
| @@ -382,20 +382,14 @@ static inline int ftrace_get_offsets_##call( \ | |||
| 382 | * | 382 | * |
| 383 | * NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later | 383 | * NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later |
| 384 | * | 384 | * |
| 385 | * static int ftrace_profile_enable_<call>(struct ftrace_event_call *event_call) | 385 | * static int ftrace_profile_enable_<call>(void) |
| 386 | * { | 386 | * { |
| 387 | * int ret = 0; | 387 | * return register_trace_<call>(ftrace_profile_<call>); |
| 388 | * | ||
| 389 | * if (!atomic_inc_return(&event_call->profile_count)) | ||
| 390 | * ret = register_trace_<call>(ftrace_profile_<call>); | ||
| 391 | * | ||
| 392 | * return ret; | ||
| 393 | * } | 388 | * } |
| 394 | * | 389 | * |
| 395 | * static void ftrace_profile_disable_<call>(struct ftrace_event_call *event_call) | 390 | * static void ftrace_profile_disable_<call>(void) |
| 396 | * { | 391 | * { |
| 397 | * if (atomic_add_negative(-1, &event->call->profile_count)) | 392 | * unregister_trace_<call>(ftrace_profile_<call>); |
| 398 | * unregister_trace_<call>(ftrace_profile_<call>); | ||
| 399 | * } | 393 | * } |
| 400 | * | 394 | * |
| 401 | */ | 395 | */ |
| @@ -405,20 +399,14 @@ static inline int ftrace_get_offsets_##call( \ | |||
| 405 | \ | 399 | \ |
| 406 | static void ftrace_profile_##call(proto); \ | 400 | static void ftrace_profile_##call(proto); \ |
| 407 | \ | 401 | \ |
| 408 | static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ | 402 | static int ftrace_profile_enable_##call(void) \ |
| 409 | { \ | 403 | { \ |
| 410 | int ret = 0; \ | 404 | return register_trace_##call(ftrace_profile_##call); \ |
| 411 | \ | ||
| 412 | if (!atomic_inc_return(&event_call->profile_count)) \ | ||
| 413 | ret = register_trace_##call(ftrace_profile_##call); \ | ||
| 414 | \ | ||
| 415 | return ret; \ | ||
| 416 | } \ | 405 | } \ |
| 417 | \ | 406 | \ |
| 418 | static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ | 407 | static void ftrace_profile_disable_##call(void) \ |
| 419 | { \ | 408 | { \ |
| 420 | if (atomic_add_negative(-1, &event_call->profile_count)) \ | 409 | unregister_trace_##call(ftrace_profile_##call); \ |
| 421 | unregister_trace_##call(ftrace_profile_##call); \ | ||
| 422 | } | 410 | } |
| 423 | 411 | ||
| 424 | #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) | 412 | #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) |
| @@ -660,11 +648,12 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ | |||
| 660 | * struct ftrace_raw_##call *entry; | 648 | * struct ftrace_raw_##call *entry; |
| 661 | * u64 __addr = 0, __count = 1; | 649 | * u64 __addr = 0, __count = 1; |
| 662 | * unsigned long irq_flags; | 650 | * unsigned long irq_flags; |
| 651 | * struct trace_entry *ent; | ||
| 663 | * int __entry_size; | 652 | * int __entry_size; |
| 664 | * int __data_size; | 653 | * int __data_size; |
| 654 | * int __cpu; | ||
| 665 | * int pc; | 655 | * int pc; |
| 666 | * | 656 | * |
| 667 | * local_save_flags(irq_flags); | ||
| 668 | * pc = preempt_count(); | 657 | * pc = preempt_count(); |
| 669 | * | 658 | * |
| 670 | * __data_size = ftrace_get_offsets_<call>(&__data_offsets, args); | 659 | * __data_size = ftrace_get_offsets_<call>(&__data_offsets, args); |
| @@ -675,25 +664,34 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ | |||
| 675 | * sizeof(u64)); | 664 | * sizeof(u64)); |
| 676 | * __entry_size -= sizeof(u32); | 665 | * __entry_size -= sizeof(u32); |
| 677 | * | 666 | * |
| 678 | * do { | 667 | * // Protect the non nmi buffer |
| 679 | * char raw_data[__entry_size]; <- allocate our sample in the stack | 668 | * // This also protects the rcu read side |
| 680 | * struct trace_entry *ent; | 669 | * local_irq_save(irq_flags); |
| 670 | * __cpu = smp_processor_id(); | ||
| 671 | * | ||
| 672 | * if (in_nmi()) | ||
| 673 | * raw_data = rcu_dereference(trace_profile_buf_nmi); | ||
| 674 | * else | ||
| 675 | * raw_data = rcu_dereference(trace_profile_buf); | ||
| 676 | * | ||
| 677 | * if (!raw_data) | ||
| 678 | * goto end; | ||
| 681 | * | 679 | * |
| 682 | * zero dead bytes from alignment to avoid stack leak to userspace: | 680 | * raw_data = per_cpu_ptr(raw_data, __cpu); |
| 683 | * | 681 | * |
| 684 | * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; | 682 | * //zero dead bytes from alignment to avoid stack leak to userspace: |
| 685 | * entry = (struct ftrace_raw_<call> *)raw_data; | 683 | * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; |
| 686 | * ent = &entry->ent; | 684 | * entry = (struct ftrace_raw_<call> *)raw_data; |
| 687 | * tracing_generic_entry_update(ent, irq_flags, pc); | 685 | * ent = &entry->ent; |
| 688 | * ent->type = event_call->id; | 686 | * tracing_generic_entry_update(ent, irq_flags, pc); |
| 687 | * ent->type = event_call->id; | ||
| 689 | * | 688 | * |
| 690 | * <tstruct> <- do some jobs with dynamic arrays | 689 | * <tstruct> <- do some jobs with dynamic arrays |
| 691 | * | 690 | * |
| 692 | * <assign> <- affect our values | 691 | * <assign> <- affect our values |
| 693 | * | 692 | * |
| 694 | * perf_tpcounter_event(event_call->id, __addr, __count, entry, | 693 | * perf_tpcounter_event(event_call->id, __addr, __count, entry, |
| 695 | * __entry_size); <- submit them to perf counter | 694 | * __entry_size); <- submit them to perf counter |
| 696 | * } while (0); | ||
| 697 | * | 695 | * |
| 698 | * } | 696 | * } |
| 699 | */ | 697 | */ |
| @@ -716,11 +714,13 @@ static void ftrace_profile_##call(proto) \ | |||
| 716 | struct ftrace_raw_##call *entry; \ | 714 | struct ftrace_raw_##call *entry; \ |
| 717 | u64 __addr = 0, __count = 1; \ | 715 | u64 __addr = 0, __count = 1; \ |
| 718 | unsigned long irq_flags; \ | 716 | unsigned long irq_flags; \ |
| 717 | struct trace_entry *ent; \ | ||
| 719 | int __entry_size; \ | 718 | int __entry_size; \ |
| 720 | int __data_size; \ | 719 | int __data_size; \ |
| 720 | char *raw_data; \ | ||
| 721 | int __cpu; \ | ||
| 721 | int pc; \ | 722 | int pc; \ |
| 722 | \ | 723 | \ |
| 723 | local_save_flags(irq_flags); \ | ||
| 724 | pc = preempt_count(); \ | 724 | pc = preempt_count(); \ |
| 725 | \ | 725 | \ |
| 726 | __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ | 726 | __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ |
| @@ -728,23 +728,38 @@ static void ftrace_profile_##call(proto) \ | |||
| 728 | sizeof(u64)); \ | 728 | sizeof(u64)); \ |
| 729 | __entry_size -= sizeof(u32); \ | 729 | __entry_size -= sizeof(u32); \ |
| 730 | \ | 730 | \ |
| 731 | do { \ | 731 | if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \ |
| 732 | char raw_data[__entry_size]; \ | 732 | "profile buffer not large enough")) \ |
| 733 | struct trace_entry *ent; \ | 733 | return; \ |
| 734 | \ | ||
| 735 | local_irq_save(irq_flags); \ | ||
| 736 | __cpu = smp_processor_id(); \ | ||
| 734 | \ | 737 | \ |
| 735 | *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ | 738 | if (in_nmi()) \ |
| 736 | entry = (struct ftrace_raw_##call *)raw_data; \ | 739 | raw_data = rcu_dereference(trace_profile_buf_nmi); \ |
| 737 | ent = &entry->ent; \ | 740 | else \ |
| 738 | tracing_generic_entry_update(ent, irq_flags, pc); \ | 741 | raw_data = rcu_dereference(trace_profile_buf); \ |
| 739 | ent->type = event_call->id; \ | ||
| 740 | \ | 742 | \ |
| 741 | tstruct \ | 743 | if (!raw_data) \ |
| 744 | goto end; \ | ||
| 742 | \ | 745 | \ |
| 743 | { assign; } \ | 746 | raw_data = per_cpu_ptr(raw_data, __cpu); \ |
| 744 | \ | 747 | \ |
| 745 | perf_tpcounter_event(event_call->id, __addr, __count, entry,\ | 748 | *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ |
| 749 | entry = (struct ftrace_raw_##call *)raw_data; \ | ||
| 750 | ent = &entry->ent; \ | ||
| 751 | tracing_generic_entry_update(ent, irq_flags, pc); \ | ||
| 752 | ent->type = event_call->id; \ | ||
| 753 | \ | ||
| 754 | tstruct \ | ||
| 755 | \ | ||
| 756 | { assign; } \ | ||
| 757 | \ | ||
| 758 | perf_tpcounter_event(event_call->id, __addr, __count, entry, \ | ||
| 746 | __entry_size); \ | 759 | __entry_size); \ |
| 747 | } while (0); \ | 760 | \ |
| 761 | end: \ | ||
| 762 | local_irq_restore(irq_flags); \ | ||
| 748 | \ | 763 | \ |
| 749 | } | 764 | } |
| 750 | 765 | ||
diff --git a/init/Kconfig b/init/Kconfig index 8e8b76d8a272..4cc0fa13d5eb 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
| @@ -1054,13 +1054,6 @@ config PROFILING | |||
| 1054 | config TRACEPOINTS | 1054 | config TRACEPOINTS |
| 1055 | bool | 1055 | bool |
| 1056 | 1056 | ||
| 1057 | config MARKERS | ||
| 1058 | bool "Activate markers" | ||
| 1059 | select TRACEPOINTS | ||
| 1060 | help | ||
| 1061 | Place an empty function call at each marker site. Can be | ||
| 1062 | dynamically changed for a probe function. | ||
| 1063 | |||
| 1064 | source "arch/Kconfig" | 1057 | source "arch/Kconfig" |
| 1065 | 1058 | ||
| 1066 | config SLOW_WORK | 1059 | config SLOW_WORK |
diff --git a/kernel/Makefile b/kernel/Makefile index 3d9c7e27e3f9..7c9b0a585502 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
| @@ -87,7 +87,6 @@ obj-$(CONFIG_RELAY) += relay.o | |||
| 87 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o | 87 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o |
| 88 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | 88 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o |
| 89 | obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o | 89 | obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o |
| 90 | obj-$(CONFIG_MARKERS) += marker.o | ||
| 91 | obj-$(CONFIG_TRACEPOINTS) += tracepoint.o | 90 | obj-$(CONFIG_TRACEPOINTS) += tracepoint.o |
| 92 | obj-$(CONFIG_LATENCYTOP) += latencytop.o | 91 | obj-$(CONFIG_LATENCYTOP) += latencytop.o |
| 93 | obj-$(CONFIG_FUNCTION_TRACER) += trace/ | 92 | obj-$(CONFIG_FUNCTION_TRACER) += trace/ |
diff --git a/kernel/marker.c b/kernel/marker.c deleted file mode 100644 index ea54f2647868..000000000000 --- a/kernel/marker.c +++ /dev/null | |||
| @@ -1,930 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Mathieu Desnoyers | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, write to the Free Software | ||
| 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
| 17 | */ | ||
| 18 | #include <linux/module.h> | ||
| 19 | #include <linux/mutex.h> | ||
| 20 | #include <linux/types.h> | ||
| 21 | #include <linux/jhash.h> | ||
| 22 | #include <linux/list.h> | ||
| 23 | #include <linux/rcupdate.h> | ||
| 24 | #include <linux/marker.h> | ||
| 25 | #include <linux/err.h> | ||
| 26 | #include <linux/slab.h> | ||
| 27 | |||
| 28 | extern struct marker __start___markers[]; /* bounds of the core kernel markers, */ | ||
| 29 | extern struct marker __stop___markers[]; /* walked by marker_update_probes() */ | ||
| 30 | | ||
| 31 | /* Set to 1 to enable marker debug output */ | ||
| 32 | static const int marker_debug; | ||
| 33 | | ||
| 34 | /* | ||
| 35 | * markers_mutex nests inside module_mutex. The markers mutex protects the | ||
| 36 | * builtin and module markers and the hash table. | ||
| 37 | */ | ||
| 38 | static DEFINE_MUTEX(markers_mutex); | ||
| 39 | | ||
| 40 | /* | ||
| 41 | * Marker hash table, containing the active markers. | ||
| 42 | * Protected by markers_mutex. | ||
| 43 | */ | ||
| 44 | #define MARKER_HASH_BITS 6 | ||
| 45 | #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) | ||
| 46 | static struct hlist_head marker_table[MARKER_TABLE_SIZE]; | ||
| 47 | |||
| 48 | /* | ||
| 49 | * Note about RCU: | ||
| 50 | * It is used to make sure every handler has finished using its private data | ||
| 51 | * between two consecutive operations (add or remove) on a given marker. It is | ||
| 52 | * also used to delay the freeing of a multiple-probes array until a quiescent | ||
| 53 | * state is reached. | ||
| 54 | * Marker entry modifications are protected by the markers_mutex. | ||
| 55 | */ | ||
| 56 | struct marker_entry { | ||
| 57 | struct hlist_node hlist; /* link in a marker_table bucket */ | ||
| 58 | char *format; /* NULL until a format is known */ | ||
| 59 | /* Probe wrapper */ | ||
| 60 | void (*call)(const struct marker *mdata, void *call_private, ...); | ||
| 61 | struct marker_probe_closure single; /* used when ptype == 0 */ | ||
| 62 | struct marker_probe_closure *multi; /* NULL-func terminated array, used when ptype == 1 */ | ||
| 63 | int refcount; /* Number of times armed. 0 if disarmed. */ | ||
| 64 | struct rcu_head rcu; /* handed to call_rcu_sched() with free_old_closure */ | ||
| 65 | void *oldptr; /* superseded closure array, freed by free_old_closure() */ | ||
| 66 | int rcu_pending; /* 1 while a free_old_closure() callback is queued */ | ||
| 67 | unsigned char ptype:1; /* 0: single probe, 1: multi array */ | ||
| 68 | unsigned char format_allocated:1; /* format was kstrdup()'d and must be kfree()'d */ | ||
| 69 | char name[0]; /* Contains name'\0'format'\0' */ | ||
| 70 | }; | ||
| 71 | |||
| 72 | /** | ||
| 73 | * __mark_empty_function - Empty probe callback | ||
| 74 | * @probe_private: probe private data | ||
| 75 | * @call_private: call site private data | ||
| 76 | * @fmt: format string | ||
| 77 | * @args: pointer to the variable argument list | ||
| 78 | * | ||
| 79 | * Empty callback provided as a probe to the markers. By providing this to a | ||
| 80 | * disabled marker, we make sure the execution flow is always valid even | ||
| 81 | * though the function pointer change and the marker enabling are two distinct | ||
| 82 | * operations that modify the execution flow of preemptible code. | ||
| 83 | */ | ||
| 84 | notrace void __mark_empty_function(void *probe_private, void *call_private, | ||
| 85 | const char *fmt, va_list *args) | ||
| 86 | { | ||
| 87 | } | ||
| 88 | EXPORT_SYMBOL_GPL(__mark_empty_function); | ||
| 89 | |||
| 90 | /* | ||
| 91 | * marker_probe_cb - Callback that prepares the variable argument list for probes. | ||
| 92 | * @mdata: pointer of type struct marker | ||
| 93 | * @call_private: caller site private data | ||
| 94 | * @...: Variable argument list. | ||
| 95 | * | ||
| 96 | * Since we do not use "typical" pointer based RCU in the 1 argument case, we | ||
| 97 | * need to put a full smp_rmb() in this branch. This is why we do not use | ||
| 98 | * rcu_dereference() for the pointer read. | ||
| 99 | */ | ||
| 100 | notrace void marker_probe_cb(const struct marker *mdata, | ||
| 101 | void *call_private, ...) | ||
| 102 | { | ||
| 103 | va_list args; | ||
| 104 | char ptype; | ||
| 105 | | ||
| 106 | /* | ||
| 107 | * rcu_read_lock_sched does two things: it disables preemption to make | ||
| 108 | * sure the teardown of the callbacks can be done correctly when they | ||
| 109 | * are in modules, and it ensures RCU read coherency. | ||
| 110 | */ | ||
| 111 | rcu_read_lock_sched_notrace(); | ||
| 112 | ptype = mdata->ptype; | ||
| 113 | if (likely(!ptype)) { /* ptype 0: single probe */ | ||
| 114 | marker_probe_func *func; | ||
| 115 | /* Must read the ptype before ptr. They are not data dependent, | ||
| 116 | * so we put an explicit smp_rmb() here. */ | ||
| 117 | smp_rmb(); | ||
| 118 | func = mdata->single.func; | ||
| 119 | /* Must read the ptr before private data. They are not data | ||
| 120 | * dependent, so we put an explicit smp_rmb() here. */ | ||
| 121 | smp_rmb(); | ||
| 122 | va_start(args, call_private); | ||
| 123 | func(mdata->single.probe_private, call_private, mdata->format, | ||
| 124 | &args); | ||
| 125 | va_end(args); | ||
| 126 | } else { /* ptype 1: array of probes */ | ||
| 127 | struct marker_probe_closure *multi; | ||
| 128 | int i; | ||
| 129 | /* | ||
| 130 | * Read mdata->ptype before mdata->multi. | ||
| 131 | */ | ||
| 132 | smp_rmb(); | ||
| 133 | multi = mdata->multi; | ||
| 134 | /* | ||
| 135 | * multi points to an array, therefore accessing the array | ||
| 136 | * depends on reading multi. However, even in this case, | ||
| 137 | * we must ensure that the pointer is read _before_ the array | ||
| 138 | * data. Same as rcu_dereference, but we need a full smp_rmb() | ||
| 139 | * in the fast path, so put the explicit barrier here. | ||
| 140 | */ | ||
| 141 | smp_read_barrier_depends(); | ||
| 142 | for (i = 0; multi[i].func; i++) { /* array ends at the first NULL func */ | ||
| 143 | va_start(args, call_private); /* restart the list for each probe */ | ||
| 144 | multi[i].func(multi[i].probe_private, call_private, | ||
| 145 | mdata->format, &args); | ||
| 146 | va_end(args); | ||
| 147 | } | ||
| 148 | } | ||
| 149 | rcu_read_unlock_sched_notrace(); | ||
| 150 | } | ||
| 151 | EXPORT_SYMBOL_GPL(marker_probe_cb); | ||
| 152 | |||
| 153 | /* | ||
| 154 | * marker_probe_cb_noarg - Callback that does not prepare the variable argument list. | ||
| 155 | * @mdata: pointer of type struct marker | ||
| 156 | * @call_private: caller site private data | ||
| 157 | * @...: Variable argument list. | ||
| 158 | * | ||
| 159 | * Should be connected to markers "MARK_NOARGS". | ||
| 160 | */ | ||
| 161 | static notrace void marker_probe_cb_noarg(const struct marker *mdata, | ||
| 162 | void *call_private, ...) | ||
| 163 | { | ||
| 164 | va_list args; /* not initialized: no va_start() is done, only its address is passed on */ | ||
| 165 | char ptype; | ||
| 166 | | ||
| 167 | rcu_read_lock_sched_notrace(); | ||
| 168 | ptype = mdata->ptype; | ||
| 169 | if (likely(!ptype)) { /* ptype 0: single probe */ | ||
| 170 | marker_probe_func *func; | ||
| 171 | /* Must read the ptype before ptr. They are not data dependent, | ||
| 172 | * so we put an explicit smp_rmb() here. */ | ||
| 173 | smp_rmb(); | ||
| 174 | func = mdata->single.func; | ||
| 175 | /* Must read the ptr before private data. They are not data | ||
| 176 | * dependent, so we put an explicit smp_rmb() here. */ | ||
| 177 | smp_rmb(); | ||
| 178 | func(mdata->single.probe_private, call_private, mdata->format, | ||
| 179 | &args); | ||
| 180 | } else { /* ptype 1: array of probes */ | ||
| 181 | struct marker_probe_closure *multi; | ||
| 182 | int i; | ||
| 183 | /* | ||
| 184 | * Read mdata->ptype before mdata->multi. | ||
| 185 | */ | ||
| 186 | smp_rmb(); | ||
| 187 | multi = mdata->multi; | ||
| 188 | /* | ||
| 189 | * multi points to an array, therefore accessing the array | ||
| 190 | * depends on reading multi. However, even in this case, | ||
| 191 | * we must ensure that the pointer is read _before_ the array | ||
| 192 | * data. Same as rcu_dereference, but we need a full smp_rmb() | ||
| 193 | * in the fast path, so put the explicit barrier here. | ||
| 194 | */ | ||
| 195 | smp_read_barrier_depends(); | ||
| 196 | for (i = 0; multi[i].func; i++) /* array ends at the first NULL func */ | ||
| 197 | multi[i].func(multi[i].probe_private, call_private, | ||
| 198 | mdata->format, &args); | ||
| 199 | } | ||
| 200 | rcu_read_unlock_sched_notrace(); | ||
| 201 | } | ||
| 202 | |||
| 203 | static void free_old_closure(struct rcu_head *head) | ||
| 204 | { /* RCU callback queued with call_rcu_sched(): releases the superseded closure array */ | ||
| 205 | struct marker_entry *entry = container_of(head, | ||
| 206 | struct marker_entry, rcu); | ||
| 207 | kfree(entry->oldptr); /* array saved by marker_probe_register()/marker_probe_unregister() */ | ||
| 208 | /* | ||
| 208 | * Make sure we free the data before setting the pending flag to 0: the | ||
| 208 | * register/unregister paths test rcu_pending to decide whether they must | ||
| 208 | * rcu_barrier_sched() first. | ||
| 208 | */ | ||
| 209 | smp_wmb(); | ||
| 210 | entry->rcu_pending = 0; | ||
| 211 | } | ||
| 212 | |||
| 213 | static void debug_print_probes(struct marker_entry *entry) | ||
| 214 | { /* Dump the probe(s) attached to @entry; prints nothing unless marker_debug is non-zero */ | ||
| 215 | int i; | ||
| 216 | | ||
| 217 | if (!marker_debug) | ||
| 218 | return; | ||
| 219 | | ||
| 220 | if (!entry->ptype) { /* single probe stored inline in the entry */ | ||
| 221 | printk(KERN_DEBUG "Single probe : %p %p\n", | ||
| 222 | entry->single.func, | ||
| 223 | entry->single.probe_private); | ||
| 224 | } else { | ||
| 225 | for (i = 0; entry->multi[i].func; i++) /* multi[] ends at the first NULL func */ | ||
| 226 | printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, | ||
| 227 | entry->multi[i].func, | ||
| 228 | entry->multi[i].probe_private); | ||
| 229 | } | ||
| 230 | } | ||
| 231 | |||
| 232 | static struct marker_probe_closure * | ||
| 233 | marker_entry_add_probe(struct marker_entry *entry, | ||
| 234 | marker_probe_func *probe, void *probe_private) | ||
| 235 | { /* Connect (probe, probe_private) to @entry. Returns the previous multi array for the caller to free (NULL if there is none), ERR_PTR(-EBUSY) if the pair is already connected, or ERR_PTR(-ENOMEM). */ | ||
| 236 | int nr_probes = 0; | ||
| 237 | struct marker_probe_closure *old, *new; | ||
| 238 | | ||
| 239 | WARN_ON(!probe); | ||
| 240 | | ||
| 241 | debug_print_probes(entry); | ||
| 242 | old = entry->multi; | ||
| 243 | if (!entry->ptype) { | ||
| 244 | if (entry->single.func == probe && | ||
| 245 | entry->single.probe_private == probe_private) | ||
| 246 | return ERR_PTR(-EBUSY); /* same pair already connected */ | ||
| 247 | if (entry->single.func == __mark_empty_function) { | ||
| 248 | /* 0 -> 1 probes */ | ||
| 249 | entry->single.func = probe; | ||
| 250 | entry->single.probe_private = probe_private; | ||
| 251 | entry->refcount = 1; | ||
| 252 | entry->ptype = 0; | ||
| 253 | debug_print_probes(entry); | ||
| 254 | return NULL; /* no array was replaced, nothing to free */ | ||
| 255 | } else { | ||
| 256 | /* 1 -> 2 probes */ | ||
| 257 | nr_probes = 1; | ||
| 258 | old = NULL; /* the single closure is copied from the entry below, not from an array */ | ||
| 259 | } | ||
| 260 | } else { | ||
| 261 | /* (N -> N+1), (N != 0, 1) probes */ | ||
| 262 | for (nr_probes = 0; old[nr_probes].func; nr_probes++) | ||
| 263 | if (old[nr_probes].func == probe | ||
| 264 | && old[nr_probes].probe_private | ||
| 265 | == probe_private) | ||
| 266 | return ERR_PTR(-EBUSY); /* same pair already connected */ | ||
| 267 | } | ||
| 268 | /* + 2 : one for new probe, one for NULL func */ | ||
| 269 | new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), | ||
| 270 | GFP_KERNEL); | ||
| 271 | if (new == NULL) | ||
| 272 | return ERR_PTR(-ENOMEM); | ||
| 273 | if (!old) | ||
| 274 | new[0] = entry->single; | ||
| 275 | else | ||
| 276 | memcpy(new, old, | ||
| 277 | nr_probes * sizeof(struct marker_probe_closure)); | ||
| 278 | new[nr_probes].func = probe; | ||
| 279 | new[nr_probes].probe_private = probe_private; | ||
| 280 | entry->refcount = nr_probes + 1; | ||
| 281 | entry->multi = new; | ||
| 282 | entry->ptype = 1; | ||
| 283 | debug_print_probes(entry); | ||
| 284 | return old; | ||
| 285 | } | ||
| 286 | |||
| 287 | static struct marker_probe_closure * | ||
| 288 | marker_entry_remove_probe(struct marker_entry *entry, | ||
| 289 | marker_probe_func *probe, void *probe_private) | ||
| 290 | { /* Disconnect from @entry every closure whose private data is @probe_private and whose func is @probe (any func when @probe is NULL). Returns the previous multi array for the caller to free (NULL in single-probe mode), or ERR_PTR(-ENOMEM). */ | ||
| 291 | int nr_probes = 0, nr_del = 0, i; | ||
| 292 | struct marker_probe_closure *old, *new; | ||
| 293 | | ||
| 294 | old = entry->multi; | ||
| 295 | | ||
| 296 | debug_print_probes(entry); | ||
| 297 | if (!entry->ptype) { | ||
| 298 | /* Removing from a marker that has no probe connected is an error */ | ||
| 299 | WARN_ON(entry->single.func == __mark_empty_function); | ||
| 300 | /* 1 -> 0 probes */ | ||
| 301 | WARN_ON(probe && entry->single.func != probe); | ||
| 302 | WARN_ON(entry->single.probe_private != probe_private); | ||
| 303 | entry->single.func = __mark_empty_function; | ||
| 304 | entry->refcount = 0; | ||
| 305 | entry->ptype = 0; | ||
| 306 | debug_print_probes(entry); | ||
| 307 | return NULL; /* no array in single-probe mode, nothing to free */ | ||
| 308 | } else { | ||
| 309 | /* (N -> M), (N > 1, M >= 0) probes */ | ||
| 310 | for (nr_probes = 0; old[nr_probes].func; nr_probes++) { | ||
| 311 | if ((!probe || old[nr_probes].func == probe) | ||
| 312 | && old[nr_probes].probe_private | ||
| 313 | == probe_private) | ||
| 314 | nr_del++; | ||
| 315 | } | ||
| 316 | } | ||
| 317 | | ||
| 318 | if (nr_probes - nr_del == 0) { | ||
| 319 | /* N -> 0, (N > 1) */ | ||
| 320 | entry->single.func = __mark_empty_function; | ||
| 321 | entry->refcount = 0; | ||
| 322 | entry->ptype = 0; | ||
| 323 | } else if (nr_probes - nr_del == 1) { | ||
| 324 | /* N -> 1, (N > 1) */ | ||
| 325 | for (i = 0; old[i].func; i++) | ||
| 326 | if ((probe && old[i].func != probe) || | ||
| 327 | old[i].probe_private != probe_private) | ||
| 328 | entry->single = old[i]; /* the one closure that does not match */ | ||
| 329 | entry->refcount = 1; | ||
| 330 | entry->ptype = 0; | ||
| 331 | } else { | ||
| 332 | int j = 0; | ||
| 333 | /* N -> M, (N > 1, M > 1) */ | ||
| 334 | /* + 1 for NULL */ | ||
| 335 | new = kzalloc((nr_probes - nr_del + 1) | ||
| 336 | * sizeof(struct marker_probe_closure), GFP_KERNEL); | ||
| 337 | if (new == NULL) | ||
| 338 | return ERR_PTR(-ENOMEM); /* entry has not been modified at this point */ | ||
| 339 | for (i = 0; old[i].func; i++) | ||
| 340 | if ((probe && old[i].func != probe) || | ||
| 341 | old[i].probe_private != probe_private) | ||
| 342 | new[j++] = old[i]; | ||
| 343 | entry->refcount = nr_probes - nr_del; | ||
| 344 | entry->ptype = 1; | ||
| 345 | entry->multi = new; | ||
| 346 | } | ||
| 347 | debug_print_probes(entry); | ||
| 348 | return old; | ||
| 349 | } | ||
| 350 | |||
| 351 | /* | ||
| 352 | * Look up a marker entry by name in the marker hash table. | ||
| 353 | * Must be called with markers_mutex held. | ||
| 354 | * Returns the entry, or NULL if no entry with that name is present. | ||
| 355 | */ | ||
| 356 | static struct marker_entry *get_marker(const char *name) | ||
| 357 | { | ||
| 358 | struct hlist_head *head; | ||
| 359 | struct hlist_node *node; | ||
| 360 | struct marker_entry *e; | ||
| 361 | u32 hash = jhash(name, strlen(name), 0); | ||
| 362 | | ||
| 363 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; /* mask the hash down to a bucket index */ | ||
| 364 | hlist_for_each_entry(e, node, head, hlist) { | ||
| 365 | if (!strcmp(name, e->name)) | ||
| 366 | return e; | ||
| 367 | } | ||
| 368 | return NULL; | ||
| 369 | } | ||
| 370 | |||
| 371 | /* | ||
| 372 | * Add the marker to the marker hash table. Must be called with markers_mutex | ||
| 373 | * held. Returns the new entry, ERR_PTR(-EBUSY) if the name is already present, or ERR_PTR(-ENOMEM). | ||
| 374 | */ | ||
| 375 | static struct marker_entry *add_marker(const char *name, const char *format) | ||
| 376 | { | ||
| 377 | struct hlist_head *head; | ||
| 378 | struct hlist_node *node; | ||
| 379 | struct marker_entry *e; | ||
| 380 | size_t name_len = strlen(name) + 1; /* includes the terminating '\0' */ | ||
| 381 | size_t format_len = 0; | ||
| 382 | u32 hash = jhash(name, name_len-1, 0); | ||
| 383 | | ||
| 384 | if (format) | ||
| 385 | format_len = strlen(format) + 1; | ||
| 386 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | ||
| 387 | hlist_for_each_entry(e, node, head, hlist) { | ||
| 388 | if (!strcmp(name, e->name)) { | ||
| 389 | printk(KERN_NOTICE | ||
| 390 | "Marker %s busy\n", name); | ||
| 391 | return ERR_PTR(-EBUSY); /* Already there */ | ||
| 392 | } | ||
| 393 | } | ||
| 394 | /* | ||
| 395 | * Using kmalloc here to allocate a variable length element. Could | ||
| 396 | * cause some memory fragmentation if overused. | ||
| 397 | */ | ||
| 398 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, | ||
| 399 | GFP_KERNEL); | ||
| 400 | if (!e) | ||
| 401 | return ERR_PTR(-ENOMEM); | ||
| 402 | memcpy(&e->name[0], name, name_len); | ||
| 403 | if (format) { | ||
| 404 | e->format = &e->name[name_len]; /* format is stored right after the name, in the same allocation */ | ||
| 405 | memcpy(e->format, format, format_len); | ||
| 406 | if (strcmp(e->format, MARK_NOARGS) == 0) | ||
| 407 | e->call = marker_probe_cb_noarg; | ||
| 408 | else | ||
| 409 | e->call = marker_probe_cb; | ||
| 410 | trace_mark(core_marker_format, "name %s format %s", | ||
| 411 | e->name, e->format); | ||
| 412 | } else { | ||
| 413 | e->format = NULL; | ||
| 414 | e->call = marker_probe_cb; | ||
| 415 | } | ||
| 416 | e->single.func = __mark_empty_function; /* no probe connected yet */ | ||
| 417 | e->single.probe_private = NULL; | ||
| 418 | e->multi = NULL; | ||
| 419 | e->ptype = 0; | ||
| 420 | e->format_allocated = 0; /* format (if any) lives inside e, so it is not kfree()'d separately */ | ||
| 421 | e->refcount = 0; | ||
| 422 | e->rcu_pending = 0; | ||
| 423 | hlist_add_head(&e->hlist, head); | ||
| 424 | return e; | ||
| 425 | } | ||
| 426 | |||
| 427 | /* | ||
| 428 | * Remove the marker from the marker hash table. Must be called with | ||
| 429 | * markers_mutex held. Returns 0, -ENOENT if the name is not found, or -EBUSY if a probe is still connected. | ||
| 430 | */ | ||
| 431 | static int remove_marker(const char *name) | ||
| 432 | { | ||
| 433 | struct hlist_head *head; | ||
| 434 | struct hlist_node *node; | ||
| 435 | struct marker_entry *e; | ||
| 436 | int found = 0; | ||
| 437 | size_t len = strlen(name) + 1; | ||
| 438 | u32 hash = jhash(name, len-1, 0); | ||
| 439 | | ||
| 440 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | ||
| 441 | hlist_for_each_entry(e, node, head, hlist) { | ||
| 442 | if (!strcmp(name, e->name)) { | ||
| 443 | found = 1; | ||
| 444 | break; | ||
| 445 | } | ||
| 446 | } | ||
| 447 | if (!found) | ||
| 448 | return -ENOENT; | ||
| 449 | if (e->single.func != __mark_empty_function) | ||
| 450 | return -EBUSY; /* a probe is still connected */ | ||
| 451 | hlist_del(&e->hlist); | ||
| 452 | if (e->format_allocated) | ||
| 453 | kfree(e->format); /* only when the format was kstrdup()'d by marker_set_format() */ | ||
| 454 | /* Make sure the call_rcu has been executed */ | ||
| 455 | if (e->rcu_pending) | ||
| 456 | rcu_barrier_sched(); | ||
| 457 | kfree(e); | ||
| 458 | return 0; | ||
| 459 | } | ||
| 460 | |||
| 461 | /* | ||
| 462 | * Set the marker_entry format to a kstrdup()'d copy of @format. Returns 0, or -ENOMEM if the copy cannot be allocated. | ||
| 463 | */ | ||
| 464 | static int marker_set_format(struct marker_entry *entry, const char *format) | ||
| 465 | { | ||
| 466 | entry->format = kstrdup(format, GFP_KERNEL); | ||
| 467 | if (!entry->format) | ||
| 468 | return -ENOMEM; | ||
| 469 | entry->format_allocated = 1; /* tells remove_marker() to kfree() the format */ | ||
| 470 | | ||
| 471 | trace_mark(core_marker_format, "name %s format %s", | ||
| 472 | entry->name, entry->format); | ||
| 473 | return 0; | ||
| 474 | } | ||
| 475 | |||
| 476 | /* | ||
| 477 | * Sets the probe callback corresponding to one marker. Returns 0, -EPERM on a format mismatch, or the value returned by marker_set_format() or by the tracepoint (un)registration. | ||
| 478 | */ | ||
| 479 | static int set_marker(struct marker_entry *entry, struct marker *elem, | ||
| 480 | int active) | ||
| 481 | { | ||
| 482 | int ret = 0; | ||
| 483 | WARN_ON(strcmp(entry->name, elem->name) != 0); | ||
| 484 | | ||
| 485 | if (entry->format) { | ||
| 486 | if (strcmp(entry->format, elem->format) != 0) { | ||
| 487 | printk(KERN_NOTICE | ||
| 488 | "Format mismatch for probe %s " | ||
| 489 | "(%s), marker (%s)\n", | ||
| 490 | entry->name, | ||
| 491 | entry->format, | ||
| 492 | elem->format); | ||
| 493 | return -EPERM; | ||
| 494 | } | ||
| 495 | } else { | ||
| 496 | ret = marker_set_format(entry, elem->format); /* entry had no format yet: adopt the marker's */ | ||
| 497 | if (ret) | ||
| 498 | return ret; | ||
| 499 | } | ||
| 500 | | ||
| 501 | /* | ||
| 502 | * probe_cb setup (statically known) is done here. It is | ||
| 503 | * asynchronous with the rest of execution, therefore we only | ||
| 504 | * pass from a "safe" callback (with argument) to an "unsafe" | ||
| 505 | * callback (does not set arguments). | ||
| 506 | */ | ||
| 507 | elem->call = entry->call; | ||
| 508 | /* | ||
| 509 | * Sanity check : | ||
| 510 | * We only update the single probe private data when the ptr is | ||
| 511 | * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) | ||
| 512 | */ | ||
| 513 | WARN_ON(elem->single.func != __mark_empty_function | ||
| 514 | && elem->single.probe_private != entry->single.probe_private | ||
| 515 | && !elem->ptype); | ||
| 516 | elem->single.probe_private = entry->single.probe_private; | ||
| 517 | /* | ||
| 518 | * Make sure the private data is valid when we update the | ||
| 519 | * single probe ptr. | ||
| 520 | */ | ||
| 521 | smp_wmb(); | ||
| 522 | elem->single.func = entry->single.func; | ||
| 523 | /* | ||
| 524 | * We also make sure that the new probe callbacks array is consistent | ||
| 525 | * before setting a pointer to it. | ||
| 526 | */ | ||
| 527 | rcu_assign_pointer(elem->multi, entry->multi); | ||
| 528 | /* | ||
| 529 | * Update the function or multi probe array pointer before setting the | ||
| 530 | * ptype. | ||
| 531 | */ | ||
| 532 | smp_wmb(); | ||
| 533 | elem->ptype = entry->ptype; | ||
| 534 | | ||
| 535 | if (elem->tp_name && (active ^ elem->state)) { /* tracepoint-backed marker whose state changes */ | ||
| 536 | WARN_ON(!elem->tp_cb); | ||
| 537 | /* | ||
| 538 | * It is ok to directly call the probe registration because type | ||
| 539 | * checking has been done in the __trace_mark_tp() macro. | ||
| 540 | */ | ||
| 541 | | ||
| 542 | if (active) { | ||
| 543 | /* | ||
| 544 | * try_module_get should always succeed because we hold | ||
| 545 | * lock_module() to get the tp_cb address. | ||
| 546 | */ | ||
| 547 | ret = try_module_get(__module_text_address( | ||
| 548 | (unsigned long)elem->tp_cb)); | ||
| 549 | BUG_ON(!ret); | ||
| 550 | ret = tracepoint_probe_register_noupdate( | ||
| 551 | elem->tp_name, | ||
| 552 | elem->tp_cb); | ||
| 553 | } else { | ||
| 554 | ret = tracepoint_probe_unregister_noupdate( | ||
| 555 | elem->tp_name, | ||
| 556 | elem->tp_cb); | ||
| 557 | /* | ||
| 558 | * tracepoint_probe_update_all() must be called | ||
| 559 | * before the module containing tp_cb is unloaded. | ||
| 560 | */ | ||
| 561 | module_put(__module_text_address( | ||
| 562 | (unsigned long)elem->tp_cb)); | ||
| 563 | } | ||
| 564 | } | ||
| 565 | elem->state = active; | ||
| 566 | | ||
| 567 | return ret; | ||
| 568 | } | ||
| 569 | |||
| 570 | /* | ||
| 571 | * Disable a marker and its probe callback. | ||
| 572 | * Note: only waiting an RCU period after setting elem->single.func to the empty | ||
| 573 | * function ensures that the original callback is not used anymore. This is ensured | ||
| 574 | * by rcu_read_lock_sched around the call site. | ||
| 575 | */ | ||
| 576 | static void disable_marker(struct marker *elem) | ||
| 577 | { | ||
| 578 | int ret; | ||
| 579 | | ||
| 580 | /* leave "call" as is. It is known statically. */ | ||
| 581 | if (elem->tp_name && elem->state) { /* tracepoint-backed marker that is currently enabled */ | ||
| 582 | WARN_ON(!elem->tp_cb); | ||
| 583 | /* | ||
| 584 | * It is ok to directly call the probe unregistration because type | ||
| 585 | * checking has been done in the __trace_mark_tp() macro. | ||
| 586 | */ | ||
| 587 | ret = tracepoint_probe_unregister_noupdate(elem->tp_name, | ||
| 588 | elem->tp_cb); | ||
| 589 | WARN_ON(ret); | ||
| 590 | /* | ||
| 591 | * tracepoint_probe_update_all() must be called | ||
| 592 | * before the module containing tp_cb is unloaded. | ||
| 593 | */ | ||
| 594 | module_put(__module_text_address((unsigned long)elem->tp_cb)); | ||
| 595 | } | ||
| 596 | elem->state = 0; | ||
| 597 | elem->single.func = __mark_empty_function; | ||
| 598 | /* Update the function before setting the ptype */ | ||
| 599 | smp_wmb(); | ||
| 600 | elem->ptype = 0; /* single probe */ | ||
| 601 | /* | ||
| 602 | * Leave the private data and id there, because removal is racy and | ||
| 603 | * should be done only after an RCU period. These are never used until | ||
| 604 | * the next initialization anyway. | ||
| 605 | */ | ||
| 606 | } | ||
| 607 | |||
| 608 | /** | ||
| 609 | * marker_update_probe_range - Update a probe range | ||
| 610 | * @begin: beginning of the range | ||
| 611 | * @end: end of the range (exclusive) | ||
| 612 | * | ||
| 613 | * Updates the probe callback corresponding to a range of markers. Takes markers_mutex. | ||
| 614 | */ | ||
| 615 | void marker_update_probe_range(struct marker *begin, | ||
| 616 | struct marker *end) | ||
| 617 | { | ||
| 618 | struct marker *iter; | ||
| 619 | struct marker_entry *mark_entry; | ||
| 620 | | ||
| 621 | mutex_lock(&markers_mutex); | ||
| 622 | for (iter = begin; iter < end; iter++) { | ||
| 623 | mark_entry = get_marker(iter->name); | ||
| 624 | if (mark_entry) { | ||
| 625 | set_marker(mark_entry, iter, !!mark_entry->refcount); /* active iff at least one probe is connected */ | ||
| 626 | /* | ||
| 627 | * Ignore the set_marker() error and continue with the next marker. | ||
| 628 | */ | ||
| 629 | } else { | ||
| 630 | disable_marker(iter); /* no entry in the hash table for this marker */ | ||
| 631 | } | ||
| 632 | } | ||
| 633 | mutex_unlock(&markers_mutex); | ||
| 634 | } | ||
| 635 | |||
| 636 | /* | ||
| 637 | * Update probes, removing the faulty probes. | ||
| 638 | * | ||
| 639 | * Internal callback only changed before the first probe is connected to it. | ||
| 640 | * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 | ||
| 641 | * transitions. All other transitions will leave the old private data valid. | ||
| 642 | * This makes the non-atomicity of the callback/private data updates valid. | ||
| 643 | * | ||
| 644 | * "special case" updates : | ||
| 645 | * 0 -> 1 callback | ||
| 646 | * 1 -> 0 callback | ||
| 647 | * 1 -> 2 callbacks | ||
| 648 | * 2 -> 1 callbacks | ||
| 649 | * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. | ||
| 650 | * Side effect : marker_set_format may delete the marker entry (creating a | ||
| 651 | * replacement). NOTE(review): the marker_set_format() in this file only kstrdup()s the format and does not delete the entry; this remark looks stale. | ||
| 652 | */ | ||
| 653 | static void marker_update_probes(void) | ||
| 654 | { | ||
| 655 | /* Core kernel markers */ | ||
| 656 | marker_update_probe_range(__start___markers, __stop___markers); | ||
| 657 | /* Markers in modules. */ | ||
| 658 | module_update_markers(); | ||
| 659 | tracepoint_probe_update_all(); | ||
| 660 | } | ||
| 661 | |||
| 662 | /** | ||
| 663 | * marker_probe_register - Connect a probe to a marker | ||
| 664 | * @name: marker name | ||
| 665 | * @format: format string | ||
| 666 | * @probe: probe handler | ||
| 667 | * @probe_private: probe private data | ||
| 668 | * | ||
| 669 | * Private data must be a valid allocated memory address, or NULL. | ||
| 670 | * Returns 0 if ok, error value on error. | ||
| 671 | * The probe address must at least be aligned on the architecture pointer size. | ||
| 672 | */ | ||
| 673 | int marker_probe_register(const char *name, const char *format, | ||
| 674 | marker_probe_func *probe, void *probe_private) | ||
| 675 | { | ||
| 676 | struct marker_entry *entry; | ||
| 677 | int ret = 0; | ||
| 678 | struct marker_probe_closure *old; | ||
| 679 | | ||
| 680 | mutex_lock(&markers_mutex); | ||
| 681 | entry = get_marker(name); | ||
| 682 | if (!entry) { | ||
| 683 | entry = add_marker(name, format); | ||
| 684 | if (IS_ERR(entry)) | ||
| 685 | ret = PTR_ERR(entry); | ||
| 686 | } else if (format) { | ||
| 687 | if (!entry->format) | ||
| 688 | ret = marker_set_format(entry, format); | ||
| 689 | else if (strcmp(entry->format, format)) | ||
| 690 | ret = -EPERM; /* existing entry was registered with a different format */ | ||
| 691 | } | ||
| 692 | if (ret) | ||
| 693 | goto end; | ||
| 694 | | ||
| 695 | /* | ||
| 696 | * If we detect that a call_rcu is pending for this marker, | ||
| 697 | * make sure it's executed now. | ||
| 698 | */ | ||
| 699 | if (entry->rcu_pending) | ||
| 700 | rcu_barrier_sched(); | ||
| 701 | old = marker_entry_add_probe(entry, probe, probe_private); | ||
| 702 | if (IS_ERR(old)) { | ||
| 703 | ret = PTR_ERR(old); | ||
| 704 | goto end; | ||
| 705 | } | ||
| 706 | mutex_unlock(&markers_mutex); /* marker_update_probe_range() takes markers_mutex itself */ | ||
| 707 | marker_update_probes(); | ||
| 708 | mutex_lock(&markers_mutex); | ||
| 709 | entry = get_marker(name); /* look the entry up again: the mutex was dropped */ | ||
| 710 | if (!entry) | ||
| 711 | goto end; /* NOTE(review): old is not freed on this path */ | ||
| 712 | if (entry->rcu_pending) | ||
| 713 | rcu_barrier_sched(); | ||
| 714 | entry->oldptr = old; | ||
| 715 | entry->rcu_pending = 1; | ||
| 716 | /* write rcu_pending before calling the RCU callback */ | ||
| 717 | smp_wmb(); | ||
| 718 | call_rcu_sched(&entry->rcu, free_old_closure); | ||
| 719 | end: | ||
| 720 | mutex_unlock(&markers_mutex); | ||
| 721 | return ret; | ||
| 722 | } | ||
| 723 | EXPORT_SYMBOL_GPL(marker_probe_register); | ||
| 724 | |||
| 725 | /** | ||
| 726 | * marker_probe_unregister - Disconnect a probe from a marker | ||
| 727 | * @name: marker name | ||
| 728 | * @probe: probe function pointer | ||
| 729 | * @probe_private: probe private data | ||
| 730 | * | ||
| 731 | * Returns 0 on success, -ENOENT if no marker entry named @name exists, or | ||
| | * -ENOMEM if the reduced probe array cannot be allocated (the entry is then | ||
| | * left unchanged). | ||
| 732 | * We do not need to call a synchronize_sched to make sure the probes have | ||
| 733 | * finished running before doing a module unload, because the module unload | ||
| 734 | * itself uses stop_machine(), which ensures that every preempt disabled section | ||
| 735 | * has finished. | ||
| 736 | */ | ||
| 737 | int marker_probe_unregister(const char *name, | ||
| 738 | marker_probe_func *probe, void *probe_private) | ||
| 739 | { | ||
| 740 | struct marker_entry *entry; | ||
| 741 | struct marker_probe_closure *old; | ||
| 742 | int ret = -ENOENT; | ||
| 743 | | ||
| 744 | mutex_lock(&markers_mutex); | ||
| 745 | entry = get_marker(name); | ||
| 746 | if (!entry) | ||
| 747 | goto end; | ||
| 748 | if (entry->rcu_pending) | ||
| 749 | rcu_barrier_sched(); | ||
| 750 | old = marker_entry_remove_probe(entry, probe, probe_private); | ||
| | if (IS_ERR(old)) { | ||
| | /* | ||
| | * Allocation failure: the entry was not modified. Bail out | ||
| | * instead of handing an ERR_PTR to free_old_closure()/kfree(). | ||
| | */ | ||
| | ret = PTR_ERR(old); | ||
| | goto end; | ||
| | } | ||
| 751 | mutex_unlock(&markers_mutex); /* marker_update_probe_range() takes markers_mutex itself */ | ||
| 752 | marker_update_probes(); | ||
| 753 | mutex_lock(&markers_mutex); | ||
| 754 | entry = get_marker(name); /* look the entry up again: the mutex was dropped */ | ||
| 755 | if (!entry) | ||
| 756 | goto end; /* NOTE(review): old is not freed on this path */ | ||
| 757 | if (entry->rcu_pending) | ||
| 758 | rcu_barrier_sched(); | ||
| 759 | entry->oldptr = old; | ||
| 760 | entry->rcu_pending = 1; | ||
| 761 | /* write rcu_pending before calling the RCU callback */ | ||
| 762 | smp_wmb(); | ||
| 763 | call_rcu_sched(&entry->rcu, free_old_closure); | ||
| 764 | remove_marker(name); /* Ignore busy error message */ | ||
| 765 | ret = 0; | ||
| 766 | end: | ||
| 767 | mutex_unlock(&markers_mutex); | ||
| 768 | return ret; | ||
| 769 | } | ||
| 770 | EXPORT_SYMBOL_GPL(marker_probe_unregister); | ||
| 771 | |||
| 772 | static struct marker_entry * | ||
| 773 | get_marker_from_private_data(marker_probe_func *probe, void *probe_private) | ||
| 774 | { | ||
| 775 | struct marker_entry *entry; | ||
| 776 | unsigned int i; | ||
| 777 | struct hlist_head *head; | ||
| 778 | struct hlist_node *node; | ||
| 779 | |||
| 780 | for (i = 0; i < MARKER_TABLE_SIZE; i++) { | ||
| 781 | head = &marker_table[i]; | ||
| 782 | hlist_for_each_entry(entry, node, head, hlist) { | ||
| 783 | if (!entry->ptype) { | ||
| 784 | if (entry->single.func == probe | ||
| 785 | && entry->single.probe_private | ||
| 786 | == probe_private) | ||
| 787 | return entry; | ||
| 788 | } else { | ||
| 789 | struct marker_probe_closure *closure; | ||
| 790 | closure = entry->multi; | ||
| 791 | for (i = 0; closure[i].func; i++) { | ||
| 792 | if (closure[i].func == probe && | ||
| 793 | closure[i].probe_private | ||
| 794 | == probe_private) | ||
| 795 | return entry; | ||
| 796 | } | ||
| 797 | } | ||
| 798 | } | ||
| 799 | } | ||
| 800 | return NULL; | ||
| 801 | } | ||
| 802 | |||
| 803 | /** | ||
| 804 | * marker_probe_unregister_private_data - Disconnect a probe from a marker | ||
| 805 | * @probe: probe function | ||
| 806 | * @probe_private: probe private data | ||
| 807 | * | ||
| 808 | * Unregister a probe by providing the registered private data. | ||
| 809 | * Only removes the first marker found in hash table. | ||
| 810 | * Return 0 on success or error value. | ||
| 811 | * We do not need to call a synchronize_sched to make sure the probes have | ||
| 812 | * finished running before doing a module unload, because the module unload | ||
| 813 | * itself uses stop_machine(), which insures that every preempt disabled section | ||
| 814 | * have finished. | ||
| 815 | */ | ||
| 816 | int marker_probe_unregister_private_data(marker_probe_func *probe, | ||
| 817 | void *probe_private) | ||
| 818 | { | ||
| 819 | struct marker_entry *entry; | ||
| 820 | int ret = 0; | ||
| 821 | struct marker_probe_closure *old; | ||
| 822 | |||
| 823 | mutex_lock(&markers_mutex); | ||
| 824 | entry = get_marker_from_private_data(probe, probe_private); | ||
| 825 | if (!entry) { | ||
| 826 | ret = -ENOENT; | ||
| 827 | goto end; | ||
| 828 | } | ||
| 829 | if (entry->rcu_pending) | ||
| 830 | rcu_barrier_sched(); | ||
| 831 | old = marker_entry_remove_probe(entry, NULL, probe_private); | ||
| 832 | mutex_unlock(&markers_mutex); | ||
| 833 | marker_update_probes(); | ||
| 834 | mutex_lock(&markers_mutex); | ||
| 835 | entry = get_marker_from_private_data(probe, probe_private); | ||
| 836 | if (!entry) | ||
| 837 | goto end; | ||
| 838 | if (entry->rcu_pending) | ||
| 839 | rcu_barrier_sched(); | ||
| 840 | entry->oldptr = old; | ||
| 841 | entry->rcu_pending = 1; | ||
| 842 | /* write rcu_pending before calling the RCU callback */ | ||
| 843 | smp_wmb(); | ||
| 844 | call_rcu_sched(&entry->rcu, free_old_closure); | ||
| 845 | remove_marker(entry->name); /* Ignore busy error message */ | ||
| 846 | end: | ||
| 847 | mutex_unlock(&markers_mutex); | ||
| 848 | return ret; | ||
| 849 | } | ||
| 850 | EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); | ||
| 851 | |||
| 852 | /** | ||
| 853 | * marker_get_private_data - Get a marker's probe private data | ||
| 854 | * @name: marker name | ||
| 855 | * @probe: probe to match | ||
| 856 | * @num: get the nth matching probe's private data | ||
| 857 | * | ||
| 858 | * Returns the nth private data pointer (starting from 0) matching, or an | ||
| 859 | * ERR_PTR. | ||
| 860 | * Returns the private data pointer, or an ERR_PTR. | ||
| 861 | * The private data pointer should _only_ be dereferenced if the caller is the | ||
| 862 | * owner of the data, or its content could vanish. This is mostly used to | ||
| 863 | * confirm that a caller is the owner of a registered probe. | ||
| 864 | */ | ||
| 865 | void *marker_get_private_data(const char *name, marker_probe_func *probe, | ||
| 866 | int num) | ||
| 867 | { | ||
| 868 | struct hlist_head *head; | ||
| 869 | struct hlist_node *node; | ||
| 870 | struct marker_entry *e; | ||
| 871 | size_t name_len = strlen(name) + 1; | ||
| 872 | u32 hash = jhash(name, name_len-1, 0); | ||
| 873 | int i; | ||
| 874 | |||
| 875 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | ||
| 876 | hlist_for_each_entry(e, node, head, hlist) { | ||
| 877 | if (!strcmp(name, e->name)) { | ||
| 878 | if (!e->ptype) { | ||
| 879 | if (num == 0 && e->single.func == probe) | ||
| 880 | return e->single.probe_private; | ||
| 881 | } else { | ||
| 882 | struct marker_probe_closure *closure; | ||
| 883 | int match = 0; | ||
| 884 | closure = e->multi; | ||
| 885 | for (i = 0; closure[i].func; i++) { | ||
| 886 | if (closure[i].func != probe) | ||
| 887 | continue; | ||
| 888 | if (match++ == num) | ||
| 889 | return closure[i].probe_private; | ||
| 890 | } | ||
| 891 | } | ||
| 892 | break; | ||
| 893 | } | ||
| 894 | } | ||
| 895 | return ERR_PTR(-ENOENT); | ||
| 896 | } | ||
| 897 | EXPORT_SYMBOL_GPL(marker_get_private_data); | ||
| 898 | |||
| 899 | #ifdef CONFIG_MODULES | ||
| 900 | |||
| 901 | int marker_module_notify(struct notifier_block *self, | ||
| 902 | unsigned long val, void *data) | ||
| 903 | { | ||
| 904 | struct module *mod = data; | ||
| 905 | |||
| 906 | switch (val) { | ||
| 907 | case MODULE_STATE_COMING: | ||
| 908 | marker_update_probe_range(mod->markers, | ||
| 909 | mod->markers + mod->num_markers); | ||
| 910 | break; | ||
| 911 | case MODULE_STATE_GOING: | ||
| 912 | marker_update_probe_range(mod->markers, | ||
| 913 | mod->markers + mod->num_markers); | ||
| 914 | break; | ||
| 915 | } | ||
| 916 | return 0; | ||
| 917 | } | ||
| 918 | |||
| 919 | struct notifier_block marker_module_nb = { | ||
| 920 | .notifier_call = marker_module_notify, | ||
| 921 | .priority = 0, | ||
| 922 | }; | ||
| 923 | |||
| 924 | static int init_markers(void) | ||
| 925 | { | ||
| 926 | return register_module_notifier(&marker_module_nb); | ||
| 927 | } | ||
| 928 | __initcall(init_markers); | ||
| 929 | |||
| 930 | #endif /* CONFIG_MODULES */ | ||
diff --git a/kernel/module.c b/kernel/module.c index 05ce49ced8f6..b6ee424245dd 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -2237,10 +2237,6 @@ static noinline struct module *load_module(void __user *umod, | |||
| 2237 | sizeof(*mod->ctors), &mod->num_ctors); | 2237 | sizeof(*mod->ctors), &mod->num_ctors); |
| 2238 | #endif | 2238 | #endif |
| 2239 | 2239 | ||
| 2240 | #ifdef CONFIG_MARKERS | ||
| 2241 | mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers", | ||
| 2242 | sizeof(*mod->markers), &mod->num_markers); | ||
| 2243 | #endif | ||
| 2244 | #ifdef CONFIG_TRACEPOINTS | 2240 | #ifdef CONFIG_TRACEPOINTS |
| 2245 | mod->tracepoints = section_objs(hdr, sechdrs, secstrings, | 2241 | mod->tracepoints = section_objs(hdr, sechdrs, secstrings, |
| 2246 | "__tracepoints", | 2242 | "__tracepoints", |
| @@ -2958,20 +2954,6 @@ void module_layout(struct module *mod, | |||
| 2958 | EXPORT_SYMBOL(module_layout); | 2954 | EXPORT_SYMBOL(module_layout); |
| 2959 | #endif | 2955 | #endif |
| 2960 | 2956 | ||
| 2961 | #ifdef CONFIG_MARKERS | ||
| 2962 | void module_update_markers(void) | ||
| 2963 | { | ||
| 2964 | struct module *mod; | ||
| 2965 | |||
| 2966 | mutex_lock(&module_mutex); | ||
| 2967 | list_for_each_entry(mod, &modules, list) | ||
| 2968 | if (!mod->taints) | ||
| 2969 | marker_update_probe_range(mod->markers, | ||
| 2970 | mod->markers + mod->num_markers); | ||
| 2971 | mutex_unlock(&module_mutex); | ||
| 2972 | } | ||
| 2973 | #endif | ||
| 2974 | |||
| 2975 | #ifdef CONFIG_TRACEPOINTS | 2957 | #ifdef CONFIG_TRACEPOINTS |
| 2976 | void module_update_tracepoints(void) | 2958 | void module_update_tracepoints(void) |
| 2977 | { | 2959 | { |
diff --git a/kernel/profile.c b/kernel/profile.c index 419250ebec4d..a55d3a367ae8 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
| @@ -442,48 +442,51 @@ void profile_tick(int type) | |||
| 442 | 442 | ||
| 443 | #ifdef CONFIG_PROC_FS | 443 | #ifdef CONFIG_PROC_FS |
| 444 | #include <linux/proc_fs.h> | 444 | #include <linux/proc_fs.h> |
| 445 | #include <linux/seq_file.h> | ||
| 445 | #include <asm/uaccess.h> | 446 | #include <asm/uaccess.h> |
| 446 | 447 | ||
| 447 | static int prof_cpu_mask_read_proc(char *page, char **start, off_t off, | 448 | static int prof_cpu_mask_proc_show(struct seq_file *m, void *v) |
| 448 | int count, int *eof, void *data) | ||
| 449 | { | 449 | { |
| 450 | int len = cpumask_scnprintf(page, count, data); | 450 | seq_cpumask(m, prof_cpu_mask); |
| 451 | if (count - len < 2) | 451 | seq_putc(m, '\n'); |
| 452 | return -EINVAL; | 452 | return 0; |
| 453 | len += sprintf(page + len, "\n"); | ||
| 454 | return len; | ||
| 455 | } | 453 | } |
| 456 | 454 | ||
| 457 | static int prof_cpu_mask_write_proc(struct file *file, | 455 | static int prof_cpu_mask_proc_open(struct inode *inode, struct file *file) |
| 458 | const char __user *buffer, unsigned long count, void *data) | 456 | { |
| 457 | return single_open(file, prof_cpu_mask_proc_show, NULL); | ||
| 458 | } | ||
| 459 | |||
| 460 | static ssize_t prof_cpu_mask_proc_write(struct file *file, | ||
| 461 | const char __user *buffer, size_t count, loff_t *pos) | ||
| 459 | { | 462 | { |
| 460 | struct cpumask *mask = data; | ||
| 461 | unsigned long full_count = count, err; | ||
| 462 | cpumask_var_t new_value; | 463 | cpumask_var_t new_value; |
| 464 | int err; | ||
| 463 | 465 | ||
| 464 | if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) | 466 | if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) |
| 465 | return -ENOMEM; | 467 | return -ENOMEM; |
| 466 | 468 | ||
| 467 | err = cpumask_parse_user(buffer, count, new_value); | 469 | err = cpumask_parse_user(buffer, count, new_value); |
| 468 | if (!err) { | 470 | if (!err) { |
| 469 | cpumask_copy(mask, new_value); | 471 | cpumask_copy(prof_cpu_mask, new_value); |
| 470 | err = full_count; | 472 | err = count; |
| 471 | } | 473 | } |
| 472 | free_cpumask_var(new_value); | 474 | free_cpumask_var(new_value); |
| 473 | return err; | 475 | return err; |
| 474 | } | 476 | } |
| 475 | 477 | ||
| 478 | static const struct file_operations prof_cpu_mask_proc_fops = { | ||
| 479 | .open = prof_cpu_mask_proc_open, | ||
| 480 | .read = seq_read, | ||
| 481 | .llseek = seq_lseek, | ||
| 482 | .release = single_release, | ||
| 483 | .write = prof_cpu_mask_proc_write, | ||
| 484 | }; | ||
| 485 | |||
| 476 | void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir) | 486 | void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir) |
| 477 | { | 487 | { |
| 478 | struct proc_dir_entry *entry; | ||
| 479 | |||
| 480 | /* create /proc/irq/prof_cpu_mask */ | 488 | /* create /proc/irq/prof_cpu_mask */ |
| 481 | entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); | 489 | proc_create("prof_cpu_mask", 0600, root_irq_dir, &prof_cpu_mask_proc_fops); |
| 482 | if (!entry) | ||
| 483 | return; | ||
| 484 | entry->data = prof_cpu_mask; | ||
| 485 | entry->read_proc = prof_cpu_mask_read_proc; | ||
| 486 | entry->write_proc = prof_cpu_mask_write_proc; | ||
| 487 | } | 490 | } |
| 488 | 491 | ||
| 489 | /* | 492 | /* |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cc615f84751b..c71e91bf7372 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
| @@ -2414,11 +2414,9 @@ unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; | |||
| 2414 | static void * | 2414 | static void * |
| 2415 | __g_next(struct seq_file *m, loff_t *pos) | 2415 | __g_next(struct seq_file *m, loff_t *pos) |
| 2416 | { | 2416 | { |
| 2417 | unsigned long *array = m->private; | ||
| 2418 | |||
| 2419 | if (*pos >= ftrace_graph_count) | 2417 | if (*pos >= ftrace_graph_count) |
| 2420 | return NULL; | 2418 | return NULL; |
| 2421 | return &array[*pos]; | 2419 | return &ftrace_graph_funcs[*pos]; |
| 2422 | } | 2420 | } |
| 2423 | 2421 | ||
| 2424 | static void * | 2422 | static void * |
| @@ -2482,16 +2480,10 @@ ftrace_graph_open(struct inode *inode, struct file *file) | |||
| 2482 | ftrace_graph_count = 0; | 2480 | ftrace_graph_count = 0; |
| 2483 | memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); | 2481 | memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); |
| 2484 | } | 2482 | } |
| 2483 | mutex_unlock(&graph_lock); | ||
| 2485 | 2484 | ||
| 2486 | if (file->f_mode & FMODE_READ) { | 2485 | if (file->f_mode & FMODE_READ) |
| 2487 | ret = seq_open(file, &ftrace_graph_seq_ops); | 2486 | ret = seq_open(file, &ftrace_graph_seq_ops); |
| 2488 | if (!ret) { | ||
| 2489 | struct seq_file *m = file->private_data; | ||
| 2490 | m->private = ftrace_graph_funcs; | ||
| 2491 | } | ||
| 2492 | } else | ||
| 2493 | file->private_data = ftrace_graph_funcs; | ||
| 2494 | mutex_unlock(&graph_lock); | ||
| 2495 | 2487 | ||
| 2496 | return ret; | 2488 | return ret; |
| 2497 | } | 2489 | } |
| @@ -2560,7 +2552,6 @@ ftrace_graph_write(struct file *file, const char __user *ubuf, | |||
| 2560 | size_t cnt, loff_t *ppos) | 2552 | size_t cnt, loff_t *ppos) |
| 2561 | { | 2553 | { |
| 2562 | struct trace_parser parser; | 2554 | struct trace_parser parser; |
| 2563 | unsigned long *array; | ||
| 2564 | size_t read = 0; | 2555 | size_t read = 0; |
| 2565 | ssize_t ret; | 2556 | ssize_t ret; |
| 2566 | 2557 | ||
| @@ -2574,12 +2565,6 @@ ftrace_graph_write(struct file *file, const char __user *ubuf, | |||
| 2574 | goto out; | 2565 | goto out; |
| 2575 | } | 2566 | } |
| 2576 | 2567 | ||
| 2577 | if (file->f_mode & FMODE_READ) { | ||
| 2578 | struct seq_file *m = file->private_data; | ||
| 2579 | array = m->private; | ||
| 2580 | } else | ||
| 2581 | array = file->private_data; | ||
| 2582 | |||
| 2583 | if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { | 2568 | if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { |
| 2584 | ret = -ENOMEM; | 2569 | ret = -ENOMEM; |
| 2585 | goto out; | 2570 | goto out; |
| @@ -2591,7 +2576,7 @@ ftrace_graph_write(struct file *file, const char __user *ubuf, | |||
| 2591 | parser.buffer[parser.idx] = 0; | 2576 | parser.buffer[parser.idx] = 0; |
| 2592 | 2577 | ||
| 2593 | /* we allow only one expression at a time */ | 2578 | /* we allow only one expression at a time */ |
| 2594 | ret = ftrace_set_func(array, &ftrace_graph_count, | 2579 | ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, |
| 2595 | parser.buffer); | 2580 | parser.buffer); |
| 2596 | if (ret) | 2581 | if (ret) |
| 2597 | goto out; | 2582 | goto out; |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fd52a19dd172..861308072d28 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -125,13 +125,13 @@ int ftrace_dump_on_oops; | |||
| 125 | 125 | ||
| 126 | static int tracing_set_tracer(const char *buf); | 126 | static int tracing_set_tracer(const char *buf); |
| 127 | 127 | ||
| 128 | #define BOOTUP_TRACER_SIZE 100 | 128 | #define MAX_TRACER_SIZE 100 |
| 129 | static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata; | 129 | static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; |
| 130 | static char *default_bootup_tracer; | 130 | static char *default_bootup_tracer; |
| 131 | 131 | ||
| 132 | static int __init set_ftrace(char *str) | 132 | static int __init set_ftrace(char *str) |
| 133 | { | 133 | { |
| 134 | strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE); | 134 | strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); |
| 135 | default_bootup_tracer = bootup_tracer_buf; | 135 | default_bootup_tracer = bootup_tracer_buf; |
| 136 | /* We are using ftrace early, expand it */ | 136 | /* We are using ftrace early, expand it */ |
| 137 | ring_buffer_expanded = 1; | 137 | ring_buffer_expanded = 1; |
| @@ -242,13 +242,6 @@ static struct tracer *trace_types __read_mostly; | |||
| 242 | static struct tracer *current_trace __read_mostly; | 242 | static struct tracer *current_trace __read_mostly; |
| 243 | 243 | ||
| 244 | /* | 244 | /* |
| 245 | * max_tracer_type_len is used to simplify the allocating of | ||
| 246 | * buffers to read userspace tracer names. We keep track of | ||
| 247 | * the longest tracer name registered. | ||
| 248 | */ | ||
| 249 | static int max_tracer_type_len; | ||
| 250 | |||
| 251 | /* | ||
| 252 | * trace_types_lock is used to protect the trace_types list. | 245 | * trace_types_lock is used to protect the trace_types list. |
| 253 | * This lock is also used to keep user access serialized. | 246 | * This lock is also used to keep user access serialized. |
| 254 | * Accesses from userspace will grab this lock while userspace | 247 | * Accesses from userspace will grab this lock while userspace |
| @@ -619,7 +612,6 @@ __releases(kernel_lock) | |||
| 619 | __acquires(kernel_lock) | 612 | __acquires(kernel_lock) |
| 620 | { | 613 | { |
| 621 | struct tracer *t; | 614 | struct tracer *t; |
| 622 | int len; | ||
| 623 | int ret = 0; | 615 | int ret = 0; |
| 624 | 616 | ||
| 625 | if (!type->name) { | 617 | if (!type->name) { |
| @@ -627,6 +619,11 @@ __acquires(kernel_lock) | |||
| 627 | return -1; | 619 | return -1; |
| 628 | } | 620 | } |
| 629 | 621 | ||
| 622 | if (strlen(type->name) > MAX_TRACER_SIZE) { | ||
| 623 | pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); | ||
| 624 | return -1; | ||
| 625 | } | ||
| 626 | |||
| 630 | /* | 627 | /* |
| 631 | * When this gets called we hold the BKL which means that | 628 | * When this gets called we hold the BKL which means that |
| 632 | * preemption is disabled. Various trace selftests however | 629 | * preemption is disabled. Various trace selftests however |
| @@ -641,7 +638,7 @@ __acquires(kernel_lock) | |||
| 641 | for (t = trace_types; t; t = t->next) { | 638 | for (t = trace_types; t; t = t->next) { |
| 642 | if (strcmp(type->name, t->name) == 0) { | 639 | if (strcmp(type->name, t->name) == 0) { |
| 643 | /* already found */ | 640 | /* already found */ |
| 644 | pr_info("Trace %s already registered\n", | 641 | pr_info("Tracer %s already registered\n", |
| 645 | type->name); | 642 | type->name); |
| 646 | ret = -1; | 643 | ret = -1; |
| 647 | goto out; | 644 | goto out; |
| @@ -692,9 +689,6 @@ __acquires(kernel_lock) | |||
| 692 | 689 | ||
| 693 | type->next = trace_types; | 690 | type->next = trace_types; |
| 694 | trace_types = type; | 691 | trace_types = type; |
| 695 | len = strlen(type->name); | ||
| 696 | if (len > max_tracer_type_len) | ||
| 697 | max_tracer_type_len = len; | ||
| 698 | 692 | ||
| 699 | out: | 693 | out: |
| 700 | tracing_selftest_running = false; | 694 | tracing_selftest_running = false; |
| @@ -703,7 +697,7 @@ __acquires(kernel_lock) | |||
| 703 | if (ret || !default_bootup_tracer) | 697 | if (ret || !default_bootup_tracer) |
| 704 | goto out_unlock; | 698 | goto out_unlock; |
| 705 | 699 | ||
| 706 | if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE)) | 700 | if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) |
| 707 | goto out_unlock; | 701 | goto out_unlock; |
| 708 | 702 | ||
| 709 | printk(KERN_INFO "Starting tracer '%s'\n", type->name); | 703 | printk(KERN_INFO "Starting tracer '%s'\n", type->name); |
| @@ -725,14 +719,13 @@ __acquires(kernel_lock) | |||
| 725 | void unregister_tracer(struct tracer *type) | 719 | void unregister_tracer(struct tracer *type) |
| 726 | { | 720 | { |
| 727 | struct tracer **t; | 721 | struct tracer **t; |
| 728 | int len; | ||
| 729 | 722 | ||
| 730 | mutex_lock(&trace_types_lock); | 723 | mutex_lock(&trace_types_lock); |
| 731 | for (t = &trace_types; *t; t = &(*t)->next) { | 724 | for (t = &trace_types; *t; t = &(*t)->next) { |
| 732 | if (*t == type) | 725 | if (*t == type) |
| 733 | goto found; | 726 | goto found; |
| 734 | } | 727 | } |
| 735 | pr_info("Trace %s not registered\n", type->name); | 728 | pr_info("Tracer %s not registered\n", type->name); |
| 736 | goto out; | 729 | goto out; |
| 737 | 730 | ||
| 738 | found: | 731 | found: |
| @@ -745,17 +738,7 @@ void unregister_tracer(struct tracer *type) | |||
| 745 | current_trace->stop(&global_trace); | 738 | current_trace->stop(&global_trace); |
| 746 | current_trace = &nop_trace; | 739 | current_trace = &nop_trace; |
| 747 | } | 740 | } |
| 748 | 741 | out: | |
| 749 | if (strlen(type->name) != max_tracer_type_len) | ||
| 750 | goto out; | ||
| 751 | |||
| 752 | max_tracer_type_len = 0; | ||
| 753 | for (t = &trace_types; *t; t = &(*t)->next) { | ||
| 754 | len = strlen((*t)->name); | ||
| 755 | if (len > max_tracer_type_len) | ||
| 756 | max_tracer_type_len = len; | ||
| 757 | } | ||
| 758 | out: | ||
| 759 | mutex_unlock(&trace_types_lock); | 742 | mutex_unlock(&trace_types_lock); |
| 760 | } | 743 | } |
| 761 | 744 | ||
| @@ -2604,7 +2587,7 @@ static ssize_t | |||
| 2604 | tracing_set_trace_read(struct file *filp, char __user *ubuf, | 2587 | tracing_set_trace_read(struct file *filp, char __user *ubuf, |
| 2605 | size_t cnt, loff_t *ppos) | 2588 | size_t cnt, loff_t *ppos) |
| 2606 | { | 2589 | { |
| 2607 | char buf[max_tracer_type_len+2]; | 2590 | char buf[MAX_TRACER_SIZE+2]; |
| 2608 | int r; | 2591 | int r; |
| 2609 | 2592 | ||
| 2610 | mutex_lock(&trace_types_lock); | 2593 | mutex_lock(&trace_types_lock); |
| @@ -2754,15 +2737,15 @@ static ssize_t | |||
| 2754 | tracing_set_trace_write(struct file *filp, const char __user *ubuf, | 2737 | tracing_set_trace_write(struct file *filp, const char __user *ubuf, |
| 2755 | size_t cnt, loff_t *ppos) | 2738 | size_t cnt, loff_t *ppos) |
| 2756 | { | 2739 | { |
| 2757 | char buf[max_tracer_type_len+1]; | 2740 | char buf[MAX_TRACER_SIZE+1]; |
| 2758 | int i; | 2741 | int i; |
| 2759 | size_t ret; | 2742 | size_t ret; |
| 2760 | int err; | 2743 | int err; |
| 2761 | 2744 | ||
| 2762 | ret = cnt; | 2745 | ret = cnt; |
| 2763 | 2746 | ||
| 2764 | if (cnt > max_tracer_type_len) | 2747 | if (cnt > MAX_TRACER_SIZE) |
| 2765 | cnt = max_tracer_type_len; | 2748 | cnt = MAX_TRACER_SIZE; |
| 2766 | 2749 | ||
| 2767 | if (copy_from_user(&buf, ubuf, cnt)) | 2750 | if (copy_from_user(&buf, ubuf, cnt)) |
| 2768 | return -EFAULT; | 2751 | return -EFAULT; |
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 55a25c933d15..dd44b8768867 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c | |||
| @@ -8,6 +8,57 @@ | |||
| 8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
| 9 | #include "trace.h" | 9 | #include "trace.h" |
| 10 | 10 | ||
| 11 | /* | ||
| 12 | * We can't use a size but a type in alloc_percpu() | ||
| 13 | * So let's create a dummy type that matches the desired size | ||
| 14 | */ | ||
| 15 | typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t; | ||
| 16 | |||
| 17 | char *trace_profile_buf; | ||
| 18 | EXPORT_SYMBOL_GPL(trace_profile_buf); | ||
| 19 | |||
| 20 | char *trace_profile_buf_nmi; | ||
| 21 | EXPORT_SYMBOL_GPL(trace_profile_buf_nmi); | ||
| 22 | |||
| 23 | /* Count the events in use (per event id, not per instance) */ | ||
| 24 | static int total_profile_count; | ||
| 25 | |||
| 26 | static int ftrace_profile_enable_event(struct ftrace_event_call *event) | ||
| 27 | { | ||
| 28 | char *buf; | ||
| 29 | int ret = -ENOMEM; | ||
| 30 | |||
| 31 | if (atomic_inc_return(&event->profile_count)) | ||
| 32 | return 0; | ||
| 33 | |||
| 34 | if (!total_profile_count++) { | ||
| 35 | buf = (char *)alloc_percpu(profile_buf_t); | ||
| 36 | if (!buf) | ||
| 37 | goto fail_buf; | ||
| 38 | |||
| 39 | rcu_assign_pointer(trace_profile_buf, buf); | ||
| 40 | |||
| 41 | buf = (char *)alloc_percpu(profile_buf_t); | ||
| 42 | if (!buf) | ||
| 43 | goto fail_buf_nmi; | ||
| 44 | |||
| 45 | rcu_assign_pointer(trace_profile_buf_nmi, buf); | ||
| 46 | } | ||
| 47 | |||
| 48 | ret = event->profile_enable(); | ||
| 49 | if (!ret) | ||
| 50 | return 0; | ||
| 51 | |||
| 52 | kfree(trace_profile_buf_nmi); | ||
| 53 | fail_buf_nmi: | ||
| 54 | kfree(trace_profile_buf); | ||
| 55 | fail_buf: | ||
| 56 | total_profile_count--; | ||
| 57 | atomic_dec(&event->profile_count); | ||
| 58 | |||
| 59 | return ret; | ||
| 60 | } | ||
| 61 | |||
| 11 | int ftrace_profile_enable(int event_id) | 62 | int ftrace_profile_enable(int event_id) |
| 12 | { | 63 | { |
| 13 | struct ftrace_event_call *event; | 64 | struct ftrace_event_call *event; |
| @@ -17,7 +68,7 @@ int ftrace_profile_enable(int event_id) | |||
| 17 | list_for_each_entry(event, &ftrace_events, list) { | 68 | list_for_each_entry(event, &ftrace_events, list) { |
| 18 | if (event->id == event_id && event->profile_enable && | 69 | if (event->id == event_id && event->profile_enable && |
| 19 | try_module_get(event->mod)) { | 70 | try_module_get(event->mod)) { |
| 20 | ret = event->profile_enable(event); | 71 | ret = ftrace_profile_enable_event(event); |
| 21 | break; | 72 | break; |
| 22 | } | 73 | } |
| 23 | } | 74 | } |
| @@ -26,6 +77,33 @@ int ftrace_profile_enable(int event_id) | |||
| 26 | return ret; | 77 | return ret; |
| 27 | } | 78 | } |
| 28 | 79 | ||
| 80 | static void ftrace_profile_disable_event(struct ftrace_event_call *event) | ||
| 81 | { | ||
| 82 | char *buf, *nmi_buf; | ||
| 83 | |||
| 84 | if (!atomic_add_negative(-1, &event->profile_count)) | ||
| 85 | return; | ||
| 86 | |||
| 87 | event->profile_disable(); | ||
| 88 | |||
| 89 | if (!--total_profile_count) { | ||
| 90 | buf = trace_profile_buf; | ||
| 91 | rcu_assign_pointer(trace_profile_buf, NULL); | ||
| 92 | |||
| 93 | nmi_buf = trace_profile_buf_nmi; | ||
| 94 | rcu_assign_pointer(trace_profile_buf_nmi, NULL); | ||
| 95 | |||
| 96 | /* | ||
| 97 | * Ensure every events in profiling have finished before | ||
| 98 | * releasing the buffers | ||
| 99 | */ | ||
| 100 | synchronize_sched(); | ||
| 101 | |||
| 102 | free_percpu(buf); | ||
| 103 | free_percpu(nmi_buf); | ||
| 104 | } | ||
| 105 | } | ||
| 106 | |||
| 29 | void ftrace_profile_disable(int event_id) | 107 | void ftrace_profile_disable(int event_id) |
| 30 | { | 108 | { |
| 31 | struct ftrace_event_call *event; | 109 | struct ftrace_event_call *event; |
| @@ -33,7 +111,7 @@ void ftrace_profile_disable(int event_id) | |||
| 33 | mutex_lock(&event_mutex); | 111 | mutex_lock(&event_mutex); |
| 34 | list_for_each_entry(event, &ftrace_events, list) { | 112 | list_for_each_entry(event, &ftrace_events, list) { |
| 35 | if (event->id == event_id) { | 113 | if (event->id == event_id) { |
| 36 | event->profile_disable(event); | 114 | ftrace_profile_disable_event(event); |
| 37 | module_put(event->mod); | 115 | module_put(event->mod); |
| 38 | break; | 116 | break; |
| 39 | } | 117 | } |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 56c260b83a9c..6f03c8a1105e 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
| @@ -271,42 +271,32 @@ ftrace_event_write(struct file *file, const char __user *ubuf, | |||
| 271 | static void * | 271 | static void * |
| 272 | t_next(struct seq_file *m, void *v, loff_t *pos) | 272 | t_next(struct seq_file *m, void *v, loff_t *pos) |
| 273 | { | 273 | { |
| 274 | struct list_head *list = m->private; | 274 | struct ftrace_event_call *call = v; |
| 275 | struct ftrace_event_call *call; | ||
| 276 | 275 | ||
| 277 | (*pos)++; | 276 | (*pos)++; |
| 278 | 277 | ||
| 279 | for (;;) { | 278 | list_for_each_entry_continue(call, &ftrace_events, list) { |
| 280 | if (list == &ftrace_events) | ||
| 281 | return NULL; | ||
| 282 | |||
| 283 | call = list_entry(list, struct ftrace_event_call, list); | ||
| 284 | |||
| 285 | /* | 279 | /* |
| 286 | * The ftrace subsystem is for showing formats only. | 280 | * The ftrace subsystem is for showing formats only. |
| 287 | * They can not be enabled or disabled via the event files. | 281 | * They can not be enabled or disabled via the event files. |
| 288 | */ | 282 | */ |
| 289 | if (call->regfunc) | 283 | if (call->regfunc) |
| 290 | break; | 284 | return call; |
| 291 | |||
| 292 | list = list->next; | ||
| 293 | } | 285 | } |
| 294 | 286 | ||
| 295 | m->private = list->next; | 287 | return NULL; |
| 296 | |||
| 297 | return call; | ||
| 298 | } | 288 | } |
| 299 | 289 | ||
| 300 | static void *t_start(struct seq_file *m, loff_t *pos) | 290 | static void *t_start(struct seq_file *m, loff_t *pos) |
| 301 | { | 291 | { |
| 302 | struct ftrace_event_call *call = NULL; | 292 | struct ftrace_event_call *call; |
| 303 | loff_t l; | 293 | loff_t l; |
| 304 | 294 | ||
| 305 | mutex_lock(&event_mutex); | 295 | mutex_lock(&event_mutex); |
| 306 | 296 | ||
| 307 | m->private = ftrace_events.next; | 297 | call = list_entry(&ftrace_events, struct ftrace_event_call, list); |
| 308 | for (l = 0; l <= *pos; ) { | 298 | for (l = 0; l <= *pos; ) { |
| 309 | call = t_next(m, NULL, &l); | 299 | call = t_next(m, call, &l); |
| 310 | if (!call) | 300 | if (!call) |
| 311 | break; | 301 | break; |
| 312 | } | 302 | } |
| @@ -316,37 +306,28 @@ static void *t_start(struct seq_file *m, loff_t *pos) | |||
| 316 | static void * | 306 | static void * |
| 317 | s_next(struct seq_file *m, void *v, loff_t *pos) | 307 | s_next(struct seq_file *m, void *v, loff_t *pos) |
| 318 | { | 308 | { |
| 319 | struct list_head *list = m->private; | 309 | struct ftrace_event_call *call = v; |
| 320 | struct ftrace_event_call *call; | ||
| 321 | 310 | ||
| 322 | (*pos)++; | 311 | (*pos)++; |
| 323 | 312 | ||
| 324 | retry: | 313 | list_for_each_entry_continue(call, &ftrace_events, list) { |
| 325 | if (list == &ftrace_events) | 314 | if (call->enabled) |
| 326 | return NULL; | 315 | return call; |
| 327 | |||
| 328 | call = list_entry(list, struct ftrace_event_call, list); | ||
| 329 | |||
| 330 | if (!call->enabled) { | ||
| 331 | list = list->next; | ||
| 332 | goto retry; | ||
| 333 | } | 316 | } |
| 334 | 317 | ||
| 335 | m->private = list->next; | 318 | return NULL; |
| 336 | |||
| 337 | return call; | ||
| 338 | } | 319 | } |
| 339 | 320 | ||
| 340 | static void *s_start(struct seq_file *m, loff_t *pos) | 321 | static void *s_start(struct seq_file *m, loff_t *pos) |
| 341 | { | 322 | { |
| 342 | struct ftrace_event_call *call = NULL; | 323 | struct ftrace_event_call *call; |
| 343 | loff_t l; | 324 | loff_t l; |
| 344 | 325 | ||
| 345 | mutex_lock(&event_mutex); | 326 | mutex_lock(&event_mutex); |
| 346 | 327 | ||
| 347 | m->private = ftrace_events.next; | 328 | call = list_entry(&ftrace_events, struct ftrace_event_call, list); |
| 348 | for (l = 0; l <= *pos; ) { | 329 | for (l = 0; l <= *pos; ) { |
| 349 | call = s_next(m, NULL, &l); | 330 | call = s_next(m, call, &l); |
| 350 | if (!call) | 331 | if (!call) |
| 351 | break; | 332 | break; |
| 352 | } | 333 | } |
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 687699d365ae..2547d8813cf0 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c | |||
| @@ -11,7 +11,6 @@ | |||
| 11 | #include <linux/ftrace.h> | 11 | #include <linux/ftrace.h> |
| 12 | #include <linux/string.h> | 12 | #include <linux/string.h> |
| 13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
| 14 | #include <linux/marker.h> | ||
| 15 | #include <linux/mutex.h> | 14 | #include <linux/mutex.h> |
| 16 | #include <linux/ctype.h> | 15 | #include <linux/ctype.h> |
| 17 | #include <linux/list.h> | 16 | #include <linux/list.h> |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8712ce3c6a0e..7a3550cf2597 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
| @@ -384,10 +384,13 @@ static int sys_prof_refcount_exit; | |||
| 384 | 384 | ||
| 385 | static void prof_syscall_enter(struct pt_regs *regs, long id) | 385 | static void prof_syscall_enter(struct pt_regs *regs, long id) |
| 386 | { | 386 | { |
| 387 | struct syscall_trace_enter *rec; | ||
| 388 | struct syscall_metadata *sys_data; | 387 | struct syscall_metadata *sys_data; |
| 388 | struct syscall_trace_enter *rec; | ||
| 389 | unsigned long flags; | ||
| 390 | char *raw_data; | ||
| 389 | int syscall_nr; | 391 | int syscall_nr; |
| 390 | int size; | 392 | int size; |
| 393 | int cpu; | ||
| 391 | 394 | ||
| 392 | syscall_nr = syscall_get_nr(current, regs); | 395 | syscall_nr = syscall_get_nr(current, regs); |
| 393 | if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) | 396 | if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) |
| @@ -402,20 +405,38 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) | |||
| 402 | size = ALIGN(size + sizeof(u32), sizeof(u64)); | 405 | size = ALIGN(size + sizeof(u32), sizeof(u64)); |
| 403 | size -= sizeof(u32); | 406 | size -= sizeof(u32); |
| 404 | 407 | ||
| 405 | do { | 408 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, |
| 406 | char raw_data[size]; | 409 | "profile buffer not large enough")) |
| 410 | return; | ||
| 411 | |||
| 412 | /* Protect the per cpu buffer, begin the rcu read side */ | ||
| 413 | local_irq_save(flags); | ||
| 407 | 414 | ||
| 408 | /* zero the dead bytes from align to not leak stack to user */ | 415 | cpu = smp_processor_id(); |
| 409 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | 416 | |
| 417 | if (in_nmi()) | ||
| 418 | raw_data = rcu_dereference(trace_profile_buf_nmi); | ||
| 419 | else | ||
| 420 | raw_data = rcu_dereference(trace_profile_buf); | ||
| 421 | |||
| 422 | if (!raw_data) | ||
| 423 | goto end; | ||
| 410 | 424 | ||
| 411 | rec = (struct syscall_trace_enter *) raw_data; | 425 | raw_data = per_cpu_ptr(raw_data, cpu); |
| 412 | tracing_generic_entry_update(&rec->ent, 0, 0); | 426 | |
| 413 | rec->ent.type = sys_data->enter_id; | 427 | /* zero the dead bytes from align to not leak stack to user */ |
| 414 | rec->nr = syscall_nr; | 428 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; |
| 415 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | 429 | |
| 416 | (unsigned long *)&rec->args); | 430 | rec = (struct syscall_trace_enter *) raw_data; |
| 417 | perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); | 431 | tracing_generic_entry_update(&rec->ent, 0, 0); |
| 418 | } while(0); | 432 | rec->ent.type = sys_data->enter_id; |
| 433 | rec->nr = syscall_nr; | ||
| 434 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | ||
| 435 | (unsigned long *)&rec->args); | ||
| 436 | perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); | ||
| 437 | |||
| 438 | end: | ||
| 439 | local_irq_restore(flags); | ||
| 419 | } | 440 | } |
| 420 | 441 | ||
| 421 | int reg_prof_syscall_enter(char *name) | 442 | int reg_prof_syscall_enter(char *name) |
| @@ -460,8 +481,12 @@ void unreg_prof_syscall_enter(char *name) | |||
| 460 | static void prof_syscall_exit(struct pt_regs *regs, long ret) | 481 | static void prof_syscall_exit(struct pt_regs *regs, long ret) |
| 461 | { | 482 | { |
| 462 | struct syscall_metadata *sys_data; | 483 | struct syscall_metadata *sys_data; |
| 463 | struct syscall_trace_exit rec; | 484 | struct syscall_trace_exit *rec; |
| 485 | unsigned long flags; | ||
| 464 | int syscall_nr; | 486 | int syscall_nr; |
| 487 | char *raw_data; | ||
| 488 | int size; | ||
| 489 | int cpu; | ||
| 465 | 490 | ||
| 466 | syscall_nr = syscall_get_nr(current, regs); | 491 | syscall_nr = syscall_get_nr(current, regs); |
| 467 | if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) | 492 | if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) |
| @@ -471,12 +496,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
| 471 | if (!sys_data) | 496 | if (!sys_data) |
| 472 | return; | 497 | return; |
| 473 | 498 | ||
| 474 | tracing_generic_entry_update(&rec.ent, 0, 0); | 499 | /* We can probably do that at build time */ |
| 475 | rec.ent.type = sys_data->exit_id; | 500 | size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); |
| 476 | rec.nr = syscall_nr; | 501 | size -= sizeof(u32); |
| 477 | rec.ret = syscall_get_return_value(current, regs); | ||
| 478 | 502 | ||
| 479 | perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec)); | 503 | /* |
| 504 | * Impossible, but be paranoid with the future | ||
| 505 | * How to put this check outside runtime? | ||
| 506 | */ | ||
| 507 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, | ||
| 508 | "exit event has grown above profile buffer size")) | ||
| 509 | return; | ||
| 510 | |||
| 511 | /* Protect the per cpu buffer, begin the rcu read side */ | ||
| 512 | local_irq_save(flags); | ||
| 513 | cpu = smp_processor_id(); | ||
| 514 | |||
| 515 | if (in_nmi()) | ||
| 516 | raw_data = rcu_dereference(trace_profile_buf_nmi); | ||
| 517 | else | ||
| 518 | raw_data = rcu_dereference(trace_profile_buf); | ||
| 519 | |||
| 520 | if (!raw_data) | ||
| 521 | goto end; | ||
| 522 | |||
| 523 | raw_data = per_cpu_ptr(raw_data, cpu); | ||
| 524 | |||
| 525 | /* zero the dead bytes from align to not leak stack to user */ | ||
| 526 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
| 527 | |||
| 528 | rec = (struct syscall_trace_exit *)raw_data; | ||
| 529 | |||
| 530 | tracing_generic_entry_update(&rec->ent, 0, 0); | ||
| 531 | rec->ent.type = sys_data->exit_id; | ||
| 532 | rec->nr = syscall_nr; | ||
| 533 | rec->ret = syscall_get_return_value(current, regs); | ||
| 534 | |||
| 535 | perf_tpcounter_event(sys_data->exit_id, 0, 1, rec, size); | ||
| 536 | |||
| 537 | end: | ||
| 538 | local_irq_restore(flags); | ||
| 480 | } | 539 | } |
| 481 | 540 | ||
| 482 | int reg_prof_syscall_exit(char *name) | 541 | int reg_prof_syscall_exit(char *name) |
diff --git a/samples/Kconfig b/samples/Kconfig index 428b065ba695..b92bde3c6a89 100644 --- a/samples/Kconfig +++ b/samples/Kconfig | |||
| @@ -7,12 +7,6 @@ menuconfig SAMPLES | |||
| 7 | 7 | ||
| 8 | if SAMPLES | 8 | if SAMPLES |
| 9 | 9 | ||
| 10 | config SAMPLE_MARKERS | ||
| 11 | tristate "Build markers examples -- loadable modules only" | ||
| 12 | depends on MARKERS && m | ||
| 13 | help | ||
| 14 | This build markers example modules. | ||
| 15 | |||
| 16 | config SAMPLE_TRACEPOINTS | 10 | config SAMPLE_TRACEPOINTS |
| 17 | tristate "Build tracepoints examples -- loadable modules only" | 11 | tristate "Build tracepoints examples -- loadable modules only" |
| 18 | depends on TRACEPOINTS && m | 12 | depends on TRACEPOINTS && m |
diff --git a/samples/Makefile b/samples/Makefile index 13e4b470b539..43343a03b1f4 100644 --- a/samples/Makefile +++ b/samples/Makefile | |||
| @@ -1,3 +1,3 @@ | |||
| 1 | # Makefile for Linux samples code | 1 | # Makefile for Linux samples code |
| 2 | 2 | ||
| 3 | obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ trace_events/ | 3 | obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ |
diff --git a/samples/markers/Makefile b/samples/markers/Makefile deleted file mode 100644 index 6d7231265f0f..000000000000 --- a/samples/markers/Makefile +++ /dev/null | |||
| @@ -1,4 +0,0 @@ | |||
| 1 | # builds the kprobes example kernel modules; | ||
| 2 | # then to use one (as root): insmod <module_name.ko> | ||
| 3 | |||
| 4 | obj-$(CONFIG_SAMPLE_MARKERS) += probe-example.o marker-example.o | ||
diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c deleted file mode 100644 index e9cd9c0bc84f..000000000000 --- a/samples/markers/marker-example.c +++ /dev/null | |||
| @@ -1,53 +0,0 @@ | |||
| 1 | /* marker-example.c | ||
| 2 | * | ||
| 3 | * Executes a marker when /proc/marker-example is opened. | ||
| 4 | * | ||
| 5 | * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> | ||
| 6 | * | ||
| 7 | * This file is released under the GPLv2. | ||
| 8 | * See the file COPYING for more details. | ||
| 9 | */ | ||
| 10 | |||
| 11 | #include <linux/module.h> | ||
| 12 | #include <linux/marker.h> | ||
| 13 | #include <linux/sched.h> | ||
| 14 | #include <linux/proc_fs.h> | ||
| 15 | |||
| 16 | struct proc_dir_entry *pentry_example; | ||
| 17 | |||
| 18 | static int my_open(struct inode *inode, struct file *file) | ||
| 19 | { | ||
| 20 | int i; | ||
| 21 | |||
| 22 | trace_mark(subsystem_event, "integer %d string %s", 123, | ||
| 23 | "example string"); | ||
| 24 | for (i = 0; i < 10; i++) | ||
| 25 | trace_mark(subsystem_eventb, MARK_NOARGS); | ||
| 26 | return -EPERM; | ||
| 27 | } | ||
| 28 | |||
| 29 | static struct file_operations mark_ops = { | ||
| 30 | .open = my_open, | ||
| 31 | }; | ||
| 32 | |||
| 33 | static int __init example_init(void) | ||
| 34 | { | ||
| 35 | printk(KERN_ALERT "example init\n"); | ||
| 36 | pentry_example = proc_create("marker-example", 0444, NULL, &mark_ops); | ||
| 37 | if (!pentry_example) | ||
| 38 | return -EPERM; | ||
| 39 | return 0; | ||
| 40 | } | ||
| 41 | |||
| 42 | static void __exit example_exit(void) | ||
| 43 | { | ||
| 44 | printk(KERN_ALERT "example exit\n"); | ||
| 45 | remove_proc_entry("marker-example", NULL); | ||
| 46 | } | ||
| 47 | |||
| 48 | module_init(example_init) | ||
| 49 | module_exit(example_exit) | ||
| 50 | |||
| 51 | MODULE_LICENSE("GPL"); | ||
| 52 | MODULE_AUTHOR("Mathieu Desnoyers"); | ||
| 53 | MODULE_DESCRIPTION("Marker example"); | ||
diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c deleted file mode 100644 index 2dfb3b32937e..000000000000 --- a/samples/markers/probe-example.c +++ /dev/null | |||
| @@ -1,92 +0,0 @@ | |||
| 1 | /* probe-example.c | ||
| 2 | * | ||
| 3 | * Connects two functions to marker call sites. | ||
| 4 | * | ||
| 5 | * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> | ||
| 6 | * | ||
| 7 | * This file is released under the GPLv2. | ||
| 8 | * See the file COPYING for more details. | ||
| 9 | */ | ||
| 10 | |||
| 11 | #include <linux/sched.h> | ||
| 12 | #include <linux/kernel.h> | ||
| 13 | #include <linux/module.h> | ||
| 14 | #include <linux/marker.h> | ||
| 15 | #include <asm/atomic.h> | ||
| 16 | |||
| 17 | struct probe_data { | ||
| 18 | const char *name; | ||
| 19 | const char *format; | ||
| 20 | marker_probe_func *probe_func; | ||
| 21 | }; | ||
| 22 | |||
| 23 | void probe_subsystem_event(void *probe_data, void *call_data, | ||
| 24 | const char *format, va_list *args) | ||
| 25 | { | ||
| 26 | /* Declare args */ | ||
| 27 | unsigned int value; | ||
| 28 | const char *mystr; | ||
| 29 | |||
| 30 | /* Assign args */ | ||
| 31 | value = va_arg(*args, typeof(value)); | ||
| 32 | mystr = va_arg(*args, typeof(mystr)); | ||
| 33 | |||
| 34 | /* Call printk */ | ||
| 35 | printk(KERN_INFO "Value %u, string %s\n", value, mystr); | ||
| 36 | |||
| 37 | /* or count, check rights, serialize data in a buffer */ | ||
| 38 | } | ||
| 39 | |||
| 40 | atomic_t eventb_count = ATOMIC_INIT(0); | ||
| 41 | |||
| 42 | void probe_subsystem_eventb(void *probe_data, void *call_data, | ||
| 43 | const char *format, va_list *args) | ||
| 44 | { | ||
| 45 | /* Increment counter */ | ||
| 46 | atomic_inc(&eventb_count); | ||
| 47 | } | ||
| 48 | |||
| 49 | static struct probe_data probe_array[] = | ||
| 50 | { | ||
| 51 | { .name = "subsystem_event", | ||
| 52 | .format = "integer %d string %s", | ||
| 53 | .probe_func = probe_subsystem_event }, | ||
| 54 | { .name = "subsystem_eventb", | ||
| 55 | .format = MARK_NOARGS, | ||
| 56 | .probe_func = probe_subsystem_eventb }, | ||
| 57 | }; | ||
| 58 | |||
| 59 | static int __init probe_init(void) | ||
| 60 | { | ||
| 61 | int result; | ||
| 62 | int i; | ||
| 63 | |||
| 64 | for (i = 0; i < ARRAY_SIZE(probe_array); i++) { | ||
| 65 | result = marker_probe_register(probe_array[i].name, | ||
| 66 | probe_array[i].format, | ||
| 67 | probe_array[i].probe_func, &probe_array[i]); | ||
| 68 | if (result) | ||
| 69 | printk(KERN_INFO "Unable to register probe %s\n", | ||
| 70 | probe_array[i].name); | ||
| 71 | } | ||
| 72 | return 0; | ||
| 73 | } | ||
| 74 | |||
| 75 | static void __exit probe_fini(void) | ||
| 76 | { | ||
| 77 | int i; | ||
| 78 | |||
| 79 | for (i = 0; i < ARRAY_SIZE(probe_array); i++) | ||
| 80 | marker_probe_unregister(probe_array[i].name, | ||
| 81 | probe_array[i].probe_func, &probe_array[i]); | ||
| 82 | printk(KERN_INFO "Number of event b : %u\n", | ||
| 83 | atomic_read(&eventb_count)); | ||
| 84 | marker_synchronize_unregister(); | ||
| 85 | } | ||
| 86 | |||
| 87 | module_init(probe_init); | ||
| 88 | module_exit(probe_fini); | ||
| 89 | |||
| 90 | MODULE_LICENSE("GPL"); | ||
| 91 | MODULE_AUTHOR("Mathieu Desnoyers"); | ||
| 92 | MODULE_DESCRIPTION("SUBSYSTEM Probe"); | ||
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index f4053dc7b5d6..8f14c81abbc7 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost | |||
| @@ -13,7 +13,6 @@ | |||
| 13 | # 2) modpost is then used to | 13 | # 2) modpost is then used to |
| 14 | # 3) create one <module>.mod.c file pr. module | 14 | # 3) create one <module>.mod.c file pr. module |
| 15 | # 4) create one Module.symvers file with CRC for all exported symbols | 15 | # 4) create one Module.symvers file with CRC for all exported symbols |
| 16 | # 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers | ||
| 17 | # 5) compile all <module>.mod.c files | 16 | # 5) compile all <module>.mod.c files |
| 18 | # 6) final link of the module to a <module.ko> file | 17 | # 6) final link of the module to a <module.ko> file |
| 19 | 18 | ||
| @@ -59,10 +58,6 @@ include scripts/Makefile.lib | |||
| 59 | 58 | ||
| 60 | kernelsymfile := $(objtree)/Module.symvers | 59 | kernelsymfile := $(objtree)/Module.symvers |
| 61 | modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers | 60 | modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers |
| 62 | kernelmarkersfile := $(objtree)/Module.markers | ||
| 63 | modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers | ||
| 64 | |||
| 65 | markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) | ||
| 66 | 61 | ||
| 67 | # Step 1), find all modules listed in $(MODVERDIR)/ | 62 | # Step 1), find all modules listed in $(MODVERDIR)/ |
| 68 | __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) | 63 | __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) |
| @@ -85,8 +80,6 @@ modpost = scripts/mod/modpost \ | |||
| 85 | $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \ | 80 | $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \ |
| 86 | $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ | 81 | $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ |
| 87 | $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \ | 82 | $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \ |
| 88 | $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ | ||
| 89 | $(if $(CONFIG_MARKERS),-M $(markersfile)) \ | ||
| 90 | $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) \ | 83 | $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) \ |
| 91 | $(if $(cross_build),-c) | 84 | $(if $(cross_build),-c) |
| 92 | 85 | ||
| @@ -101,17 +94,12 @@ quiet_cmd_kernel-mod = MODPOST $@ | |||
| 101 | cmd_kernel-mod = $(modpost) $@ | 94 | cmd_kernel-mod = $(modpost) $@ |
| 102 | 95 | ||
| 103 | vmlinux.o: FORCE | 96 | vmlinux.o: FORCE |
| 104 | @rm -fr $(kernelmarkersfile) | ||
| 105 | $(call cmd,kernel-mod) | 97 | $(call cmd,kernel-mod) |
| 106 | 98 | ||
| 107 | # Declare generated files as targets for modpost | 99 | # Declare generated files as targets for modpost |
| 108 | $(symverfile): __modpost ; | 100 | $(symverfile): __modpost ; |
| 109 | $(modules:.ko=.mod.c): __modpost ; | 101 | $(modules:.ko=.mod.c): __modpost ; |
| 110 | 102 | ||
| 111 | ifdef CONFIG_MARKERS | ||
| 112 | $(markersfile): __modpost ; | ||
| 113 | endif | ||
| 114 | |||
| 115 | 103 | ||
| 116 | # Step 5), compile all *.mod.c files | 104 | # Step 5), compile all *.mod.c files |
| 117 | 105 | ||
