author      Jonathan Herman <hermanjl@cs.unc.edu>  2013-01-22 10:38:37 -0500
committer   Jonathan Herman <hermanjl@cs.unc.edu>  2013-01-22 10:38:37 -0500
commit      fcc9d2e5a6c89d22b8b773a64fb4ad21ac318446 (patch)
tree        a57612d1888735a2ec7972891b68c1ac5ec8faea /kernel/trace
parent      8dea78da5cee153b8af9c07a2745f6c55057fe12 (diff)

Diffstat (limited to 'kernel/trace')

-rw-r--r--  kernel/trace/trace_workqueue.c  300
-rw-r--r--  kernel/trace/tracedump.c        682
-rw-r--r--  kernel/trace/tracelevel.c       142

3 files changed, 1124 insertions, 0 deletions
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
new file mode 100644
index 00000000000..209b379a472
--- /dev/null
+++ b/kernel/trace/trace_workqueue.c
@@ -0,0 +1,300 @@
/*
 * Workqueue statistical tracer.
 *
 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
 *
 */


#include <trace/events/workqueue.h>
#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/kref.h>
#include "trace_stat.h"
#include "trace.h"


/* A cpu workqueue thread */
struct cpu_workqueue_stats {
        struct list_head list;
        struct kref kref;
        int cpu;
        pid_t pid;
        /* Can be inserted from interrupt or user context; needs to be atomic */
        atomic_t inserted;
        /*
         * Doesn't need to be atomic: works are serialized in a single
         * workqueue thread on a single CPU.
         */
        unsigned int executed;
};

/* List of workqueue threads on one cpu */
struct workqueue_global_stats {
        struct list_head list;
        spinlock_t lock;
};

/* No global lock is needed: this is allocated before the workqueues
 * are created and is never freed.
 */
static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))

static void cpu_workqueue_stat_free(struct kref *kref)
{
        kfree(container_of(kref, struct cpu_workqueue_stats, kref));
}

/* Insertion of a work */
static void
probe_workqueue_insertion(void *ignore,
                          struct task_struct *wq_thread,
                          struct work_struct *work)
{
        int cpu = cpumask_first(&wq_thread->cpus_allowed);
        struct cpu_workqueue_stats *node;
        unsigned long flags;

        spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
        list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
                if (node->pid == wq_thread->pid) {
                        atomic_inc(&node->inserted);
                        goto found;
                }
        }
        pr_debug("trace_workqueue: entry not found\n");
found:
        spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
}

/* Execution of a work */
static void
probe_workqueue_execution(void *ignore,
                          struct task_struct *wq_thread,
                          struct work_struct *work)
{
        int cpu = cpumask_first(&wq_thread->cpus_allowed);
        struct cpu_workqueue_stats *node;
        unsigned long flags;

        spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
        list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
                if (node->pid == wq_thread->pid) {
                        node->executed++;
                        goto found;
                }
        }
        pr_debug("trace_workqueue: entry not found\n");
found:
        spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
}

/* Creation of a cpu workqueue thread */
static void probe_workqueue_creation(void *ignore,
                                     struct task_struct *wq_thread, int cpu)
{
        struct cpu_workqueue_stats *cws;
        unsigned long flags;

        WARN_ON(cpu < 0);

        /* Workqueues are sometimes created in atomic context */
        cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);
        if (!cws) {
                pr_warning("trace_workqueue: not enough memory\n");
                return;
        }
        INIT_LIST_HEAD(&cws->list);
        kref_init(&cws->kref);
        cws->cpu = cpu;
        cws->pid = wq_thread->pid;

        spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
        list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
        spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
}

/* Destruction of a cpu workqueue thread */
static void
probe_workqueue_destruction(void *ignore, struct task_struct *wq_thread)
{
        /* A workqueue thread only executes on one cpu */
        int cpu = cpumask_first(&wq_thread->cpus_allowed);
        struct cpu_workqueue_stats *node, *next;
        unsigned long flags;

        spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
        list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
                                 list) {
                if (node->pid == wq_thread->pid) {
                        list_del(&node->list);
                        kref_put(&node->kref, cpu_workqueue_stat_free);
                        goto found;
                }
        }

        pr_debug("trace_workqueue: could not find workqueue to destroy\n");
found:
        spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
}

static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
{
        unsigned long flags;
        struct cpu_workqueue_stats *ret = NULL;

        spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);

        if (!list_empty(&workqueue_cpu_stat(cpu)->list)) {
                ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
                                 struct cpu_workqueue_stats, list);
                kref_get(&ret->kref);
        }

        spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);

        return ret;
}

static void *workqueue_stat_start(struct tracer_stat *trace)
{
        int cpu;
        void *ret = NULL;

        for_each_possible_cpu(cpu) {
                ret = workqueue_stat_start_cpu(cpu);
                if (ret)
                        return ret;
        }
        return NULL;
}

static void *workqueue_stat_next(void *prev, int idx)
{
        struct cpu_workqueue_stats *prev_cws = prev;
        struct cpu_workqueue_stats *ret;
        int cpu = prev_cws->cpu;
        unsigned long flags;

        spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
        if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
                spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
                do {
                        cpu = cpumask_next(cpu, cpu_possible_mask);
                        if (cpu >= nr_cpu_ids)
                                return NULL;
                } while (!(ret = workqueue_stat_start_cpu(cpu)));
                return ret;
        } else {
                ret = list_entry(prev_cws->list.next,
                                 struct cpu_workqueue_stats, list);
                kref_get(&ret->kref);
        }
        spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);

        return ret;
}

static int workqueue_stat_show(struct seq_file *s, void *p)
{
        struct cpu_workqueue_stats *cws = p;
        struct pid *pid;
        struct task_struct *tsk;

        pid = find_get_pid(cws->pid);
        if (pid) {
                tsk = get_pid_task(pid, PIDTYPE_PID);
                if (tsk) {
                        seq_printf(s, "%3d %6d     %6u       %s\n", cws->cpu,
                                   atomic_read(&cws->inserted), cws->executed,
                                   tsk->comm);
                        put_task_struct(tsk);
                }
                put_pid(pid);
        }

        return 0;
}

static void workqueue_stat_release(void *stat)
{
        struct cpu_workqueue_stats *node = stat;

        kref_put(&node->kref, cpu_workqueue_stat_free);
}

static int workqueue_stat_headers(struct seq_file *s)
{
        seq_printf(s, "# CPU  INSERTED  EXECUTED   NAME\n");
        seq_printf(s, "# |      |         |          |\n");
        return 0;
}

struct tracer_stat workqueue_stats __read_mostly = {
        .name = "workqueues",
        .stat_start = workqueue_stat_start,
        .stat_next = workqueue_stat_next,
        .stat_show = workqueue_stat_show,
        .stat_release = workqueue_stat_release,
        .stat_headers = workqueue_stat_headers
};


int __init stat_workqueue_init(void)
{
        if (register_stat_tracer(&workqueue_stats)) {
                pr_warning("Unable to register workqueue stat tracer\n");
                return 1;
        }

        return 0;
}
fs_initcall(stat_workqueue_init);

/*
 * Workqueues are created very early, just after pre-smp initcalls.
 * So we must register our tracepoints at this stage.
 */
int __init trace_workqueue_early_init(void)
{
        int ret, cpu;

        for_each_possible_cpu(cpu) {
                spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
                INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
        }

        ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
        if (ret)
                goto out;

        ret = register_trace_workqueue_execution(probe_workqueue_execution, NULL);
        if (ret)
                goto no_insertion;

        ret = register_trace_workqueue_creation(probe_workqueue_creation, NULL);
        if (ret)
                goto no_execution;

        ret = register_trace_workqueue_destruction(probe_workqueue_destruction, NULL);
        if (ret)
                goto no_creation;

        return 0;

no_creation:
        unregister_trace_workqueue_creation(probe_workqueue_creation, NULL);
no_execution:
        unregister_trace_workqueue_execution(probe_workqueue_execution, NULL);
no_insertion:
        unregister_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
out:
        pr_warning("trace_workqueue: unable to trace workqueues\n");

        return 1;
}
early_initcall(trace_workqueue_early_init);
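
The statistics registered above are exposed through the trace_stat directory of
debugfs, typically at /sys/kernel/debug/tracing/trace_stat/workqueues (the
mount point is an assumption; adjust it to the local setup). A minimal
userspace sketch that dumps the file:

/* Minimal sketch: print the workqueue stats file to stdout.
 * Assumes debugfs is mounted at /sys/kernel/debug. */
#include <stdio.h>

int main(void)
{
        const char *path =
                "/sys/kernel/debug/tracing/trace_stat/workqueues";
        char buf[4096];
        size_t n;
        FILE *f = fopen(path, "r");

        if (!f) {
                perror(path);
                return 1;
        }
        while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
                fwrite(buf, 1, n, stdout);
        fclose(f);
        return 0;
}

The output follows the layout printed by workqueue_stat_headers() and
workqueue_stat_show(): one row per workqueue thread, with its CPU, the
atomically counted insertions, the executed count, and the thread's comm name.
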
diff --git a/kernel/trace/tracedump.c b/kernel/trace/tracedump.c
new file mode 100644
index 00000000000..a83532bc36d
--- /dev/null
+++ b/kernel/trace/tracedump.c
@@ -0,0 +1,682 @@
/*
 * kernel/trace/tracedump.c
 *
 * Copyright (c) 2011, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/console.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/irqflags.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/proc_fs.h>
#include <linux/ring_buffer.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/string.h>
#include <linux/threads.h>
#include <linux/tracedump.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/zlib.h>

#include "trace.h"
#include "trace_output.h"

#define CPU_MAX (NR_CPUS-1)

#define TRYM(fn, ...) do {                      \
        int try_error = (fn);                   \
        if (try_error < 0) {                    \
                printk(__VA_ARGS__);            \
                return try_error;               \
        }                                       \
} while (0)

#define TRY(fn) TRYM(fn, TAG "Caught error from %s in %s\n", #fn, __func__)

/* Stolen from printk.c */
#define for_each_console(con) \
        for (con = console_drivers; con != NULL; con = con->next)

#define TAG KERN_ERR "tracedump: "

#define TD_MIN_CONSUME 2000
#define TD_COMPRESS_CHUNK 0x8000

static DEFINE_MUTEX(tracedump_proc_lock);

static const char MAGIC_NUMBER[9] = "TRACEDUMP";
static const char CPU_DELIM[7] = "CPU_END";
#define CMDLINE_DELIM "|"

/* Type of output */
static bool current_format;
static bool format_ascii;
module_param(format_ascii, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(format_ascii, "Dump ascii or raw data");

/* Max size of output */
static uint panic_size = 0x80000;
module_param(panic_size, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(panic_size, "Max dump size during kernel panic (bytes)");

static uint compress_level = 9;
module_param(compress_level, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(compress_level, "Level of compression to use. [0-9]");

static char out_buf[TD_COMPRESS_CHUNK];
static z_stream stream;
static int compress_done;
static int flush;

static int old_trace_flags;

static struct trace_iterator iter;
static struct pager_s {
        struct trace_array *tr;
        void *spare;
        int cpu;
        int len;
        char __user *ubuf;
} pager;

static char cmdline_buf[16+TASK_COMM_LEN];

static int print_to_console(const char *buf, size_t len)
{
        struct console *con;

        /* Stolen from printk.c */
        for_each_console(con) {
                if ((con->flags & CON_ENABLED) && con->write &&
                    (cpu_online(smp_processor_id()) ||
                     (con->flags & CON_ANYTIME)))
                        con->write(con, buf, len);
        }
        return 0;
}

static int print_to_user(const char *buf, size_t len)
{
        int size;
        size = copy_to_user(pager.ubuf, buf, len);
        if (size > 0) {
                printk(TAG "Failed to copy to user %d bytes\n", size);
                return -EINVAL;
        }
        return 0;
}

static int print(const char *buf, size_t len, int print_to)
{
        if (print_to == TD_PRINT_CONSOLE)
                TRY(print_to_console(buf, len));
        else if (print_to == TD_PRINT_USER)
                TRY(print_to_user(buf, len));
        return 0;
}

/* print_magic will print MAGIC_NUMBER using the
 * print function selected by print_to.
 */
static inline ssize_t print_magic(int print_to)
{
        print(MAGIC_NUMBER, sizeof(MAGIC_NUMBER), print_to);
        return sizeof(MAGIC_NUMBER);
}

static int iter_init(void)
{
        int cpu;

        /* Make iter point to the global ring buffer used in trace. */
        trace_init_global_iter(&iter);

        /* Disable tracing */
        for_each_tracing_cpu(cpu) {
                atomic_inc(&iter.tr->data[cpu]->disabled);
        }

        /* Save flags */
        old_trace_flags = trace_flags;

        /* Don't look at memory in panic mode. */
        trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

        /* Prepare ring buffer iter */
        for_each_tracing_cpu(cpu) {
                iter.buffer_iter[cpu] =
                        ring_buffer_read_prepare(iter.tr->buffer, cpu);
        }
        ring_buffer_read_prepare_sync();
        for_each_tracing_cpu(cpu) {
                ring_buffer_read_start(iter.buffer_iter[cpu]);
                tracing_iter_reset(&iter, cpu);
        }
        return 0;
}

/* iter_next gets the next entry in the ring buffer, ordered by time.
 * If there are no more entries, returns 0.
 */
static ssize_t iter_next(void)
{
        /* Zero out the iterator's seq */
        memset(&iter.seq, 0,
               sizeof(struct trace_iterator) -
               offsetof(struct trace_iterator, seq));

        while (!trace_empty(&iter)) {
                if (trace_find_next_entry_inc(&iter) == NULL) {
                        printk(TAG "trace_find_next_entry failed!\n");
                        return -EINVAL;
                }

                /* Copy the ring buffer data to the iterator's seq */
                print_trace_line(&iter);
                if (iter.seq.len != 0)
                        return iter.seq.len;
        }
        return 0;
}

static int iter_deinit(void)
{
        int cpu;
        /* Re-enable tracing */
        for_each_tracing_cpu(cpu) {
                ring_buffer_read_finish(iter.buffer_iter[cpu]);
        }
        for_each_tracing_cpu(cpu) {
                atomic_dec(&iter.tr->data[cpu]->disabled);
        }

        /* Restore flags */
        trace_flags = old_trace_flags;
        return 0;
}

static int pager_init(void)
{
        int cpu;

        /* Need to do this to get a pointer to global_trace (iter.tr).
           Lame, I know. */
        trace_init_global_iter(&iter);

        /* Turn off tracing */
        for_each_tracing_cpu(cpu) {
                atomic_inc(&iter.tr->data[cpu]->disabled);
        }

        memset(&pager, 0, sizeof(pager));
        pager.tr = iter.tr;
        pager.len = TD_COMPRESS_CHUNK;

        return 0;
}

/* pager_next_cpu moves the pager to the next cpu.
 * Returns 0 if pager is done, else 1.
 */
static ssize_t pager_next_cpu(void)
{
        if (pager.cpu <= CPU_MAX) {
                pager.cpu += 1;
                return 1;
        }

        return 0;
}

/* pager_next gets the next page of data from the ring buffer
 * of the current cpu. Returns page size or 0 if no more data.
 */
static ssize_t pager_next(void)
{
        int ret;

        if (pager.cpu > CPU_MAX)
                return 0;

        if (!pager.spare)
                pager.spare = ring_buffer_alloc_read_page(pager.tr->buffer, pager.cpu);
        if (!pager.spare) {
                printk(TAG "ring_buffer_alloc_read_page failed!");
                return -ENOMEM;
        }

        ret = ring_buffer_read_page(pager.tr->buffer,
                                    &pager.spare,
                                    pager.len,
                                    pager.cpu, 0);
        if (ret < 0)
                return 0;

        return PAGE_SIZE;
}

static int pager_deinit(void)
{
        int cpu;
        if (pager.spare != NULL)
                ring_buffer_free_read_page(pager.tr->buffer, pager.spare);

        for_each_tracing_cpu(cpu) {
                atomic_dec(&iter.tr->data[cpu]->disabled);
        }
        return 0;
}

/* cmdline_next gets the next saved cmdline from the trace and
 * puts it in cmdline_buf. Returns the size of the cmdline, or 0 if
 * there are no more cmdlines; in that case it resets itself, so a
 * subsequent call starts over.
 */
static ssize_t cmdline_next(void)
{
        static int pid;
        ssize_t size = 0;

        if (pid >= PID_MAX_DEFAULT)
                pid = -1;

        while (size == 0 && pid < PID_MAX_DEFAULT) {
                pid++;
                trace_find_cmdline(pid, cmdline_buf);
                if (!strncmp(cmdline_buf, "<...>", 5))
                        continue;

                sprintf(&cmdline_buf[strlen(cmdline_buf)], " %d"
                        CMDLINE_DELIM, pid);
                size = strlen(cmdline_buf);
        }
        return size;
}

/* consume_events removes the first 'num' entries from the ring buffer. */
static int consume_events(size_t num)
{
        TRY(iter_init());
        for (; num > 0 && !trace_empty(&iter); num--) {
                trace_find_next_entry_inc(&iter);
                ring_buffer_consume(iter.tr->buffer, iter.cpu, &iter.ts,
                                    &iter.lost_events);
        }
        TRY(iter_deinit());
        return 0;
}

static int data_init(void)
{
        if (current_format)
                TRY(iter_init());
        else
                TRY(pager_init());
        return 0;
}

/* data_next will figure out the right 'next' function to
 * call and will select the right buffer to pass back
 * to compress_next.
 *
 * iter_next should be used to get data entry-by-entry, ordered
 * by time, which is what we need in order to convert it to ascii.
 *
 * pager_next will return a full page of raw data at a time, one
 * CPU at a time. pager_next_cpu must be called to get the next CPU.
 * cmdline_next will get the next saved cmdline.
 */
static ssize_t data_next(const char **buf)
{
        ssize_t size;

        if (current_format) {
                TRY(size = iter_next());
                *buf = iter.seq.buffer;
        } else {
                TRY(size = pager_next());
                *buf = pager.spare;
                if (size == 0) {
                        if (pager_next_cpu()) {
                                size = sizeof(CPU_DELIM);
                                *buf = CPU_DELIM;
                        } else {
                                TRY(size = cmdline_next());
                                *buf = cmdline_buf;
                        }
                }
        }
        return size;
}

static int data_deinit(void)
{
        if (current_format)
                TRY(iter_deinit());
        else
                TRY(pager_deinit());
        return 0;
}

static int compress_init(void)
{
        int workspacesize, ret;

        compress_done = 0;
        flush = Z_NO_FLUSH;
        stream.data_type = current_format ? Z_ASCII : Z_BINARY;
        workspacesize = zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL);
        stream.workspace = vmalloc(workspacesize);
        if (!stream.workspace) {
                printk(TAG "Could not allocate enough memory for zlib!\n");
                return -ENOMEM;
        }
        memset(stream.workspace, 0, workspacesize);

        ret = zlib_deflateInit(&stream, compress_level);
        if (ret != Z_OK) {
                printk(TAG "%s\n", stream.msg);
                return ret;
        }
        stream.avail_in = 0;
        stream.avail_out = 0;
        TRY(data_init());
        return 0;
}

/* compress_next will compress up to min(max_out, TD_COMPRESS_CHUNK) bytes
 * of data into the output buffer. It gets the data by calling data_next.
 * It will return the most data it possibly can. If it returns 0, then
 * there is no more data.
 *
 * By the way that zlib works, each call to zlib_deflate will possibly
 * consume up to avail_in bytes from next_in, and will fill up to
 * avail_out bytes in next_out. Once flush == Z_FINISH, it can not take
 * any more input. It will output until it is finished, and will return
 * Z_STREAM_END.
 */
static ssize_t compress_next(size_t max_out)
{
        ssize_t ret;
        max_out = min(max_out, (size_t)TD_COMPRESS_CHUNK);
        stream.next_out = out_buf;
        stream.avail_out = max_out;
        while (stream.avail_out > 0 && !compress_done) {
                if (stream.avail_in == 0 && flush != Z_FINISH) {
                        TRY(stream.avail_in =
                            data_next((const char **)&stream.next_in));
                        flush = (stream.avail_in == 0) ? Z_FINISH : Z_NO_FLUSH;
                }
                if (stream.next_in != NULL) {
                        TRYM((ret = zlib_deflate(&stream, flush)),
                             "zlib: %s\n", stream.msg);
                        compress_done = (ret == Z_STREAM_END);
                }
        }
        ret = max_out - stream.avail_out;
        return ret;
}

static int compress_deinit(void)
{
        TRY(data_deinit());

        zlib_deflateEnd(&stream);
        vfree(stream.workspace);

        /* TODO: remove */
        printk(TAG "Total in: %ld\n", stream.total_in);
        printk(TAG "Total out: %ld\n", stream.total_out);
        return stream.total_out;
}

static int compress_reset(void)
{
        TRY(compress_deinit());
        TRY(compress_init());
        return 0;
}

/* tracedump_init initializes all tracedump components.
 * Call this before tracedump_next.
 */
int tracedump_init(void)
{
        TRY(compress_init());
        return 0;
}

/* tracedump_next will print up to max_out data from the tracing ring
 * buffers using the print function selected by print_to. The data is
 * compressed using zlib.
 *
 * The output type of the data is specified by the format_ascii module
 * parameter. If format_ascii == 1, human-readable data will be output.
 * Otherwise, it will output raw data from the ring buffer in cpu order,
 * followed by the saved_cmdlines data.
 */
ssize_t tracedump_next(size_t max_out, int print_to)
{
        ssize_t size;
        TRY(size = compress_next(max_out));
        print(out_buf, size, print_to);
        return size;
}

/* tracedump_all will print all data in the tracing ring buffers using
 * the print function selected by print_to. The data is compressed using
 * zlib, and is surrounded by MAGIC_NUMBER.
 *
 * The output type of the data is specified by the format_ascii module
 * parameter. If format_ascii == 1, human-readable data will be output.
 * Otherwise, it will output raw data from the ring buffer in cpu order,
 * followed by the saved_cmdlines data.
 */
ssize_t tracedump_all(int print_to)
{
        ssize_t ret, size = 0;
        TRY(size += print_magic(print_to));

        do {
                /* Here the size used doesn't really matter,
                 * since we're dumping everything. */
                TRY(ret = tracedump_next(0xFFFFFFFF, print_to));
                size += ret;
        } while (ret > 0);

        TRY(size += print_magic(print_to));

        return size;
}

/* tracedump_deinit deinitializes all tracedump components.
 * This must be called, even on error.
 */
int tracedump_deinit(void)
{
        TRY(compress_deinit());
        return 0;
}

/* tracedump_reset reinitializes all tracedump components. */
int tracedump_reset(void)
{
        TRY(compress_reset());
        return 0;
}

/* tracedump_open opens the tracedump file for reading. */
static int tracedump_open(struct inode *inode, struct file *file)
{
        int ret;
        mutex_lock(&tracedump_proc_lock);
        current_format = format_ascii;
        ret = tracedump_init();
        if (ret < 0)
                goto err;

        ret = nonseekable_open(inode, file);
        if (ret < 0)
                goto err;
        return ret;

err:
        mutex_unlock(&tracedump_proc_lock);
        return ret;
}

/* tracedump_read reads data from tracedump_next and prints
 * it to userspace. It will surround the data with MAGIC_NUMBER.
 */
static ssize_t tracedump_read(struct file *file, char __user *buf,
                              size_t len, loff_t *offset)
{
        static int done;
        ssize_t size = 0;

        pager.ubuf = buf;

        if (*offset == 0) {
                done = 0;
                TRY(size = print_magic(TD_PRINT_USER));
        } else if (!done) {
                TRY(size = tracedump_next(len, TD_PRINT_USER));
                if (size == 0) {
                        TRY(size = print_magic(TD_PRINT_USER));
                        done = 1;
                }
        }

        *offset += size;

        return size;
}

static int tracedump_release(struct inode *inode, struct file *file)
{
        int ret;
        ret = tracedump_deinit();
        mutex_unlock(&tracedump_proc_lock);
        return ret;
}

/* tracedump_dump dumps all tracing data from the tracing ring buffers
 * to all consoles. For details about the output format, see
 * tracedump_all.
 *
 * At most max_out bytes are dumped. To accomplish this,
 * tracedump_dump calls tracedump_all several times without writing the data,
 * each time tossing out old data until it reaches its goal.
 *
 * Note: dumping raw pages currently does NOT follow the size limit.
 */
int tracedump_dump(size_t max_out)
{
        ssize_t size;
        size_t consume;

        printk(TAG "\n");

        tracedump_init();

        if (format_ascii) {
                size = tracedump_all(TD_NO_PRINT);
                if (size < 0) {
                        printk(TAG "failed to dump\n");
                        goto out;
                }
                while (size > max_out) {
                        TRY(tracedump_deinit());
                        /* Events take roughly 60 ascii bytes each,
                         * not counting compression */
                        consume = TD_MIN_CONSUME + (size - max_out) /
                                  (60 / (compress_level + 1));
                        TRY(consume_events(consume));
                        TRY(tracedump_init());
                        size = tracedump_all(TD_NO_PRINT);
                        if (size < 0) {
                                printk(TAG "failed to dump\n");
                                goto out;
                        }
                }

                TRY(tracedump_reset());
        }
        size = tracedump_all(TD_PRINT_CONSOLE);
        if (size < 0) {
                printk(TAG "failed to dump\n");
                goto out;
        }

out:
        tracedump_deinit();
        printk(KERN_INFO "\n" TAG " end\n");
        return size;
}

static const struct file_operations tracedump_fops = {
        .owner = THIS_MODULE,
        .open = tracedump_open,
        .read = tracedump_read,
        .release = tracedump_release,
};

#ifdef CONFIG_TRACEDUMP_PANIC
static int tracedump_panic_handler(struct notifier_block *this,
                                   unsigned long event, void *unused)
{
        tracedump_dump(panic_size);
        return 0;
}

static struct notifier_block tracedump_panic_notifier = {
        .notifier_call = tracedump_panic_handler,
        .next = NULL,
        .priority = 150 /* priority: INT_MAX >= x >= 0 */
};
#endif

static int __init tracedump_initcall(void)
{
#ifdef CONFIG_TRACEDUMP_PROCFS
        struct proc_dir_entry *entry;

        /* Create a procfs file for easy dumping */
        entry = create_proc_entry("tracedump", S_IFREG | S_IRUGO, NULL);
        if (!entry)
                printk(TAG "failed to create proc entry\n");
        else
                entry->proc_fops = &tracedump_fops;
#endif

#ifdef CONFIG_TRACEDUMP_PANIC
        /* Automatically dump to console on a kernel panic */
        atomic_notifier_chain_register(&panic_notifier_list,
                                       &tracedump_panic_notifier);
#endif
        return 0;
}

early_initcall(tracedump_initcall);
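
A dump captured from /proc/tracedump begins and ends with the 9-byte
TRACEDUMP marker, with a zlib stream in between. Since zlib_deflateInit()
produces a standard zlib-wrapped deflate stream, ordinary userspace zlib can
decode it. A minimal decoder sketch, assuming the dump was saved to a file
(the file name handling and buffer size are illustrative); link with -lz:

/* Minimal sketch: inflate a saved tracedump capture to stdout. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>

#define MAGIC_LEN 9 /* strlen("TRACEDUMP") */

int main(int argc, char **argv)
{
        unsigned char out[1 << 15];
        unsigned char *in;
        z_stream zs;
        long len;
        FILE *f;
        int ret;

        if (argc != 2) {
                fprintf(stderr, "usage: %s <dumpfile>\n", argv[0]);
                return EXIT_FAILURE;
        }
        f = fopen(argv[1], "rb");
        if (!f) {
                perror(argv[1]);
                return EXIT_FAILURE;
        }
        fseek(f, 0, SEEK_END);
        len = ftell(f);
        rewind(f);
        in = malloc(len);
        if (!in || fread(in, 1, len, f) != (size_t)len)
                return EXIT_FAILURE;
        fclose(f);

        if (len < 2 * MAGIC_LEN || memcmp(in, "TRACEDUMP", MAGIC_LEN) != 0) {
                fprintf(stderr, "bad magic\n");
                return EXIT_FAILURE;
        }

        memset(&zs, 0, sizeof(zs));
        if (inflateInit(&zs) != Z_OK)
                return EXIT_FAILURE;
        /* Skip the leading marker and drop the trailing one. */
        zs.next_in = in + MAGIC_LEN;
        zs.avail_in = len - 2 * MAGIC_LEN;
        do {
                zs.next_out = out;
                zs.avail_out = sizeof(out);
                ret = inflate(&zs, Z_NO_FLUSH);
                if (ret != Z_OK && ret != Z_STREAM_END) {
                        fprintf(stderr, "inflate error %d\n", ret);
                        break;
                }
                fwrite(out, 1, sizeof(out) - zs.avail_out, stdout);
        } while (ret != Z_STREAM_END);
        inflateEnd(&zs);
        free(in);
        return ret == Z_STREAM_END ? EXIT_SUCCESS : EXIT_FAILURE;
}

Note that with format_ascii == 0 the inflated payload still needs a second
parsing pass: it is raw ring-buffer pages in cpu order, separated by the
CPU_END delimiter, followed by the '|'-delimited saved cmdlines.
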
diff --git a/kernel/trace/tracelevel.c b/kernel/trace/tracelevel.c
new file mode 100644
index 00000000000..9f8b8eedbb5
--- /dev/null
+++ b/kernel/trace/tracelevel.c
@@ -0,0 +1,142 @@
/*
 * kernel/trace/tracelevel.c
 *
 * Copyright (c) 2011, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/ftrace_event.h>
#include <linux/list.h>
#include <linux/moduleparam.h>
#include <linux/mutex.h>
#include <linux/tracelevel.h>
#include <linux/vmalloc.h>

#include "trace.h"

#define TAG KERN_ERR "tracelevel: "

struct tracelevel_record {
        struct list_head list;
        char *name;
        int level;
};

static LIST_HEAD(tracelevel_list);

static bool started;
static unsigned int tracelevel_level = TRACELEVEL_DEFAULT;

static DEFINE_MUTEX(tracelevel_record_lock);

/* tracelevel_set_event sets a single event if set = 1, or
 * clears an event if set = 0.
 */
static int tracelevel_set_event(struct tracelevel_record *evt, bool set)
{
        if (trace_set_clr_event(NULL, evt->name, set) < 0) {
                printk(TAG "failed to set event %s\n", evt->name);
                return -EINVAL;
        }
        return 0;
}

/* Registers an event. If possible, it also sets it.
 * If not, we'll set it in tracelevel_init.
 */
int __tracelevel_register(char *name, unsigned int level)
{
        struct tracelevel_record *evt =
                vmalloc(sizeof(struct tracelevel_record));
        if (!evt) {
                printk(TAG "failed to allocate tracelevel_record for %s\n",
                       name);
                return -ENOMEM;
        }

        evt->name = name;
        evt->level = level;

        mutex_lock(&tracelevel_record_lock);
        list_add(&evt->list, &tracelevel_list);
        mutex_unlock(&tracelevel_record_lock);

        if (level >= tracelevel_level && started)
                tracelevel_set_event(evt, 1);
        return 0;
}

/* tracelevel_set_level sets the global level, clears events
 * lower than that level, and enables events greater or equal.
 */
int tracelevel_set_level(int level)
{
        struct tracelevel_record *evt = NULL;

        if (level < 0 || level > TRACELEVEL_MAX)
                return -EINVAL;
        tracelevel_level = level;

        mutex_lock(&tracelevel_record_lock);
        list_for_each_entry(evt, &tracelevel_list, list) {
                if (evt->level >= level)
                        tracelevel_set_event(evt, 1);
                else
                        tracelevel_set_event(evt, 0);
        }
        mutex_unlock(&tracelevel_record_lock);
        return 0;
}

static int param_set_level(const char *val, const struct kernel_param *kp)
{
        long level;
        int ret;

        /* strict_strtol expects a long *, so parse into a long first. */
        ret = strict_strtol(val, 0, &level);
        if (ret < 0)
                return ret;
        return tracelevel_set_level(level);
}

static int param_get_level(char *buffer, const struct kernel_param *kp)
{
        return param_get_int(buffer, kp);
}

static struct kernel_param_ops tracelevel_level_ops = {
        .set = param_set_level,
        .get = param_get_level
};

module_param_cb(level, &tracelevel_level_ops, &tracelevel_level, 0644);

/* Turn on the tracing that has been registered thus far. */
static int __init tracelevel_init(void)
{
        int ret;
        started = true;

        /* The ring buffer is initialized to 1 page until the user sets a
         * tracer. Since we're doing this manually, we need to ask for
         * the expanded buffer.
         */
        ret = tracing_update_buffers();
        if (ret < 0)
                return ret;

        return tracelevel_set_level(tracelevel_level);
}

/* The tracing mechanism is set up during fs_initcall. */
fs_initcall_sync(tracelevel_init);
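
For context, a client of this API registers its events by name and level, and
the shared level parameter then decides which of them are enabled. A
hypothetical sketch of a driver using it; the event name and level value are
made up, and it is assumed that <linux/tracelevel.h> declares the
__tracelevel_register() function defined above:

/* Hypothetical sketch of a driver hooking one trace event into
 * tracelevel. */
#include <linux/module.h>
#include <linux/tracelevel.h>

/* "my_driver_irq" is an illustrative event name, not a real event. */
static char my_event[] = "my_driver_irq";

static int __init my_driver_trace_init(void)
{
        /* Enabled whenever the global level is 2 or lower, since
         * tracelevel turns on events with level >= tracelevel_level. */
        return __tracelevel_register(my_event, 2);
}
module_init(my_driver_trace_init);

MODULE_LICENSE("GPL");

The global level can then be adjusted at runtime, typically via the
tracelevel.level= boot parameter or /sys/module/tracelevel/parameters/level,
and tracelevel_set_level() flips every registered event on or off accordingly.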