author     Andrea Bastoni <bastoni@cs.unc.edu>	2010-05-30 19:16:45 -0400
committer  Andrea Bastoni <bastoni@cs.unc.edu>	2010-05-30 19:16:45 -0400
commit     ada47b5fe13d89735805b566185f4885f5a3f750 (patch)
tree       644b88f8a71896307d71438e9b3af49126ffb22b /kernel/hw_breakpoint.c
parent     43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff)
parent     3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff)

Merge branch 'wip-2.6.34' into old-private-master  (archived-private-master)

Diffstat (limited to 'kernel/hw_breakpoint.c')
-rw-r--r--  kernel/hw_breakpoint.c  492
1 file changed, 492 insertions, 0 deletions
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..03808ed342a6
--- /dev/null
+++ b/kernel/hw_breakpoint.c
@@ -0,0 +1,492 @@
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) IBM Corporation, 2009
 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Thanks to Ingo Molnar for his many suggestions.
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 * This file contains the arch-independent routines.
 */

#include <linux/irqflags.h>
#include <linux/kallsyms.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <linux/hw_breakpoint.h>

/*
 * Constraints data
 */

/* Number of pinned cpu breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);

/* Number of pinned task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);

/* Number of non-pinned cpu/task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);

/* Gather the number of total pinned and un-pinned bp in a cpuset */
struct bp_busy_slots {
	unsigned int pinned;
	unsigned int flexible;
};

/* Serialize accesses to the above constraints */
static DEFINE_MUTEX(nr_bp_mutex);

/*
 * Report the maximum number of pinned breakpoints a task
 * has in this cpu
 */
static unsigned int max_task_bp_pinned(int cpu)
{
	int i;
	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);

	for (i = HBP_NUM - 1; i >= 0; i--) {
		if (tsk_pinned[i] > 0)
			return i + 1;
	}

	return 0;
}
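
/*
 * Illustration with hypothetical numbers, assuming HBP_NUM == 4 (the x86
 * value): nr_task_bp_pinned is a histogram in which slot i counts the
 * tasks that currently pin i + 1 breakpoints on this cpu. With
 * tsk_pinned == { 3, 1, 0, 0 }, three tasks pin one breakpoint each and
 * one task pins two, so max_task_bp_pinned() scans down from the highest
 * non-empty slot and returns 2.
 */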

static int task_bp_pinned(struct task_struct *tsk)
{
	struct perf_event_context *ctx = tsk->perf_event_ctxp;
	struct list_head *list;
	struct perf_event *bp;
	unsigned long flags;
	int count = 0;

	if (WARN_ONCE(!ctx, "No perf context for this task"))
		return 0;

	list = &ctx->event_list;

	raw_spin_lock_irqsave(&ctx->lock, flags);

	/*
	 * The current breakpoint counter is not included in the list
	 * at the open() callback time
	 */
	list_for_each_entry(bp, list, event_entry) {
		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
			count++;
	}

	raw_spin_unlock_irqrestore(&ctx->lock, flags);

	return count;
}

/*
 * Report the number of pinned/un-pinned breakpoints we have in
 * a given cpu (cpu > -1) or in all of them (cpu = -1).
 */
static void
fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
{
	int cpu = bp->cpu;
	struct task_struct *tsk = bp->ctx->task;

	if (cpu >= 0) {
		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
		if (!tsk)
			slots->pinned += max_task_bp_pinned(cpu);
		else
			slots->pinned += task_bp_pinned(tsk);
		slots->flexible = per_cpu(nr_bp_flexible, cpu);

		return;
	}

	for_each_online_cpu(cpu) {
		unsigned int nr;

		nr = per_cpu(nr_cpu_bp_pinned, cpu);
		if (!tsk)
			nr += max_task_bp_pinned(cpu);
		else
			nr += task_bp_pinned(tsk);

		if (nr > slots->pinned)
			slots->pinned = nr;

		nr = per_cpu(nr_bp_flexible, cpu);

		if (nr > slots->flexible)
			slots->flexible = nr;
	}
}

/*
 * Add or remove a pinned breakpoint for the given task in our
 * constraint table
 */
static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
{
	unsigned int *tsk_pinned;
	int count = 0;

	count = task_bp_pinned(tsk);

	tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
	if (enable) {
		tsk_pinned[count]++;
		if (count > 0)
			tsk_pinned[count-1]--;
	} else {
		tsk_pinned[count]--;
		if (count > 0)
			tsk_pinned[count-1]++;
	}
}
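
/*
 * Illustration with hypothetical numbers: task_bp_pinned() returns how many
 * breakpoints the task had pinned before this toggle. If it returns 2 and
 * enable is true, the task moves from the "two breakpoints" slot to the
 * "three breakpoints" slot: tsk_pinned[2]++ and tsk_pinned[1]--. Disabling
 * performs the symmetric move back down.
 */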

/*
 * Add/remove the given breakpoint in our constraint table
 */
static void toggle_bp_slot(struct perf_event *bp, bool enable)
{
	int cpu = bp->cpu;
	struct task_struct *tsk = bp->ctx->task;

	/* Pinned counter task profiling */
	if (tsk) {
		if (cpu >= 0) {
			toggle_bp_task_slot(tsk, cpu, enable);
			return;
		}

		for_each_online_cpu(cpu)
			toggle_bp_task_slot(tsk, cpu, enable);
		return;
	}

	/* Pinned counter cpu profiling */
	if (enable)
		per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
	else
		per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
}

/*
 * Constraints to check before allowing this new breakpoint counter:
 *
 * == Non-pinned counter == (Considered as pinned for now)
 *
 * - If attached to a single cpu, check:
 *
 *       (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
 *            + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM
 *
 *   -> If there are already non-pinned counters in this cpu, it means
 *      there is already a free slot for them.
 *      Otherwise, we check that the maximum number of per task
 *      breakpoints (for this cpu) plus the number of per cpu breakpoints
 *      (for this cpu) doesn't use up every debug register.
 *
 * - If attached to every cpu, check:
 *
 *       (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
 *            + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM
 *
 *   -> This is roughly the same, except we check the number of per cpu
 *      bp for every cpu and we keep the max one. Same for the per task
 *      breakpoints.
 *
 *
 * == Pinned counter ==
 *
 * - If attached to a single cpu, check:
 *
 *       ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
 *            + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM
 *
 *   -> Same checks as before. But now the nr_bp_flexible, if any, must keep
 *      at least one register (or they will never be fed).
 *
 * - If attached to every cpu, check:
 *
 *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
 *            + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
 */
static int __reserve_bp_slot(struct perf_event *bp)
{
	struct bp_busy_slots slots = {0};

	fetch_bp_busy_slots(&slots, bp);

	/* Flexible counters need to keep at least one slot */
	if (slots.pinned + (!!slots.flexible) == HBP_NUM)
		return -ENOSPC;

	toggle_bp_slot(bp, true);

	return 0;
}
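
/*
 * Worked example with hypothetical numbers, assuming HBP_NUM == 4 (the x86
 * value): if the busiest cpu already has two pinned cpu-wide breakpoints,
 * the target task pins one more there, and at least one flexible counter
 * is scheduled, then slots.pinned == 3 and !!slots.flexible == 1, so
 * 3 + 1 == HBP_NUM and the new pinned request is refused with -ENOSPC.
 * Without any flexible counter around, the same request would still fit.
 */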

int reserve_bp_slot(struct perf_event *bp)
{
	int ret;

	mutex_lock(&nr_bp_mutex);

	ret = __reserve_bp_slot(bp);

	mutex_unlock(&nr_bp_mutex);

	return ret;
}

static void __release_bp_slot(struct perf_event *bp)
{
	toggle_bp_slot(bp, false);
}

void release_bp_slot(struct perf_event *bp)
{
	mutex_lock(&nr_bp_mutex);

	__release_bp_slot(bp);

	mutex_unlock(&nr_bp_mutex);
}

/*
 * Allow the kernel debugger to reserve breakpoint slots without
 * taking a lock, using the dbg_* variants of the reserve and
 * release breakpoint slot functions.
 */
int dbg_reserve_bp_slot(struct perf_event *bp)
{
	if (mutex_is_locked(&nr_bp_mutex))
		return -1;

	return __reserve_bp_slot(bp);
}

int dbg_release_bp_slot(struct perf_event *bp)
{
	if (mutex_is_locked(&nr_bp_mutex))
		return -1;

	__release_bp_slot(bp);

	return 0;
}

int register_perf_hw_breakpoint(struct perf_event *bp)
{
	int ret;

	ret = reserve_bp_slot(bp);
	if (ret)
		return ret;

	/*
	 * Ptrace breakpoints can be temporary perf events only
	 * meant to reserve a slot. In this case, it is created disabled and
	 * we don't want to check the params right now (as we put a null addr).
	 * But perf tools create events as disabled and we want to check
	 * the params for them.
	 * This is a quick hack that will be removed soon, once we remove
	 * the tmp breakpoints from ptrace.
	 */
	if (!bp->attr.disabled || !bp->overflow_handler)
		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);

	/* if arch_validate_hwbkpt_settings() fails then release bp slot */
	if (ret)
		release_bp_slot(bp);

	return ret;
}

/**
 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 */
struct perf_event *
register_user_hw_breakpoint(struct perf_event_attr *attr,
			    perf_overflow_handler_t triggered,
			    struct task_struct *tsk)
{
	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
}
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
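
/*
 * Usage sketch (hypothetical caller, loosely modeled on the x86 ptrace
 * code); "user_addr", "ptrace_triggered" and "child" are placeholders,
 * not symbols defined in this file:
 *
 *	struct perf_event_attr attr;
 *	struct perf_event *bp;
 *
 *	hw_breakpoint_init(&attr);
 *	attr.bp_addr = user_addr;
 *	attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *	attr.bp_type = HW_BREAKPOINT_W;
 *
 *	bp = register_user_hw_breakpoint(&attr, ptrace_triggered, child);
 *	if (IS_ERR(bp))
 *		return PTR_ERR(bp);
 */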

/**
 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
 * @bp: the breakpoint structure to modify
 * @attr: new breakpoint attributes
 */
int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
	u64 old_addr = bp->attr.bp_addr;
	u64 old_len = bp->attr.bp_len;
	int old_type = bp->attr.bp_type;
	int err = 0;

	perf_event_disable(bp);

	bp->attr.bp_addr = attr->bp_addr;
	bp->attr.bp_type = attr->bp_type;
	bp->attr.bp_len = attr->bp_len;

	if (attr->disabled)
		goto end;

	err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
	if (!err)
		perf_event_enable(bp);

	if (err) {
		bp->attr.bp_addr = old_addr;
		bp->attr.bp_type = old_type;
		bp->attr.bp_len = old_len;
		if (!bp->attr.disabled)
			perf_event_enable(bp);

		return err;
	}

end:
	bp->attr.disabled = attr->disabled;

	return 0;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);

/**
 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 * @bp: the breakpoint structure to unregister
 */
void unregister_hw_breakpoint(struct perf_event *bp)
{
	if (!bp)
		return;
	perf_event_release_kernel(bp);
}
EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);

/**
 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 *
 * @return a set of per_cpu pointers to perf events
 */
struct perf_event * __percpu *
register_wide_hw_breakpoint(struct perf_event_attr *attr,
			    perf_overflow_handler_t triggered)
{
	struct perf_event * __percpu *cpu_events, **pevent, *bp;
	long err;
	int cpu;

	cpu_events = alloc_percpu(typeof(*cpu_events));
	if (!cpu_events)
		return (void __percpu __force *)ERR_PTR(-ENOMEM);

	get_online_cpus();
	for_each_online_cpu(cpu) {
		pevent = per_cpu_ptr(cpu_events, cpu);
		bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);

		*pevent = bp;

		if (IS_ERR(bp)) {
			err = PTR_ERR(bp);
			goto fail;
		}
	}
	put_online_cpus();

	return cpu_events;

fail:
	for_each_online_cpu(cpu) {
		pevent = per_cpu_ptr(cpu_events, cpu);
		if (IS_ERR(*pevent))
			break;
		unregister_hw_breakpoint(*pevent);
	}
	put_online_cpus();

	free_percpu(cpu_events);
	return (void __percpu __force *)ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
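
/*
 * Usage sketch (hypothetical module code, loosely following
 * samples/hw_breakpoint/data_breakpoint.c); "ksym_addr" and
 * "sample_hbp_handler" are placeholders, not symbols defined here:
 *
 *	struct perf_event_attr attr;
 *	struct perf_event * __percpu *wp;
 *
 *	hw_breakpoint_init(&attr);
 *	attr.bp_addr = ksym_addr;
 *	attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
 *
 *	wp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
 *	if (IS_ERR((void __force *)wp))
 *		return PTR_ERR((void __force *)wp);
 */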

/**
 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
 * @cpu_events: the per cpu set of events to unregister
 */
void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
{
	int cpu;
	struct perf_event **pevent;

	for_each_possible_cpu(cpu) {
		pevent = per_cpu_ptr(cpu_events, cpu);
		unregister_hw_breakpoint(*pevent);
	}
	free_percpu(cpu_events);
}
EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);

static struct notifier_block hw_breakpoint_exceptions_nb = {
	.notifier_call = hw_breakpoint_exceptions_notify,
	/* we need to be notified first */
	.priority = 0x7fffffff
};

static int __init init_hw_breakpoint(void)
{
	return register_die_notifier(&hw_breakpoint_exceptions_nb);
}
core_initcall(init_hw_breakpoint);


struct pmu perf_ops_bp = {
	.enable		= arch_install_hw_breakpoint,
	.disable	= arch_uninstall_hw_breakpoint,
	.read		= hw_breakpoint_pmu_read,
};