Diffstat (limited to 'kernel/hw_breakpoint.c')
-rw-r--r--  kernel/hw_breakpoint.c  259
1 files changed, 197 insertions, 62 deletions
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 50dbd5999588..7a56b22e0602 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -40,23 +40,29 @@
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 
 #include <linux/hw_breakpoint.h>
 
+
 /*
  * Constraints data
  */
 
 /* Number of pinned cpu breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
+static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);
 
 /* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]);
 
 /* Number of non-pinned cpu/task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
+static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
+
+static int nr_slots[TYPE_MAX];
+
+static int constraints_initialized;
 
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
 struct bp_busy_slots {
@@ -67,16 +73,29 @@ struct bp_busy_slots {
 /* Serialize accesses to the above constraints */
 static DEFINE_MUTEX(nr_bp_mutex);
 
+__weak int hw_breakpoint_weight(struct perf_event *bp)
+{
+	return 1;
+}
+
+static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
+{
+	if (bp->attr.bp_type & HW_BREAKPOINT_RW)
+		return TYPE_DATA;
+
+	return TYPE_INST;
+}
+
 /*
  * Report the maximum number of pinned breakpoints a task
  * have in this cpu
  */
-static unsigned int max_task_bp_pinned(int cpu)
+static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 {
 	int i;
-	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 
-	for (i = HBP_NUM -1; i >= 0; i--) {
+	for (i = nr_slots[type] - 1; i >= 0; i--) {
 		if (tsk_pinned[i] > 0)
 			return i + 1;
 	}
@@ -84,7 +103,7 @@ static unsigned int max_task_bp_pinned(int cpu)
 	return 0;
 }
 
-static int task_bp_pinned(struct task_struct *tsk)
+static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
 {
 	struct perf_event_context *ctx = tsk->perf_event_ctxp;
 	struct list_head *list;
@@ -105,7 +124,8 @@ static int task_bp_pinned(struct task_struct *tsk)
 	 */
 	list_for_each_entry(bp, list, event_entry) {
 		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-			count++;
+			if (find_slot_idx(bp) == type)
+				count += hw_breakpoint_weight(bp);
 	}
 
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -118,18 +138,19 @@ static int task_bp_pinned(struct task_struct *tsk)
  * a given cpu (cpu > -1) or in all of them (cpu = -1).
  */
 static void
-fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
+		    enum bp_type_idx type)
 {
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
 
 	if (cpu >= 0) {
-		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
+		slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
 		if (!tsk)
-			slots->pinned += max_task_bp_pinned(cpu);
+			slots->pinned += max_task_bp_pinned(cpu, type);
 		else
-			slots->pinned += task_bp_pinned(tsk);
-		slots->flexible = per_cpu(nr_bp_flexible, cpu);
+			slots->pinned += task_bp_pinned(tsk, type);
+		slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
 
 		return;
 	}
@@ -137,16 +158,16 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 	for_each_online_cpu(cpu) {
 		unsigned int nr;
 
-		nr = per_cpu(nr_cpu_bp_pinned, cpu);
+		nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
 		if (!tsk)
-			nr += max_task_bp_pinned(cpu);
+			nr += max_task_bp_pinned(cpu, type);
 		else
-			nr += task_bp_pinned(tsk);
+			nr += task_bp_pinned(tsk, type);
 
 		if (nr > slots->pinned)
 			slots->pinned = nr;
 
-		nr = per_cpu(nr_bp_flexible, cpu);
+		nr = per_cpu(nr_bp_flexible[type], cpu);
 
 		if (nr > slots->flexible)
 			slots->flexible = nr;
@@ -154,31 +175,49 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 }
 
 /*
+ * For now, continue to consider flexible as pinned, until we can
+ * ensure no flexible event can ever be scheduled before a pinned event
+ * in a same cpu.
+ */
+static void
+fetch_this_slot(struct bp_busy_slots *slots, int weight)
+{
+	slots->pinned += weight;
+}
+
+/*
  * Add a pinned breakpoint for the given task in our constraint table
  */
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
+static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
+				enum bp_type_idx type, int weight)
 {
 	unsigned int *tsk_pinned;
-	int count = 0;
+	int old_count = 0;
+	int old_idx = 0;
+	int idx = 0;
 
-	count = task_bp_pinned(tsk);
+	old_count = task_bp_pinned(tsk, type);
+	old_idx = old_count - 1;
+	idx = old_idx + weight;
 
-	tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+	tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 	if (enable) {
-		tsk_pinned[count]++;
-		if (count > 0)
-			tsk_pinned[count-1]--;
+		tsk_pinned[idx]++;
+		if (old_count > 0)
+			tsk_pinned[old_idx]--;
 	} else {
-		tsk_pinned[count]--;
-		if (count > 0)
-			tsk_pinned[count-1]++;
+		tsk_pinned[idx]--;
+		if (old_count > 0)
+			tsk_pinned[old_idx]++;
 	}
 }
 
 /*
  * Add/remove the given breakpoint in our constraint table
  */
-static void toggle_bp_slot(struct perf_event *bp, bool enable)
+static void
+toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
+	       int weight)
 {
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
@@ -186,20 +225,20 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 	/* Pinned counter task profiling */
 	if (tsk) {
 		if (cpu >= 0) {
-			toggle_bp_task_slot(tsk, cpu, enable);
+			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
 			return;
 		}
 
 		for_each_online_cpu(cpu)
-			toggle_bp_task_slot(tsk, cpu, enable);
+			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
 		return;
 	}
 
 	/* Pinned counter cpu profiling */
 	if (enable)
-		per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
+		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
 	else
-		per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
+		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
 }
 
 /*
@@ -243,38 +282,112 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
  * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
  *      + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
  */
-int reserve_bp_slot(struct perf_event *bp)
+static int __reserve_bp_slot(struct perf_event *bp)
 {
 	struct bp_busy_slots slots = {0};
-	int ret = 0;
+	enum bp_type_idx type;
+	int weight;
 
-	mutex_lock(&nr_bp_mutex);
+	/* We couldn't initialize breakpoint constraints on boot */
+	if (!constraints_initialized)
+		return -ENOMEM;
 
-	fetch_bp_busy_slots(&slots, bp);
+	/* Basic checks */
+	if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
+	    bp->attr.bp_type == HW_BREAKPOINT_INVALID)
+		return -EINVAL;
+
+	type = find_slot_idx(bp);
+	weight = hw_breakpoint_weight(bp);
+
+	fetch_bp_busy_slots(&slots, bp, type);
+	fetch_this_slot(&slots, weight);
 
 	/* Flexible counters need to keep at least one slot */
-	if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
-		ret = -ENOSPC;
-		goto end;
-	}
+	if (slots.pinned + (!!slots.flexible) > nr_slots[type])
+		return -ENOSPC;
 
-	toggle_bp_slot(bp, true);
+	toggle_bp_slot(bp, true, type, weight);
+
+	return 0;
+}
+
+int reserve_bp_slot(struct perf_event *bp)
+{
+	int ret;
+
+	mutex_lock(&nr_bp_mutex);
+
+	ret = __reserve_bp_slot(bp);
 
-end:
 	mutex_unlock(&nr_bp_mutex);
 
 	return ret;
 }
 
+static void __release_bp_slot(struct perf_event *bp)
+{
+	enum bp_type_idx type;
+	int weight;
+
+	type = find_slot_idx(bp);
+	weight = hw_breakpoint_weight(bp);
+	toggle_bp_slot(bp, false, type, weight);
+}
+
 void release_bp_slot(struct perf_event *bp)
 {
 	mutex_lock(&nr_bp_mutex);
 
-	toggle_bp_slot(bp, false);
+	__release_bp_slot(bp);
 
 	mutex_unlock(&nr_bp_mutex);
 }
 
+/*
+ * Allow the kernel debugger to reserve breakpoint slots without
+ * taking a lock using the dbg_* variant of for the reserve and
+ * release breakpoint slots.
+ */
+int dbg_reserve_bp_slot(struct perf_event *bp)
+{
+	if (mutex_is_locked(&nr_bp_mutex))
+		return -1;
+
+	return __reserve_bp_slot(bp);
+}
+
+int dbg_release_bp_slot(struct perf_event *bp)
+{
+	if (mutex_is_locked(&nr_bp_mutex))
+		return -1;
+
+	__release_bp_slot(bp);
+
+	return 0;
+}
+
+static int validate_hw_breakpoint(struct perf_event *bp)
+{
+	int ret;
+
+	ret = arch_validate_hwbkpt_settings(bp);
+	if (ret)
+		return ret;
+
+	if (arch_check_bp_in_kernelspace(bp)) {
+		if (bp->attr.exclude_kernel)
+			return -EINVAL;
+		/*
+		 * Don't let unprivileged users set a breakpoint in the trap
+		 * path to avoid trap recursion attacks.
+		 */
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+	}
+
+	return 0;
+}
 
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
@@ -284,17 +397,11 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
 	if (ret)
 		return ret;
 
-	/*
-	 * Ptrace breakpoints can be temporary perf events only
-	 * meant to reserve a slot. In this case, it is created disabled and
-	 * we don't want to check the params right now (as we put a null addr)
-	 * But perf tools create events as disabled and we want to check
-	 * the params for them.
-	 * This is a quick hack that will be removed soon, once we remove
-	 * the tmp breakpoints from ptrace
-	 */
-	if (!bp->attr.disabled || !bp->overflow_handler)
-		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	ret = validate_hw_breakpoint(bp);
+
+	/* if arch_validate_hwbkpt_settings() fails then release bp slot */
+	if (ret)
+		release_bp_slot(bp);
 
 	return ret;
 }
@@ -324,8 +431,8 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
 	u64 old_addr = bp->attr.bp_addr;
+	u64 old_len = bp->attr.bp_len;
 	int old_type = bp->attr.bp_type;
-	int old_len = bp->attr.bp_len;
 	int err = 0;
 
 	perf_event_disable(bp);
@@ -337,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
 	if (attr->disabled)
 		goto end;
 
-	err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	err = validate_hw_breakpoint(bp);
 	if (!err)
 		perf_event_enable(bp);
 
@@ -377,17 +484,17 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
  *
  * @return a set of per_cpu pointers to perf events
  */
-struct perf_event **
+struct perf_event * __percpu *
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_overflow_handler_t triggered)
 {
-	struct perf_event **cpu_events, **pevent, *bp;
+	struct perf_event * __percpu *cpu_events, **pevent, *bp;
 	long err;
 	int cpu;
 
 	cpu_events = alloc_percpu(typeof(*cpu_events));
 	if (!cpu_events)
-		return ERR_PTR(-ENOMEM);
+		return (void __percpu __force *)ERR_PTR(-ENOMEM);
 
 	get_online_cpus();
 	for_each_online_cpu(cpu) {
@@ -415,7 +522,7 @@ fail:
 	put_online_cpus();
 
 	free_percpu(cpu_events);
-	return ERR_PTR(err);
+	return (void __percpu __force *)ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
 
@@ -423,7 +530,7 @@ EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
  * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
  * @cpu_events: the per cpu set of events to unregister
  */
-void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
+void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
 {
 	int cpu;
 	struct perf_event **pevent;
@@ -444,7 +551,36 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
 
 static int __init init_hw_breakpoint(void)
 {
+	unsigned int **task_bp_pinned;
+	int cpu, err_cpu;
+	int i;
+
+	for (i = 0; i < TYPE_MAX; i++)
+		nr_slots[i] = hw_breakpoint_slots(i);
+
+	for_each_possible_cpu(cpu) {
+		for (i = 0; i < TYPE_MAX; i++) {
+			task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
+			*task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
+						  GFP_KERNEL);
+			if (!*task_bp_pinned)
+				goto err_alloc;
+		}
+	}
+
+	constraints_initialized = 1;
+
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
+
+ err_alloc:
+	for_each_possible_cpu(err_cpu) {
+		if (err_cpu == cpu)
+			break;
+		for (i = 0; i < TYPE_MAX; i++)
+			kfree(per_cpu(nr_task_bp_pinned[i], cpu));
+	}
+
+	return -ENOMEM;
 }
 core_initcall(init_hw_breakpoint);
 
@@ -453,5 +589,4 @@ struct pmu perf_ops_bp = {
 	.enable		= arch_install_hw_breakpoint,
 	.disable	= arch_uninstall_hw_breakpoint,
 	.read		= hw_breakpoint_pmu_read,
-	.unthrottle	= hw_breakpoint_pmu_unthrottle
 };
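
Note (illustrative, not part of the patch above): the reworked constraint code depends on two per-architecture hooks. hw_breakpoint_slots(type) reports how many registers exist for each bp_type_idx and fills nr_slots[] at boot, while the __weak hw_breakpoint_weight() default of 1 can be overridden where a single event may consume several registers. A rough sketch of what a hypothetical architecture with two instruction and four data breakpoint registers might provide (the register counts and the length policy below are invented for illustration):

/*
 * Illustrative sketch only -- hypothetical arch hooks, not taken from
 * the patch above. Reports the per-type slot counts used to size
 * nr_task_bp_pinned[] and to bound __reserve_bp_slot().
 */
static inline int hw_breakpoint_slots(int type)
{
	switch (type) {
	case TYPE_INST:
		return 2;	/* hypothetical instruction registers */
	case TYPE_DATA:
		return 4;	/* hypothetical data registers */
	default:
		return 0;
	}
}

/*
 * Illustrative override of the __weak default: an event wider than one
 * register reports a weight of 2, so fetch_this_slot() and
 * toggle_bp_slot() account for both slots it occupies.
 */
int hw_breakpoint_weight(struct perf_event *bp)
{
	return bp->attr.bp_len > 8 ? 2 : 1;	/* hypothetical policy */
}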