Diffstat (limited to 'kernel/hw_breakpoint.c')
-rw-r--r--	kernel/hw_breakpoint.c	196
1 files changed, 148 insertions, 48 deletions
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 03808ed342a6..7a56b22e0602 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -40,23 +40,29 @@
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 
 #include <linux/hw_breakpoint.h>
 
+
 /*
  * Constraints data
  */
 
 /* Number of pinned cpu breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
+static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);
 
 /* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]);
 
 /* Number of non-pinned cpu/task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
+static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
+
+static int nr_slots[TYPE_MAX];
+
+static int constraints_initialized;
 
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
 struct bp_busy_slots {
@@ -67,16 +73,29 @@ struct bp_busy_slots {
 /* Serialize accesses to the above constraints */
 static DEFINE_MUTEX(nr_bp_mutex);
 
+__weak int hw_breakpoint_weight(struct perf_event *bp)
+{
+	return 1;
+}
+
+static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
+{
+	if (bp->attr.bp_type & HW_BREAKPOINT_RW)
+		return TYPE_DATA;
+
+	return TYPE_INST;
+}
+
 /*
  * Report the maximum number of pinned breakpoints a task
  * have in this cpu
  */
-static unsigned int max_task_bp_pinned(int cpu)
+static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 {
 	int i;
-	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 
-	for (i = HBP_NUM -1; i >= 0; i--) {
+	for (i = nr_slots[type] - 1; i >= 0; i--) {
 		if (tsk_pinned[i] > 0)
 			return i + 1;
 	}
@@ -84,7 +103,7 @@ static unsigned int max_task_bp_pinned(int cpu)
 	return 0;
 }
 
-static int task_bp_pinned(struct task_struct *tsk)
+static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
 {
 	struct perf_event_context *ctx = tsk->perf_event_ctxp;
 	struct list_head *list;
@@ -105,7 +124,8 @@ static int task_bp_pinned(struct task_struct *tsk)
 	 */
 	list_for_each_entry(bp, list, event_entry) {
 		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-			count++;
+			if (find_slot_idx(bp) == type)
+				count += hw_breakpoint_weight(bp);
 	}
 
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -118,18 +138,19 @@ static int task_bp_pinned(struct task_struct *tsk)
  * a given cpu (cpu > -1) or in all of them (cpu = -1).
  */
 static void
-fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
+		    enum bp_type_idx type)
 {
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
 
 	if (cpu >= 0) {
-		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
+		slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
 		if (!tsk)
-			slots->pinned += max_task_bp_pinned(cpu);
+			slots->pinned += max_task_bp_pinned(cpu, type);
 		else
-			slots->pinned += task_bp_pinned(tsk);
-		slots->flexible = per_cpu(nr_bp_flexible, cpu);
+			slots->pinned += task_bp_pinned(tsk, type);
+		slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
 
 		return;
 	}
@@ -137,16 +158,16 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 	for_each_online_cpu(cpu) {
 		unsigned int nr;
 
-		nr = per_cpu(nr_cpu_bp_pinned, cpu);
+		nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
 		if (!tsk)
-			nr += max_task_bp_pinned(cpu);
+			nr += max_task_bp_pinned(cpu, type);
 		else
-			nr += task_bp_pinned(tsk);
+			nr += task_bp_pinned(tsk, type);
 
 		if (nr > slots->pinned)
 			slots->pinned = nr;
 
-		nr = per_cpu(nr_bp_flexible, cpu);
+		nr = per_cpu(nr_bp_flexible[type], cpu);
 
 		if (nr > slots->flexible)
 			slots->flexible = nr;
@@ -154,31 +175,49 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 }
 
 /*
+ * For now, continue to consider flexible as pinned, until we can
+ * ensure no flexible event can ever be scheduled before a pinned event
+ * in a same cpu.
+ */
+static void
+fetch_this_slot(struct bp_busy_slots *slots, int weight)
+{
+	slots->pinned += weight;
+}
+
+/*
  * Add a pinned breakpoint for the given task in our constraint table
  */
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
+static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
+				enum bp_type_idx type, int weight)
 {
 	unsigned int *tsk_pinned;
-	int count = 0;
+	int old_count = 0;
+	int old_idx = 0;
+	int idx = 0;
 
-	count = task_bp_pinned(tsk);
+	old_count = task_bp_pinned(tsk, type);
+	old_idx = old_count - 1;
+	idx = old_idx + weight;
 
-	tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+	tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 	if (enable) {
-		tsk_pinned[count]++;
-		if (count > 0)
-			tsk_pinned[count-1]--;
+		tsk_pinned[idx]++;
+		if (old_count > 0)
+			tsk_pinned[old_idx]--;
 	} else {
-		tsk_pinned[count]--;
-		if (count > 0)
-			tsk_pinned[count-1]++;
+		tsk_pinned[idx]--;
+		if (old_count > 0)
+			tsk_pinned[old_idx]++;
 	}
 }
 
 /*
  * Add/remove the given breakpoint in our constraint table
  */
-static void toggle_bp_slot(struct perf_event *bp, bool enable)
+static void
+toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
+	       int weight)
 {
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
@@ -186,20 +225,20 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 	/* Pinned counter task profiling */
 	if (tsk) {
 		if (cpu >= 0) {
-			toggle_bp_task_slot(tsk, cpu, enable);
+			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
 			return;
 		}
 
 		for_each_online_cpu(cpu)
-			toggle_bp_task_slot(tsk, cpu, enable);
+			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
 		return;
 	}
 
 	/* Pinned counter cpu profiling */
 	if (enable)
-		per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
+		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
 	else
-		per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
+		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
 }
 
 /*
@@ -246,14 +285,29 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 static int __reserve_bp_slot(struct perf_event *bp)
 {
 	struct bp_busy_slots slots = {0};
+	enum bp_type_idx type;
+	int weight;
 
-	fetch_bp_busy_slots(&slots, bp);
+	/* We couldn't initialize breakpoint constraints on boot */
+	if (!constraints_initialized)
+		return -ENOMEM;
+
+	/* Basic checks */
+	if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
+	    bp->attr.bp_type == HW_BREAKPOINT_INVALID)
+		return -EINVAL;
+
+	type = find_slot_idx(bp);
+	weight = hw_breakpoint_weight(bp);
+
+	fetch_bp_busy_slots(&slots, bp, type);
+	fetch_this_slot(&slots, weight);
 
 	/* Flexible counters need to keep at least one slot */
-	if (slots.pinned + (!!slots.flexible) == HBP_NUM)
+	if (slots.pinned + (!!slots.flexible) > nr_slots[type])
 		return -ENOSPC;
 
-	toggle_bp_slot(bp, true);
+	toggle_bp_slot(bp, true, type, weight);
 
 	return 0;
 }
@@ -273,7 +327,12 @@ int reserve_bp_slot(struct perf_event *bp)
 
 static void __release_bp_slot(struct perf_event *bp)
 {
-	toggle_bp_slot(bp, false);
+	enum bp_type_idx type;
+	int weight;
+
+	type = find_slot_idx(bp);
+	weight = hw_breakpoint_weight(bp);
+	toggle_bp_slot(bp, false, type, weight);
 }
 
 void release_bp_slot(struct perf_event *bp)
@@ -308,6 +367,28 @@ int dbg_release_bp_slot(struct perf_event *bp)
 	return 0;
 }
 
+static int validate_hw_breakpoint(struct perf_event *bp)
+{
+	int ret;
+
+	ret = arch_validate_hwbkpt_settings(bp);
+	if (ret)
+		return ret;
+
+	if (arch_check_bp_in_kernelspace(bp)) {
+		if (bp->attr.exclude_kernel)
+			return -EINVAL;
+		/*
+		 * Don't let unprivileged users set a breakpoint in the trap
+		 * path to avoid trap recursion attacks.
+		 */
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+	}
+
+	return 0;
+}
+
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
 	int ret;
@@ -316,17 +397,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
 	if (ret)
 		return ret;
 
-	/*
-	 * Ptrace breakpoints can be temporary perf events only
-	 * meant to reserve a slot. In this case, it is created disabled and
-	 * we don't want to check the params right now (as we put a null addr)
-	 * But perf tools create events as disabled and we want to check
-	 * the params for them.
-	 * This is a quick hack that will be removed soon, once we remove
-	 * the tmp breakpoints from ptrace
-	 */
-	if (!bp->attr.disabled || !bp->overflow_handler)
-		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	ret = validate_hw_breakpoint(bp);
 
 	/* if arch_validate_hwbkpt_settings() fails then release bp slot */
 	if (ret)
@@ -373,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
 	if (attr->disabled)
 		goto end;
 
-	err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	err = validate_hw_breakpoint(bp);
 	if (!err)
 		perf_event_enable(bp);
 
@@ -480,7 +551,36 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
 
 static int __init init_hw_breakpoint(void)
 {
+	unsigned int **task_bp_pinned;
+	int cpu, err_cpu;
+	int i;
+
+	for (i = 0; i < TYPE_MAX; i++)
+		nr_slots[i] = hw_breakpoint_slots(i);
+
+	for_each_possible_cpu(cpu) {
+		for (i = 0; i < TYPE_MAX; i++) {
+			task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
+			*task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
+						  GFP_KERNEL);
+			if (!*task_bp_pinned)
+				goto err_alloc;
+		}
+	}
+
+	constraints_initialized = 1;
+
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
+
+ err_alloc:
+	for_each_possible_cpu(err_cpu) {
+		if (err_cpu == cpu)
+			break;
+		for (i = 0; i < TYPE_MAX; i++)
+			kfree(per_cpu(nr_task_bp_pinned[i], cpu));
+	}
+
+	return -ENOMEM;
 }
 core_initcall(init_hw_breakpoint);
 