Diffstat (limited to 'kernel/hw_breakpoint.c')
 -rw-r--r--  kernel/hw_breakpoint.c | 196
 1 files changed, 148 insertions, 48 deletions
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 03808ed342a6..7a56b22e0602 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -40,23 +40,29 @@
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 
 #include <linux/hw_breakpoint.h>
 
+
 /*
  * Constraints data
  */
 
 /* Number of pinned cpu breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
+static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);
 
 /* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]);
 
 /* Number of non-pinned cpu/task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
+static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
+
+static int nr_slots[TYPE_MAX];
+
+static int constraints_initialized;
 
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
 struct bp_busy_slots {
@@ -67,16 +73,29 @@ struct bp_busy_slots {
 /* Serialize accesses to the above constraints */
 static DEFINE_MUTEX(nr_bp_mutex);
 
+__weak int hw_breakpoint_weight(struct perf_event *bp)
+{
+	return 1;
+}
+
+static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
+{
+	if (bp->attr.bp_type & HW_BREAKPOINT_RW)
+		return TYPE_DATA;
+
+	return TYPE_INST;
+}
+
 /*
  * Report the maximum number of pinned breakpoints a task
  * have in this cpu
  */
-static unsigned int max_task_bp_pinned(int cpu)
+static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 {
 	int i;
-	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 
-	for (i = HBP_NUM -1; i >= 0; i--) {
+	for (i = nr_slots[type] - 1; i >= 0; i--) {
 		if (tsk_pinned[i] > 0)
 			return i + 1;
 	}
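
Two helpers anchor the new bookkeeping: find_slot_idx() maps a breakpoint to TYPE_DATA (anything carrying HW_BREAKPOINT_RW bits) or TYPE_INST, and max_task_bp_pinned() scans a per-cpu histogram in which bucket i counts the tasks currently pinning i + 1 slots of that type, so the topmost occupied bucket gives the worst-case per-task consumption. A minimal userspace sketch of that scan (NR_SLOTS and the sample values are made up for illustration, not kernel data):

#include <stdio.h>

#define NR_SLOTS 4	/* stand-in for nr_slots[type] */

/* tsk_pinned[i] == number of tasks holding exactly i + 1 pinned breakpoints */
static unsigned int tsk_pinned[NR_SLOTS];

/* Mirror of max_task_bp_pinned(): highest occupied bucket, scanned from the top. */
static unsigned int max_task_bp_pinned(void)
{
	int i;

	for (i = NR_SLOTS - 1; i >= 0; i--) {
		if (tsk_pinned[i] > 0)
			return i + 1;
	}
	return 0;
}

int main(void)
{
	tsk_pinned[0] = 3;	/* three tasks with one breakpoint each */
	tsk_pinned[2] = 1;	/* one task with three breakpoints */

	/* Prints 3: the busiest task pins three slots of this type. */
	printf("max pinned per task: %u\n", max_task_bp_pinned());
	return 0;
}
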
@@ -84,7 +103,7 @@ static unsigned int max_task_bp_pinned(int cpu)
 	return 0;
 }
 
-static int task_bp_pinned(struct task_struct *tsk)
+static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
 {
 	struct perf_event_context *ctx = tsk->perf_event_ctxp;
 	struct list_head *list;
@@ -105,7 +124,8 @@ static int task_bp_pinned(struct task_struct *tsk)
 	 */
 	list_for_each_entry(bp, list, event_entry) {
 		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-			count++;
+			if (find_slot_idx(bp) == type)
+				count += hw_breakpoint_weight(bp);
 	}
 
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
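
With separate tables per slot type, task_bp_pinned() no longer simply counts the task's breakpoint events: it skips events of the other type and sums each event's weight, since hw_breakpoint_weight() lets an architecture report that one breakpoint consumes several slots. A rough userspace model of that accumulation (struct bp_event and the ev[] list are hypothetical stand-ins for the task's perf event list):

#include <stdio.h>

enum bp_type_idx { TYPE_INST, TYPE_DATA, TYPE_MAX };

/* Hypothetical stand-in for a perf breakpoint event on the task's list. */
struct bp_event {
	enum bp_type_idx type;	/* what find_slot_idx() would return */
	int weight;		/* what hw_breakpoint_weight() would return */
};

/* Mirror of task_bp_pinned(): weight-sum of the task's events of one type. */
static int task_bp_pinned(const struct bp_event *ev, int nr, enum bp_type_idx type)
{
	int i, count = 0;

	for (i = 0; i < nr; i++) {
		if (ev[i].type == type)
			count += ev[i].weight;
	}
	return count;
}

int main(void)
{
	struct bp_event ev[] = {
		{ TYPE_DATA, 1 }, { TYPE_INST, 1 }, { TYPE_DATA, 2 },
	};

	/* Prints 3: the two data breakpoints cost 1 + 2 slots. */
	printf("data slots pinned: %d\n", task_bp_pinned(ev, 3, TYPE_DATA));
	return 0;
}
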
@@ -118,18 +138,19 @@ static int task_bp_pinned(struct task_struct *tsk)
  * a given cpu (cpu > -1) or in all of them (cpu = -1).
  */
 static void
-fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
+		    enum bp_type_idx type)
 {
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
 
 	if (cpu >= 0) {
-		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
+		slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
 		if (!tsk)
-			slots->pinned += max_task_bp_pinned(cpu);
+			slots->pinned += max_task_bp_pinned(cpu, type);
 		else
-			slots->pinned += task_bp_pinned(tsk);
-		slots->flexible = per_cpu(nr_bp_flexible, cpu);
+			slots->pinned += task_bp_pinned(tsk, type);
+		slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
 
 		return;
 	}
@@ -137,16 +158,16 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 	for_each_online_cpu(cpu) {
 		unsigned int nr;
 
-		nr = per_cpu(nr_cpu_bp_pinned, cpu);
+		nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
 		if (!tsk)
-			nr += max_task_bp_pinned(cpu);
+			nr += max_task_bp_pinned(cpu, type);
 		else
-			nr += task_bp_pinned(tsk);
+			nr += task_bp_pinned(tsk, type);
 
 		if (nr > slots->pinned)
 			slots->pinned = nr;
 
-		nr = per_cpu(nr_bp_flexible, cpu);
+		nr = per_cpu(nr_bp_flexible[type], cpu);
 
 		if (nr > slots->flexible)
 			slots->flexible = nr;
@@ -154,31 +175,49 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 }
 
 /*
+ * For now, continue to consider flexible as pinned, until we can
+ * ensure no flexible event can ever be scheduled before a pinned event
+ * in a same cpu.
+ */
+static void
+fetch_this_slot(struct bp_busy_slots *slots, int weight)
+{
+	slots->pinned += weight;
+}
+
+/*
  * Add a pinned breakpoint for the given task in our constraint table
  */
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
+static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
+				enum bp_type_idx type, int weight)
 {
 	unsigned int *tsk_pinned;
-	int count = 0;
+	int old_count = 0;
+	int old_idx = 0;
+	int idx = 0;
 
-	count = task_bp_pinned(tsk);
+	old_count = task_bp_pinned(tsk, type);
+	old_idx = old_count - 1;
+	idx = old_idx + weight;
 
-	tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+	tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 	if (enable) {
-		tsk_pinned[count]++;
-		if (count > 0)
-			tsk_pinned[count-1]--;
+		tsk_pinned[idx]++;
+		if (old_count > 0)
+			tsk_pinned[old_idx]--;
 	} else {
-		tsk_pinned[count]--;
-		if (count > 0)
-			tsk_pinned[count-1]++;
+		tsk_pinned[idx]--;
+		if (old_count > 0)
+			tsk_pinned[old_idx]++;
 	}
 }
 
 /*
  * Add/remove the given breakpoint in our constraint table
  */
-static void toggle_bp_slot(struct perf_event *bp, bool enable)
+static void
+toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
+	       int weight)
 {
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
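
toggle_bp_task_slot() updates the same histogram by moving the task between buckets: it leaves the bucket for its old per-type total (old_count - 1) and enters bucket old_count - 1 + weight, so a single weighted breakpoint can jump several buckets at once. A small self-contained sketch with a worked example (the bucket count and the weight of 2 are arbitrary choices for illustration):

#include <stdio.h>
#include <stdbool.h>

#define NR_SLOTS 8	/* stand-in for nr_slots[type] */

static unsigned int tsk_pinned[NR_SLOTS];

/*
 * Mirror of toggle_bp_task_slot(): the task leaves the bucket for its old
 * per-type total and enters the bucket for the new total.  "old_count" is
 * what task_bp_pinned() would return, i.e. the pinned breakpoints of this
 * type not counting the one being toggled.
 */
static void toggle_task_slot(int old_count, bool enable, int weight)
{
	int old_idx = old_count - 1;
	int idx = old_idx + weight;

	if (enable) {
		tsk_pinned[idx]++;
		if (old_count > 0)
			tsk_pinned[old_idx]--;
	} else {
		tsk_pinned[idx]--;
		if (old_count > 0)
			tsk_pinned[old_idx]++;
	}
}

int main(void)
{
	/* A task with one data breakpoint adds another one of weight 2. */
	tsk_pinned[0] = 1;			/* bucket for "1 slot used" */
	toggle_task_slot(1, true, 2);		/* old_count = 1, weight = 2 */

	/* Prints { 0, 0, 1 }: bucket 2 ("3 slots used") now holds the task. */
	printf("tsk_pinned = { %u, %u, %u }\n",
	       tsk_pinned[0], tsk_pinned[1], tsk_pinned[2]);
	return 0;
}
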
@@ -186,20 +225,20 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 	/* Pinned counter task profiling */
 	if (tsk) {
 		if (cpu >= 0) {
-			toggle_bp_task_slot(tsk, cpu, enable);
+			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
 			return;
 		}
 
 		for_each_online_cpu(cpu)
-			toggle_bp_task_slot(tsk, cpu, enable);
+			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
 		return;
 	}
 
 	/* Pinned counter cpu profiling */
 	if (enable)
-		per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
+		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
 	else
-		per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
+		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
 }
 
 /*
@@ -246,14 +285,29 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 static int __reserve_bp_slot(struct perf_event *bp)
 {
 	struct bp_busy_slots slots = {0};
+	enum bp_type_idx type;
+	int weight;
 
-	fetch_bp_busy_slots(&slots, bp);
+	/* We couldn't initialize breakpoint constraints on boot */
+	if (!constraints_initialized)
+		return -ENOMEM;
+
+	/* Basic checks */
+	if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
+	    bp->attr.bp_type == HW_BREAKPOINT_INVALID)
+		return -EINVAL;
+
+	type = find_slot_idx(bp);
+	weight = hw_breakpoint_weight(bp);
+
+	fetch_bp_busy_slots(&slots, bp, type);
+	fetch_this_slot(&slots, weight);
 
 	/* Flexible counters need to keep at least one slot */
-	if (slots.pinned + (!!slots.flexible) == HBP_NUM)
+	if (slots.pinned + (!!slots.flexible) > nr_slots[type])
 		return -ENOSPC;
 
-	toggle_bp_slot(bp, true);
+	toggle_bp_slot(bp, true, type, weight);
 
 	return 0;
 }
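
The reservation test also changes shape: because fetch_this_slot() adds the request's weight up front and a weight can exceed one, the running total may step past the old exact-equality limit without ever hitting it, so the check becomes an overflow test against the per-type slot count (still keeping one slot back whenever flexible events exist). A sketch of that check under those assumptions (check_slots() is an illustrative name, not a kernel function):

#include <errno.h>
#include <stdio.h>

/* Illustrative mirror of the totals fetch_bp_busy_slots() gathers. */
struct busy_slots {
	unsigned int pinned;	/* worst-case pinned slots, this request included */
	unsigned int flexible;	/* any flexible (non-pinned) events around? */
};

/*
 * Mirror of the test in __reserve_bp_slot(): fail if the pinned total,
 * plus one slot reserved as soon as any flexible event exists, no longer
 * fits in the per-type slot count.
 */
static int check_slots(const struct busy_slots *slots, unsigned int nr_type_slots)
{
	if (slots->pinned + (slots->flexible ? 1 : 0) > nr_type_slots)
		return -ENOSPC;
	return 0;
}

int main(void)
{
	/* 3 pinned slots already in use plus a weight-2 request, 4 slots total. */
	struct busy_slots slots = { .pinned = 3 + 2, .flexible = 0 };

	/* Prints -ENOSPC (-28 on Linux): the weighted request overshoots the limit. */
	printf("%d\n", check_slots(&slots, 4));
	return 0;
}
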
@@ -273,7 +327,12 @@ int reserve_bp_slot(struct perf_event *bp)
 
 static void __release_bp_slot(struct perf_event *bp)
 {
-	toggle_bp_slot(bp, false);
+	enum bp_type_idx type;
+	int weight;
+
+	type = find_slot_idx(bp);
+	weight = hw_breakpoint_weight(bp);
+	toggle_bp_slot(bp, false, type, weight);
 }
 
 void release_bp_slot(struct perf_event *bp)
@@ -308,6 +367,28 @@ int dbg_release_bp_slot(struct perf_event *bp)
 	return 0;
 }
 
+static int validate_hw_breakpoint(struct perf_event *bp)
+{
+	int ret;
+
+	ret = arch_validate_hwbkpt_settings(bp);
+	if (ret)
+		return ret;
+
+	if (arch_check_bp_in_kernelspace(bp)) {
+		if (bp->attr.exclude_kernel)
+			return -EINVAL;
+		/*
+		 * Don't let unprivileged users set a breakpoint in the trap
+		 * path to avoid trap recursion attacks.
+		 */
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+	}
+
+	return 0;
+}
+
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
 	int ret;
@@ -316,17 +397,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
 	if (ret)
 		return ret;
 
-	/*
-	 * Ptrace breakpoints can be temporary perf events only
-	 * meant to reserve a slot. In this case, it is created disabled and
-	 * we don't want to check the params right now (as we put a null addr)
-	 * But perf tools create events as disabled and we want to check
-	 * the params for them.
-	 * This is a quick hack that will be removed soon, once we remove
-	 * the tmp breakpoints from ptrace
-	 */
-	if (!bp->attr.disabled || !bp->overflow_handler)
-		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	ret = validate_hw_breakpoint(bp);
 
 	/* if arch_validate_hwbkpt_settings() fails then release bp slot */
 	if (ret)
@@ -373,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
 	if (attr->disabled)
 		goto end;
 
-	err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	err = validate_hw_breakpoint(bp);
 	if (!err)
 		perf_event_enable(bp);
 
@@ -480,7 +551,36 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
 
 static int __init init_hw_breakpoint(void)
 {
+	unsigned int **task_bp_pinned;
+	int cpu, err_cpu;
+	int i;
+
+	for (i = 0; i < TYPE_MAX; i++)
+		nr_slots[i] = hw_breakpoint_slots(i);
+
+	for_each_possible_cpu(cpu) {
+		for (i = 0; i < TYPE_MAX; i++) {
+			task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
+			*task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
+						  GFP_KERNEL);
+			if (!*task_bp_pinned)
+				goto err_alloc;
+		}
+	}
+
+	constraints_initialized = 1;
+
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
+
+err_alloc:
+	for_each_possible_cpu(err_cpu) {
+		if (err_cpu == cpu)
+			break;
+		for (i = 0; i < TYPE_MAX; i++)
+			kfree(per_cpu(nr_task_bp_pinned[i], cpu));
+	}
+
+	return -ENOMEM;
 }
 core_initcall(init_hw_breakpoint);
 
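
init_hw_breakpoint() now has real work to do before registering the die notifier: query the per-type slot counts, allocate one per-task histogram per type on every possible CPU, and only then mark the constraints usable; if any allocation fails, the arrays already handed out are released and reservations later fail with -ENOMEM, per the "couldn't initialize breakpoint constraints on boot" check in __reserve_bp_slot(). A userspace sketch of the same allocate-then-unwind pattern (array sizes, names, and the fixed CPU count are invented for the example):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS		4	/* illustrative stand-ins, not kernel values */
#define TYPE_MAX	2

static unsigned int *task_pinned[NR_CPUS][TYPE_MAX];
static int slots[TYPE_MAX] = { 4, 1 };	/* e.g. data vs. instruction slots */

/*
 * Same shape as the init_hw_breakpoint() allocation: one per-type histogram
 * per CPU; on failure, free the CPUs that were fully populated before the
 * failing one and report the error.
 */
static int alloc_constraints(void)
{
	int cpu, err_cpu, i;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		for (i = 0; i < TYPE_MAX; i++) {
			task_pinned[cpu][i] = calloc(slots[i], sizeof(unsigned int));
			if (!task_pinned[cpu][i])
				goto err_alloc;
		}
	}
	return 0;

err_alloc:
	for (err_cpu = 0; err_cpu < cpu; err_cpu++)
		for (i = 0; i < TYPE_MAX; i++)
			free(task_pinned[err_cpu][i]);
	return -ENOMEM;
}

int main(void)
{
	/* Prints 0 unless the toy allocations fail. */
	printf("alloc_constraints() = %d\n", alloc_constraints());
	return 0;
}
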
