Diffstat (limited to 'kernel/perf_counter.c')
 -rw-r--r--	kernel/perf_counter.c	173
 1 file changed, 137 insertions(+), 36 deletions(-)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d7cbc579fc80..e0d91fdf0c3c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -46,12 +46,18 @@ static atomic_t nr_task_counters __read_mostly;
 
 /*
  * perf counter paranoia level:
- *  0 - not paranoid
- *  1 - disallow cpu counters to unpriv
- *  2 - disallow kernel profiling to unpriv
+ * -1 - not paranoid at all
+ *  0 - disallow raw tracepoint access for unpriv
+ *  1 - disallow cpu counters for unpriv
+ *  2 - disallow kernel profiling for unpriv
  */
 int sysctl_perf_counter_paranoid __read_mostly = 1;
 
+static inline bool perf_paranoid_tracepoint_raw(void)
+{
+	return sysctl_perf_counter_paranoid > -1;
+}
+
 static inline bool perf_paranoid_cpu(void)
 {
 	return sysctl_perf_counter_paranoid > 0;
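The new -1 level is reachable at runtime through the existing sysctl. A minimal userspace sketch, assuming the 2.6.31-era /proc path; the helper itself is illustrative and not part of this patch:

#include <stdio.h>

/* Illustrative: write a paranoia level, e.g. -1, so that unprivileged
 * processes may request raw tracepoint data per the table above. */
static int set_perf_paranoid(int level)
{
	FILE *f = fopen("/proc/sys/kernel/perf_counter_paranoid", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", level);
	return fclose(f);
}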
@@ -469,7 +475,8 @@ static void update_counter_times(struct perf_counter *counter)
 	struct perf_counter_context *ctx = counter->ctx;
 	u64 run_end;
 
-	if (counter->state < PERF_COUNTER_STATE_INACTIVE)
+	if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
+	    counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE)
 		return;
 
 	counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
@@ -518,7 +525,7 @@ static void __perf_counter_disable(void *info)
 	 */
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
 		update_context_time(ctx);
-		update_counter_times(counter);
+		update_group_times(counter);
 		if (counter == counter->group_leader)
 			group_sched_out(counter, cpuctx, ctx);
 		else
@@ -573,7 +580,7 @@ static void perf_counter_disable(struct perf_counter *counter)
 	 * in, so we can change the state safely.
 	 */
 	if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
-		update_counter_times(counter);
+		update_group_times(counter);
 		counter->state = PERF_COUNTER_STATE_OFF;
 	}
 
@@ -851,6 +858,27 @@ retry:
 }
 
 /*
+ * Put a counter into inactive state and update time fields.
+ * Enabling the leader of a group effectively enables all
+ * the group members that aren't explicitly disabled, so we
+ * have to update their ->tstamp_enabled also.
+ * Note: this works for group members as well as group leaders
+ * since the non-leader members' sibling_lists will be empty.
+ */
+static void __perf_counter_mark_enabled(struct perf_counter *counter,
+					struct perf_counter_context *ctx)
+{
+	struct perf_counter *sub;
+
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+	list_for_each_entry(sub, &counter->sibling_list, list_entry)
+		if (sub->state >= PERF_COUNTER_STATE_INACTIVE)
+			sub->tstamp_enabled =
+				ctx->time - sub->total_time_enabled;
+}
+
+/*
  * Cross CPU call to enable a performance counter
  */
 static void __perf_counter_enable(void *info)
@@ -877,8 +905,7 @@ static void __perf_counter_enable(void *info)
 
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		goto unlock;
-	counter->state = PERF_COUNTER_STATE_INACTIVE;
-	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+	__perf_counter_mark_enabled(counter, ctx);
 
 	/*
 	 * If the counter is in a group and isn't the group leader,
@@ -971,11 +998,9 @@ static void perf_counter_enable(struct perf_counter *counter)
 	 * Since we have the lock this context can't be scheduled
 	 * in, so we can change the state safely.
 	 */
-	if (counter->state == PERF_COUNTER_STATE_OFF) {
-		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->tstamp_enabled =
-			ctx->time - counter->total_time_enabled;
-	}
+	if (counter->state == PERF_COUNTER_STATE_OFF)
+		__perf_counter_mark_enabled(counter, ctx);
+
 out:
 	spin_unlock_irq(&ctx->lock);
 }
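The tstamp_enabled arithmetic in __perf_counter_mark_enabled() keeps total_time_enabled continuous across disable/enable cycles. A worked example, with made-up times: a counter that had accumulated total_time_enabled = 30 and is re-enabled at ctx->time = 100 gets tstamp_enabled = 100 - 30 = 70; when update_counter_times() later runs at ctx->time = 150 it computes total_time_enabled = 150 - 70 = 80, i.e. the original 30 plus the 50 elapsed since re-enable, with the disabled gap excluded.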
@@ -1479,9 +1504,7 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
 		counter->attr.enable_on_exec = 0;
 		if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 			continue;
-		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->tstamp_enabled =
-			ctx->time - counter->total_time_enabled;
+		__perf_counter_mark_enabled(counter, ctx);
 		enabled = 1;
 	}
 
@@ -1675,6 +1698,11 @@ static void free_counter(struct perf_counter *counter)
 		atomic_dec(&nr_task_counters);
 	}
 
+	if (counter->output) {
+		fput(counter->output->filp);
+		counter->output = NULL;
+	}
+
 	if (counter->destroy)
 		counter->destroy(counter);
 
@@ -1960,6 +1988,8 @@ unlock:
 	return ret;
 }
 
+int perf_counter_set_output(struct perf_counter *counter, int output_fd);
+
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct perf_counter *counter = file->private_data;
@@ -1983,6 +2013,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case PERF_COUNTER_IOC_PERIOD:
 		return perf_counter_period(counter, (u64 __user *)arg);
 
+	case PERF_COUNTER_IOC_SET_OUTPUT:
+		return perf_counter_set_output(counter, arg);
+
 	default:
 		return -ENOTTY;
 	}
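A minimal userspace sketch of the new command; PERF_COUNTER_IOC_SET_OUTPUT itself is defined on the perf_counter.h side of this series, outside this diffstat, and the helper name is illustrative:

#include <sys/ioctl.h>
#include <linux/perf_counter.h>

/* Route the sample output of 'fd' into the mmap buffer of 'target_fd'.
 * Passing 0 instead of a target detaches the redirection again,
 * matching the !output_fd path in perf_counter_set_output() below. */
static int redirect_output(int fd, int target_fd)
{
	return ioctl(fd, PERF_COUNTER_IOC_SET_OUTPUT, target_fd);
}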
@@ -2253,6 +2286,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
 	WARN_ON_ONCE(counter->ctx->parent_ctx);
 	mutex_lock(&counter->mmap_mutex);
+	if (counter->output) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
 	if (atomic_inc_not_zero(&counter->mmap_count)) {
 		if (nr_pages != counter->data->nr_pages)
 			ret = -EINVAL;
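Seen from userspace, a counter fd that has been redirected this way no longer accepts a mapping of its own; a sketch under that assumption, with the fd setup omitted:

#include <sys/mman.h>
#include <unistd.h>

/* A redirected counter has no buffer of its own, so perf_mmap()
 * above now refuses it with -EINVAL. */
static int mmap_is_refused(int redirected_fd)
{
	long psz = sysconf(_SC_PAGE_SIZE);
	void *buf = mmap(NULL, (1 + 8) * psz, PROT_READ | PROT_WRITE,
			 MAP_SHARED, redirected_fd, 0);

	return buf == MAP_FAILED;	/* expect 1, errno == EINVAL */
}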
@@ -2638,6 +2676,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_counter *counter, unsigned int size,
 			     int nmi, int sample)
 {
+	struct perf_counter *output_counter;
 	struct perf_mmap_data *data;
 	unsigned int offset, head;
 	int have_lost;
@@ -2647,13 +2686,17 @@ static int perf_output_begin(struct perf_output_handle *handle,
 		u64			 lost;
 	} lost_event;
 
+	rcu_read_lock();
 	/*
 	 * For inherited counters we send all the output towards the parent.
 	 */
 	if (counter->parent)
 		counter = counter->parent;
 
-	rcu_read_lock();
+	output_counter = rcu_dereference(counter->output);
+	if (output_counter)
+		counter = output_counter;
+
 	data = rcu_dereference(counter->data);
 	if (!data)
 		goto out;
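rcu_read_lock() moves up so that counter->output is only dereferenced inside the read-side critical section; it pairs with the publish/retire sequence in perf_counter_set_output() further down. Schematically, as a sketch of the pairing rather than code from this file:

/* reader side, as in perf_output_begin() above: */
rcu_read_lock();
output_counter = rcu_dereference(counter->output);
if (output_counter)
	counter = output_counter;	/* emit into the target's buffer */
/* ... write the event ... */
rcu_read_unlock();

/* updater side, as in perf_counter_set_output() below: */
rcu_assign_pointer(counter->output, new_output);
synchronize_rcu();			/* wait out readers like the above */
fput(old_output->filp);			/* old target now safe to release */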
@@ -3934,6 +3977,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 	 * have these.
 	 */
 	if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
+	    perf_paranoid_tracepoint_raw() &&
 	    !capable(CAP_SYS_ADMIN))
 		return ERR_PTR(-EPERM);
 
@@ -4202,6 +4246,57 @@ err_size:
 	goto out;
 }
 
+int perf_counter_set_output(struct perf_counter *counter, int output_fd)
+{
+	struct perf_counter *output_counter = NULL;
+	struct file *output_file = NULL;
+	struct perf_counter *old_output;
+	int fput_needed = 0;
+	int ret = -EINVAL;
+
+	if (!output_fd)
+		goto set;
+
+	output_file = fget_light(output_fd, &fput_needed);
+	if (!output_file)
+		return -EBADF;
+
+	if (output_file->f_op != &perf_fops)
+		goto out;
+
+	output_counter = output_file->private_data;
+
+	/* Don't chain output fds */
+	if (output_counter->output)
+		goto out;
+
+	/* Don't set an output fd when we already have an output channel */
+	if (counter->data)
+		goto out;
+
+	atomic_long_inc(&output_file->f_count);
+
+set:
+	mutex_lock(&counter->mmap_mutex);
+	old_output = counter->output;
+	rcu_assign_pointer(counter->output, output_counter);
+	mutex_unlock(&counter->mmap_mutex);
+
+	if (old_output) {
+		/*
+		 * we need to make sure no existing perf_output_*()
+		 * is still referencing this counter.
+		 */
+		synchronize_rcu();
+		fput(old_output->filp);
+	}
+
+	ret = 0;
+out:
+	fput_light(output_file, fput_needed);
+	return ret;
+}
+
 /**
  * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
  *
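Together with the perf_mmap() hunk earlier, this enforces a single-level fan-in: many counters may feed one buffer, but redirections cannot be chained and a counter that already owns a buffer cannot itself be redirected. With the illustrative redirect_output() helper from above, where a, b and c are counter fds and only 'a' has been mmap()ed:

redirect_output(b, a);	/* ok: b's samples land in a's buffer            */
redirect_output(c, b);	/* fails, EINVAL: b already redirects; no chains */
redirect_output(a, c);	/* fails, EINVAL: a owns a buffer (->data set)   */
redirect_output(b, 0);	/* ok: detaches b from a again                   */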
@@ -4221,15 +4316,15 @@ SYSCALL_DEFINE5(perf_counter_open,
 	struct file *group_file = NULL;
 	int fput_needed = 0;
 	int fput_needed2 = 0;
-	int ret;
+	int err;
 
 	/* for future expandability... */
-	if (flags)
+	if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT))
 		return -EINVAL;
 
-	ret = perf_copy_attr(attr_uptr, &attr);
-	if (ret)
-		return ret;
+	err = perf_copy_attr(attr_uptr, &attr);
+	if (err)
+		return err;
 
 	if (!attr.exclude_kernel) {
 		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
@@ -4252,8 +4347,8 @@ SYSCALL_DEFINE5(perf_counter_open,
 	 * Look up the group leader (we will attach this counter to it):
 	 */
 	group_leader = NULL;
-	if (group_fd != -1) {
-		ret = -EINVAL;
+	if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
+		err = -EINVAL;
 		group_file = fget_light(group_fd, &fput_needed);
 		if (!group_file)
 			goto err_put_context;
@@ -4282,18 +4377,24 @@ SYSCALL_DEFINE5(perf_counter_open,
 
 	counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
 				     NULL, GFP_KERNEL);
-	ret = PTR_ERR(counter);
+	err = PTR_ERR(counter);
 	if (IS_ERR(counter))
 		goto err_put_context;
 
-	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
-	if (ret < 0)
+	err = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
+	if (err < 0)
 		goto err_free_put_context;
 
-	counter_file = fget_light(ret, &fput_needed2);
+	counter_file = fget_light(err, &fput_needed2);
 	if (!counter_file)
 		goto err_free_put_context;
 
+	if (flags & PERF_FLAG_FD_OUTPUT) {
+		err = perf_counter_set_output(counter, group_fd);
+		if (err)
+			goto err_fput_free_put_context;
+	}
+
 	counter->filp = counter_file;
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
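The flag path establishes the same redirection at creation time. A hedged sketch; PERF_FLAG_FD_OUTPUT and PERF_FLAG_FD_NO_GROUP come from the perf_counter.h side of this series, and the raw-syscall wrapper plus attr setup are illustrative:

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_counter.h>

/* Open a counter whose output is redirected into group_fd's buffer at
 * creation time instead of via a later SET_OUTPUT ioctl.  With
 * PERF_FLAG_FD_NO_GROUP also set, group_fd names only the output
 * target, not a group leader. */
static int open_with_output(struct perf_counter_attr *attr,
			    pid_t pid, int cpu, int group_fd)
{
	return syscall(__NR_perf_counter_open, attr, pid, cpu, group_fd,
		       PERF_FLAG_FD_OUTPUT | PERF_FLAG_FD_NO_GROUP);
}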
@@ -4307,20 +4408,20 @@ SYSCALL_DEFINE5(perf_counter_open,
 	list_add_tail(&counter->owner_entry, &current->perf_counter_list);
 	mutex_unlock(&current->perf_counter_mutex);
 
+err_fput_free_put_context:
 	fput_light(counter_file, fput_needed2);
 
-out_fput:
-	fput_light(group_file, fput_needed);
-
-	return ret;
-
 err_free_put_context:
-	kfree(counter);
+	if (err < 0)
+		kfree(counter);
 
 err_put_context:
-	put_ctx(ctx);
+	if (err < 0)
+		put_ctx(ctx);
+
+	fput_light(group_file, fput_needed);
 
-	goto out_fput;
+	return err;
 }
 
 /*
