Diffstat (limited to 'kernel/perf_counter.c')

 kernel/perf_counter.c | 176 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 139 insertions(+), 37 deletions(-)

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f274e1959885..e0d91fdf0c3c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -46,11 +46,17 @@ static atomic_t nr_task_counters __read_mostly;
 
 /*
  * perf counter paranoia level:
- *  0 - not paranoid
- *  1 - disallow cpu counters to unpriv
- *  2 - disallow kernel profiling to unpriv
+ * -1 - not paranoid at all
+ *  0 - disallow raw tracepoint access for unpriv
+ *  1 - disallow cpu counters for unpriv
+ *  2 - disallow kernel profiling for unpriv
  */
-int sysctl_perf_counter_paranoid __read_mostly;
+int sysctl_perf_counter_paranoid __read_mostly = 1;
+
+static inline bool perf_paranoid_tracepoint_raw(void)
+{
+        return sysctl_perf_counter_paranoid > -1;
+}
 
 static inline bool perf_paranoid_cpu(void)
 {
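
The new -1 level relaxes all checks, and the default moves from 0 to 1, so unprivileged CPU-wide counters are now refused out of the box. A userspace tool can inspect the level before attempting a privileged request. A minimal hedged sketch, assuming the sysctl is exposed at /proc/sys/kernel/perf_counter_paranoid (the path is inferred from the variable name; it is not shown in this diff):

#include <stdio.h>

static int read_paranoia_level(void)
{
        /* Assumed procfs path for sysctl_perf_counter_paranoid. */
        FILE *f = fopen("/proc/sys/kernel/perf_counter_paranoid", "r");
        int level = 1;  /* mirror the new kernel default if unreadable */

        if (f) {
                if (fscanf(f, "%d", &level) != 1)
                        level = 1;
                fclose(f);
        }
        return level;
}

int main(void)
{
        /* Per the comment above: > -1 means PERF_SAMPLE_RAW on
         * tracepoints requires CAP_SYS_ADMIN. */
        printf("raw tracepoints restricted: %s\n",
               read_paranoia_level() > -1 ? "yes" : "no");
        return 0;
}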
@@ -469,7 +475,8 @@ static void update_counter_times(struct perf_counter *counter)
 	struct perf_counter_context *ctx = counter->ctx;
 	u64 run_end;
 
-	if (counter->state < PERF_COUNTER_STATE_INACTIVE)
+	if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
+	    counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE)
 		return;
 
 	counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
@@ -518,7 +525,7 @@ static void __perf_counter_disable(void *info)
 	 */
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
 		update_context_time(ctx);
-		update_counter_times(counter);
+		update_group_times(counter);
 		if (counter == counter->group_leader)
 			group_sched_out(counter, cpuctx, ctx);
 		else
@@ -573,7 +580,7 @@ static void perf_counter_disable(struct perf_counter *counter)
 	 * in, so we can change the state safely.
 	 */
 	if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
-		update_counter_times(counter);
+		update_group_times(counter);
 		counter->state = PERF_COUNTER_STATE_OFF;
 	}
 
@@ -851,6 +858,27 @@ retry:
 }
 
 /*
+ * Put a counter into inactive state and update time fields.
+ * Enabling the leader of a group effectively enables all
+ * the group members that aren't explicitly disabled, so we
+ * have to update their ->tstamp_enabled also.
+ * Note: this works for group members as well as group leaders
+ * since the non-leader members' sibling_lists will be empty.
+ */
+static void __perf_counter_mark_enabled(struct perf_counter *counter,
+					struct perf_counter_context *ctx)
+{
+	struct perf_counter *sub;
+
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+	list_for_each_entry(sub, &counter->sibling_list, list_entry)
+		if (sub->state >= PERF_COUNTER_STATE_INACTIVE)
+			sub->tstamp_enabled =
+				ctx->time - sub->total_time_enabled;
+}
+
+/*
  * Cross CPU call to enable a performance counter
  */
 static void __perf_counter_enable(void *info)
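
The new helper centralizes the enabled-time bookkeeping that the next three hunks previously open-coded. The trick is that total_time_enabled is derived, not accumulated: on enable, tstamp_enabled is back-dated so that ctx->time - tstamp_enabled always yields the total time the counter has been enabled. A hedged, self-contained toy model of that identity (plain userspace C with hypothetical types standing in for struct perf_counter fields; not kernel code):

#include <assert.h>

struct toy_counter {
        unsigned long long total_time_enabled;
        unsigned long long tstamp_enabled;
};

/* On (re-)enable: pick tstamp so that now - tstamp equals the time
 * already accumulated, exactly as __perf_counter_mark_enabled() does. */
static void mark_enabled(struct toy_counter *c, unsigned long long now)
{
        c->tstamp_enabled = now - c->total_time_enabled;
}

/* The read side, mirroring update_counter_times() above. */
static void update_times(struct toy_counter *c, unsigned long long now)
{
        c->total_time_enabled = now - c->tstamp_enabled;
}

int main(void)
{
        struct toy_counter c = { .total_time_enabled = 100 };

        mark_enabled(&c, 1000);  /* previously enabled for 100 time units */
        update_times(&c, 1250);  /* 250 units later ...                   */
        assert(c.total_time_enabled == 350);  /* ... 100 + 250 total      */
        return 0;
}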
@@ -877,8 +905,7 @@ static void __perf_counter_enable(void *info)
 
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		goto unlock;
-	counter->state = PERF_COUNTER_STATE_INACTIVE;
-	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+	__perf_counter_mark_enabled(counter, ctx);
 
 	/*
 	 * If the counter is in a group and isn't the group leader,
@@ -971,11 +998,9 @@ static void perf_counter_enable(struct perf_counter *counter)
 	 * Since we have the lock this context can't be scheduled
 	 * in, so we can change the state safely.
 	 */
-	if (counter->state == PERF_COUNTER_STATE_OFF) {
-		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->tstamp_enabled =
-			ctx->time - counter->total_time_enabled;
-	}
+	if (counter->state == PERF_COUNTER_STATE_OFF)
+		__perf_counter_mark_enabled(counter, ctx);
+
  out:
 	spin_unlock_irq(&ctx->lock);
 }
@@ -1479,9 +1504,7 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
 		counter->attr.enable_on_exec = 0;
 		if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 			continue;
-		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->tstamp_enabled =
-			ctx->time - counter->total_time_enabled;
+		__perf_counter_mark_enabled(counter, ctx);
 		enabled = 1;
 	}
 
@@ -1675,6 +1698,11 @@ static void free_counter(struct perf_counter *counter)
 		atomic_dec(&nr_task_counters);
 	}
 
+	if (counter->output) {
+		fput(counter->output->filp);
+		counter->output = NULL;
+	}
+
 	if (counter->destroy)
 		counter->destroy(counter);
 
@@ -1960,6 +1988,8 @@ unlock:
 	return ret;
 }
 
+int perf_counter_set_output(struct perf_counter *counter, int output_fd);
+
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct perf_counter *counter = file->private_data;
@@ -1983,6 +2013,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case PERF_COUNTER_IOC_PERIOD:
 		return perf_counter_period(counter, (u64 __user *)arg);
 
+	case PERF_COUNTER_IOC_SET_OUTPUT:
+		return perf_counter_set_output(counter, arg);
+
 	default:
 		return -ENOTTY;
 	}
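
The new ioctl wires the redirection into the counter's file descriptor interface. A hedged usage sketch, assuming a userspace copy of the perf_counter.h ABI header that defines PERF_COUNTER_IOC_SET_OUTPUT (the include path is an assumption for this era):

#include <sys/ioctl.h>
#include <linux/perf_counter.h>  /* assumed ABI header location */

/* Redirect fd_a's samples into fd_b's ring buffer. Passing 0 as the
 * target clears an existing redirection, per the !output_fd case in
 * perf_counter_set_output() further down in this patch. */
static int redirect_output(int fd_a, int fd_b)
{
        return ioctl(fd_a, PERF_COUNTER_IOC_SET_OUTPUT, fd_b);
}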
@@ -2253,6 +2286,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
 	WARN_ON_ONCE(counter->ctx->parent_ctx);
 	mutex_lock(&counter->mmap_mutex);
+	if (counter->output) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
 	if (atomic_inc_not_zero(&counter->mmap_count)) {
 		if (nr_pages != counter->data->nr_pages)
 			ret = -EINVAL;
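
Together with the counter->data check in perf_counter_set_output() (further down), this makes mmap() and redirection mutually exclusive in either order: a redirected counter has no buffer of its own. A hedged sketch of the two failing sequences (fd_a and fd_b are open, un-mmap()ed counter fds; two pages is assumed to be the minimal mapping, one header page plus one data page):

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/perf_counter.h>  /* assumed ABI header location */

static void demo(int fd_a, int fd_b, long page)
{
        /* Order 1: mmap first (counter->data set), then redirect:
         * the ioctl fails with EINVAL since fd_a already has a buffer. */
        void *buf = mmap(NULL, 2 * page, PROT_READ, MAP_SHARED, fd_a, 0);
        printf("ioctl after mmap: %d\n",
               ioctl(fd_a, PERF_COUNTER_IOC_SET_OUTPUT, fd_b));

        /* Order 2: redirect first (counter->output set), then mmap:
         * the mmap hits the new check above and returns MAP_FAILED. */
        ioctl(fd_b, PERF_COUNTER_IOC_SET_OUTPUT, fd_a);
        printf("mmap after ioctl: %p\n",
               mmap(NULL, 2 * page, PROT_READ, MAP_SHARED, fd_b, 0));
        (void)buf;
}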
@@ -2638,6 +2676,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_counter *counter, unsigned int size,
 			     int nmi, int sample)
 {
+	struct perf_counter *output_counter;
 	struct perf_mmap_data *data;
 	unsigned int offset, head;
 	int have_lost;
@@ -2647,13 +2686,17 @@ static int perf_output_begin(struct perf_output_handle *handle,
 		u64 lost;
 	} lost_event;
 
+	rcu_read_lock();
 	/*
 	 * For inherited counters we send all the output towards the parent.
 	 */
 	if (counter->parent)
 		counter = counter->parent;
 
-	rcu_read_lock();
+	output_counter = rcu_dereference(counter->output);
+	if (output_counter)
+		counter = output_counter;
+
 	data = rcu_dereference(counter->data);
 	if (!data)
 		goto out;
@@ -3934,6 +3977,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 	 * have these.
 	 */
 	if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
+			perf_paranoid_tracepoint_raw() &&
 			!capable(CAP_SYS_ADMIN))
 		return ERR_PTR(-EPERM);
 
@@ -4066,6 +4110,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	hwc->sample_period = attr->sample_period;
 	if (attr->freq && attr->sample_freq)
 		hwc->sample_period = 1;
+	hwc->last_period = hwc->sample_period;
 
 	atomic64_set(&hwc->period_left, hwc->sample_period);
 
@@ -4201,6 +4246,57 @@ err_size:
 	goto out;
 }
 
+int perf_counter_set_output(struct perf_counter *counter, int output_fd)
+{
+	struct perf_counter *output_counter = NULL;
+	struct file *output_file = NULL;
+	struct perf_counter *old_output;
+	int fput_needed = 0;
+	int ret = -EINVAL;
+
+	if (!output_fd)
+		goto set;
+
+	output_file = fget_light(output_fd, &fput_needed);
+	if (!output_file)
+		return -EBADF;
+
+	if (output_file->f_op != &perf_fops)
+		goto out;
+
+	output_counter = output_file->private_data;
+
+	/* Don't chain output fds */
+	if (output_counter->output)
+		goto out;
+
+	/* Don't set an output fd when we already have an output channel */
+	if (counter->data)
+		goto out;
+
+	atomic_long_inc(&output_file->f_count);
+
+set:
+	mutex_lock(&counter->mmap_mutex);
+	old_output = counter->output;
+	rcu_assign_pointer(counter->output, output_counter);
+	mutex_unlock(&counter->mmap_mutex);
+
+	if (old_output) {
+		/*
+		 * we need to make sure no existing perf_output_*()
+		 * is still referencing this counter.
+		 */
+		synchronize_rcu();
+		fput(old_output->filp);
+	}
+
+	ret = 0;
+out:
+	fput_light(output_file, fput_needed);
+	return ret;
+}
+
 /**
  * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
  *
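
The update side pairs rcu_assign_pointer() with synchronize_rcu() before dropping the old file reference, so a concurrent perf_output_begin() (see the hunk above) either sees the old output, whose file is still pinned, or the new one. A hedged userspace model of the same publish-then-reclaim pattern, using liburcu (link with -lurcu; flavor and registration details vary by version) in place of kernel RCU; the counter/output naming mirrors the diff, everything else is a toy:

#include <urcu.h>
#include <stdio.h>
#include <stdlib.h>

struct buf { const char *name; };

static struct buf *output;  /* RCU-protected, like counter->output */

static void reader(void)
{
        struct buf *b;

        rcu_read_lock();
        b = rcu_dereference(output);  /* like perf_output_begin() */
        printf("writing to %s\n", b ? b->name : "own buffer");
        rcu_read_unlock();
}

/* A real updater would serialize against other updaters, as the
 * kernel does with counter->mmap_mutex; this toy is single-threaded. */
static void set_output(struct buf *new)
{
        struct buf *old = output;

        rcu_assign_pointer(output, new);  /* publish */
        if (old) {
                synchronize_rcu();  /* wait out in-flight readers */
                free(old);          /* the kernel does fput() instead */
        }
}

int main(void)
{
        struct buf *b = malloc(sizeof(*b));

        rcu_register_thread();
        b->name = "redirect target";
        set_output(b);
        reader();
        set_output(NULL);  /* clear redirection; frees b safely */
        rcu_unregister_thread();
        return 0;
}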
@@ -4220,15 +4316,15 @@ SYSCALL_DEFINE5(perf_counter_open,
 	struct file *group_file = NULL;
 	int fput_needed = 0;
 	int fput_needed2 = 0;
-	int ret;
+	int err;
 
 	/* for future expandability... */
-	if (flags)
+	if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT))
 		return -EINVAL;
 
-	ret = perf_copy_attr(attr_uptr, &attr);
-	if (ret)
-		return ret;
+	err = perf_copy_attr(attr_uptr, &attr);
+	if (err)
+		return err;
 
 	if (!attr.exclude_kernel) {
 		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
@@ -4251,8 +4347,8 @@ SYSCALL_DEFINE5(perf_counter_open,
 	 * Look up the group leader (we will attach this counter to it):
 	 */
 	group_leader = NULL;
-	if (group_fd != -1) {
-		ret = -EINVAL;
+	if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
+		err = -EINVAL;
 		group_file = fget_light(group_fd, &fput_needed);
 		if (!group_file)
 			goto err_put_context;
@@ -4281,18 +4377,24 @@ SYSCALL_DEFINE5(perf_counter_open,
 
 	counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
 				     NULL, GFP_KERNEL);
-	ret = PTR_ERR(counter);
+	err = PTR_ERR(counter);
 	if (IS_ERR(counter))
 		goto err_put_context;
 
-	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
-	if (ret < 0)
+	err = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
+	if (err < 0)
 		goto err_free_put_context;
 
-	counter_file = fget_light(ret, &fput_needed2);
+	counter_file = fget_light(err, &fput_needed2);
 	if (!counter_file)
 		goto err_free_put_context;
 
+	if (flags & PERF_FLAG_FD_OUTPUT) {
+		err = perf_counter_set_output(counter, group_fd);
+		if (err)
+			goto err_fput_free_put_context;
+	}
+
 	counter->filp = counter_file;
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
@@ -4306,20 +4408,20 @@ SYSCALL_DEFINE5(perf_counter_open,
 	list_add_tail(&counter->owner_entry, &current->perf_counter_list);
 	mutex_unlock(&current->perf_counter_mutex);
 
+err_fput_free_put_context:
 	fput_light(counter_file, fput_needed2);
 
-out_fput:
-	fput_light(group_file, fput_needed);
-
-	return ret;
-
 err_free_put_context:
-	kfree(counter);
+	if (err < 0)
+		kfree(counter);
 
 err_put_context:
-	put_ctx(ctx);
+	if (err < 0)
+		put_ctx(ctx);
+
+	fput_light(group_file, fput_needed);
 
-	goto out_fput;
+	return err;
 }
 
 /*
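
At the syscall boundary the two new flags compose: group_fd can name a group leader, an output target via PERF_FLAG_FD_OUTPUT, or only the latter when PERF_FLAG_FD_NO_GROUP is also set. A hedged end-to-end sketch for this pre-perf_event era; the raw syscall wrapper, __NR_perf_counter_open, and the header location are assumptions (there was no libc wrapper):

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_counter.h>  /* assumed ABI header location */

static int perf_counter_open(struct perf_counter_attr *attr, pid_t pid,
                             int cpu, int group_fd, unsigned long flags)
{
        /* __NR_perf_counter_open is assumed to be defined for this era. */
        return syscall(__NR_perf_counter_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_counter_attr attr = {
                .type   = PERF_TYPE_HARDWARE,
                .config = PERF_COUNT_HW_CPU_CYCLES,
                .size   = sizeof(attr),
        };
        int leader, member;

        /* Counter on the current task, any CPU, no group. */
        leader = perf_counter_open(&attr, 0, -1, -1, 0);

        attr.config = PERF_COUNT_HW_INSTRUCTIONS;
        /* group_fd doubles as the output target: the second counter
         * joins the leader's group AND shares its ring buffer. */
        member = perf_counter_open(&attr, 0, -1, leader, PERF_FLAG_FD_OUTPUT);

        return (leader < 0 || member < 0);
}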