aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/perf_counter.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2009-09-11 23:35:13 -0400
committerDavid S. Miller <davem@davemloft.net>2009-09-11 23:35:13 -0400
commitcabc5c0f7fa1342049042d6e147db5a73773955b (patch)
tree2be09ae1777d580c7dfe05d6d5b76e57281ec447 /kernel/perf_counter.c
parentb73d884756303316ead4cd7dad51236b2a515a1a (diff)
parent86d710146fb9975f04c505ec78caa43d227c1018 (diff)
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Conflicts: arch/sparc/Kconfig
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r--kernel/perf_counter.c176
1 files changed, 139 insertions, 37 deletions
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f274e1959885..e0d91fdf0c3c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -46,11 +46,17 @@ static atomic_t nr_task_counters __read_mostly;
46 46
47/* 47/*
48 * perf counter paranoia level: 48 * perf counter paranoia level:
49 * 0 - not paranoid 49 * -1 - not paranoid at all
50 * 1 - disallow cpu counters to unpriv 50 * 0 - disallow raw tracepoint access for unpriv
51 * 2 - disallow kernel profiling to unpriv 51 * 1 - disallow cpu counters for unpriv
52 * 2 - disallow kernel profiling for unpriv
52 */ 53 */
53int sysctl_perf_counter_paranoid __read_mostly; 54int sysctl_perf_counter_paranoid __read_mostly = 1;
55
56static inline bool perf_paranoid_tracepoint_raw(void)
57{
58 return sysctl_perf_counter_paranoid > -1;
59}
54 60
55static inline bool perf_paranoid_cpu(void) 61static inline bool perf_paranoid_cpu(void)
56{ 62{
@@ -469,7 +475,8 @@ static void update_counter_times(struct perf_counter *counter)
469 struct perf_counter_context *ctx = counter->ctx; 475 struct perf_counter_context *ctx = counter->ctx;
470 u64 run_end; 476 u64 run_end;
471 477
472 if (counter->state < PERF_COUNTER_STATE_INACTIVE) 478 if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
479 counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE)
473 return; 480 return;
474 481
475 counter->total_time_enabled = ctx->time - counter->tstamp_enabled; 482 counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
@@ -518,7 +525,7 @@ static void __perf_counter_disable(void *info)
518 */ 525 */
519 if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { 526 if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
520 update_context_time(ctx); 527 update_context_time(ctx);
521 update_counter_times(counter); 528 update_group_times(counter);
522 if (counter == counter->group_leader) 529 if (counter == counter->group_leader)
523 group_sched_out(counter, cpuctx, ctx); 530 group_sched_out(counter, cpuctx, ctx);
524 else 531 else
@@ -573,7 +580,7 @@ static void perf_counter_disable(struct perf_counter *counter)
573 * in, so we can change the state safely. 580 * in, so we can change the state safely.
574 */ 581 */
575 if (counter->state == PERF_COUNTER_STATE_INACTIVE) { 582 if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
576 update_counter_times(counter); 583 update_group_times(counter);
577 counter->state = PERF_COUNTER_STATE_OFF; 584 counter->state = PERF_COUNTER_STATE_OFF;
578 } 585 }
579 586
@@ -851,6 +858,27 @@ retry:
851} 858}
852 859
853/* 860/*
861 * Put a counter into inactive state and update time fields.
862 * Enabling the leader of a group effectively enables all
863 * the group members that aren't explicitly disabled, so we
864 * have to update their ->tstamp_enabled also.
865 * Note: this works for group members as well as group leaders
866 * since the non-leader members' sibling_lists will be empty.
867 */
868static void __perf_counter_mark_enabled(struct perf_counter *counter,
869 struct perf_counter_context *ctx)
870{
871 struct perf_counter *sub;
872
873 counter->state = PERF_COUNTER_STATE_INACTIVE;
874 counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
875 list_for_each_entry(sub, &counter->sibling_list, list_entry)
876 if (sub->state >= PERF_COUNTER_STATE_INACTIVE)
877 sub->tstamp_enabled =
878 ctx->time - sub->total_time_enabled;
879}
880
881/*
854 * Cross CPU call to enable a performance counter 882 * Cross CPU call to enable a performance counter
855 */ 883 */
856static void __perf_counter_enable(void *info) 884static void __perf_counter_enable(void *info)
@@ -877,8 +905,7 @@ static void __perf_counter_enable(void *info)
877 905
878 if (counter->state >= PERF_COUNTER_STATE_INACTIVE) 906 if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
879 goto unlock; 907 goto unlock;
880 counter->state = PERF_COUNTER_STATE_INACTIVE; 908 __perf_counter_mark_enabled(counter, ctx);
881 counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
882 909
883 /* 910 /*
884 * If the counter is in a group and isn't the group leader, 911 * If the counter is in a group and isn't the group leader,
@@ -971,11 +998,9 @@ static void perf_counter_enable(struct perf_counter *counter)
971 * Since we have the lock this context can't be scheduled 998 * Since we have the lock this context can't be scheduled
972 * in, so we can change the state safely. 999 * in, so we can change the state safely.
973 */ 1000 */
974 if (counter->state == PERF_COUNTER_STATE_OFF) { 1001 if (counter->state == PERF_COUNTER_STATE_OFF)
975 counter->state = PERF_COUNTER_STATE_INACTIVE; 1002 __perf_counter_mark_enabled(counter, ctx);
976 counter->tstamp_enabled = 1003
977 ctx->time - counter->total_time_enabled;
978 }
979 out: 1004 out:
980 spin_unlock_irq(&ctx->lock); 1005 spin_unlock_irq(&ctx->lock);
981} 1006}
@@ -1479,9 +1504,7 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
1479 counter->attr.enable_on_exec = 0; 1504 counter->attr.enable_on_exec = 0;
1480 if (counter->state >= PERF_COUNTER_STATE_INACTIVE) 1505 if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
1481 continue; 1506 continue;
1482 counter->state = PERF_COUNTER_STATE_INACTIVE; 1507 __perf_counter_mark_enabled(counter, ctx);
1483 counter->tstamp_enabled =
1484 ctx->time - counter->total_time_enabled;
1485 enabled = 1; 1508 enabled = 1;
1486 } 1509 }
1487 1510
@@ -1675,6 +1698,11 @@ static void free_counter(struct perf_counter *counter)
1675 atomic_dec(&nr_task_counters); 1698 atomic_dec(&nr_task_counters);
1676 } 1699 }
1677 1700
1701 if (counter->output) {
1702 fput(counter->output->filp);
1703 counter->output = NULL;
1704 }
1705
1678 if (counter->destroy) 1706 if (counter->destroy)
1679 counter->destroy(counter); 1707 counter->destroy(counter);
1680 1708
@@ -1960,6 +1988,8 @@ unlock:
1960 return ret; 1988 return ret;
1961} 1989}
1962 1990
1991int perf_counter_set_output(struct perf_counter *counter, int output_fd);
1992
1963static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 1993static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1964{ 1994{
1965 struct perf_counter *counter = file->private_data; 1995 struct perf_counter *counter = file->private_data;
@@ -1983,6 +2013,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1983 case PERF_COUNTER_IOC_PERIOD: 2013 case PERF_COUNTER_IOC_PERIOD:
1984 return perf_counter_period(counter, (u64 __user *)arg); 2014 return perf_counter_period(counter, (u64 __user *)arg);
1985 2015
2016 case PERF_COUNTER_IOC_SET_OUTPUT:
2017 return perf_counter_set_output(counter, arg);
2018
1986 default: 2019 default:
1987 return -ENOTTY; 2020 return -ENOTTY;
1988 } 2021 }
@@ -2253,6 +2286,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2253 2286
2254 WARN_ON_ONCE(counter->ctx->parent_ctx); 2287 WARN_ON_ONCE(counter->ctx->parent_ctx);
2255 mutex_lock(&counter->mmap_mutex); 2288 mutex_lock(&counter->mmap_mutex);
2289 if (counter->output) {
2290 ret = -EINVAL;
2291 goto unlock;
2292 }
2293
2256 if (atomic_inc_not_zero(&counter->mmap_count)) { 2294 if (atomic_inc_not_zero(&counter->mmap_count)) {
2257 if (nr_pages != counter->data->nr_pages) 2295 if (nr_pages != counter->data->nr_pages)
2258 ret = -EINVAL; 2296 ret = -EINVAL;
@@ -2638,6 +2676,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
2638 struct perf_counter *counter, unsigned int size, 2676 struct perf_counter *counter, unsigned int size,
2639 int nmi, int sample) 2677 int nmi, int sample)
2640{ 2678{
2679 struct perf_counter *output_counter;
2641 struct perf_mmap_data *data; 2680 struct perf_mmap_data *data;
2642 unsigned int offset, head; 2681 unsigned int offset, head;
2643 int have_lost; 2682 int have_lost;
@@ -2647,13 +2686,17 @@ static int perf_output_begin(struct perf_output_handle *handle,
2647 u64 lost; 2686 u64 lost;
2648 } lost_event; 2687 } lost_event;
2649 2688
2689 rcu_read_lock();
2650 /* 2690 /*
2651 * For inherited counters we send all the output towards the parent. 2691 * For inherited counters we send all the output towards the parent.
2652 */ 2692 */
2653 if (counter->parent) 2693 if (counter->parent)
2654 counter = counter->parent; 2694 counter = counter->parent;
2655 2695
2656 rcu_read_lock(); 2696 output_counter = rcu_dereference(counter->output);
2697 if (output_counter)
2698 counter = output_counter;
2699
2657 data = rcu_dereference(counter->data); 2700 data = rcu_dereference(counter->data);
2658 if (!data) 2701 if (!data)
2659 goto out; 2702 goto out;
@@ -3934,6 +3977,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
3934 * have these. 3977 * have these.
3935 */ 3978 */
3936 if ((counter->attr.sample_type & PERF_SAMPLE_RAW) && 3979 if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
3980 perf_paranoid_tracepoint_raw() &&
3937 !capable(CAP_SYS_ADMIN)) 3981 !capable(CAP_SYS_ADMIN))
3938 return ERR_PTR(-EPERM); 3982 return ERR_PTR(-EPERM);
3939 3983
@@ -4066,6 +4110,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
4066 hwc->sample_period = attr->sample_period; 4110 hwc->sample_period = attr->sample_period;
4067 if (attr->freq && attr->sample_freq) 4111 if (attr->freq && attr->sample_freq)
4068 hwc->sample_period = 1; 4112 hwc->sample_period = 1;
4113 hwc->last_period = hwc->sample_period;
4069 4114
4070 atomic64_set(&hwc->period_left, hwc->sample_period); 4115 atomic64_set(&hwc->period_left, hwc->sample_period);
4071 4116
@@ -4201,6 +4246,57 @@ err_size:
4201 goto out; 4246 goto out;
4202} 4247}
4203 4248
4249int perf_counter_set_output(struct perf_counter *counter, int output_fd)
4250{
4251 struct perf_counter *output_counter = NULL;
4252 struct file *output_file = NULL;
4253 struct perf_counter *old_output;
4254 int fput_needed = 0;
4255 int ret = -EINVAL;
4256
4257 if (!output_fd)
4258 goto set;
4259
4260 output_file = fget_light(output_fd, &fput_needed);
4261 if (!output_file)
4262 return -EBADF;
4263
4264 if (output_file->f_op != &perf_fops)
4265 goto out;
4266
4267 output_counter = output_file->private_data;
4268
4269 /* Don't chain output fds */
4270 if (output_counter->output)
4271 goto out;
4272
4273 /* Don't set an output fd when we already have an output channel */
4274 if (counter->data)
4275 goto out;
4276
4277 atomic_long_inc(&output_file->f_count);
4278
4279set:
4280 mutex_lock(&counter->mmap_mutex);
4281 old_output = counter->output;
4282 rcu_assign_pointer(counter->output, output_counter);
4283 mutex_unlock(&counter->mmap_mutex);
4284
4285 if (old_output) {
4286 /*
4287 * we need to make sure no existing perf_output_*()
4288 * is still referencing this counter.
4289 */
4290 synchronize_rcu();
4291 fput(old_output->filp);
4292 }
4293
4294 ret = 0;
4295out:
4296 fput_light(output_file, fput_needed);
4297 return ret;
4298}
4299
4204/** 4300/**
4205 * sys_perf_counter_open - open a performance counter, associate it to a task/cpu 4301 * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
4206 * 4302 *
@@ -4220,15 +4316,15 @@ SYSCALL_DEFINE5(perf_counter_open,
4220 struct file *group_file = NULL; 4316 struct file *group_file = NULL;
4221 int fput_needed = 0; 4317 int fput_needed = 0;
4222 int fput_needed2 = 0; 4318 int fput_needed2 = 0;
4223 int ret; 4319 int err;
4224 4320
4225 /* for future expandability... */ 4321 /* for future expandability... */
4226 if (flags) 4322 if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT))
4227 return -EINVAL; 4323 return -EINVAL;
4228 4324
4229 ret = perf_copy_attr(attr_uptr, &attr); 4325 err = perf_copy_attr(attr_uptr, &attr);
4230 if (ret) 4326 if (err)
4231 return ret; 4327 return err;
4232 4328
4233 if (!attr.exclude_kernel) { 4329 if (!attr.exclude_kernel) {
4234 if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) 4330 if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
@@ -4251,8 +4347,8 @@ SYSCALL_DEFINE5(perf_counter_open,
4251 * Look up the group leader (we will attach this counter to it): 4347 * Look up the group leader (we will attach this counter to it):
4252 */ 4348 */
4253 group_leader = NULL; 4349 group_leader = NULL;
4254 if (group_fd != -1) { 4350 if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
4255 ret = -EINVAL; 4351 err = -EINVAL;
4256 group_file = fget_light(group_fd, &fput_needed); 4352 group_file = fget_light(group_fd, &fput_needed);
4257 if (!group_file) 4353 if (!group_file)
4258 goto err_put_context; 4354 goto err_put_context;
@@ -4281,18 +4377,24 @@ SYSCALL_DEFINE5(perf_counter_open,
4281 4377
4282 counter = perf_counter_alloc(&attr, cpu, ctx, group_leader, 4378 counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
4283 NULL, GFP_KERNEL); 4379 NULL, GFP_KERNEL);
4284 ret = PTR_ERR(counter); 4380 err = PTR_ERR(counter);
4285 if (IS_ERR(counter)) 4381 if (IS_ERR(counter))
4286 goto err_put_context; 4382 goto err_put_context;
4287 4383
4288 ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); 4384 err = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
4289 if (ret < 0) 4385 if (err < 0)
4290 goto err_free_put_context; 4386 goto err_free_put_context;
4291 4387
4292 counter_file = fget_light(ret, &fput_needed2); 4388 counter_file = fget_light(err, &fput_needed2);
4293 if (!counter_file) 4389 if (!counter_file)
4294 goto err_free_put_context; 4390 goto err_free_put_context;
4295 4391
4392 if (flags & PERF_FLAG_FD_OUTPUT) {
4393 err = perf_counter_set_output(counter, group_fd);
4394 if (err)
4395 goto err_fput_free_put_context;
4396 }
4397
4296 counter->filp = counter_file; 4398 counter->filp = counter_file;
4297 WARN_ON_ONCE(ctx->parent_ctx); 4399 WARN_ON_ONCE(ctx->parent_ctx);
4298 mutex_lock(&ctx->mutex); 4400 mutex_lock(&ctx->mutex);
@@ -4306,20 +4408,20 @@ SYSCALL_DEFINE5(perf_counter_open,
4306 list_add_tail(&counter->owner_entry, &current->perf_counter_list); 4408 list_add_tail(&counter->owner_entry, &current->perf_counter_list);
4307 mutex_unlock(&current->perf_counter_mutex); 4409 mutex_unlock(&current->perf_counter_mutex);
4308 4410
4411err_fput_free_put_context:
4309 fput_light(counter_file, fput_needed2); 4412 fput_light(counter_file, fput_needed2);
4310 4413
4311out_fput:
4312 fput_light(group_file, fput_needed);
4313
4314 return ret;
4315
4316err_free_put_context: 4414err_free_put_context:
4317 kfree(counter); 4415 if (err < 0)
4416 kfree(counter);
4318 4417
4319err_put_context: 4418err_put_context:
4320 put_ctx(ctx); 4419 if (err < 0)
4420 put_ctx(ctx);
4421
4422 fput_light(group_file, fput_needed);
4321 4423
4322 goto out_fput; 4424 return err;
4323} 4425}
4324 4426
4325/* 4427/*