path: root/kernel/events
author		Ingo Molnar <mingo@kernel.org>	2012-04-14 07:18:27 -0400
committer	Ingo Molnar <mingo@kernel.org>	2012-04-14 07:19:04 -0400
commit		6ac1ef482d7ae0c690f1640bf6eb818ff9a2d91e (patch)
tree		021cc9f6b477146fcebe6f3be4752abfa2ba18a9 /kernel/events
parent		682968e0c425c60f0dde37977e5beb2b12ddc4cc (diff)
parent		a385ec4f11bdcf81af094c03e2444ee9b7fad2e5 (diff)
Merge branch 'perf/core' into perf/uprobes
Merge in latest upstream (and the latest perf development tree), to prepare for tooling changes, and also to pick up v3.4 MM changes that the uprobes code needs to take care of.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/events')
-rw-r--r--	kernel/events/core.c	217
-rw-r--r--	kernel/events/hw_breakpoint.c	6
2 files changed, 205 insertions(+), 18 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 94afe5b91c6a..a6a9ec4cd8f5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
 				       PERF_FLAG_FD_OUTPUT  |\
 				       PERF_FLAG_PID_CGROUP)
 
+/*
+ * branch priv levels that need permission checks
+ */
+#define PERF_SAMPLE_BRANCH_PERM_PLM \
+	(PERF_SAMPLE_BRANCH_KERNEL |\
+	 PERF_SAMPLE_BRANCH_HV)
+
 enum event_type_t {
 	EVENT_FLEXIBLE = 0x1,
 	EVENT_PINNED = 0x2,
@@ -128,8 +135,9 @@ enum event_type_t {
  * perf_sched_events : >0 events exist
  * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
  */
-struct jump_label_key_deferred perf_sched_events __read_mostly;
+struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
+static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
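The jump_label_* to static_key_* renames that recur throughout this diff come from the static key API rework picked up by this merge. A minimal sketch of the renamed API, with made-up key and function names used purely for illustration (this snippet is not part of the diff):

	#include <linux/types.h>
	#include <linux/jump_label.h>

	/* starts false: the branch below is compiled as a no-op until enabled */
	static struct static_key my_feature_key = STATIC_KEY_INIT_FALSE;

	static void my_fast_path(void)
	{
		if (static_key_false(&my_feature_key)) {
			/* rarely enabled slow path, patched in at runtime */
		}
	}

	static void my_feature_set(bool on)
	{
		if (on)
			static_key_slow_inc(&my_feature_key);	/* patch branch sites in */
		else
			static_key_slow_dec(&my_feature_key);	/* patch them back out */
	}

The *_deferred variants used for perf_sched_events behave the same way but rate-limit the code patching on the decrement side.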
@@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	if (is_cgroup_event(event))
 		ctx->nr_cgroups++;
 
+	if (has_branch_stack(event))
+		ctx->nr_branch_stack++;
+
 	list_add_rcu(&event->event_entry, &ctx->event_list);
 	if (!ctx->nr_events)
 		perf_pmu_rotate_start(ctx->pmu);
@@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 		cpuctx->cgrp = NULL;
 	}
 
+	if (has_branch_stack(event))
+		ctx->nr_branch_stack--;
+
 	ctx->nr_events--;
 	if (event->attr.inherit_stat)
 		ctx->nr_stat--;
@@ -2195,6 +2209,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 }
 
 /*
+ * When sampling the branch stack in system-wide mode, it may be necessary
+ * to flush the stack on context switch. This happens when the branch
+ * stack does not tag its entries with the pid of the current task.
+ * Otherwise it becomes impossible to associate a branch entry with a
+ * task. This ambiguity is more likely to appear when the branch stack
+ * supports priv level filtering and the user sets it to monitor only
+ * at the user level (which could be a useful measurement in system-wide
+ * mode). In that case, the risk is high of having a branch stack with
+ * branches from multiple tasks. Flushing may mean dropping the existing
+ * entries or stashing them somewhere in the PMU specific code layer.
+ *
+ * This function provides the context switch callback to the lower code
+ * layer. It is invoked ONLY when there is at least one system-wide context
+ * with at least one active event using taken branch sampling.
+ */
+static void perf_branch_stack_sched_in(struct task_struct *prev,
+				       struct task_struct *task)
+{
+	struct perf_cpu_context *cpuctx;
+	struct pmu *pmu;
+	unsigned long flags;
+
+	/* no need to flush branch stack if not changing task */
+	if (prev == task)
+		return;
+
+	local_irq_save(flags);
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+		/*
+		 * check if the context has at least one
+		 * event using PERF_SAMPLE_BRANCH_STACK
+		 */
+		if (cpuctx->ctx.nr_branch_stack > 0
+		    && pmu->flush_branch_stack) {
+
+			pmu = cpuctx->ctx.pmu;
+
+			perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+			perf_pmu_disable(pmu);
+
+			pmu->flush_branch_stack();
+
+			perf_pmu_enable(pmu);
+
+			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+		}
+	}
+
+	rcu_read_unlock();
+
+	local_irq_restore(flags);
+}
+
+/*
  * Called from scheduler to add the events of the current task
  * with interrupts disabled.
  *
@@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 	 */
 	if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
 		perf_cgroup_sched_in(prev, task);
+
+	/* check for system-wide branch_stack events */
+	if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
+		perf_branch_stack_sched_in(prev, task);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -2778,7 +2856,7 @@ static void free_event(struct perf_event *event)
 
 	if (!event->parent) {
 		if (event->attach_state & PERF_ATTACH_TASK)
-			jump_label_dec_deferred(&perf_sched_events);
+			static_key_slow_dec_deferred(&perf_sched_events);
 		if (event->attr.mmap || event->attr.mmap_data)
 			atomic_dec(&nr_mmap_events);
 		if (event->attr.comm)
@@ -2789,7 +2867,15 @@ static void free_event(struct perf_event *event)
 			put_callchain_buffers();
 		if (is_cgroup_event(event)) {
 			atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
-			jump_label_dec_deferred(&perf_sched_events);
+			static_key_slow_dec_deferred(&perf_sched_events);
+		}
+
+		if (has_branch_stack(event)) {
+			static_key_slow_dec_deferred(&perf_sched_events);
+			/* is system-wide event */
+			if (!(event->attach_state & PERF_ATTACH_TASK))
+				atomic_dec(&per_cpu(perf_branch_stack_events,
+						    event->cpu));
 		}
 	}
 
@@ -3262,7 +3348,7 @@ static void calc_timer_values(struct perf_event *event,
 	*running = ctx_time - event->tstamp_running;
 }
 
-void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 {
 }
 
@@ -3312,7 +3398,7 @@ void perf_event_update_userpage(struct perf_event *event)
 	userpg->time_running = running +
 			atomic64_read(&event->child_total_time_running);
 
-	perf_update_user_clock(userpg, now);
+	arch_perf_update_userpage(userpg, now);
 
 	barrier();
 	++userpg->lock;
@@ -3907,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle,
 			}
 		}
 	}
+
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		if (data->br_stack) {
+			size_t size;
+
+			size = data->br_stack->nr
+			     * sizeof(struct perf_branch_entry);
+
+			perf_output_put(handle, data->br_stack->nr);
+			perf_output_copy(handle, data->br_stack->entries, size);
+		} else {
+			/*
+			 * we always store at least the value of nr
+			 */
+			u64 nr = 0;
+			perf_output_put(handle, nr);
+		}
+	}
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
@@ -3949,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header,
 		WARN_ON_ONCE(size & (sizeof(u64)-1));
 		header->size += size;
 	}
+
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		int size = sizeof(u64); /* nr */
+		if (data->br_stack) {
+			size += data->br_stack->nr
+			      * sizeof(struct perf_branch_entry);
+		}
+		header->size += size;
+	}
 }
 
 static void perf_event_output(struct perf_event *event,
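With the two hunks above, a PERF_RECORD_SAMPLE that requested PERF_SAMPLE_BRANCH_STACK carries a u64 entry count followed by that many struct perf_branch_entry records (or a count of zero and nothing else). A rough userspace sketch of walking that part of an already located sample record; the helper name is hypothetical and error handling is omitted:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <linux/perf_event.h>

	/* 'p' points at the u64 nr field of the branch-stack section of a
	 * decoded PERF_RECORD_SAMPLE; returns a pointer past the section. */
	static const char *dump_branch_stack(const char *p)
	{
		uint64_t i, nr;
		const struct perf_branch_entry *ent;

		memcpy(&nr, p, sizeof(nr));	/* entry count, may be 0 */
		p += sizeof(nr);
		ent = (const struct perf_branch_entry *)p;

		for (i = 0; i < nr; i++)
			printf("branch %llu: %#llx -> %#llx%s\n",
			       (unsigned long long)i,
			       (unsigned long long)ent[i].from,
			       (unsigned long long)ent[i].to,
			       ent[i].mispred ? " (mispredicted)" : "");

		return p + nr * sizeof(*ent);	/* next field in the sample */
	}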
@@ -4991,7 +5104,7 @@ fail:
 	return err;
 }
 
-struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
+struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 static void sw_perf_event_destroy(struct perf_event *event)
 {
@@ -4999,7 +5112,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
 
 	WARN_ON(event->parent);
 
-	jump_label_dec(&perf_swevent_enabled[event_id]);
+	static_key_slow_dec(&perf_swevent_enabled[event_id]);
 	swevent_hlist_put(event);
 }
 
@@ -5010,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event)
 	if (event->attr.type != PERF_TYPE_SOFTWARE)
 		return -ENOENT;
 
+	/*
+	 * no branch sampling for software events
+	 */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event_id) {
 	case PERF_COUNT_SW_CPU_CLOCK:
 	case PERF_COUNT_SW_TASK_CLOCK:
@@ -5029,7 +5148,7 @@ static int perf_swevent_init(struct perf_event *event)
 		if (err)
 			return err;
 
-		jump_label_inc(&perf_swevent_enabled[event_id]);
+		static_key_slow_inc(&perf_swevent_enabled[event_id]);
 		event->destroy = sw_perf_event_destroy;
 	}
 
@@ -5120,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event)
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
 		return -ENOENT;
 
+	/*
+	 * no branch sampling for tracepoint events
+	 */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	err = perf_trace_init(event);
 	if (err)
 		return err;
@@ -5345,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event)
 	if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
 		return -ENOENT;
 
+	/*
+	 * no branch sampling for software events
+	 */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	perf_swevent_init_hrtimer(event);
 
 	return 0;
@@ -5419,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event)
 	if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
 		return -ENOENT;
 
+	/*
+	 * no branch sampling for software events
+	 */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	perf_swevent_init_hrtimer(event);
 
 	return 0;
@@ -5852,7 +5989,7 @@ done:
 
 	if (!event->parent) {
 		if (event->attach_state & PERF_ATTACH_TASK)
-			jump_label_inc(&perf_sched_events.key);
+			static_key_slow_inc(&perf_sched_events.key);
 		if (event->attr.mmap || event->attr.mmap_data)
 			atomic_inc(&nr_mmap_events);
 		if (event->attr.comm)
@@ -5866,6 +6003,12 @@ done:
 				return ERR_PTR(err);
 			}
 		}
+		if (has_branch_stack(event)) {
+			static_key_slow_inc(&perf_sched_events.key);
+			if (!(event->attach_state & PERF_ATTACH_TASK))
+				atomic_inc(&per_cpu(perf_branch_stack_events,
+						    event->cpu));
+		}
 	}
 
 	return event;
@@ -5935,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	if (attr->read_format & ~(PERF_FORMAT_MAX-1))
 		return -EINVAL;
 
+	if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		u64 mask = attr->branch_sample_type;
+
+		/* only using defined bits */
+		if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1))
+			return -EINVAL;
+
+		/* at least one branch bit must be set */
+		if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
+			return -EINVAL;
+
+		/* kernel level capture: check permissions */
+		if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
+		    && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+			return -EACCES;
+
+		/* propagate priv level, when not set for branch */
+		if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
+
+			/* exclude_kernel checked on syscall entry */
+			if (!attr->exclude_kernel)
+				mask |= PERF_SAMPLE_BRANCH_KERNEL;
+
+			if (!attr->exclude_user)
+				mask |= PERF_SAMPLE_BRANCH_USER;
+
+			if (!attr->exclude_hv)
+				mask |= PERF_SAMPLE_BRANCH_HV;
+			/*
+			 * adjust user setting (for HW filter setup)
+			 */
+			attr->branch_sample_type = mask;
+		}
+	}
 out:
 	return ret;
 
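The perf_copy_attr() checks above are what a perf_event_open() caller must satisfy to use branch sampling. A rough illustration of such a caller, with a hypothetical wrapper name, no error handling, and the priv level given explicitly so the propagation branch at the end of the new block is not taken:

	#include <string.h>
	#include <unistd.h>
	#include <sys/types.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	static int open_branch_sampling_event(pid_t pid, int cpu)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		attr.size = sizeof(attr);
		attr.sample_period = 100000;
		attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
		/* all branch types, user level only: passes the checks above
		 * without needing CAP_SYS_ADMIN */
		attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
					  PERF_SAMPLE_BRANCH_USER;

		return syscall(__NR_perf_event_open, &attr, pid, cpu,
			       -1 /* group_fd */, 0 /* flags */);
	}

Had the caller left the priv-level bits out (e.g. only PERF_SAMPLE_BRANCH_ANY), the kernel would derive them from exclude_user/exclude_kernel/exclude_hv, as the propagation code above shows.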
@@ -6090,7 +6267,7 @@ SYSCALL_DEFINE5(perf_event_open,
 		 * - that may need work on context switch
 		 */
 		atomic_inc(&per_cpu(perf_cgroup_events, event->cpu));
-		jump_label_inc(&perf_sched_events.key);
+		static_key_slow_inc(&perf_sched_events.key);
 	}
 
 	/*
@@ -6939,6 +7116,13 @@ void __init perf_event_init(void)
 
 	/* do not patch jump label more than once per second */
 	jump_label_rate_limit(&perf_sched_events, HZ);
+
+	/*
+	 * Build time assertion that we keep the data_head at the intended
+	 * location. IOW, validation we got the __reserved[] size right.
+	 */
+	BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head))
+		     != 1024);
 }
 
 static int __init perf_event_sysfs_init(void)
@@ -6970,8 +7154,7 @@ unlock:
 device_initcall(perf_event_sysfs_init);
 
 #ifdef CONFIG_CGROUP_PERF
-static struct cgroup_subsys_state *perf_cgroup_create(
-	struct cgroup_subsys *ss, struct cgroup *cont)
+static struct cgroup_subsys_state *perf_cgroup_create(struct cgroup *cont)
 {
 	struct perf_cgroup *jc;
 
@@ -6988,8 +7171,7 @@ static struct cgroup_subsys_state *perf_cgroup_create(
 	return &jc->css;
 }
 
-static void perf_cgroup_destroy(struct cgroup_subsys *ss,
-				struct cgroup *cont)
+static void perf_cgroup_destroy(struct cgroup *cont)
 {
 	struct perf_cgroup *jc;
 	jc = container_of(cgroup_subsys_state(cont, perf_subsys_id),
@@ -7005,8 +7187,7 @@ static int __perf_cgroup_move(void *info)
 	return 0;
 }
 
-static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-			       struct cgroup_taskset *tset)
+static void perf_cgroup_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
 
@@ -7014,8 +7195,8 @@ static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 		task_function_call(task, __perf_cgroup_move, task);
 }
 
-static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
-			     struct cgroup *old_cgrp, struct task_struct *task)
+static void perf_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
+			     struct task_struct *task)
 {
 	/*
 	 * cgroup_exit() is called in the copy_process() failure path.
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 3330022a7ac1..bb38c4d3ee12 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp)
 	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
 		return -ENOENT;
 
+	/*
+	 * no branch sampling for breakpoint events
+	 */
+	if (has_branch_stack(bp))
+		return -EOPNOTSUPP;
+
 	err = register_perf_hw_breakpoint(bp);
 	if (err)
 		return err;