Diffstat (limited to 'kernel')

 -rw-r--r--  kernel/events/core.c          | 177
 -rw-r--r--  kernel/events/hw_breakpoint.c |   6

 2 files changed, 183 insertions, 0 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e8b32ac75ce3..c61234b1a988 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
 		       PERF_FLAG_FD_OUTPUT  |\
 		       PERF_FLAG_PID_CGROUP)
 
+/*
+ * branch priv levels that need permission checks
+ */
+#define PERF_SAMPLE_BRANCH_PERM_PLM \
+	(PERF_SAMPLE_BRANCH_KERNEL |\
+	 PERF_SAMPLE_BRANCH_HV)
+
 enum event_type_t {
 	EVENT_FLEXIBLE = 0x1,
 	EVENT_PINNED = 0x2,
@@ -130,6 +137,7 @@ enum event_type_t {
  */
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
+static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	if (is_cgroup_event(event))
 		ctx->nr_cgroups++;
 
+	if (has_branch_stack(event))
+		ctx->nr_branch_stack++;
+
 	list_add_rcu(&event->event_entry, &ctx->event_list);
 	if (!ctx->nr_events)
 		perf_pmu_rotate_start(ctx->pmu);
@@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 		cpuctx->cgrp = NULL;
 	}
 
+	if (has_branch_stack(event))
+		ctx->nr_branch_stack--;
+
 	ctx->nr_events--;
 	if (event->attr.inherit_stat)
 		ctx->nr_stat--;
@@ -2195,6 +2209,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 }
 
 /*
+ * When sampling the branch stack in system-wide mode, it may be necessary
+ * to flush the stack on context switch. This happens when the branch
+ * stack does not tag its entries with the pid of the current task.
+ * Otherwise it becomes impossible to associate a branch entry with a
+ * task. This ambiguity is more likely to appear when the branch stack
+ * supports priv level filtering and the user sets it to monitor only
+ * at the user level (which could be a useful measurement in system-wide
+ * mode). In that case, the risk is high of having a branch stack with
+ * branches from multiple tasks. Flushing may mean dropping the existing
+ * entries or stashing them somewhere in the PMU specific code layer.
+ *
+ * This function provides the context switch callback to the lower code
+ * layer. It is invoked ONLY when there is at least one system-wide context
+ * with at least one active event using taken branch sampling.
+ */
+static void perf_branch_stack_sched_in(struct task_struct *prev,
+				       struct task_struct *task)
+{
+	struct perf_cpu_context *cpuctx;
+	struct pmu *pmu;
+	unsigned long flags;
+
+	/* no need to flush branch stack if not changing task */
+	if (prev == task)
+		return;
+
+	local_irq_save(flags);
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+		/*
+		 * check if the context has at least one
+		 * event using PERF_SAMPLE_BRANCH_STACK
+		 */
+		if (cpuctx->ctx.nr_branch_stack > 0
+		    && pmu->flush_branch_stack) {
+
+			pmu = cpuctx->ctx.pmu;
+
+			perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+			perf_pmu_disable(pmu);
+
+			pmu->flush_branch_stack();
+
+			perf_pmu_enable(pmu);
+
+			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+		}
+	}
+
+	rcu_read_unlock();
+
+	local_irq_restore(flags);
+}
+
+/*
  * Called from scheduler to add the events of the current task
  * with interrupts disabled.
  *
@@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 	 */
 	if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
 		perf_cgroup_sched_in(prev, task);
+
+	/* check for system-wide branch_stack events */
+	if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
+		perf_branch_stack_sched_in(prev, task);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
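The two hunks above only take effect when a PMU driver actually implements the new flush_branch_stack callback. As a purely illustrative sketch (not part of this patch; every mydrv_* name is invented and the other mandatory struct pmu callbacks are omitted for brevity), an architecture PMU that keeps branch records in a hardware buffer might wire it up roughly like this:

#include <linux/perf_event.h>

/* hypothetical arch helper: clear or checkpoint the hardware branch buffer */
static void mydrv_reset_branch_buffer(void)
{
}

static void mydrv_flush_branch_stack(void)
{
	/*
	 * Reached via pmu->flush_branch_stack() from
	 * perf_branch_stack_sched_in() above, with the PMU disabled and
	 * the context lock held, so records belonging to the previous
	 * task cannot be attributed to the task being scheduled in.
	 */
	mydrv_reset_branch_buffer();
}

static struct pmu mydrv_pmu = {
	.task_ctx_nr		= perf_hw_context,
	/* .event_init, .add, .del, .start, .stop, .read, ... */
	.flush_branch_stack	= mydrv_flush_branch_stack,
};

Whether the driver drops the buffered entries or stashes them is its own choice; the core code only guarantees the callback runs on a real task switch on CPUs with at least one system-wide branch-stack event.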
@@ -2791,6 +2869,14 @@ static void free_event(struct perf_event *event)
 			atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
 			static_key_slow_dec_deferred(&perf_sched_events);
 		}
+
+		if (has_branch_stack(event)) {
+			static_key_slow_dec_deferred(&perf_sched_events);
+			/* is system-wide event */
+			if (!(event->attach_state & PERF_ATTACH_TASK))
+				atomic_dec(&per_cpu(perf_branch_stack_events,
+						    event->cpu));
+		}
 	}
 
 	if (event->rb) {
@@ -3907,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle,
 			}
 		}
 	}
+
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		if (data->br_stack) {
+			size_t size;
+
+			size = data->br_stack->nr
+			     * sizeof(struct perf_branch_entry);
+
+			perf_output_put(handle, data->br_stack->nr);
+			perf_output_copy(handle, data->br_stack->entries, size);
+		} else {
+			/*
+			 * we always store at least the value of nr
+			 */
+			u64 nr = 0;
+			perf_output_put(handle, nr);
+		}
+	}
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
@@ -3949,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header,
 		WARN_ON_ONCE(size & (sizeof(u64)-1));
 		header->size += size;
 	}
+
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		int size = sizeof(u64); /* nr */
+		if (data->br_stack) {
+			size += data->br_stack->nr
+				* sizeof(struct perf_branch_entry);
+		}
+		header->size += size;
+	}
 }
 
 static void perf_event_output(struct perf_event *event,
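Taken together, the perf_output_sample() and perf_prepare_sample() hunks fix the on-the-wire layout of the new sample field: a u64 count followed by that many branch entries, so the record grows by 8 + nr * sizeof(struct perf_branch_entry) bytes, and by just the 8-byte count when no branch data is available. Below is a minimal user-space sketch of walking that layout, assuming struct perf_branch_entry is three u64-sized words (from, to, and a word of flag bits) as defined elsewhere in this patch series:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* assumed mirror of struct perf_branch_entry: from, to, flag bits */
struct sample_branch_entry {
	uint64_t from;
	uint64_t to;
	uint64_t flags;
};

/*
 * Walk the branch-stack portion of a PERF_RECORD_SAMPLE.  "p" must
 * already point at the u64 count, i.e. every sample field that precedes
 * PERF_SAMPLE_BRANCH_STACK has been consumed.  Returns the position of
 * the first byte after the branch data; nr may be 0 (see the else
 * branch in perf_output_sample() above).
 */
static const void *parse_branch_stack(const void *p)
{
	const uint64_t *nrp = p;
	uint64_t nr = *nrp;
	const struct sample_branch_entry *br = (const void *)(nrp + 1);
	uint64_t i;

	for (i = 0; i < nr; i++)
		printf("branch %" PRIu64 ": %#" PRIx64 " -> %#" PRIx64 "\n",
		       i, br[i].from, br[i].to);

	return br + nr;
}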
@@ -5010,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event)
 	if (event->attr.type != PERF_TYPE_SOFTWARE)
 		return -ENOENT;
 
+	/*
+	 * no branch sampling for software events
+	 */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event_id) {
 	case PERF_COUNT_SW_CPU_CLOCK:
 	case PERF_COUNT_SW_TASK_CLOCK:
@@ -5120,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event)
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
 		return -ENOENT;
 
+	/*
+	 * no branch sampling for tracepoint events
+	 */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	err = perf_trace_init(event);
 	if (err)
 		return err;
@@ -5345,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event)
 	if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
 		return -ENOENT;
 
+	/*
+	 * no branch sampling for software events
+	 */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	perf_swevent_init_hrtimer(event);
 
 	return 0;
@@ -5419,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event)
 	if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
 		return -ENOENT;
 
+	/*
+	 * no branch sampling for software events
+	 */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	perf_swevent_init_hrtimer(event);
 
 	return 0;
@@ -5866,6 +6003,12 @@ done:
 				return ERR_PTR(err);
 			}
 		}
+		if (has_branch_stack(event)) {
+			static_key_slow_inc(&perf_sched_events.key);
+			if (!(event->attach_state & PERF_ATTACH_TASK))
+				atomic_inc(&per_cpu(perf_branch_stack_events,
+						    event->cpu));
+		}
 	}
 
 	return event;
@@ -5935,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	if (attr->read_format & ~(PERF_FORMAT_MAX-1))
 		return -EINVAL;
 
+	if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		u64 mask = attr->branch_sample_type;
+
+		/* only using defined bits */
+		if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1))
+			return -EINVAL;
+
+		/* at least one branch bit must be set */
+		if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
+			return -EINVAL;
+
+		/* kernel level capture: check permissions */
+		if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
+		    && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+			return -EACCES;
+
+		/* propagate priv level, when not set for branch */
+		if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
+
+			/* exclude_kernel checked on syscall entry */
+			if (!attr->exclude_kernel)
+				mask |= PERF_SAMPLE_BRANCH_KERNEL;
+
+			if (!attr->exclude_user)
+				mask |= PERF_SAMPLE_BRANCH_USER;
+
+			if (!attr->exclude_hv)
+				mask |= PERF_SAMPLE_BRANCH_HV;
+			/*
+			 * adjust user setting (for HW filter setup)
+			 */
+			attr->branch_sample_type = mask;
+		}
+	}
 out:
 	return ret;
 
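On the user-space side, everything perf_copy_attr() now validates comes straight from perf_event_attr. The following is a hedged sketch of opening one branch-sampling event against a kernel carrying this patch (constant and field names are those used by this patch series; the comments restate what the validation code above does):

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_branch_sampling_event(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;

	/*
	 * Request "any taken branch" but no priv-level (PLM) bit: this
	 * passes the "at least one branch bit" check, and perf_copy_attr()
	 * fills in the priv levels from the exclude_* fields below.
	 */
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;	/* => only PERF_SAMPLE_BRANCH_USER is ORed in */

	/* current task, any CPU, no group leader, no flags */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

Because the user-supplied mask carries no PERF_SAMPLE_BRANCH_PERM_PLM bit, such a request never reaches the CAP_SYS_ADMIN check even under perf_paranoid_kernel().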
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 3330022a7ac1..bb38c4d3ee12 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp)
 	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
 		return -ENOENT;
 
+	/*
+	 * no branch sampling for breakpoint events
+	 */
+	if (has_branch_stack(bp))
+		return -EOPNOTSUPP;
+
 	err = register_perf_hw_breakpoint(bp);
 	if (err)
 		return err;