Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--  kernel/perf_event.c | 135
1 file changed, 111 insertions(+), 24 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 517d827f4982..eac7e3364335 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,6 +31,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
@@ -674,6 +675,8 @@ event_sched_in(struct perf_event *event,
 
        event->tstamp_running += ctx->time - event->tstamp_stopped;
 
+       event->shadow_ctx_time = ctx->time - ctx->timestamp;
+
        if (!is_software_event(event))
                cpuctx->active_oncpu++;
        ctx->nr_active++;
@@ -1284,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
        int ctxn;
 
-       perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
-
        for_each_task_context_nr(ctxn)
                perf_event_context_sched_out(task, ctxn, next);
 }
@@ -1619,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx)
 {
        raw_spin_lock(&ctx->lock);
 
-       /* Rotate the first entry last of non-pinned groups */
-       list_rotate_left(&ctx->flexible_groups);
+       /*
+        * Rotate the first entry last of non-pinned groups. Rotation might be
+        * disabled by the inheritance code.
+        */
+       if (!ctx->rotate_disable)
+               list_rotate_left(&ctx->flexible_groups);
 
        raw_spin_unlock(&ctx->lock);
 }
@@ -2232,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event)
        raw_spin_unlock_irq(&ctx->lock);
        mutex_unlock(&ctx->mutex);
 
-       mutex_lock(&event->owner->perf_event_mutex);
-       list_del_init(&event->owner_entry);
-       mutex_unlock(&event->owner->perf_event_mutex);
-       put_task_struct(event->owner);
-
        free_event(event);
 
        return 0;
@@ -2249,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 static int perf_release(struct inode *inode, struct file *file)
 {
        struct perf_event *event = file->private_data;
+       struct task_struct *owner;
 
        file->private_data = NULL;
 
+       rcu_read_lock();
+       owner = ACCESS_ONCE(event->owner);
+       /*
+        * Matches the smp_wmb() in perf_event_exit_task(). If we observe
+        * !owner it means the list deletion is complete and we can indeed
+        * free this event, otherwise we need to serialize on
+        * owner->perf_event_mutex.
+        */
+       smp_read_barrier_depends();
+       if (owner) {
+               /*
+                * Since delayed_put_task_struct() also drops the last
+                * task reference we can safely take a new reference
+                * while holding the rcu_read_lock().
+                */
+               get_task_struct(owner);
+       }
+       rcu_read_unlock();
+
+       if (owner) {
+               mutex_lock(&owner->perf_event_mutex);
+               /*
+                * We have to re-check the event->owner field, if it is cleared
+                * we raced with perf_event_exit_task(), acquiring the mutex
+                * ensured they're done, and we can proceed with freeing the
+                * event.
+                */
+               if (event->owner)
+                       list_del_init(&event->owner_entry);
+               mutex_unlock(&owner->perf_event_mutex);
+               put_task_struct(owner);
+       }
+
        return perf_event_release_kernel(event);
 }
 
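The new perf_release() path can be read as: load the owner pointer once, pin the owner while RCU still guarantees the task_struct is valid, then re-check event->owner under owner->perf_event_mutex before touching the owner list. Below is a rough user-space sketch of that shape, not the kernel implementation: C11 atomics and a pthread mutex stand in for ACCESS_ONCE()/smp_read_barrier_depends(), RCU and perf_event_mutex, and the struct layout, refcounting and main() driver are invented for illustration. In particular, plain refcounting does not reproduce RCU's guarantee that the task stays valid across the load and the get.

/*
 * User-space sketch of the perf_release() owner handshake.  All types,
 * helpers and the driver below are invented; only the shape of the logic
 * mirrors the patch.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct task {
        pthread_mutex_t perf_event_mutex;
        atomic_int refcount;
};

struct event {
        _Atomic(struct task *) owner;   /* cleared by the exiting owner */
        int on_owner_list;              /* stands in for owner_entry    */
};

static void get_task(struct task *t) { atomic_fetch_add(&t->refcount, 1); }

static void put_task(struct task *t)
{
        if (atomic_fetch_sub(&t->refcount, 1) == 1)
                free(t);
}

static void release_event(struct event *event)
{
        /* Acquire load pairs with the owner's release store of NULL. */
        struct task *owner = atomic_load_explicit(&event->owner,
                                                  memory_order_acquire);
        if (owner)
                get_task(owner);        /* pin the owner before using it */

        if (owner) {
                pthread_mutex_lock(&owner->perf_event_mutex);
                /*
                 * Re-check: if the owner cleared the field meanwhile, the
                 * list removal already happened and must not be repeated.
                 */
                if (atomic_load(&event->owner))
                        event->on_owner_list = 0;   /* "list_del_init()" */
                pthread_mutex_unlock(&owner->perf_event_mutex);
                put_task(owner);
        }
}

int main(void)
{
        struct task *t = malloc(sizeof(*t));
        struct event ev = { .on_owner_list = 1 };

        pthread_mutex_init(&t->perf_event_mutex, NULL);
        atomic_init(&t->refcount, 1);
        atomic_init(&ev.owner, t);

        release_event(&ev);
        printf("on_owner_list=%d\n", ev.on_owner_list);  /* prints 0 */
        put_task(t);
        return 0;
}

Built with e.g. cc -std=c11 -pthread, the single-threaded driver only exercises the "owner still present" branch; it is meant to show the load, pin, re-check order, nothing more.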
@@ -3396,7 +3430,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
 }
 
 static void perf_output_read_one(struct perf_output_handle *handle,
-                                struct perf_event *event)
+                                struct perf_event *event,
+                                u64 enabled, u64 running)
 {
        u64 read_format = event->attr.read_format;
        u64 values[4];
@@ -3404,11 +3439,11 @@ static void perf_output_read_one(struct perf_output_handle,
 
        values[n++] = perf_event_count(event);
        if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-               values[n++] = event->total_time_enabled +
+               values[n++] = enabled +
                        atomic64_read(&event->child_total_time_enabled);
        }
        if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-               values[n++] = event->total_time_running +
+               values[n++] = running +
                        atomic64_read(&event->child_total_time_running);
        }
        if (read_format & PERF_FORMAT_ID)
@@ -3421,7 +3456,8 @@ static void perf_output_read_one(struct perf_output_handle,
  * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
  */
 static void perf_output_read_group(struct perf_output_handle *handle,
-                           struct perf_event *event)
+                           struct perf_event *event,
+                           u64 enabled, u64 running)
 {
        struct perf_event *leader = event->group_leader, *sub;
        u64 read_format = event->attr.read_format;
@@ -3431,10 +3467,10 @@ static void perf_output_read_group(struct perf_output_handle,
        values[n++] = 1 + leader->nr_siblings;
 
        if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-               values[n++] = leader->total_time_enabled;
+               values[n++] = enabled;
 
        if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-               values[n++] = leader->total_time_running;
+               values[n++] = running;
 
        if (leader != event)
                leader->pmu->read(leader);
@@ -3459,13 +3495,35 @@ static void perf_output_read_group(struct perf_output_handle,
        }
 }
 
+#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
+                                PERF_FORMAT_TOTAL_TIME_RUNNING)
+
 static void perf_output_read(struct perf_output_handle *handle,
                             struct perf_event *event)
 {
+       u64 enabled = 0, running = 0, now, ctx_time;
+       u64 read_format = event->attr.read_format;
+
+       /*
+        * compute total_time_enabled, total_time_running
+        * based on snapshot values taken when the event
+        * was last scheduled in.
+        *
+        * we cannot simply called update_context_time()
+        * because of locking issue as we are called in
+        * NMI context
+        */
+       if (read_format & PERF_FORMAT_TOTAL_TIMES) {
+               now = perf_clock();
+               ctx_time = event->shadow_ctx_time + now;
+               enabled = ctx_time - event->tstamp_enabled;
+               running = ctx_time - event->tstamp_running;
+       }
+
        if (event->attr.read_format & PERF_FORMAT_GROUP)
-               perf_output_read_group(handle, event);
+               perf_output_read_group(handle, event, enabled, running);
        else
-               perf_output_read_one(handle, event);
+               perf_output_read_one(handle, event, enabled, running);
 }
 
 void perf_output_sample(struct perf_output_handle *handle,
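The time math this hunk introduces can be checked with toy numbers. update_context_time() advances ctx->time by (now - ctx->timestamp) and then sets ctx->timestamp = now, so while a context stays scheduled in, ctx->time - ctx->timestamp is invariant; snapshotting that difference as shadow_ctx_time at sched-in time lets an NMI-context reader recompute an up-to-date context time as shadow_ctx_time + perf_clock() without taking ctx->lock. The stand-alone program below only mirrors those two formulas; every number and name in it is invented.

/*
 * Toy check of the shadow_ctx_time arithmetic.  Values are made up; only
 * the two formulas (the snapshot in event_sched_in() and the
 * reconstruction in perf_output_read()) mirror the patch.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* state at the moment the event was scheduled in */
        uint64_t ctx_timestamp  = 1000; /* time of last context update */
        uint64_t ctx_time       = 400;  /* accumulated context time    */
        uint64_t tstamp_enabled = 100;  /* both in ctx_time units      */
        uint64_t tstamp_running = 250;

        /* event_sched_in(): may "wrap" in u64, which is fine */
        uint64_t shadow_ctx_time = ctx_time - ctx_timestamp;

        /* perf_output_read(), later, with only a clock read available */
        uint64_t now      = 1700;                       /* perf_clock() */
        uint64_t est_time = shadow_ctx_time + now;      /* == 1100      */

        /* update_context_time() would have given 400 + (1700 - 1000) = 1100 */
        printf("enabled=%llu running=%llu\n",
               (unsigned long long)(est_time - tstamp_enabled),  /* 1000 */
               (unsigned long long)(est_time - tstamp_running)); /*  850 */
        return 0;
}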
@@ -5651,7 +5709,7 @@ SYSCALL_DEFINE5(perf_event_open,
        mutex_unlock(&ctx->mutex);
 
        event->owner = current;
-       get_task_struct(current);
+
        mutex_lock(&current->perf_event_mutex);
        list_add_tail(&event->owner_entry, &current->perf_event_list);
        mutex_unlock(&current->perf_event_mutex);
@@ -5719,12 +5777,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
        ++ctx->generation;
        mutex_unlock(&ctx->mutex);
 
-       event->owner = current;
-       get_task_struct(current);
-       mutex_lock(&current->perf_event_mutex);
-       list_add_tail(&event->owner_entry, &current->perf_event_list);
-       mutex_unlock(&current->perf_event_mutex);
-
        return event;
 
 err_free:
@@ -5875,8 +5927,24 @@ again:
  */
 void perf_event_exit_task(struct task_struct *child)
 {
+       struct perf_event *event, *tmp;
        int ctxn;
 
+       mutex_lock(&child->perf_event_mutex);
+       list_for_each_entry_safe(event, tmp, &child->perf_event_list,
+                                owner_entry) {
+               list_del_init(&event->owner_entry);
+
+               /*
+                * Ensure the list deletion is visible before we clear
+                * the owner, closes a race against perf_release() where
+                * we need to serialize on the owner->perf_event_mutex.
+                */
+               smp_wmb();
+               event->owner = NULL;
+       }
+       mutex_unlock(&child->perf_event_mutex);
+
        for_each_task_context_nr(ctxn)
                perf_event_exit_task_context(child, ctxn);
 }
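This hunk is the other half of the owner handshake: the list removal has to be visible before the NULL store to event->owner, which is what the smp_wmb() enforces and what a C11 release store expresses. A minimal user-space analogue of just that ordering follows (invented toy types, not the kernel primitives); it pairs with the acquire load in the perf_release() sketch shown earlier.

/*
 * Publisher side of the owner handshake: user-space analogue of
 * list_del_init() + smp_wmb() + "event->owner = NULL".  Types are toys.
 */
#include <stdatomic.h>
#include <stdio.h>

struct task { int pid; };

struct event {
        _Atomic(struct task *) owner;
        int on_owner_list;
};

static void owner_exit(struct event *event)
{
        event->on_owner_list = 0;       /* stands in for list_del_init() */
        /*
         * Release store: the removal above becomes visible before any
         * reader can observe owner == NULL, mirroring the smp_wmb() in
         * the patch.
         */
        atomic_store_explicit(&event->owner, NULL, memory_order_release);
}

int main(void)
{
        static struct task owner = { .pid = 42 };
        struct event ev = { .on_owner_list = 1 };

        atomic_init(&ev.owner, &owner);
        owner_exit(&ev);
        printf("owner=%p on_owner_list=%d\n",
               (void *)atomic_load(&ev.owner), ev.on_owner_list);
        return 0;
}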
@@ -6096,6 +6164,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
        struct perf_event *event;
        struct task_struct *parent = current;
        int inherited_all = 1;
+       unsigned long flags;
        int ret = 0;
 
        child->perf_event_ctxp[ctxn] = NULL;
@@ -6136,6 +6205,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
                        break;
        }
 
+       /*
+        * We can't hold ctx->lock when iterating the ->flexible_group list due
+        * to allocations, but we need to prevent rotation because
+        * rotate_ctx() will change the list from interrupt context.
+        */
+       raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+       parent_ctx->rotate_disable = 1;
+       raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
        list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
                ret = inherit_task_group(event, parent, parent_ctx,
                                         child, ctxn, &inherited_all);
@@ -6143,6 +6221,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
                        break;
        }
 
+       raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+       parent_ctx->rotate_disable = 0;
+       raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
        child_ctx = child->perf_event_ctxp[ctxn];
 
        if (child_ctx && inherited_all) {
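The rotate_disable bracket follows a simple "flag checked under the lock" shape: set the flag under the lock, walk the list without the lock (the walk may allocate and thus sleep), clear the flag under the lock again, while the interrupt-context rotator tests the flag before touching the list. The sketch below is a user-space stand-in with invented names, a pthread mutex instead of a raw spinlock and an array instead of flexible_groups; it only illustrates the bracketing.

/*
 * User-space stand-in for the rotate_disable bracket.  Names and types
 * are invented; a pthread mutex replaces ctx->lock and an array replaces
 * the flexible_groups list.
 */
#include <pthread.h>
#include <stdio.h>

struct ctx {
        pthread_mutex_t lock;
        int rotate_disable;
        int groups[4];
        int nr;
};

/* In the kernel this runs from interrupt context. */
static void rotate_ctx(struct ctx *ctx)
{
        pthread_mutex_lock(&ctx->lock);
        if (!ctx->rotate_disable) {
                /* rotate the first entry last, as list_rotate_left() does */
                int first = ctx->groups[0];
                for (int i = 1; i < ctx->nr; i++)
                        ctx->groups[i - 1] = ctx->groups[i];
                ctx->groups[ctx->nr - 1] = first;
        }
        pthread_mutex_unlock(&ctx->lock);
}

static void inherit_flexible_groups(struct ctx *parent)
{
        pthread_mutex_lock(&parent->lock);
        parent->rotate_disable = 1;     /* rotation is now a no-op */
        pthread_mutex_unlock(&parent->lock);

        /*
         * Safe to walk without the lock: copying entries may need to
         * allocate (and could sleep in the kernel), and rotate_ctx()
         * leaves the order alone until the bracket is closed.
         */
        for (int i = 0; i < parent->nr; i++)
                printf("inherit group %d\n", parent->groups[i]);

        pthread_mutex_lock(&parent->lock);
        parent->rotate_disable = 0;
        pthread_mutex_unlock(&parent->lock);
}

int main(void)
{
        struct ctx c = { .lock = PTHREAD_MUTEX_INITIALIZER,
                         .groups = { 1, 2, 3, 4 }, .nr = 4 };

        rotate_ctx(&c);                 /* order becomes 2 3 4 1 */
        inherit_flexible_groups(&c);    /* rotation suppressed inside */
        return 0;
}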
@@ -6295,6 +6377,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 
 void __init perf_event_init(void)
 {
+       int ret;
+
        perf_event_init_all_cpus();
        init_srcu_struct(&pmus_srcu);
        perf_pmu_register(&perf_swevent);
@@ -6302,4 +6386,7 @@ void __init perf_event_init(void)
        perf_pmu_register(&perf_task_clock);
        perf_tp_register();
        perf_cpu_notifier(perf_cpu_notify);
+
+       ret = init_hw_breakpoint();
+       WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }