path: root/kernel/perf_event.c
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--    kernel/perf_event.c    111
1 file changed, 91 insertions(+), 20 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 517d827f4982..671f6c8c8a32 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,6 +31,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
@@ -674,6 +675,8 @@ event_sched_in(struct perf_event *event,
 
 	event->tstamp_running += ctx->time - event->tstamp_stopped;
 
+	event->shadow_ctx_time = ctx->time - ctx->timestamp;
+
 	if (!is_software_event(event))
 		cpuctx->active_oncpu++;
 	ctx->nr_active++;
@@ -2232,11 +2235,6 @@ int perf_event_release_kernel(struct perf_event *event)
 	raw_spin_unlock_irq(&ctx->lock);
 	mutex_unlock(&ctx->mutex);
 
-	mutex_lock(&event->owner->perf_event_mutex);
-	list_del_init(&event->owner_entry);
-	mutex_unlock(&event->owner->perf_event_mutex);
-	put_task_struct(event->owner);
-
 	free_event(event);
 
 	return 0;
@@ -2249,9 +2247,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 static int perf_release(struct inode *inode, struct file *file)
 {
 	struct perf_event *event = file->private_data;
+	struct task_struct *owner;
 
 	file->private_data = NULL;
 
+	rcu_read_lock();
+	owner = ACCESS_ONCE(event->owner);
+	/*
+	 * Matches the smp_wmb() in perf_event_exit_task(). If we observe
+	 * !owner it means the list deletion is complete and we can indeed
+	 * free this event, otherwise we need to serialize on
+	 * owner->perf_event_mutex.
+	 */
+	smp_read_barrier_depends();
+	if (owner) {
+		/*
+		 * Since delayed_put_task_struct() also drops the last
+		 * task reference we can safely take a new reference
+		 * while holding the rcu_read_lock().
+		 */
+		get_task_struct(owner);
+	}
+	rcu_read_unlock();
+
+	if (owner) {
+		mutex_lock(&owner->perf_event_mutex);
+		/*
+		 * We have to re-check the event->owner field; if it was
+		 * cleared we raced with perf_event_exit_task(), and
+		 * acquiring the mutex ensured it is done, so we can
+		 * proceed with freeing the event.
+		 */
+		if (event->owner)
+			list_del_init(&event->owner_entry);
+		mutex_unlock(&owner->perf_event_mutex);
+		put_task_struct(owner);
+	}
+
 	return perf_event_release_kernel(event);
 }
 
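The new perf_release() is the reader side of a lockless handshake with perf_event_exit_task(): observe the owner pointer with ordering, pin the task, then re-check under the mutex. Below is a minimal userspace model of that ordering, assuming C11 atomics and pthreads; the acquire load stands in for ACCESS_ONCE() plus smp_read_barrier_depends(), the struct fields are hypothetical stand-ins, and the RCU/get_task_struct() lifetime handling is reduced to comments.

/* Userspace model of the reader side; illustration only, not kernel code. */
#include <pthread.h>
#include <stdatomic.h>

struct task {                            /* stands in for task_struct */
        pthread_mutex_t perf_event_mutex;
};

struct event {                           /* stands in for perf_event */
        _Atomic(struct task *) owner;
        int on_owner_list;               /* stands in for owner_entry */
};

static void release_event(struct event *ev)
{
        /* Acquire load: models ACCESS_ONCE() + smp_read_barrier_depends().
         * A NULL result means the exit path already unlinked the entry. */
        struct task *owner = atomic_load_explicit(&ev->owner, memory_order_acquire);

        if (owner) {
                /* kernel: get_task_struct(owner) under rcu_read_lock() here */
                pthread_mutex_lock(&owner->perf_event_mutex);
                /*
                 * Re-check under the mutex: if the exit path cleared
                 * ev->owner meanwhile, it also unlinked the entry, so
                 * we must not touch the list again.
                 */
                if (atomic_load_explicit(&ev->owner, memory_order_relaxed))
                        ev->on_owner_list = 0;   /* kernel: list_del_init() */
                pthread_mutex_unlock(&owner->perf_event_mutex);
                /* kernel: put_task_struct(owner) here */
        }
        /* ...then free the event. */
}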
@@ -3396,7 +3428,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
 }
 
 static void perf_output_read_one(struct perf_output_handle *handle,
-				 struct perf_event *event)
+				 struct perf_event *event,
+				 u64 enabled, u64 running)
 {
 	u64 read_format = event->attr.read_format;
 	u64 values[4];
@@ -3404,11 +3437,11 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 
 	values[n++] = perf_event_count(event);
 	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-		values[n++] = event->total_time_enabled +
+		values[n++] = enabled +
 			atomic64_read(&event->child_total_time_enabled);
 	}
 	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-		values[n++] = event->total_time_running +
+		values[n++] = running +
 			atomic64_read(&event->child_total_time_running);
 	}
 	if (read_format & PERF_FORMAT_ID)
@@ -3421,7 +3454,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
  * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
  */
 static void perf_output_read_group(struct perf_output_handle *handle,
-				   struct perf_event *event)
+				   struct perf_event *event,
+				   u64 enabled, u64 running)
 {
 	struct perf_event *leader = event->group_leader, *sub;
 	u64 read_format = event->attr.read_format;
@@ -3431,10 +3465,10 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	values[n++] = 1 + leader->nr_siblings;
 
 	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-		values[n++] = leader->total_time_enabled;
+		values[n++] = enabled;
 
 	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-		values[n++] = leader->total_time_running;
+		values[n++] = running;
 
 	if (leader != event)
 		leader->pmu->read(leader);
@@ -3459,13 +3493,35 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	}
 }
 
+#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
+				 PERF_FORMAT_TOTAL_TIME_RUNNING)
+
 static void perf_output_read(struct perf_output_handle *handle,
 			     struct perf_event *event)
 {
+	u64 enabled = 0, running = 0, now, ctx_time;
+	u64 read_format = event->attr.read_format;
+
+	/*
+	 * Compute total_time_enabled and total_time_running
+	 * from the snapshot values taken when the event
+	 * was last scheduled in.
+	 *
+	 * We cannot simply call update_context_time()
+	 * because of locking issues: we may be invoked
+	 * from NMI context.
+	 */
+	if (read_format & PERF_FORMAT_TOTAL_TIMES) {
+		now = perf_clock();
+		ctx_time = event->shadow_ctx_time + now;
+		enabled = ctx_time - event->tstamp_enabled;
+		running = ctx_time - event->tstamp_running;
+	}
+
 	if (event->attr.read_format & PERF_FORMAT_GROUP)
-		perf_output_read_group(handle, event);
+		perf_output_read_group(handle, event, enabled, running);
 	else
-		perf_output_read_one(handle, event, enabled, running);
+		perf_output_read_one(handle, event, enabled, running);
 }
 
 void perf_output_sample(struct perf_output_handle *handle,
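This hunk consumes the shadow_ctx_time snapshot taken in event_sched_in() above: since the up-to-date context time is ctx->time + (now - ctx->timestamp), and shadow_ctx_time was saved as ctx->time - ctx->timestamp, the sum shadow_ctx_time + now reconstructs it from a single perf_clock() read, with no locking. A small standalone model of the arithmetic follows, with hypothetical struct names; unsigned wraparound is what makes the "negative" offset exact.

/* Userspace model of the shadow_ctx_time arithmetic; not kernel code. */
#include <assert.h>
#include <stdint.h>

struct ctx { uint64_t time, timestamp; };   /* stands in for perf_event_context */
struct evt { uint64_t shadow_ctx_time, tstamp_enabled; };

/* event_sched_in(): snapshot the offset between context time and the
 * raw clock; it may be "negative", but u64 wraparound keeps it exact. */
static void sched_in(struct evt *e, const struct ctx *c)
{
        e->shadow_ctx_time = c->time - c->timestamp;
}

/* perf_output_read(): reconstruct current context time lock-free. */
static uint64_t enabled_time(const struct evt *e, uint64_t now)
{
        return (e->shadow_ctx_time + now) - e->tstamp_enabled;
}

int main(void)
{
        struct ctx c = { .time = 100, .timestamp = 1000 };  /* clock read 1000 when ctx time was 100 */
        struct evt e = { .tstamp_enabled = 40 };
        sched_in(&e, &c);
        /* 250 raw ticks later: context time is 350, enabled for 350 - 40 = 310. */
        assert(enabled_time(&e, 1250) == 310);
        return 0;
}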
@@ -5651,7 +5707,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	mutex_unlock(&ctx->mutex);
 
 	event->owner = current;
-	get_task_struct(current);
+
 	mutex_lock(&current->perf_event_mutex);
 	list_add_tail(&event->owner_entry, &current->perf_event_list);
 	mutex_unlock(&current->perf_event_mutex);
@@ -5719,12 +5775,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	++ctx->generation;
 	mutex_unlock(&ctx->mutex);
 
-	event->owner = current;
-	get_task_struct(current);
-	mutex_lock(&current->perf_event_mutex);
-	list_add_tail(&event->owner_entry, &current->perf_event_list);
-	mutex_unlock(&current->perf_event_mutex);
-
 	return event;
 
 err_free:
@@ -5875,8 +5925,24 @@ again:
  */
 void perf_event_exit_task(struct task_struct *child)
 {
+	struct perf_event *event, *tmp;
 	int ctxn;
 
+	mutex_lock(&child->perf_event_mutex);
+	list_for_each_entry_safe(event, tmp, &child->perf_event_list,
+				 owner_entry) {
+		list_del_init(&event->owner_entry);
+
+		/*
+		 * Ensure the list deletion is visible before we clear
+		 * the owner; this closes a race against perf_release(),
+		 * which serializes on owner->perf_event_mutex.
+		 */
+		smp_wmb();
+		event->owner = NULL;
+	}
+	mutex_unlock(&child->perf_event_mutex);
+
 	for_each_task_context_nr(ctxn)
 		perf_event_exit_task_context(child, ctxn);
 }
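This is the writer side of the handshake modeled after the perf_release() hunk above: unlink first, then publish the NULL owner, all under the same mutex the reader takes. Again a hedged userspace sketch, repeating the hypothetical types so the fragment stands alone; the release store stands in for the smp_wmb() followed by the plain store of NULL, and pairs with the acquire load in the reader model.

/* Userspace model of the writer side; illustration only, not kernel code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

struct task  { pthread_mutex_t perf_event_mutex; };
struct event { _Atomic(struct task *) owner; int on_owner_list; };

static void exit_task_events(struct task *tsk, struct event *ev)
{
        pthread_mutex_lock(&tsk->perf_event_mutex);
        ev->on_owner_list = 0;   /* kernel: list_del_init(&event->owner_entry) */
        /* Release store: a reader that still sees a non-NULL owner is
         * guaranteed to serialize on the mutex before the entry is gone. */
        atomic_store_explicit(&ev->owner, NULL, memory_order_release);
        pthread_mutex_unlock(&tsk->perf_event_mutex);
}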
@@ -6295,6 +6361,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 
 void __init perf_event_init(void)
 {
+	int ret;
+
 	perf_event_init_all_cpus();
 	init_srcu_struct(&pmus_srcu);
 	perf_pmu_register(&perf_swevent);
@@ -6302,4 +6370,7 @@ void __init perf_event_init(void)
 	perf_pmu_register(&perf_task_clock);
 	perf_tp_register();
 	perf_cpu_notifier(perf_cpu_notify);
+
+	ret = init_hw_breakpoint();
+	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }