Diffstat (limited to 'kernel/perf_event.c'):
 -rw-r--r--  kernel/perf_event.c | 2355
 1 file changed, 1408 insertions(+), 947 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index db5b56064687..64507eaa2d9e 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
| @@ -31,24 +31,18 @@ | |||
| 31 | #include <linux/kernel_stat.h> | 31 | #include <linux/kernel_stat.h> | 
| 32 | #include <linux/perf_event.h> | 32 | #include <linux/perf_event.h> | 
| 33 | #include <linux/ftrace_event.h> | 33 | #include <linux/ftrace_event.h> | 
| 34 | #include <linux/hw_breakpoint.h> | ||
| 35 | 34 | ||
| 36 | #include <asm/irq_regs.h> | 35 | #include <asm/irq_regs.h> | 
| 37 | 36 | ||
| 38 | /* | ||
| 39 | * Each CPU has a list of per CPU events: | ||
| 40 | */ | ||
| 41 | static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); | ||
| 42 | |||
| 43 | int perf_max_events __read_mostly = 1; | ||
| 44 | static int perf_reserved_percpu __read_mostly; | ||
| 45 | static int perf_overcommit __read_mostly = 1; | ||
| 46 | |||
| 47 | static atomic_t nr_events __read_mostly; | 37 | static atomic_t nr_events __read_mostly; | 
| 48 | static atomic_t nr_mmap_events __read_mostly; | 38 | static atomic_t nr_mmap_events __read_mostly; | 
| 49 | static atomic_t nr_comm_events __read_mostly; | 39 | static atomic_t nr_comm_events __read_mostly; | 
| 50 | static atomic_t nr_task_events __read_mostly; | 40 | static atomic_t nr_task_events __read_mostly; | 
| 51 | 41 | ||
| 42 | static LIST_HEAD(pmus); | ||
| 43 | static DEFINE_MUTEX(pmus_lock); | ||
| 44 | static struct srcu_struct pmus_srcu; | ||
| 45 | |||
| 52 | /* | 46 | /* | 
| 53 | * perf event paranoia level: | 47 | * perf event paranoia level: | 
| 54 | * -1 - not paranoid at all | 48 | * -1 - not paranoid at all | 
| @@ -67,36 +61,38 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000; | |||
| 67 | 61 | ||
| 68 | static atomic64_t perf_event_id; | 62 | static atomic64_t perf_event_id; | 
| 69 | 63 | ||
| 70 | /* | 64 | void __weak perf_event_print_debug(void) { } | 
| 71 | * Lock for (sysadmin-configurable) event reservations: | ||
| 72 | */ | ||
| 73 | static DEFINE_SPINLOCK(perf_resource_lock); | ||
| 74 | 65 | ||
| 75 | /* | 66 | void perf_pmu_disable(struct pmu *pmu) | 
| 76 | * Architecture provided APIs - weak aliases: | ||
| 77 | */ | ||
| 78 | extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) | ||
| 79 | { | 67 | { | 
| 80 | return NULL; | 68 | int *count = this_cpu_ptr(pmu->pmu_disable_count); | 
| 69 | if (!(*count)++) | ||
| 70 | pmu->pmu_disable(pmu); | ||
| 81 | } | 71 | } | 
| 82 | 72 | ||
| 83 | void __weak hw_perf_disable(void) { barrier(); } | 73 | void perf_pmu_enable(struct pmu *pmu) | 
| 84 | void __weak hw_perf_enable(void) { barrier(); } | ||
| 85 | |||
| 86 | void __weak perf_event_print_debug(void) { } | ||
| 87 | |||
| 88 | static DEFINE_PER_CPU(int, perf_disable_count); | ||
| 89 | |||
| 90 | void perf_disable(void) | ||
| 91 | { | 74 | { | 
| 92 | if (!__get_cpu_var(perf_disable_count)++) | 75 | int *count = this_cpu_ptr(pmu->pmu_disable_count); | 
| 93 | hw_perf_disable(); | 76 | if (!--(*count)) | 
| 77 | pmu->pmu_enable(pmu); | ||
| 94 | } | 78 | } | 
| 95 | 79 | ||
| 96 | void perf_enable(void) | 80 | static DEFINE_PER_CPU(struct list_head, rotation_list); | 
| 81 | |||
| 82 | /* | ||
| 83 | * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized | ||
| 84 | * because they're strictly cpu affine and rotate_start is called with IRQs | ||
| 85 | * disabled, while rotate_context is called from IRQ context. | ||
| 86 | */ | ||
| 87 | static void perf_pmu_rotate_start(struct pmu *pmu) | ||
| 97 | { | 88 | { | 
| 98 | if (!--__get_cpu_var(perf_disable_count)) | 89 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 
| 99 | hw_perf_enable(); | 90 | struct list_head *head = &__get_cpu_var(rotation_list); | 
| 91 | |||
| 92 | WARN_ON(!irqs_disabled()); | ||
| 93 | |||
| 94 | if (list_empty(&cpuctx->rotation_list)) | ||
| 95 | list_add(&cpuctx->rotation_list, head); | ||
| 100 | } | 96 | } | 
| 101 | 97 | ||
| 102 | static void get_ctx(struct perf_event_context *ctx) | 98 | static void get_ctx(struct perf_event_context *ctx) | 
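Note: the global hw_perf_disable()/hw_perf_enable() weak hooks are gone; perf_pmu_disable()/perf_pmu_enable() above keep a per-cpu nesting count for each PMU, so only the outermost disable and the matching final enable reach the pmu callbacks. A minimal stand-alone model of that nesting rule (illustrative names only, not kernel API):

    #include <stdio.h>

    static int hw_disabled;              /* stands in for the PMU hardware state */
    static int disable_count;            /* models this_cpu_ptr(pmu->pmu_disable_count) */

    static void model_pmu_disable(void)
    {
        if (!disable_count++)            /* only the 0 -> 1 transition disables */
            hw_disabled = 1;
    }

    static void model_pmu_enable(void)
    {
        if (!--disable_count)            /* only the 1 -> 0 transition re-enables */
            hw_disabled = 0;
    }

    int main(void)
    {
        model_pmu_disable();
        model_pmu_disable();             /* nested call: no extra hardware access */
        model_pmu_enable();
        printf("%d\n", hw_disabled);     /* 1: still disabled */
        model_pmu_enable();
        printf("%d\n", hw_disabled);     /* 0: enabled again */
        return 0;
    }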
| @@ -151,13 +147,13 @@ static u64 primary_event_id(struct perf_event *event) | |||
| 151 | * the context could get moved to another task. | 147 | * the context could get moved to another task. | 
| 152 | */ | 148 | */ | 
| 153 | static struct perf_event_context * | 149 | static struct perf_event_context * | 
| 154 | perf_lock_task_context(struct task_struct *task, unsigned long *flags) | 150 | perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) | 
| 155 | { | 151 | { | 
| 156 | struct perf_event_context *ctx; | 152 | struct perf_event_context *ctx; | 
| 157 | 153 | ||
| 158 | rcu_read_lock(); | 154 | rcu_read_lock(); | 
| 159 | retry: | 155 | retry: | 
| 160 | ctx = rcu_dereference(task->perf_event_ctxp); | 156 | ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); | 
| 161 | if (ctx) { | 157 | if (ctx) { | 
| 162 | /* | 158 | /* | 
| 163 | * If this context is a clone of another, it might | 159 | * If this context is a clone of another, it might | 
| @@ -170,7 +166,7 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) | |||
| 170 | * can't get swapped on us any more. | 166 | * can't get swapped on us any more. | 
| 171 | */ | 167 | */ | 
| 172 | raw_spin_lock_irqsave(&ctx->lock, *flags); | 168 | raw_spin_lock_irqsave(&ctx->lock, *flags); | 
| 173 | if (ctx != rcu_dereference(task->perf_event_ctxp)) { | 169 | if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { | 
| 174 | raw_spin_unlock_irqrestore(&ctx->lock, *flags); | 170 | raw_spin_unlock_irqrestore(&ctx->lock, *flags); | 
| 175 | goto retry; | 171 | goto retry; | 
| 176 | } | 172 | } | 
| @@ -189,12 +185,13 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) | |||
| 189 | * can't get swapped to another task. This also increments its | 185 | * can't get swapped to another task. This also increments its | 
| 190 | * reference count so that the context can't get freed. | 186 | * reference count so that the context can't get freed. | 
| 191 | */ | 187 | */ | 
| 192 | static struct perf_event_context *perf_pin_task_context(struct task_struct *task) | 188 | static struct perf_event_context * | 
| 189 | perf_pin_task_context(struct task_struct *task, int ctxn) | ||
| 193 | { | 190 | { | 
| 194 | struct perf_event_context *ctx; | 191 | struct perf_event_context *ctx; | 
| 195 | unsigned long flags; | 192 | unsigned long flags; | 
| 196 | 193 | ||
| 197 | ctx = perf_lock_task_context(task, &flags); | 194 | ctx = perf_lock_task_context(task, ctxn, &flags); | 
| 198 | if (ctx) { | 195 | if (ctx) { | 
| 199 | ++ctx->pin_count; | 196 | ++ctx->pin_count; | 
| 200 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 197 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 
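Note: a task no longer carries a single perf_event_ctxp pointer; it carries one context pointer per context class, and the lookup helpers above are parameterised by that index (ctxn). Purely for orientation, the shape being indexed looks roughly like the sketch below (illustrative names; the real declarations live in the scheduler/perf headers):

    #define SKETCH_NR_CONTEXTS 2         /* stands in for perf_nr_task_contexts */

    struct sketch_task {
        /* one slot per context class; a slot stays NULL until an event of
         * that class is attached to the task */
        struct perf_event_context *perf_event_ctxp[SKETCH_NR_CONTEXTS];
    };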
| @@ -302,6 +299,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
| 302 | } | 299 | } | 
| 303 | 300 | ||
| 304 | list_add_rcu(&event->event_entry, &ctx->event_list); | 301 | list_add_rcu(&event->event_entry, &ctx->event_list); | 
| 302 | if (!ctx->nr_events) | ||
| 303 | perf_pmu_rotate_start(ctx->pmu); | ||
| 305 | ctx->nr_events++; | 304 | ctx->nr_events++; | 
| 306 | if (event->attr.inherit_stat) | 305 | if (event->attr.inherit_stat) | 
| 307 | ctx->nr_stat++; | 306 | ctx->nr_stat++; | 
| @@ -436,7 +435,7 @@ event_sched_out(struct perf_event *event, | |||
| 436 | event->state = PERF_EVENT_STATE_OFF; | 435 | event->state = PERF_EVENT_STATE_OFF; | 
| 437 | } | 436 | } | 
| 438 | event->tstamp_stopped = ctx->time; | 437 | event->tstamp_stopped = ctx->time; | 
| 439 | event->pmu->disable(event); | 438 | event->pmu->del(event, 0); | 
| 440 | event->oncpu = -1; | 439 | event->oncpu = -1; | 
| 441 | 440 | ||
| 442 | if (!is_software_event(event)) | 441 | if (!is_software_event(event)) | 
| @@ -466,6 +465,12 @@ group_sched_out(struct perf_event *group_event, | |||
| 466 | cpuctx->exclusive = 0; | 465 | cpuctx->exclusive = 0; | 
| 467 | } | 466 | } | 
| 468 | 467 | ||
| 468 | static inline struct perf_cpu_context * | ||
| 469 | __get_cpu_context(struct perf_event_context *ctx) | ||
| 470 | { | ||
| 471 | return this_cpu_ptr(ctx->pmu->pmu_cpu_context); | ||
| 472 | } | ||
| 473 | |||
| 469 | /* | 474 | /* | 
| 470 | * Cross CPU call to remove a performance event | 475 | * Cross CPU call to remove a performance event | 
| 471 | * | 476 | * | 
| @@ -474,9 +479,9 @@ group_sched_out(struct perf_event *group_event, | |||
| 474 | */ | 479 | */ | 
| 475 | static void __perf_event_remove_from_context(void *info) | 480 | static void __perf_event_remove_from_context(void *info) | 
| 476 | { | 481 | { | 
| 477 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
| 478 | struct perf_event *event = info; | 482 | struct perf_event *event = info; | 
| 479 | struct perf_event_context *ctx = event->ctx; | 483 | struct perf_event_context *ctx = event->ctx; | 
| 484 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | ||
| 480 | 485 | ||
| 481 | /* | 486 | /* | 
| 482 | * If this is a task context, we need to check whether it is | 487 | * If this is a task context, we need to check whether it is | 
| @@ -487,27 +492,11 @@ static void __perf_event_remove_from_context(void *info) | |||
| 487 | return; | 492 | return; | 
| 488 | 493 | ||
| 489 | raw_spin_lock(&ctx->lock); | 494 | raw_spin_lock(&ctx->lock); | 
| 490 | /* | ||
| 491 | * Protect the list operation against NMI by disabling the | ||
| 492 | * events on a global level. | ||
| 493 | */ | ||
| 494 | perf_disable(); | ||
| 495 | 495 | ||
| 496 | event_sched_out(event, cpuctx, ctx); | 496 | event_sched_out(event, cpuctx, ctx); | 
| 497 | 497 | ||
| 498 | list_del_event(event, ctx); | 498 | list_del_event(event, ctx); | 
| 499 | 499 | ||
| 500 | if (!ctx->task) { | ||
| 501 | /* | ||
| 502 | * Allow more per task events with respect to the | ||
| 503 | * reservation: | ||
| 504 | */ | ||
| 505 | cpuctx->max_pertask = | ||
| 506 | min(perf_max_events - ctx->nr_events, | ||
| 507 | perf_max_events - perf_reserved_percpu); | ||
| 508 | } | ||
| 509 | |||
| 510 | perf_enable(); | ||
| 511 | raw_spin_unlock(&ctx->lock); | 500 | raw_spin_unlock(&ctx->lock); | 
| 512 | } | 501 | } | 
| 513 | 502 | ||
| @@ -572,8 +561,8 @@ retry: | |||
| 572 | static void __perf_event_disable(void *info) | 561 | static void __perf_event_disable(void *info) | 
| 573 | { | 562 | { | 
| 574 | struct perf_event *event = info; | 563 | struct perf_event *event = info; | 
| 575 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
| 576 | struct perf_event_context *ctx = event->ctx; | 564 | struct perf_event_context *ctx = event->ctx; | 
| 565 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | ||
| 577 | 566 | ||
| 578 | /* | 567 | /* | 
| 579 | * If this is a per-task event, need to check whether this | 568 | * If this is a per-task event, need to check whether this | 
| @@ -628,7 +617,7 @@ void perf_event_disable(struct perf_event *event) | |||
| 628 | return; | 617 | return; | 
| 629 | } | 618 | } | 
| 630 | 619 | ||
| 631 | retry: | 620 | retry: | 
| 632 | task_oncpu_function_call(task, __perf_event_disable, event); | 621 | task_oncpu_function_call(task, __perf_event_disable, event); | 
| 633 | 622 | ||
| 634 | raw_spin_lock_irq(&ctx->lock); | 623 | raw_spin_lock_irq(&ctx->lock); | 
| @@ -667,7 +656,7 @@ event_sched_in(struct perf_event *event, | |||
| 667 | */ | 656 | */ | 
| 668 | smp_wmb(); | 657 | smp_wmb(); | 
| 669 | 658 | ||
| 670 | if (event->pmu->enable(event)) { | 659 | if (event->pmu->add(event, PERF_EF_START)) { | 
| 671 | event->state = PERF_EVENT_STATE_INACTIVE; | 660 | event->state = PERF_EVENT_STATE_INACTIVE; | 
| 672 | event->oncpu = -1; | 661 | event->oncpu = -1; | 
| 673 | return -EAGAIN; | 662 | return -EAGAIN; | 
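Note: event_sched_in() now goes through the consolidated struct pmu callbacks: add()/del() attach and detach an event, start()/stop() control counting, and the PERF_EF_START flag asks add() to start the event immediately. A hedged skeleton of what a simple driver might supply under this interface (sketch_* names are illustrative, not taken from this patch; hardware programming reduced to comments):

    static void sketch_start(struct perf_event *event, int flags)
    {
        /* PERF_EF_RELOAD: reprogram the counter from hwc->period_left */
        event->hw.state = 0;                    /* counting */
    }

    static void sketch_stop(struct perf_event *event, int flags)
    {
        /* PERF_EF_UPDATE: fold the hardware count into event->count first */
        event->hw.state |= PERF_HES_STOPPED;
    }

    static int sketch_add(struct perf_event *event, int flags)
    {
        event->hw.state = PERF_HES_STOPPED;     /* scheduled, not yet counting */
        if (flags & PERF_EF_START)
            sketch_start(event, PERF_EF_RELOAD);
        return 0;                               /* non-zero (e.g. -EAGAIN) rejects it */
    }

    static void sketch_del(struct perf_event *event, int flags)
    {
        sketch_stop(event, PERF_EF_UPDATE);     /* unschedule, keeping the count */
    }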
| @@ -691,22 +680,15 @@ group_sched_in(struct perf_event *group_event, | |||
| 691 | struct perf_event_context *ctx) | 680 | struct perf_event_context *ctx) | 
| 692 | { | 681 | { | 
| 693 | struct perf_event *event, *partial_group = NULL; | 682 | struct perf_event *event, *partial_group = NULL; | 
| 694 | const struct pmu *pmu = group_event->pmu; | 683 | struct pmu *pmu = group_event->pmu; | 
| 695 | bool txn = false; | ||
| 696 | 684 | ||
| 697 | if (group_event->state == PERF_EVENT_STATE_OFF) | 685 | if (group_event->state == PERF_EVENT_STATE_OFF) | 
| 698 | return 0; | 686 | return 0; | 
| 699 | 687 | ||
| 700 | /* Check if group transaction availabe */ | 688 | pmu->start_txn(pmu); | 
| 701 | if (pmu->start_txn) | ||
| 702 | txn = true; | ||
| 703 | |||
| 704 | if (txn) | ||
| 705 | pmu->start_txn(pmu); | ||
| 706 | 689 | ||
| 707 | if (event_sched_in(group_event, cpuctx, ctx)) { | 690 | if (event_sched_in(group_event, cpuctx, ctx)) { | 
| 708 | if (txn) | 691 | pmu->cancel_txn(pmu); | 
| 709 | pmu->cancel_txn(pmu); | ||
| 710 | return -EAGAIN; | 692 | return -EAGAIN; | 
| 711 | } | 693 | } | 
| 712 | 694 | ||
| @@ -720,7 +702,7 @@ group_sched_in(struct perf_event *group_event, | |||
| 720 | } | 702 | } | 
| 721 | } | 703 | } | 
| 722 | 704 | ||
| 723 | if (!txn || !pmu->commit_txn(pmu)) | 705 | if (!pmu->commit_txn(pmu)) | 
| 724 | return 0; | 706 | return 0; | 
| 725 | 707 | ||
| 726 | group_error: | 708 | group_error: | 
| @@ -735,8 +717,7 @@ group_error: | |||
| 735 | } | 717 | } | 
| 736 | event_sched_out(group_event, cpuctx, ctx); | 718 | event_sched_out(group_event, cpuctx, ctx); | 
| 737 | 719 | ||
| 738 | if (txn) | 720 | pmu->cancel_txn(pmu); | 
| 739 | pmu->cancel_txn(pmu); | ||
| 740 | 721 | ||
| 741 | return -EAGAIN; | 722 | return -EAGAIN; | 
| 742 | } | 723 | } | 
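Note: group scheduling no longer probes whether the PMU implements transactions; start_txn()/commit_txn()/cancel_txn() are called unconditionally, with PMUs lacking hardware transaction support expected to get no-op callbacks filled in elsewhere in this patch (hedged reading). The all-or-nothing pattern the code above follows, in sketch form (sketch_add_group_members() is a hypothetical helper standing in for the per-member event_sched_in() loop):

    static int sketch_group_sched_in(struct pmu *pmu)
    {
        pmu->start_txn(pmu);            /* open a scheduling transaction */

        if (sketch_add_group_members(pmu)) {
            pmu->cancel_txn(pmu);       /* any member failed: roll the group back */
            return -EAGAIN;
        }

        if (!pmu->commit_txn(pmu))      /* 0: the whole group fits together */
            return 0;

        pmu->cancel_txn(pmu);           /* commit refused: undo everything */
        return -EAGAIN;
    }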
| @@ -789,10 +770,10 @@ static void add_event_to_ctx(struct perf_event *event, | |||
| 789 | */ | 770 | */ | 
| 790 | static void __perf_install_in_context(void *info) | 771 | static void __perf_install_in_context(void *info) | 
| 791 | { | 772 | { | 
| 792 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
| 793 | struct perf_event *event = info; | 773 | struct perf_event *event = info; | 
| 794 | struct perf_event_context *ctx = event->ctx; | 774 | struct perf_event_context *ctx = event->ctx; | 
| 795 | struct perf_event *leader = event->group_leader; | 775 | struct perf_event *leader = event->group_leader; | 
| 776 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | ||
| 796 | int err; | 777 | int err; | 
| 797 | 778 | ||
| 798 | /* | 779 | /* | 
| @@ -812,12 +793,6 @@ static void __perf_install_in_context(void *info) | |||
| 812 | ctx->is_active = 1; | 793 | ctx->is_active = 1; | 
| 813 | update_context_time(ctx); | 794 | update_context_time(ctx); | 
| 814 | 795 | ||
| 815 | /* | ||
| 816 | * Protect the list operation against NMI by disabling the | ||
| 817 | * events on a global level. NOP for non NMI based events. | ||
| 818 | */ | ||
| 819 | perf_disable(); | ||
| 820 | |||
| 821 | add_event_to_ctx(event, ctx); | 796 | add_event_to_ctx(event, ctx); | 
| 822 | 797 | ||
| 823 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 798 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 
| @@ -855,12 +830,7 @@ static void __perf_install_in_context(void *info) | |||
| 855 | } | 830 | } | 
| 856 | } | 831 | } | 
| 857 | 832 | ||
| 858 | if (!err && !ctx->task && cpuctx->max_pertask) | 833 | unlock: | 
| 859 | cpuctx->max_pertask--; | ||
| 860 | |||
| 861 | unlock: | ||
| 862 | perf_enable(); | ||
| 863 | |||
| 864 | raw_spin_unlock(&ctx->lock); | 834 | raw_spin_unlock(&ctx->lock); | 
| 865 | } | 835 | } | 
| 866 | 836 | ||
| @@ -883,6 +853,8 @@ perf_install_in_context(struct perf_event_context *ctx, | |||
| 883 | { | 853 | { | 
| 884 | struct task_struct *task = ctx->task; | 854 | struct task_struct *task = ctx->task; | 
| 885 | 855 | ||
| 856 | event->ctx = ctx; | ||
| 857 | |||
| 886 | if (!task) { | 858 | if (!task) { | 
| 887 | /* | 859 | /* | 
| 888 | * Per cpu events are installed via an smp call and | 860 | * Per cpu events are installed via an smp call and | 
| @@ -931,10 +903,12 @@ static void __perf_event_mark_enabled(struct perf_event *event, | |||
| 931 | 903 | ||
| 932 | event->state = PERF_EVENT_STATE_INACTIVE; | 904 | event->state = PERF_EVENT_STATE_INACTIVE; | 
| 933 | event->tstamp_enabled = ctx->time - event->total_time_enabled; | 905 | event->tstamp_enabled = ctx->time - event->total_time_enabled; | 
| 934 | list_for_each_entry(sub, &event->sibling_list, group_entry) | 906 | list_for_each_entry(sub, &event->sibling_list, group_entry) { | 
| 935 | if (sub->state >= PERF_EVENT_STATE_INACTIVE) | 907 | if (sub->state >= PERF_EVENT_STATE_INACTIVE) { | 
| 936 | sub->tstamp_enabled = | 908 | sub->tstamp_enabled = | 
| 937 | ctx->time - sub->total_time_enabled; | 909 | ctx->time - sub->total_time_enabled; | 
| 910 | } | ||
| 911 | } | ||
| 938 | } | 912 | } | 
| 939 | 913 | ||
| 940 | /* | 914 | /* | 
| @@ -943,9 +917,9 @@ static void __perf_event_mark_enabled(struct perf_event *event, | |||
| 943 | static void __perf_event_enable(void *info) | 917 | static void __perf_event_enable(void *info) | 
| 944 | { | 918 | { | 
| 945 | struct perf_event *event = info; | 919 | struct perf_event *event = info; | 
| 946 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
| 947 | struct perf_event_context *ctx = event->ctx; | 920 | struct perf_event_context *ctx = event->ctx; | 
| 948 | struct perf_event *leader = event->group_leader; | 921 | struct perf_event *leader = event->group_leader; | 
| 922 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | ||
| 949 | int err; | 923 | int err; | 
| 950 | 924 | ||
| 951 | /* | 925 | /* | 
| @@ -979,12 +953,10 @@ static void __perf_event_enable(void *info) | |||
| 979 | if (!group_can_go_on(event, cpuctx, 1)) { | 953 | if (!group_can_go_on(event, cpuctx, 1)) { | 
| 980 | err = -EEXIST; | 954 | err = -EEXIST; | 
| 981 | } else { | 955 | } else { | 
| 982 | perf_disable(); | ||
| 983 | if (event == leader) | 956 | if (event == leader) | 
| 984 | err = group_sched_in(event, cpuctx, ctx); | 957 | err = group_sched_in(event, cpuctx, ctx); | 
| 985 | else | 958 | else | 
| 986 | err = event_sched_in(event, cpuctx, ctx); | 959 | err = event_sched_in(event, cpuctx, ctx); | 
| 987 | perf_enable(); | ||
| 988 | } | 960 | } | 
| 989 | 961 | ||
| 990 | if (err) { | 962 | if (err) { | 
| @@ -1000,7 +972,7 @@ static void __perf_event_enable(void *info) | |||
| 1000 | } | 972 | } | 
| 1001 | } | 973 | } | 
| 1002 | 974 | ||
| 1003 | unlock: | 975 | unlock: | 
| 1004 | raw_spin_unlock(&ctx->lock); | 976 | raw_spin_unlock(&ctx->lock); | 
| 1005 | } | 977 | } | 
| 1006 | 978 | ||
| @@ -1041,7 +1013,7 @@ void perf_event_enable(struct perf_event *event) | |||
| 1041 | if (event->state == PERF_EVENT_STATE_ERROR) | 1013 | if (event->state == PERF_EVENT_STATE_ERROR) | 
| 1042 | event->state = PERF_EVENT_STATE_OFF; | 1014 | event->state = PERF_EVENT_STATE_OFF; | 
| 1043 | 1015 | ||
| 1044 | retry: | 1016 | retry: | 
| 1045 | raw_spin_unlock_irq(&ctx->lock); | 1017 | raw_spin_unlock_irq(&ctx->lock); | 
| 1046 | task_oncpu_function_call(task, __perf_event_enable, event); | 1018 | task_oncpu_function_call(task, __perf_event_enable, event); | 
| 1047 | 1019 | ||
| @@ -1061,7 +1033,7 @@ void perf_event_enable(struct perf_event *event) | |||
| 1061 | if (event->state == PERF_EVENT_STATE_OFF) | 1033 | if (event->state == PERF_EVENT_STATE_OFF) | 
| 1062 | __perf_event_mark_enabled(event, ctx); | 1034 | __perf_event_mark_enabled(event, ctx); | 
| 1063 | 1035 | ||
| 1064 | out: | 1036 | out: | 
| 1065 | raw_spin_unlock_irq(&ctx->lock); | 1037 | raw_spin_unlock_irq(&ctx->lock); | 
| 1066 | } | 1038 | } | 
| 1067 | 1039 | ||
| @@ -1092,26 +1064,26 @@ static void ctx_sched_out(struct perf_event_context *ctx, | |||
| 1092 | struct perf_event *event; | 1064 | struct perf_event *event; | 
| 1093 | 1065 | ||
| 1094 | raw_spin_lock(&ctx->lock); | 1066 | raw_spin_lock(&ctx->lock); | 
| 1067 | perf_pmu_disable(ctx->pmu); | ||
| 1095 | ctx->is_active = 0; | 1068 | ctx->is_active = 0; | 
| 1096 | if (likely(!ctx->nr_events)) | 1069 | if (likely(!ctx->nr_events)) | 
| 1097 | goto out; | 1070 | goto out; | 
| 1098 | update_context_time(ctx); | 1071 | update_context_time(ctx); | 
| 1099 | 1072 | ||
| 1100 | perf_disable(); | ||
| 1101 | if (!ctx->nr_active) | 1073 | if (!ctx->nr_active) | 
| 1102 | goto out_enable; | 1074 | goto out; | 
| 1103 | 1075 | ||
| 1104 | if (event_type & EVENT_PINNED) | 1076 | if (event_type & EVENT_PINNED) { | 
| 1105 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) | 1077 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) | 
| 1106 | group_sched_out(event, cpuctx, ctx); | 1078 | group_sched_out(event, cpuctx, ctx); | 
| 1079 | } | ||
| 1107 | 1080 | ||
| 1108 | if (event_type & EVENT_FLEXIBLE) | 1081 | if (event_type & EVENT_FLEXIBLE) { | 
| 1109 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) | 1082 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) | 
| 1110 | group_sched_out(event, cpuctx, ctx); | 1083 | group_sched_out(event, cpuctx, ctx); | 
| 1111 | 1084 | } | |
| 1112 | out_enable: | 1085 | out: | 
| 1113 | perf_enable(); | 1086 | perf_pmu_enable(ctx->pmu); | 
| 1114 | out: | ||
| 1115 | raw_spin_unlock(&ctx->lock); | 1087 | raw_spin_unlock(&ctx->lock); | 
| 1116 | } | 1088 | } | 
| 1117 | 1089 | ||
| @@ -1209,34 +1181,25 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, | |||
| 1209 | } | 1181 | } | 
| 1210 | } | 1182 | } | 
| 1211 | 1183 | ||
| 1212 | /* | 1184 | void perf_event_context_sched_out(struct task_struct *task, int ctxn, | 
| 1213 | * Called from scheduler to remove the events of the current task, | 1185 | struct task_struct *next) | 
| 1214 | * with interrupts disabled. | ||
| 1215 | * | ||
| 1216 | * We stop each event and update the event value in event->count. | ||
| 1217 | * | ||
| 1218 | * This does not protect us against NMI, but disable() | ||
| 1219 | * sets the disabled bit in the control field of event _before_ | ||
| 1220 | * accessing the event control register. If a NMI hits, then it will | ||
| 1221 | * not restart the event. | ||
| 1222 | */ | ||
| 1223 | void perf_event_task_sched_out(struct task_struct *task, | ||
| 1224 | struct task_struct *next) | ||
| 1225 | { | 1186 | { | 
| 1226 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1187 | struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; | 
| 1227 | struct perf_event_context *ctx = task->perf_event_ctxp; | ||
| 1228 | struct perf_event_context *next_ctx; | 1188 | struct perf_event_context *next_ctx; | 
| 1229 | struct perf_event_context *parent; | 1189 | struct perf_event_context *parent; | 
| 1190 | struct perf_cpu_context *cpuctx; | ||
| 1230 | int do_switch = 1; | 1191 | int do_switch = 1; | 
| 1231 | 1192 | ||
| 1232 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); | 1193 | if (likely(!ctx)) | 
| 1194 | return; | ||
| 1233 | 1195 | ||
| 1234 | if (likely(!ctx || !cpuctx->task_ctx)) | 1196 | cpuctx = __get_cpu_context(ctx); | 
| 1197 | if (!cpuctx->task_ctx) | ||
| 1235 | return; | 1198 | return; | 
| 1236 | 1199 | ||
| 1237 | rcu_read_lock(); | 1200 | rcu_read_lock(); | 
| 1238 | parent = rcu_dereference(ctx->parent_ctx); | 1201 | parent = rcu_dereference(ctx->parent_ctx); | 
| 1239 | next_ctx = next->perf_event_ctxp; | 1202 | next_ctx = next->perf_event_ctxp[ctxn]; | 
| 1240 | if (parent && next_ctx && | 1203 | if (parent && next_ctx && | 
| 1241 | rcu_dereference(next_ctx->parent_ctx) == parent) { | 1204 | rcu_dereference(next_ctx->parent_ctx) == parent) { | 
| 1242 | /* | 1205 | /* | 
| @@ -1255,8 +1218,8 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
| 1255 | * XXX do we need a memory barrier of sorts | 1218 | * XXX do we need a memory barrier of sorts | 
| 1256 | * wrt to rcu_dereference() of perf_event_ctxp | 1219 | * wrt to rcu_dereference() of perf_event_ctxp | 
| 1257 | */ | 1220 | */ | 
| 1258 | task->perf_event_ctxp = next_ctx; | 1221 | task->perf_event_ctxp[ctxn] = next_ctx; | 
| 1259 | next->perf_event_ctxp = ctx; | 1222 | next->perf_event_ctxp[ctxn] = ctx; | 
| 1260 | ctx->task = next; | 1223 | ctx->task = next; | 
| 1261 | next_ctx->task = task; | 1224 | next_ctx->task = task; | 
| 1262 | do_switch = 0; | 1225 | do_switch = 0; | 
| @@ -1274,10 +1237,35 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
| 1274 | } | 1237 | } | 
| 1275 | } | 1238 | } | 
| 1276 | 1239 | ||
| 1240 | #define for_each_task_context_nr(ctxn) \ | ||
| 1241 | for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) | ||
| 1242 | |||
| 1243 | /* | ||
| 1244 | * Called from scheduler to remove the events of the current task, | ||
| 1245 | * with interrupts disabled. | ||
| 1246 | * | ||
| 1247 | * We stop each event and update the event value in event->count. | ||
| 1248 | * | ||
| 1249 | * This does not protect us against NMI, but disable() | ||
| 1250 | * sets the disabled bit in the control field of event _before_ | ||
| 1251 | * accessing the event control register. If a NMI hits, then it will | ||
| 1252 | * not restart the event. | ||
| 1253 | */ | ||
| 1254 | void perf_event_task_sched_out(struct task_struct *task, | ||
| 1255 | struct task_struct *next) | ||
| 1256 | { | ||
| 1257 | int ctxn; | ||
| 1258 | |||
| 1259 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); | ||
| 1260 | |||
| 1261 | for_each_task_context_nr(ctxn) | ||
| 1262 | perf_event_context_sched_out(task, ctxn, next); | ||
| 1263 | } | ||
| 1264 | |||
| 1277 | static void task_ctx_sched_out(struct perf_event_context *ctx, | 1265 | static void task_ctx_sched_out(struct perf_event_context *ctx, | 
| 1278 | enum event_type_t event_type) | 1266 | enum event_type_t event_type) | 
| 1279 | { | 1267 | { | 
| 1280 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1268 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | 
| 1281 | 1269 | ||
| 1282 | if (!cpuctx->task_ctx) | 1270 | if (!cpuctx->task_ctx) | 
| 1283 | return; | 1271 | return; | 
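Note: the scheduler entry points now loop over every per-task context slot rather than a single context; for_each_task_context_nr() is just a bounded index walk. How a caller visits the slots, reduced to a sketch (illustrative wrapper, mirroring perf_event_task_sched_in() above):

    static void sketch_visit_contexts(struct task_struct *task)
    {
        struct perf_event_context *ctx;
        int ctxn;

        for_each_task_context_nr(ctxn) {
            ctx = task->perf_event_ctxp[ctxn];
            if (!ctx)
                continue;               /* slot never allocated for this task */
            /* ... schedule ctx in or out, as the hooks above do ... */
        }
    }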
| @@ -1350,9 +1338,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, | |||
| 1350 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1338 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 
| 1351 | continue; | 1339 | continue; | 
| 1352 | 1340 | ||
| 1353 | if (group_can_go_on(event, cpuctx, can_add_hw)) | 1341 | if (group_can_go_on(event, cpuctx, can_add_hw)) { | 
| 1354 | if (group_sched_in(event, cpuctx, ctx)) | 1342 | if (group_sched_in(event, cpuctx, ctx)) | 
| 1355 | can_add_hw = 0; | 1343 | can_add_hw = 0; | 
| 1344 | } | ||
| 1356 | } | 1345 | } | 
| 1357 | } | 1346 | } | 
| 1358 | 1347 | ||
| @@ -1368,8 +1357,6 @@ ctx_sched_in(struct perf_event_context *ctx, | |||
| 1368 | 1357 | ||
| 1369 | ctx->timestamp = perf_clock(); | 1358 | ctx->timestamp = perf_clock(); | 
| 1370 | 1359 | ||
| 1371 | perf_disable(); | ||
| 1372 | |||
| 1373 | /* | 1360 | /* | 
| 1374 | * First go through the list and put on any pinned groups | 1361 | * First go through the list and put on any pinned groups | 
| 1375 | * in order to give them the best chance of going on. | 1362 | * in order to give them the best chance of going on. | 
| @@ -1381,8 +1368,7 @@ ctx_sched_in(struct perf_event_context *ctx, | |||
| 1381 | if (event_type & EVENT_FLEXIBLE) | 1368 | if (event_type & EVENT_FLEXIBLE) | 
| 1382 | ctx_flexible_sched_in(ctx, cpuctx); | 1369 | ctx_flexible_sched_in(ctx, cpuctx); | 
| 1383 | 1370 | ||
| 1384 | perf_enable(); | 1371 | out: | 
| 1385 | out: | ||
| 1386 | raw_spin_unlock(&ctx->lock); | 1372 | raw_spin_unlock(&ctx->lock); | 
| 1387 | } | 1373 | } | 
| 1388 | 1374 | ||
| @@ -1394,43 +1380,28 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | |||
| 1394 | ctx_sched_in(ctx, cpuctx, event_type); | 1380 | ctx_sched_in(ctx, cpuctx, event_type); | 
| 1395 | } | 1381 | } | 
| 1396 | 1382 | ||
| 1397 | static void task_ctx_sched_in(struct task_struct *task, | 1383 | static void task_ctx_sched_in(struct perf_event_context *ctx, | 
| 1398 | enum event_type_t event_type) | 1384 | enum event_type_t event_type) | 
| 1399 | { | 1385 | { | 
| 1400 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1386 | struct perf_cpu_context *cpuctx; | 
| 1401 | struct perf_event_context *ctx = task->perf_event_ctxp; | ||
| 1402 | 1387 | ||
| 1403 | if (likely(!ctx)) | 1388 | cpuctx = __get_cpu_context(ctx); | 
| 1404 | return; | ||
| 1405 | if (cpuctx->task_ctx == ctx) | 1389 | if (cpuctx->task_ctx == ctx) | 
| 1406 | return; | 1390 | return; | 
| 1391 | |||
| 1407 | ctx_sched_in(ctx, cpuctx, event_type); | 1392 | ctx_sched_in(ctx, cpuctx, event_type); | 
| 1408 | cpuctx->task_ctx = ctx; | 1393 | cpuctx->task_ctx = ctx; | 
| 1409 | } | 1394 | } | 
| 1410 | /* | ||
| 1411 | * Called from scheduler to add the events of the current task | ||
| 1412 | * with interrupts disabled. | ||
| 1413 | * | ||
| 1414 | * We restore the event value and then enable it. | ||
| 1415 | * | ||
| 1416 | * This does not protect us against NMI, but enable() | ||
| 1417 | * sets the enabled bit in the control field of event _before_ | ||
| 1418 | * accessing the event control register. If a NMI hits, then it will | ||
| 1419 | * keep the event running. | ||
| 1420 | */ | ||
| 1421 | void perf_event_task_sched_in(struct task_struct *task) | ||
| 1422 | { | ||
| 1423 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
| 1424 | struct perf_event_context *ctx = task->perf_event_ctxp; | ||
| 1425 | 1395 | ||
| 1426 | if (likely(!ctx)) | 1396 | void perf_event_context_sched_in(struct perf_event_context *ctx) | 
| 1427 | return; | 1397 | { | 
| 1398 | struct perf_cpu_context *cpuctx; | ||
| 1428 | 1399 | ||
| 1400 | cpuctx = __get_cpu_context(ctx); | ||
| 1429 | if (cpuctx->task_ctx == ctx) | 1401 | if (cpuctx->task_ctx == ctx) | 
| 1430 | return; | 1402 | return; | 
| 1431 | 1403 | ||
| 1432 | perf_disable(); | 1404 | perf_pmu_disable(ctx->pmu); | 
| 1433 | |||
| 1434 | /* | 1405 | /* | 
| 1435 | * We want to keep the following priority order: | 1406 | * We want to keep the following priority order: | 
| 1436 | * cpu pinned (that don't need to move), task pinned, | 1407 | * cpu pinned (that don't need to move), task pinned, | 
| @@ -1444,7 +1415,37 @@ void perf_event_task_sched_in(struct task_struct *task) | |||
| 1444 | 1415 | ||
| 1445 | cpuctx->task_ctx = ctx; | 1416 | cpuctx->task_ctx = ctx; | 
| 1446 | 1417 | ||
| 1447 | perf_enable(); | 1418 | /* | 
| 1419 | * Since these rotations are per-cpu, we need to ensure the | ||
| 1420 | * cpu-context we got scheduled on is actually rotating. | ||
| 1421 | */ | ||
| 1422 | perf_pmu_rotate_start(ctx->pmu); | ||
| 1423 | perf_pmu_enable(ctx->pmu); | ||
| 1424 | } | ||
| 1425 | |||
| 1426 | /* | ||
| 1427 | * Called from scheduler to add the events of the current task | ||
| 1428 | * with interrupts disabled. | ||
| 1429 | * | ||
| 1430 | * We restore the event value and then enable it. | ||
| 1431 | * | ||
| 1432 | * This does not protect us against NMI, but enable() | ||
| 1433 | * sets the enabled bit in the control field of event _before_ | ||
| 1434 | * accessing the event control register. If a NMI hits, then it will | ||
| 1435 | * keep the event running. | ||
| 1436 | */ | ||
| 1437 | void perf_event_task_sched_in(struct task_struct *task) | ||
| 1438 | { | ||
| 1439 | struct perf_event_context *ctx; | ||
| 1440 | int ctxn; | ||
| 1441 | |||
| 1442 | for_each_task_context_nr(ctxn) { | ||
| 1443 | ctx = task->perf_event_ctxp[ctxn]; | ||
| 1444 | if (likely(!ctx)) | ||
| 1445 | continue; | ||
| 1446 | |||
| 1447 | perf_event_context_sched_in(ctx); | ||
| 1448 | } | ||
| 1448 | } | 1449 | } | 
| 1449 | 1450 | ||
| 1450 | #define MAX_INTERRUPTS (~0ULL) | 1451 | #define MAX_INTERRUPTS (~0ULL) | 
| @@ -1524,22 +1525,6 @@ do { \ | |||
| 1524 | return div64_u64(dividend, divisor); | 1525 | return div64_u64(dividend, divisor); | 
| 1525 | } | 1526 | } | 
| 1526 | 1527 | ||
| 1527 | static void perf_event_stop(struct perf_event *event) | ||
| 1528 | { | ||
| 1529 | if (!event->pmu->stop) | ||
| 1530 | return event->pmu->disable(event); | ||
| 1531 | |||
| 1532 | return event->pmu->stop(event); | ||
| 1533 | } | ||
| 1534 | |||
| 1535 | static int perf_event_start(struct perf_event *event) | ||
| 1536 | { | ||
| 1537 | if (!event->pmu->start) | ||
| 1538 | return event->pmu->enable(event); | ||
| 1539 | |||
| 1540 | return event->pmu->start(event); | ||
| 1541 | } | ||
| 1542 | |||
| 1543 | static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) | 1528 | static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) | 
| 1544 | { | 1529 | { | 
| 1545 | struct hw_perf_event *hwc = &event->hw; | 1530 | struct hw_perf_event *hwc = &event->hw; | 
| @@ -1559,15 +1544,13 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) | |||
| 1559 | hwc->sample_period = sample_period; | 1544 | hwc->sample_period = sample_period; | 
| 1560 | 1545 | ||
| 1561 | if (local64_read(&hwc->period_left) > 8*sample_period) { | 1546 | if (local64_read(&hwc->period_left) > 8*sample_period) { | 
| 1562 | perf_disable(); | 1547 | event->pmu->stop(event, PERF_EF_UPDATE); | 
| 1563 | perf_event_stop(event); | ||
| 1564 | local64_set(&hwc->period_left, 0); | 1548 | local64_set(&hwc->period_left, 0); | 
| 1565 | perf_event_start(event); | 1549 | event->pmu->start(event, PERF_EF_RELOAD); | 
| 1566 | perf_enable(); | ||
| 1567 | } | 1550 | } | 
| 1568 | } | 1551 | } | 
| 1569 | 1552 | ||
| 1570 | static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | 1553 | static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) | 
| 1571 | { | 1554 | { | 
| 1572 | struct perf_event *event; | 1555 | struct perf_event *event; | 
| 1573 | struct hw_perf_event *hwc; | 1556 | struct hw_perf_event *hwc; | 
| @@ -1592,23 +1575,19 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
| 1592 | */ | 1575 | */ | 
| 1593 | if (interrupts == MAX_INTERRUPTS) { | 1576 | if (interrupts == MAX_INTERRUPTS) { | 
| 1594 | perf_log_throttle(event, 1); | 1577 | perf_log_throttle(event, 1); | 
| 1595 | perf_disable(); | 1578 | event->pmu->start(event, 0); | 
| 1596 | event->pmu->unthrottle(event); | ||
| 1597 | perf_enable(); | ||
| 1598 | } | 1579 | } | 
| 1599 | 1580 | ||
| 1600 | if (!event->attr.freq || !event->attr.sample_freq) | 1581 | if (!event->attr.freq || !event->attr.sample_freq) | 
| 1601 | continue; | 1582 | continue; | 
| 1602 | 1583 | ||
| 1603 | perf_disable(); | ||
| 1604 | event->pmu->read(event); | 1584 | event->pmu->read(event); | 
| 1605 | now = local64_read(&event->count); | 1585 | now = local64_read(&event->count); | 
| 1606 | delta = now - hwc->freq_count_stamp; | 1586 | delta = now - hwc->freq_count_stamp; | 
| 1607 | hwc->freq_count_stamp = now; | 1587 | hwc->freq_count_stamp = now; | 
| 1608 | 1588 | ||
| 1609 | if (delta > 0) | 1589 | if (delta > 0) | 
| 1610 | perf_adjust_period(event, TICK_NSEC, delta); | 1590 | perf_adjust_period(event, period, delta); | 
| 1611 | perf_enable(); | ||
| 1612 | } | 1591 | } | 
| 1613 | raw_spin_unlock(&ctx->lock); | 1592 | raw_spin_unlock(&ctx->lock); | 
| 1614 | } | 1593 | } | 
| @@ -1626,32 +1605,38 @@ static void rotate_ctx(struct perf_event_context *ctx) | |||
| 1626 | raw_spin_unlock(&ctx->lock); | 1605 | raw_spin_unlock(&ctx->lock); | 
| 1627 | } | 1606 | } | 
| 1628 | 1607 | ||
| 1629 | void perf_event_task_tick(struct task_struct *curr) | 1608 | /* | 
| 1609 | * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized | ||
| 1610 | * because they're strictly cpu affine and rotate_start is called with IRQs | ||
| 1611 | * disabled, while rotate_context is called from IRQ context. | ||
| 1612 | */ | ||
| 1613 | static void perf_rotate_context(struct perf_cpu_context *cpuctx) | ||
| 1630 | { | 1614 | { | 
| 1631 | struct perf_cpu_context *cpuctx; | 1615 | u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC; | 
| 1632 | struct perf_event_context *ctx; | 1616 | struct perf_event_context *ctx = NULL; | 
| 1633 | int rotate = 0; | 1617 | int rotate = 0, remove = 1; | 
| 1634 | 1618 | ||
| 1635 | if (!atomic_read(&nr_events)) | 1619 | if (cpuctx->ctx.nr_events) { | 
| 1636 | return; | 1620 | remove = 0; | 
| 1637 | 1621 | if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) | |
| 1638 | cpuctx = &__get_cpu_var(perf_cpu_context); | 1622 | rotate = 1; | 
| 1639 | if (cpuctx->ctx.nr_events && | 1623 | } | 
| 1640 | cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) | ||
| 1641 | rotate = 1; | ||
| 1642 | 1624 | ||
| 1643 | ctx = curr->perf_event_ctxp; | 1625 | ctx = cpuctx->task_ctx; | 
| 1644 | if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active) | 1626 | if (ctx && ctx->nr_events) { | 
| 1645 | rotate = 1; | 1627 | remove = 0; | 
| 1628 | if (ctx->nr_events != ctx->nr_active) | ||
| 1629 | rotate = 1; | ||
| 1630 | } | ||
| 1646 | 1631 | ||
| 1647 | perf_ctx_adjust_freq(&cpuctx->ctx); | 1632 | perf_pmu_disable(cpuctx->ctx.pmu); | 
| 1633 | perf_ctx_adjust_freq(&cpuctx->ctx, interval); | ||
| 1648 | if (ctx) | 1634 | if (ctx) | 
| 1649 | perf_ctx_adjust_freq(ctx); | 1635 | perf_ctx_adjust_freq(ctx, interval); | 
| 1650 | 1636 | ||
| 1651 | if (!rotate) | 1637 | if (!rotate) | 
| 1652 | return; | 1638 | goto done; | 
| 1653 | 1639 | ||
| 1654 | perf_disable(); | ||
| 1655 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); | 1640 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); | 
| 1656 | if (ctx) | 1641 | if (ctx) | 
| 1657 | task_ctx_sched_out(ctx, EVENT_FLEXIBLE); | 1642 | task_ctx_sched_out(ctx, EVENT_FLEXIBLE); | 
| @@ -1662,8 +1647,27 @@ void perf_event_task_tick(struct task_struct *curr) | |||
| 1662 | 1647 | ||
| 1663 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); | 1648 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); | 
| 1664 | if (ctx) | 1649 | if (ctx) | 
| 1665 | task_ctx_sched_in(curr, EVENT_FLEXIBLE); | 1650 | task_ctx_sched_in(ctx, EVENT_FLEXIBLE); | 
| 1666 | perf_enable(); | 1651 | |
| 1652 | done: | ||
| 1653 | if (remove) | ||
| 1654 | list_del_init(&cpuctx->rotation_list); | ||
| 1655 | |||
| 1656 | perf_pmu_enable(cpuctx->ctx.pmu); | ||
| 1657 | } | ||
| 1658 | |||
| 1659 | void perf_event_task_tick(void) | ||
| 1660 | { | ||
| 1661 | struct list_head *head = &__get_cpu_var(rotation_list); | ||
| 1662 | struct perf_cpu_context *cpuctx, *tmp; | ||
| 1663 | |||
| 1664 | WARN_ON(!irqs_disabled()); | ||
| 1665 | |||
| 1666 | list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { | ||
| 1667 | if (cpuctx->jiffies_interval == 1 || | ||
| 1668 | !(jiffies % cpuctx->jiffies_interval)) | ||
| 1669 | perf_rotate_context(cpuctx); | ||
| 1670 | } | ||
| 1667 | } | 1671 | } | 
| 1668 | 1672 | ||
| 1669 | static int event_enable_on_exec(struct perf_event *event, | 1673 | static int event_enable_on_exec(struct perf_event *event, | 
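Note: rotation is now driven from a per-cpu rotation_list instead of per-task state: contexts add themselves in perf_pmu_rotate_start(), perf_event_task_tick() walks the list every tick, and perf_rotate_context() decides per context whether to multiplex or to drop off the list. That decision, stripped to a sketch (sketch_rotate_decision() is illustrative; the real work happens in perf_rotate_context() above):

    static void sketch_rotate_decision(struct perf_cpu_context *cpuctx)
    {
        struct perf_event_context *task_ctx = cpuctx->task_ctx;
        int rotate = 0, remove = 1;

        if (cpuctx->ctx.nr_events) {
            remove = 0;                  /* cpu context still has events */
            if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
                rotate = 1;              /* over-committed: multiplex */
        }
        if (task_ctx && task_ctx->nr_events) {
            remove = 0;                  /* task context still has events */
            if (task_ctx->nr_events != task_ctx->nr_active)
                rotate = 1;
        }

        if (rotate) {
            /* sched out flexible groups, rotate_ctx(), sched them back in,
             * so a different subset gets the hardware next interval */
        }
        if (remove)
            list_del_init(&cpuctx->rotation_list);   /* idle: stop ticking us */
    }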
| @@ -1685,20 +1689,18 @@ static int event_enable_on_exec(struct perf_event *event, | |||
| 1685 | * Enable all of a task's events that have been marked enable-on-exec. | 1689 | * Enable all of a task's events that have been marked enable-on-exec. | 
| 1686 | * This expects task == current. | 1690 | * This expects task == current. | 
| 1687 | */ | 1691 | */ | 
| 1688 | static void perf_event_enable_on_exec(struct task_struct *task) | 1692 | static void perf_event_enable_on_exec(struct perf_event_context *ctx) | 
| 1689 | { | 1693 | { | 
| 1690 | struct perf_event_context *ctx; | ||
| 1691 | struct perf_event *event; | 1694 | struct perf_event *event; | 
| 1692 | unsigned long flags; | 1695 | unsigned long flags; | 
| 1693 | int enabled = 0; | 1696 | int enabled = 0; | 
| 1694 | int ret; | 1697 | int ret; | 
| 1695 | 1698 | ||
| 1696 | local_irq_save(flags); | 1699 | local_irq_save(flags); | 
| 1697 | ctx = task->perf_event_ctxp; | ||
| 1698 | if (!ctx || !ctx->nr_events) | 1700 | if (!ctx || !ctx->nr_events) | 
| 1699 | goto out; | 1701 | goto out; | 
| 1700 | 1702 | ||
| 1701 | __perf_event_task_sched_out(ctx); | 1703 | task_ctx_sched_out(ctx, EVENT_ALL); | 
| 1702 | 1704 | ||
| 1703 | raw_spin_lock(&ctx->lock); | 1705 | raw_spin_lock(&ctx->lock); | 
| 1704 | 1706 | ||
| @@ -1722,8 +1724,8 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
| 1722 | 1724 | ||
| 1723 | raw_spin_unlock(&ctx->lock); | 1725 | raw_spin_unlock(&ctx->lock); | 
| 1724 | 1726 | ||
| 1725 | perf_event_task_sched_in(task); | 1727 | perf_event_context_sched_in(ctx); | 
| 1726 | out: | 1728 | out: | 
| 1727 | local_irq_restore(flags); | 1729 | local_irq_restore(flags); | 
| 1728 | } | 1730 | } | 
| 1729 | 1731 | ||
| @@ -1732,9 +1734,9 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
| 1732 | */ | 1734 | */ | 
| 1733 | static void __perf_event_read(void *info) | 1735 | static void __perf_event_read(void *info) | 
| 1734 | { | 1736 | { | 
| 1735 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
| 1736 | struct perf_event *event = info; | 1737 | struct perf_event *event = info; | 
| 1737 | struct perf_event_context *ctx = event->ctx; | 1738 | struct perf_event_context *ctx = event->ctx; | 
| 1739 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | ||
| 1738 | 1740 | ||
| 1739 | /* | 1741 | /* | 
| 1740 | * If this is a task context, we need to check whether it is | 1742 | * If this is a task context, we need to check whether it is | 
| @@ -1782,11 +1784,219 @@ static u64 perf_event_read(struct perf_event *event) | |||
| 1782 | } | 1784 | } | 
| 1783 | 1785 | ||
| 1784 | /* | 1786 | /* | 
| 1785 | * Initialize the perf_event context in a task_struct: | 1787 | * Callchain support | 
| 1786 | */ | 1788 | */ | 
| 1789 | |||
| 1790 | struct callchain_cpus_entries { | ||
| 1791 | struct rcu_head rcu_head; | ||
| 1792 | struct perf_callchain_entry *cpu_entries[0]; | ||
| 1793 | }; | ||
| 1794 | |||
| 1795 | static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); | ||
| 1796 | static atomic_t nr_callchain_events; | ||
| 1797 | static DEFINE_MUTEX(callchain_mutex); | ||
| 1798 | struct callchain_cpus_entries *callchain_cpus_entries; | ||
| 1799 | |||
| 1800 | |||
| 1801 | __weak void perf_callchain_kernel(struct perf_callchain_entry *entry, | ||
| 1802 | struct pt_regs *regs) | ||
| 1803 | { | ||
| 1804 | } | ||
| 1805 | |||
| 1806 | __weak void perf_callchain_user(struct perf_callchain_entry *entry, | ||
| 1807 | struct pt_regs *regs) | ||
| 1808 | { | ||
| 1809 | } | ||
| 1810 | |||
| 1811 | static void release_callchain_buffers_rcu(struct rcu_head *head) | ||
| 1812 | { | ||
| 1813 | struct callchain_cpus_entries *entries; | ||
| 1814 | int cpu; | ||
| 1815 | |||
| 1816 | entries = container_of(head, struct callchain_cpus_entries, rcu_head); | ||
| 1817 | |||
| 1818 | for_each_possible_cpu(cpu) | ||
| 1819 | kfree(entries->cpu_entries[cpu]); | ||
| 1820 | |||
| 1821 | kfree(entries); | ||
| 1822 | } | ||
| 1823 | |||
| 1824 | static void release_callchain_buffers(void) | ||
| 1825 | { | ||
| 1826 | struct callchain_cpus_entries *entries; | ||
| 1827 | |||
| 1828 | entries = callchain_cpus_entries; | ||
| 1829 | rcu_assign_pointer(callchain_cpus_entries, NULL); | ||
| 1830 | call_rcu(&entries->rcu_head, release_callchain_buffers_rcu); | ||
| 1831 | } | ||
| 1832 | |||
| 1833 | static int alloc_callchain_buffers(void) | ||
| 1834 | { | ||
| 1835 | int cpu; | ||
| 1836 | int size; | ||
| 1837 | struct callchain_cpus_entries *entries; | ||
| 1838 | |||
| 1839 | /* | ||
| 1840 | * We can't use the percpu allocation API for data that can be | ||
| 1841 | * accessed from NMI. Use a temporary manual per cpu allocation | ||
| 1842 | * until that gets sorted out. | ||
| 1843 | */ | ||
| 1844 | size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * | ||
| 1845 | num_possible_cpus(); | ||
| 1846 | |||
| 1847 | entries = kzalloc(size, GFP_KERNEL); | ||
| 1848 | if (!entries) | ||
| 1849 | return -ENOMEM; | ||
| 1850 | |||
| 1851 | size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; | ||
| 1852 | |||
| 1853 | for_each_possible_cpu(cpu) { | ||
| 1854 | entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, | ||
| 1855 | cpu_to_node(cpu)); | ||
| 1856 | if (!entries->cpu_entries[cpu]) | ||
| 1857 | goto fail; | ||
| 1858 | } | ||
| 1859 | |||
| 1860 | rcu_assign_pointer(callchain_cpus_entries, entries); | ||
| 1861 | |||
| 1862 | return 0; | ||
| 1863 | |||
| 1864 | fail: | ||
| 1865 | for_each_possible_cpu(cpu) | ||
| 1866 | kfree(entries->cpu_entries[cpu]); | ||
| 1867 | kfree(entries); | ||
| 1868 | |||
| 1869 | return -ENOMEM; | ||
| 1870 | } | ||
| 1871 | |||
| 1872 | static int get_callchain_buffers(void) | ||
| 1873 | { | ||
| 1874 | int err = 0; | ||
| 1875 | int count; | ||
| 1876 | |||
| 1877 | mutex_lock(&callchain_mutex); | ||
| 1878 | |||
| 1879 | count = atomic_inc_return(&nr_callchain_events); | ||
| 1880 | if (WARN_ON_ONCE(count < 1)) { | ||
| 1881 | err = -EINVAL; | ||
| 1882 | goto exit; | ||
| 1883 | } | ||
| 1884 | |||
| 1885 | if (count > 1) { | ||
| 1886 | /* If the allocation failed, give up */ | ||
| 1887 | if (!callchain_cpus_entries) | ||
| 1888 | err = -ENOMEM; | ||
| 1889 | goto exit; | ||
| 1890 | } | ||
| 1891 | |||
| 1892 | err = alloc_callchain_buffers(); | ||
| 1893 | if (err) | ||
| 1894 | release_callchain_buffers(); | ||
| 1895 | exit: | ||
| 1896 | mutex_unlock(&callchain_mutex); | ||
| 1897 | |||
| 1898 | return err; | ||
| 1899 | } | ||
| 1900 | |||
| 1901 | static void put_callchain_buffers(void) | ||
| 1902 | { | ||
| 1903 | if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) { | ||
| 1904 | release_callchain_buffers(); | ||
| 1905 | mutex_unlock(&callchain_mutex); | ||
| 1906 | } | ||
| 1907 | } | ||
| 1908 | |||
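Note: the callchain buffers introduced above are shared by every callchain-sampling event. The first get_callchain_buffers() allocates them, nr_callchain_events counts users, and the last put tears them down under callchain_mutex (atomic_dec_and_mutex_lock()), with the actual free deferred through RCU because NMI-context samplers may still hold the old pointer. The pairing an event's lifetime relies on, in sketch form (the sketch_* wrappers are illustrative; free_event() further down in this patch performs exactly the put half):

    static int sketch_event_init(struct perf_event *event)
    {
        if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
            return get_callchain_buffers();     /* first user allocates */
        return 0;
    }

    static void sketch_event_free(struct perf_event *event)
    {
        if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
            put_callchain_buffers();            /* last user frees via RCU */
    }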
| 1909 | static int get_recursion_context(int *recursion) | ||
| 1910 | { | ||
| 1911 | int rctx; | ||
| 1912 | |||
| 1913 | if (in_nmi()) | ||
| 1914 | rctx = 3; | ||
| 1915 | else if (in_irq()) | ||
| 1916 | rctx = 2; | ||
| 1917 | else if (in_softirq()) | ||
| 1918 | rctx = 1; | ||
| 1919 | else | ||
| 1920 | rctx = 0; | ||
| 1921 | |||
| 1922 | if (recursion[rctx]) | ||
| 1923 | return -1; | ||
| 1924 | |||
| 1925 | recursion[rctx]++; | ||
| 1926 | barrier(); | ||
| 1927 | |||
| 1928 | return rctx; | ||
| 1929 | } | ||
| 1930 | |||
| 1931 | static inline void put_recursion_context(int *recursion, int rctx) | ||
| 1932 | { | ||
| 1933 | barrier(); | ||
| 1934 | recursion[rctx]--; | ||
| 1935 | } | ||
| 1936 | |||
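Note: get_recursion_context() classifies the current execution context into one of four levels (task, softirq, hardirq, NMI) so that a sample taken from a more deeply nested context gets its own callchain buffer, while re-entry at the same level is refused. get_callchain_entry()/put_callchain_entry() below wrap it; the bracket a caller must keep looks like this sketch (sketch_sample() is illustrative):

    static void sketch_sample(void)
    {
        int rctx;

        rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
        if (rctx < 0)
            return;                     /* same-level recursion: drop the sample */

        /* ... safe to use the buffer reserved for level 'rctx' ... */

        put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
    }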
| 1937 | static struct perf_callchain_entry *get_callchain_entry(int *rctx) | ||
| 1938 | { | ||
| 1939 | int cpu; | ||
| 1940 | struct callchain_cpus_entries *entries; | ||
| 1941 | |||
| 1942 | *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); | ||
| 1943 | if (*rctx == -1) | ||
| 1944 | return NULL; | ||
| 1945 | |||
| 1946 | entries = rcu_dereference(callchain_cpus_entries); | ||
| 1947 | if (!entries) | ||
| 1948 | return NULL; | ||
| 1949 | |||
| 1950 | cpu = smp_processor_id(); | ||
| 1951 | |||
| 1952 | return &entries->cpu_entries[cpu][*rctx]; | ||
| 1953 | } | ||
| 1954 | |||
| 1787 | static void | 1955 | static void | 
| 1788 | __perf_event_init_context(struct perf_event_context *ctx, | 1956 | put_callchain_entry(int rctx) | 
| 1789 | struct task_struct *task) | 1957 | { | 
| 1958 | put_recursion_context(__get_cpu_var(callchain_recursion), rctx); | ||
| 1959 | } | ||
| 1960 | |||
| 1961 | static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
| 1962 | { | ||
| 1963 | int rctx; | ||
| 1964 | struct perf_callchain_entry *entry; | ||
| 1965 | |||
| 1966 | |||
| 1967 | entry = get_callchain_entry(&rctx); | ||
| 1968 | if (rctx == -1) | ||
| 1969 | return NULL; | ||
| 1970 | |||
| 1971 | if (!entry) | ||
| 1972 | goto exit_put; | ||
| 1973 | |||
| 1974 | entry->nr = 0; | ||
| 1975 | |||
| 1976 | if (!user_mode(regs)) { | ||
| 1977 | perf_callchain_store(entry, PERF_CONTEXT_KERNEL); | ||
| 1978 | perf_callchain_kernel(entry, regs); | ||
| 1979 | if (current->mm) | ||
| 1980 | regs = task_pt_regs(current); | ||
| 1981 | else | ||
| 1982 | regs = NULL; | ||
| 1983 | } | ||
| 1984 | |||
| 1985 | if (regs) { | ||
| 1986 | perf_callchain_store(entry, PERF_CONTEXT_USER); | ||
| 1987 | perf_callchain_user(entry, regs); | ||
| 1988 | } | ||
| 1989 | |||
| 1990 | exit_put: | ||
| 1991 | put_callchain_entry(rctx); | ||
| 1992 | |||
| 1993 | return entry; | ||
| 1994 | } | ||
| 1995 | |||
| 1996 | /* | ||
| 1997 | * Initialize the perf_event context in a task_struct: | ||
| 1998 | */ | ||
| 1999 | static void __perf_event_init_context(struct perf_event_context *ctx) | ||
| 1790 | { | 2000 | { | 
| 1791 | raw_spin_lock_init(&ctx->lock); | 2001 | raw_spin_lock_init(&ctx->lock); | 
| 1792 | mutex_init(&ctx->mutex); | 2002 | mutex_init(&ctx->mutex); | 
| @@ -1794,45 +2004,38 @@ __perf_event_init_context(struct perf_event_context *ctx, | |||
| 1794 | INIT_LIST_HEAD(&ctx->flexible_groups); | 2004 | INIT_LIST_HEAD(&ctx->flexible_groups); | 
| 1795 | INIT_LIST_HEAD(&ctx->event_list); | 2005 | INIT_LIST_HEAD(&ctx->event_list); | 
| 1796 | atomic_set(&ctx->refcount, 1); | 2006 | atomic_set(&ctx->refcount, 1); | 
| 1797 | ctx->task = task; | ||
| 1798 | } | 2007 | } | 
| 1799 | 2008 | ||
| 1800 | static struct perf_event_context *find_get_context(pid_t pid, int cpu) | 2009 | static struct perf_event_context * | 
| 2010 | alloc_perf_context(struct pmu *pmu, struct task_struct *task) | ||
| 1801 | { | 2011 | { | 
| 1802 | struct perf_event_context *ctx; | 2012 | struct perf_event_context *ctx; | 
| 1803 | struct perf_cpu_context *cpuctx; | ||
| 1804 | struct task_struct *task; | ||
| 1805 | unsigned long flags; | ||
| 1806 | int err; | ||
| 1807 | |||
| 1808 | if (pid == -1 && cpu != -1) { | ||
| 1809 | /* Must be root to operate on a CPU event: */ | ||
| 1810 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
| 1811 | return ERR_PTR(-EACCES); | ||
| 1812 | 2013 | ||
| 1813 | if (cpu < 0 || cpu >= nr_cpumask_bits) | 2014 | ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); | 
| 1814 | return ERR_PTR(-EINVAL); | 2015 | if (!ctx) | 
| 2016 | return NULL; | ||
| 1815 | 2017 | ||
| 1816 | /* | 2018 | __perf_event_init_context(ctx); | 
| 1817 | * We could be clever and allow to attach a event to an | 2019 | if (task) { | 
| 1818 | * offline CPU and activate it when the CPU comes up, but | 2020 | ctx->task = task; | 
| 1819 | * that's for later. | 2021 | get_task_struct(task); | 
| 1820 | */ | 2022 | } | 
| 1821 | if (!cpu_online(cpu)) | 2023 | ctx->pmu = pmu; | 
| 1822 | return ERR_PTR(-ENODEV); | ||
| 1823 | 2024 | ||
| 1824 | cpuctx = &per_cpu(perf_cpu_context, cpu); | 2025 | return ctx; | 
| 1825 | ctx = &cpuctx->ctx; | 2026 | } | 
| 1826 | get_ctx(ctx); | ||
| 1827 | 2027 | ||
| 1828 | return ctx; | 2028 | static struct task_struct * | 
| 1829 | } | 2029 | find_lively_task_by_vpid(pid_t vpid) | 
| 2030 | { | ||
| 2031 | struct task_struct *task; | ||
| 2032 | int err; | ||
| 1830 | 2033 | ||
| 1831 | rcu_read_lock(); | 2034 | rcu_read_lock(); | 
| 1832 | if (!pid) | 2035 | if (!vpid) | 
| 1833 | task = current; | 2036 | task = current; | 
| 1834 | else | 2037 | else | 
| 1835 | task = find_task_by_vpid(pid); | 2038 | task = find_task_by_vpid(vpid); | 
| 1836 | if (task) | 2039 | if (task) | 
| 1837 | get_task_struct(task); | 2040 | get_task_struct(task); | 
| 1838 | rcu_read_unlock(); | 2041 | rcu_read_unlock(); | 
| @@ -1852,35 +2055,79 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) | |||
| 1852 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 2055 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 
| 1853 | goto errout; | 2056 | goto errout; | 
| 1854 | 2057 | ||
| 1855 | retry: | 2058 | return task; | 
| 1856 | ctx = perf_lock_task_context(task, &flags); | 2059 | errout: | 
| 2060 | put_task_struct(task); | ||
| 2061 | return ERR_PTR(err); | ||
| 2062 | |||
| 2063 | } | ||
| 2064 | |||
| 2065 | static struct perf_event_context * | ||
| 2066 | find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) | ||
| 2067 | { | ||
| 2068 | struct perf_event_context *ctx; | ||
| 2069 | struct perf_cpu_context *cpuctx; | ||
| 2070 | unsigned long flags; | ||
| 2071 | int ctxn, err; | ||
| 2072 | |||
| 2073 | if (!task && cpu != -1) { | ||
| 2074 | /* Must be root to operate on a CPU event: */ | ||
| 2075 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
| 2076 | return ERR_PTR(-EACCES); | ||
| 2077 | |||
| 2078 | if (cpu < 0 || cpu >= nr_cpumask_bits) | ||
| 2079 | return ERR_PTR(-EINVAL); | ||
| 2080 | |||
| 2081 | /* | ||
| 2082 | * We could be clever and allow to attach a event to an | ||
| 2083 | * offline CPU and activate it when the CPU comes up, but | ||
| 2084 | * that's for later. | ||
| 2085 | */ | ||
| 2086 | if (!cpu_online(cpu)) | ||
| 2087 | return ERR_PTR(-ENODEV); | ||
| 2088 | |||
| 2089 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | ||
| 2090 | ctx = &cpuctx->ctx; | ||
| 2091 | get_ctx(ctx); | ||
| 2092 | |||
| 2093 | return ctx; | ||
| 2094 | } | ||
| 2095 | |||
| 2096 | err = -EINVAL; | ||
| 2097 | ctxn = pmu->task_ctx_nr; | ||
| 2098 | if (ctxn < 0) | ||
| 2099 | goto errout; | ||
| 2100 | |||
| 2101 | retry: | ||
| 2102 | ctx = perf_lock_task_context(task, ctxn, &flags); | ||
| 1857 | if (ctx) { | 2103 | if (ctx) { | 
| 1858 | unclone_ctx(ctx); | 2104 | unclone_ctx(ctx); | 
| 1859 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 2105 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 
| 1860 | } | 2106 | } | 
| 1861 | 2107 | ||
| 1862 | if (!ctx) { | 2108 | if (!ctx) { | 
| 1863 | ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); | 2109 | ctx = alloc_perf_context(pmu, task); | 
| 1864 | err = -ENOMEM; | 2110 | err = -ENOMEM; | 
| 1865 | if (!ctx) | 2111 | if (!ctx) | 
| 1866 | goto errout; | 2112 | goto errout; | 
| 1867 | __perf_event_init_context(ctx, task); | 2113 | |
| 1868 | get_ctx(ctx); | 2114 | get_ctx(ctx); | 
| 1869 | if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { | 2115 | |
| 2116 | if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { | ||
| 1870 | /* | 2117 | /* | 
| 1871 | * We raced with some other task; use | 2118 | * We raced with some other task; use | 
| 1872 | * the context they set. | 2119 | * the context they set. | 
| 1873 | */ | 2120 | */ | 
| 2121 | put_task_struct(task); | ||
| 1874 | kfree(ctx); | 2122 | kfree(ctx); | 
| 1875 | goto retry; | 2123 | goto retry; | 
| 1876 | } | 2124 | } | 
| 1877 | get_task_struct(task); | ||
| 1878 | } | 2125 | } | 
| 1879 | 2126 | ||
| 1880 | put_task_struct(task); | 2127 | put_task_struct(task); | 
| 1881 | return ctx; | 2128 | return ctx; | 
| 1882 | 2129 | ||
| 1883 | errout: | 2130 | errout: | 
| 1884 | put_task_struct(task); | 2131 | put_task_struct(task); | 
| 1885 | return ERR_PTR(err); | 2132 | return ERR_PTR(err); | 
| 1886 | } | 2133 | } | 
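Note: find_get_context() now takes the pmu explicitly. CPU-wide events resolve to that pmu's own per-cpu context; per-task events go into the task slot selected by pmu->task_ctx_nr, and a pmu that reports no valid task slot (negative index) makes the open fail with -EINVAL. The selection logic, stripped of locking, refcounting and the cmpxchg install race (sketch_find_ctx() is illustrative only):

    static struct perf_event_context *
    sketch_find_ctx(struct pmu *pmu, struct task_struct *task, int cpu)
    {
        if (!task)                      /* cpu-wide event: the pmu's per-cpu context */
            return &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;

        if (pmu->task_ctx_nr < 0)
            return ERR_PTR(-EINVAL);    /* this pmu has no per-task contexts */

        /* per-task event: reuse task->perf_event_ctxp[pmu->task_ctx_nr],
         * or allocate one with alloc_perf_context() and install it */
        return NULL;                    /* placeholder in this sketch */
    }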
| @@ -1913,6 +2160,8 @@ static void free_event(struct perf_event *event) | |||
| 1913 | atomic_dec(&nr_comm_events); | 2160 | atomic_dec(&nr_comm_events); | 
| 1914 | if (event->attr.task) | 2161 | if (event->attr.task) | 
| 1915 | atomic_dec(&nr_task_events); | 2162 | atomic_dec(&nr_task_events); | 
| 2163 | if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) | ||
| 2164 | put_callchain_buffers(); | ||
| 1916 | } | 2165 | } | 
| 1917 | 2166 | ||
| 1918 | if (event->buffer) { | 2167 | if (event->buffer) { | 
| @@ -1923,7 +2172,9 @@ static void free_event(struct perf_event *event) | |||
| 1923 | if (event->destroy) | 2172 | if (event->destroy) | 
| 1924 | event->destroy(event); | 2173 | event->destroy(event); | 
| 1925 | 2174 | ||
| 1926 | put_ctx(event->ctx); | 2175 | if (event->ctx) | 
| 2176 | put_ctx(event->ctx); | ||
| 2177 | |||
| 1927 | call_rcu(&event->rcu_head, free_event_rcu); | 2178 | call_rcu(&event->rcu_head, free_event_rcu); | 
| 1928 | } | 2179 | } | 
| 1929 | 2180 | ||
| @@ -2344,6 +2595,9 @@ int perf_event_task_disable(void) | |||
| 2344 | 2595 | ||
| 2345 | static int perf_event_index(struct perf_event *event) | 2596 | static int perf_event_index(struct perf_event *event) | 
| 2346 | { | 2597 | { | 
| 2598 | if (event->hw.state & PERF_HES_STOPPED) | ||
| 2599 | return 0; | ||
| 2600 | |||
| 2347 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 2601 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 
| 2348 | return 0; | 2602 | return 0; | 
| 2349 | 2603 | ||
| @@ -2956,16 +3210,6 @@ void perf_event_do_pending(void) | |||
| 2956 | } | 3210 | } | 
| 2957 | 3211 | ||
| 2958 | /* | 3212 | /* | 
| 2959 | * Callchain support -- arch specific | ||
| 2960 | */ | ||
| 2961 | |||
| 2962 | __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
| 2963 | { | ||
| 2964 | return NULL; | ||
| 2965 | } | ||
| 2966 | |||
| 2967 | |||
| 2968 | /* | ||
| 2969 | * We assume there is only KVM supporting the callbacks. | 3213 | * We assume there is only KVM supporting the callbacks. | 
| 2970 | * Later on, we might change it to a list if there is | 3214 | * Later on, we might change it to a list if there is | 
| 2971 | * another virtualization implementation supporting the callbacks. | 3215 | * another virtualization implementation supporting the callbacks. | 
| @@ -3071,7 +3315,7 @@ again: | |||
| 3071 | if (handle->wakeup != local_read(&buffer->wakeup)) | 3315 | if (handle->wakeup != local_read(&buffer->wakeup)) | 
| 3072 | perf_output_wakeup(handle); | 3316 | perf_output_wakeup(handle); | 
| 3073 | 3317 | ||
| 3074 | out: | 3318 | out: | 
| 3075 | preempt_enable(); | 3319 | preempt_enable(); | 
| 3076 | } | 3320 | } | 
| 3077 | 3321 | ||
| @@ -3459,14 +3703,20 @@ static void perf_event_output(struct perf_event *event, int nmi, | |||
| 3459 | struct perf_output_handle handle; | 3703 | struct perf_output_handle handle; | 
| 3460 | struct perf_event_header header; | 3704 | struct perf_event_header header; | 
| 3461 | 3705 | ||
| 3706 | /* protect the callchain buffers */ | ||
| 3707 | rcu_read_lock(); | ||
| 3708 | |||
| 3462 | perf_prepare_sample(&header, data, event, regs); | 3709 | perf_prepare_sample(&header, data, event, regs); | 
| 3463 | 3710 | ||
| 3464 | if (perf_output_begin(&handle, event, header.size, nmi, 1)) | 3711 | if (perf_output_begin(&handle, event, header.size, nmi, 1)) | 
| 3465 | return; | 3712 | goto exit; | 
| 3466 | 3713 | ||
| 3467 | perf_output_sample(&handle, &header, data, event); | 3714 | perf_output_sample(&handle, &header, data, event); | 
| 3468 | 3715 | ||
| 3469 | perf_output_end(&handle); | 3716 | perf_output_end(&handle); | 
| 3717 | |||
| 3718 | exit: | ||
| 3719 | rcu_read_unlock(); | ||
| 3470 | } | 3720 | } | 
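The rcu_read_lock()/rcu_read_unlock() pair added here brackets the whole sample output because the callchain buffers are only pinned by the get_callchain_buffers()/put_callchain_buffers() pairing done at event creation and in free_event(); holding the RCU read section across prepare/output presumably lets the final put defer the actual free until in-flight writers have finished. A sketch of that release shape, with every name below invented (the real helpers live outside this file):

struct callchain_bufs {
        struct rcu_head rcu_head;
        /* per-cpu entry storage would live here */
};

static void callchain_bufs_free_rcu(struct rcu_head *head)
{
        kfree(container_of(head, struct callchain_bufs, rcu_head));
}

static void put_callchain_bufs(struct callchain_bufs *bufs, atomic_t *refcount)
{
        if (atomic_dec_and_test(refcount))
                call_rcu(&bufs->rcu_head, callchain_bufs_free_rcu);
}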
| 3471 | 3721 | ||
| 3472 | /* | 3722 | /* | 
| @@ -3580,16 +3830,27 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, | |||
| 3580 | static void perf_event_task_event(struct perf_task_event *task_event) | 3830 | static void perf_event_task_event(struct perf_task_event *task_event) | 
| 3581 | { | 3831 | { | 
| 3582 | struct perf_cpu_context *cpuctx; | 3832 | struct perf_cpu_context *cpuctx; | 
| 3583 | struct perf_event_context *ctx = task_event->task_ctx; | 3833 | struct perf_event_context *ctx; | 
| 3834 | struct pmu *pmu; | ||
| 3835 | int ctxn; | ||
| 3584 | 3836 | ||
| 3585 | rcu_read_lock(); | 3837 | rcu_read_lock(); | 
| 3586 | cpuctx = &get_cpu_var(perf_cpu_context); | 3838 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 
| 3587 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3839 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 
| 3588 | if (!ctx) | 3840 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 
| 3589 | ctx = rcu_dereference(current->perf_event_ctxp); | 3841 | |
| 3590 | if (ctx) | 3842 | ctx = task_event->task_ctx; | 
| 3591 | perf_event_task_ctx(ctx, task_event); | 3843 | if (!ctx) { | 
| 3592 | put_cpu_var(perf_cpu_context); | 3844 | ctxn = pmu->task_ctx_nr; | 
| 3845 | if (ctxn < 0) | ||
| 3846 | goto next; | ||
| 3847 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); | ||
| 3848 | } | ||
| 3849 | if (ctx) | ||
| 3850 | perf_event_task_ctx(ctx, task_event); | ||
| 3851 | next: | ||
| 3852 | put_cpu_ptr(pmu->pmu_cpu_context); | ||
| 3853 | } | ||
| 3593 | rcu_read_unlock(); | 3854 | rcu_read_unlock(); | 
| 3594 | } | 3855 | } | 
| 3595 | 3856 | ||
| @@ -3694,8 +3955,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
| 3694 | { | 3955 | { | 
| 3695 | struct perf_cpu_context *cpuctx; | 3956 | struct perf_cpu_context *cpuctx; | 
| 3696 | struct perf_event_context *ctx; | 3957 | struct perf_event_context *ctx; | 
| 3697 | unsigned int size; | ||
| 3698 | char comm[TASK_COMM_LEN]; | 3958 | char comm[TASK_COMM_LEN]; | 
| 3959 | unsigned int size; | ||
| 3960 | struct pmu *pmu; | ||
| 3961 | int ctxn; | ||
| 3699 | 3962 | ||
| 3700 | memset(comm, 0, sizeof(comm)); | 3963 | memset(comm, 0, sizeof(comm)); | 
| 3701 | strlcpy(comm, comm_event->task->comm, sizeof(comm)); | 3964 | strlcpy(comm, comm_event->task->comm, sizeof(comm)); | 
| @@ -3707,21 +3970,36 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
| 3707 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; | 3970 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; | 
| 3708 | 3971 | ||
| 3709 | rcu_read_lock(); | 3972 | rcu_read_lock(); | 
| 3710 | cpuctx = &get_cpu_var(perf_cpu_context); | 3973 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 
| 3711 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 3974 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 
| 3712 | ctx = rcu_dereference(current->perf_event_ctxp); | 3975 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 
| 3713 | if (ctx) | 3976 | |
| 3714 | perf_event_comm_ctx(ctx, comm_event); | 3977 | ctxn = pmu->task_ctx_nr; | 
| 3715 | put_cpu_var(perf_cpu_context); | 3978 | if (ctxn < 0) | 
| 3979 | goto next; | ||
| 3980 | |||
| 3981 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); | ||
| 3982 | if (ctx) | ||
| 3983 | perf_event_comm_ctx(ctx, comm_event); | ||
| 3984 | next: | ||
| 3985 | put_cpu_ptr(pmu->pmu_cpu_context); | ||
| 3986 | } | ||
| 3716 | rcu_read_unlock(); | 3987 | rcu_read_unlock(); | 
| 3717 | } | 3988 | } | 
| 3718 | 3989 | ||
| 3719 | void perf_event_comm(struct task_struct *task) | 3990 | void perf_event_comm(struct task_struct *task) | 
| 3720 | { | 3991 | { | 
| 3721 | struct perf_comm_event comm_event; | 3992 | struct perf_comm_event comm_event; | 
| 3993 | struct perf_event_context *ctx; | ||
| 3994 | int ctxn; | ||
| 3722 | 3995 | ||
| 3723 | if (task->perf_event_ctxp) | 3996 | for_each_task_context_nr(ctxn) { | 
| 3724 | perf_event_enable_on_exec(task); | 3997 | ctx = task->perf_event_ctxp[ctxn]; | 
| 3998 | if (!ctx) | ||
| 3999 | continue; | ||
| 4000 | |||
| 4001 | perf_event_enable_on_exec(ctx); | ||
| 4002 | } | ||
| 3725 | 4003 | ||
| 3726 | if (!atomic_read(&nr_comm_events)) | 4004 | if (!atomic_read(&nr_comm_events)) | 
| 3727 | return; | 4005 | return; | 
| @@ -3823,6 +4101,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) | |||
| 3823 | char tmp[16]; | 4101 | char tmp[16]; | 
| 3824 | char *buf = NULL; | 4102 | char *buf = NULL; | 
| 3825 | const char *name; | 4103 | const char *name; | 
| 4104 | struct pmu *pmu; | ||
| 4105 | int ctxn; | ||
| 3826 | 4106 | ||
| 3827 | memset(tmp, 0, sizeof(tmp)); | 4107 | memset(tmp, 0, sizeof(tmp)); | 
| 3828 | 4108 | ||
| @@ -3875,12 +4155,23 @@ got_name: | |||
| 3875 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; | 4155 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; | 
| 3876 | 4156 | ||
| 3877 | rcu_read_lock(); | 4157 | rcu_read_lock(); | 
| 3878 | cpuctx = &get_cpu_var(perf_cpu_context); | 4158 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 
| 3879 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); | 4159 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 
| 3880 | ctx = rcu_dereference(current->perf_event_ctxp); | 4160 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, | 
| 3881 | if (ctx) | 4161 | vma->vm_flags & VM_EXEC); | 
| 3882 | perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC); | 4162 | |
| 3883 | put_cpu_var(perf_cpu_context); | 4163 | ctxn = pmu->task_ctx_nr; | 
| 4164 | if (ctxn < 0) | ||
| 4165 | goto next; | ||
| 4166 | |||
| 4167 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); | ||
| 4168 | if (ctx) { | ||
| 4169 | perf_event_mmap_ctx(ctx, mmap_event, | ||
| 4170 | vma->vm_flags & VM_EXEC); | ||
| 4171 | } | ||
| 4172 | next: | ||
| 4173 | put_cpu_ptr(pmu->pmu_cpu_context); | ||
| 4174 | } | ||
| 3884 | rcu_read_unlock(); | 4175 | rcu_read_unlock(); | 
| 3885 | 4176 | ||
| 3886 | kfree(buf); | 4177 | kfree(buf); | 
| @@ -3962,8 +4253,6 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
| 3962 | struct hw_perf_event *hwc = &event->hw; | 4253 | struct hw_perf_event *hwc = &event->hw; | 
| 3963 | int ret = 0; | 4254 | int ret = 0; | 
| 3964 | 4255 | ||
| 3965 | throttle = (throttle && event->pmu->unthrottle != NULL); | ||
| 3966 | |||
| 3967 | if (!throttle) { | 4256 | if (!throttle) { | 
| 3968 | hwc->interrupts++; | 4257 | hwc->interrupts++; | 
| 3969 | } else { | 4258 | } else { | 
| @@ -4031,6 +4320,17 @@ int perf_event_overflow(struct perf_event *event, int nmi, | |||
| 4031 | * Generic software event infrastructure | 4320 | * Generic software event infrastructure | 
| 4032 | */ | 4321 | */ | 
| 4033 | 4322 | ||
| 4323 | struct swevent_htable { | ||
| 4324 | struct swevent_hlist *swevent_hlist; | ||
| 4325 | struct mutex hlist_mutex; | ||
| 4326 | int hlist_refcount; | ||
| 4327 | |||
| 4328 | /* Recursion avoidance in each context */ | ||
| 4329 | int recursion[PERF_NR_CONTEXTS]; | ||
| 4330 | }; | ||
| 4331 | |||
| 4332 | static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); | ||
| 4333 | |||
| 4034 | /* | 4334 | /* | 
| 4035 | * We directly increment event->count and keep a second value in | 4335 | * We directly increment event->count and keep a second value in | 
| 4036 | * event->hw.period_left to count intervals. This period event | 4336 | * event->hw.period_left to count intervals. This period event | 
| @@ -4088,7 +4388,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, | |||
| 4088 | } | 4388 | } | 
| 4089 | } | 4389 | } | 
| 4090 | 4390 | ||
| 4091 | static void perf_swevent_add(struct perf_event *event, u64 nr, | 4391 | static void perf_swevent_event(struct perf_event *event, u64 nr, | 
| 4092 | int nmi, struct perf_sample_data *data, | 4392 | int nmi, struct perf_sample_data *data, | 
| 4093 | struct pt_regs *regs) | 4393 | struct pt_regs *regs) | 
| 4094 | { | 4394 | { | 
| @@ -4114,6 +4414,9 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, | |||
| 4114 | static int perf_exclude_event(struct perf_event *event, | 4414 | static int perf_exclude_event(struct perf_event *event, | 
| 4115 | struct pt_regs *regs) | 4415 | struct pt_regs *regs) | 
| 4116 | { | 4416 | { | 
| 4417 | if (event->hw.state & PERF_HES_STOPPED) | ||
| 4418 | return 0; | ||
| 4419 | |||
| 4117 | if (regs) { | 4420 | if (regs) { | 
| 4118 | if (event->attr.exclude_user && user_mode(regs)) | 4421 | if (event->attr.exclude_user && user_mode(regs)) | 
| 4119 | return 1; | 4422 | return 1; | 
| @@ -4160,11 +4463,11 @@ __find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id) | |||
| 4160 | 4463 | ||
| 4161 | /* For the read side: events when they trigger */ | 4464 | /* For the read side: events when they trigger */ | 
| 4162 | static inline struct hlist_head * | 4465 | static inline struct hlist_head * | 
| 4163 | find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) | 4466 | find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id) | 
| 4164 | { | 4467 | { | 
| 4165 | struct swevent_hlist *hlist; | 4468 | struct swevent_hlist *hlist; | 
| 4166 | 4469 | ||
| 4167 | hlist = rcu_dereference(ctx->swevent_hlist); | 4470 | hlist = rcu_dereference(swhash->swevent_hlist); | 
| 4168 | if (!hlist) | 4471 | if (!hlist) | 
| 4169 | return NULL; | 4472 | return NULL; | 
| 4170 | 4473 | ||
| @@ -4173,7 +4476,7 @@ find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) | |||
| 4173 | 4476 | ||
| 4174 | /* For the event head insertion and removal in the hlist */ | 4477 | /* For the event head insertion and removal in the hlist */ | 
| 4175 | static inline struct hlist_head * | 4478 | static inline struct hlist_head * | 
| 4176 | find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) | 4479 | find_swevent_head(struct swevent_htable *swhash, struct perf_event *event) | 
| 4177 | { | 4480 | { | 
| 4178 | struct swevent_hlist *hlist; | 4481 | struct swevent_hlist *hlist; | 
| 4179 | u32 event_id = event->attr.config; | 4482 | u32 event_id = event->attr.config; | 
| @@ -4184,7 +4487,7 @@ find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) | |||
| 4184 | * and release. Which makes the protected version suitable here. | 4487 | * and release. Which makes the protected version suitable here. | 
| 4185 | * The context lock guarantees that. | 4488 | * The context lock guarantees that. | 
| 4186 | */ | 4489 | */ | 
| 4187 | hlist = rcu_dereference_protected(ctx->swevent_hlist, | 4490 | hlist = rcu_dereference_protected(swhash->swevent_hlist, | 
| 4188 | lockdep_is_held(&event->ctx->lock)); | 4491 | lockdep_is_held(&event->ctx->lock)); | 
| 4189 | if (!hlist) | 4492 | if (!hlist) | 
| 4190 | return NULL; | 4493 | return NULL; | 
| @@ -4197,23 +4500,19 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | |||
| 4197 | struct perf_sample_data *data, | 4500 | struct perf_sample_data *data, | 
| 4198 | struct pt_regs *regs) | 4501 | struct pt_regs *regs) | 
| 4199 | { | 4502 | { | 
| 4200 | struct perf_cpu_context *cpuctx; | 4503 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); | 
| 4201 | struct perf_event *event; | 4504 | struct perf_event *event; | 
| 4202 | struct hlist_node *node; | 4505 | struct hlist_node *node; | 
| 4203 | struct hlist_head *head; | 4506 | struct hlist_head *head; | 
| 4204 | 4507 | ||
| 4205 | cpuctx = &__get_cpu_var(perf_cpu_context); | ||
| 4206 | |||
| 4207 | rcu_read_lock(); | 4508 | rcu_read_lock(); | 
| 4208 | 4509 | head = find_swevent_head_rcu(swhash, type, event_id); | |
| 4209 | head = find_swevent_head_rcu(cpuctx, type, event_id); | ||
| 4210 | |||
| 4211 | if (!head) | 4510 | if (!head) | 
| 4212 | goto end; | 4511 | goto end; | 
| 4213 | 4512 | ||
| 4214 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { | 4513 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { | 
| 4215 | if (perf_swevent_match(event, type, event_id, data, regs)) | 4514 | if (perf_swevent_match(event, type, event_id, data, regs)) | 
| 4216 | perf_swevent_add(event, nr, nmi, data, regs); | 4515 | perf_swevent_event(event, nr, nmi, data, regs); | 
| 4217 | } | 4516 | } | 
| 4218 | end: | 4517 | end: | 
| 4219 | rcu_read_unlock(); | 4518 | rcu_read_unlock(); | 
| @@ -4221,33 +4520,17 @@ end: | |||
| 4221 | 4520 | ||
| 4222 | int perf_swevent_get_recursion_context(void) | 4521 | int perf_swevent_get_recursion_context(void) | 
| 4223 | { | 4522 | { | 
| 4224 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 4523 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); | 
| 4225 | int rctx; | ||
| 4226 | |||
| 4227 | if (in_nmi()) | ||
| 4228 | rctx = 3; | ||
| 4229 | else if (in_irq()) | ||
| 4230 | rctx = 2; | ||
| 4231 | else if (in_softirq()) | ||
| 4232 | rctx = 1; | ||
| 4233 | else | ||
| 4234 | rctx = 0; | ||
| 4235 | 4524 | ||
| 4236 | if (cpuctx->recursion[rctx]) | 4525 | return get_recursion_context(swhash->recursion); | 
| 4237 | return -1; | ||
| 4238 | |||
| 4239 | cpuctx->recursion[rctx]++; | ||
| 4240 | barrier(); | ||
| 4241 | |||
| 4242 | return rctx; | ||
| 4243 | } | 4526 | } | 
| 4244 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); | 4527 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); | 
| 4245 | 4528 | ||
| 4246 | void inline perf_swevent_put_recursion_context(int rctx) | 4529 | void inline perf_swevent_put_recursion_context(int rctx) | 
| 4247 | { | 4530 | { | 
| 4248 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 4531 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); | 
| 4249 | barrier(); | 4532 | |
| 4250 | cpuctx->recursion[rctx]--; | 4533 | put_recursion_context(swhash->recursion, rctx); | 
| 4251 | } | 4534 | } | 
| 4252 | 4535 | ||
| 4253 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, | 4536 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, | 
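The open-coded recursion tracking that used to live here (the in_nmi()/in_irq()/in_softirq() laddering removed above) is now hidden behind get_recursion_context() and put_recursion_context(), operating on the per-CPU swhash->recursion[] array. Their definitions are not part of this file after the patch; a sketch consistent with the removed code would be:

/* Sketch only; mirrors the logic that was open-coded above. */
static inline int get_recursion_context(int *recursion)
{
        int rctx;

        if (in_nmi())
                rctx = 3;
        else if (in_irq())
                rctx = 2;
        else if (in_softirq())
                rctx = 1;
        else
                rctx = 0;

        if (recursion[rctx])
                return -1;              /* already inside a swevent at this level */

        recursion[rctx]++;
        barrier();

        return rctx;
}

static inline void put_recursion_context(int *recursion, int rctx)
{
        barrier();
        recursion[rctx]--;
}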
| @@ -4273,20 +4556,20 @@ static void perf_swevent_read(struct perf_event *event) | |||
| 4273 | { | 4556 | { | 
| 4274 | } | 4557 | } | 
| 4275 | 4558 | ||
| 4276 | static int perf_swevent_enable(struct perf_event *event) | 4559 | static int perf_swevent_add(struct perf_event *event, int flags) | 
| 4277 | { | 4560 | { | 
| 4561 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); | ||
| 4278 | struct hw_perf_event *hwc = &event->hw; | 4562 | struct hw_perf_event *hwc = &event->hw; | 
| 4279 | struct perf_cpu_context *cpuctx; | ||
| 4280 | struct hlist_head *head; | 4563 | struct hlist_head *head; | 
| 4281 | 4564 | ||
| 4282 | cpuctx = &__get_cpu_var(perf_cpu_context); | ||
| 4283 | |||
| 4284 | if (hwc->sample_period) { | 4565 | if (hwc->sample_period) { | 
| 4285 | hwc->last_period = hwc->sample_period; | 4566 | hwc->last_period = hwc->sample_period; | 
| 4286 | perf_swevent_set_period(event); | 4567 | perf_swevent_set_period(event); | 
| 4287 | } | 4568 | } | 
| 4288 | 4569 | ||
| 4289 | head = find_swevent_head(cpuctx, event); | 4570 | hwc->state = !(flags & PERF_EF_START); | 
| 4571 | |||
| 4572 | head = find_swevent_head(swhash, event); | ||
| 4290 | if (WARN_ON_ONCE(!head)) | 4573 | if (WARN_ON_ONCE(!head)) | 
| 4291 | return -EINVAL; | 4574 | return -EINVAL; | 
| 4292 | 4575 | ||
| @@ -4295,202 +4578,27 @@ static int perf_swevent_enable(struct perf_event *event) | |||
| 4295 | return 0; | 4578 | return 0; | 
| 4296 | } | 4579 | } | 
| 4297 | 4580 | ||
| 4298 | static void perf_swevent_disable(struct perf_event *event) | 4581 | static void perf_swevent_del(struct perf_event *event, int flags) | 
| 4299 | { | 4582 | { | 
| 4300 | hlist_del_rcu(&event->hlist_entry); | 4583 | hlist_del_rcu(&event->hlist_entry); | 
| 4301 | } | 4584 | } | 
| 4302 | 4585 | ||
| 4303 | static void perf_swevent_void(struct perf_event *event) | 4586 | static void perf_swevent_start(struct perf_event *event, int flags) | 
| 4304 | { | ||
| 4305 | } | ||
| 4306 | |||
| 4307 | static int perf_swevent_int(struct perf_event *event) | ||
| 4308 | { | ||
| 4309 | return 0; | ||
| 4310 | } | ||
| 4311 | |||
| 4312 | static const struct pmu perf_ops_generic = { | ||
| 4313 | .enable = perf_swevent_enable, | ||
| 4314 | .disable = perf_swevent_disable, | ||
| 4315 | .start = perf_swevent_int, | ||
| 4316 | .stop = perf_swevent_void, | ||
| 4317 | .read = perf_swevent_read, | ||
| 4318 | .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ | ||
| 4319 | }; | ||
| 4320 | |||
| 4321 | /* | ||
| 4322 | * hrtimer based swevent callback | ||
| 4323 | */ | ||
| 4324 | |||
| 4325 | static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | ||
| 4326 | { | ||
| 4327 | enum hrtimer_restart ret = HRTIMER_RESTART; | ||
| 4328 | struct perf_sample_data data; | ||
| 4329 | struct pt_regs *regs; | ||
| 4330 | struct perf_event *event; | ||
| 4331 | u64 period; | ||
| 4332 | |||
| 4333 | event = container_of(hrtimer, struct perf_event, hw.hrtimer); | ||
| 4334 | event->pmu->read(event); | ||
| 4335 | |||
| 4336 | perf_sample_data_init(&data, 0); | ||
| 4337 | data.period = event->hw.last_period; | ||
| 4338 | regs = get_irq_regs(); | ||
| 4339 | |||
| 4340 | if (regs && !perf_exclude_event(event, regs)) { | ||
| 4341 | if (!(event->attr.exclude_idle && current->pid == 0)) | ||
| 4342 | if (perf_event_overflow(event, 0, &data, regs)) | ||
| 4343 | ret = HRTIMER_NORESTART; | ||
| 4344 | } | ||
| 4345 | |||
| 4346 | period = max_t(u64, 10000, event->hw.sample_period); | ||
| 4347 | hrtimer_forward_now(hrtimer, ns_to_ktime(period)); | ||
| 4348 | |||
| 4349 | return ret; | ||
| 4350 | } | ||
| 4351 | |||
| 4352 | static void perf_swevent_start_hrtimer(struct perf_event *event) | ||
| 4353 | { | 4587 | { | 
| 4354 | struct hw_perf_event *hwc = &event->hw; | 4588 | event->hw.state = 0; | 
| 4355 | |||
| 4356 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
| 4357 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
| 4358 | if (hwc->sample_period) { | ||
| 4359 | u64 period; | ||
| 4360 | |||
| 4361 | if (hwc->remaining) { | ||
| 4362 | if (hwc->remaining < 0) | ||
| 4363 | period = 10000; | ||
| 4364 | else | ||
| 4365 | period = hwc->remaining; | ||
| 4366 | hwc->remaining = 0; | ||
| 4367 | } else { | ||
| 4368 | period = max_t(u64, 10000, hwc->sample_period); | ||
| 4369 | } | ||
| 4370 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
| 4371 | ns_to_ktime(period), 0, | ||
| 4372 | HRTIMER_MODE_REL, 0); | ||
| 4373 | } | ||
| 4374 | } | 4589 | } | 
| 4375 | 4590 | ||
| 4376 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) | 4591 | static void perf_swevent_stop(struct perf_event *event, int flags) | 
| 4377 | { | 4592 | { | 
| 4378 | struct hw_perf_event *hwc = &event->hw; | 4593 | event->hw.state = PERF_HES_STOPPED; | 
| 4379 | |||
| 4380 | if (hwc->sample_period) { | ||
| 4381 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | ||
| 4382 | hwc->remaining = ktime_to_ns(remaining); | ||
| 4383 | |||
| 4384 | hrtimer_cancel(&hwc->hrtimer); | ||
| 4385 | } | ||
| 4386 | } | 4594 | } | 
| 4387 | 4595 | ||
| 4388 | /* | ||
| 4389 | * Software event: cpu wall time clock | ||
| 4390 | */ | ||
| 4391 | |||
| 4392 | static void cpu_clock_perf_event_update(struct perf_event *event) | ||
| 4393 | { | ||
| 4394 | int cpu = raw_smp_processor_id(); | ||
| 4395 | s64 prev; | ||
| 4396 | u64 now; | ||
| 4397 | |||
| 4398 | now = cpu_clock(cpu); | ||
| 4399 | prev = local64_xchg(&event->hw.prev_count, now); | ||
| 4400 | local64_add(now - prev, &event->count); | ||
| 4401 | } | ||
| 4402 | |||
| 4403 | static int cpu_clock_perf_event_enable(struct perf_event *event) | ||
| 4404 | { | ||
| 4405 | struct hw_perf_event *hwc = &event->hw; | ||
| 4406 | int cpu = raw_smp_processor_id(); | ||
| 4407 | |||
| 4408 | local64_set(&hwc->prev_count, cpu_clock(cpu)); | ||
| 4409 | perf_swevent_start_hrtimer(event); | ||
| 4410 | |||
| 4411 | return 0; | ||
| 4412 | } | ||
| 4413 | |||
| 4414 | static void cpu_clock_perf_event_disable(struct perf_event *event) | ||
| 4415 | { | ||
| 4416 | perf_swevent_cancel_hrtimer(event); | ||
| 4417 | cpu_clock_perf_event_update(event); | ||
| 4418 | } | ||
| 4419 | |||
| 4420 | static void cpu_clock_perf_event_read(struct perf_event *event) | ||
| 4421 | { | ||
| 4422 | cpu_clock_perf_event_update(event); | ||
| 4423 | } | ||
| 4424 | |||
| 4425 | static const struct pmu perf_ops_cpu_clock = { | ||
| 4426 | .enable = cpu_clock_perf_event_enable, | ||
| 4427 | .disable = cpu_clock_perf_event_disable, | ||
| 4428 | .read = cpu_clock_perf_event_read, | ||
| 4429 | }; | ||
| 4430 | |||
| 4431 | /* | ||
| 4432 | * Software event: task time clock | ||
| 4433 | */ | ||
| 4434 | |||
| 4435 | static void task_clock_perf_event_update(struct perf_event *event, u64 now) | ||
| 4436 | { | ||
| 4437 | u64 prev; | ||
| 4438 | s64 delta; | ||
| 4439 | |||
| 4440 | prev = local64_xchg(&event->hw.prev_count, now); | ||
| 4441 | delta = now - prev; | ||
| 4442 | local64_add(delta, &event->count); | ||
| 4443 | } | ||
| 4444 | |||
| 4445 | static int task_clock_perf_event_enable(struct perf_event *event) | ||
| 4446 | { | ||
| 4447 | struct hw_perf_event *hwc = &event->hw; | ||
| 4448 | u64 now; | ||
| 4449 | |||
| 4450 | now = event->ctx->time; | ||
| 4451 | |||
| 4452 | local64_set(&hwc->prev_count, now); | ||
| 4453 | |||
| 4454 | perf_swevent_start_hrtimer(event); | ||
| 4455 | |||
| 4456 | return 0; | ||
| 4457 | } | ||
| 4458 | |||
| 4459 | static void task_clock_perf_event_disable(struct perf_event *event) | ||
| 4460 | { | ||
| 4461 | perf_swevent_cancel_hrtimer(event); | ||
| 4462 | task_clock_perf_event_update(event, event->ctx->time); | ||
| 4463 | |||
| 4464 | } | ||
| 4465 | |||
| 4466 | static void task_clock_perf_event_read(struct perf_event *event) | ||
| 4467 | { | ||
| 4468 | u64 time; | ||
| 4469 | |||
| 4470 | if (!in_nmi()) { | ||
| 4471 | update_context_time(event->ctx); | ||
| 4472 | time = event->ctx->time; | ||
| 4473 | } else { | ||
| 4474 | u64 now = perf_clock(); | ||
| 4475 | u64 delta = now - event->ctx->timestamp; | ||
| 4476 | time = event->ctx->time + delta; | ||
| 4477 | } | ||
| 4478 | |||
| 4479 | task_clock_perf_event_update(event, time); | ||
| 4480 | } | ||
| 4481 | |||
| 4482 | static const struct pmu perf_ops_task_clock = { | ||
| 4483 | .enable = task_clock_perf_event_enable, | ||
| 4484 | .disable = task_clock_perf_event_disable, | ||
| 4485 | .read = task_clock_perf_event_read, | ||
| 4486 | }; | ||
| 4487 | |||
| 4488 | /* Deref the hlist from the update side */ | 4596 | /* Deref the hlist from the update side */ | 
| 4489 | static inline struct swevent_hlist * | 4597 | static inline struct swevent_hlist * | 
| 4490 | swevent_hlist_deref(struct perf_cpu_context *cpuctx) | 4598 | swevent_hlist_deref(struct swevent_htable *swhash) | 
| 4491 | { | 4599 | { | 
| 4492 | return rcu_dereference_protected(cpuctx->swevent_hlist, | 4600 | return rcu_dereference_protected(swhash->swevent_hlist, | 
| 4493 | lockdep_is_held(&cpuctx->hlist_mutex)); | 4601 | lockdep_is_held(&swhash->hlist_mutex)); | 
| 4494 | } | 4602 | } | 
| 4495 | 4603 | ||
| 4496 | static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) | 4604 | static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) | 
| @@ -4501,27 +4609,27 @@ static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) | |||
| 4501 | kfree(hlist); | 4609 | kfree(hlist); | 
| 4502 | } | 4610 | } | 
| 4503 | 4611 | ||
| 4504 | static void swevent_hlist_release(struct perf_cpu_context *cpuctx) | 4612 | static void swevent_hlist_release(struct swevent_htable *swhash) | 
| 4505 | { | 4613 | { | 
| 4506 | struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx); | 4614 | struct swevent_hlist *hlist = swevent_hlist_deref(swhash); | 
| 4507 | 4615 | ||
| 4508 | if (!hlist) | 4616 | if (!hlist) | 
| 4509 | return; | 4617 | return; | 
| 4510 | 4618 | ||
| 4511 | rcu_assign_pointer(cpuctx->swevent_hlist, NULL); | 4619 | rcu_assign_pointer(swhash->swevent_hlist, NULL); | 
| 4512 | call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); | 4620 | call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); | 
| 4513 | } | 4621 | } | 
| 4514 | 4622 | ||
| 4515 | static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) | 4623 | static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) | 
| 4516 | { | 4624 | { | 
| 4517 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 4625 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); | 
| 4518 | 4626 | ||
| 4519 | mutex_lock(&cpuctx->hlist_mutex); | 4627 | mutex_lock(&swhash->hlist_mutex); | 
| 4520 | 4628 | ||
| 4521 | if (!--cpuctx->hlist_refcount) | 4629 | if (!--swhash->hlist_refcount) | 
| 4522 | swevent_hlist_release(cpuctx); | 4630 | swevent_hlist_release(swhash); | 
| 4523 | 4631 | ||
| 4524 | mutex_unlock(&cpuctx->hlist_mutex); | 4632 | mutex_unlock(&swhash->hlist_mutex); | 
| 4525 | } | 4633 | } | 
| 4526 | 4634 | ||
| 4527 | static void swevent_hlist_put(struct perf_event *event) | 4635 | static void swevent_hlist_put(struct perf_event *event) | 
| @@ -4539,12 +4647,12 @@ static void swevent_hlist_put(struct perf_event *event) | |||
| 4539 | 4647 | ||
| 4540 | static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) | 4648 | static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) | 
| 4541 | { | 4649 | { | 
| 4542 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 4650 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); | 
| 4543 | int err = 0; | 4651 | int err = 0; | 
| 4544 | 4652 | ||
| 4545 | mutex_lock(&cpuctx->hlist_mutex); | 4653 | mutex_lock(&swhash->hlist_mutex); | 
| 4546 | 4654 | ||
| 4547 | if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) { | 4655 | if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) { | 
| 4548 | struct swevent_hlist *hlist; | 4656 | struct swevent_hlist *hlist; | 
| 4549 | 4657 | ||
| 4550 | hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); | 4658 | hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); | 
| @@ -4552,11 +4660,11 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) | |||
| 4552 | err = -ENOMEM; | 4660 | err = -ENOMEM; | 
| 4553 | goto exit; | 4661 | goto exit; | 
| 4554 | } | 4662 | } | 
| 4555 | rcu_assign_pointer(cpuctx->swevent_hlist, hlist); | 4663 | rcu_assign_pointer(swhash->swevent_hlist, hlist); | 
| 4556 | } | 4664 | } | 
| 4557 | cpuctx->hlist_refcount++; | 4665 | swhash->hlist_refcount++; | 
| 4558 | exit: | 4666 | exit: | 
| 4559 | mutex_unlock(&cpuctx->hlist_mutex); | 4667 | mutex_unlock(&swhash->hlist_mutex); | 
| 4560 | 4668 | ||
| 4561 | return err; | 4669 | return err; | 
| 4562 | } | 4670 | } | 
| @@ -4580,7 +4688,7 @@ static int swevent_hlist_get(struct perf_event *event) | |||
| 4580 | put_online_cpus(); | 4688 | put_online_cpus(); | 
| 4581 | 4689 | ||
| 4582 | return 0; | 4690 | return 0; | 
| 4583 | fail: | 4691 | fail: | 
| 4584 | for_each_possible_cpu(cpu) { | 4692 | for_each_possible_cpu(cpu) { | 
| 4585 | if (cpu == failed_cpu) | 4693 | if (cpu == failed_cpu) | 
| 4586 | break; | 4694 | break; | 
| @@ -4591,17 +4699,64 @@ static int swevent_hlist_get(struct perf_event *event) | |||
| 4591 | return err; | 4699 | return err; | 
| 4592 | } | 4700 | } | 
| 4593 | 4701 | ||
| 4594 | #ifdef CONFIG_EVENT_TRACING | 4702 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 
| 4703 | |||
| 4704 | static void sw_perf_event_destroy(struct perf_event *event) | ||
| 4705 | { | ||
| 4706 | u64 event_id = event->attr.config; | ||
| 4707 | |||
| 4708 | WARN_ON(event->parent); | ||
| 4709 | |||
| 4710 | atomic_dec(&perf_swevent_enabled[event_id]); | ||
| 4711 | swevent_hlist_put(event); | ||
| 4712 | } | ||
| 4713 | |||
| 4714 | static int perf_swevent_init(struct perf_event *event) | ||
| 4715 | { | ||
| 4716 | int event_id = event->attr.config; | ||
| 4595 | 4717 | ||
| 4596 | static const struct pmu perf_ops_tracepoint = { | 4718 | if (event->attr.type != PERF_TYPE_SOFTWARE) | 
| 4597 | .enable = perf_trace_enable, | 4719 | return -ENOENT; | 
| 4598 | .disable = perf_trace_disable, | 4720 | |
| 4599 | .start = perf_swevent_int, | 4721 | switch (event_id) { | 
| 4600 | .stop = perf_swevent_void, | 4722 | case PERF_COUNT_SW_CPU_CLOCK: | 
| 4723 | case PERF_COUNT_SW_TASK_CLOCK: | ||
| 4724 | return -ENOENT; | ||
| 4725 | |||
| 4726 | default: | ||
| 4727 | break; | ||
| 4728 | } | ||
| 4729 | |||
| 4730 | if (event_id > PERF_COUNT_SW_MAX) | ||
| 4731 | return -ENOENT; | ||
| 4732 | |||
| 4733 | if (!event->parent) { | ||
| 4734 | int err; | ||
| 4735 | |||
| 4736 | err = swevent_hlist_get(event); | ||
| 4737 | if (err) | ||
| 4738 | return err; | ||
| 4739 | |||
| 4740 | atomic_inc(&perf_swevent_enabled[event_id]); | ||
| 4741 | event->destroy = sw_perf_event_destroy; | ||
| 4742 | } | ||
| 4743 | |||
| 4744 | return 0; | ||
| 4745 | } | ||
| 4746 | |||
| 4747 | static struct pmu perf_swevent = { | ||
| 4748 | .task_ctx_nr = perf_sw_context, | ||
| 4749 | |||
| 4750 | .event_init = perf_swevent_init, | ||
| 4751 | .add = perf_swevent_add, | ||
| 4752 | .del = perf_swevent_del, | ||
| 4753 | .start = perf_swevent_start, | ||
| 4754 | .stop = perf_swevent_stop, | ||
| 4601 | .read = perf_swevent_read, | 4755 | .read = perf_swevent_read, | 
| 4602 | .unthrottle = perf_swevent_void, | ||
| 4603 | }; | 4756 | }; | 
| 4604 | 4757 | ||
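perf_swevent is the first complete example of the new pmu callback set: event_init() claims or rejects the event, add()/del() attach and detach it while its context is scheduled in, and start()/stop() flip event->hw.state between 0 and PERF_HES_STOPPED. An event added without PERF_EF_START therefore stays stopped (perf_event_index() above reports 0 for it, for instance) until start() runs. A hypothetical caller-side sequence, only to show the intended ordering of the callbacks (the real scheduling code drives them; error handling elided):

/* Illustrative only. */
static void exercise_swevent(struct pmu *pmu, struct perf_event *event)
{
        if (pmu->add(event, 0))                 /* attached but left stopped */
                return;
        pmu->start(event, 0);                   /* hw.state = 0, event is live */

        pmu->stop(event, PERF_EF_UPDATE);       /* hw.state = PERF_HES_STOPPED */
        pmu->start(event, 0);

        pmu->del(event, 0);                     /* hlist_del_rcu() under the hood */

        pmu->add(event, PERF_EF_START);         /* or attach and start in one step */
        pmu->del(event, 0);
}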
| 4758 | #ifdef CONFIG_EVENT_TRACING | ||
| 4759 | |||
| 4605 | static int perf_tp_filter_match(struct perf_event *event, | 4760 | static int perf_tp_filter_match(struct perf_event *event, | 
| 4606 | struct perf_sample_data *data) | 4761 | struct perf_sample_data *data) | 
| 4607 | { | 4762 | { | 
| @@ -4645,7 +4800,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, | |||
| 4645 | 4800 | ||
| 4646 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { | 4801 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { | 
| 4647 | if (perf_tp_event_match(event, &data, regs)) | 4802 | if (perf_tp_event_match(event, &data, regs)) | 
| 4648 | perf_swevent_add(event, count, 1, &data, regs); | 4803 | perf_swevent_event(event, count, 1, &data, regs); | 
| 4649 | } | 4804 | } | 
| 4650 | 4805 | ||
| 4651 | perf_swevent_put_recursion_context(rctx); | 4806 | perf_swevent_put_recursion_context(rctx); | 
| @@ -4657,10 +4812,13 @@ static void tp_perf_event_destroy(struct perf_event *event) | |||
| 4657 | perf_trace_destroy(event); | 4812 | perf_trace_destroy(event); | 
| 4658 | } | 4813 | } | 
| 4659 | 4814 | ||
| 4660 | static const struct pmu *tp_perf_event_init(struct perf_event *event) | 4815 | static int perf_tp_event_init(struct perf_event *event) | 
| 4661 | { | 4816 | { | 
| 4662 | int err; | 4817 | int err; | 
| 4663 | 4818 | ||
| 4819 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | ||
| 4820 | return -ENOENT; | ||
| 4821 | |||
| 4664 | /* | 4822 | /* | 
| 4665 | * Raw tracepoint data is a severe data leak; only allow root to | 4823 | * Raw tracepoint data is a severe data leak; only allow root to |
| 4666 | * have these. | 4824 | * have these. | 
| @@ -4668,15 +4826,31 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) | |||
| 4668 | if ((event->attr.sample_type & PERF_SAMPLE_RAW) && | 4826 | if ((event->attr.sample_type & PERF_SAMPLE_RAW) && | 
| 4669 | perf_paranoid_tracepoint_raw() && | 4827 | perf_paranoid_tracepoint_raw() && | 
| 4670 | !capable(CAP_SYS_ADMIN)) | 4828 | !capable(CAP_SYS_ADMIN)) | 
| 4671 | return ERR_PTR(-EPERM); | 4829 | return -EPERM; | 
| 4672 | 4830 | ||
| 4673 | err = perf_trace_init(event); | 4831 | err = perf_trace_init(event); | 
| 4674 | if (err) | 4832 | if (err) | 
| 4675 | return NULL; | 4833 | return err; | 
| 4676 | 4834 | ||
| 4677 | event->destroy = tp_perf_event_destroy; | 4835 | event->destroy = tp_perf_event_destroy; | 
| 4678 | 4836 | ||
| 4679 | return &perf_ops_tracepoint; | 4837 | return 0; | 
| 4838 | } | ||
| 4839 | |||
| 4840 | static struct pmu perf_tracepoint = { | ||
| 4841 | .task_ctx_nr = perf_sw_context, | ||
| 4842 | |||
| 4843 | .event_init = perf_tp_event_init, | ||
| 4844 | .add = perf_trace_add, | ||
| 4845 | .del = perf_trace_del, | ||
| 4846 | .start = perf_swevent_start, | ||
| 4847 | .stop = perf_swevent_stop, | ||
| 4848 | .read = perf_swevent_read, | ||
| 4849 | }; | ||
| 4850 | |||
| 4851 | static inline void perf_tp_register(void) | ||
| 4852 | { | ||
| 4853 | perf_pmu_register(&perf_tracepoint); | ||
| 4680 | } | 4854 | } | 
| 4681 | 4855 | ||
| 4682 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 4856 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 
| @@ -4704,9 +4878,8 @@ static void perf_event_free_filter(struct perf_event *event) | |||
| 4704 | 4878 | ||
| 4705 | #else | 4879 | #else | 
| 4706 | 4880 | ||
| 4707 | static const struct pmu *tp_perf_event_init(struct perf_event *event) | 4881 | static inline void perf_tp_register(void) | 
| 4708 | { | 4882 | { | 
| 4709 | return NULL; | ||
| 4710 | } | 4883 | } | 
| 4711 | 4884 | ||
| 4712 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 4885 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 
| @@ -4721,105 +4894,389 @@ static void perf_event_free_filter(struct perf_event *event) | |||
| 4721 | #endif /* CONFIG_EVENT_TRACING */ | 4894 | #endif /* CONFIG_EVENT_TRACING */ | 
| 4722 | 4895 | ||
| 4723 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 4896 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 
| 4724 | static void bp_perf_event_destroy(struct perf_event *event) | 4897 | void perf_bp_event(struct perf_event *bp, void *data) | 
| 4725 | { | 4898 | { | 
| 4726 | release_bp_slot(event); | 4899 | struct perf_sample_data sample; | 
| 4900 | struct pt_regs *regs = data; | ||
| 4901 | |||
| 4902 | perf_sample_data_init(&sample, bp->attr.bp_addr); | ||
| 4903 | |||
| 4904 | if (!bp->hw.state && !perf_exclude_event(bp, regs)) | ||
| 4905 | perf_swevent_event(bp, 1, 1, &sample, regs); | ||
| 4727 | } | 4906 | } | 
| 4907 | #endif | ||
| 4908 | |||
| 4909 | /* | ||
| 4910 | * hrtimer based swevent callback | ||
| 4911 | */ | ||
| 4728 | 4912 | ||
| 4729 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | 4913 | static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | 
| 4730 | { | 4914 | { | 
| 4731 | int err; | 4915 | enum hrtimer_restart ret = HRTIMER_RESTART; | 
| 4916 | struct perf_sample_data data; | ||
| 4917 | struct pt_regs *regs; | ||
| 4918 | struct perf_event *event; | ||
| 4919 | u64 period; | ||
| 4732 | 4920 | ||
| 4733 | err = register_perf_hw_breakpoint(bp); | 4921 | event = container_of(hrtimer, struct perf_event, hw.hrtimer); | 
| 4734 | if (err) | 4922 | event->pmu->read(event); | 
| 4735 | return ERR_PTR(err); | ||
| 4736 | 4923 | ||
| 4737 | bp->destroy = bp_perf_event_destroy; | 4924 | perf_sample_data_init(&data, 0); | 
| 4925 | data.period = event->hw.last_period; | ||
| 4926 | regs = get_irq_regs(); | ||
| 4927 | |||
| 4928 | if (regs && !perf_exclude_event(event, regs)) { | ||
| 4929 | if (!(event->attr.exclude_idle && current->pid == 0)) | ||
| 4930 | if (perf_event_overflow(event, 0, &data, regs)) | ||
| 4931 | ret = HRTIMER_NORESTART; | ||
| 4932 | } | ||
| 4738 | 4933 | ||
| 4739 | return &perf_ops_bp; | 4934 | period = max_t(u64, 10000, event->hw.sample_period); | 
| 4935 | hrtimer_forward_now(hrtimer, ns_to_ktime(period)); | ||
| 4936 | |||
| 4937 | return ret; | ||
| 4740 | } | 4938 | } | 
| 4741 | 4939 | ||
| 4742 | void perf_bp_event(struct perf_event *bp, void *data) | 4940 | static void perf_swevent_start_hrtimer(struct perf_event *event) | 
| 4743 | { | 4941 | { | 
| 4744 | struct perf_sample_data sample; | 4942 | struct hw_perf_event *hwc = &event->hw; | 
| 4745 | struct pt_regs *regs = data; | ||
| 4746 | 4943 | ||
| 4747 | perf_sample_data_init(&sample, bp->attr.bp_addr); | 4944 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 
| 4945 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
| 4946 | if (hwc->sample_period) { | ||
| 4947 | s64 period = local64_read(&hwc->period_left); | ||
| 4748 | 4948 | ||
| 4749 | if (!perf_exclude_event(bp, regs)) | 4949 | if (period) { | 
| 4750 | perf_swevent_add(bp, 1, 1, &sample, regs); | 4950 | if (period < 0) | 
| 4951 | period = 10000; | ||
| 4952 | |||
| 4953 | local64_set(&hwc->period_left, 0); | ||
| 4954 | } else { | ||
| 4955 | period = max_t(u64, 10000, hwc->sample_period); | ||
| 4956 | } | ||
| 4957 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
| 4958 | ns_to_ktime(period), 0, | ||
| 4959 | HRTIMER_MODE_REL_PINNED, 0); | ||
| 4960 | } | ||
| 4751 | } | 4961 | } | 
| 4752 | #else | 4962 | |
| 4753 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | 4963 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) | 
| 4754 | { | 4964 | { | 
| 4755 | return NULL; | 4965 | struct hw_perf_event *hwc = &event->hw; | 
| 4966 | |||
| 4967 | if (hwc->sample_period) { | ||
| 4968 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | ||
| 4969 | local64_set(&hwc->period_left, ktime_to_ns(remaining)); | ||
| 4970 | |||
| 4971 | hrtimer_cancel(&hwc->hrtimer); | ||
| 4972 | } | ||
| 4756 | } | 4973 | } | 
| 4757 | 4974 | ||
| 4758 | void perf_bp_event(struct perf_event *bp, void *regs) | 4975 | /* | 
| 4976 | * Software event: cpu wall time clock | ||
| 4977 | */ | ||
| 4978 | |||
| 4979 | static void cpu_clock_event_update(struct perf_event *event) | ||
| 4759 | { | 4980 | { | 
| 4981 | s64 prev; | ||
| 4982 | u64 now; | ||
| 4983 | |||
| 4984 | now = local_clock(); | ||
| 4985 | prev = local64_xchg(&event->hw.prev_count, now); | ||
| 4986 | local64_add(now - prev, &event->count); | ||
| 4760 | } | 4987 | } | 
| 4761 | #endif | ||
| 4762 | 4988 | ||
| 4763 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 4989 | static void cpu_clock_event_start(struct perf_event *event, int flags) | 
| 4990 | { | ||
| 4991 | local64_set(&event->hw.prev_count, local_clock()); | ||
| 4992 | perf_swevent_start_hrtimer(event); | ||
| 4993 | } | ||
| 4764 | 4994 | ||
| 4765 | static void sw_perf_event_destroy(struct perf_event *event) | 4995 | static void cpu_clock_event_stop(struct perf_event *event, int flags) | 
| 4766 | { | 4996 | { | 
| 4767 | u64 event_id = event->attr.config; | 4997 | perf_swevent_cancel_hrtimer(event); | 
| 4998 | cpu_clock_event_update(event); | ||
| 4999 | } | ||
| 4768 | 5000 | ||
| 4769 | WARN_ON(event->parent); | 5001 | static int cpu_clock_event_add(struct perf_event *event, int flags) | 
| 5002 | { | ||
| 5003 | if (flags & PERF_EF_START) | ||
| 5004 | cpu_clock_event_start(event, flags); | ||
| 4770 | 5005 | ||
| 4771 | atomic_dec(&perf_swevent_enabled[event_id]); | 5006 | return 0; | 
| 4772 | swevent_hlist_put(event); | ||
| 4773 | } | 5007 | } | 
| 4774 | 5008 | ||
| 4775 | static const struct pmu *sw_perf_event_init(struct perf_event *event) | 5009 | static void cpu_clock_event_del(struct perf_event *event, int flags) | 
| 4776 | { | 5010 | { | 
| 4777 | const struct pmu *pmu = NULL; | 5011 | cpu_clock_event_stop(event, flags); | 
| 4778 | u64 event_id = event->attr.config; | 5012 | } | 
| 5013 | |||
| 5014 | static void cpu_clock_event_read(struct perf_event *event) | ||
| 5015 | { | ||
| 5016 | cpu_clock_event_update(event); | ||
| 5017 | } | ||
| 5018 | |||
| 5019 | static int cpu_clock_event_init(struct perf_event *event) | ||
| 5020 | { | ||
| 5021 | if (event->attr.type != PERF_TYPE_SOFTWARE) | ||
| 5022 | return -ENOENT; | ||
| 5023 | |||
| 5024 | if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) | ||
| 5025 | return -ENOENT; | ||
| 5026 | |||
| 5027 | return 0; | ||
| 5028 | } | ||
| 5029 | |||
| 5030 | static struct pmu perf_cpu_clock = { | ||
| 5031 | .task_ctx_nr = perf_sw_context, | ||
| 5032 | |||
| 5033 | .event_init = cpu_clock_event_init, | ||
| 5034 | .add = cpu_clock_event_add, | ||
| 5035 | .del = cpu_clock_event_del, | ||
| 5036 | .start = cpu_clock_event_start, | ||
| 5037 | .stop = cpu_clock_event_stop, | ||
| 5038 | .read = cpu_clock_event_read, | ||
| 5039 | }; | ||
| 5040 | |||
| 5041 | /* | ||
| 5042 | * Software event: task time clock | ||
| 5043 | */ | ||
| 5044 | |||
| 5045 | static void task_clock_event_update(struct perf_event *event, u64 now) | ||
| 5046 | { | ||
| 5047 | u64 prev; | ||
| 5048 | s64 delta; | ||
| 5049 | |||
| 5050 | prev = local64_xchg(&event->hw.prev_count, now); | ||
| 5051 | delta = now - prev; | ||
| 5052 | local64_add(delta, &event->count); | ||
| 5053 | } | ||
| 5054 | |||
| 5055 | static void task_clock_event_start(struct perf_event *event, int flags) | ||
| 5056 | { | ||
| 5057 | local64_set(&event->hw.prev_count, event->ctx->time); | ||
| 5058 | perf_swevent_start_hrtimer(event); | ||
| 5059 | } | ||
| 5060 | |||
| 5061 | static void task_clock_event_stop(struct perf_event *event, int flags) | ||
| 5062 | { | ||
| 5063 | perf_swevent_cancel_hrtimer(event); | ||
| 5064 | task_clock_event_update(event, event->ctx->time); | ||
| 5065 | } | ||
| 5066 | |||
| 5067 | static int task_clock_event_add(struct perf_event *event, int flags) | ||
| 5068 | { | ||
| 5069 | if (flags & PERF_EF_START) | ||
| 5070 | task_clock_event_start(event, flags); | ||
| 5071 | |||
| 5072 | return 0; | ||
| 5073 | } | ||
| 5074 | |||
| 5075 | static void task_clock_event_del(struct perf_event *event, int flags) | ||
| 5076 | { | ||
| 5077 | task_clock_event_stop(event, PERF_EF_UPDATE); | ||
| 5078 | } | ||
| 5079 | |||
| 5080 | static void task_clock_event_read(struct perf_event *event) | ||
| 5081 | { | ||
| 5082 | u64 time; | ||
| 5083 | |||
| 5084 | if (!in_nmi()) { | ||
| 5085 | update_context_time(event->ctx); | ||
| 5086 | time = event->ctx->time; | ||
| 5087 | } else { | ||
| 5088 | u64 now = perf_clock(); | ||
| 5089 | u64 delta = now - event->ctx->timestamp; | ||
| 5090 | time = event->ctx->time + delta; | ||
| 5091 | } | ||
| 5092 | |||
| 5093 | task_clock_event_update(event, time); | ||
| 5094 | } | ||
| 5095 | |||
| 5096 | static int task_clock_event_init(struct perf_event *event) | ||
| 5097 | { | ||
| 5098 | if (event->attr.type != PERF_TYPE_SOFTWARE) | ||
| 5099 | return -ENOENT; | ||
| 5100 | |||
| 5101 | if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) | ||
| 5102 | return -ENOENT; | ||
| 5103 | |||
| 5104 | return 0; | ||
| 5105 | } | ||
| 5106 | |||
| 5107 | static struct pmu perf_task_clock = { | ||
| 5108 | .task_ctx_nr = perf_sw_context, | ||
| 5109 | |||
| 5110 | .event_init = task_clock_event_init, | ||
| 5111 | .add = task_clock_event_add, | ||
| 5112 | .del = task_clock_event_del, | ||
| 5113 | .start = task_clock_event_start, | ||
| 5114 | .stop = task_clock_event_stop, | ||
| 5115 | .read = task_clock_event_read, | ||
| 5116 | }; | ||
| 5117 | |||
| 5118 | static void perf_pmu_nop_void(struct pmu *pmu) | ||
| 5119 | { | ||
| 5120 | } | ||
| 5121 | |||
| 5122 | static int perf_pmu_nop_int(struct pmu *pmu) | ||
| 5123 | { | ||
| 5124 | return 0; | ||
| 5125 | } | ||
| 5126 | |||
| 5127 | static void perf_pmu_start_txn(struct pmu *pmu) | ||
| 5128 | { | ||
| 5129 | perf_pmu_disable(pmu); | ||
| 5130 | } | ||
| 5131 | |||
| 5132 | static int perf_pmu_commit_txn(struct pmu *pmu) | ||
| 5133 | { | ||
| 5134 | perf_pmu_enable(pmu); | ||
| 5135 | return 0; | ||
| 5136 | } | ||
| 4779 | 5137 | ||
| 5138 | static void perf_pmu_cancel_txn(struct pmu *pmu) | ||
| 5139 | { | ||
| 5140 | perf_pmu_enable(pmu); | ||
| 5141 | } | ||
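These default transaction hooks do nothing clever: start_txn() just disables the pmu, commit_txn() re-enables it and reports success, and cancel_txn() re-enables it on the failure path, so a pmu that cannot batch still sees a matched disable/enable pair around each group. The caller-side shape they assume looks roughly like the sketch below; the real group-scheduling code is not shown in this hunk, and add_group() is invented for illustration.

/* Rough, illustrative shape of a pmu transaction. */
static int add_group(struct pmu *pmu, struct perf_event **ev, int nr)
{
        int i;

        pmu->start_txn(pmu);

        for (i = 0; i < nr; i++) {
                if (pmu->add(ev[i], PERF_EF_START))
                        goto undo;
        }

        if (!pmu->commit_txn(pmu))
                return 0;                       /* whole group accepted */

undo:
        while (i--)
                pmu->del(ev[i], 0);             /* back out whatever made it on */
        pmu->cancel_txn(pmu);
        return -EAGAIN;
}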
| 5142 | |||
| 5143 | /* | ||
| 5144 | * Ensures all contexts with the same task_ctx_nr have the same | ||
| 5145 | * pmu_cpu_context too. | ||
| 5146 | */ | ||
| 5147 | static void *find_pmu_context(int ctxn) | ||
| 5148 | { | ||
| 5149 | struct pmu *pmu; | ||
| 5150 | |||
| 5151 | if (ctxn < 0) | ||
| 5152 | return NULL; | ||
| 5153 | |||
| 5154 | list_for_each_entry(pmu, &pmus, entry) { | ||
| 5155 | if (pmu->task_ctx_nr == ctxn) | ||
| 5156 | return pmu->pmu_cpu_context; | ||
| 5157 | } | ||
| 5158 | |||
| 5159 | return NULL; | ||
| 5160 | } | ||
| 5161 | |||
| 5162 | static void free_pmu_context(void * __percpu cpu_context) | ||
| 5163 | { | ||
| 5164 | struct pmu *pmu; | ||
| 5165 | |||
| 5166 | mutex_lock(&pmus_lock); | ||
| 4780 | /* | 5167 | /* | 
| 4781 | * Software events (currently) can't in general distinguish | 5168 | * Like a real lame refcount. | 
| 4782 | * between user, kernel and hypervisor events. | ||
| 4783 | * However, context switches and cpu migrations are considered | ||
| 4784 | * to be kernel events, and page faults are never hypervisor | ||
| 4785 | * events. | ||
| 4786 | */ | 5169 | */ | 
| 4787 | switch (event_id) { | 5170 | list_for_each_entry(pmu, &pmus, entry) { | 
| 4788 | case PERF_COUNT_SW_CPU_CLOCK: | 5171 | if (pmu->pmu_cpu_context == cpu_context) | 
| 4789 | pmu = &perf_ops_cpu_clock; | 5172 | goto out; | 
| 5173 | } | ||
| 4790 | 5174 | ||
| 4791 | break; | 5175 | free_percpu(cpu_context); | 
| 4792 | case PERF_COUNT_SW_TASK_CLOCK: | 5176 | out: | 
| 4793 | /* | 5177 | mutex_unlock(&pmus_lock); | 
| 4794 | * If the user instantiates this as a per-cpu event, | 5178 | } | 
| 4795 | * use the cpu_clock event instead. | ||
| 4796 | */ | ||
| 4797 | if (event->ctx->task) | ||
| 4798 | pmu = &perf_ops_task_clock; | ||
| 4799 | else | ||
| 4800 | pmu = &perf_ops_cpu_clock; | ||
| 4801 | 5179 | ||
| 4802 | break; | 5180 | int perf_pmu_register(struct pmu *pmu) | 
| 4803 | case PERF_COUNT_SW_PAGE_FAULTS: | 5181 | { | 
| 4804 | case PERF_COUNT_SW_PAGE_FAULTS_MIN: | 5182 | int cpu, ret; | 
| 4805 | case PERF_COUNT_SW_PAGE_FAULTS_MAJ: | ||
| 4806 | case PERF_COUNT_SW_CONTEXT_SWITCHES: | ||
| 4807 | case PERF_COUNT_SW_CPU_MIGRATIONS: | ||
| 4808 | case PERF_COUNT_SW_ALIGNMENT_FAULTS: | ||
| 4809 | case PERF_COUNT_SW_EMULATION_FAULTS: | ||
| 4810 | if (!event->parent) { | ||
| 4811 | int err; | ||
| 4812 | |||
| 4813 | err = swevent_hlist_get(event); | ||
| 4814 | if (err) | ||
| 4815 | return ERR_PTR(err); | ||
| 4816 | 5183 | ||
| 4817 | atomic_inc(&perf_swevent_enabled[event_id]); | 5184 | mutex_lock(&pmus_lock); | 
| 4818 | event->destroy = sw_perf_event_destroy; | 5185 | ret = -ENOMEM; | 
| 5186 | pmu->pmu_disable_count = alloc_percpu(int); | ||
| 5187 | if (!pmu->pmu_disable_count) | ||
| 5188 | goto unlock; | ||
| 5189 | |||
| 5190 | pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); | ||
| 5191 | if (pmu->pmu_cpu_context) | ||
| 5192 | goto got_cpu_context; | ||
| 5193 | |||
| 5194 | pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); | ||
| 5195 | if (!pmu->pmu_cpu_context) | ||
| 5196 | goto free_pdc; | ||
| 5197 | |||
| 5198 | for_each_possible_cpu(cpu) { | ||
| 5199 | struct perf_cpu_context *cpuctx; | ||
| 5200 | |||
| 5201 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | ||
| 5202 | __perf_event_init_context(&cpuctx->ctx); | ||
| 5203 | cpuctx->ctx.type = cpu_context; | ||
| 5204 | cpuctx->ctx.pmu = pmu; | ||
| 5205 | cpuctx->jiffies_interval = 1; | ||
| 5206 | INIT_LIST_HEAD(&cpuctx->rotation_list); | ||
| 5207 | } | ||
| 5208 | |||
| 5209 | got_cpu_context: | ||
| 5210 | if (!pmu->start_txn) { | ||
| 5211 | if (pmu->pmu_enable) { | ||
| 5212 | /* | ||
| 5213 | * If we have pmu_enable/pmu_disable calls, install | ||
| 5214 | * transaction stubs that use that to try and batch | ||
| 5215 | * hardware accesses. | ||
| 5216 | */ | ||
| 5217 | pmu->start_txn = perf_pmu_start_txn; | ||
| 5218 | pmu->commit_txn = perf_pmu_commit_txn; | ||
| 5219 | pmu->cancel_txn = perf_pmu_cancel_txn; | ||
| 5220 | } else { | ||
| 5221 | pmu->start_txn = perf_pmu_nop_void; | ||
| 5222 | pmu->commit_txn = perf_pmu_nop_int; | ||
| 5223 | pmu->cancel_txn = perf_pmu_nop_void; | ||
| 5224 | } | ||
| 5225 | } | ||
| 5226 | |||
| 5227 | if (!pmu->pmu_enable) { | ||
| 5228 | pmu->pmu_enable = perf_pmu_nop_void; | ||
| 5229 | pmu->pmu_disable = perf_pmu_nop_void; | ||
| 5230 | } | ||
| 5231 | |||
| 5232 | list_add_rcu(&pmu->entry, &pmus); | ||
| 5233 | ret = 0; | ||
| 5234 | unlock: | ||
| 5235 | mutex_unlock(&pmus_lock); | ||
| 5236 | |||
| 5237 | return ret; | ||
| 5238 | |||
| 5239 | free_pdc: | ||
| 5240 | free_percpu(pmu->pmu_disable_count); | ||
| 5241 | goto unlock; | ||
| 5242 | } | ||
| 5243 | |||
| 5244 | void perf_pmu_unregister(struct pmu *pmu) | ||
| 5245 | { | ||
| 5246 | mutex_lock(&pmus_lock); | ||
| 5247 | list_del_rcu(&pmu->entry); | ||
| 5248 | mutex_unlock(&pmus_lock); | ||
| 5249 | |||
| 5250 | /* | ||
| 5251 | * We dereference the pmu list under both SRCU and regular RCU, so | ||
| 5252 | * synchronize against both of those. | ||
| 5253 | */ | ||
| 5254 | synchronize_srcu(&pmus_srcu); | ||
| 5255 | synchronize_rcu(); | ||
| 5256 | |||
| 5257 | free_percpu(pmu->pmu_disable_count); | ||
| 5258 | free_pmu_context(pmu->pmu_cpu_context); | ||
| 5259 | } | ||
| 5260 | |||
| 5261 | struct pmu *perf_init_event(struct perf_event *event) | ||
| 5262 | { | ||
| 5263 | struct pmu *pmu = NULL; | ||
| 5264 | int idx; | ||
| 5265 | |||
| 5266 | idx = srcu_read_lock(&pmus_srcu); | ||
| 5267 | list_for_each_entry_rcu(pmu, &pmus, entry) { | ||
| 5268 | int ret = pmu->event_init(event); | ||
| 5269 | if (!ret) | ||
| 5270 | goto unlock; | ||
| 5271 | |||
| 5272 | if (ret != -ENOENT) { | ||
| 5273 | pmu = ERR_PTR(ret); | ||
| 5274 | goto unlock; | ||
| 4819 | } | 5275 | } | 
| 4820 | pmu = &perf_ops_generic; | ||
| 4821 | break; | ||
| 4822 | } | 5276 | } | 
| 5277 | pmu = ERR_PTR(-ENOENT); | ||
| 5278 | unlock: | ||
| 5279 | srcu_read_unlock(&pmus_srcu, idx); | ||
| 4823 | 5280 | ||
| 4824 | return pmu; | 5281 | return pmu; | 
| 4825 | } | 5282 | } | 
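perf_init_event() turns event creation into a probe loop: each registered pmu's event_init() either claims the event (0), reports a hard error, or answers -ENOENT so the walk moves on to the next pmu, all under srcu_read_lock() so that perf_pmu_unregister() can synchronize against readers. Combined with perf_pmu_register() supplying the optional pmu_enable/pmu_disable and transaction defaults, an additional software-style pmu needs little more than the sketch below; every my_* name and MY_PMU_CONFIG are invented for illustration.

/* Hypothetical pmu showing the -ENOENT "not mine" convention. */
#define MY_PMU_CONFIG   0x1234

static int my_event_init(struct perf_event *event)
{
        if (event->attr.type != PERF_TYPE_SOFTWARE)
                return -ENOENT;                 /* not ours, keep probing */
        if (event->attr.config != MY_PMU_CONFIG)
                return -ENOENT;
        return 0;
}

static int my_add(struct perf_event *event, int flags)
{
        event->hw.state = (flags & PERF_EF_START) ? 0 : PERF_HES_STOPPED;
        return 0;
}

static void my_del(struct perf_event *event, int flags)
{
}

static void my_start(struct perf_event *event, int flags)
{
        event->hw.state = 0;
}

static void my_stop(struct perf_event *event, int flags)
{
        event->hw.state = PERF_HES_STOPPED;
}

static void my_read(struct perf_event *event)
{
}

static struct pmu my_pmu = {
        .task_ctx_nr    = perf_sw_context,      /* share the sw task contexts */

        .event_init     = my_event_init,
        .add            = my_add,
        .del            = my_del,
        .start          = my_start,
        .stop           = my_stop,
        .read           = my_read,
        /* pmu_enable/pmu_disable and the txn hooks are defaulted for us */
};

/* and, from some init path:  perf_pmu_register(&my_pmu); */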
| @@ -4828,20 +5285,17 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event) | |||
| 4828 | * Allocate and initialize an event structure | 5285 | * Allocate and initialize an event structure |
| 4829 | */ | 5286 | */ | 
| 4830 | static struct perf_event * | 5287 | static struct perf_event * | 
| 4831 | perf_event_alloc(struct perf_event_attr *attr, | 5288 | perf_event_alloc(struct perf_event_attr *attr, int cpu, | 
| 4832 | int cpu, | ||
| 4833 | struct perf_event_context *ctx, | ||
| 4834 | struct perf_event *group_leader, | 5289 | struct perf_event *group_leader, | 
| 4835 | struct perf_event *parent_event, | 5290 | struct perf_event *parent_event, | 
| 4836 | perf_overflow_handler_t overflow_handler, | 5291 | perf_overflow_handler_t overflow_handler) | 
| 4837 | gfp_t gfpflags) | ||
| 4838 | { | 5292 | { | 
| 4839 | const struct pmu *pmu; | 5293 | struct pmu *pmu; | 
| 4840 | struct perf_event *event; | 5294 | struct perf_event *event; | 
| 4841 | struct hw_perf_event *hwc; | 5295 | struct hw_perf_event *hwc; | 
| 4842 | long err; | 5296 | long err; | 
| 4843 | 5297 | ||
| 4844 | event = kzalloc(sizeof(*event), gfpflags); | 5298 | event = kzalloc(sizeof(*event), GFP_KERNEL); | 
| 4845 | if (!event) | 5299 | if (!event) | 
| 4846 | return ERR_PTR(-ENOMEM); | 5300 | return ERR_PTR(-ENOMEM); | 
| 4847 | 5301 | ||
| @@ -4866,7 +5320,6 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
| 4866 | event->attr = *attr; | 5320 | event->attr = *attr; | 
| 4867 | event->group_leader = group_leader; | 5321 | event->group_leader = group_leader; | 
| 4868 | event->pmu = NULL; | 5322 | event->pmu = NULL; | 
| 4869 | event->ctx = ctx; | ||
| 4870 | event->oncpu = -1; | 5323 | event->oncpu = -1; | 
| 4871 | 5324 | ||
| 4872 | event->parent = parent_event; | 5325 | event->parent = parent_event; | 
| @@ -4900,29 +5353,8 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
| 4900 | if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) | 5353 | if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) | 
| 4901 | goto done; | 5354 | goto done; | 
| 4902 | 5355 | ||
| 4903 | switch (attr->type) { | 5356 | pmu = perf_init_event(event); | 
| 4904 | case PERF_TYPE_RAW: | ||
| 4905 | case PERF_TYPE_HARDWARE: | ||
| 4906 | case PERF_TYPE_HW_CACHE: | ||
| 4907 | pmu = hw_perf_event_init(event); | ||
| 4908 | break; | ||
| 4909 | 5357 | ||
| 4910 | case PERF_TYPE_SOFTWARE: | ||
| 4911 | pmu = sw_perf_event_init(event); | ||
| 4912 | break; | ||
| 4913 | |||
| 4914 | case PERF_TYPE_TRACEPOINT: | ||
| 4915 | pmu = tp_perf_event_init(event); | ||
| 4916 | break; | ||
| 4917 | |||
| 4918 | case PERF_TYPE_BREAKPOINT: | ||
| 4919 | pmu = bp_perf_event_init(event); | ||
| 4920 | break; | ||
| 4921 | |||
| 4922 | |||
| 4923 | default: | ||
| 4924 | break; | ||
| 4925 | } | ||
| 4926 | done: | 5358 | done: | 
| 4927 | err = 0; | 5359 | err = 0; | 
| 4928 | if (!pmu) | 5360 | if (!pmu) | 
| @@ -4947,6 +5379,13 @@ done: | |||
| 4947 | atomic_inc(&nr_comm_events); | 5379 | atomic_inc(&nr_comm_events); | 
| 4948 | if (event->attr.task) | 5380 | if (event->attr.task) | 
| 4949 | atomic_inc(&nr_task_events); | 5381 | atomic_inc(&nr_task_events); | 
| 5382 | if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { | ||
| 5383 | err = get_callchain_buffers(); | ||
| 5384 | if (err) { | ||
| 5385 | free_event(event); | ||
| 5386 | return ERR_PTR(err); | ||
| 5387 | } | ||
| 5388 | } | ||
| 4950 | } | 5389 | } | 
| 4951 | 5390 | ||
| 4952 | return event; | 5391 | return event; | 
| @@ -5094,12 +5533,16 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 5094 | struct perf_event_attr __user *, attr_uptr, | 5533 | struct perf_event_attr __user *, attr_uptr, | 
| 5095 | pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) | 5534 | pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) | 
| 5096 | { | 5535 | { | 
| 5097 | struct perf_event *event, *group_leader = NULL, *output_event = NULL; | 5536 | struct perf_event *group_leader = NULL, *output_event = NULL; | 
| 5537 | struct perf_event *event, *sibling; | ||
| 5098 | struct perf_event_attr attr; | 5538 | struct perf_event_attr attr; | 
| 5099 | struct perf_event_context *ctx; | 5539 | struct perf_event_context *ctx; | 
| 5100 | struct file *event_file = NULL; | 5540 | struct file *event_file = NULL; | 
| 5101 | struct file *group_file = NULL; | 5541 | struct file *group_file = NULL; | 
| 5542 | struct task_struct *task = NULL; | ||
| 5543 | struct pmu *pmu; | ||
| 5102 | int event_fd; | 5544 | int event_fd; | 
| 5545 | int move_group = 0; | ||
| 5103 | int fput_needed = 0; | 5546 | int fput_needed = 0; | 
| 5104 | int err; | 5547 | int err; | 
| 5105 | 5548 | ||
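For completeness, this syscall signature is what userspace drives directly: the attribute struct plus the pid/cpu/group_fd/flags tuple selects the task or CPU context and an optional group leader, and counts are then read() from the returned fd. A minimal user-side sketch (there is no dedicated libc wrapper, so the raw syscall is used; error handling trimmed):

/* Userspace sketch: count task clock time for the calling thread. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count = 0;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size   = sizeof(attr);
        attr.type   = PERF_TYPE_SOFTWARE;
        attr.config = PERF_COUNT_SW_TASK_CLOCK;

        /* pid = 0 (self), cpu = -1 (any), group_fd = -1, flags = 0 */
        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                return 1;

        /* ... run the code being measured ... */

        read(fd, &count, sizeof(count));
        printf("task clock: %llu ns\n", (unsigned long long)count);
        close(fd);
        return 0;
}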
| @@ -5125,20 +5568,11 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 5125 | if (event_fd < 0) | 5568 | if (event_fd < 0) | 
| 5126 | return event_fd; | 5569 | return event_fd; | 
| 5127 | 5570 | ||
| 5128 | /* | ||
| 5129 | * Get the target context (task or percpu): | ||
| 5130 | */ | ||
| 5131 | ctx = find_get_context(pid, cpu); | ||
| 5132 | if (IS_ERR(ctx)) { | ||
| 5133 | err = PTR_ERR(ctx); | ||
| 5134 | goto err_fd; | ||
| 5135 | } | ||
| 5136 | |||
| 5137 | if (group_fd != -1) { | 5571 | if (group_fd != -1) { | 
| 5138 | group_leader = perf_fget_light(group_fd, &fput_needed); | 5572 | group_leader = perf_fget_light(group_fd, &fput_needed); | 
| 5139 | if (IS_ERR(group_leader)) { | 5573 | if (IS_ERR(group_leader)) { | 
| 5140 | err = PTR_ERR(group_leader); | 5574 | err = PTR_ERR(group_leader); | 
| 5141 | goto err_put_context; | 5575 | goto err_fd; | 
| 5142 | } | 5576 | } | 
| 5143 | group_file = group_leader->filp; | 5577 | group_file = group_leader->filp; | 
| 5144 | if (flags & PERF_FLAG_FD_OUTPUT) | 5578 | if (flags & PERF_FLAG_FD_OUTPUT) | 
| @@ -5147,6 +5581,58 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 5147 | group_leader = NULL; | 5581 | group_leader = NULL; | 
| 5148 | } | 5582 | } | 
| 5149 | 5583 | ||
| 5584 | event = perf_event_alloc(&attr, cpu, group_leader, NULL, NULL); | ||
| 5585 | if (IS_ERR(event)) { | ||
| 5586 | err = PTR_ERR(event); | ||
| 5587 | goto err_fd; | ||
| 5588 | } | ||
| 5589 | |||
| 5590 | /* | ||
| 5591 | * Special case software events and allow them to be part of | ||
| 5592 | * any hardware group. | ||
| 5593 | */ | ||
| 5594 | pmu = event->pmu; | ||
| 5595 | |||
| 5596 | if (group_leader && | ||
| 5597 | (is_software_event(event) != is_software_event(group_leader))) { | ||
| 5598 | if (is_software_event(event)) { | ||
| 5599 | /* | ||
| 5600 | * If event and group_leader are not both a software | ||
| 5601 | * event, and event is, then group leader is not. | ||
| 5602 | * | ||
| 5603 | * Allow the addition of software events to !software | ||
| 5604 | * groups, this is safe because software events never | ||
| 5605 | * fail to schedule. | ||
| 5606 | */ | ||
| 5607 | pmu = group_leader->pmu; | ||
| 5608 | } else if (is_software_event(group_leader) && | ||
| 5609 | (group_leader->group_flags & PERF_GROUP_SOFTWARE)) { | ||
| 5610 | /* | ||
| 5611 | * In case the group is a pure software group, and we | ||
| 5612 | * try to add a hardware event, move the whole group to | ||
| 5613 | * the hardware context. | ||
| 5614 | */ | ||
| 5615 | move_group = 1; | ||
| 5616 | } | ||
| 5617 | } | ||
| 5618 | |||
| 5619 | if (pid != -1) { | ||
| 5620 | task = find_lively_task_by_vpid(pid); | ||
| 5621 | if (IS_ERR(task)) { | ||
| 5622 | err = PTR_ERR(task); | ||
| 5623 | goto err_group_fd; | ||
| 5624 | } | ||
| 5625 | } | ||
| 5626 | |||
| 5627 | /* | ||
| 5628 | * Get the target context (task or percpu): | ||
| 5629 | */ | ||
| 5630 | ctx = find_get_context(pmu, task, cpu); | ||
| 5631 | if (IS_ERR(ctx)) { | ||
| 5632 | err = PTR_ERR(ctx); | ||
| 5633 | goto err_group_fd; | ||
| 5634 | } | ||
| 5635 | |||
| 5150 | /* | 5636 | /* | 
| 5151 | * Look up the group leader (we will attach this event to it): | 5637 | * Look up the group leader (we will attach this event to it): | 
| 5152 | */ | 5638 | */ | 
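The comments added above describe the one cross-pmu grouping that stays legal now that each pmu has its own context: a software event may join a hardware group, and a pure-software group is migrated wholesale when a hardware event is added to it. From userspace this is still just the group_fd mechanism; the program below opens a hardware cycle counter as group leader and attaches a software page-fault counter to it. Whether the cycle counter is permitted depends on perf_event_paranoid and on hardware support, so treat this as a usage illustration with error handling trimmed.

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
                           int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr hw, sw;
        uint64_t cycles = 0, faults = 0;
        int leader, member;

        memset(&hw, 0, sizeof(hw));
        hw.type = PERF_TYPE_HARDWARE;           /* hardware group leader */
        hw.size = sizeof(hw);
        hw.config = PERF_COUNT_HW_CPU_CYCLES;
        hw.disabled = 1;                        /* start the group disabled */

        memset(&sw, 0, sizeof(sw));
        sw.type = PERF_TYPE_SOFTWARE;           /* software event joins the hw group */
        sw.size = sizeof(sw);
        sw.config = PERF_COUNT_SW_PAGE_FAULTS;

        leader = perf_event_open(&hw, 0, -1, -1, 0);
        member = perf_event_open(&sw, 0, -1, leader, 0);
        if (leader < 0 || member < 0) {
                perror("perf_event_open");
                return 1;
        }

        ioctl(leader, PERF_EVENT_IOC_ENABLE, 0);
        /* ... measured workload would run here ... */
        ioctl(leader, PERF_EVENT_IOC_DISABLE, 0);

        read(leader, &cycles, sizeof(cycles));
        read(member, &faults, sizeof(faults));
        printf("cycles=%llu page-faults=%llu\n",
               (unsigned long long)cycles, (unsigned long long)faults);
        return 0;
}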
| @@ -5158,42 +5644,66 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 5158 | * becoming part of another group-sibling): | 5644 | * becoming part of another group-sibling): | 
| 5159 | */ | 5645 | */ | 
| 5160 | if (group_leader->group_leader != group_leader) | 5646 | if (group_leader->group_leader != group_leader) | 
| 5161 | goto err_put_context; | 5647 | goto err_context; | 
| 5162 | /* | 5648 | /* | 
| 5163 | * Do not allow to attach to a group in a different | 5649 | * Do not allow to attach to a group in a different | 
| 5164 | * task or CPU context: | 5650 | * task or CPU context: | 
| 5165 | */ | 5651 | */ | 
| 5166 | if (group_leader->ctx != ctx) | 5652 | if (move_group) { | 
| 5167 | goto err_put_context; | 5653 | if (group_leader->ctx->type != ctx->type) | 
| 5654 | goto err_context; | ||
| 5655 | } else { | ||
| 5656 | if (group_leader->ctx != ctx) | ||
| 5657 | goto err_context; | ||
| 5658 | } | ||
| 5659 | |||
| 5168 | /* | 5660 | /* | 
| 5169 | * Only a group leader can be exclusive or pinned | 5661 | * Only a group leader can be exclusive or pinned | 
| 5170 | */ | 5662 | */ | 
| 5171 | if (attr.exclusive || attr.pinned) | 5663 | if (attr.exclusive || attr.pinned) | 
| 5172 | goto err_put_context; | 5664 | goto err_context; | 
| 5173 | } | ||
| 5174 | |||
| 5175 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, | ||
| 5176 | NULL, NULL, GFP_KERNEL); | ||
| 5177 | if (IS_ERR(event)) { | ||
| 5178 | err = PTR_ERR(event); | ||
| 5179 | goto err_put_context; | ||
| 5180 | } | 5665 | } | 
| 5181 | 5666 | ||
| 5182 | if (output_event) { | 5667 | if (output_event) { | 
| 5183 | err = perf_event_set_output(event, output_event); | 5668 | err = perf_event_set_output(event, output_event); | 
| 5184 | if (err) | 5669 | if (err) | 
| 5185 | goto err_free_put_context; | 5670 | goto err_context; | 
| 5186 | } | 5671 | } | 
| 5187 | 5672 | ||
| 5188 | event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); | 5673 | event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); | 
| 5189 | if (IS_ERR(event_file)) { | 5674 | if (IS_ERR(event_file)) { | 
| 5190 | err = PTR_ERR(event_file); | 5675 | err = PTR_ERR(event_file); | 
| 5191 | goto err_free_put_context; | 5676 | goto err_context; | 
| 5677 | } | ||
| 5678 | |||
| 5679 | if (move_group) { | ||
| 5680 | struct perf_event_context *gctx = group_leader->ctx; | ||
| 5681 | |||
| 5682 | mutex_lock(&gctx->mutex); | ||
| 5683 | perf_event_remove_from_context(group_leader); | ||
| 5684 | list_for_each_entry(sibling, &group_leader->sibling_list, | ||
| 5685 | group_entry) { | ||
| 5686 | perf_event_remove_from_context(sibling); | ||
| 5687 | put_ctx(gctx); | ||
| 5688 | } | ||
| 5689 | mutex_unlock(&gctx->mutex); | ||
| 5690 | put_ctx(gctx); | ||
| 5192 | } | 5691 | } | 
| 5193 | 5692 | ||
| 5194 | event->filp = event_file; | 5693 | event->filp = event_file; | 
| 5195 | WARN_ON_ONCE(ctx->parent_ctx); | 5694 | WARN_ON_ONCE(ctx->parent_ctx); | 
| 5196 | mutex_lock(&ctx->mutex); | 5695 | mutex_lock(&ctx->mutex); | 
| 5696 | |||
| 5697 | if (move_group) { | ||
| 5698 | perf_install_in_context(ctx, group_leader, cpu); | ||
| 5699 | get_ctx(ctx); | ||
| 5700 | list_for_each_entry(sibling, &group_leader->sibling_list, | ||
| 5701 | group_entry) { | ||
| 5702 | perf_install_in_context(ctx, sibling, cpu); | ||
| 5703 | get_ctx(ctx); | ||
| 5704 | } | ||
| 5705 | } | ||
| 5706 | |||
| 5197 | perf_install_in_context(ctx, event, cpu); | 5707 | perf_install_in_context(ctx, event, cpu); | 
| 5198 | ++ctx->generation; | 5708 | ++ctx->generation; | 
| 5199 | mutex_unlock(&ctx->mutex); | 5709 | mutex_unlock(&ctx->mutex); | 
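For the move_group case the hunks above split the migration into two critical sections: the group leader and its siblings are first detached from the software context under gctx->mutex, then installed into the hardware context under ctx->mutex, and each event's context reference moves with a put_ctx(gctx)/get_ctx(ctx) pair (the leader's put is issued after gctx->mutex is dropped). The toy model below only illustrates that reference bookkeeping; the struct is a stand-in, not the kernel's perf_event_context.

#include <stdio.h>

struct toy_ctx {
        const char *name;
        int refcount;           /* references held by events living in this context */
        int nr_events;
};

static void get_ctx(struct toy_ctx *ctx) { ctx->refcount++; }
static void put_ctx(struct toy_ctx *ctx) { ctx->refcount--; }

/* Move nr events from gctx to ctx: each event drops its old reference and takes a new one. */
static void move_group(struct toy_ctx *gctx, struct toy_ctx *ctx, int nr)
{
        int i;

        for (i = 0; i < nr; i++) {      /* phase 1: remove_from_context under gctx->mutex */
                gctx->nr_events--;
                put_ctx(gctx);
        }
        for (i = 0; i < nr; i++) {      /* phase 2: install_in_context under ctx->mutex */
                ctx->nr_events++;
                get_ctx(ctx);
        }
}

int main(void)
{
        struct toy_ctx sw = { "software", 3, 3 };       /* leader + two siblings */
        struct toy_ctx hw = { "hardware", 0, 0 };

        move_group(&sw, &hw, 3);
        printf("%s: ref=%d nr=%d   %s: ref=%d nr=%d\n",
               sw.name, sw.refcount, sw.nr_events, hw.name, hw.refcount, hw.nr_events);
        return 0;
}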
| @@ -5214,11 +5724,11 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 5214 | fd_install(event_fd, event_file); | 5724 | fd_install(event_fd, event_file); | 
| 5215 | return event_fd; | 5725 | return event_fd; | 
| 5216 | 5726 | ||
| 5217 | err_free_put_context: | 5727 | err_context: | 
| 5218 | free_event(event); | ||
| 5219 | err_put_context: | ||
| 5220 | fput_light(group_file, fput_needed); | ||
| 5221 | put_ctx(ctx); | 5728 | put_ctx(ctx); | 
| 5729 | err_group_fd: | ||
| 5730 | fput_light(group_file, fput_needed); | ||
| 5731 | free_event(event); | ||
| 5222 | err_fd: | 5732 | err_fd: | 
| 5223 | put_unused_fd(event_fd); | 5733 | put_unused_fd(event_fd); | 
| 5224 | return err; | 5734 | return err; | 
| @@ -5229,32 +5739,31 @@ err_fd: | |||
| 5229 | * | 5739 | * | 
| 5230 | * @attr: attributes of the counter to create | 5740 | * @attr: attributes of the counter to create | 
| 5231 | * @cpu: cpu in which the counter is bound | 5741 | * @cpu: cpu in which the counter is bound | 
| 5232 | * @pid: task to profile | 5742 | * @task: task to profile (NULL for percpu) | 
| 5233 | */ | 5743 | */ | 
| 5234 | struct perf_event * | 5744 | struct perf_event * | 
| 5235 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | 5745 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | 
| 5236 | pid_t pid, | 5746 | struct task_struct *task, | 
| 5237 | perf_overflow_handler_t overflow_handler) | 5747 | perf_overflow_handler_t overflow_handler) | 
| 5238 | { | 5748 | { | 
| 5239 | struct perf_event *event; | ||
| 5240 | struct perf_event_context *ctx; | 5749 | struct perf_event_context *ctx; | 
| 5750 | struct perf_event *event; | ||
| 5241 | int err; | 5751 | int err; | 
| 5242 | 5752 | ||
| 5243 | /* | 5753 | /* | 
| 5244 | * Get the target context (task or percpu): | 5754 | * Get the target context (task or percpu): | 
| 5245 | */ | 5755 | */ | 
| 5246 | 5756 | ||
| 5247 | ctx = find_get_context(pid, cpu); | 5757 | event = perf_event_alloc(attr, cpu, NULL, NULL, overflow_handler); | 
| 5248 | if (IS_ERR(ctx)) { | ||
| 5249 | err = PTR_ERR(ctx); | ||
| 5250 | goto err_exit; | ||
| 5251 | } | ||
| 5252 | |||
| 5253 | event = perf_event_alloc(attr, cpu, ctx, NULL, | ||
| 5254 | NULL, overflow_handler, GFP_KERNEL); | ||
| 5255 | if (IS_ERR(event)) { | 5758 | if (IS_ERR(event)) { | 
| 5256 | err = PTR_ERR(event); | 5759 | err = PTR_ERR(event); | 
| 5257 | goto err_put_context; | 5760 | goto err; | 
| 5761 | } | ||
| 5762 | |||
| 5763 | ctx = find_get_context(event->pmu, task, cpu); | ||
| 5764 | if (IS_ERR(ctx)) { | ||
| 5765 | err = PTR_ERR(ctx); | ||
| 5766 | goto err_free; | ||
| 5258 | } | 5767 | } | 
| 5259 | 5768 | ||
| 5260 | event->filp = NULL; | 5769 | event->filp = NULL; | 
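This hunk changes the in-kernel API: perf_event_create_kernel_counter() now takes a struct task_struct * (NULL for a per-cpu counter) instead of a pid, and the event is allocated before the context lookup so that find_get_context() can be keyed on event->pmu. A hedged sketch of a caller using the new signature follows; it is a minimal module-style example written for this note, not taken from the tree, and it leaves the overflow handler NULL so the default one is used.

#include <linux/module.h>
#include <linux/perf_event.h>
#include <linux/err.h>

static struct perf_event *example_event;

static int __init example_init(void)
{
        struct perf_event_attr attr = {
                .type   = PERF_TYPE_SOFTWARE,
                .config = PERF_COUNT_SW_CPU_CLOCK,
                .size   = sizeof(attr),
        };

        /* cpu 0, task == NULL: a per-cpu counter; pass a task_struct instead for per-task. */
        example_event = perf_event_create_kernel_counter(&attr, 0, NULL, NULL);
        if (IS_ERR(example_event))
                return PTR_ERR(example_event);
        return 0;
}

static void __exit example_exit(void)
{
        perf_event_release_kernel(example_event);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");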
| @@ -5272,112 +5781,13 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
| 5272 | 5781 | ||
| 5273 | return event; | 5782 | return event; | 
| 5274 | 5783 | ||
| 5275 | err_put_context: | 5784 | err_free: | 
| 5276 | put_ctx(ctx); | 5785 | free_event(event); | 
| 5277 | err_exit: | 5786 | err: | 
| 5278 | return ERR_PTR(err); | 5787 | return ERR_PTR(err); | 
| 5279 | } | 5788 | } | 
| 5280 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); | 5789 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); | 
| 5281 | 5790 | ||
| 5282 | /* | ||
| 5283 | * inherit a event from parent task to child task: | ||
| 5284 | */ | ||
| 5285 | static struct perf_event * | ||
| 5286 | inherit_event(struct perf_event *parent_event, | ||
| 5287 | struct task_struct *parent, | ||
| 5288 | struct perf_event_context *parent_ctx, | ||
| 5289 | struct task_struct *child, | ||
| 5290 | struct perf_event *group_leader, | ||
| 5291 | struct perf_event_context *child_ctx) | ||
| 5292 | { | ||
| 5293 | struct perf_event *child_event; | ||
| 5294 | |||
| 5295 | /* | ||
| 5296 | * Instead of creating recursive hierarchies of events, | ||
| 5297 | * we link inherited events back to the original parent, | ||
| 5298 | * which has a filp for sure, which we use as the reference | ||
| 5299 | * count: | ||
| 5300 | */ | ||
| 5301 | if (parent_event->parent) | ||
| 5302 | parent_event = parent_event->parent; | ||
| 5303 | |||
| 5304 | child_event = perf_event_alloc(&parent_event->attr, | ||
| 5305 | parent_event->cpu, child_ctx, | ||
| 5306 | group_leader, parent_event, | ||
| 5307 | NULL, GFP_KERNEL); | ||
| 5308 | if (IS_ERR(child_event)) | ||
| 5309 | return child_event; | ||
| 5310 | get_ctx(child_ctx); | ||
| 5311 | |||
| 5312 | /* | ||
| 5313 | * Make the child state follow the state of the parent event, | ||
| 5314 | * not its attr.disabled bit. We hold the parent's mutex, | ||
| 5315 | * so we won't race with perf_event_{en, dis}able_family. | ||
| 5316 | */ | ||
| 5317 | if (parent_event->state >= PERF_EVENT_STATE_INACTIVE) | ||
| 5318 | child_event->state = PERF_EVENT_STATE_INACTIVE; | ||
| 5319 | else | ||
| 5320 | child_event->state = PERF_EVENT_STATE_OFF; | ||
| 5321 | |||
| 5322 | if (parent_event->attr.freq) { | ||
| 5323 | u64 sample_period = parent_event->hw.sample_period; | ||
| 5324 | struct hw_perf_event *hwc = &child_event->hw; | ||
| 5325 | |||
| 5326 | hwc->sample_period = sample_period; | ||
| 5327 | hwc->last_period = sample_period; | ||
| 5328 | |||
| 5329 | local64_set(&hwc->period_left, sample_period); | ||
| 5330 | } | ||
| 5331 | |||
| 5332 | child_event->overflow_handler = parent_event->overflow_handler; | ||
| 5333 | |||
| 5334 | /* | ||
| 5335 | * Link it up in the child's context: | ||
| 5336 | */ | ||
| 5337 | add_event_to_ctx(child_event, child_ctx); | ||
| 5338 | |||
| 5339 | /* | ||
| 5340 | * Get a reference to the parent filp - we will fput it | ||
| 5341 | * when the child event exits. This is safe to do because | ||
| 5342 | * we are in the parent and we know that the filp still | ||
| 5343 | * exists and has a nonzero count: | ||
| 5344 | */ | ||
| 5345 | atomic_long_inc(&parent_event->filp->f_count); | ||
| 5346 | |||
| 5347 | /* | ||
| 5348 | * Link this into the parent event's child list | ||
| 5349 | */ | ||
| 5350 | WARN_ON_ONCE(parent_event->ctx->parent_ctx); | ||
| 5351 | mutex_lock(&parent_event->child_mutex); | ||
| 5352 | list_add_tail(&child_event->child_list, &parent_event->child_list); | ||
| 5353 | mutex_unlock(&parent_event->child_mutex); | ||
| 5354 | |||
| 5355 | return child_event; | ||
| 5356 | } | ||
| 5357 | |||
| 5358 | static int inherit_group(struct perf_event *parent_event, | ||
| 5359 | struct task_struct *parent, | ||
| 5360 | struct perf_event_context *parent_ctx, | ||
| 5361 | struct task_struct *child, | ||
| 5362 | struct perf_event_context *child_ctx) | ||
| 5363 | { | ||
| 5364 | struct perf_event *leader; | ||
| 5365 | struct perf_event *sub; | ||
| 5366 | struct perf_event *child_ctr; | ||
| 5367 | |||
| 5368 | leader = inherit_event(parent_event, parent, parent_ctx, | ||
| 5369 | child, NULL, child_ctx); | ||
| 5370 | if (IS_ERR(leader)) | ||
| 5371 | return PTR_ERR(leader); | ||
| 5372 | list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { | ||
| 5373 | child_ctr = inherit_event(sub, parent, parent_ctx, | ||
| 5374 | child, leader, child_ctx); | ||
| 5375 | if (IS_ERR(child_ctr)) | ||
| 5376 | return PTR_ERR(child_ctr); | ||
| 5377 | } | ||
| 5378 | return 0; | ||
| 5379 | } | ||
| 5380 | |||
| 5381 | static void sync_child_event(struct perf_event *child_event, | 5791 | static void sync_child_event(struct perf_event *child_event, | 
| 5382 | struct task_struct *child) | 5792 | struct task_struct *child) | 
| 5383 | { | 5793 | { | 
| @@ -5434,16 +5844,13 @@ __perf_event_exit_task(struct perf_event *child_event, | |||
| 5434 | } | 5844 | } | 
| 5435 | } | 5845 | } | 
| 5436 | 5846 | ||
| 5437 | /* | 5847 | static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | 
| 5438 | * When a child task exits, feed back event values to parent events. | ||
| 5439 | */ | ||
| 5440 | void perf_event_exit_task(struct task_struct *child) | ||
| 5441 | { | 5848 | { | 
| 5442 | struct perf_event *child_event, *tmp; | 5849 | struct perf_event *child_event, *tmp; | 
| 5443 | struct perf_event_context *child_ctx; | 5850 | struct perf_event_context *child_ctx; | 
| 5444 | unsigned long flags; | 5851 | unsigned long flags; | 
| 5445 | 5852 | ||
| 5446 | if (likely(!child->perf_event_ctxp)) { | 5853 | if (likely(!child->perf_event_ctxp[ctxn])) { | 
| 5447 | perf_event_task(child, NULL, 0); | 5854 | perf_event_task(child, NULL, 0); | 
| 5448 | return; | 5855 | return; | 
| 5449 | } | 5856 | } | 
| @@ -5455,7 +5862,7 @@ void perf_event_exit_task(struct task_struct *child) | |||
| 5455 | * scheduled, so we are now safe from rescheduling changing | 5862 | * scheduled, so we are now safe from rescheduling changing | 
| 5456 | * our context. | 5863 | * our context. | 
| 5457 | */ | 5864 | */ | 
| 5458 | child_ctx = child->perf_event_ctxp; | 5865 | child_ctx = child->perf_event_ctxp[ctxn]; | 
| 5459 | __perf_event_task_sched_out(child_ctx); | 5866 | __perf_event_task_sched_out(child_ctx); | 
| 5460 | 5867 | ||
| 5461 | /* | 5868 | /* | 
| @@ -5464,7 +5871,7 @@ void perf_event_exit_task(struct task_struct *child) | |||
| 5464 | * incremented the context's refcount before we do put_ctx below. | 5871 | * incremented the context's refcount before we do put_ctx below. | 
| 5465 | */ | 5872 | */ | 
| 5466 | raw_spin_lock(&child_ctx->lock); | 5873 | raw_spin_lock(&child_ctx->lock); | 
| 5467 | child->perf_event_ctxp = NULL; | 5874 | child->perf_event_ctxp[ctxn] = NULL; | 
| 5468 | /* | 5875 | /* | 
| 5469 | * If this context is a clone; unclone it so it can't get | 5876 | * If this context is a clone; unclone it so it can't get | 
| 5470 | * swapped to another process while we're removing all | 5877 | * swapped to another process while we're removing all | 
| @@ -5517,6 +5924,17 @@ again: | |||
| 5517 | put_ctx(child_ctx); | 5924 | put_ctx(child_ctx); | 
| 5518 | } | 5925 | } | 
| 5519 | 5926 | ||
| 5927 | /* | ||
| 5928 | * When a child task exits, feed back event values to parent events. | ||
| 5929 | */ | ||
| 5930 | void perf_event_exit_task(struct task_struct *child) | ||
| 5931 | { | ||
| 5932 | int ctxn; | ||
| 5933 | |||
| 5934 | for_each_task_context_nr(ctxn) | ||
| 5935 | perf_event_exit_task_context(child, ctxn); | ||
| 5936 | } | ||
| 5937 | |||
| 5520 | static void perf_free_event(struct perf_event *event, | 5938 | static void perf_free_event(struct perf_event *event, | 
| 5521 | struct perf_event_context *ctx) | 5939 | struct perf_event_context *ctx) | 
| 5522 | { | 5940 | { | 
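perf_event_exit_task() is now a thin wrapper that walks every per-task context number with for_each_task_context_nr() and tears each one down. The iterator's definition is not part of this diff; the userspace model below assumes it is a simple 0..perf_nr_task_contexts loop over the new perf_event_ctxp[] array (two slots, hardware and software), which is an assumption about the rest of the series rather than something shown in this hunk.

#include <stdio.h>

/* Assumed context-type numbering; the real enum lives outside this diff. */
enum { perf_hw_context, perf_sw_context, perf_nr_task_contexts };

#define for_each_task_context_nr(ctxn) \
        for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)

struct toy_task {
        void *perf_event_ctxp[perf_nr_task_contexts];   /* one context pointer per type */
};

static void exit_task_context(struct toy_task *task, int ctxn)
{
        if (task->perf_event_ctxp[ctxn])
                printf("tearing down context %d\n", ctxn);
        task->perf_event_ctxp[ctxn] = NULL;
}

int main(void)
{
        struct toy_task task = { { (void *)0x1, NULL } };
        int ctxn;

        for_each_task_context_nr(ctxn)          /* mirrors perf_event_exit_task() */
                exit_task_context(&task, ctxn);
        return 0;
}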
| @@ -5538,48 +5956,165 @@ static void perf_free_event(struct perf_event *event, | |||
| 5538 | 5956 | ||
| 5539 | /* | 5957 | /* | 
| 5540 | * free an unexposed, unused context as created by inheritance by | 5958 | * free an unexposed, unused context as created by inheritance by | 
| 5541 | * init_task below, used by fork() in case of fail. | 5959 | * perf_event_init_task below, used by fork() in case of fail. | 
| 5542 | */ | 5960 | */ | 
| 5543 | void perf_event_free_task(struct task_struct *task) | 5961 | void perf_event_free_task(struct task_struct *task) | 
| 5544 | { | 5962 | { | 
| 5545 | struct perf_event_context *ctx = task->perf_event_ctxp; | 5963 | struct perf_event_context *ctx; | 
| 5546 | struct perf_event *event, *tmp; | 5964 | struct perf_event *event, *tmp; | 
| 5965 | int ctxn; | ||
| 5547 | 5966 | ||
| 5548 | if (!ctx) | 5967 | for_each_task_context_nr(ctxn) { | 
| 5549 | return; | 5968 | ctx = task->perf_event_ctxp[ctxn]; | 
| 5969 | if (!ctx) | ||
| 5970 | continue; | ||
| 5550 | 5971 | ||
| 5551 | mutex_lock(&ctx->mutex); | 5972 | mutex_lock(&ctx->mutex); | 
| 5552 | again: | 5973 | again: | 
| 5553 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) | 5974 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, | 
| 5554 | perf_free_event(event, ctx); | 5975 | group_entry) | 
| 5976 | perf_free_event(event, ctx); | ||
| 5555 | 5977 | ||
| 5556 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, | 5978 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, | 
| 5557 | group_entry) | 5979 | group_entry) | 
| 5558 | perf_free_event(event, ctx); | 5980 | perf_free_event(event, ctx); | 
| 5559 | 5981 | ||
| 5560 | if (!list_empty(&ctx->pinned_groups) || | 5982 | if (!list_empty(&ctx->pinned_groups) || | 
| 5561 | !list_empty(&ctx->flexible_groups)) | 5983 | !list_empty(&ctx->flexible_groups)) | 
| 5562 | goto again; | 5984 | goto again; | 
| 5563 | 5985 | ||
| 5564 | mutex_unlock(&ctx->mutex); | 5986 | mutex_unlock(&ctx->mutex); | 
| 5565 | 5987 | ||
| 5566 | put_ctx(ctx); | 5988 | put_ctx(ctx); | 
| 5989 | } | ||
| 5990 | } | ||
| 5991 | |||
| 5992 | void perf_event_delayed_put(struct task_struct *task) | ||
| 5993 | { | ||
| 5994 | int ctxn; | ||
| 5995 | |||
| 5996 | for_each_task_context_nr(ctxn) | ||
| 5997 | WARN_ON_ONCE(task->perf_event_ctxp[ctxn]); | ||
| 5998 | } | ||
| 5999 | |||
| 6000 | /* | ||
| 6001 | * inherit a event from parent task to child task: | ||
| 6002 | */ | ||
| 6003 | static struct perf_event * | ||
| 6004 | inherit_event(struct perf_event *parent_event, | ||
| 6005 | struct task_struct *parent, | ||
| 6006 | struct perf_event_context *parent_ctx, | ||
| 6007 | struct task_struct *child, | ||
| 6008 | struct perf_event *group_leader, | ||
| 6009 | struct perf_event_context *child_ctx) | ||
| 6010 | { | ||
| 6011 | struct perf_event *child_event; | ||
| 6012 | unsigned long flags; | ||
| 6013 | |||
| 6014 | /* | ||
| 6015 | * Instead of creating recursive hierarchies of events, | ||
| 6016 | * we link inherited events back to the original parent, | ||
| 6017 | * which has a filp for sure, which we use as the reference | ||
| 6018 | * count: | ||
| 6019 | */ | ||
| 6020 | if (parent_event->parent) | ||
| 6021 | parent_event = parent_event->parent; | ||
| 6022 | |||
| 6023 | child_event = perf_event_alloc(&parent_event->attr, | ||
| 6024 | parent_event->cpu, | ||
| 6025 | group_leader, parent_event, | ||
| 6026 | NULL); | ||
| 6027 | if (IS_ERR(child_event)) | ||
| 6028 | return child_event; | ||
| 6029 | get_ctx(child_ctx); | ||
| 6030 | |||
| 6031 | /* | ||
| 6032 | * Make the child state follow the state of the parent event, | ||
| 6033 | * not its attr.disabled bit. We hold the parent's mutex, | ||
| 6034 | * so we won't race with perf_event_{en, dis}able_family. | ||
| 6035 | */ | ||
| 6036 | if (parent_event->state >= PERF_EVENT_STATE_INACTIVE) | ||
| 6037 | child_event->state = PERF_EVENT_STATE_INACTIVE; | ||
| 6038 | else | ||
| 6039 | child_event->state = PERF_EVENT_STATE_OFF; | ||
| 6040 | |||
| 6041 | if (parent_event->attr.freq) { | ||
| 6042 | u64 sample_period = parent_event->hw.sample_period; | ||
| 6043 | struct hw_perf_event *hwc = &child_event->hw; | ||
| 6044 | |||
| 6045 | hwc->sample_period = sample_period; | ||
| 6046 | hwc->last_period = sample_period; | ||
| 6047 | |||
| 6048 | local64_set(&hwc->period_left, sample_period); | ||
| 6049 | } | ||
| 6050 | |||
| 6051 | child_event->ctx = child_ctx; | ||
| 6052 | child_event->overflow_handler = parent_event->overflow_handler; | ||
| 6053 | |||
| 6054 | /* | ||
| 6055 | * Link it up in the child's context: | ||
| 6056 | */ | ||
| 6057 | raw_spin_lock_irqsave(&child_ctx->lock, flags); | ||
| 6058 | add_event_to_ctx(child_event, child_ctx); | ||
| 6059 | raw_spin_unlock_irqrestore(&child_ctx->lock, flags); | ||
| 6060 | |||
| 6061 | /* | ||
| 6062 | * Get a reference to the parent filp - we will fput it | ||
| 6063 | * when the child event exits. This is safe to do because | ||
| 6064 | * we are in the parent and we know that the filp still | ||
| 6065 | * exists and has a nonzero count: | ||
| 6066 | */ | ||
| 6067 | atomic_long_inc(&parent_event->filp->f_count); | ||
| 6068 | |||
| 6069 | /* | ||
| 6070 | * Link this into the parent event's child list | ||
| 6071 | */ | ||
| 6072 | WARN_ON_ONCE(parent_event->ctx->parent_ctx); | ||
| 6073 | mutex_lock(&parent_event->child_mutex); | ||
| 6074 | list_add_tail(&child_event->child_list, &parent_event->child_list); | ||
| 6075 | mutex_unlock(&parent_event->child_mutex); | ||
| 6076 | |||
| 6077 | return child_event; | ||
| 6078 | } | ||
| 6079 | |||
| 6080 | static int inherit_group(struct perf_event *parent_event, | ||
| 6081 | struct task_struct *parent, | ||
| 6082 | struct perf_event_context *parent_ctx, | ||
| 6083 | struct task_struct *child, | ||
| 6084 | struct perf_event_context *child_ctx) | ||
| 6085 | { | ||
| 6086 | struct perf_event *leader; | ||
| 6087 | struct perf_event *sub; | ||
| 6088 | struct perf_event *child_ctr; | ||
| 6089 | |||
| 6090 | leader = inherit_event(parent_event, parent, parent_ctx, | ||
| 6091 | child, NULL, child_ctx); | ||
| 6092 | if (IS_ERR(leader)) | ||
| 6093 | return PTR_ERR(leader); | ||
| 6094 | list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { | ||
| 6095 | child_ctr = inherit_event(sub, parent, parent_ctx, | ||
| 6096 | child, leader, child_ctx); | ||
| 6097 | if (IS_ERR(child_ctr)) | ||
| 6098 | return PTR_ERR(child_ctr); | ||
| 6099 | } | ||
| 6100 | return 0; | ||
| 5567 | } | 6101 | } | 
| 5568 | 6102 | ||
| 5569 | static int | 6103 | static int | 
| 5570 | inherit_task_group(struct perf_event *event, struct task_struct *parent, | 6104 | inherit_task_group(struct perf_event *event, struct task_struct *parent, | 
| 5571 | struct perf_event_context *parent_ctx, | 6105 | struct perf_event_context *parent_ctx, | 
| 5572 | struct task_struct *child, | 6106 | struct task_struct *child, int ctxn, | 
| 5573 | int *inherited_all) | 6107 | int *inherited_all) | 
| 5574 | { | 6108 | { | 
| 5575 | int ret; | 6109 | int ret; | 
| 5576 | struct perf_event_context *child_ctx = child->perf_event_ctxp; | 6110 | struct perf_event_context *child_ctx; | 
| 5577 | 6111 | ||
| 5578 | if (!event->attr.inherit) { | 6112 | if (!event->attr.inherit) { | 
| 5579 | *inherited_all = 0; | 6113 | *inherited_all = 0; | 
| 5580 | return 0; | 6114 | return 0; | 
| 5581 | } | 6115 | } | 
| 5582 | 6116 | ||
| 6117 | child_ctx = child->perf_event_ctxp[ctxn]; | ||
| 5583 | if (!child_ctx) { | 6118 | if (!child_ctx) { | 
| 5584 | /* | 6119 | /* | 
| 5585 | * This is executed from the parent task context, so | 6120 | * This is executed from the parent task context, so | 
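The inherit_event()/inherit_group() code relocated above is what gives attr.inherit its userspace semantics: a counter opened with inherit set is cloned into each forked child, and the child's counts are folded back into the parent event when the child exits (the sync_child_event() path earlier in this file). A small demonstration from the userspace side, error handling omitted:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count = 0;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.type = PERF_TYPE_SOFTWARE;
        attr.size = sizeof(attr);
        attr.config = PERF_COUNT_SW_TASK_CLOCK;
        attr.inherit = 1;                       /* children get a cloned counter */

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);

        if (fork() == 0) {                      /* child inherits a clone of the event */
                volatile unsigned long i;
                for (i = 0; i < 10000000; i++)
                        ;
                _exit(0);
        }
        wait(NULL);

        read(fd, &count, sizeof(count));        /* parent time plus the exited child's */
        printf("task clock: %llu ns\n", (unsigned long long)count);
        return 0;
}

Note that, as the check near the top of perf_event_alloc() shows, inherit cannot be combined with a PERF_FORMAT_GROUP read_format.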
| @@ -5588,14 +6123,11 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, | |||
| 5588 | * child. | 6123 | * child. | 
| 5589 | */ | 6124 | */ | 
| 5590 | 6125 | ||
| 5591 | child_ctx = kzalloc(sizeof(struct perf_event_context), | 6126 | child_ctx = alloc_perf_context(event->pmu, child); | 
| 5592 | GFP_KERNEL); | ||
| 5593 | if (!child_ctx) | 6127 | if (!child_ctx) | 
| 5594 | return -ENOMEM; | 6128 | return -ENOMEM; | 
| 5595 | 6129 | ||
| 5596 | __perf_event_init_context(child_ctx, child); | 6130 | child->perf_event_ctxp[ctxn] = child_ctx; | 
| 5597 | child->perf_event_ctxp = child_ctx; | ||
| 5598 | get_task_struct(child); | ||
| 5599 | } | 6131 | } | 
| 5600 | 6132 | ||
| 5601 | ret = inherit_group(event, parent, parent_ctx, | 6133 | ret = inherit_group(event, parent, parent_ctx, | 
| @@ -5607,11 +6139,10 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, | |||
| 5607 | return ret; | 6139 | return ret; | 
| 5608 | } | 6140 | } | 
| 5609 | 6141 | ||
| 5610 | |||
| 5611 | /* | 6142 | /* | 
| 5612 | * Initialize the perf_event context in task_struct | 6143 | * Initialize the perf_event context in task_struct | 
| 5613 | */ | 6144 | */ | 
| 5614 | int perf_event_init_task(struct task_struct *child) | 6145 | int perf_event_init_context(struct task_struct *child, int ctxn) | 
| 5615 | { | 6146 | { | 
| 5616 | struct perf_event_context *child_ctx, *parent_ctx; | 6147 | struct perf_event_context *child_ctx, *parent_ctx; | 
| 5617 | struct perf_event_context *cloned_ctx; | 6148 | struct perf_event_context *cloned_ctx; | 
| @@ -5620,19 +6151,19 @@ int perf_event_init_task(struct task_struct *child) | |||
| 5620 | int inherited_all = 1; | 6151 | int inherited_all = 1; | 
| 5621 | int ret = 0; | 6152 | int ret = 0; | 
| 5622 | 6153 | ||
| 5623 | child->perf_event_ctxp = NULL; | 6154 | child->perf_event_ctxp[ctxn] = NULL; | 
| 5624 | 6155 | ||
| 5625 | mutex_init(&child->perf_event_mutex); | 6156 | mutex_init(&child->perf_event_mutex); | 
| 5626 | INIT_LIST_HEAD(&child->perf_event_list); | 6157 | INIT_LIST_HEAD(&child->perf_event_list); | 
| 5627 | 6158 | ||
| 5628 | if (likely(!parent->perf_event_ctxp)) | 6159 | if (likely(!parent->perf_event_ctxp[ctxn])) | 
| 5629 | return 0; | 6160 | return 0; | 
| 5630 | 6161 | ||
| 5631 | /* | 6162 | /* | 
| 5632 | * If the parent's context is a clone, pin it so it won't get | 6163 | * If the parent's context is a clone, pin it so it won't get | 
| 5633 | * swapped under us. | 6164 | * swapped under us. | 
| 5634 | */ | 6165 | */ | 
| 5635 | parent_ctx = perf_pin_task_context(parent); | 6166 | parent_ctx = perf_pin_task_context(parent, ctxn); | 
| 5636 | 6167 | ||
| 5637 | /* | 6168 | /* | 
| 5638 | * No need to check if parent_ctx != NULL here; since we saw | 6169 | * No need to check if parent_ctx != NULL here; since we saw | 
| @@ -5652,20 +6183,20 @@ int perf_event_init_task(struct task_struct *child) | |||
| 5652 | * the list, not manipulating it: | 6183 | * the list, not manipulating it: | 
| 5653 | */ | 6184 | */ | 
| 5654 | list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { | 6185 | list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { | 
| 5655 | ret = inherit_task_group(event, parent, parent_ctx, child, | 6186 | ret = inherit_task_group(event, parent, parent_ctx, | 
| 5656 | &inherited_all); | 6187 | child, ctxn, &inherited_all); | 
| 5657 | if (ret) | 6188 | if (ret) | 
| 5658 | break; | 6189 | break; | 
| 5659 | } | 6190 | } | 
| 5660 | 6191 | ||
| 5661 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { | 6192 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { | 
| 5662 | ret = inherit_task_group(event, parent, parent_ctx, child, | 6193 | ret = inherit_task_group(event, parent, parent_ctx, | 
| 5663 | &inherited_all); | 6194 | child, ctxn, &inherited_all); | 
| 5664 | if (ret) | 6195 | if (ret) | 
| 5665 | break; | 6196 | break; | 
| 5666 | } | 6197 | } | 
| 5667 | 6198 | ||
| 5668 | child_ctx = child->perf_event_ctxp; | 6199 | child_ctx = child->perf_event_ctxp[ctxn]; | 
| 5669 | 6200 | ||
| 5670 | if (child_ctx && inherited_all) { | 6201 | if (child_ctx && inherited_all) { | 
| 5671 | /* | 6202 | /* | 
| @@ -5694,63 +6225,98 @@ int perf_event_init_task(struct task_struct *child) | |||
| 5694 | return ret; | 6225 | return ret; | 
| 5695 | } | 6226 | } | 
| 5696 | 6227 | ||
| 6228 | /* | ||
| 6229 | * Initialize the perf_event context in task_struct | ||
| 6230 | */ | ||
| 6231 | int perf_event_init_task(struct task_struct *child) | ||
| 6232 | { | ||
| 6233 | int ctxn, ret; | ||
| 6234 | |||
| 6235 | for_each_task_context_nr(ctxn) { | ||
| 6236 | ret = perf_event_init_context(child, ctxn); | ||
| 6237 | if (ret) | ||
| 6238 | return ret; | ||
| 6239 | } | ||
| 6240 | |||
| 6241 | return 0; | ||
| 6242 | } | ||
| 6243 | |||
| 5697 | static void __init perf_event_init_all_cpus(void) | 6244 | static void __init perf_event_init_all_cpus(void) | 
| 5698 | { | 6245 | { | 
| 6246 | struct swevent_htable *swhash; | ||
| 5699 | int cpu; | 6247 | int cpu; | 
| 5700 | struct perf_cpu_context *cpuctx; | ||
| 5701 | 6248 | ||
| 5702 | for_each_possible_cpu(cpu) { | 6249 | for_each_possible_cpu(cpu) { | 
| 5703 | cpuctx = &per_cpu(perf_cpu_context, cpu); | 6250 | swhash = &per_cpu(swevent_htable, cpu); | 
| 5704 | mutex_init(&cpuctx->hlist_mutex); | 6251 | mutex_init(&swhash->hlist_mutex); | 
| 5705 | __perf_event_init_context(&cpuctx->ctx, NULL); | 6252 | INIT_LIST_HEAD(&per_cpu(rotation_list, cpu)); | 
| 5706 | } | 6253 | } | 
| 5707 | } | 6254 | } | 
| 5708 | 6255 | ||
| 5709 | static void __cpuinit perf_event_init_cpu(int cpu) | 6256 | static void __cpuinit perf_event_init_cpu(int cpu) | 
| 5710 | { | 6257 | { | 
| 5711 | struct perf_cpu_context *cpuctx; | 6258 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); | 
| 5712 | |||
| 5713 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
| 5714 | |||
| 5715 | spin_lock(&perf_resource_lock); | ||
| 5716 | cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; | ||
| 5717 | spin_unlock(&perf_resource_lock); | ||
| 5718 | 6259 | ||
| 5719 | mutex_lock(&cpuctx->hlist_mutex); | 6260 | mutex_lock(&swhash->hlist_mutex); | 
| 5720 | if (cpuctx->hlist_refcount > 0) { | 6261 | if (swhash->hlist_refcount > 0) { | 
| 5721 | struct swevent_hlist *hlist; | 6262 | struct swevent_hlist *hlist; | 
| 5722 | 6263 | ||
| 5723 | hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); | 6264 | hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu)); | 
| 5724 | WARN_ON_ONCE(!hlist); | 6265 | WARN_ON(!hlist); | 
| 5725 | rcu_assign_pointer(cpuctx->swevent_hlist, hlist); | 6266 | rcu_assign_pointer(swhash->swevent_hlist, hlist); | 
| 5726 | } | 6267 | } | 
| 5727 | mutex_unlock(&cpuctx->hlist_mutex); | 6268 | mutex_unlock(&swhash->hlist_mutex); | 
| 5728 | } | 6269 | } | 
| 5729 | 6270 | ||
| 5730 | #ifdef CONFIG_HOTPLUG_CPU | 6271 | #ifdef CONFIG_HOTPLUG_CPU | 
| 5731 | static void __perf_event_exit_cpu(void *info) | 6272 | static void perf_pmu_rotate_stop(struct pmu *pmu) | 
| 5732 | { | 6273 | { | 
| 5733 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 6274 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 
| 5734 | struct perf_event_context *ctx = &cpuctx->ctx; | 6275 | |
| 6276 | WARN_ON(!irqs_disabled()); | ||
| 6277 | |||
| 6278 | list_del_init(&cpuctx->rotation_list); | ||
| 6279 | } | ||
| 6280 | |||
| 6281 | static void __perf_event_exit_context(void *__info) | ||
| 6282 | { | ||
| 6283 | struct perf_event_context *ctx = __info; | ||
| 5735 | struct perf_event *event, *tmp; | 6284 | struct perf_event *event, *tmp; | 
| 5736 | 6285 | ||
| 6286 | perf_pmu_rotate_stop(ctx->pmu); | ||
| 6287 | |||
| 5737 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) | 6288 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) | 
| 5738 | __perf_event_remove_from_context(event); | 6289 | __perf_event_remove_from_context(event); | 
| 5739 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) | 6290 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) | 
| 5740 | __perf_event_remove_from_context(event); | 6291 | __perf_event_remove_from_context(event); | 
| 5741 | } | 6292 | } | 
| 6293 | |||
| 6294 | static void perf_event_exit_cpu_context(int cpu) | ||
| 6295 | { | ||
| 6296 | struct perf_event_context *ctx; | ||
| 6297 | struct pmu *pmu; | ||
| 6298 | int idx; | ||
| 6299 | |||
| 6300 | idx = srcu_read_lock(&pmus_srcu); | ||
| 6301 | list_for_each_entry_rcu(pmu, &pmus, entry) { | ||
| 6302 | ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx; | ||
| 6303 | |||
| 6304 | mutex_lock(&ctx->mutex); | ||
| 6305 | smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1); | ||
| 6306 | mutex_unlock(&ctx->mutex); | ||
| 6307 | } | ||
| 6308 | srcu_read_unlock(&pmus_srcu, idx); | ||
| 6309 | } | ||
| 6310 | |||
| 5742 | static void perf_event_exit_cpu(int cpu) | 6311 | static void perf_event_exit_cpu(int cpu) | 
| 5743 | { | 6312 | { | 
| 5744 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 6313 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); | 
| 5745 | struct perf_event_context *ctx = &cpuctx->ctx; | ||
| 5746 | 6314 | ||
| 5747 | mutex_lock(&cpuctx->hlist_mutex); | 6315 | mutex_lock(&swhash->hlist_mutex); | 
| 5748 | swevent_hlist_release(cpuctx); | 6316 | swevent_hlist_release(swhash); | 
| 5749 | mutex_unlock(&cpuctx->hlist_mutex); | 6317 | mutex_unlock(&swhash->hlist_mutex); | 
| 5750 | 6318 | ||
| 5751 | mutex_lock(&ctx->mutex); | 6319 | perf_event_exit_cpu_context(cpu); | 
| 5752 | smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); | ||
| 5753 | mutex_unlock(&ctx->mutex); | ||
| 5754 | } | 6320 | } | 
| 5755 | #else | 6321 | #else | 
| 5756 | static inline void perf_event_exit_cpu(int cpu) { } | 6322 | static inline void perf_event_exit_cpu(int cpu) { } | 
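With per-pmu contexts, CPU hot-unplug teardown has to visit one context per registered pmu: perf_event_exit_cpu_context() above walks the pmus list under srcu_read_lock() and empties each per-cpu context via smp_call_function_single(). The module below is only a reduced, self-contained illustration of that SRCU-protected list-walk pattern, with its own list and srcu_struct; the my_* names are invented for the example and are not perf symbols.

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/srcu.h>

struct my_pmu {
        const char *name;
        struct list_head entry;
};

static LIST_HEAD(my_pmus);
static DEFINE_MUTEX(my_pmus_lock);              /* writers serialize on a mutex */
static struct srcu_struct my_pmus_srcu;         /* readers use SRCU, as pmus_srcu does */

static struct my_pmu example_pmu = { .name = "example" };

static void my_walk(void)
{
        struct my_pmu *pmu;
        int idx;

        idx = srcu_read_lock(&my_pmus_srcu);
        list_for_each_entry_rcu(pmu, &my_pmus, entry)
                pr_info("visiting pmu %s\n", pmu->name);
        srcu_read_unlock(&my_pmus_srcu, idx);
}

static int __init my_init(void)
{
        init_srcu_struct(&my_pmus_srcu);

        mutex_lock(&my_pmus_lock);
        list_add_rcu(&example_pmu.entry, &my_pmus);
        mutex_unlock(&my_pmus_lock);

        my_walk();
        return 0;
}

static void __exit my_exit(void)
{
        mutex_lock(&my_pmus_lock);
        list_del_rcu(&example_pmu.entry);
        mutex_unlock(&my_pmus_lock);

        synchronize_srcu(&my_pmus_srcu);        /* wait for readers before teardown */
        cleanup_srcu_struct(&my_pmus_srcu);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");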
| @@ -5780,118 +6346,13 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
| 5780 | return NOTIFY_OK; | 6346 | return NOTIFY_OK; | 
| 5781 | } | 6347 | } | 
| 5782 | 6348 | ||
| 5783 | /* | ||
| 5784 | * This has to have a higher priority than migration_notifier in sched.c. | ||
| 5785 | */ | ||
| 5786 | static struct notifier_block __cpuinitdata perf_cpu_nb = { | ||
| 5787 | .notifier_call = perf_cpu_notify, | ||
| 5788 | .priority = 20, | ||
| 5789 | }; | ||
| 5790 | |||
| 5791 | void __init perf_event_init(void) | 6349 | void __init perf_event_init(void) | 
| 5792 | { | 6350 | { | 
| 5793 | perf_event_init_all_cpus(); | 6351 | perf_event_init_all_cpus(); | 
| 5794 | perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, | 6352 | init_srcu_struct(&pmus_srcu); | 
| 5795 | (void *)(long)smp_processor_id()); | 6353 | perf_pmu_register(&perf_swevent); | 
| 5796 | perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, | 6354 | perf_pmu_register(&perf_cpu_clock); | 
| 5797 | (void *)(long)smp_processor_id()); | 6355 | perf_pmu_register(&perf_task_clock); | 
| 5798 | register_cpu_notifier(&perf_cpu_nb); | 6356 | perf_tp_register(); | 
| 5799 | } | 6357 | perf_cpu_notifier(perf_cpu_notify); | 
| 5800 | |||
| 5801 | static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, | ||
| 5802 | struct sysdev_class_attribute *attr, | ||
| 5803 | char *buf) | ||
| 5804 | { | ||
| 5805 | return sprintf(buf, "%d\n", perf_reserved_percpu); | ||
| 5806 | } | ||
| 5807 | |||
| 5808 | static ssize_t | ||
| 5809 | perf_set_reserve_percpu(struct sysdev_class *class, | ||
| 5810 | struct sysdev_class_attribute *attr, | ||
| 5811 | const char *buf, | ||
| 5812 | size_t count) | ||
| 5813 | { | ||
| 5814 | struct perf_cpu_context *cpuctx; | ||
| 5815 | unsigned long val; | ||
| 5816 | int err, cpu, mpt; | ||
| 5817 | |||
| 5818 | err = strict_strtoul(buf, 10, &val); | ||
| 5819 | if (err) | ||
| 5820 | return err; | ||
| 5821 | if (val > perf_max_events) | ||
| 5822 | return -EINVAL; | ||
| 5823 | |||
| 5824 | spin_lock(&perf_resource_lock); | ||
| 5825 | perf_reserved_percpu = val; | ||
| 5826 | for_each_online_cpu(cpu) { | ||
| 5827 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
| 5828 | raw_spin_lock_irq(&cpuctx->ctx.lock); | ||
| 5829 | mpt = min(perf_max_events - cpuctx->ctx.nr_events, | ||
| 5830 | perf_max_events - perf_reserved_percpu); | ||
| 5831 | cpuctx->max_pertask = mpt; | ||
| 5832 | raw_spin_unlock_irq(&cpuctx->ctx.lock); | ||
| 5833 | } | ||
| 5834 | spin_unlock(&perf_resource_lock); | ||
| 5835 | |||
| 5836 | return count; | ||
| 5837 | } | ||
| 5838 | |||
| 5839 | static ssize_t perf_show_overcommit(struct sysdev_class *class, | ||
| 5840 | struct sysdev_class_attribute *attr, | ||
| 5841 | char *buf) | ||
| 5842 | { | ||
| 5843 | return sprintf(buf, "%d\n", perf_overcommit); | ||
| 5844 | } | ||
| 5845 | |||
| 5846 | static ssize_t | ||
| 5847 | perf_set_overcommit(struct sysdev_class *class, | ||
| 5848 | struct sysdev_class_attribute *attr, | ||
| 5849 | const char *buf, size_t count) | ||
| 5850 | { | ||
| 5851 | unsigned long val; | ||
| 5852 | int err; | ||
| 5853 | |||
| 5854 | err = strict_strtoul(buf, 10, &val); | ||
| 5855 | if (err) | ||
| 5856 | return err; | ||
| 5857 | if (val > 1) | ||
| 5858 | return -EINVAL; | ||
| 5859 | |||
| 5860 | spin_lock(&perf_resource_lock); | ||
| 5861 | perf_overcommit = val; | ||
| 5862 | spin_unlock(&perf_resource_lock); | ||
| 5863 | |||
| 5864 | return count; | ||
| 5865 | } | ||
| 5866 | |||
| 5867 | static SYSDEV_CLASS_ATTR( | ||
| 5868 | reserve_percpu, | ||
| 5869 | 0644, | ||
| 5870 | perf_show_reserve_percpu, | ||
| 5871 | perf_set_reserve_percpu | ||
| 5872 | ); | ||
| 5873 | |||
| 5874 | static SYSDEV_CLASS_ATTR( | ||
| 5875 | overcommit, | ||
| 5876 | 0644, | ||
| 5877 | perf_show_overcommit, | ||
| 5878 | perf_set_overcommit | ||
| 5879 | ); | ||
| 5880 | |||
| 5881 | static struct attribute *perfclass_attrs[] = { | ||
| 5882 | &attr_reserve_percpu.attr, | ||
| 5883 | &attr_overcommit.attr, | ||
| 5884 | NULL | ||
| 5885 | }; | ||
| 5886 | |||
| 5887 | static struct attribute_group perfclass_attr_group = { | ||
| 5888 | .attrs = perfclass_attrs, | ||
| 5889 | .name = "perf_events", | ||
| 5890 | }; | ||
| 5891 | |||
| 5892 | static int __init perf_event_sysfs_init(void) | ||
| 5893 | { | ||
| 5894 | return sysfs_create_group(&cpu_sysdev_class.kset.kobj, | ||
| 5895 | &perfclass_attr_group); | ||
| 5896 | } | 6358 | } | 
| 5897 | device_initcall(perf_event_sysfs_init); | ||
