Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r--  kernel/perf_counter.c  394
1 file changed, 216 insertions, 178 deletions
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 8cb94a52d1bb..cc768ab81ac8 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2176,6 +2176,13 @@ static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
 	data->nr_pages = nr_pages;
 	atomic_set(&data->lock, -1);
 
+	if (counter->attr.watermark) {
+		data->watermark = min_t(long, PAGE_SIZE * nr_pages,
+					counter->attr.wakeup_watermark);
+	}
+	if (!data->watermark)
+		data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4);
+
 	rcu_assign_pointer(counter->data, data);
 
 	return 0;
@@ -2315,7 +2322,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	lock_limit >>= PAGE_SHIFT;
 	locked = vma->vm_mm->locked_vm + extra;
 
-	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+	if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
+	    !capable(CAP_IPC_LOCK)) {
 		ret = -EPERM;
 		goto unlock;
 	}
@@ -2504,35 +2512,15 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 /*
  * Output
  */
-
-struct perf_output_handle {
-	struct perf_counter	*counter;
-	struct perf_mmap_data	*data;
-	unsigned long		head;
-	unsigned long		offset;
-	int			nmi;
-	int			sample;
-	int			locked;
-	unsigned long		flags;
-};
-
-static bool perf_output_space(struct perf_mmap_data *data,
-			      unsigned int offset, unsigned int head)
+static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
+			      unsigned long offset, unsigned long head)
 {
-	unsigned long tail;
 	unsigned long mask;
 
 	if (!data->writable)
 		return true;
 
 	mask = (data->nr_pages << PAGE_SHIFT) - 1;
-	/*
-	 * Userspace could choose to issue a mb() before updating the tail
-	 * pointer. So that all reads will be completed before the write is
-	 * issued.
-	 */
-	tail = ACCESS_ONCE(data->user_page->data_tail);
-	smp_rmb();
 
 	offset = (offset - tail) & mask;
 	head   = (head   - tail) & mask;
@@ -2633,8 +2621,8 @@ out:
 	local_irq_restore(handle->flags);
 }
 
-static void perf_output_copy(struct perf_output_handle *handle,
-			     const void *buf, unsigned int len)
+void perf_output_copy(struct perf_output_handle *handle,
+		      const void *buf, unsigned int len)
 {
 	unsigned int pages_mask;
 	unsigned int offset;
@@ -2669,16 +2657,13 @@ static void perf_output_copy(struct perf_output_handle *handle,
 	WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
 }
 
-#define perf_output_put(handle, x) \
-	perf_output_copy((handle), &(x), sizeof(x))
-
-static int perf_output_begin(struct perf_output_handle *handle,
-			     struct perf_counter *counter, unsigned int size,
-			     int nmi, int sample)
+int perf_output_begin(struct perf_output_handle *handle,
+		      struct perf_counter *counter, unsigned int size,
+		      int nmi, int sample)
 {
 	struct perf_counter *output_counter;
 	struct perf_mmap_data *data;
-	unsigned int offset, head;
+	unsigned long tail, offset, head;
 	int have_lost;
 	struct {
 		struct perf_event_header header;
@@ -2716,16 +2701,23 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	perf_output_lock(handle);
 
 	do {
+		/*
+		 * Userspace could choose to issue a mb() before updating the
+		 * tail pointer. So that all reads will be completed before the
+		 * write is issued.
+		 */
+		tail = ACCESS_ONCE(data->user_page->data_tail);
+		smp_rmb();
 		offset = head = atomic_long_read(&data->head);
 		head += size;
-		if (unlikely(!perf_output_space(data, offset, head)))
+		if (unlikely(!perf_output_space(data, tail, offset, head)))
 			goto fail;
 	} while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
 
 	handle->offset = offset;
 	handle->head = head;
 
-	if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT))
+	if (head - tail > data->watermark)
 		atomic_set(&data->wakeup, 1);
 
 	if (have_lost) {
@@ -2749,7 +2741,7 @@ out:
 	return -ENOSPC;
 }
 
-static void perf_output_end(struct perf_output_handle *handle)
+void perf_output_end(struct perf_output_handle *handle)
 {
 	struct perf_counter *counter = handle->counter;
 	struct perf_mmap_data *data = handle->data;
@@ -2863,156 +2855,176 @@ static void perf_output_read(struct perf_output_handle *handle,
 	perf_output_read_one(handle, counter);
 }
 
-void perf_counter_output(struct perf_counter *counter, int nmi,
-			 struct perf_sample_data *data)
+void perf_output_sample(struct perf_output_handle *handle,
+			struct perf_event_header *header,
+			struct perf_sample_data *data,
+			struct perf_counter *counter)
 {
-	int ret;
-	u64 sample_type = counter->attr.sample_type;
-	struct perf_output_handle handle;
-	struct perf_event_header header;
-	u64 ip;
-	struct {
-		u32 pid, tid;
-	} tid_entry;
-	struct perf_callchain_entry *callchain = NULL;
-	int callchain_size = 0;
-	u64 time;
-	struct {
-		u32 cpu, reserved;
-	} cpu_entry;
+	u64 sample_type = data->type;
 
-	header.type = PERF_EVENT_SAMPLE;
-	header.size = sizeof(header);
+	perf_output_put(handle, *header);
 
-	header.misc = 0;
-	header.misc |= perf_misc_flags(data->regs);
-
-	if (sample_type & PERF_SAMPLE_IP) {
-		ip = perf_instruction_pointer(data->regs);
-		header.size += sizeof(ip);
-	}
-
-	if (sample_type & PERF_SAMPLE_TID) {
-		/* namespace issues */
-		tid_entry.pid = perf_counter_pid(counter, current);
-		tid_entry.tid = perf_counter_tid(counter, current);
-
-		header.size += sizeof(tid_entry);
-	}
+	if (sample_type & PERF_SAMPLE_IP)
+		perf_output_put(handle, data->ip);
 
-	if (sample_type & PERF_SAMPLE_TIME) {
-		/*
-		 * Maybe do better on x86 and provide cpu_clock_nmi()
-		 */
-		time = sched_clock();
+	if (sample_type & PERF_SAMPLE_TID)
+		perf_output_put(handle, data->tid_entry);
 
-		header.size += sizeof(u64);
-	}
+	if (sample_type & PERF_SAMPLE_TIME)
+		perf_output_put(handle, data->time);
 
 	if (sample_type & PERF_SAMPLE_ADDR)
-		header.size += sizeof(u64);
+		perf_output_put(handle, data->addr);
 
 	if (sample_type & PERF_SAMPLE_ID)
-		header.size += sizeof(u64);
+		perf_output_put(handle, data->id);
 
 	if (sample_type & PERF_SAMPLE_STREAM_ID)
-		header.size += sizeof(u64);
-
-	if (sample_type & PERF_SAMPLE_CPU) {
-		header.size += sizeof(cpu_entry);
+		perf_output_put(handle, data->stream_id);
 
-		cpu_entry.cpu = raw_smp_processor_id();
-		cpu_entry.reserved = 0;
-	}
+	if (sample_type & PERF_SAMPLE_CPU)
+		perf_output_put(handle, data->cpu_entry);
 
 	if (sample_type & PERF_SAMPLE_PERIOD)
-		header.size += sizeof(u64);
+		perf_output_put(handle, data->period);
 
 	if (sample_type & PERF_SAMPLE_READ)
-		header.size += perf_counter_read_size(counter);
+		perf_output_read(handle, counter);
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
-		callchain = perf_callchain(data->regs);
+		if (data->callchain) {
+			int size = 1;
 
-		if (callchain) {
-			callchain_size = (1 + callchain->nr) * sizeof(u64);
-			header.size += callchain_size;
-		} else
-			header.size += sizeof(u64);
+			if (data->callchain)
+				size += data->callchain->nr;
+
+			size *= sizeof(u64);
+
+			perf_output_copy(handle, data->callchain, size);
+		} else {
+			u64 nr = 0;
+			perf_output_put(handle, nr);
+		}
 	}
 
 	if (sample_type & PERF_SAMPLE_RAW) {
-		int size = sizeof(u32);
+		if (data->raw) {
+			perf_output_put(handle, data->raw->size);
+			perf_output_copy(handle, data->raw->data,
+					 data->raw->size);
+		} else {
+			struct {
+				u32	size;
+				u32	data;
+			} raw = {
+				.size = sizeof(u32),
+				.data = 0,
+			};
+			perf_output_put(handle, raw);
+		}
+	}
+}
 
-	if (data->raw)
-		size += data->raw->size;
-	else
-		size += sizeof(u32);
+void perf_prepare_sample(struct perf_event_header *header,
+			 struct perf_sample_data *data,
+			 struct perf_counter *counter,
+			 struct pt_regs *regs)
+{
+	u64 sample_type = counter->attr.sample_type;
 
-	WARN_ON_ONCE(size & (sizeof(u64)-1));
-	header.size += size;
-	}
+	data->type = sample_type;
 
-	ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
-	if (ret)
-		return;
+	header->type = PERF_EVENT_SAMPLE;
+	header->size = sizeof(*header);
 
-	perf_output_put(&handle, header);
+	header->misc = 0;
+	header->misc |= perf_misc_flags(regs);
 
-	if (sample_type & PERF_SAMPLE_IP)
-		perf_output_put(&handle, ip);
+	if (sample_type & PERF_SAMPLE_IP) {
+		data->ip = perf_instruction_pointer(regs);
 
-	if (sample_type & PERF_SAMPLE_TID)
-		perf_output_put(&handle, tid_entry);
+		header->size += sizeof(data->ip);
+	}
 
-	if (sample_type & PERF_SAMPLE_TIME)
-		perf_output_put(&handle, time);
+	if (sample_type & PERF_SAMPLE_TID) {
+		/* namespace issues */
+		data->tid_entry.pid = perf_counter_pid(counter, current);
+		data->tid_entry.tid = perf_counter_tid(counter, current);
+
+		header->size += sizeof(data->tid_entry);
+	}
+
+	if (sample_type & PERF_SAMPLE_TIME) {
+		data->time = perf_clock();
+
+		header->size += sizeof(data->time);
+	}
 
 	if (sample_type & PERF_SAMPLE_ADDR)
-		perf_output_put(&handle, data->addr);
+		header->size += sizeof(data->addr);
 
 	if (sample_type & PERF_SAMPLE_ID) {
-		u64 id = primary_counter_id(counter);
+		data->id = primary_counter_id(counter);
 
-		perf_output_put(&handle, id);
+		header->size += sizeof(data->id);
 	}
 
-	if (sample_type & PERF_SAMPLE_STREAM_ID)
-		perf_output_put(&handle, counter->id);
+	if (sample_type & PERF_SAMPLE_STREAM_ID) {
+		data->stream_id = counter->id;
 
-	if (sample_type & PERF_SAMPLE_CPU)
-		perf_output_put(&handle, cpu_entry);
+		header->size += sizeof(data->stream_id);
+	}
+
+	if (sample_type & PERF_SAMPLE_CPU) {
+		data->cpu_entry.cpu = raw_smp_processor_id();
+		data->cpu_entry.reserved = 0;
+
+		header->size += sizeof(data->cpu_entry);
+	}
 
 	if (sample_type & PERF_SAMPLE_PERIOD)
-		perf_output_put(&handle, data->period);
+		header->size += sizeof(data->period);
 
 	if (sample_type & PERF_SAMPLE_READ)
-		perf_output_read(&handle, counter);
+		header->size += perf_counter_read_size(counter);
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
-		if (callchain)
-			perf_output_copy(&handle, callchain, callchain_size);
-		else {
-			u64 nr = 0;
-			perf_output_put(&handle, nr);
-		}
+		int size = 1;
+
+		data->callchain = perf_callchain(regs);
+
+		if (data->callchain)
+			size += data->callchain->nr;
+
+		header->size += size * sizeof(u64);
 	}
 
 	if (sample_type & PERF_SAMPLE_RAW) {
-		if (data->raw) {
-			perf_output_put(&handle, data->raw->size);
-			perf_output_copy(&handle, data->raw->data, data->raw->size);
-		} else {
-			struct {
-				u32 size;
-				u32 data;
-			} raw = {
-				.size = sizeof(u32),
-				.data = 0,
-			};
-			perf_output_put(&handle, raw);
-		}
+		int size = sizeof(u32);
+
+		if (data->raw)
+			size += data->raw->size;
+		else
+			size += sizeof(u32);
+
+		WARN_ON_ONCE(size & (sizeof(u64)-1));
+		header->size += size;
 	}
+}
+
+static void perf_counter_output(struct perf_counter *counter, int nmi,
+				struct perf_sample_data *data,
+				struct pt_regs *regs)
+{
+	struct perf_output_handle handle;
+	struct perf_event_header header;
+
+	perf_prepare_sample(&header, data, counter, regs);
+
+	if (perf_output_begin(&handle, counter, header.size, nmi, 1))
+		return;
+
+	perf_output_sample(&handle, &header, data, counter);
 
 	perf_output_end(&handle);
 }
@@ -3071,6 +3083,7 @@ struct perf_task_event {
 		u32	ppid;
 		u32	tid;
 		u32	ptid;
+		u64	time;
 	} event;
 };
 
@@ -3078,9 +3091,12 @@ static void perf_counter_task_output(struct perf_counter *counter,
 				  struct perf_task_event *task_event)
 {
 	struct perf_output_handle handle;
-	int size = task_event->event.header.size;
+	int size;
 	struct task_struct *task = task_event->task;
-	int ret = perf_output_begin(&handle, counter, size, 0, 0);
+	int ret;
+
+	size = task_event->event.header.size;
+	ret = perf_output_begin(&handle, counter, size, 0, 0);
 
 	if (ret)
 		return;
@@ -3091,7 +3107,10 @@ static void perf_counter_task_output(struct perf_counter *counter,
 	task_event->event.tid = perf_counter_tid(counter, task);
 	task_event->event.ptid = perf_counter_tid(counter, current);
 
+	task_event->event.time = perf_clock();
+
 	perf_output_put(&handle, task_event->event);
+
 	perf_output_end(&handle);
 }
 
@@ -3473,7 +3492,7 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
 			.misc = 0,
 			.size = sizeof(throttle_event),
 		},
-		.time = sched_clock(),
+		.time = perf_clock(),
 		.id = primary_counter_id(counter),
 		.stream_id = counter->id,
 	};
@@ -3493,14 +3512,16 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
  * Generic counter overflow handling, sampling.
  */
 
-int perf_counter_overflow(struct perf_counter *counter, int nmi,
-			  struct perf_sample_data *data)
+static int __perf_counter_overflow(struct perf_counter *counter, int nmi,
+				   int throttle, struct perf_sample_data *data,
+				   struct pt_regs *regs)
 {
 	int events = atomic_read(&counter->event_limit);
-	int throttle = counter->pmu->unthrottle != NULL;
 	struct hw_perf_counter *hwc = &counter->hw;
 	int ret = 0;
 
+	throttle = (throttle && counter->pmu->unthrottle != NULL);
+
 	if (!throttle) {
 		hwc->interrupts++;
 	} else {
@@ -3523,7 +3544,7 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
 	}
 
 	if (counter->attr.freq) {
-		u64 now = sched_clock();
+		u64 now = perf_clock();
 		s64 delta = now - hwc->freq_stamp;
 
 		hwc->freq_stamp = now;
@@ -3549,10 +3570,17 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
 		perf_counter_disable(counter);
 	}
 
-	perf_counter_output(counter, nmi, data);
+	perf_counter_output(counter, nmi, data, regs);
 	return ret;
 }
 
+int perf_counter_overflow(struct perf_counter *counter, int nmi,
+			  struct perf_sample_data *data,
+			  struct pt_regs *regs)
+{
+	return __perf_counter_overflow(counter, nmi, 1, data, regs);
+}
+
 /*
  * Generic software counter infrastructure
  */
@@ -3588,9 +3616,11 @@ again:
 }
 
 static void perf_swcounter_overflow(struct perf_counter *counter,
-				    int nmi, struct perf_sample_data *data)
+				    int nmi, struct perf_sample_data *data,
+				    struct pt_regs *regs)
 {
 	struct hw_perf_counter *hwc = &counter->hw;
+	int throttle = 0;
 	u64 overflow;
 
 	data->period = counter->hw.last_period;
@@ -3600,13 +3630,15 @@ static void perf_swcounter_overflow(struct perf_counter *counter,
 		return;
 
 	for (; overflow; overflow--) {
-		if (perf_counter_overflow(counter, nmi, data)) {
+		if (__perf_counter_overflow(counter, nmi, throttle,
+					    data, regs)) {
 			/*
 			 * We inhibit the overflow from happening when
 			 * hwc->interrupts == MAX_INTERRUPTS.
 			 */
 			break;
 		}
+		throttle = 1;
 	}
 }
 
@@ -3618,7 +3650,8 @@ static void perf_swcounter_unthrottle(struct perf_counter *counter)
 }
 
 static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
-			       int nmi, struct perf_sample_data *data)
+			       int nmi, struct perf_sample_data *data,
+			       struct pt_regs *regs)
 {
 	struct hw_perf_counter *hwc = &counter->hw;
 
@@ -3627,11 +3660,11 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
 	if (!hwc->sample_period)
 		return;
 
-	if (!data->regs)
+	if (!regs)
 		return;
 
 	if (!atomic64_add_negative(nr, &hwc->period_left))
-		perf_swcounter_overflow(counter, nmi, data);
+		perf_swcounter_overflow(counter, nmi, data, regs);
 }
 
 static int perf_swcounter_is_counting(struct perf_counter *counter)
@@ -3690,7 +3723,8 @@ static int perf_swcounter_match(struct perf_counter *counter,
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 				     enum perf_type_id type,
 				     u32 event, u64 nr, int nmi,
-				     struct perf_sample_data *data)
+				     struct perf_sample_data *data,
+				     struct pt_regs *regs)
 {
 	struct perf_counter *counter;
 
@@ -3699,8 +3733,8 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-		if (perf_swcounter_match(counter, type, event, data->regs))
-			perf_swcounter_add(counter, nr, nmi, data);
+		if (perf_swcounter_match(counter, type, event, regs))
+			perf_swcounter_add(counter, nr, nmi, data, regs);
 	}
 	rcu_read_unlock();
 }
@@ -3721,7 +3755,8 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
 
 static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
 				    u64 nr, int nmi,
-				    struct perf_sample_data *data)
+				    struct perf_sample_data *data,
+				    struct pt_regs *regs)
 {
 	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
 	int *recursion = perf_swcounter_recursion_context(cpuctx);
@@ -3734,7 +3769,7 @@ static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
 	barrier();
 
 	perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
-				 nr, nmi, data);
+				 nr, nmi, data, regs);
 	rcu_read_lock();
 	/*
 	 * doesn't really matter which of the child contexts the
@@ -3742,7 +3777,7 @@ static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
 	 */
 	ctx = rcu_dereference(current->perf_counter_ctxp);
 	if (ctx)
-		perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data);
+		perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data, regs);
 	rcu_read_unlock();
 
 	barrier();
@@ -3756,11 +3791,11 @@ void __perf_swcounter_event(u32 event, u64 nr, int nmi,
 			    struct pt_regs *regs, u64 addr)
 {
 	struct perf_sample_data data = {
-		.regs = regs,
 		.addr = addr,
 	};
 
-	do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, &data);
+	do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi,
+				&data, regs);
 }
 
 static void perf_swcounter_read(struct perf_counter *counter)
@@ -3797,6 +3832,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 {
 	enum hrtimer_restart ret = HRTIMER_RESTART;
 	struct perf_sample_data data;
+	struct pt_regs *regs;
 	struct perf_counter *counter;
 	u64 period;
 
@@ -3804,17 +3840,17 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 	counter->pmu->read(counter);
 
 	data.addr = 0;
-	data.regs = get_irq_regs();
+	regs = get_irq_regs();
 	/*
 	 * In case we exclude kernel IPs or are somehow not in interrupt
 	 * context, provide the next best thing, the user IP.
 	 */
-	if ((counter->attr.exclude_kernel || !data.regs) &&
+	if ((counter->attr.exclude_kernel || !regs) &&
 			!counter->attr.exclude_user)
-		data.regs = task_pt_regs(current);
+		regs = task_pt_regs(current);
 
-	if (data.regs) {
-		if (perf_counter_overflow(counter, 0, &data))
+	if (regs) {
+		if (perf_counter_overflow(counter, 0, &data, regs))
 			ret = HRTIMER_NORESTART;
 	}
 
@@ -3950,15 +3986,17 @@ void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
 	};
 
 	struct perf_sample_data data = {
-		.regs = get_irq_regs(),
 		.addr = addr,
 		.raw = &raw,
 	};
 
-	if (!data.regs)
-		data.regs = task_pt_regs(current);
+	struct pt_regs *regs = get_irq_regs();
+
+	if (!regs)
+		regs = task_pt_regs(current);
 
-	do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data);
+	do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
+				&data, regs);
 }
 EXPORT_SYMBOL_GPL(perf_tpcounter_event);
 
@@ -4170,8 +4208,8 @@ done:
 static int perf_copy_attr(struct perf_counter_attr __user *uattr,
 			  struct perf_counter_attr *attr)
 {
-	int ret;
 	u32 size;
+	int ret;
 
 	if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
 		return -EFAULT;
@@ -4196,19 +4234,19 @@ static int perf_copy_attr(struct perf_counter_attr __user *uattr,
 
 	/*
 	 * If we're handed a bigger struct than we know of,
-	 * ensure all the unknown bits are 0.
+	 * ensure all the unknown bits are 0 - i.e. new
+	 * user-space does not rely on any kernel feature
+	 * extensions we dont know about yet.
 	 */
 	if (size > sizeof(*attr)) {
-		unsigned long val;
-		unsigned long __user *addr;
-		unsigned long __user *end;
+		unsigned char __user *addr;
+		unsigned char __user *end;
+		unsigned char val;
 
-		addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr),
-				 sizeof(unsigned long));
-		end = PTR_ALIGN((void __user *)uattr + size,
-				sizeof(unsigned long));
+		addr = (void __user *)uattr + sizeof(*attr);
+		end  = (void __user *)uattr + size;
 
-		for (; addr < end; addr += sizeof(unsigned long)) {
+		for (; addr < end; addr++) {
 			ret = get_user(val, addr);
 			if (ret)
 				return ret;