about summary refs log tree commit diff stats
path: root/kernel/perf_event.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r-- kernel/perf_event.c 94
1 files changed, 28 insertions, 66 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ff5d430d45a7..8cf737da3ec4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2519,8 +2519,6 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2519{ 2519{
2520 long max_size = perf_data_size(data); 2520 long max_size = perf_data_size(data);
2521 2521
2522 atomic_set(&data->lock, -1);
2523
2524 if (event->attr.watermark) { 2522 if (event->attr.watermark) {
2525 data->watermark = min_t(long, max_size, 2523 data->watermark = min_t(long, max_size,
2526 event->attr.wakeup_watermark); 2524 event->attr.wakeup_watermark);
@@ -2906,82 +2904,56 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
2906} 2904}
2907 2905
2908/* 2906/*
2909 * Curious locking construct.
2910 *
2911 * We need to ensure a later event_id doesn't publish a head when a former 2907 * We need to ensure a later event_id doesn't publish a head when a former
2912 * event_id isn't done writing. However since we need to deal with NMIs we 2908 * event isn't done writing. However since we need to deal with NMIs we
2913 * cannot fully serialize things. 2909 * cannot fully serialize things.
2914 * 2910 *
2915 * What we do is serialize between CPUs so we only have to deal with NMI
2916 * nesting on a single CPU.
2917 *
2918 * We only publish the head (and generate a wakeup) when the outer-most 2911 * We only publish the head (and generate a wakeup) when the outer-most
2919 * event_id completes. 2912 * event completes.
2920 */ 2913 */
2921static void perf_output_lock(struct perf_output_handle *handle) 2914static void perf_output_get_handle(struct perf_output_handle *handle)
2922{ 2915{
2923 struct perf_mmap_data *data = handle->data; 2916 struct perf_mmap_data *data = handle->data;
2924 int cur, cpu = get_cpu();
2925 2917
2926 handle->locked = 0; 2918 preempt_disable();
2927 2919 atomic_inc(&data->nest);
2928 for (;;) {
2929 cur = atomic_cmpxchg(&data->lock, -1, cpu);
2930 if (cur == -1) {
2931 handle->locked = 1;
2932 break;
2933 }
2934 if (cur == cpu)
2935 break;
2936
2937 cpu_relax();
2938 }
2939} 2920}
2940 2921
2941static void perf_output_unlock(struct perf_output_handle *handle) 2922static void perf_output_put_handle(struct perf_output_handle *handle)
2942{ 2923{
2943 struct perf_mmap_data *data = handle->data; 2924 struct perf_mmap_data *data = handle->data;
2944 unsigned long head; 2925 unsigned long head;
2945 int cpu;
2946
2947 data->done_head = data->head;
2948
2949 if (!handle->locked)
2950 goto out;
2951 2926
2952again: 2927again:
2953 /* 2928 head = atomic_long_read(&data->head);
2954 * The xchg implies a full barrier that ensures all writes are done
2955 * before we publish the new head, matched by a rmb() in userspace when
2956 * reading this position.
2957 */
2958 while ((head = atomic_long_xchg(&data->done_head, 0)))
2959 data->user_page->data_head = head;
2960 2929
2961 /* 2930 /*
2962 * NMI can happen here, which means we can miss a done_head update. 2931 * IRQ/NMI can happen here, which means we can miss a head update.
2963 */ 2932 */
2964 2933
2965 cpu = atomic_xchg(&data->lock, -1); 2934 if (!atomic_dec_and_test(&data->nest))
2966 WARN_ON_ONCE(cpu != smp_processor_id()); 2935 return;
2967 2936
2968 /* 2937 /*
2969 * Therefore we have to validate we did not indeed do so. 2938 * Publish the known good head. Rely on the full barrier implied
2939 * by atomic_dec_and_test() to order the data->head read and this
2940 * write.
2970 */ 2941 */
2971 if (unlikely(atomic_long_read(&data->done_head))) { 2942 data->user_page->data_head = head;
2972 /*
2973 * Since we had it locked, we can lock it again.
2974 */
2975 while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
2976 cpu_relax();
2977 2943
2944 /*
2945 * Now check if we missed an update, rely on the (compiler)
2946 * barrier in atomic_dec_and_test() to re-read data->head.
2947 */
2948 if (unlikely(head != atomic_long_read(&data->head))) {
2949 atomic_inc(&data->nest);
2978 goto again; 2950 goto again;
2979 } 2951 }
2980 2952
2981 if (atomic_xchg(&data->wakeup, 0)) 2953 if (atomic_xchg(&data->wakeup, 0))
2982 perf_output_wakeup(handle); 2954 perf_output_wakeup(handle);
2983out: 2955
2984 put_cpu(); 2956 preempt_enable();
2985} 2957}
2986 2958
2987void perf_output_copy(struct perf_output_handle *handle, 2959void perf_output_copy(struct perf_output_handle *handle,
@@ -3063,7 +3035,7 @@ int perf_output_begin(struct perf_output_handle *handle,
3063 if (have_lost) 3035 if (have_lost)
3064 size += sizeof(lost_event); 3036 size += sizeof(lost_event);
3065 3037
3066 perf_output_lock(handle); 3038 perf_output_get_handle(handle);
3067 3039
3068 do { 3040 do {
3069 /* 3041 /*
@@ -3083,7 +3055,7 @@ int perf_output_begin(struct perf_output_handle *handle,
3083 handle->head = head; 3055 handle->head = head;
3084 3056
3085 if (head - tail > data->watermark) 3057 if (head - tail > data->watermark)
3086 atomic_set(&data->wakeup, 1); 3058 atomic_inc(&data->wakeup);
3087 3059
3088 if (have_lost) { 3060 if (have_lost) {
3089 lost_event.header.type = PERF_RECORD_LOST; 3061 lost_event.header.type = PERF_RECORD_LOST;
@@ -3099,7 +3071,7 @@ int perf_output_begin(struct perf_output_handle *handle,
3099 3071
3100fail: 3072fail:
3101 atomic_inc(&data->lost); 3073 atomic_inc(&data->lost);
3102 perf_output_unlock(handle); 3074 perf_output_put_handle(handle);
3103out: 3075out:
3104 rcu_read_unlock(); 3076 rcu_read_unlock();
3105 3077
@@ -3117,11 +3089,11 @@ void perf_output_end(struct perf_output_handle *handle)
3117 int events = atomic_inc_return(&data->events); 3089 int events = atomic_inc_return(&data->events);
3118 if (events >= wakeup_events) { 3090 if (events >= wakeup_events) {
3119 atomic_sub(wakeup_events, &data->events); 3091 atomic_sub(wakeup_events, &data->events);
3120 atomic_set(&data->wakeup, 1); 3092 atomic_inc(&data->wakeup);
3121 } 3093 }
3122 } 3094 }
3123 3095
3124 perf_output_unlock(handle); 3096 perf_output_put_handle(handle);
3125 rcu_read_unlock(); 3097 rcu_read_unlock();
3126} 3098}
3127 3099
@@ -3457,22 +3429,13 @@ static void perf_event_task_output(struct perf_event *event,
3457{ 3429{
3458 struct perf_output_handle handle; 3430 struct perf_output_handle handle;
3459 struct task_struct *task = task_event->task; 3431 struct task_struct *task = task_event->task;
3460 unsigned long flags;
3461 int size, ret; 3432 int size, ret;
3462 3433
3463 /*
3464 * If this CPU attempts to acquire an rq lock held by a CPU spinning
3465 * in perf_output_lock() from interrupt context, it's game over.
3466 */
3467 local_irq_save(flags);
3468
3469 size = task_event->event_id.header.size; 3434 size = task_event->event_id.header.size;
3470 ret = perf_output_begin(&handle, event, size, 0, 0); 3435 ret = perf_output_begin(&handle, event, size, 0, 0);
3471 3436
3472 if (ret) { 3437 if (ret)
3473 local_irq_restore(flags);
3474 return; 3438 return;
3475 }
3476 3439
3477 task_event->event_id.pid = perf_event_pid(event, task); 3440 task_event->event_id.pid = perf_event_pid(event, task);
3478 task_event->event_id.ppid = perf_event_pid(event, current); 3441 task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3483,7 +3446,6 @@ static void perf_event_task_output(struct perf_event *event,
3483 perf_output_put(&handle, task_event->event_id); 3446 perf_output_put(&handle, task_event->event_id);
3484 3447
3485 perf_output_end(&handle); 3448 perf_output_end(&handle);
3486 local_irq_restore(flags);
3487} 3449}
3488 3450
3489static int perf_event_task_match(struct perf_event *event) 3451static int perf_event_task_match(struct perf_event *event)