diff options
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r-- | kernel/perf_event.c | 94 |
1 files changed, 28 insertions, 66 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index ff5d430d45a7..8cf737da3ec4 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -2519,8 +2519,6 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data) | |||
2519 | { | 2519 | { |
2520 | long max_size = perf_data_size(data); | 2520 | long max_size = perf_data_size(data); |
2521 | 2521 | ||
2522 | atomic_set(&data->lock, -1); | ||
2523 | |||
2524 | if (event->attr.watermark) { | 2522 | if (event->attr.watermark) { |
2525 | data->watermark = min_t(long, max_size, | 2523 | data->watermark = min_t(long, max_size, |
2526 | event->attr.wakeup_watermark); | 2524 | event->attr.wakeup_watermark); |
@@ -2906,82 +2904,56 @@ static void perf_output_wakeup(struct perf_output_handle *handle) | |||
2906 | } | 2904 | } |
2907 | 2905 | ||
2908 | /* | 2906 | /* |
2909 | * Curious locking construct. | ||
2910 | * | ||
2911 | * We need to ensure a later event_id doesn't publish a head when a former | 2907 | * We need to ensure a later event_id doesn't publish a head when a former |
2912 | * event_id isn't done writing. However since we need to deal with NMIs we | 2908 | * event isn't done writing. However since we need to deal with NMIs we |
2913 | * cannot fully serialize things. | 2909 | * cannot fully serialize things. |
2914 | * | 2910 | * |
2915 | * What we do is serialize between CPUs so we only have to deal with NMI | ||
2916 | * nesting on a single CPU. | ||
2917 | * | ||
2918 | * We only publish the head (and generate a wakeup) when the outer-most | 2911 | * We only publish the head (and generate a wakeup) when the outer-most |
2919 | * event_id completes. | 2912 | * event completes. |
2920 | */ | 2913 | */ |
2921 | static void perf_output_lock(struct perf_output_handle *handle) | 2914 | static void perf_output_get_handle(struct perf_output_handle *handle) |
2922 | { | 2915 | { |
2923 | struct perf_mmap_data *data = handle->data; | 2916 | struct perf_mmap_data *data = handle->data; |
2924 | int cur, cpu = get_cpu(); | ||
2925 | 2917 | ||
2926 | handle->locked = 0; | 2918 | preempt_disable(); |
2927 | 2919 | atomic_inc(&data->nest); | |
2928 | for (;;) { | ||
2929 | cur = atomic_cmpxchg(&data->lock, -1, cpu); | ||
2930 | if (cur == -1) { | ||
2931 | handle->locked = 1; | ||
2932 | break; | ||
2933 | } | ||
2934 | if (cur == cpu) | ||
2935 | break; | ||
2936 | |||
2937 | cpu_relax(); | ||
2938 | } | ||
2939 | } | 2920 | } |
2940 | 2921 | ||
2941 | static void perf_output_unlock(struct perf_output_handle *handle) | 2922 | static void perf_output_put_handle(struct perf_output_handle *handle) |
2942 | { | 2923 | { |
2943 | struct perf_mmap_data *data = handle->data; | 2924 | struct perf_mmap_data *data = handle->data; |
2944 | unsigned long head; | 2925 | unsigned long head; |
2945 | int cpu; | ||
2946 | |||
2947 | data->done_head = data->head; | ||
2948 | |||
2949 | if (!handle->locked) | ||
2950 | goto out; | ||
2951 | 2926 | ||
2952 | again: | 2927 | again: |
2953 | /* | 2928 | head = atomic_long_read(&data->head); |
2954 | * The xchg implies a full barrier that ensures all writes are done | ||
2955 | * before we publish the new head, matched by a rmb() in userspace when | ||
2956 | * reading this position. | ||
2957 | */ | ||
2958 | while ((head = atomic_long_xchg(&data->done_head, 0))) | ||
2959 | data->user_page->data_head = head; | ||
2960 | 2929 | ||
2961 | /* | 2930 | /* |
2962 | * NMI can happen here, which means we can miss a done_head update. | 2931 | * IRQ/NMI can happen here, which means we can miss a head update. |
2963 | */ | 2932 | */ |
2964 | 2933 | ||
2965 | cpu = atomic_xchg(&data->lock, -1); | 2934 | if (!atomic_dec_and_test(&data->nest)) |
2966 | WARN_ON_ONCE(cpu != smp_processor_id()); | 2935 | return; |
2967 | 2936 | ||
2968 | /* | 2937 | /* |
2969 | * Therefore we have to validate we did not indeed do so. | 2938 | * Publish the known good head. Rely on the full barrier implied |
2939 | * by atomic_dec_and_test() to order the data->head read and this | ||
2940 | * write. | ||
2970 | */ | 2941 | */ |
2971 | if (unlikely(atomic_long_read(&data->done_head))) { | 2942 | data->user_page->data_head = head; |
2972 | /* | ||
2973 | * Since we had it locked, we can lock it again. | ||
2974 | */ | ||
2975 | while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) | ||
2976 | cpu_relax(); | ||
2977 | 2943 | ||
2944 | /* | ||
2945 | * Now check if we missed an update; rely on the (compiler) | ||
2946 | * barrier in atomic_dec_and_test() to re-read data->head. | ||
2947 | */ | ||
2948 | if (unlikely(head != atomic_long_read(&data->head))) { | ||
2949 | atomic_inc(&data->nest); | ||
2978 | goto again; | 2950 | goto again; |
2979 | } | 2951 | } |
2980 | 2952 | ||
2981 | if (atomic_xchg(&data->wakeup, 0)) | 2953 | if (atomic_xchg(&data->wakeup, 0)) |
2982 | perf_output_wakeup(handle); | 2954 | perf_output_wakeup(handle); |
2983 | out: | 2955 | |
2984 | put_cpu(); | 2956 | preempt_enable(); |
2985 | } | 2957 | } |
2986 | 2958 | ||
2987 | void perf_output_copy(struct perf_output_handle *handle, | 2959 | void perf_output_copy(struct perf_output_handle *handle, |
@@ -3063,7 +3035,7 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3063 | if (have_lost) | 3035 | if (have_lost) |
3064 | size += sizeof(lost_event); | 3036 | size += sizeof(lost_event); |
3065 | 3037 | ||
3066 | perf_output_lock(handle); | 3038 | perf_output_get_handle(handle); |
3067 | 3039 | ||
3068 | do { | 3040 | do { |
3069 | /* | 3041 | /* |
@@ -3083,7 +3055,7 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3083 | handle->head = head; | 3055 | handle->head = head; |
3084 | 3056 | ||
3085 | if (head - tail > data->watermark) | 3057 | if (head - tail > data->watermark) |
3086 | atomic_set(&data->wakeup, 1); | 3058 | atomic_inc(&data->wakeup); |
3087 | 3059 | ||
3088 | if (have_lost) { | 3060 | if (have_lost) { |
3089 | lost_event.header.type = PERF_RECORD_LOST; | 3061 | lost_event.header.type = PERF_RECORD_LOST; |
@@ -3099,7 +3071,7 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3099 | 3071 | ||
3100 | fail: | 3072 | fail: |
3101 | atomic_inc(&data->lost); | 3073 | atomic_inc(&data->lost); |
3102 | perf_output_unlock(handle); | 3074 | perf_output_put_handle(handle); |
3103 | out: | 3075 | out: |
3104 | rcu_read_unlock(); | 3076 | rcu_read_unlock(); |
3105 | 3077 | ||
@@ -3117,11 +3089,11 @@ void perf_output_end(struct perf_output_handle *handle) | |||
3117 | int events = atomic_inc_return(&data->events); | 3089 | int events = atomic_inc_return(&data->events); |
3118 | if (events >= wakeup_events) { | 3090 | if (events >= wakeup_events) { |
3119 | atomic_sub(wakeup_events, &data->events); | 3091 | atomic_sub(wakeup_events, &data->events); |
3120 | atomic_set(&data->wakeup, 1); | 3092 | atomic_inc(&data->wakeup); |
3121 | } | 3093 | } |
3122 | } | 3094 | } |
3123 | 3095 | ||
3124 | perf_output_unlock(handle); | 3096 | perf_output_put_handle(handle); |
3125 | rcu_read_unlock(); | 3097 | rcu_read_unlock(); |
3126 | } | 3098 | } |
3127 | 3099 | ||
@@ -3457,22 +3429,13 @@ static void perf_event_task_output(struct perf_event *event, | |||
3457 | { | 3429 | { |
3458 | struct perf_output_handle handle; | 3430 | struct perf_output_handle handle; |
3459 | struct task_struct *task = task_event->task; | 3431 | struct task_struct *task = task_event->task; |
3460 | unsigned long flags; | ||
3461 | int size, ret; | 3432 | int size, ret; |
3462 | 3433 | ||
3463 | /* | ||
3464 | * If this CPU attempts to acquire an rq lock held by a CPU spinning | ||
3465 | * in perf_output_lock() from interrupt context, it's game over. | ||
3466 | */ | ||
3467 | local_irq_save(flags); | ||
3468 | |||
3469 | size = task_event->event_id.header.size; | 3434 | size = task_event->event_id.header.size; |
3470 | ret = perf_output_begin(&handle, event, size, 0, 0); | 3435 | ret = perf_output_begin(&handle, event, size, 0, 0); |
3471 | 3436 | ||
3472 | if (ret) { | 3437 | if (ret) |
3473 | local_irq_restore(flags); | ||
3474 | return; | 3438 | return; |
3475 | } | ||
3476 | 3439 | ||
3477 | task_event->event_id.pid = perf_event_pid(event, task); | 3440 | task_event->event_id.pid = perf_event_pid(event, task); |
3478 | task_event->event_id.ppid = perf_event_pid(event, current); | 3441 | task_event->event_id.ppid = perf_event_pid(event, current); |
@@ -3483,7 +3446,6 @@ static void perf_event_task_output(struct perf_event *event, | |||
3483 | perf_output_put(&handle, task_event->event_id); | 3446 | perf_output_put(&handle, task_event->event_id); |
3484 | 3447 | ||
3485 | perf_output_end(&handle); | 3448 | perf_output_end(&handle); |
3486 | local_irq_restore(flags); | ||
3487 | } | 3449 | } |
3488 | 3450 | ||
3489 | static int perf_event_task_match(struct perf_event *event) | 3451 | static int perf_event_task_match(struct perf_event *event) |