-rw-r--r--  include/linux/perf_event.h |  5
-rw-r--r--  kernel/perf_event.c        | 94
2 files changed, 30 insertions, 69 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0b521fc8f5b0..f1f853a9d5eb 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -597,12 +597,12 @@ struct perf_mmap_data {
 	atomic_t			events;		/* event_id limit */
 
 	atomic_long_t			head;		/* write position */
-	atomic_long_t			done_head;	/* completed head */
 
-	atomic_t			lock;		/* concurrent writes */
 	atomic_t			wakeup;		/* needs a wakeup */
 	atomic_t			lost;		/* nr records lost */
 
+	atomic_t			nest;		/* nested writers */
+
 	long				watermark;	/* wakeup watermark */
 
 	struct perf_event_mmap_page	*user_page;
@@ -807,7 +807,6 @@ struct perf_output_handle {
 	unsigned long			offset;
 	int				nmi;
 	int				sample;
-	int				locked;
 };
 
 #ifdef CONFIG_PERF_EVENTS
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ff5d430d45a7..8cf737da3ec4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2519,8 +2519,6 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
 {
 	long max_size = perf_data_size(data);
 
-	atomic_set(&data->lock, -1);
-
 	if (event->attr.watermark) {
 		data->watermark = min_t(long, max_size,
 					event->attr.wakeup_watermark);
@@ -2906,82 +2904,56 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
 }
 
 /*
- * Curious locking construct.
- *
  * We need to ensure a later event_id doesn't publish a head when a former
- * event_id isn't done writing. However since we need to deal with NMIs we
+ * event isn't done writing. However since we need to deal with NMIs we
  * cannot fully serialize things.
  *
- * What we do is serialize between CPUs so we only have to deal with NMI
- * nesting on a single CPU.
- *
  * We only publish the head (and generate a wakeup) when the outer-most
- * event_id completes.
+ * event completes.
  */
-static void perf_output_lock(struct perf_output_handle *handle)
+static void perf_output_get_handle(struct perf_output_handle *handle)
 {
 	struct perf_mmap_data *data = handle->data;
-	int cur, cpu = get_cpu();
 
-	handle->locked = 0;
-
-	for (;;) {
-		cur = atomic_cmpxchg(&data->lock, -1, cpu);
-		if (cur == -1) {
-			handle->locked = 1;
-			break;
-		}
-		if (cur == cpu)
-			break;
-
-		cpu_relax();
-	}
+	preempt_disable();
+	atomic_inc(&data->nest);
 }
 
-static void perf_output_unlock(struct perf_output_handle *handle)
+static void perf_output_put_handle(struct perf_output_handle *handle)
 {
 	struct perf_mmap_data *data = handle->data;
 	unsigned long head;
-	int cpu;
-
-	data->done_head = data->head;
-
-	if (!handle->locked)
-		goto out;
 
 again:
-	/*
-	 * The xchg implies a full barrier that ensures all writes are done
-	 * before we publish the new head, matched by a rmb() in userspace when
-	 * reading this position.
-	 */
-	while ((head = atomic_long_xchg(&data->done_head, 0)))
-		data->user_page->data_head = head;
+	head = atomic_long_read(&data->head);
 
 	/*
-	 * NMI can happen here, which means we can miss a done_head update.
+	 * IRQ/NMI can happen here, which means we can miss a head update.
 	 */
 
-	cpu = atomic_xchg(&data->lock, -1);
-	WARN_ON_ONCE(cpu != smp_processor_id());
+	if (!atomic_dec_and_test(&data->nest))
+		return;
 
 	/*
-	 * Therefore we have to validate we did not indeed do so.
+	 * Publish the known good head. Rely on the full barrier implied
+	 * by atomic_dec_and_test() order the data->head read and this
+	 * write.
 	 */
-	if (unlikely(atomic_long_read(&data->done_head))) {
-		/*
-		 * Since we had it locked, we can lock it again.
-		 */
-		while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
-			cpu_relax();
+	data->user_page->data_head = head;
 
+	/*
+	 * Now check if we missed an update, rely on the (compiler)
+	 * barrier in atomic_dec_and_test() to re-read data->head.
+	 */
+	if (unlikely(head != atomic_long_read(&data->head))) {
+		atomic_inc(&data->nest);
 		goto again;
 	}
 
 	if (atomic_xchg(&data->wakeup, 0))
 		perf_output_wakeup(handle);
-out:
-	put_cpu();
+
+	preempt_enable();
 }
 
 void perf_output_copy(struct perf_output_handle *handle,
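
For readers tracing the new scheme, the following is a minimal user-space sketch of the same idea, using C11 atomics and invented names (struct buf, writer_get, writer_put) rather than the kernel's atomic_t API; it is an illustrative analogue, not the kernel code. Writers bump a nest counter on entry; only the writer that drops the counter back to zero publishes the head, and it then re-reads the head in case a nested (NMI) writer advanced it in the meantime.

/*
 * Illustrative user-space analogue of perf_output_{get,put}_handle().
 * Names and structure are invented for this sketch; it uses C11
 * atomics instead of the kernel's atomic_t API.
 */
#include <stdatomic.h>
#include <stdio.h>

struct buf {
	atomic_long	head;		/* write position (writers only) */
	atomic_int	nest;		/* nested writers */
	long		user_head;	/* position published to the reader */
};

static struct buf rb;			/* static storage: zero-initialized */

static void writer_get(struct buf *b)
{
	/* kernel: preempt_disable(); atomic_inc(&data->nest); */
	atomic_fetch_add(&b->nest, 1);
}

static void writer_put(struct buf *b)
{
	long head;
again:
	head = atomic_load(&b->head);

	/* Only the outer-most writer (counter drops to 0) publishes. */
	if (atomic_fetch_sub(&b->nest, 1) != 1)
		return;

	b->user_head = head;	/* kernel: data->user_page->data_head = head */

	/* A nested writer may have advanced head after we read it. */
	if (head != atomic_load(&b->head)) {
		atomic_fetch_add(&b->nest, 1);
		goto again;
	}
}

int main(void)
{
	writer_get(&rb);			/* outer writer */
	atomic_fetch_add(&rb.head, 64);		/* ... writes 64 bytes */

	writer_get(&rb);			/* nested ("NMI") writer */
	atomic_fetch_add(&rb.head, 32);		/* ... writes 32 bytes */
	writer_put(&rb);			/* inner put: no publish */

	writer_put(&rb);			/* outer put: publishes */
	printf("published head = %ld\n", rb.user_head);
	return 0;
}

Run sequentially, the example nests a second writer inside the first: the inner put does not publish, and the outer put publishes head = 96.
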
@@ -3063,7 +3035,7 @@ int perf_output_begin(struct perf_output_handle *handle,
 	if (have_lost)
 		size += sizeof(lost_event);
 
-	perf_output_lock(handle);
+	perf_output_get_handle(handle);
 
 	do {
 		/*
@@ -3083,7 +3055,7 @@ int perf_output_begin(struct perf_output_handle *handle,
 	handle->head	= head;
 
 	if (head - tail > data->watermark)
-		atomic_set(&data->wakeup, 1);
+		atomic_inc(&data->wakeup);
 
 	if (have_lost) {
 		lost_event.header.type = PERF_RECORD_LOST;
@@ -3099,7 +3071,7 @@ int perf_output_begin(struct perf_output_handle *handle,
 
 fail:
 	atomic_inc(&data->lost);
-	perf_output_unlock(handle);
+	perf_output_put_handle(handle);
 out:
 	rcu_read_unlock();
 
@@ -3117,11 +3089,11 @@ void perf_output_end(struct perf_output_handle *handle)
 		int events = atomic_inc_return(&data->events);
 		if (events >= wakeup_events) {
 			atomic_sub(wakeup_events, &data->events);
-			atomic_set(&data->wakeup, 1);
+			atomic_inc(&data->wakeup);
 		}
 	}
 
-	perf_output_unlock(handle);
+	perf_output_put_handle(handle);
 	rcu_read_unlock();
 }
 
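
The wakeup side keeps its consume-with-xchg shape: writers now mark data->wakeup with atomic_inc() instead of atomic_set(.., 1) when the watermark is crossed or the wakeup_events threshold is reached, and perf_output_put_handle() clears it with atomic_xchg(&data->wakeup, 0), so any number of pending requests collapse into a single wakeup. A minimal sketch of that request/consume pattern, again with invented names and C11 atomics rather than the kernel API:

/*
 * Pending-wakeup pattern from perf_output_begin()/perf_output_end()
 * and perf_output_put_handle(): producers request, the publisher
 * consumes. Invented names, user-space sketch only.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int wakeup_pending;	/* stands in for data->wakeup */

static void request_wakeup(void)
{
	/* kernel: atomic_inc(&data->wakeup) */
	atomic_fetch_add(&wakeup_pending, 1);
}

static void maybe_wakeup(void)
{
	/* kernel: if (atomic_xchg(&data->wakeup, 0)) perf_output_wakeup(handle); */
	if (atomic_exchange(&wakeup_pending, 0))
		printf("wake up the reader\n");	/* stand-in for perf_output_wakeup() */
}

int main(void)
{
	request_wakeup();	/* watermark crossed */
	request_wakeup();	/* wakeup_events threshold reached */
	maybe_wakeup();		/* one wakeup; counter reset to zero */
	maybe_wakeup();		/* nothing pending; no wakeup */
	return 0;
}
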
@@ -3457,22 +3429,13 @@ static void perf_event_task_output(struct perf_event *event,
 {
 	struct perf_output_handle handle;
 	struct task_struct *task = task_event->task;
-	unsigned long flags;
 	int size, ret;
 
-	/*
-	 * If this CPU attempts to acquire an rq lock held by a CPU spinning
-	 * in perf_output_lock() from interrupt context, it's game over.
-	 */
-	local_irq_save(flags);
-
 	size = task_event->event_id.header.size;
 	ret = perf_output_begin(&handle, event, size, 0, 0);
 
-	if (ret) {
-		local_irq_restore(flags);
+	if (ret)
 		return;
-	}
 
 	task_event->event_id.pid = perf_event_pid(event, task);
 	task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3483,7 +3446,6 @@ static void perf_event_task_output(struct perf_event *event,
 	perf_output_put(&handle, task_event->event_id);
 
 	perf_output_end(&handle);
-	local_irq_restore(flags);
 }
 
 static int perf_event_task_match(struct perf_event *event)
