author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2010-05-18 04:50:41 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-05-18 12:35:48 -0400
commit	ef60777c9abd999db5eb4e338aae3eb593ae8e10 (patch)
tree	2dee468a922ebea2241d1c1ec10e581d62bf2db6 /kernel
parent	c7920614cebbf269a7c8397ff959a8dcf727465c (diff)
perf: Optimize the perf_output() path by removing IRQ-disables
Since we can now assume there is only a single writer to each buffer, we can
remove the per-cpu lock thingy and use a simple nest-count to the same effect.

This removes the need to disable IRQs.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
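For illustration, the nest-count publication scheme can be modelled outside the
kernel with C11 atomics. The sketch below is only a model under stated
assumptions, not the kernel code: struct sketch_buffer and the sketch_*()
helpers are invented names, and the real perf code additionally disables
preemption and relies on the full barrier implied by atomic_dec_and_test().

/*
 * Illustration only: a userspace model of the nest-count scheme using C11
 * atomics. The struct and sketch_* names are invented for this example.
 */
#include <stdatomic.h>
#include <stdio.h>

struct sketch_buffer {
	atomic_long head;	/* write position advanced by writers      */
	atomic_int nest;	/* nesting depth of writers on this buffer */
	atomic_long data_head;	/* position published to the reader side   */
};

/* Enter the output path: outer-most or nested, just bump the nest count. */
static void sketch_get_handle(struct sketch_buffer *b)
{
	atomic_fetch_add(&b->nest, 1);
}

/* Leave the output path: only the outer-most writer publishes the head. */
static void sketch_put_handle(struct sketch_buffer *b)
{
	long head;

again:
	head = atomic_load(&b->head);

	/* Nested writers return early; the outer-most one publishes for them. */
	if (atomic_fetch_sub(&b->nest, 1) != 1)
		return;

	/* Outer-most writer: publish the head value observed above. */
	atomic_store(&b->data_head, head);

	/* A writer nested in after the read of head: go around and republish. */
	if (head != atomic_load(&b->head)) {
		atomic_fetch_add(&b->nest, 1);
		goto again;
	}
}

int main(void)
{
	struct sketch_buffer b;

	atomic_init(&b.head, 0);
	atomic_init(&b.nest, 0);
	atomic_init(&b.data_head, 0);

	sketch_get_handle(&b);
	atomic_fetch_add(&b.head, 64);	/* pretend we wrote a 64-byte record */
	sketch_put_handle(&b);

	printf("published head: %ld\n", atomic_load(&b.data_head));
	return 0;
}

The point the patch relies on is that only the outer-most writer publishes
data_head, and the re-check of head after publishing catches any update made
by a writer that nested in between, so no cross-CPU lock or IRQ disabling is
needed.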
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/perf_event.c	94
1 file changed, 28 insertions(+), 66 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ff5d430d45a7..8cf737da3ec4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2519,8 +2519,6 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
 {
 	long max_size = perf_data_size(data);
 
-	atomic_set(&data->lock, -1);
-
 	if (event->attr.watermark) {
 		data->watermark = min_t(long, max_size,
 					event->attr.wakeup_watermark);
@@ -2906,82 +2904,56 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
 }
 
 /*
- * Curious locking construct.
- *
  * We need to ensure a later event_id doesn't publish a head when a former
- * event_id isn't done writing. However since we need to deal with NMIs we
+ * event isn't done writing. However since we need to deal with NMIs we
  * cannot fully serialize things.
  *
- * What we do is serialize between CPUs so we only have to deal with NMI
- * nesting on a single CPU.
- *
  * We only publish the head (and generate a wakeup) when the outer-most
- * event_id completes.
+ * event completes.
  */
-static void perf_output_lock(struct perf_output_handle *handle)
+static void perf_output_get_handle(struct perf_output_handle *handle)
 {
 	struct perf_mmap_data *data = handle->data;
-	int cur, cpu = get_cpu();
 
-	handle->locked = 0;
-
-	for (;;) {
-		cur = atomic_cmpxchg(&data->lock, -1, cpu);
-		if (cur == -1) {
-			handle->locked = 1;
-			break;
-		}
-		if (cur == cpu)
-			break;
-
-		cpu_relax();
-	}
+	preempt_disable();
+	atomic_inc(&data->nest);
 }
 
-static void perf_output_unlock(struct perf_output_handle *handle)
+static void perf_output_put_handle(struct perf_output_handle *handle)
 {
 	struct perf_mmap_data *data = handle->data;
 	unsigned long head;
-	int cpu;
-
-	data->done_head = data->head;
-
-	if (!handle->locked)
-		goto out;
 
 again:
-	/*
-	 * The xchg implies a full barrier that ensures all writes are done
-	 * before we publish the new head, matched by a rmb() in userspace when
-	 * reading this position.
-	 */
-	while ((head = atomic_long_xchg(&data->done_head, 0)))
-		data->user_page->data_head = head;
+	head = atomic_long_read(&data->head);
 
 	/*
-	 * NMI can happen here, which means we can miss a done_head update.
+	 * IRQ/NMI can happen here, which means we can miss a head update.
 	 */
 
-	cpu = atomic_xchg(&data->lock, -1);
-	WARN_ON_ONCE(cpu != smp_processor_id());
+	if (!atomic_dec_and_test(&data->nest))
+		return;
 
 	/*
-	 * Therefore we have to validate we did not indeed do so.
+	 * Publish the known good head. Rely on the full barrier implied
+	 * by atomic_dec_and_test() order the data->head read and this
+	 * write.
 	 */
-	if (unlikely(atomic_long_read(&data->done_head))) {
-		/*
-		 * Since we had it locked, we can lock it again.
-		 */
-		while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
-			cpu_relax();
+	data->user_page->data_head = head;
 
+	/*
+	 * Now check if we missed an update, rely on the (compiler)
+	 * barrier in atomic_dec_and_test() to re-read data->head.
+	 */
+	if (unlikely(head != atomic_long_read(&data->head))) {
+		atomic_inc(&data->nest);
 		goto again;
 	}
 
 	if (atomic_xchg(&data->wakeup, 0))
 		perf_output_wakeup(handle);
-out:
-	put_cpu();
+
+	preempt_enable();
 }
 
 void perf_output_copy(struct perf_output_handle *handle,
@@ -3063,7 +3035,7 @@ int perf_output_begin(struct perf_output_handle *handle,
 	if (have_lost)
 		size += sizeof(lost_event);
 
-	perf_output_lock(handle);
+	perf_output_get_handle(handle);
 
 	do {
 		/*
@@ -3083,7 +3055,7 @@ int perf_output_begin(struct perf_output_handle *handle,
 	handle->head = head;
 
 	if (head - tail > data->watermark)
-		atomic_set(&data->wakeup, 1);
+		atomic_inc(&data->wakeup);
 
 	if (have_lost) {
 		lost_event.header.type = PERF_RECORD_LOST;
@@ -3099,7 +3071,7 @@ int perf_output_begin(struct perf_output_handle *handle,
 
 fail:
 	atomic_inc(&data->lost);
-	perf_output_unlock(handle);
+	perf_output_put_handle(handle);
 out:
 	rcu_read_unlock();
 
@@ -3117,11 +3089,11 @@ void perf_output_end(struct perf_output_handle *handle)
 		int events = atomic_inc_return(&data->events);
 		if (events >= wakeup_events) {
 			atomic_sub(wakeup_events, &data->events);
-			atomic_set(&data->wakeup, 1);
+			atomic_inc(&data->wakeup);
 		}
 	}
 
-	perf_output_unlock(handle);
+	perf_output_put_handle(handle);
 	rcu_read_unlock();
 }
 
@@ -3457,22 +3429,13 @@ static void perf_event_task_output(struct perf_event *event,
 {
 	struct perf_output_handle handle;
 	struct task_struct *task = task_event->task;
-	unsigned long flags;
 	int size, ret;
 
-	/*
-	 * If this CPU attempts to acquire an rq lock held by a CPU spinning
-	 * in perf_output_lock() from interrupt context, it's game over.
-	 */
-	local_irq_save(flags);
-
 	size = task_event->event_id.header.size;
 	ret = perf_output_begin(&handle, event, size, 0, 0);
 
-	if (ret) {
-		local_irq_restore(flags);
+	if (ret)
 		return;
-	}
 
 	task_event->event_id.pid = perf_event_pid(event, task);
 	task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3483,7 +3446,6 @@ static void perf_event_task_output(struct perf_event *event,
 	perf_output_put(&handle, task_event->event_id);
 
 	perf_output_end(&handle);
-	local_irq_restore(flags);
 }
 
 static int perf_event_task_match(struct perf_event *event)