aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2010-10-14 02:01:34 -0400
committerIngo Molnar <mingo@elte.hu>2010-10-18 13:58:50 -0400
commite360adbe29241a0194e10e20595360dd7b98a2b3 (patch)
treeef5fa5f50a895096bfb25bc11b25949603158238 /kernel
parent8e5fc1a7320baf6076391607515dceb61319b36a (diff)
irq_work: Add generic hardirq context callbacks
Provide a mechanism that allows running code in IRQ context. It is most useful for NMI code that needs to interact with the rest of the system -- like wakeup a task to drain buffers. Perf currently has such a mechanism, so extract that and provide it as a generic feature, independent of perf so that others may also benefit. The IRQ context callback is generated through self-IPIs where possible, or on architectures like powerpc the decrementer (the built-in timer facility) is set to generate an interrupt immediately. Architectures that don't have anything like this get to do with a callback from the timer tick. These architectures can call irq_work_run() at the tail of any IRQ handlers that might enqueue such work (like the perf IRQ handler) to avoid undue latencies in processing the work. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Kyle McMartin <kyle@mcmartin.ca> Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> [ various fixes ] Signed-off-by: Huang Ying <ying.huang@intel.com> LKML-Reference: <1287036094.7768.291.camel@yhuang-dev> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/irq_work.c164
-rw-r--r--kernel/perf_event.c104
-rw-r--r--kernel/timer.c7
4 files changed, 176 insertions, 101 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index d52b473c99a1..4d9bf5f8531f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -23,6 +23,7 @@ CFLAGS_REMOVE_rtmutex-debug.o = -pg
23CFLAGS_REMOVE_cgroup-debug.o = -pg 23CFLAGS_REMOVE_cgroup-debug.o = -pg
24CFLAGS_REMOVE_sched_clock.o = -pg 24CFLAGS_REMOVE_sched_clock.o = -pg
25CFLAGS_REMOVE_perf_event.o = -pg 25CFLAGS_REMOVE_perf_event.o = -pg
26CFLAGS_REMOVE_irq_work.o = -pg
26endif 27endif
27 28
28obj-$(CONFIG_FREEZER) += freezer.o 29obj-$(CONFIG_FREEZER) += freezer.o
@@ -100,6 +101,7 @@ obj-$(CONFIG_TRACING) += trace/
100obj-$(CONFIG_X86_DS) += trace/ 101obj-$(CONFIG_X86_DS) += trace/
101obj-$(CONFIG_RING_BUFFER) += trace/ 102obj-$(CONFIG_RING_BUFFER) += trace/
102obj-$(CONFIG_SMP) += sched_cpupri.o 103obj-$(CONFIG_SMP) += sched_cpupri.o
104obj-$(CONFIG_IRQ_WORK) += irq_work.o
103obj-$(CONFIG_PERF_EVENTS) += perf_event.o 105obj-$(CONFIG_PERF_EVENTS) += perf_event.o
104obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o 106obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
105obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o 107obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
new file mode 100644
index 000000000000..f16763ff8481
--- /dev/null
+++ b/kernel/irq_work.c
@@ -0,0 +1,164 @@
1/*
2 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
3 *
4 * Provides a framework for enqueueing and running callbacks from hardirq
5 * context. The enqueueing is NMI-safe.
6 */
7
8#include <linux/kernel.h>
9#include <linux/module.h>
10#include <linux/irq_work.h>
11#include <linux/hardirq.h>
12
13/*
14 * An entry can be in one of four states:
15 *
16 * free NULL, 0 -> {claimed} : free to be used
17 * claimed NULL, 3 -> {pending} : claimed to be enqueued
18 * pending next, 3 -> {busy} : queued, pending callback
19 * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
20 *
21 * We use the lower two bits of the next pointer to keep PENDING and BUSY
22 * flags.
23 */
24
25#define IRQ_WORK_PENDING 1UL
26#define IRQ_WORK_BUSY 2UL
27#define IRQ_WORK_FLAGS 3UL
28
29static inline bool irq_work_is_set(struct irq_work *entry, int flags)
30{
31 return (unsigned long)entry->next & flags;
32}
33
34static inline struct irq_work *irq_work_next(struct irq_work *entry)
35{
36 unsigned long next = (unsigned long)entry->next;
37 next &= ~IRQ_WORK_FLAGS;
38 return (struct irq_work *)next;
39}
40
/* Pack @flags into the low bits of the pointer value @entry. */
static inline struct irq_work *next_flags(struct irq_work *entry, int flags)
{
	return (struct irq_work *)((unsigned long)entry | flags);
}
47
48static DEFINE_PER_CPU(struct irq_work *, irq_work_list);
49
/*
 * Claim the entry so that no one else will poke at it.
 *
 * Atomically sets PENDING|BUSY on @entry->next with a cmpxchg() loop,
 * which is what makes enqueueing NMI-safe: whoever flips PENDING first
 * owns the entry.  Returns false when PENDING is already set, i.e. the
 * entry is (or is about to be) queued by somebody else.
 */
static bool irq_work_claim(struct irq_work *entry)
{
	struct irq_work *next, *nflags;

	do {
		next = entry->next;
		/* Already claimed by a concurrent queuer -- give up. */
		if ((unsigned long)next & IRQ_WORK_PENDING)
			return false;
		/* Preserve the current list linkage, add both flag bits. */
		nflags = next_flags(next, IRQ_WORK_FLAGS);
	} while (cmpxchg(&entry->next, next, nflags) != next);

	return true;
}
66
67
/*
 * Weak hook used to kick off processing of queued work: architectures
 * override this to raise a self-interrupt (or, per the changelog, set a
 * timer facility to fire immediately).
 */
void __weak arch_irq_work_raise(void)
{
	/*
	 * Lame architectures will get the timer tick callback
	 */
}
74
/*
 * Queue the entry and raise the IPI if needed.
 *
 * Lock-free push of the (already claimed) @entry onto this CPU's
 * irq_work_list via cmpxchg(), so an NMI hitting mid-update cannot
 * corrupt the list.  Caller must hold the claim (PENDING|BUSY set
 * by irq_work_claim()).
 */
static void __irq_work_queue(struct irq_work *entry)
{
	struct irq_work **head, *next;

	/* get_cpu_var() pins us to this CPU's list until put_cpu_var(). */
	head = &get_cpu_var(irq_work_list);

	do {
		next = *head;
		/* Can assign non-atomic because we keep the flags set. */
		entry->next = next_flags(next, IRQ_WORK_FLAGS);
	} while (cmpxchg(head, next, entry) != next);

	/* The list was empty, raise self-interrupt to start processing. */
	if (!irq_work_next(entry))
		arch_irq_work_raise();

	put_cpu_var(irq_work_list);
}
96
/*
 * Enqueue the irq_work @entry on the current CPU.
 *
 * Returns true on success, false when the @entry was already enqueued
 * by someone else.  An entry whose callback is still in progress may be
 * re-enqueued.
 */
bool irq_work_queue(struct irq_work *entry)
{
	bool claimed = irq_work_claim(entry);

	/* Lost the race: already enqueued, nothing to do. */
	if (claimed)
		__irq_work_queue(entry);

	return claimed;
}
EXPORT_SYMBOL_GPL(irq_work_queue);
116
/*
 * Run the irq_work entries on this cpu. Requires to be ran from hardirq
 * context with local IRQs disabled.
 */
void irq_work_run(void)
{
	struct irq_work *list, **head;

	head = &__get_cpu_var(irq_work_list);
	/* Cheap unlocked peek: empty list means nothing to process. */
	if (*head == NULL)
		return;

	/* Enforce the documented calling context. */
	BUG_ON(!in_irq());
	BUG_ON(!irqs_disabled());

	/* Atomically steal the whole list; new entries restart from NULL. */
	list = xchg(head, NULL);
	while (list != NULL) {
		struct irq_work *entry = list;

		/* Fetch the successor before the entry becomes reusable. */
		list = irq_work_next(list);

		/*
		 * Clear the PENDING bit, after this point the @entry
		 * can be re-used.
		 */
		entry->next = next_flags(NULL, IRQ_WORK_BUSY);
		entry->func(entry);
		/*
		 * Clear the BUSY bit and return to the free state if
		 * no-one else claimed it meanwhile.
		 */
		cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
	}
}
EXPORT_SYMBOL_GPL(irq_work_run);
152
/*
 * Synchronize against the irq_work @entry, ensures the entry is not
 * currently in use.
 *
 * Busy-waits until the BUSY flag drops, i.e. until any in-flight
 * callback has completed.  The WARN enforces that callers have IRQs
 * enabled while spinning.
 */
void irq_work_sync(struct irq_work *entry)
{
	WARN_ON_ONCE(irqs_disabled());

	while (irq_work_is_set(entry, IRQ_WORK_BUSY))
		cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 634f86a4b2f9..99b9700e74d0 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2206,12 +2206,11 @@ static void free_event_rcu(struct rcu_head *head)
2206 kfree(event); 2206 kfree(event);
2207} 2207}
2208 2208
2209static void perf_pending_sync(struct perf_event *event);
2210static void perf_buffer_put(struct perf_buffer *buffer); 2209static void perf_buffer_put(struct perf_buffer *buffer);
2211 2210
2212static void free_event(struct perf_event *event) 2211static void free_event(struct perf_event *event)
2213{ 2212{
2214 perf_pending_sync(event); 2213 irq_work_sync(&event->pending);
2215 2214
2216 if (!event->parent) { 2215 if (!event->parent) {
2217 atomic_dec(&nr_events); 2216 atomic_dec(&nr_events);
@@ -3162,16 +3161,7 @@ void perf_event_wakeup(struct perf_event *event)
3162 } 3161 }
3163} 3162}
3164 3163
3165/* 3164static void perf_pending_event(struct irq_work *entry)
3166 * Pending wakeups
3167 *
3168 * Handle the case where we need to wakeup up from NMI (or rq->lock) context.
3169 *
3170 * The NMI bit means we cannot possibly take locks. Therefore, maintain a
3171 * single linked list and use cmpxchg() to add entries lockless.
3172 */
3173
3174static void perf_pending_event(struct perf_pending_entry *entry)
3175{ 3165{
3176 struct perf_event *event = container_of(entry, 3166 struct perf_event *event = container_of(entry,
3177 struct perf_event, pending); 3167 struct perf_event, pending);
@@ -3187,89 +3177,6 @@ static void perf_pending_event(struct perf_pending_entry *entry)
3187 } 3177 }
3188} 3178}
3189 3179
3190#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
3191
3192static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
3193 PENDING_TAIL,
3194};
3195
3196static void perf_pending_queue(struct perf_pending_entry *entry,
3197 void (*func)(struct perf_pending_entry *))
3198{
3199 struct perf_pending_entry **head;
3200
3201 if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
3202 return;
3203
3204 entry->func = func;
3205
3206 head = &get_cpu_var(perf_pending_head);
3207
3208 do {
3209 entry->next = *head;
3210 } while (cmpxchg(head, entry->next, entry) != entry->next);
3211
3212 set_perf_event_pending();
3213
3214 put_cpu_var(perf_pending_head);
3215}
3216
3217static int __perf_pending_run(void)
3218{
3219 struct perf_pending_entry *list;
3220 int nr = 0;
3221
3222 list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
3223 while (list != PENDING_TAIL) {
3224 void (*func)(struct perf_pending_entry *);
3225 struct perf_pending_entry *entry = list;
3226
3227 list = list->next;
3228
3229 func = entry->func;
3230 entry->next = NULL;
3231 /*
3232 * Ensure we observe the unqueue before we issue the wakeup,
3233 * so that we won't be waiting forever.
3234 * -- see perf_not_pending().
3235 */
3236 smp_wmb();
3237
3238 func(entry);
3239 nr++;
3240 }
3241
3242 return nr;
3243}
3244
3245static inline int perf_not_pending(struct perf_event *event)
3246{
3247 /*
3248 * If we flush on whatever cpu we run, there is a chance we don't
3249 * need to wait.
3250 */
3251 get_cpu();
3252 __perf_pending_run();
3253 put_cpu();
3254
3255 /*
3256 * Ensure we see the proper queue state before going to sleep
3257 * so that we do not miss the wakeup. -- see perf_pending_handle()
3258 */
3259 smp_rmb();
3260 return event->pending.next == NULL;
3261}
3262
3263static void perf_pending_sync(struct perf_event *event)
3264{
3265 wait_event(event->waitq, perf_not_pending(event));
3266}
3267
3268void perf_event_do_pending(void)
3269{
3270 __perf_pending_run();
3271}
3272
3273/* 3180/*
3274 * We assume there is only KVM supporting the callbacks. 3181 * We assume there is only KVM supporting the callbacks.
3275 * Later on, we might change it to a list if there is 3182 * Later on, we might change it to a list if there is
@@ -3319,8 +3226,7 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
3319 3226
3320 if (handle->nmi) { 3227 if (handle->nmi) {
3321 handle->event->pending_wakeup = 1; 3228 handle->event->pending_wakeup = 1;
3322 perf_pending_queue(&handle->event->pending, 3229 irq_work_queue(&handle->event->pending);
3323 perf_pending_event);
3324 } else 3230 } else
3325 perf_event_wakeup(handle->event); 3231 perf_event_wakeup(handle->event);
3326} 3232}
@@ -4356,8 +4262,7 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
4356 event->pending_kill = POLL_HUP; 4262 event->pending_kill = POLL_HUP;
4357 if (nmi) { 4263 if (nmi) {
4358 event->pending_disable = 1; 4264 event->pending_disable = 1;
4359 perf_pending_queue(&event->pending, 4265 irq_work_queue(&event->pending);
4360 perf_pending_event);
4361 } else 4266 } else
4362 perf_event_disable(event); 4267 perf_event_disable(event);
4363 } 4268 }
@@ -5374,6 +5279,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
5374 INIT_LIST_HEAD(&event->event_entry); 5279 INIT_LIST_HEAD(&event->event_entry);
5375 INIT_LIST_HEAD(&event->sibling_list); 5280 INIT_LIST_HEAD(&event->sibling_list);
5376 init_waitqueue_head(&event->waitq); 5281 init_waitqueue_head(&event->waitq);
5282 init_irq_work(&event->pending, perf_pending_event);
5377 5283
5378 mutex_init(&event->mmap_mutex); 5284 mutex_init(&event->mmap_mutex);
5379 5285
diff --git a/kernel/timer.c b/kernel/timer.c
index 97bf05baade7..68a9ae7679b7 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,7 +37,7 @@
37#include <linux/delay.h> 37#include <linux/delay.h>
38#include <linux/tick.h> 38#include <linux/tick.h>
39#include <linux/kallsyms.h> 39#include <linux/kallsyms.h>
40#include <linux/perf_event.h> 40#include <linux/irq_work.h>
41#include <linux/sched.h> 41#include <linux/sched.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43 43
@@ -1279,7 +1279,10 @@ void update_process_times(int user_tick)
1279 run_local_timers(); 1279 run_local_timers();
1280 rcu_check_callbacks(cpu, user_tick); 1280 rcu_check_callbacks(cpu, user_tick);
1281 printk_tick(); 1281 printk_tick();
1282 perf_event_do_pending(); 1282#ifdef CONFIG_IRQ_WORK
1283 if (in_irq())
1284 irq_work_run();
1285#endif
1283 scheduler_tick(); 1286 scheduler_tick();
1284 run_posix_cpu_timers(p); 1287 run_posix_cpu_timers(p);
1285} 1288}