irq_work: Add generic hardirq context callbacks

Provide a mechanism that allows running code in IRQ context. It is most useful for NMI code that needs to interact with the rest of the system -- for example, to wake up a task to drain buffers. Perf currently has such a mechanism, so extract that and provide it as a generic feature, independent of perf so that others may also benefit. The IRQ context callback is generated through self-IPIs where possible, or on architectures like powerpc the decrementer (the built-in timer facility) is set to generate an interrupt immediately. Architectures that don't have anything like this have to make do with a callback from the timer tick. These architectures can call irq_work_run() at the tail of any IRQ handlers that might enqueue such work (like the perf IRQ handler) to avoid undue latencies in processing the work. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Kyle McMartin <kyle@mcmartin.ca> Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> [ various fixes ] Signed-off-by: Huang Ying <ying.huang@intel.com> LKML-Reference: <1287036094.7768.291.camel@yhuang-dev> Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2010-10-14 02:01:34 -0400
committer: Ingo Molnar <mingo@elte.hu> 2010-10-18 13:58:50 -0400
commit: e360adbe29241a0194e10e20595360dd7b98a2b3 (patch)
tree: ef5fa5f50a895096bfb25bc11b25949603158238 /kernel/irq_work.c
parent: 8e5fc1a7320baf6076391607515dceb61319b36a (diff)
1 files changed, 164 insertions, 0 deletions
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
new file mode 100644
index 000000000000..f16763ff8481
--- /dev/null
+++ b/kernel/irq_work.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Provides a framework for enqueueing and running callbacks from hardirq
+ * context. The enqueueing is NMI-safe.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/irq_work.h>
+#include <linux/hardirq.h>
+/*
+ * An entry can be in one of four states:
+ *
+ * free      NULL, 0 -> {claimed}       : free to be used
+ * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
+ * pending   next, 3 -> {busy}          : queued, pending callback
+ * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
+ *
+ * Each row reads: state name, pointer part of ->next, flag bits of ->next,
+ * the states reachable from it, and a short description.
+ *
+ * We use the lower two bits of the next pointer to keep PENDING and BUSY
+ * flags.
+ */
+#define IRQ_WORK_PENDING        1UL     /* bit 0: entry is claimed or queued */
+#define IRQ_WORK_BUSY           2UL     /* bit 1: set from claim until the callback has finished */
+#define IRQ_WORK_FLAGS          3UL     /* PENDING | BUSY: mask covering both flag bits */
+/* Report whether any of the @flags bits are set in @entry's next pointer. */
+static inline bool irq_work_is_set(struct irq_work *entry, int flags)
+{
+        unsigned long word = (unsigned long)entry->next;
+        return (word & flags) != 0;
+}
+/* Return @entry's successor with the two flag bits masked out of the pointer. */
+static inline struct irq_work *irq_work_next(struct irq_work *entry)
+{
+        return (struct irq_work *)((unsigned long)entry->next & ~IRQ_WORK_FLAGS);
+}
+/* Build a ->next value: the address @entry with @flags OR-ed into its low bits. */
+static inline struct irq_work *next_flags(struct irq_work *entry, int flags)
+{
+        return (struct irq_work *)((unsigned long)entry | flags);
+}
+static DEFINE_PER_CPU(struct irq_work *, irq_work_list);       /* per-CPU head of the list of queued entries */
+/*
+ * Try to take ownership of @entry so that it can be enqueued.
+ *
+ * Returns false if PENDING is already set (another context got there
+ * first); otherwise atomically sets both flag bits, preserving the pointer
+ * part of ->next, and returns true.
+ */
+static bool irq_work_claim(struct irq_work *entry)
+{
+        struct irq_work *seen;
+        for (;;) {
+                seen = entry->next;
+                if ((unsigned long)seen & IRQ_WORK_PENDING)
+                        return false;
+                /* Go around again if ->next changed since it was sampled. */
+                if (cmpxchg(&entry->next, seen, next_flags(seen, IRQ_WORK_FLAGS)) == seen)
+                        return true;
+        }
+}
+void __weak arch_irq_work_raise(void)
+{
+        /*
+         * Default weak implementation: deliberately does nothing.
+         * Lame architectures (those that do not supply their own version)
+         * will get the timer tick callback instead.
+         */
+}
+/*
+ * Queue the entry and raise the IPI if needed.
+ *
+ * @entry must already have been claimed. It is pushed onto the head of this
+ * CPU's irq_work_list with a cmpxchg loop, and arch_irq_work_raise() is
+ * called only when the list was empty before the push.
+ */
+static void __irq_work_queue(struct irq_work *entry)
+{
+        struct irq_work **head, *next;
+        head = &get_cpu_var(irq_work_list);
+        do {
+                next = *head;
+                /* Can assign non-atomic because we keep the flags set. */
+                entry->next = next_flags(next, IRQ_WORK_FLAGS);
+        } while (cmpxchg(head, next, entry) != next);
+        /*
+         * The list was empty, raise self-interrupt to start processing.
+         *
+         * Test the old head that was swapped out rather than re-reading
+         * entry->next: once the cmpxchg has published @entry,
+         * irq_work_run() may already have rewritten its next pointer.
+         */
+        if (!next)
+                arch_irq_work_raise();
+        put_cpu_var(irq_work_list);
+}
+/*
+ * Enqueue the irq_work @entry. Returns true if it was queued, false if
+ * @entry was already enqueued by someone else.
+ *
+ * Re-enqueueing is allowed while the entry's callback is still in progress.
+ */
+bool irq_work_queue(struct irq_work *entry)
+{
+        bool claimed = irq_work_claim(entry);
+        /* Only the context that won the claim may put the entry on the list. */
+        if (claimed)
+                __irq_work_queue(entry);
+        return claimed;
+}
+EXPORT_SYMBOL_GPL(irq_work_queue);
+/*
+ * Run the irq_work entries on this cpu. Must be run from hardirq
+ * context with local IRQs disabled.
+ *
+ * The whole per-CPU list is detached with a single xchg() and then walked,
+ * invoking each entry's ->func(). Returns immediately, before the context
+ * checks below, when the list is empty.
+ */
+void irq_work_run(void)
+{
+        struct irq_work *list, **head;
+        head = &__get_cpu_var(irq_work_list);
+        if (*head == NULL)
+                return;
+        BUG_ON(!in_irq());
+        BUG_ON(!irqs_disabled());
+        list = xchg(head, NULL);        /* take the whole list, leaving the head empty */
+        while (list != NULL) {
+                struct irq_work *entry = list;
+                list = irq_work_next(list);     /* fetch the successor before ->next is overwritten */
+                /*
+                 * Clear the PENDING bit (and the pointer part), leaving
+                 * only BUSY; after this point the @entry can be re-used,
+                 * i.e. claimed and queued again.
+                 */
+                entry->next = next_flags(NULL, IRQ_WORK_BUSY);
+                entry->func(entry);
+                /*
+                 * Clear the BUSY bit and return to the free state if
+                 * no-one else claimed it meanwhile. If it was re-claimed,
+                 * ->next no longer equals the bare BUSY value and the
+                 * cmpxchg leaves it untouched.
+                 */
+                cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
+        }
+}
+EXPORT_SYMBOL_GPL(irq_work_run);
+/*
+ * Wait until the irq_work @entry is no longer in use, i.e. until its BUSY
+ * flag is clear. Busy-waits, and warns once if called with IRQs disabled.
+ */
+void irq_work_sync(struct irq_work *entry)
+{
+        WARN_ON_ONCE(irqs_disabled());
+        for (;;) {
+                if (!irq_work_is_set(entry, IRQ_WORK_BUSY))
+                        break;
+                cpu_relax();
+        }
+}
+EXPORT_SYMBOL_GPL(irq_work_sync);
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2010-10-14 02:01:34 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-10-18 13:58:50 -0400
commit	e360adbe29241a0194e10e20595360dd7b98a2b3 (patch)
tree	ef5fa5f50a895096bfb25bc11b25949603158238 /kernel/irq_work.c
parent	8e5fc1a7320baf6076391607515dceb61319b36a (diff)

diff --git a/kernel/irq_work.c b/kernel/irq_work.c new file mode 100644 index 000000000000..f16763ff8481 --- /dev/null +++ b/kernel/irq_work.c
@@ -0,0 +1,164 @@
	1	/*
	2	* Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
	3	*
	4	* Provides a framework for enqueueing and running callbacks from hardirq
	5	* context. The enqueueing is NMI-safe.
	6	*/
	7
	8	#include <linux/kernel.h>
	9	#include <linux/module.h>
	10	#include <linux/irq_work.h>
	11	#include <linux/hardirq.h>
	12
	13	/*
	14	* An entry can be in one of four states:
	15	*
	16	* free NULL, 0 -> {claimed} : free to be used
	17	* claimed NULL, 3 -> {pending} : claimed to be enqueued
	18	* pending next, 3 -> {busy} : queued, pending callback
	19	* busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
	20	*
	21	* We use the lower two bits of the next pointer to keep PENDING and BUSY
	22	* flags.
	23	*/
	24
	25	#define IRQ_WORK_PENDING 1UL
	26	#define IRQ_WORK_BUSY 2UL
	27	#define IRQ_WORK_FLAGS 3UL
	28
	29	static inline bool irq_work_is_set(struct irq_work *entry, int flags)
	30	{
	31	return (unsigned long)entry->next & flags;
	32	}
	33
	34	static inline struct irq_work *irq_work_next(struct irq_work *entry)
	35	{
	36	unsigned long next = (unsigned long)entry->next;
	37	next &= ~IRQ_WORK_FLAGS;
	38	return (struct irq_work *)next;
	39	}
	40
	41	static inline struct irq_work *next_flags(struct irq_work *entry, int flags)
	42	{
	43	unsigned long next = (unsigned long)entry;
	44	next |= flags;
	45	return (struct irq_work *)next;
	46	}
	47
	48	static DEFINE_PER_CPU(struct irq_work *, irq_work_list);
	49
	50	/*
	51	* Claim the entry so that no one else will poke at it.
	52	*/
	53	static bool irq_work_claim(struct irq_work *entry)
	54	{
	55	struct irq_work *next, *nflags;
	56
	57	do {
	58	next = entry->next;
	59	if ((unsigned long)next & IRQ_WORK_PENDING)
	60	return false;
	61	nflags = next_flags(next, IRQ_WORK_FLAGS);
	62	} while (cmpxchg(&entry->next, next, nflags) != next);
	63
	64	return true;
	65	}
	66
	67
	68	void __weak arch_irq_work_raise(void)
	69	{
	70	/*
	71	* Lame architectures will get the timer tick callback
	72	*/
	73	}
	74
	75	/*
	76	* Queue the entry and raise the IPI if needed.
	77	*/
	78	static void __irq_work_queue(struct irq_work *entry)
	79	{
	80	struct irq_work **head, *next;
	81
	82	head = &get_cpu_var(irq_work_list);
	83
	84	do {
	85	next = *head;
	86	/* Can assign non-atomic because we keep the flags set. */
	87	entry->next = next_flags(next, IRQ_WORK_FLAGS);
	88	} while (cmpxchg(head, next, entry) != next);
	89
	90	/* The list was empty, raise self-interrupt to start processing. */
	91	if (!irq_work_next(entry))
	92	arch_irq_work_raise();
	93
	94	put_cpu_var(irq_work_list);
	95	}
	96
	97	/*
	98	* Enqueue the irq_work @entry, returns true on success, failure when the
	99	* @entry was already enqueued by someone else.
	100	*
	101	* Can be re-enqueued while the callback is still in progress.
	102	*/
	103	bool irq_work_queue(struct irq_work *entry)
	104	{
	105	if (!irq_work_claim(entry)) {
	106	/*
	107	* Already enqueued, can't do!
	108	*/
	109	return false;
	110	}
	111
	112	__irq_work_queue(entry);
	113	return true;
	114	}
	115	EXPORT_SYMBOL_GPL(irq_work_queue);
	116
	117	/*
	118	* Run the irq_work entries on this cpu. Requires to be ran from hardirq
	119	* context with local IRQs disabled.
	120	*/
	121	void irq_work_run(void)
	122	{
	123	struct irq_work *list, **head;
	124
	125	head = &__get_cpu_var(irq_work_list);
	126	if (*head == NULL)
	127	return;
	128
	129	BUG_ON(!in_irq());
	130	BUG_ON(!irqs_disabled());
	131
	132	list = xchg(head, NULL);
	133	while (list != NULL) {
	134	struct irq_work *entry = list;
	135
	136	list = irq_work_next(list);
	137
	138	/*
	139	* Clear the PENDING bit, after this point the @entry
	140	* can be re-used.
	141	*/
	142	entry->next = next_flags(NULL, IRQ_WORK_BUSY);
	143	entry->func(entry);
	144	/*
	145	* Clear the BUSY bit and return to the free state if
	146	* no-one else claimed it meanwhile.
	147	*/
	148	cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
	149	}
	150	}
	151	EXPORT_SYMBOL_GPL(irq_work_run);
	152
	153	/*
	154	* Synchronize against the irq_work @entry, ensures the entry is not
	155	* currently in use.
	156	*/
	157	void irq_work_sync(struct irq_work *entry)
	158	{
	159	WARN_ON_ONCE(irqs_disabled());
	160
	161	while (irq_work_is_set(entry, IRQ_WORK_BUSY))
	162	cpu_relax();
	163	}
	164	EXPORT_SYMBOL_GPL(irq_work_sync);