about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- block/Makefile 2
-rw-r--r-- block/blk-iopoll.c 220
-rw-r--r-- include/linux/blk-iopoll.h 41
-rw-r--r-- include/linux/interrupt.h 1
-rw-r--r-- kernel/sysctl.c 10
5 files changed, 272 insertions, 2 deletions
diff --git a/block/Makefile b/block/Makefile
index 6c54ed0ff755..ba74ca6bfa14 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
5obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ 5obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
6 blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ 6 blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
7 blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ 7 blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
8 ioctl.o genhd.o scsi_ioctl.o 8 blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o
9 9
10obj-$(CONFIG_BLK_DEV_BSG) += bsg.o 10obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
11obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o 11obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
new file mode 100644
index 000000000000..566db1e7c1c7
--- /dev/null
+++ b/block/blk-iopoll.c
@@ -0,0 +1,220 @@
1/*
2 * Functions related to interrupt-poll handling in the block layer. This
3 * is similar to NAPI for network devices.
4 */
5#include <linux/kernel.h>
6#include <linux/module.h>
7#include <linux/init.h>
8#include <linux/bio.h>
9#include <linux/blkdev.h>
10#include <linux/interrupt.h>
11#include <linux/cpu.h>
12#include <linux/blk-iopoll.h>
13#include <linux/delay.h>
14
15#include "blk.h"
16
17int blk_iopoll_enabled = 1;
18EXPORT_SYMBOL(blk_iopoll_enabled);
19
20static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
21
22/**
23 * blk_iopoll_sched - Schedule a run of the iopoll handler
24 * @iop: The parent iopoll structure
25 *
26 * Description:
27 * Add this blk_iopoll structure to the pending poll list and trigger the raise
28 * of the blk iopoll softirq. The driver must already have gotten a succesful
29 * return from blk_iopoll_sched_prep() before calling this.
30 **/
31void blk_iopoll_sched(struct blk_iopoll *iop)
32{
33 unsigned long flags;
34
35 local_irq_save(flags);
36 list_add_tail(&iop->list, &__get_cpu_var(blk_cpu_iopoll));
37 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
38 local_irq_restore(flags);
39}
40EXPORT_SYMBOL(blk_iopoll_sched);
41
42/**
43 * __blk_iopoll_complete - Mark this @iop as un-polled again
44 * @iop: The parent iopoll structure
45 *
46 * Description:
47 * See blk_iopoll_complete(). This function must be called with interrupts disabled.
48 **/
49void __blk_iopoll_complete(struct blk_iopoll *iop)
50{
51 list_del(&iop->list);
52 smp_mb__before_clear_bit();
53 clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
54}
55EXPORT_SYMBOL(__blk_iopoll_complete);
56
/**
 * blk_iopoll_complete - Mark this @iop as un-polled again
 * @iop:      The parent iopoll structure
 *
 * Description:
 *     If a driver consumes less than the assigned budget in its run of the
 *     iopoll handler, it'll end the polled mode by calling this function. The
 *     iopoll handler will not be invoked again before blk_iopoll_sched_prep()
 *     is called.
 *
 *     This is the interrupt-safe wrapper around __blk_iopoll_complete(): it
 *     saves and disables local interrupts around the call and restores them
 *     afterwards.
 **/
void blk_iopoll_complete(struct blk_iopoll *iop)
{
	unsigned long flags;

	local_irq_save(flags);
	__blk_iopoll_complete(iop);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(blk_iopoll_complete);
75
76static void blk_iopoll_softirq(struct softirq_action *h)
77{
78 struct list_head *list = &__get_cpu_var(blk_cpu_iopoll);
79 unsigned long start_time = jiffies;
80 int rearm = 0, budget = 64;
81
82 local_irq_disable();
83
84 while (!list_empty(list)) {
85 struct blk_iopoll *iop;
86 int work, weight;
87
88 /*
89 * If softirq window is exhausted then punt.
90 */
91 if (budget <= 0 || time_after(jiffies, start_time)) {
92 rearm = 1;
93 break;
94 }
95
96 local_irq_enable();
97
98 /* Even though interrupts have been re-enabled, this
99 * access is safe because interrupts can only add new
100 * entries to the tail of this list, and only ->poll()
101 * calls can remove this head entry from the list.
102 */
103 iop = list_entry(list->next, struct blk_iopoll, list);
104
105 weight = iop->weight;
106 work = 0;
107 if (test_bit(IOPOLL_F_SCHED, &iop->state))
108 work = iop->poll(iop, weight);
109
110 budget -= work;
111
112 local_irq_disable();
113
114 /* Drivers must not modify the NAPI state if they
115 * consume the entire weight. In such cases this code
116 * still "owns" the NAPI instance and therefore can
117 * move the instance around on the list at-will.
118 */
119 if (work >= weight) {
120 if (blk_iopoll_disable_pending(iop))
121 __blk_iopoll_complete(iop);
122 else
123 list_move_tail(&iop->list, list);
124 }
125 }
126
127 if (rearm)
128 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
129
130 local_irq_enable();
131}
132
133/**
134 * blk_iopoll_disable - Disable iopoll on this @iop
135 * @iop: The parent iopoll structure
136 *
137 * Description:
138 * Disable io polling and wait for any pending callbacks to have completed.
139 **/
140void blk_iopoll_disable(struct blk_iopoll *iop)
141{
142 set_bit(IOPOLL_F_DISABLE, &iop->state);
143 while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state))
144 msleep(1);
145 clear_bit(IOPOLL_F_DISABLE, &iop->state);
146}
147EXPORT_SYMBOL(blk_iopoll_disable);
148
149/**
150 * blk_iopoll_enable - Enable iopoll on this @iop
151 * @iop: The parent iopoll structure
152 *
153 * Description:
154 * Enable iopoll on this @iop. Note that the handler run will not be scheduled, it
155 * will only mark it as active.
156 **/
157void blk_iopoll_enable(struct blk_iopoll *iop)
158{
159 BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
160 smp_mb__before_clear_bit();
161 clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
162}
163EXPORT_SYMBOL(blk_iopoll_enable);
164
165/**
166 * blk_iopoll_init - Initialize this @iop
167 * @iop: The parent iopoll structure
168 * @weight: The default weight (or command completion budget)
169 * @poll_fn: The handler to invoke
170 *
171 * Description:
172 * Initialize this blk_iopoll structure. Before being actively used, the driver
173 * must call blk_iopoll_enable().
174 **/
175void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn)
176{
177 memset(iop, 0, sizeof(*iop));
178 INIT_LIST_HEAD(&iop->list);
179 iop->weight = weight;
180 iop->poll = poll_fn;
181 set_bit(IOPOLL_F_SCHED, &iop->state);
182}
183EXPORT_SYMBOL(blk_iopoll_init);
184
185static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self,
186 unsigned long action, void *hcpu)
187{
188 /*
189 * If a CPU goes away, splice its entries to the current CPU
190 * and trigger a run of the softirq
191 */
192 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
193 int cpu = (unsigned long) hcpu;
194
195 local_irq_disable();
196 list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
197 &__get_cpu_var(blk_cpu_iopoll));
198 raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
199 local_irq_enable();
200 }
201
202 return NOTIFY_OK;
203}
204
205static struct notifier_block __cpuinitdata blk_iopoll_cpu_notifier = {
206 .notifier_call = blk_iopoll_cpu_notify,
207};
208
209static __init int blk_iopoll_setup(void)
210{
211 int i;
212
213 for_each_possible_cpu(i)
214 INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
215
216 open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq);
217 register_hotcpu_notifier(&blk_iopoll_cpu_notifier);
218 return 0;
219}
220subsys_initcall(blk_iopoll_setup);
diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h
new file mode 100644
index 000000000000..b2e1739a2e7b
--- /dev/null
+++ b/include/linux/blk-iopoll.h
@@ -0,0 +1,41 @@
1#ifndef BLK_IOPOLL_H
2#define BLK_IOPOLL_H
3
struct blk_iopoll;

/*
 * Poll handler type. The softirq code calls it with the instance and that
 * instance's weight, and treats the int return value as the amount of work
 * done (compared against the weight and subtracted from the softirq budget).
 */
typedef int (blk_iopoll_fn)(struct blk_iopoll *, int);
6
7struct blk_iopoll {
8 struct list_head list;
9 unsigned long state;
10 unsigned long data;
11 int weight;
12 int max;
13 blk_iopoll_fn *poll;
14};
15
/* Bit numbers within blk_iopoll.state. */
enum {
	IOPOLL_F_SCHED		= 0,	/* set while the instance is scheduled (or not yet enabled) */
	IOPOLL_F_DISABLE	= 1,	/* set while blk_iopoll_disable() is waiting */
};
20
21static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop)
22{
23 return !test_bit(IOPOLL_F_DISABLE, &iop->state) &&
24 !test_and_set_bit(IOPOLL_F_SCHED, &iop->state);
25}
26
27static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop)
28{
29 return test_bit(IOPOLL_F_DISABLE, &iop->state);
30}
31
32extern void blk_iopoll_sched(struct blk_iopoll *);
33extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *);
34extern void blk_iopoll_complete(struct blk_iopoll *);
35extern void __blk_iopoll_complete(struct blk_iopoll *);
36extern void blk_iopoll_enable(struct blk_iopoll *);
37extern void blk_iopoll_disable(struct blk_iopoll *);
38
39extern int blk_iopoll_enabled;
40
41#endif
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 35e7df1e9f30..edd8d5c90394 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -344,6 +344,7 @@ enum
344 NET_TX_SOFTIRQ, 344 NET_TX_SOFTIRQ,
345 NET_RX_SOFTIRQ, 345 NET_RX_SOFTIRQ,
346 BLOCK_SOFTIRQ, 346 BLOCK_SOFTIRQ,
347 BLOCK_IOPOLL_SOFTIRQ,
347 TASKLET_SOFTIRQ, 348 TASKLET_SOFTIRQ,
348 SCHED_SOFTIRQ, 349 SCHED_SOFTIRQ,
349 HRTIMER_SOFTIRQ, 350 HRTIMER_SOFTIRQ,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 58be76017fd0..0ed9fa6f322e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -92,6 +92,7 @@ extern int sysctl_nr_trim_pages;
92#ifdef CONFIG_RCU_TORTURE_TEST 92#ifdef CONFIG_RCU_TORTURE_TEST
93extern int rcutorture_runnable; 93extern int rcutorture_runnable;
94#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ 94#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
95extern int blk_iopoll_enabled;
95 96
96/* Constants used for minimum and maximum */ 97/* Constants used for minimum and maximum */
97#ifdef CONFIG_DETECT_SOFTLOCKUP 98#ifdef CONFIG_DETECT_SOFTLOCKUP
@@ -990,7 +991,14 @@ static struct ctl_table kern_table[] = {
990 .proc_handler = &proc_dointvec, 991 .proc_handler = &proc_dointvec,
991 }, 992 },
992#endif 993#endif
993 994 {
995 .ctl_name = CTL_UNNUMBERED,
996 .procname = "blk_iopoll",
997 .data = &blk_iopoll_enabled,
998 .maxlen = sizeof(int),
999 .mode = 0644,
1000 .proc_handler = &proc_dointvec,
1001 },
994/* 1002/*
995 * NOTE: do not add new entries to this table unless you have read 1003 * NOTE: do not add new entries to this table unless you have read
996 * Documentation/sysctl/ctl_unnumbered.txt 1004 * Documentation/sysctl/ctl_unnumbered.txt