diff options
-rw-r--r-- | block/Makefile | 2 | ||||
-rw-r--r-- | block/blk-iopoll.c | 220 | ||||
-rw-r--r-- | include/linux/blk-iopoll.h | 41 | ||||
-rw-r--r-- | include/linux/interrupt.h | 1 | ||||
-rw-r--r-- | kernel/sysctl.c | 10 |
5 files changed, 272 insertions, 2 deletions
diff --git a/block/Makefile b/block/Makefile index 6c54ed0ff755..ba74ca6bfa14 100644 --- a/block/Makefile +++ b/block/Makefile | |||
@@ -5,7 +5,7 @@ | |||
5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ | 5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ |
6 | blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ | 6 | blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ |
7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ | 7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ |
8 | ioctl.o genhd.o scsi_ioctl.o | 8 | blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o |
9 | 9 | ||
10 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o | 10 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o |
11 | obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o | 11 | obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o |
diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c new file mode 100644 index 000000000000..566db1e7c1c7 --- /dev/null +++ b/block/blk-iopoll.c | |||
@@ -0,0 +1,220 @@ | |||
1 | /* | ||
2 | * Functions related to interrupt-poll handling in the block layer. This | ||
3 | * is similar to NAPI for network devices. | ||
4 | */ | ||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/module.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/bio.h> | ||
9 | #include <linux/blkdev.h> | ||
10 | #include <linux/interrupt.h> | ||
11 | #include <linux/cpu.h> | ||
12 | #include <linux/blk-iopoll.h> | ||
13 | #include <linux/delay.h> | ||
14 | |||
15 | #include "blk.h" | ||
16 | |||
/*
 * Global iopoll switch, on by default. Nothing in this file reads it; it is
 * exported for other code and is the backing variable of the "blk_iopoll"
 * sysctl entry. NOTE(review): presumably consulted by drivers before they
 * opt into polling - confirm against callers.
 */
int blk_iopoll_enabled = 1;
EXPORT_SYMBOL(blk_iopoll_enabled);

/* Per-CPU list of blk_iopoll instances queued for the iopoll softirq. */
static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
21 | |||
22 | /** | ||
23 | * blk_iopoll_sched - Schedule a run of the iopoll handler | ||
24 | * @iop: The parent iopoll structure | ||
25 | * | ||
26 | * Description: | ||
27 | * Add this blk_iopoll structure to the pending poll list and trigger the raise | ||
28 | * of the blk iopoll softirq. The driver must already have gotten a succesful | ||
29 | * return from blk_iopoll_sched_prep() before calling this. | ||
30 | **/ | ||
31 | void blk_iopoll_sched(struct blk_iopoll *iop) | ||
32 | { | ||
33 | unsigned long flags; | ||
34 | |||
35 | local_irq_save(flags); | ||
36 | list_add_tail(&iop->list, &__get_cpu_var(blk_cpu_iopoll)); | ||
37 | __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); | ||
38 | local_irq_restore(flags); | ||
39 | } | ||
40 | EXPORT_SYMBOL(blk_iopoll_sched); | ||
41 | |||
/**
 * __blk_iopoll_complete - Mark this @iop as un-polled again
 * @iop:      The parent iopoll structure
 *
 * Description:
 *     See blk_iopoll_complete(). This function must be called with interrupts
 *     disabled.
 **/
void __blk_iopoll_complete(struct blk_iopoll *iop)
{
	/* Unlink from the pending list before ownership is given up. */
	list_del(&iop->list);
	/* Barrier so the unlink is ordered before the bit is cleared. */
	smp_mb__before_clear_bit();
	/* With IOPOLL_F_SCHED clear, blk_iopoll_sched_prep() can succeed again. */
	clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
}
EXPORT_SYMBOL(__blk_iopoll_complete);
56 | |||
/**
 * blk_iopoll_complete - Mark this @iopoll as un-polled again
 * @iopoll:   The parent iopoll structure
 *
 * Description:
 *     If a driver consumes less than the assigned budget in its run of the
 *     iopoll handler, it'll end the polled mode by calling this function. The
 *     iopoll handler will not be invoked again before blk_iopoll_sched_prep()
 *     is called.
 **/
void blk_iopoll_complete(struct blk_iopoll *iopoll)
{
	unsigned long irq_state;

	/* __blk_iopoll_complete() must run with interrupts disabled. */
	local_irq_save(irq_state);
	__blk_iopoll_complete(iopoll);
	local_irq_restore(irq_state);
}
EXPORT_SYMBOL(blk_iopoll_complete);
75 | |||
/*
 * Handler for BLOCK_IOPOLL_SOFTIRQ. Walks the current CPU's pending list and
 * calls ->poll() on the head entry until the list is empty, the budget is
 * used up, or jiffies has moved past the value sampled on entry. In the last
 * two cases the softirq is raised again so the remaining entries get another
 * run. The @h argument is not used.
 */
static void blk_iopoll_softirq(struct softirq_action *h)
{
	struct list_head *list = &__get_cpu_var(blk_cpu_iopoll);
	unsigned long start_time = jiffies;
	int rearm = 0, budget = 64;	/* budget: total work allowed in one run */

	local_irq_disable();

	while (!list_empty(list)) {
		struct blk_iopoll *iop;
		int work, weight;

		/*
		 * If softirq window is exhausted then punt.
		 */
		if (budget <= 0 || time_after(jiffies, start_time)) {
			rearm = 1;
			break;
		}

		local_irq_enable();

		/* Even though interrupts have been re-enabled, this
		 * access is safe because interrupts can only add new
		 * entries to the tail of this list, and only ->poll()
		 * calls can remove this head entry from the list.
		 */
		iop = list_entry(list->next, struct blk_iopoll, list);

		weight = iop->weight;
		work = 0;
		/* Only instances that still hold the SCHED bit are polled. */
		if (test_bit(IOPOLL_F_SCHED, &iop->state))
			work = iop->poll(iop, weight);

		budget -= work;

		local_irq_disable();

		/* Drivers must not modify the iopoll state if they
		 * consume the entire weight. In such cases this code
		 * still "owns" the iopoll instance and therefore can
		 * move the instance around on the list at-will.
		 */
		if (work >= weight) {
			if (blk_iopoll_disable_pending(iop))
				__blk_iopoll_complete(iop);
			else
				list_move_tail(&iop->list, list);
		}
	}

	/* Entries were left unprocessed: request another softirq run. */
	if (rearm)
		__raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);

	local_irq_enable();
}
132 | |||
133 | /** | ||
134 | * blk_iopoll_disable - Disable iopoll on this @iop | ||
135 | * @iop: The parent iopoll structure | ||
136 | * | ||
137 | * Description: | ||
138 | * Disable io polling and wait for any pending callbacks to have completed. | ||
139 | **/ | ||
140 | void blk_iopoll_disable(struct blk_iopoll *iop) | ||
141 | { | ||
142 | set_bit(IOPOLL_F_DISABLE, &iop->state); | ||
143 | while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state)) | ||
144 | msleep(1); | ||
145 | clear_bit(IOPOLL_F_DISABLE, &iop->state); | ||
146 | } | ||
147 | EXPORT_SYMBOL(blk_iopoll_disable); | ||
148 | |||
/**
 * blk_iopoll_enable - Enable iopoll on this @iop
 * @iop:      The parent iopoll structure
 *
 * Description:
 *     Enable iopoll on this @iop. Note that the handler run will not be
 *     scheduled, it will only mark it as active.
 **/
void blk_iopoll_enable(struct blk_iopoll *iop)
{
	/*
	 * IOPOLL_F_SCHED must be held on entry; blk_iopoll_init() and
	 * blk_iopoll_disable() both return with it set.
	 */
	BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
	smp_mb__before_clear_bit();
	/* Releasing the bit lets blk_iopoll_sched_prep() succeed. */
	clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
}
EXPORT_SYMBOL(blk_iopoll_enable);
164 | |||
165 | /** | ||
166 | * blk_iopoll_init - Initialize this @iop | ||
167 | * @iop: The parent iopoll structure | ||
168 | * @weight: The default weight (or command completion budget) | ||
169 | * @poll_fn: The handler to invoke | ||
170 | * | ||
171 | * Description: | ||
172 | * Initialize this blk_iopoll structure. Before being actively used, the driver | ||
173 | * must call blk_iopoll_enable(). | ||
174 | **/ | ||
175 | void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn) | ||
176 | { | ||
177 | memset(iop, 0, sizeof(*iop)); | ||
178 | INIT_LIST_HEAD(&iop->list); | ||
179 | iop->weight = weight; | ||
180 | iop->poll = poll_fn; | ||
181 | set_bit(IOPOLL_F_SCHED, &iop->state); | ||
182 | } | ||
183 | EXPORT_SYMBOL(blk_iopoll_init); | ||
184 | |||
185 | static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self, | ||
186 | unsigned long action, void *hcpu) | ||
187 | { | ||
188 | /* | ||
189 | * If a CPU goes away, splice its entries to the current CPU | ||
190 | * and trigger a run of the softirq | ||
191 | */ | ||
192 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | ||
193 | int cpu = (unsigned long) hcpu; | ||
194 | |||
195 | local_irq_disable(); | ||
196 | list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), | ||
197 | &__get_cpu_var(blk_cpu_iopoll)); | ||
198 | raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); | ||
199 | local_irq_enable(); | ||
200 | } | ||
201 | |||
202 | return NOTIFY_OK; | ||
203 | } | ||
204 | |||
/* Hotplug notifier that routes CPU events to blk_iopoll_cpu_notify(). */
static struct notifier_block __cpuinitdata blk_iopoll_cpu_notifier = {
	.notifier_call	= blk_iopoll_cpu_notify,
};
208 | |||
209 | static __init int blk_iopoll_setup(void) | ||
210 | { | ||
211 | int i; | ||
212 | |||
213 | for_each_possible_cpu(i) | ||
214 | INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i)); | ||
215 | |||
216 | open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq); | ||
217 | register_hotcpu_notifier(&blk_iopoll_cpu_notifier); | ||
218 | return 0; | ||
219 | } | ||
220 | subsys_initcall(blk_iopoll_setup); | ||
diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h new file mode 100644 index 000000000000..b2e1739a2e7b --- /dev/null +++ b/include/linux/blk-iopoll.h | |||
@@ -0,0 +1,41 @@ | |||
1 | #ifndef BLK_IOPOLL_H | ||
2 | #define BLK_IOPOLL_H | ||
3 | |||
struct blk_iopoll;
/*
 * Poll handler type. It is called with the iopoll instance and that
 * instance's weight, and returns the amount of work it did; the softirq
 * subtracts the return value from its budget and compares it to the weight.
 */
typedef int (blk_iopoll_fn)(struct blk_iopoll *, int);
6 | |||
/* Per-instance iopoll state; set up with blk_iopoll_init(). */
struct blk_iopoll {
	struct list_head list;	/* link on a per-CPU pending poll list */
	unsigned long state;	/* IOPOLL_F_* bit flags */
	unsigned long data;	/* unused by the core code; presumably driver-private - confirm */
	int weight;		/* budget passed to ->poll() on each call */
	int max;		/* unused by the core code - TODO confirm purpose */
	blk_iopoll_fn *poll;	/* handler called from the iopoll softirq */
};
15 | |||
/* Bit numbers within blk_iopoll.state. */
enum {
	IOPOLL_F_SCHED		= 0,	/* instance is scheduled (or held by init/disable) */
	IOPOLL_F_DISABLE	= 1,	/* blk_iopoll_disable() is in progress */
};
20 | |||
21 | static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop) | ||
22 | { | ||
23 | return !test_bit(IOPOLL_F_DISABLE, &iop->state) && | ||
24 | !test_and_set_bit(IOPOLL_F_SCHED, &iop->state); | ||
25 | } | ||
26 | |||
/* Returns non-zero while IOPOLL_F_DISABLE is set on @iop. */
static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop)
{
	return test_bit(IOPOLL_F_DISABLE, &iop->state);
}
31 | |||
/* Core iopoll entry points. */
extern void blk_iopoll_sched(struct blk_iopoll *);
extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *);
extern void blk_iopoll_complete(struct blk_iopoll *);
extern void __blk_iopoll_complete(struct blk_iopoll *);	/* caller has interrupts disabled */
extern void blk_iopoll_enable(struct blk_iopoll *);
extern void blk_iopoll_disable(struct blk_iopoll *);

/* Global iopoll switch; defaults to 1. */
extern int blk_iopoll_enabled;
40 | |||
41 | #endif | ||
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 35e7df1e9f30..edd8d5c90394 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
@@ -344,6 +344,7 @@ enum | |||
344 | NET_TX_SOFTIRQ, | 344 | NET_TX_SOFTIRQ, |
345 | NET_RX_SOFTIRQ, | 345 | NET_RX_SOFTIRQ, |
346 | BLOCK_SOFTIRQ, | 346 | BLOCK_SOFTIRQ, |
347 | BLOCK_IOPOLL_SOFTIRQ, | ||
347 | TASKLET_SOFTIRQ, | 348 | TASKLET_SOFTIRQ, |
348 | SCHED_SOFTIRQ, | 349 | SCHED_SOFTIRQ, |
349 | HRTIMER_SOFTIRQ, | 350 | HRTIMER_SOFTIRQ, |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 58be76017fd0..0ed9fa6f322e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -92,6 +92,7 @@ extern int sysctl_nr_trim_pages; | |||
92 | #ifdef CONFIG_RCU_TORTURE_TEST | 92 | #ifdef CONFIG_RCU_TORTURE_TEST |
93 | extern int rcutorture_runnable; | 93 | extern int rcutorture_runnable; |
94 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ | 94 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ |
95 | extern int blk_iopoll_enabled; | ||
95 | 96 | ||
96 | /* Constants used for minimum and maximum */ | 97 | /* Constants used for minimum and maximum */ |
97 | #ifdef CONFIG_DETECT_SOFTLOCKUP | 98 | #ifdef CONFIG_DETECT_SOFTLOCKUP |
@@ -990,7 +991,14 @@ static struct ctl_table kern_table[] = { | |||
990 | .proc_handler = &proc_dointvec, | 991 | .proc_handler = &proc_dointvec, |
991 | }, | 992 | }, |
992 | #endif | 993 | #endif |
993 | 994 | { | |
995 | .ctl_name = CTL_UNNUMBERED, | ||
996 | .procname = "blk_iopoll", | ||
997 | .data = &blk_iopoll_enabled, | ||
998 | .maxlen = sizeof(int), | ||
999 | .mode = 0644, | ||
1000 | .proc_handler = &proc_dointvec, | ||
1001 | }, | ||
994 | /* | 1002 | /* |
995 | * NOTE: do not add new entries to this table unless you have read | 1003 | * NOTE: do not add new entries to this table unless you have read |
996 | * Documentation/sysctl/ctl_unnumbered.txt | 1004 | * Documentation/sysctl/ctl_unnumbered.txt |