diff options
author | Heinz Mauelshagen <mauelshagen@redhat.com> | 2007-05-09 05:33:06 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-09 15:30:47 -0400 |
commit | 26b9f228703f0518a90e7513d6fe7b6abeed5138 (patch) | |
tree | 3437e92667c338ea46ad47b064ce4908d7d75fe2 | |
parent | 0ba699347e96b5468b42b3decf1f381abbf99652 (diff) |
dm: delay target
New device-mapper target that can delay I/O (for testing). Reads can be
separated from writes, redirected to different underlying devices and delayed
by differing amounts of time.
Signed-off-by: Heinz Mauelshagen <mauelshagen@redhat.com>
Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/device-mapper/delay.txt | 26 | ||||
-rw-r--r-- | drivers/md/Kconfig | 9 | ||||
-rw-r--r-- | drivers/md/Makefile | 1 | ||||
-rw-r--r-- | drivers/md/dm-delay.c | 383 |
4 files changed, 419 insertions, 0 deletions
diff --git a/Documentation/device-mapper/delay.txt b/Documentation/device-mapper/delay.txt new file mode 100644 index 000000000000..15adc55359e5 --- /dev/null +++ b/Documentation/device-mapper/delay.txt | |||
@@ -0,0 +1,26 @@ | |||
1 | dm-delay | ||
2 | ======== | ||
3 | |||
4 | Device-Mapper's "delay" target delays reads and/or writes | ||
5 | and maps them to different devices. | ||
6 | |||
7 | Parameters: | ||
8 | <device> <offset> <delay> [<write_device> <write_offset> <write_delay>] | ||
9 | |||
10 | With separate write parameters, the first set is only used for reads. | ||
11 | Delays are specified in milliseconds. | ||
12 | |||
13 | Example scripts | ||
14 | =============== | ||
15 | [[ | ||
16 | #!/bin/sh | ||
17 | # Create a device that delays both reads and writes by 500ms | ||
18 | echo "0 `blockdev --getsize $1` delay $1 0 500" | dmsetup create delayed | ||
19 | ]] | ||
20 | |||
21 | [[ | ||
22 | #!/bin/sh | ||
23 | # Create a device that delays only writes by 500ms and | ||
24 | # sends reads to device $1 and writes to device $2 | ||
25 | echo "0 `blockdev --getsize $1` delay $1 0 0 $2 0 500" | dmsetup create delayed | ||
26 | ]] | ||
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 4540ade6b6b5..7df934d69134 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig | |||
@@ -262,6 +262,15 @@ config DM_MULTIPATH_EMC | |||
262 | ---help--- | 262 | ---help--- |
263 | Multipath support for EMC CX/AX series hardware. | 263 | Multipath support for EMC CX/AX series hardware. |
264 | 264 | ||
265 | config DM_DELAY | ||
266 | tristate "I/O delaying target (EXPERIMENTAL)" | ||
267 | depends on BLK_DEV_DM && EXPERIMENTAL | ||
268 | ---help--- | ||
269 | A target that delays reads and/or writes and can send | ||
270 | them to different devices. Useful for testing. | ||
271 | |||
272 | If unsure, say N. | ||
273 | |||
265 | endmenu | 274 | endmenu |
266 | 275 | ||
267 | endif | 276 | endif |
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 34957a68d921..38754084eac7 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile | |||
@@ -31,6 +31,7 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o | |||
31 | obj-$(CONFIG_BLK_DEV_MD) += md-mod.o | 31 | obj-$(CONFIG_BLK_DEV_MD) += md-mod.o |
32 | obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o | 32 | obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o |
33 | obj-$(CONFIG_DM_CRYPT) += dm-crypt.o | 33 | obj-$(CONFIG_DM_CRYPT) += dm-crypt.o |
34 | obj-$(CONFIG_DM_DELAY) += dm-delay.o | ||
34 | obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o | 35 | obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o |
35 | obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o | 36 | obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o |
36 | obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o | 37 | obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o |
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c new file mode 100644 index 000000000000..52c7cf9e5803 --- /dev/null +++ b/drivers/md/dm-delay.c | |||
@@ -0,0 +1,383 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005-2007 Red Hat GmbH | ||
3 | * | ||
4 | * A target that delays reads and/or writes and can send | ||
5 | * them to different devices. | ||
6 | * | ||
7 | * This file is released under the GPL. | ||
8 | */ | ||
9 | |||
10 | #include <linux/module.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/blkdev.h> | ||
13 | #include <linux/bio.h> | ||
14 | #include <linux/slab.h> | ||
15 | |||
16 | #include "dm.h" | ||
17 | #include "dm-bio-list.h" | ||
18 | |||
19 | #define DM_MSG_PREFIX "delay" | ||
20 | |||
21 | struct delay_c { | ||
22 | struct timer_list delay_timer; | ||
23 | struct semaphore timer_lock; | ||
24 | struct work_struct flush_expired_bios; | ||
25 | struct list_head delayed_bios; | ||
26 | atomic_t may_delay; | ||
27 | mempool_t *delayed_pool; | ||
28 | |||
29 | struct dm_dev *dev_read; | ||
30 | sector_t start_read; | ||
31 | unsigned read_delay; | ||
32 | unsigned reads; | ||
33 | |||
34 | struct dm_dev *dev_write; | ||
35 | sector_t start_write; | ||
36 | unsigned write_delay; | ||
37 | unsigned writes; | ||
38 | }; | ||
39 | |||
40 | struct delay_info { | ||
41 | struct delay_c *context; | ||
42 | struct list_head list; | ||
43 | struct bio *bio; | ||
44 | unsigned long expires; | ||
45 | }; | ||
46 | |||
/*
 * One mutex shared by every delay target: it is taken whenever a
 * delayed_bios list or the reads/writes counters are modified.
 */
static DEFINE_MUTEX(delayed_bios_lock);

/* Workqueue on which expired bios are resubmitted. */
static struct workqueue_struct *kdelayd_wq;

/* Slab cache backing each target's pool of struct delay_info. */
static struct kmem_cache *delayed_cache;
51 | |||
52 | static void handle_delayed_timer(unsigned long data) | ||
53 | { | ||
54 | struct delay_c *dc = (struct delay_c *)data; | ||
55 | |||
56 | queue_work(kdelayd_wq, &dc->flush_expired_bios); | ||
57 | } | ||
58 | |||
59 | static void queue_timeout(struct delay_c *dc, unsigned long expires) | ||
60 | { | ||
61 | down(&dc->timer_lock); | ||
62 | |||
63 | if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires) | ||
64 | mod_timer(&dc->delay_timer, expires); | ||
65 | |||
66 | up(&dc->timer_lock); | ||
67 | } | ||
68 | |||
69 | static void flush_bios(struct bio *bio) | ||
70 | { | ||
71 | struct bio *n; | ||
72 | |||
73 | while (bio) { | ||
74 | n = bio->bi_next; | ||
75 | bio->bi_next = NULL; | ||
76 | generic_make_request(bio); | ||
77 | bio = n; | ||
78 | } | ||
79 | } | ||
80 | |||
81 | static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) | ||
82 | { | ||
83 | struct delay_info *delayed, *next; | ||
84 | unsigned long next_expires = 0; | ||
85 | int start_timer = 0; | ||
86 | BIO_LIST(flush_bios); | ||
87 | |||
88 | mutex_lock(&delayed_bios_lock); | ||
89 | list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { | ||
90 | if (flush_all || time_after_eq(jiffies, delayed->expires)) { | ||
91 | list_del(&delayed->list); | ||
92 | bio_list_add(&flush_bios, delayed->bio); | ||
93 | if ((bio_data_dir(delayed->bio) == WRITE)) | ||
94 | delayed->context->writes--; | ||
95 | else | ||
96 | delayed->context->reads--; | ||
97 | mempool_free(delayed, dc->delayed_pool); | ||
98 | continue; | ||
99 | } | ||
100 | |||
101 | if (!start_timer) { | ||
102 | start_timer = 1; | ||
103 | next_expires = delayed->expires; | ||
104 | } else | ||
105 | next_expires = min(next_expires, delayed->expires); | ||
106 | } | ||
107 | |||
108 | mutex_unlock(&delayed_bios_lock); | ||
109 | |||
110 | if (start_timer) | ||
111 | queue_timeout(dc, next_expires); | ||
112 | |||
113 | return bio_list_get(&flush_bios); | ||
114 | } | ||
115 | |||
116 | static void flush_expired_bios(struct work_struct *work) | ||
117 | { | ||
118 | struct delay_c *dc; | ||
119 | |||
120 | dc = container_of(work, struct delay_c, flush_expired_bios); | ||
121 | flush_bios(flush_delayed_bios(dc, 0)); | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * Mapping parameters: | ||
126 | * <device> <offset> <delay> [<write_device> <write_offset> <write_delay>] | ||
127 | * | ||
128 | * With separate write parameters, the first set is only used for reads. | ||
129 | * Delays are specified in milliseconds. | ||
130 | */ | ||
131 | static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) | ||
132 | { | ||
133 | struct delay_c *dc; | ||
134 | unsigned long long tmpll; | ||
135 | |||
136 | if (argc != 3 && argc != 6) { | ||
137 | ti->error = "requires exactly 3 or 6 arguments"; | ||
138 | return -EINVAL; | ||
139 | } | ||
140 | |||
141 | dc = kmalloc(sizeof(*dc), GFP_KERNEL); | ||
142 | if (!dc) { | ||
143 | ti->error = "Cannot allocate context"; | ||
144 | return -ENOMEM; | ||
145 | } | ||
146 | |||
147 | dc->reads = dc->writes = 0; | ||
148 | |||
149 | if (sscanf(argv[1], "%llu", &tmpll) != 1) { | ||
150 | ti->error = "Invalid device sector"; | ||
151 | goto bad; | ||
152 | } | ||
153 | dc->start_read = tmpll; | ||
154 | |||
155 | if (sscanf(argv[2], "%u", &dc->read_delay) != 1) { | ||
156 | ti->error = "Invalid delay"; | ||
157 | goto bad; | ||
158 | } | ||
159 | |||
160 | if (dm_get_device(ti, argv[0], dc->start_read, ti->len, | ||
161 | dm_table_get_mode(ti->table), &dc->dev_read)) { | ||
162 | ti->error = "Device lookup failed"; | ||
163 | goto bad; | ||
164 | } | ||
165 | |||
166 | if (argc == 3) { | ||
167 | dc->dev_write = NULL; | ||
168 | goto out; | ||
169 | } | ||
170 | |||
171 | if (sscanf(argv[4], "%llu", &tmpll) != 1) { | ||
172 | ti->error = "Invalid write device sector"; | ||
173 | goto bad; | ||
174 | } | ||
175 | dc->start_write = tmpll; | ||
176 | |||
177 | if (sscanf(argv[5], "%u", &dc->write_delay) != 1) { | ||
178 | ti->error = "Invalid write delay"; | ||
179 | goto bad; | ||
180 | } | ||
181 | |||
182 | if (dm_get_device(ti, argv[3], dc->start_write, ti->len, | ||
183 | dm_table_get_mode(ti->table), &dc->dev_write)) { | ||
184 | ti->error = "Write device lookup failed"; | ||
185 | dm_put_device(ti, dc->dev_read); | ||
186 | goto bad; | ||
187 | } | ||
188 | |||
189 | out: | ||
190 | dc->delayed_pool = mempool_create_slab_pool(128, delayed_cache); | ||
191 | if (!dc->delayed_pool) { | ||
192 | DMERR("Couldn't create delayed bio pool."); | ||
193 | goto bad; | ||
194 | } | ||
195 | |||
196 | init_timer(&dc->delay_timer); | ||
197 | dc->delay_timer.function = handle_delayed_timer; | ||
198 | dc->delay_timer.data = (unsigned long)dc; | ||
199 | |||
200 | INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); | ||
201 | INIT_LIST_HEAD(&dc->delayed_bios); | ||
202 | init_MUTEX(&dc->timer_lock); | ||
203 | atomic_set(&dc->may_delay, 1); | ||
204 | |||
205 | ti->private = dc; | ||
206 | return 0; | ||
207 | |||
208 | bad: | ||
209 | kfree(dc); | ||
210 | return -EINVAL; | ||
211 | } | ||
212 | |||
213 | static void delay_dtr(struct dm_target *ti) | ||
214 | { | ||
215 | struct delay_c *dc = ti->private; | ||
216 | |||
217 | flush_workqueue(kdelayd_wq); | ||
218 | |||
219 | dm_put_device(ti, dc->dev_read); | ||
220 | |||
221 | if (dc->dev_write) | ||
222 | dm_put_device(ti, dc->dev_write); | ||
223 | |||
224 | mempool_destroy(dc->delayed_pool); | ||
225 | kfree(dc); | ||
226 | } | ||
227 | |||
228 | static int delay_bio(struct delay_c *dc, int delay, struct bio *bio) | ||
229 | { | ||
230 | struct delay_info *delayed; | ||
231 | unsigned long expires = 0; | ||
232 | |||
233 | if (!delay || !atomic_read(&dc->may_delay)) | ||
234 | return 1; | ||
235 | |||
236 | delayed = mempool_alloc(dc->delayed_pool, GFP_NOIO); | ||
237 | |||
238 | delayed->context = dc; | ||
239 | delayed->bio = bio; | ||
240 | delayed->expires = expires = jiffies + (delay * HZ / 1000); | ||
241 | |||
242 | mutex_lock(&delayed_bios_lock); | ||
243 | |||
244 | if (bio_data_dir(bio) == WRITE) | ||
245 | dc->writes++; | ||
246 | else | ||
247 | dc->reads++; | ||
248 | |||
249 | list_add_tail(&delayed->list, &dc->delayed_bios); | ||
250 | |||
251 | mutex_unlock(&delayed_bios_lock); | ||
252 | |||
253 | queue_timeout(dc, expires); | ||
254 | |||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | static void delay_presuspend(struct dm_target *ti) | ||
259 | { | ||
260 | struct delay_c *dc = ti->private; | ||
261 | |||
262 | atomic_set(&dc->may_delay, 0); | ||
263 | del_timer_sync(&dc->delay_timer); | ||
264 | flush_bios(flush_delayed_bios(dc, 1)); | ||
265 | } | ||
266 | |||
267 | static void delay_resume(struct dm_target *ti) | ||
268 | { | ||
269 | struct delay_c *dc = ti->private; | ||
270 | |||
271 | atomic_set(&dc->may_delay, 1); | ||
272 | } | ||
273 | |||
274 | static int delay_map(struct dm_target *ti, struct bio *bio, | ||
275 | union map_info *map_context) | ||
276 | { | ||
277 | struct delay_c *dc = ti->private; | ||
278 | |||
279 | if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) { | ||
280 | bio->bi_bdev = dc->dev_write->bdev; | ||
281 | bio->bi_sector = dc->start_write + | ||
282 | (bio->bi_sector - ti->begin); | ||
283 | |||
284 | return delay_bio(dc, dc->write_delay, bio); | ||
285 | } | ||
286 | |||
287 | bio->bi_bdev = dc->dev_read->bdev; | ||
288 | bio->bi_sector = dc->start_read + | ||
289 | (bio->bi_sector - ti->begin); | ||
290 | |||
291 | return delay_bio(dc, dc->read_delay, bio); | ||
292 | } | ||
293 | |||
294 | static int delay_status(struct dm_target *ti, status_type_t type, | ||
295 | char *result, unsigned maxlen) | ||
296 | { | ||
297 | struct delay_c *dc = ti->private; | ||
298 | int sz = 0; | ||
299 | |||
300 | switch (type) { | ||
301 | case STATUSTYPE_INFO: | ||
302 | DMEMIT("%u %u", dc->reads, dc->writes); | ||
303 | break; | ||
304 | |||
305 | case STATUSTYPE_TABLE: | ||
306 | DMEMIT("%s %llu %u", dc->dev_read->name, | ||
307 | (unsigned long long) dc->start_read, | ||
308 | dc->read_delay); | ||
309 | if (dc->dev_write) | ||
310 | DMEMIT("%s %llu %u", dc->dev_write->name, | ||
311 | (unsigned long long) dc->start_write, | ||
312 | dc->write_delay); | ||
313 | break; | ||
314 | } | ||
315 | |||
316 | return 0; | ||
317 | } | ||
318 | |||
319 | static struct target_type delay_target = { | ||
320 | .name = "delay", | ||
321 | .version = {1, 0, 2}, | ||
322 | .module = THIS_MODULE, | ||
323 | .ctr = delay_ctr, | ||
324 | .dtr = delay_dtr, | ||
325 | .map = delay_map, | ||
326 | .presuspend = delay_presuspend, | ||
327 | .resume = delay_resume, | ||
328 | .status = delay_status, | ||
329 | }; | ||
330 | |||
331 | static int __init dm_delay_init(void) | ||
332 | { | ||
333 | int r = -ENOMEM; | ||
334 | |||
335 | kdelayd_wq = create_workqueue("kdelayd"); | ||
336 | if (!kdelayd_wq) { | ||
337 | DMERR("Couldn't start kdelayd"); | ||
338 | goto bad_queue; | ||
339 | } | ||
340 | |||
341 | delayed_cache = kmem_cache_create("dm-delay", | ||
342 | sizeof(struct delay_info), | ||
343 | __alignof__(struct delay_info), | ||
344 | 0, NULL, NULL); | ||
345 | if (!delayed_cache) { | ||
346 | DMERR("Couldn't create delayed bio cache."); | ||
347 | goto bad_memcache; | ||
348 | } | ||
349 | |||
350 | r = dm_register_target(&delay_target); | ||
351 | if (r < 0) { | ||
352 | DMERR("register failed %d", r); | ||
353 | goto bad_register; | ||
354 | } | ||
355 | |||
356 | return 0; | ||
357 | |||
358 | bad_register: | ||
359 | kmem_cache_destroy(delayed_cache); | ||
360 | bad_memcache: | ||
361 | destroy_workqueue(kdelayd_wq); | ||
362 | bad_queue: | ||
363 | return r; | ||
364 | } | ||
365 | |||
366 | static void __exit dm_delay_exit(void) | ||
367 | { | ||
368 | int r = dm_unregister_target(&delay_target); | ||
369 | |||
370 | if (r < 0) | ||
371 | DMERR("unregister failed %d", r); | ||
372 | |||
373 | kmem_cache_destroy(delayed_cache); | ||
374 | destroy_workqueue(kdelayd_wq); | ||
375 | } | ||
376 | |||
377 | /* Module hooks */ | ||
378 | module_init(dm_delay_init); | ||
379 | module_exit(dm_delay_exit); | ||
380 | |||
381 | MODULE_DESCRIPTION(DM_NAME " delay target"); | ||
382 | MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>"); | ||
383 | MODULE_LICENSE("GPL"); | ||