Diffstat (limited to 'drivers/md/dm-io.c')

 -rw-r--r--  drivers/md/dm-io.c | 120
 1 file changed, 82 insertions(+), 38 deletions(-)
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 3a2e6a2f8bdd..10f457ca6af2 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -5,6 +5,8 @@
  * This file is released under the GPL.
  */
 
+#include "dm.h"
+
 #include <linux/device-mapper.h>
 
 #include <linux/bio.h>
@@ -14,12 +16,19 @@
 #include <linux/slab.h>
 #include <linux/dm-io.h>
 
+#define DM_MSG_PREFIX "io"
+
+#define DM_IO_MAX_REGIONS	BITS_PER_LONG
+
 struct dm_io_client {
 	mempool_t *pool;
 	struct bio_set *bios;
 };
 
-/* FIXME: can we shrink this ? */
+/*
+ * Aligning 'struct io' reduces the number of bits required to store
+ * its address.  Refer to store_io_and_region_in_bio() below.
+ */
 struct io {
 	unsigned long error_bits;
 	unsigned long eopnotsupp_bits;
@@ -28,7 +37,9 @@ struct io {
 	struct dm_io_client *client;
 	io_notify_fn callback;
 	void *context;
-};
+} __attribute__((aligned(DM_IO_MAX_REGIONS)));
+
+static struct kmem_cache *_dm_io_cache;
 
 /*
  * io contexts are only dynamically allocated for asynchronous
@@ -53,7 +64,7 @@ struct dm_io_client *dm_io_client_create(unsigned num_pages)
 	if (!client)
 		return ERR_PTR(-ENOMEM);
 
-	client->pool = mempool_create_kmalloc_pool(ios, sizeof(struct io));
+	client->pool = mempool_create_slab_pool(ios, _dm_io_cache);
 	if (!client->pool)
 		goto bad;
 
@@ -88,18 +99,29 @@ EXPORT_SYMBOL(dm_io_client_destroy);
 
 /*-----------------------------------------------------------------
  * We need to keep track of which region a bio is doing io for.
- * In order to save a memory allocation we store this the last
- * bvec which we know is unused (blech).
- * XXX This is ugly and can OOPS with some configs... find another way.
+ * To avoid a memory allocation to store just 5 or 6 bits, we
+ * ensure the 'struct io' pointer is aligned so enough low bits are
+ * always zero and then combine it with the region number directly in
+ * bi_private.
  *---------------------------------------------------------------*/
-static inline void bio_set_region(struct bio *bio, unsigned region)
+static void store_io_and_region_in_bio(struct bio *bio, struct io *io,
+				       unsigned region)
 {
-	bio->bi_io_vec[bio->bi_max_vecs].bv_len = region;
+	if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
+		DMCRIT("Unaligned struct io pointer %p", io);
+		BUG();
+	}
+
+	bio->bi_private = (void *)((unsigned long)io | region);
 }
 
-static inline unsigned bio_get_region(struct bio *bio)
+static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
+					    unsigned *region)
 {
-	return bio->bi_io_vec[bio->bi_max_vecs].bv_len;
+	unsigned long val = (unsigned long)bio->bi_private;
+
+	*io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
+	*region = val & (DM_IO_MAX_REGIONS - 1);
 }
 
 /*-----------------------------------------------------------------
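The hunk above swaps the old spare-bvec hack for pointer tagging: 'struct io' is now aligned to DM_IO_MAX_REGIONS (BITS_PER_LONG, a power of two), so the low 5 or 6 bits of its address are always zero and can carry the region index inside bi_private itself. Note that -(unsigned long)DM_IO_MAX_REGIONS is just the two's-complement spelling of the mask ~(DM_IO_MAX_REGIONS - 1). A minimal userspace sketch of the same trick, assuming a power-of-two MAX_REGIONS and hypothetical pack()/unpack() helpers:

/* Illustrative sketch of the pointer-tagging scheme; not kernel code. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_REGIONS 64			/* stand-in for DM_IO_MAX_REGIONS */

struct io {
	unsigned long error_bits;
} __attribute__((aligned(MAX_REGIONS)));

/* Pack an aligned pointer and a small region index into one word. */
static void *pack(struct io *io, unsigned region)
{
	assert(((uintptr_t)io % MAX_REGIONS) == 0);	/* low bits must be free */
	assert(region < MAX_REGIONS);
	return (void *)((uintptr_t)io | region);
}

/* Recover both values: mask off the low bits to get the pointer back. */
static void unpack(void *priv, struct io **io, unsigned *region)
{
	uintptr_t val = (uintptr_t)priv;

	*io = (struct io *)(val & ~(uintptr_t)(MAX_REGIONS - 1));
	*region = val & (MAX_REGIONS - 1);
}

int main(void)
{
	static struct io one_io;	/* alignment comes from the type */
	struct io *io;
	unsigned region;

	unpack(pack(&one_io, 5), &io, &region);
	printf("io=%p region=%u\n", (void *)io, region);	/* region=5 */
	return 0;
}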
@@ -140,10 +162,8 @@ static void endio(struct bio *bio, int error)
 	/*
 	 * The bio destructor in bio_put() may use the io object.
 	 */
-	io = bio->bi_private;
-	region = bio_get_region(bio);
+	retrieve_io_and_region_from_bio(bio, &io, &region);
 
-	bio->bi_max_vecs++;
 	bio_put(bio);
 
 	dec_count(io, region, error);
@@ -243,7 +263,10 @@ static void vm_dp_init(struct dpages *dp, void *data)
 
 static void dm_bio_destructor(struct bio *bio)
 {
-	struct io *io = bio->bi_private;
+	unsigned region;
+	struct io *io;
+
+	retrieve_io_and_region_from_bio(bio, &io, &region);
 
 	bio_free(bio, io->client->bios);
 }
@@ -286,26 +309,23 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 	unsigned num_bvecs;
 	sector_t remaining = where->count;
 
-	while (remaining) {
+	/*
+	 * where->count may be zero if rw holds a write barrier and we
+	 * need to send a zero-sized barrier.
+	 */
+	do {
 		/*
-		 * Allocate a suitably sized-bio: we add an extra
-		 * bvec for bio_get/set_region() and decrement bi_max_vecs
-		 * to hide it from bio_add_page().
+		 * Allocate a suitably sized-bio.
 		 */
 		num_bvecs = dm_sector_div_up(remaining,
 					     (PAGE_SIZE >> SECTOR_SHIFT));
-		num_bvecs = 1 + min_t(int, bio_get_nr_vecs(where->bdev),
-				      num_bvecs);
-		if (unlikely(num_bvecs > BIO_MAX_PAGES))
-			num_bvecs = BIO_MAX_PAGES;
+		num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), num_bvecs);
 		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
 		bio->bi_sector = where->sector + (where->count - remaining);
 		bio->bi_bdev = where->bdev;
 		bio->bi_end_io = endio;
-		bio->bi_private = io;
 		bio->bi_destructor = dm_bio_destructor;
-		bio->bi_max_vecs--;
-		bio_set_region(bio, region);
+		store_io_and_region_in_bio(bio, io, region);
 
 		/*
 		 * Try and add as many pages as possible.
@@ -323,7 +343,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 
 		atomic_inc(&io->count);
 		submit_bio(rw, bio);
-	}
+	} while (remaining);
 }
 
 static void dispatch_io(int rw, unsigned int num_regions,
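Converting the submission loop from while to do/while is what makes zero-sized barriers possible: with where->count == 0, remaining starts at zero, so the old loop would exit without ever allocating a bio, whereas the new one still submits exactly one empty bio carrying the barrier flag. A toy sketch of the difference, with the bio submission reduced to a counter:

/* Minimal, standalone illustration; 'submissions' is a hypothetical helper. */
#include <stdio.h>

static unsigned submissions(unsigned long count, int use_do_while)
{
	unsigned long remaining = count;
	unsigned submitted = 0;

	if (use_do_while) {
		do {
			submitted++;		/* one bio per iteration */
			remaining = 0;		/* pretend it all fit in one bio */
		} while (remaining);
	} else {
		while (remaining) {
			submitted++;
			remaining = 0;
		}
	}
	return submitted;
}

int main(void)
{
	printf("while:    %u\n", submissions(0, 0));	/* 0: barrier lost */
	printf("do/while: %u\n", submissions(0, 1));	/* 1: empty barrier sent */
	return 0;
}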
@@ -333,6 +353,8 @@ static void dispatch_io(int rw, unsigned int num_regions,
 	int i;
 	struct dpages old_pages = *dp;
 
+	BUG_ON(num_regions > DM_IO_MAX_REGIONS);
+
 	if (sync)
 		rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
 
@@ -342,7 +364,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
 	 */
 	for (i = 0; i < num_regions; i++) {
 		*dp = old_pages;
-		if (where[i].count)
+		if (where[i].count || (rw & (1 << BIO_RW_BARRIER)))
 			do_region(rw, i, where + i, dp, io);
 	}
 
@@ -357,7 +379,14 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 		   struct dm_io_region *where, int rw, struct dpages *dp,
 		   unsigned long *error_bits)
 {
-	struct io io;
+	/*
+	 * gcc <= 4.3 can't do the alignment for stack variables, so we must
+	 * align it on our own.
+	 * volatile prevents the optimizer from removing or reusing
+	 * "io_" field from the stack frame (allowed in ANSI C).
+	 */
+	volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
+	struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
 
 	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
 		WARN_ON(1);
@@ -365,33 +394,33 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 	}
 
 retry:
-	io.error_bits = 0;
-	io.eopnotsupp_bits = 0;
-	atomic_set(&io.count, 1); /* see dispatch_io() */
-	io.sleeper = current;
-	io.client = client;
+	io->error_bits = 0;
+	io->eopnotsupp_bits = 0;
+	atomic_set(&io->count, 1); /* see dispatch_io() */
+	io->sleeper = current;
+	io->client = client;
 
-	dispatch_io(rw, num_regions, where, dp, &io, 1);
+	dispatch_io(rw, num_regions, where, dp, io, 1);
 
 	while (1) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 
-		if (!atomic_read(&io.count))
+		if (!atomic_read(&io->count))
 			break;
 
 		io_schedule();
 	}
 	set_current_state(TASK_RUNNING);
 
-	if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
+	if (io->eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
 		rw &= ~(1 << BIO_RW_BARRIER);
 		goto retry;
 	}
 
 	if (error_bits)
-		*error_bits = io.error_bits;
+		*error_bits = io->error_bits;
 
-	return io.error_bits ? -EIO : 0;
+	return io->error_bits ? -EIO : 0;
 }
 
 static int async_io(struct dm_io_client *client, unsigned int num_regions,
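The sync_io() changes above also show the portable way to over-align a stack variable on compilers (gcc <= 4.3 here) that ignore alignment requests for automatic storage: over-allocate a char buffer by alignment - 1 bytes and round the pointer up. A standalone sketch under that assumption, with ptr_align() as a hypothetical stand-in for the kernel's PTR_ALIGN():

/* Userspace illustration of the manual stack-alignment trick. */
#include <stdint.h>
#include <stdio.h>

#define ALIGN_OF 64			/* stand-in for __alignof__(struct io) */

/* Round 'p' up to the next 'align' boundary (align must be a power of two). */
static void *ptr_align(void *p, uintptr_t align)
{
	return (void *)(((uintptr_t)p + align - 1) & ~(align - 1));
}

int main(void)
{
	/* Over-allocate by ALIGN_OF - 1 bytes so an aligned object always
	 * fits somewhere inside the buffer.  In the kernel code, 'volatile'
	 * additionally stops the optimizer from eliding the buffer once it
	 * is only reached through the aligned pointer. */
	volatile char buf[128 + ALIGN_OF - 1];
	void *aligned = ptr_align((void *)buf, ALIGN_OF);

	printf("buf=%p aligned=%p rem=%lu\n", (void *)buf, aligned,
	       (unsigned long)((uintptr_t)aligned % ALIGN_OF));	/* rem=0 */
	return 0;
}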
@@ -472,3 +501,18 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions,
 		&dp, io_req->notify.fn, io_req->notify.context);
 }
 EXPORT_SYMBOL(dm_io);
+
+int __init dm_io_init(void)
+{
+	_dm_io_cache = KMEM_CACHE(io, 0);
+	if (!_dm_io_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void dm_io_exit(void)
+{
+	kmem_cache_destroy(_dm_io_cache);
+	_dm_io_cache = NULL;
+}
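The new init/exit hooks exist because the aligned attribute on 'struct io' must be honored by the allocator: a kmalloc-backed mempool cannot guarantee that alignment, while a cache created with KMEM_CACHE() inherits __alignof__(struct io). A hedged sketch of the slab-plus-mempool lifecycle this patch adopts (kernel-style code; io_like, my_init, my_exit and the pool size of 16 are illustrative stand-ins, not from the patch):

#include <linux/errno.h>
#include <linux/mempool.h>
#include <linux/slab.h>

struct io_like {
	unsigned long error_bits;
} __attribute__((aligned(64)));

static struct kmem_cache *cache;
static mempool_t *pool;

static int my_init(void)
{
	/* KMEM_CACHE() passes __alignof__(struct io_like) to
	 * kmem_cache_create(), so every allocation is suitably aligned. */
	cache = KMEM_CACHE(io_like, 0);
	if (!cache)
		return -ENOMEM;

	/* The mempool draws from the slab cache, preserving alignment
	 * while still guaranteeing forward progress under memory pressure. */
	pool = mempool_create_slab_pool(16, cache);
	if (!pool) {
		kmem_cache_destroy(cache);
		return -ENOMEM;
	}
	return 0;
}

static void my_exit(void)
{
	mempool_destroy(pool);
	kmem_cache_destroy(cache);
}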