Diffstat (limited to 'drivers')
 -rw-r--r--  drivers/md/dm-io.c  89
 1 file changed, 55 insertions, 34 deletions
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index b0d264e684fd..f6a714c5aab0 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -16,12 +16,19 @@
 #include <linux/slab.h>
 #include <linux/dm-io.h>
 
+#define DM_MSG_PREFIX "io"
+
+#define DM_IO_MAX_REGIONS	BITS_PER_LONG
+
 struct dm_io_client {
 	mempool_t *pool;
 	struct bio_set *bios;
 };
 
-/* FIXME: can we shrink this ? */
+/*
+ * Aligning 'struct io' reduces the number of bits required to store
+ * its address.  Refer to store_io_and_region_in_bio() below.
+ */
 struct io {
 	unsigned long error_bits;
 	unsigned long eopnotsupp_bits;
@@ -30,7 +37,7 @@ struct io {
 	struct dm_io_client *client;
 	io_notify_fn callback;
 	void *context;
-};
+} __attribute__((aligned(DM_IO_MAX_REGIONS)));
 
 static struct kmem_cache *_dm_io_cache;
 
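[Editor's note] The aligned attribute is the load-bearing piece of this patch: every 'struct io' address becomes a multiple of DM_IO_MAX_REGIONS, so its low log2(BITS_PER_LONG) bits (six on 64-bit) are guaranteed zero. A minimal standalone sketch of that guarantee, not part of the patch (io_like and MAX_REGIONS are illustrative stand-ins):

#include <assert.h>

#define MAX_REGIONS 64	/* stand-in for BITS_PER_LONG on 64-bit */

struct io_like {
	unsigned long error_bits;
	void *context;
} __attribute__((aligned(MAX_REGIONS)));

int main(void)
{
	static struct io_like objs[4];
	int i;

	/* every pointer to an aligned object has its low six bits clear */
	for (i = 0; i < 4; i++)
		assert(((unsigned long)&objs[i] & (MAX_REGIONS - 1)) == 0);
	return 0;
}

Those six spare bits are exactly enough to number regions 0..BITS_PER_LONG-1, which is the "just 5 or 6 bits" mentioned in the comment in the next hunk.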
@@ -92,18 +99,29 @@ EXPORT_SYMBOL(dm_io_client_destroy);
 
 /*-----------------------------------------------------------------
  * We need to keep track of which region a bio is doing io for.
- * In order to save a memory allocation we store this the last
- * bvec which we know is unused (blech).
- * XXX This is ugly and can OOPS with some configs... find another way.
+ * To avoid a memory allocation to store just 5 or 6 bits, we
+ * ensure the 'struct io' pointer is aligned so enough low bits are
+ * always zero and then combine it with the region number directly in
+ * bi_private.
 *---------------------------------------------------------------*/
-static inline void bio_set_region(struct bio *bio, unsigned region)
+static void store_io_and_region_in_bio(struct bio *bio, struct io *io,
+				       unsigned region)
 {
-	bio->bi_io_vec[bio->bi_max_vecs].bv_len = region;
+	if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
+		DMCRIT("Unaligned struct io pointer %p", io);
+		BUG();
+	}
+
+	bio->bi_private = (void *)((unsigned long)io | region);
 }
 
-static inline unsigned bio_get_region(struct bio *bio)
+static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
+					    unsigned *region)
 {
-	return bio->bi_io_vec[bio->bi_max_vecs].bv_len;
+	unsigned long val = (unsigned long)bio->bi_private;
+
+	*io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
+	*region = val & (DM_IO_MAX_REGIONS - 1);
 }
 
 /*-----------------------------------------------------------------
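[Editor's note] A hedged userspace sketch of the round-trip arithmetic used by the two helpers above. Because DM_IO_MAX_REGIONS is a power of two, -(unsigned long)DM_IO_MAX_REGIONS equals ~(DM_IO_MAX_REGIONS - 1) in two's complement, so one AND strips the region bits to recover the pointer and the other extracts the region (all names here are illustrative, not from the patch):

#include <assert.h>

#define MAX_REGIONS 64UL	/* must be a power of two */

int main(void)
{
	unsigned long ptr = 0x1000;	/* pretend 64-byte-aligned address */
	unsigned region = 37;
	unsigned long val = ptr | region;	/* the store_...() step, minus checks */

	/* -MAX_REGIONS == ~(MAX_REGIONS - 1), so the masks are exact inverses */
	assert((val & -MAX_REGIONS) == ptr);		/* pointer recovered */
	assert((val & (MAX_REGIONS - 1)) == region);	/* region recovered */
	return 0;
}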
@@ -144,10 +162,8 @@ static void endio(struct bio *bio, int error)
 	/*
 	 * The bio destructor in bio_put() may use the io object.
 	 */
-	io = bio->bi_private;
-	region = bio_get_region(bio);
+	retrieve_io_and_region_from_bio(bio, &io, &region);
 
-	bio->bi_max_vecs++;
 	bio_put(bio);
 
 	dec_count(io, region, error);
@@ -247,7 +263,10 @@ static void vm_dp_init(struct dpages *dp, void *data)
 
 static void dm_bio_destructor(struct bio *bio)
 {
-	struct io *io = bio->bi_private;
+	unsigned region;
+	struct io *io;
+
+	retrieve_io_and_region_from_bio(bio, &io, &region);
 
 	bio_free(bio, io->client->bios);
 }
@@ -292,24 +311,17 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 
 	while (remaining) {
 		/*
-		 * Allocate a suitably sized-bio: we add an extra
-		 * bvec for bio_get/set_region() and decrement bi_max_vecs
-		 * to hide it from bio_add_page().
+		 * Allocate a suitably sized-bio.
 		 */
 		num_bvecs = dm_sector_div_up(remaining,
 					     (PAGE_SIZE >> SECTOR_SHIFT));
-		num_bvecs = 1 + min_t(int, bio_get_nr_vecs(where->bdev),
-				      num_bvecs);
-		if (unlikely(num_bvecs > BIO_MAX_PAGES))
-			num_bvecs = BIO_MAX_PAGES;
+		num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), num_bvecs);
 		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
 		bio->bi_sector = where->sector + (where->count - remaining);
 		bio->bi_bdev = where->bdev;
 		bio->bi_end_io = endio;
-		bio->bi_private = io;
 		bio->bi_destructor = dm_bio_destructor;
-		bio->bi_max_vecs--;
-		bio_set_region(bio, region);
+		store_io_and_region_in_bio(bio, io, region);
 
 		/*
 		 * Try and add as many pages as possible.
@@ -337,6 +349,8 @@ static void dispatch_io(int rw, unsigned int num_regions,
 	int i;
 	struct dpages old_pages = *dp;
 
+	BUG_ON(num_regions > DM_IO_MAX_REGIONS);
+
 	if (sync)
 		rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
 
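[Editor's note] The new BUG_ON reflects why DM_IO_MAX_REGIONS is BITS_PER_LONG: each region's outcome lands as one bit in io->error_bits, an unsigned long, so more regions than bits cannot be tracked (and the region number must also fit in the pointer's spare low bits). A trivial sketch of that bit accounting, illustrative only:

#include <assert.h>

#define MAX_REGIONS (8 * sizeof(unsigned long))	/* BITS_PER_LONG */

int main(void)
{
	unsigned long error_bits = 0;
	unsigned region;

	/* one outcome bit per region, hence at most MAX_REGIONS regions */
	for (region = 0; region < MAX_REGIONS; region++)
		error_bits |= 1UL << region;
	assert(error_bits == ~0UL);
	return 0;
}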
@@ -361,7 +375,14 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 		   struct dm_io_region *where, int rw, struct dpages *dp,
 		   unsigned long *error_bits)
 {
-	struct io io;
+	/*
+	 * gcc <= 4.3 can't do the alignment for stack variables, so we must
+	 * align it on our own.
+	 * volatile prevents the optimizer from removing or reusing
+	 * "io_" field from the stack frame (allowed in ANSI C).
+	 */
+	volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
+	struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
 
 	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
 		WARN_ON(1);
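[Editor's note] The stack-side workaround over-allocates by (alignment - 1) bytes and rounds the pointer up to the next aligned address, which is essentially what the kernel's PTR_ALIGN does. A standalone sketch of the idiom, with ALIGN_UP and io_like as hypothetical stand-ins:

#include <assert.h>
#include <stdint.h>

#define ALIGN_UP(x, a)	(((x) + ((a) - 1)) & ~((uintptr_t)(a) - 1))

struct io_like {
	unsigned long error_bits;
} __attribute__((aligned(64)));

int main(void)
{
	/* over-allocate so an aligned object fits at some offset inside */
	char raw[sizeof(struct io_like) + __alignof__(struct io_like) - 1];
	struct io_like *io = (struct io_like *)
		ALIGN_UP((uintptr_t)raw, __alignof__(struct io_like));

	assert(((uintptr_t)io & (__alignof__(struct io_like) - 1)) == 0);
	assert((char *)(io + 1) <= raw + sizeof(raw));	/* still in bounds */
	return 0;
}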
@@ -369,33 +390,33 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 	}
 
 retry:
-	io.error_bits = 0;
-	io.eopnotsupp_bits = 0;
-	atomic_set(&io.count, 1); /* see dispatch_io() */
-	io.sleeper = current;
-	io.client = client;
+	io->error_bits = 0;
+	io->eopnotsupp_bits = 0;
+	atomic_set(&io->count, 1); /* see dispatch_io() */
+	io->sleeper = current;
+	io->client = client;
 
-	dispatch_io(rw, num_regions, where, dp, &io, 1);
+	dispatch_io(rw, num_regions, where, dp, io, 1);
 
 	while (1) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 
-		if (!atomic_read(&io.count))
+		if (!atomic_read(&io->count))
 			break;
 
 		io_schedule();
 	}
 	set_current_state(TASK_RUNNING);
 
-	if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
+	if (io->eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
 		rw &= ~(1 << BIO_RW_BARRIER);
 		goto retry;
 	}
 
 	if (error_bits)
-		*error_bits = io.error_bits;
+		*error_bits = io->error_bits;
 
-	return io.error_bits ? -EIO : 0;
+	return io->error_bits ? -EIO : 0;
 }
 
 static int async_io(struct dm_io_client *client, unsigned int num_regions,