Diffstat (limited to 'drivers/md/dm-io.c')
-rw-r--r--	drivers/md/dm-io.c	120
1 file changed, 82 insertions(+), 38 deletions(-)
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 3a2e6a2f8bdd..10f457ca6af2 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -5,6 +5,8 @@
  * This file is released under the GPL.
  */
 
+#include "dm.h"
+
 #include <linux/device-mapper.h>
 
 #include <linux/bio.h>
@@ -14,12 +16,19 @@
 #include <linux/slab.h>
 #include <linux/dm-io.h>
 
+#define DM_MSG_PREFIX "io"
+
+#define DM_IO_MAX_REGIONS BITS_PER_LONG
+
 struct dm_io_client {
 	mempool_t *pool;
 	struct bio_set *bios;
 };
 
-/* FIXME: can we shrink this ? */
+/*
+ * Aligning 'struct io' reduces the number of bits required to store
+ * its address. Refer to store_io_and_region_in_bio() below.
+ */
 struct io {
 	unsigned long error_bits;
 	unsigned long eopnotsupp_bits;
@@ -28,7 +37,9 @@ struct io {
 	struct dm_io_client *client;
 	io_notify_fn callback;
 	void *context;
-};
+} __attribute__((aligned(DM_IO_MAX_REGIONS)));
+
+static struct kmem_cache *_dm_io_cache;
 
 /*
  * io contexts are only dynamically allocated for asynchronous
@@ -53,7 +64,7 @@ struct dm_io_client *dm_io_client_create(unsigned num_pages)
 	if (!client)
 		return ERR_PTR(-ENOMEM);
 
-	client->pool = mempool_create_kmalloc_pool(ios, sizeof(struct io));
+	client->pool = mempool_create_slab_pool(ios, _dm_io_cache);
 	if (!client->pool)
 		goto bad;
 
@@ -88,18 +99,29 @@ EXPORT_SYMBOL(dm_io_client_destroy);
 
 /*-----------------------------------------------------------------
  * We need to keep track of which region a bio is doing io for.
- * In order to save a memory allocation we store this the last
- * bvec which we know is unused (blech).
- * XXX This is ugly and can OOPS with some configs... find another way.
+ * To avoid a memory allocation to store just 5 or 6 bits, we
+ * ensure the 'struct io' pointer is aligned so enough low bits are
+ * always zero and then combine it with the region number directly in
+ * bi_private.
  *---------------------------------------------------------------*/
-static inline void bio_set_region(struct bio *bio, unsigned region)
+static void store_io_and_region_in_bio(struct bio *bio, struct io *io,
+				       unsigned region)
 {
-	bio->bi_io_vec[bio->bi_max_vecs].bv_len = region;
+	if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
+		DMCRIT("Unaligned struct io pointer %p", io);
+		BUG();
+	}
+
+	bio->bi_private = (void *)((unsigned long)io | region);
 }
 
-static inline unsigned bio_get_region(struct bio *bio)
+static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
+					    unsigned *region)
 {
-	return bio->bi_io_vec[bio->bi_max_vecs].bv_len;
+	unsigned long val = (unsigned long)bio->bi_private;
+
+	*io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
+	*region = val & (DM_IO_MAX_REGIONS - 1);
 }
 
 /*-----------------------------------------------------------------
@@ -140,10 +162,8 @@ static void endio(struct bio *bio, int error)
 	/*
 	 * The bio destructor in bio_put() may use the io object.
 	 */
-	io = bio->bi_private;
-	region = bio_get_region(bio);
+	retrieve_io_and_region_from_bio(bio, &io, &region);
 
-	bio->bi_max_vecs++;
 	bio_put(bio);
 
 	dec_count(io, region, error);
@@ -243,7 +263,10 @@ static void vm_dp_init(struct dpages *dp, void *data)
 
 static void dm_bio_destructor(struct bio *bio)
 {
-	struct io *io = bio->bi_private;
+	unsigned region;
+	struct io *io;
+
+	retrieve_io_and_region_from_bio(bio, &io, &region);
 
 	bio_free(bio, io->client->bios);
 }
@@ -286,26 +309,23 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 	unsigned num_bvecs;
 	sector_t remaining = where->count;
 
-	while (remaining) {
+	/*
+	 * where->count may be zero if rw holds a write barrier and we
+	 * need to send a zero-sized barrier.
+	 */
+	do {
 		/*
-		 * Allocate a suitably sized-bio: we add an extra
-		 * bvec for bio_get/set_region() and decrement bi_max_vecs
-		 * to hide it from bio_add_page().
+		 * Allocate a suitably sized-bio.
 		 */
 		num_bvecs = dm_sector_div_up(remaining,
 					     (PAGE_SIZE >> SECTOR_SHIFT));
-		num_bvecs = 1 + min_t(int, bio_get_nr_vecs(where->bdev),
-				      num_bvecs);
-		if (unlikely(num_bvecs > BIO_MAX_PAGES))
-			num_bvecs = BIO_MAX_PAGES;
+		num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), num_bvecs);
 		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
 		bio->bi_sector = where->sector + (where->count - remaining);
 		bio->bi_bdev = where->bdev;
 		bio->bi_end_io = endio;
-		bio->bi_private = io;
 		bio->bi_destructor = dm_bio_destructor;
-		bio->bi_max_vecs--;
-		bio_set_region(bio, region);
+		store_io_and_region_in_bio(bio, io, region);
 
 		/*
 		 * Try and add as many pages as possible.
@@ -323,7 +343,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 
 		atomic_inc(&io->count);
 		submit_bio(rw, bio);
-	}
+	} while (remaining);
 }
 
 static void dispatch_io(int rw, unsigned int num_regions,
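
The while -> do/while conversion is what lets a zero-sized barrier through: with where->count == 0, the old loop body never ran, so no bio was ever issued for a barrier-only request, whereas do/while issues exactly one empty bio. A toy sketch of the control-flow difference; count_bios_while()/count_bios_do_while() are hypothetical helpers modelling a bio that carries up to CHUNK sectors:

/* Sketch: why do/while is needed for zero-sized barriers. */
#include <stdio.h>

#define CHUNK 8			/* sectors one bio can carry */

static unsigned count_bios_while(unsigned remaining)
{
	unsigned bios = 0;

	while (remaining) {	/* old code: an empty region sends nothing */
		remaining -= remaining < CHUNK ? remaining : CHUNK;
		bios++;
	}
	return bios;
}

static unsigned count_bios_do_while(unsigned remaining)
{
	unsigned bios = 0;

	do {			/* new code: at least one (possibly empty) bio */
		remaining -= remaining < CHUNK ? remaining : CHUNK;
		bios++;
	} while (remaining);
	return bios;
}

int main(void)
{
	printf("count=0:  while=%u do/while=%u\n",
	       count_bios_while(0), count_bios_do_while(0));
	printf("count=20: while=%u do/while=%u\n",
	       count_bios_while(20), count_bios_do_while(20));
	return 0;
}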
@@ -333,6 +353,8 @@ static void dispatch_io(int rw, unsigned int num_regions,
 	int i;
 	struct dpages old_pages = *dp;
 
+	BUG_ON(num_regions > DM_IO_MAX_REGIONS);
+
 	if (sync)
 		rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
 
@@ -342,7 +364,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
 	 */
 	for (i = 0; i < num_regions; i++) {
 		*dp = old_pages;
-		if (where[i].count)
+		if (where[i].count || (rw & (1 << BIO_RW_BARRIER)))
 			do_region(rw, i, where + i, dp, io);
 	}
 
@@ -357,7 +379,14 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 		      struct dm_io_region *where, int rw, struct dpages *dp,
 		      unsigned long *error_bits)
 {
-	struct io io;
+	/*
+	 * gcc <= 4.3 can't do the alignment for stack variables, so we must
+	 * align it on our own.
+	 * volatile prevents the optimizer from removing or reusing
+	 * "io_" field from the stack frame (allowed in ANSI C).
+	 */
+	volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
+	struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
 
 	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
 		WARN_ON(1);
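
The PTR_ALIGN dance above can be reproduced in plain C: over-allocate a byte buffer by (alignment - 1) and round the start address up to the next multiple of the alignment. A minimal sketch under that assumption, with align_up() as a hypothetical equivalent of the kernel's PTR_ALIGN:

/* Sketch: manually align an on-stack object, as sync_io() does for
 * compilers that cannot align stack variables themselves. */
#include <stdalign.h>
#include <stdint.h>
#include <stdio.h>

struct io_like {
	unsigned long error_bits;
} __attribute__((aligned(64)));		/* mirrors struct io's alignment */

static void *align_up(void *p, uintptr_t a)	/* like PTR_ALIGN */
{
	return (void *)(((uintptr_t)p + a - 1) & ~(a - 1));
}

int main(void)
{
	/* Big enough to hold one struct io_like at any starting offset. */
	char buf[sizeof(struct io_like) + alignof(struct io_like) - 1];
	struct io_like *io = align_up(buf, alignof(struct io_like));

	io->error_bits = 0;
	printf("aligned at %p (mod 64 = %lu)\n", (void *)io,
	       (unsigned long)((uintptr_t)io % 64));
	return 0;
}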
@@ -365,33 +394,33 @@
 	}
 
 retry:
-	io.error_bits = 0;
-	io.eopnotsupp_bits = 0;
-	atomic_set(&io.count, 1); /* see dispatch_io() */
-	io.sleeper = current;
-	io.client = client;
+	io->error_bits = 0;
+	io->eopnotsupp_bits = 0;
+	atomic_set(&io->count, 1); /* see dispatch_io() */
+	io->sleeper = current;
+	io->client = client;
 
-	dispatch_io(rw, num_regions, where, dp, &io, 1);
+	dispatch_io(rw, num_regions, where, dp, io, 1);
 
 	while (1) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 
-		if (!atomic_read(&io.count))
+		if (!atomic_read(&io->count))
 			break;
 
 		io_schedule();
 	}
 	set_current_state(TASK_RUNNING);
 
-	if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
+	if (io->eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
 		rw &= ~(1 << BIO_RW_BARRIER);
 		goto retry;
 	}
 
 	if (error_bits)
-		*error_bits = io.error_bits;
+		*error_bits = io->error_bits;
 
-	return io.error_bits ? -EIO : 0;
+	return io->error_bits ? -EIO : 0;
 }
 
 static int async_io(struct dm_io_client *client, unsigned int num_regions,
@@ -472,3 +501,18 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions,
 		      &dp, io_req->notify.fn, io_req->notify.context);
 }
 EXPORT_SYMBOL(dm_io);
+
+int __init dm_io_init(void)
+{
+	_dm_io_cache = KMEM_CACHE(io, 0);
+	if (!_dm_io_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void dm_io_exit(void)
+{
+	kmem_cache_destroy(_dm_io_cache);
+	_dm_io_cache = NULL;
+}
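
Switching from mempool_create_kmalloc_pool() to a slab pool is what makes the whole scheme safe: KMEM_CACHE(io, 0) takes the object size and alignment from the struct definition itself, so every allocation satisfies the IS_ALIGNED check in store_io_and_region_in_bio(). A rough userspace analogy of that guarantee, with alloc_io()/free_io() as hypothetical stand-ins for kmem_cache_alloc()/kmem_cache_free():

/* Sketch: an allocator that, like a slab cache created with
 * KMEM_CACHE(io, 0), honors the struct's declared alignment so the
 * low bits of every returned pointer are zero. */
#include <assert.h>
#include <stdalign.h>
#include <stdint.h>
#include <stdlib.h>

#define MAX_REGIONS 64			/* stands in for DM_IO_MAX_REGIONS */

struct io_like {
	unsigned long error_bits;
	unsigned long eopnotsupp_bits;
} __attribute__((aligned(MAX_REGIONS)));

static struct io_like *alloc_io(void)
{
	/* aligned_alloc plays the role of kmem_cache_alloc() here;
	 * sizeof() is already padded to a multiple of the alignment. */
	return aligned_alloc(alignof(struct io_like),
			     sizeof(struct io_like));
}

static void free_io(struct io_like *io)
{
	free(io);
}

int main(void)
{
	struct io_like *io = alloc_io();

	/* The precondition the pointer-tagging scheme depends on: */
	assert(((uintptr_t)io & (MAX_REGIONS - 1)) == 0);
	free_io(io);
	return 0;
}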