author		Mikulas Patocka <mpatocka@redhat.com>	2009-12-10 18:51:58 -0500
committer	Alasdair G Kergon <agk@redhat.com>	2009-12-10 18:51:58 -0500
commit		f1e539874655ae9e74c1644fd54133b19f1b14e2 (patch)
tree		72dd2956c3915a45984321c192964ce7a88dbe07 /drivers/md
parent		952b355760c196ec014dd0b6878f85a11496e3da (diff)
dm io: remove extra bi_io_vec region hack
Remove the hack where we allocate an extra bi_io_vec to store additional
private data.  This hack prevents us from supporting barriers in dm-raid1
without first making another little block layer change.  Instead of doing
that, this patch eliminates the bi_io_vec abuse by storing the region
number directly in the low bits of bi_private.

We need to store two things for each bio: the pointer to the main io
structure and, if parallel writes were requested, an index indicating
which of these writes this bio belongs to.  There can be at most
BITS_PER_LONG regions - 32 or 64.

The index (region number) was stored in the last (hidden) bio vector and
the pointer to struct io was stored in bi_private.

This patch now aligns "struct io" on BITS_PER_LONG bytes and stores the
region number in the low bits of bi_private.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
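For readers outside the kernel tree, the pointer-tagging idea can be shown in a minimal, self-contained userspace sketch (the names MAX_REGIONS, pack() and unpack() are illustrative, not the kernel's): because the structure is aligned to a power-of-two boundary, the low bits of its address are guaranteed to be zero and can carry the region index.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_REGIONS 64			/* stands in for DM_IO_MAX_REGIONS */

/*
 * Aligning the structure to MAX_REGIONS bytes guarantees that the low
 * log2(MAX_REGIONS) bits of any pointer to it are zero, leaving room
 * for a region index in the range 0..MAX_REGIONS-1.
 */
struct io {
	unsigned long error_bits;
} __attribute__((aligned(MAX_REGIONS)));

/* Pack the io pointer and the region index into one word. */
static void *pack(struct io *io, unsigned int region)
{
	assert((uintptr_t)io % MAX_REGIONS == 0);
	assert(region < MAX_REGIONS);
	return (void *)((uintptr_t)io | region);
}

/* Recover both values from the packed word. */
static void unpack(void *priv, struct io **io, unsigned int *region)
{
	uintptr_t val = (uintptr_t)priv;

	*io = (struct io *)(val & ~(uintptr_t)(MAX_REGIONS - 1));
	*region = (unsigned int)(val & (MAX_REGIONS - 1));
}

int main(void)
{
	/* aligned(64) also rounds sizeof(struct io) up to 64, which keeps
	 * the size a multiple of the alignment as aligned_alloc() requires. */
	struct io *io = aligned_alloc(MAX_REGIONS, sizeof(struct io));
	struct io *io2;
	unsigned int region;
	void *priv;

	priv = pack(io, 5);
	unpack(priv, &io2, &region);
	printf("io matches: %d, region = %u\n", io == io2, region);
	free(io);
	return 0;
}

Masking with ~(MAX_REGIONS - 1) recovers the pointer and masking with MAX_REGIONS - 1 recovers the index, which is what store_io_and_region_in_bio() and retrieve_io_and_region_from_bio() do in the patch below.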
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/dm-io.c	89
1 file changed, 55 insertions, 34 deletions
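The other half of the change sits in sync_io() in the diff below: an on-stack 'struct io' must now respect the new alignment, and since gcc <= 4.3 cannot align stack variables itself, the patch over-allocates a char buffer and aligns the pointer by hand with PTR_ALIGN(). A standalone sketch of that idiom, with an illustrative ALIGN_PTR macro standing in for the kernel's PTR_ALIGN(), looks roughly like this:

#include <stdint.h>
#include <stdio.h>

/*
 * Round a pointer up to the next multiple of 'a' (a power of two);
 * stands in for the kernel's PTR_ALIGN() macro.
 */
#define ALIGN_PTR(p, a) \
	((void *)(((uintptr_t)(p) + ((a) - 1)) & ~(uintptr_t)((a) - 1)))

struct io {
	unsigned long error_bits;
} __attribute__((aligned(64)));

int main(void)
{
	/*
	 * Over-allocate a raw buffer on the stack by alignment-1 bytes,
	 * then carve an aligned 'struct io' out of it.  'volatile' keeps
	 * the optimizer from eliminating or reusing the buffer, mirroring
	 * the comment in the patch.
	 */
	volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
	struct io *io = ALIGN_PTR(&io_, __alignof__(struct io));

	io->error_bits = 0;
	printf("io is %saligned\n",
	       (uintptr_t)io % __alignof__(struct io) ? "NOT " : "");
	return 0;
}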
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index b0d264e684fd..f6a714c5aab0 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -16,12 +16,19 @@
 #include <linux/slab.h>
 #include <linux/dm-io.h>
 
+#define DM_MSG_PREFIX "io"
+
+#define DM_IO_MAX_REGIONS	BITS_PER_LONG
+
 struct dm_io_client {
 	mempool_t *pool;
 	struct bio_set *bios;
 };
 
-/* FIXME: can we shrink this ? */
+/*
+ * Aligning 'struct io' reduces the number of bits required to store
+ * its address.  Refer to store_io_and_region_in_bio() below.
+ */
 struct io {
 	unsigned long error_bits;
 	unsigned long eopnotsupp_bits;
@@ -30,7 +37,7 @@ struct io {
 	struct dm_io_client *client;
 	io_notify_fn callback;
 	void *context;
-};
+} __attribute__((aligned(DM_IO_MAX_REGIONS)));
 
 static struct kmem_cache *_dm_io_cache;
 
@@ -92,18 +99,29 @@ EXPORT_SYMBOL(dm_io_client_destroy);
 
 /*-----------------------------------------------------------------
  * We need to keep track of which region a bio is doing io for.
- * In order to save a memory allocation we store this the last
- * bvec which we know is unused (blech).
- * XXX This is ugly and can OOPS with some configs... find another way.
+ * To avoid a memory allocation to store just 5 or 6 bits, we
+ * ensure the 'struct io' pointer is aligned so enough low bits are
+ * always zero and then combine it with the region number directly in
+ * bi_private.
  *---------------------------------------------------------------*/
-static inline void bio_set_region(struct bio *bio, unsigned region)
+static void store_io_and_region_in_bio(struct bio *bio, struct io *io,
+				       unsigned region)
 {
-	bio->bi_io_vec[bio->bi_max_vecs].bv_len = region;
+	if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
+		DMCRIT("Unaligned struct io pointer %p", io);
+		BUG();
+	}
+
+	bio->bi_private = (void *)((unsigned long)io | region);
 }
 
-static inline unsigned bio_get_region(struct bio *bio)
+static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
+					    unsigned *region)
 {
-	return bio->bi_io_vec[bio->bi_max_vecs].bv_len;
+	unsigned long val = (unsigned long)bio->bi_private;
+
+	*io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
+	*region = val & (DM_IO_MAX_REGIONS - 1);
 }
 
 /*-----------------------------------------------------------------
@@ -144,10 +162,8 @@ static void endio(struct bio *bio, int error)
 	/*
 	 * The bio destructor in bio_put() may use the io object.
 	 */
-	io = bio->bi_private;
-	region = bio_get_region(bio);
+	retrieve_io_and_region_from_bio(bio, &io, &region);
 
-	bio->bi_max_vecs++;
 	bio_put(bio);
 
 	dec_count(io, region, error);
@@ -247,7 +263,10 @@ static void vm_dp_init(struct dpages *dp, void *data)
 
 static void dm_bio_destructor(struct bio *bio)
 {
-	struct io *io = bio->bi_private;
+	unsigned region;
+	struct io *io;
+
+	retrieve_io_and_region_from_bio(bio, &io, &region);
 
 	bio_free(bio, io->client->bios);
 }
@@ -292,24 +311,17 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 
 	while (remaining) {
 		/*
-		 * Allocate a suitably sized-bio: we add an extra
-		 * bvec for bio_get/set_region() and decrement bi_max_vecs
-		 * to hide it from bio_add_page().
+		 * Allocate a suitably sized-bio.
 		 */
 		num_bvecs = dm_sector_div_up(remaining,
 					     (PAGE_SIZE >> SECTOR_SHIFT));
-		num_bvecs = 1 + min_t(int, bio_get_nr_vecs(where->bdev),
-				      num_bvecs);
-		if (unlikely(num_bvecs > BIO_MAX_PAGES))
-			num_bvecs = BIO_MAX_PAGES;
+		num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), num_bvecs);
 		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
 		bio->bi_sector = where->sector + (where->count - remaining);
 		bio->bi_bdev = where->bdev;
 		bio->bi_end_io = endio;
-		bio->bi_private = io;
 		bio->bi_destructor = dm_bio_destructor;
-		bio->bi_max_vecs--;
-		bio_set_region(bio, region);
+		store_io_and_region_in_bio(bio, io, region);
 
 		/*
 		 * Try and add as many pages as possible.
@@ -337,6 +349,8 @@ static void dispatch_io(int rw, unsigned int num_regions,
 	int i;
 	struct dpages old_pages = *dp;
 
+	BUG_ON(num_regions > DM_IO_MAX_REGIONS);
+
 	if (sync)
 		rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
 
@@ -361,7 +375,14 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 		   struct dm_io_region *where, int rw, struct dpages *dp,
 		   unsigned long *error_bits)
 {
-	struct io io;
+	/*
+	 * gcc <= 4.3 can't do the alignment for stack variables, so we must
+	 * align it on our own.
+	 * volatile prevents the optimizer from removing or reusing
+	 * "io_" field from the stack frame (allowed in ANSI C).
+	 */
+	volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
+	struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
 
 	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
 		WARN_ON(1);
@@ -369,33 +390,33 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 	}
 
 retry:
-	io.error_bits = 0;
-	io.eopnotsupp_bits = 0;
-	atomic_set(&io.count, 1); /* see dispatch_io() */
-	io.sleeper = current;
-	io.client = client;
+	io->error_bits = 0;
+	io->eopnotsupp_bits = 0;
+	atomic_set(&io->count, 1); /* see dispatch_io() */
+	io->sleeper = current;
+	io->client = client;
 
-	dispatch_io(rw, num_regions, where, dp, &io, 1);
+	dispatch_io(rw, num_regions, where, dp, io, 1);
 
 	while (1) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 
-		if (!atomic_read(&io.count))
+		if (!atomic_read(&io->count))
 			break;
 
 		io_schedule();
 	}
 	set_current_state(TASK_RUNNING);
 
-	if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
+	if (io->eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
 		rw &= ~(1 << BIO_RW_BARRIER);
 		goto retry;
 	}
 
 	if (error_bits)
-		*error_bits = io.error_bits;
+		*error_bits = io->error_bits;
 
-	return io.error_bits ? -EIO : 0;
+	return io->error_bits ? -EIO : 0;
 }
 
 static int async_io(struct dm_io_client *client, unsigned int num_regions,