author		Tejun Heo <tj@kernel.org>		2010-09-03 05:56:19 -0400
committer	Jens Axboe <jaxboe@fusionio.com>	2010-09-10 06:35:38 -0400
commit		d87f4c14f27dc82d215108d8392a7d26687148a1 (patch)
tree		55f2a81f3df5d70fd85c4428089f6fe28540bcf4 /drivers/md/dm.c
parent		3a2edd0d6ddbd5fa3b389ea6db811285415ce6c8 (diff)
dm: implement REQ_FLUSH/FUA support for bio-based dm
This patch converts bio-based dm to support REQ_FLUSH/FUA instead of
the now deprecated REQ_HARDBARRIER.
* -EOPNOTSUPP handling logic dropped.
* Preflush is handled as before, but postflush is dropped and replaced
with passing down REQ_FUA to member request_queues. This replaces
one array-wide cache flush with member-specific FUA writes.
* __split_and_process_bio() now calls __clone_and_map_flush() directly
for flushes and guarantees all FLUSH bios going to targets are zero
length.
* It's now guaranteed that all FLUSH bios passed on to dm targets are
zero length (a target-side sketch follows the diff below).
bio_empty_barrier() tests are replaced with REQ_FLUSH tests.
* Empty WRITE_BARRIERs are replaced with WRITE_FLUSHes.
* Dropped unlikely() around REQ_FLUSH tests. Flushes are not unlikely
enough to be marked with unlikely().
* The block layer now filters out REQ_FLUSH/FUA bios if the request_queue
doesn't support cache flushing, so advertise REQ_FLUSH | REQ_FUA
capability (see the sketch after this message).
* Request-based dm isn't converted yet. dm_init_request_based_queue()
resets flush support to 0 for now. To avoid disturbing request-based
dm code, dm->flush_error is added for bio-based dm while request-based
dm continues to use dm->barrier_error.
Lightly tested linear, stripe, raid1, snap and crypt targets. Please
proceed with caution as I'm not familiar with the code base.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: dm-devel@redhat.com
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
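
Editorial sketch, not part of the patch: it illustrates the REQ_FLUSH/FUA convention the message above describes, using the 2.6.36-era block API. The function names my_init_queue() and my_issue_fua_write() are hypothetical.

/*
 * Illustrative sketch only -- not from the patch.  A queue that can flush
 * its write cache and honor FUA advertises both flags; a submitter that
 * wants "data durable on media" sets REQ_FLUSH (preflush) and REQ_FUA on a
 * single bio instead of issuing a separate postflush.  The block layer
 * strips whichever flags the queue did not advertise.
 */
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/fs.h>

static void my_init_queue(struct request_queue *q)
{
	blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
}

static void my_issue_fua_write(struct block_device *bdev, struct bio *bio)
{
	bio->bi_bdev = bdev;
	submit_bio(WRITE_FLUSH_FUA, bio);	/* write + preflush + FUA */
}
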
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r--	drivers/md/dm.c | 119
1 file changed, 57 insertions(+), 62 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b1d92be8f990..32e6622767ad 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -144,15 +144,16 @@ struct mapped_device {
 	spinlock_t deferred_lock;
 
 	/*
-	 * An error from the barrier request currently being processed.
+	 * An error from the flush request currently being processed.
 	 */
-	int barrier_error;
+	int flush_error;
 
 	/*
 	 * Protect barrier_error from concurrent endio processing
 	 * in request-based dm.
 	 */
 	spinlock_t barrier_error_lock;
+	int barrier_error;
 
 	/*
 	 * Processing queue (flush/barriers)
@@ -200,8 +201,8 @@ struct mapped_device {
 	/* sysfs handle */
 	struct kobject kobj;
 
-	/* zero-length barrier that will be cloned and submitted to targets */
-	struct bio barrier_bio;
+	/* zero-length flush that will be cloned and submitted to targets */
+	struct bio flush_bio;
 };
 
 /*
@@ -512,7 +513,7 @@ static void end_io_acct(struct dm_io *io)
 
 	/*
 	 * After this is decremented the bio must not be touched if it is
-	 * a barrier.
+	 * a flush.
 	 */
 	dm_disk(md)->part0.in_flight[rw] = pending =
 		atomic_dec_return(&md->pending[rw]);
@@ -626,7 +627,7 @@ static void dec_pending(struct dm_io *io, int error)
 		 */
 		spin_lock_irqsave(&md->deferred_lock, flags);
 		if (__noflush_suspending(md)) {
-			if (!(io->bio->bi_rw & REQ_HARDBARRIER))
+			if (!(io->bio->bi_rw & REQ_FLUSH))
 				bio_list_add_head(&md->deferred,
 						  io->bio);
 		} else
@@ -638,20 +639,14 @@ static void dec_pending(struct dm_io *io, int error)
 		io_error = io->error;
 		bio = io->bio;
 
-		if (bio->bi_rw & REQ_HARDBARRIER) {
+		if (bio->bi_rw & REQ_FLUSH) {
 			/*
-			 * There can be just one barrier request so we use
+			 * There can be just one flush request so we use
 			 * a per-device variable for error reporting.
 			 * Note that you can't touch the bio after end_io_acct
-			 *
-			 * We ignore -EOPNOTSUPP for empty flush reported by
-			 * underlying devices. We assume that if the device
-			 * doesn't support empty barriers, it doesn't need
-			 * cache flushing commands.
 			 */
-			if (!md->barrier_error &&
-			    !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP))
-				md->barrier_error = io_error;
+			if (!md->flush_error)
+				md->flush_error = io_error;
 			end_io_acct(io);
 			free_io(md, io);
 		} else {
@@ -1119,7 +1114,7 @@ static void dm_bio_destructor(struct bio *bio)
 }
 
 /*
- * Creates a little bio that is just does part of a bvec.
+ * Creates a little bio that just does part of a bvec.
  */
 static struct bio *split_bvec(struct bio *bio, sector_t sector,
 			      unsigned short idx, unsigned int offset,
@@ -1134,7 +1129,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
 
 	clone->bi_sector = sector;
 	clone->bi_bdev = bio->bi_bdev;
-	clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER;
+	clone->bi_rw = bio->bi_rw;
 	clone->bi_vcnt = 1;
 	clone->bi_size = to_bytes(len);
 	clone->bi_io_vec->bv_offset = offset;
@@ -1161,7 +1156,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
 
 	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
 	__bio_clone(clone, bio);
-	clone->bi_rw &= ~REQ_HARDBARRIER;
 	clone->bi_destructor = dm_bio_destructor;
 	clone->bi_sector = sector;
 	clone->bi_idx = idx;
@@ -1225,7 +1219,7 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
 		__issue_target_request(ci, ti, request_nr, len);
 }
 
-static int __clone_and_map_empty_barrier(struct clone_info *ci)
+static int __clone_and_map_flush(struct clone_info *ci)
 {
 	unsigned target_nr = 0;
 	struct dm_target *ti;
@@ -1289,9 +1283,6 @@ static int __clone_and_map(struct clone_info *ci)
 	sector_t len = 0, max;
 	struct dm_target_io *tio;
 
-	if (unlikely(bio_empty_barrier(bio)))
-		return __clone_and_map_empty_barrier(ci);
-
 	if (unlikely(bio->bi_rw & REQ_DISCARD))
 		return __clone_and_map_discard(ci);
 
@@ -1383,11 +1374,11 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 
 	ci.map = dm_get_live_table(md);
 	if (unlikely(!ci.map)) {
-		if (!(bio->bi_rw & REQ_HARDBARRIER))
+		if (!(bio->bi_rw & REQ_FLUSH))
 			bio_io_error(bio);
 		else
-			if (!md->barrier_error)
-				md->barrier_error = -EIO;
+			if (!md->flush_error)
+				md->flush_error = -EIO;
 		return;
 	}
 
@@ -1400,14 +1391,22 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 	ci.io->md = md;
 	spin_lock_init(&ci.io->endio_lock);
 	ci.sector = bio->bi_sector;
-	ci.sector_count = bio_sectors(bio);
-	if (unlikely(bio_empty_barrier(bio)))
+	if (!(bio->bi_rw & REQ_FLUSH))
+		ci.sector_count = bio_sectors(bio);
+	else {
+		/* all FLUSH bio's reaching here should be empty */
+		WARN_ON_ONCE(bio_has_data(bio));
 		ci.sector_count = 1;
+	}
 	ci.idx = bio->bi_idx;
 
 	start_io_acct(ci.io);
-	while (ci.sector_count && !error)
-		error = __clone_and_map(&ci);
+	while (ci.sector_count && !error) {
+		if (!(bio->bi_rw & REQ_FLUSH))
+			error = __clone_and_map(&ci);
+		else
+			error = __clone_and_map_flush(&ci);
+	}
 
 	/* drop the extra reference count */
 	dec_pending(ci.io, error);
@@ -1492,11 +1491,11 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
 	part_stat_unlock();
 
 	/*
-	 * If we're suspended or the thread is processing barriers
+	 * If we're suspended or the thread is processing flushes
 	 * we have to queue this io for later.
 	 */
 	if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
-	    unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
+	    (bio->bi_rw & REQ_FLUSH)) {
 		up_read(&md->io_lock);
 
 		if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
@@ -1940,6 +1939,7 @@ static void dm_init_md_queue(struct mapped_device *md)
 	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
 	md->queue->unplug_fn = dm_unplug_all;
 	blk_queue_merge_bvec(md->queue, dm_merge_bvec);
+	blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
 }
 
 /*
@@ -2245,7 +2245,8 @@ static int dm_init_request_based_queue(struct mapped_device *md)
 	blk_queue_softirq_done(md->queue, dm_softirq_done);
 	blk_queue_prep_rq(md->queue, dm_prep_fn);
 	blk_queue_lld_busy(md->queue, dm_lld_busy);
-	blk_queue_flush(md->queue, REQ_FLUSH);
+	/* no flush support for request based dm yet */
+	blk_queue_flush(md->queue, 0);
 
 	elv_register_queue(md->queue);
 
@@ -2406,41 +2407,35 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 	return r;
 }
 
-static void dm_flush(struct mapped_device *md)
+static void process_flush(struct mapped_device *md, struct bio *bio)
 {
-	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-
-	bio_init(&md->barrier_bio);
-	md->barrier_bio.bi_bdev = md->bdev;
-	md->barrier_bio.bi_rw = WRITE_BARRIER;
-	__split_and_process_bio(md, &md->barrier_bio);
+	md->flush_error = 0;
 
+	/* handle REQ_FLUSH */
 	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-}
 
-static void process_barrier(struct mapped_device *md, struct bio *bio)
-{
-	md->barrier_error = 0;
+	bio_init(&md->flush_bio);
+	md->flush_bio.bi_bdev = md->bdev;
+	md->flush_bio.bi_rw = WRITE_FLUSH;
+	__split_and_process_bio(md, &md->flush_bio);
 
-	dm_flush(md);
+	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
 
-	if (!bio_empty_barrier(bio)) {
-		__split_and_process_bio(md, bio);
-		/*
-		 * If the request isn't supported, don't waste time with
-		 * the second flush.
-		 */
-		if (md->barrier_error != -EOPNOTSUPP)
-			dm_flush(md);
+	/* if it's an empty flush or the preflush failed, we're done */
+	if (!bio_has_data(bio) || md->flush_error) {
+		if (md->flush_error != DM_ENDIO_REQUEUE)
+			bio_endio(bio, md->flush_error);
+		else {
+			spin_lock_irq(&md->deferred_lock);
+			bio_list_add_head(&md->deferred, bio);
+			spin_unlock_irq(&md->deferred_lock);
+		}
+		return;
 	}
 
-	if (md->barrier_error != DM_ENDIO_REQUEUE)
-		bio_endio(bio, md->barrier_error);
-	else {
-		spin_lock_irq(&md->deferred_lock);
-		bio_list_add_head(&md->deferred, bio);
-		spin_unlock_irq(&md->deferred_lock);
-	}
+	/* issue data + REQ_FUA */
+	bio->bi_rw &= ~REQ_FLUSH;
+	__split_and_process_bio(md, bio);
 }
 
 /*
@@ -2469,8 +2464,8 @@ static void dm_wq_work(struct work_struct *work)
 		if (dm_request_based(md))
 			generic_make_request(c);
 		else {
-			if (c->bi_rw & REQ_HARDBARRIER)
-				process_barrier(md, c);
+			if (c->bi_rw & REQ_FLUSH)
+				process_flush(md, c);
 			else
 				__split_and_process_bio(md, c);
 		}
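
To close, a hedged target-side sketch (not from this commit) of what the zero-length FLUSH guarantee above means for a bio-based target's map method. The name demo_flush_map() is made up, ti->private is assumed to hold a struct dm_dev saved by the constructor, and the target is assumed to have set ti->num_flush_requests = 1 so dm core sends it the cloned flush at all.

/*
 * Illustrative sketch, not part of this commit.  With the patch applied,
 * a REQ_FLUSH bio reaching a target carries no data, so a simple
 * pass-through target only has to point it at the backing device; data
 * bios (possibly carrying REQ_FUA) are remapped as usual.
 */
#include <linux/device-mapper.h>
#include <linux/bio.h>

static int demo_flush_map(struct dm_target *ti, struct bio *bio,
			  union map_info *map_context)
{
	struct dm_dev *dev = ti->private;	/* assumed: stored by .ctr */

	bio->bi_bdev = dev->bdev;
	if (!(bio->bi_rw & REQ_FLUSH))
		bio->bi_sector -= ti->begin;	/* simple 1:1 remap for data */

	return DM_MAPIO_REMAPPED;	/* dm core resubmits the bio */
}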