author     Tejun Heo <tj@kernel.org>          2010-09-03 05:56:19 -0400
committer  Jens Axboe <jaxboe@fusionio.com>   2010-09-10 06:35:38 -0400
commit     d87f4c14f27dc82d215108d8392a7d26687148a1
tree       55f2a81f3df5d70fd85c4428089f6fe28540bcf4
parent     3a2edd0d6ddbd5fa3b389ea6db811285415ce6c8
dm: implement REQ_FLUSH/FUA support for bio-based dm
This patch converts bio-based dm to support REQ_FLUSH/FUA instead of
the now deprecated REQ_HARDBARRIER.
* -EOPNOTSUPP handling logic dropped.
* Preflush is handled as before, but the postflush is dropped and
  replaced by passing REQ_FUA down to the member request_queues.  This
  replaces one array-wide cache flush with member-specific FUA writes
  (a condensed sketch of the resulting flow follows the sign-offs).
* __split_and_process_bio() now calls __clone_and_map_flush() directly
  for flushes, and all FLUSH bios passed on to dm targets are now
  guaranteed to be zero length.  bio_empty_barrier() tests are replaced
  with REQ_FLUSH tests.
* Empty WRITE_BARRIERs are replaced with WRITE_FLUSHes.
* Dropped unlikely() around REQ_FLUSH tests. Flushes are not unlikely
enough to be marked with unlikely().
* The block layer now filters out REQ_FLUSH/FUA bios if the
  request_queue doesn't support cache flushing, so advertise
  REQ_FLUSH | REQ_FUA capability.
* Request-based dm isn't converted yet.  dm_init_request_based_queue()
  resets flush support to 0 for now.  To avoid disturbing the
  request-based dm code, dm->flush_error is added for bio-based dm
  while request-based dm continues to use dm->barrier_error.
Lightly tested linear, stripe, raid1, snap and crypt targets. Please
proceed with caution as I'm not familiar with the code base.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: dm-devel@redhat.com
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
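Before the per-file hunks, here is a condensed, paraphrased sketch of the bio-based flush flow this patch introduces (error requeueing and locking are trimmed; see the dm.c hunks below for the actual code):

/* Paraphrase of the new dm.c flush path below -- not the literal hunk. */
static void process_flush(struct mapped_device *md, struct bio *bio)
{
	md->flush_error = 0;

	/* preflush: wait for in-flight I/O, then send a zero-length
	 * REQ_FLUSH bio (md->flush_bio) down to every target and wait */
	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
	bio_init(&md->flush_bio);
	md->flush_bio.bi_bdev = md->bdev;
	md->flush_bio.bi_rw = WRITE_FLUSH;
	__split_and_process_bio(md, &md->flush_bio);
	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);

	/* empty flush, or the preflush failed: complete the bio now
	 * (the DM_ENDIO_REQUEUE case is omitted in this sketch) */
	if (!bio_has_data(bio) || md->flush_error) {
		bio_endio(bio, md->flush_error);
		return;
	}

	/* data portion: drop REQ_FLUSH but keep REQ_FUA, so each member
	 * device does a FUA write instead of an array-wide postflush */
	bio->bi_rw &= ~REQ_FLUSH;
	__split_and_process_bio(md, bio);
}

The old barrier path's second device-wide flush is gone entirely; REQ_FUA on the data bio takes its place.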
-rw-r--r--  drivers/md/dm-crypt.c           |   2
-rw-r--r--  drivers/md/dm-io.c              |  20
-rw-r--r--  drivers/md/dm-log.c             |   2
-rw-r--r--  drivers/md/dm-raid1.c           |   8
-rw-r--r--  drivers/md/dm-region-hash.c     |  16
-rw-r--r--  drivers/md/dm-snap-persistent.c |   2
-rw-r--r--  drivers/md/dm-snap.c            |   6
-rw-r--r--  drivers/md/dm-stripe.c          |   2
-rw-r--r--  drivers/md/dm.c                 | 119
9 files changed, 80 insertions, 97 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 368e8e98f705..d5b0e4c0e702 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1278,7 +1278,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
 	struct dm_crypt_io *io;
 	struct crypt_config *cc;
 
-	if (unlikely(bio_empty_barrier(bio))) {
+	if (bio->bi_rw & REQ_FLUSH) {
 		cc = ti->private;
 		bio->bi_bdev = cc->dev->bdev;
 		return DM_MAPIO_REMAPPED;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 0590c75b0ab6..136d4f71a116 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -31,7 +31,6 @@ struct dm_io_client {
  */
 struct io {
 	unsigned long error_bits;
-	unsigned long eopnotsupp_bits;
 	atomic_t count;
 	struct task_struct *sleeper;
 	struct dm_io_client *client;
@@ -130,11 +129,8 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
  *---------------------------------------------------------------*/
 static void dec_count(struct io *io, unsigned int region, int error)
 {
-	if (error) {
+	if (error)
 		set_bit(region, &io->error_bits);
-		if (error == -EOPNOTSUPP)
-			set_bit(region, &io->eopnotsupp_bits);
-	}
 
 	if (atomic_dec_and_test(&io->count)) {
 		if (io->sleeper)
@@ -310,8 +306,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 	sector_t remaining = where->count;
 
 	/*
-	 * where->count may be zero if rw holds a write barrier and we
-	 * need to send a zero-sized barrier.
+	 * where->count may be zero if rw holds a flush and we need to
+	 * send a zero-sized flush.
 	 */
 	do {
 		/*
@@ -364,7 +360,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
 	 */
 	for (i = 0; i < num_regions; i++) {
 		*dp = old_pages;
-		if (where[i].count || (rw & REQ_HARDBARRIER))
+		if (where[i].count || (rw & REQ_FLUSH))
 			do_region(rw, i, where + i, dp, io);
 	}
 
@@ -393,9 +389,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 		return -EIO;
 	}
 
-retry:
 	io->error_bits = 0;
-	io->eopnotsupp_bits = 0;
 	atomic_set(&io->count, 1); /* see dispatch_io() */
 	io->sleeper = current;
 	io->client = client;
@@ -412,11 +406,6 @@ retry:
 	}
 	set_current_state(TASK_RUNNING);
 
-	if (io->eopnotsupp_bits && (rw & REQ_HARDBARRIER)) {
-		rw &= ~REQ_HARDBARRIER;
-		goto retry;
-	}
-
 	if (error_bits)
 		*error_bits = io->error_bits;
 
@@ -437,7 +426,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
 
 	io = mempool_alloc(client->pool, GFP_NOIO);
 	io->error_bits = 0;
-	io->eopnotsupp_bits = 0;
 	atomic_set(&io->count, 1); /* see dispatch_io() */
 	io->sleeper = NULL;
 	io->client = client;
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 5a08be0222db..33420e68d153 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -300,7 +300,7 @@ static int flush_header(struct log_c *lc)
 		.count = 0,
 	};
 
-	lc->io_req.bi_rw = WRITE_BARRIER;
+	lc->io_req.bi_rw = WRITE_FLUSH;
 
 	return dm_io(&lc->io_req, 1, &null_location, NULL);
 }
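For reference, a minimal dm-io caller in the same shape as flush_header() above: a zero-count region plus WRITE_FLUSH now expresses what WRITE_BARRIER used to. The function name is made up for illustration; lc->io_req and lc->header_location are assumed to be initialized as in dm-log.c.

/* Illustrative only -- mirrors flush_header() in the hunk above. */
static int example_flush_log_device(struct log_c *lc)
{
	struct dm_io_region null_location = {
		.bdev = lc->header_location.bdev,
		.sector = 0,
		.count = 0,		/* zero-sized: cache flush only */
	};

	lc->io_req.bi_rw = WRITE_FLUSH;	/* was WRITE_BARRIER */

	return dm_io(&lc->io_req, 1, &null_location, NULL);
}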
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 7c081bcbc3cf..19a59b041c27 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -259,7 +259,7 @@ static int mirror_flush(struct dm_target *ti)
 	struct dm_io_region io[ms->nr_mirrors];
 	struct mirror *m;
 	struct dm_io_request io_req = {
-		.bi_rw = WRITE_BARRIER,
+		.bi_rw = WRITE_FLUSH,
 		.mem.type = DM_IO_KMEM,
 		.mem.ptr.bvec = NULL,
 		.client = ms->io_client,
@@ -629,7 +629,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
 	struct dm_io_region io[ms->nr_mirrors], *dest = io;
 	struct mirror *m;
 	struct dm_io_request io_req = {
-		.bi_rw = WRITE | (bio->bi_rw & WRITE_BARRIER),
+		.bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA),
 		.mem.type = DM_IO_BVEC,
 		.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
 		.notify.fn = write_callback,
@@ -670,7 +670,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 	bio_list_init(&requeue);
 
 	while ((bio = bio_list_pop(writes))) {
-		if (unlikely(bio_empty_barrier(bio))) {
+		if (bio->bi_rw & REQ_FLUSH) {
 			bio_list_add(&sync, bio);
 			continue;
 		}
@@ -1203,7 +1203,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
 	 * We need to dec pending if this was a write.
 	 */
 	if (rw == WRITE) {
-		if (likely(!bio_empty_barrier(bio)))
+		if (!(bio->bi_rw & REQ_FLUSH))
 			dm_rh_dec(ms->rh, map_context->ll);
 		return error;
 	}
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index bd5c58b28868..dad011aed0c9 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -81,9 +81,9 @@ struct dm_region_hash {
 	struct list_head failed_recovered_regions;
 
 	/*
-	 * If there was a barrier failure no regions can be marked clean.
+	 * If there was a flush failure no regions can be marked clean.
 	 */
-	int barrier_failure;
+	int flush_failure;
 
 	void *context;
 	sector_t target_begin;
@@ -217,7 +217,7 @@ struct dm_region_hash *dm_region_hash_create(
 	INIT_LIST_HEAD(&rh->quiesced_regions);
 	INIT_LIST_HEAD(&rh->recovered_regions);
 	INIT_LIST_HEAD(&rh->failed_recovered_regions);
-	rh->barrier_failure = 0;
+	rh->flush_failure = 0;
 
 	rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
 						      sizeof(struct dm_region));
@@ -399,8 +399,8 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
 	region_t region = dm_rh_bio_to_region(rh, bio);
 	int recovering = 0;
 
-	if (bio_empty_barrier(bio)) {
-		rh->barrier_failure = 1;
+	if (bio->bi_rw & REQ_FLUSH) {
+		rh->flush_failure = 1;
 		return;
 	}
 
@@ -524,7 +524,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
 	struct bio *bio;
 
 	for (bio = bios->head; bio; bio = bio->bi_next) {
-		if (bio_empty_barrier(bio))
+		if (bio->bi_rw & REQ_FLUSH)
 			continue;
 		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
 	}
@@ -555,9 +555,9 @@ void dm_rh_dec(struct dm_region_hash *rh, region_t region)
 	 */
 
 	/* do nothing for DM_RH_NOSYNC */
-	if (unlikely(rh->barrier_failure)) {
+	if (unlikely(rh->flush_failure)) {
 		/*
-		 * If a write barrier failed some time ago, we
+		 * If a write flush failed some time ago, we
 		 * don't know whether or not this write made it
 		 * to the disk, so we must resync the device.
 		 */
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index cc2bdb83f9ad..0b61792a2780 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -687,7 +687,7 @@ static void persistent_commit_exception(struct dm_exception_store *store,
 	/*
 	 * Commit exceptions to disk.
 	 */
-	if (ps->valid && area_io(ps, WRITE_BARRIER))
+	if (ps->valid && area_io(ps, WRITE_FLUSH_FUA))
 		ps->valid = 0;
 
 	/*
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 5974d3094d97..eed210152b75 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1587,7 +1587,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
 	chunk_t chunk;
 	struct dm_snap_pending_exception *pe = NULL;
 
-	if (unlikely(bio_empty_barrier(bio))) {
+	if (bio->bi_rw & REQ_FLUSH) {
 		bio->bi_bdev = s->cow->bdev;
 		return DM_MAPIO_REMAPPED;
 	}
@@ -1691,7 +1691,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 	int r = DM_MAPIO_REMAPPED;
 	chunk_t chunk;
 
-	if (unlikely(bio_empty_barrier(bio))) {
+	if (bio->bi_rw & REQ_FLUSH) {
 		if (!map_context->target_request_nr)
 			bio->bi_bdev = s->origin->bdev;
 		else
@@ -2135,7 +2135,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio,
 	struct dm_dev *dev = ti->private;
 	bio->bi_bdev = dev->bdev;
 
-	if (unlikely(bio_empty_barrier(bio)))
+	if (bio->bi_rw & REQ_FLUSH)
 		return DM_MAPIO_REMAPPED;
 
 	/* Only tell snapshots if this is a write */
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index c297f6da91ea..f0371b4c4fbf 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -271,7 +271,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio,
 	uint32_t stripe;
 	unsigned target_request_nr;
 
-	if (unlikely(bio_empty_barrier(bio))) {
+	if (bio->bi_rw & REQ_FLUSH) {
 		target_request_nr = map_context->target_request_nr;
 		BUG_ON(target_request_nr >= sc->stripes);
 		bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
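The target-side pattern repeated in the crypt, raid1, snapshot and stripe hunks above boils down to the sketch below. The target and its context structure are hypothetical; only the REQ_FLUSH test, the zero-length guarantee and DM_MAPIO_REMAPPED come from the patch.

/* Hypothetical bio-based target map function, for illustration. */
static int example_map(struct dm_target *ti, struct bio *bio,
		       union map_info *map_context)
{
	struct example_ctx *ec = ti->private;	/* assumed per-target state */

	if (bio->bi_rw & REQ_FLUSH) {
		/* flush bios reaching a target are guaranteed zero length;
		 * just pick the destination member device (when one clone is
		 * sent per member, map_context->target_request_nr says which) */
		unsigned nr = map_context->target_request_nr;

		bio->bi_bdev = ec->member[nr]->bdev;
		return DM_MAPIO_REMAPPED;
	}

	/* normal I/O: remap sector/device as the target usually would */
	bio->bi_bdev = ec->member[0]->bdev;
	return DM_MAPIO_REMAPPED;
}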
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b1d92be8f990..32e6622767ad 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -144,15 +144,16 @@ struct mapped_device {
 	spinlock_t deferred_lock;
 
 	/*
-	 * An error from the barrier request currently being processed.
+	 * An error from the flush request currently being processed.
 	 */
-	int barrier_error;
+	int flush_error;
 
 	/*
 	 * Protect barrier_error from concurrent endio processing
 	 * in request-based dm.
 	 */
 	spinlock_t barrier_error_lock;
+	int barrier_error;
 
 	/*
 	 * Processing queue (flush/barriers)
@@ -200,8 +201,8 @@ struct mapped_device {
 	/* sysfs handle */
 	struct kobject kobj;
 
-	/* zero-length barrier that will be cloned and submitted to targets */
-	struct bio barrier_bio;
+	/* zero-length flush that will be cloned and submitted to targets */
+	struct bio flush_bio;
 };
 
 /*
@@ -512,7 +513,7 @@ static void end_io_acct(struct dm_io *io)
 
 	/*
 	 * After this is decremented the bio must not be touched if it is
-	 * a barrier.
+	 * a flush.
 	 */
 	dm_disk(md)->part0.in_flight[rw] = pending =
 		atomic_dec_return(&md->pending[rw]);
@@ -626,7 +627,7 @@ static void dec_pending(struct dm_io *io, int error)
 		 */
 		spin_lock_irqsave(&md->deferred_lock, flags);
 		if (__noflush_suspending(md)) {
-			if (!(io->bio->bi_rw & REQ_HARDBARRIER))
+			if (!(io->bio->bi_rw & REQ_FLUSH))
 				bio_list_add_head(&md->deferred,
 						  io->bio);
 		} else
@@ -638,20 +639,14 @@ static void dec_pending(struct dm_io *io, int error)
 	io_error = io->error;
 	bio = io->bio;
 
-	if (bio->bi_rw & REQ_HARDBARRIER) {
+	if (bio->bi_rw & REQ_FLUSH) {
 		/*
-		 * There can be just one barrier request so we use
+		 * There can be just one flush request so we use
 		 * a per-device variable for error reporting.
 		 * Note that you can't touch the bio after end_io_acct
-		 *
-		 * We ignore -EOPNOTSUPP for empty flush reported by
-		 * underlying devices. We assume that if the device
-		 * doesn't support empty barriers, it doesn't need
-		 * cache flushing commands.
 		 */
-		if (!md->barrier_error &&
-		    !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP))
-			md->barrier_error = io_error;
+		if (!md->flush_error)
+			md->flush_error = io_error;
 		end_io_acct(io);
 		free_io(md, io);
 	} else {
@@ -1119,7 +1114,7 @@ static void dm_bio_destructor(struct bio *bio)
 }
 
 /*
- * Creates a little bio that is just does part of a bvec.
+ * Creates a little bio that just does part of a bvec.
 */
 static struct bio *split_bvec(struct bio *bio, sector_t sector,
 			      unsigned short idx, unsigned int offset,
@@ -1134,7 +1129,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
 
 	clone->bi_sector = sector;
 	clone->bi_bdev = bio->bi_bdev;
-	clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER;
+	clone->bi_rw = bio->bi_rw;
 	clone->bi_vcnt = 1;
 	clone->bi_size = to_bytes(len);
 	clone->bi_io_vec->bv_offset = offset;
@@ -1161,7 +1156,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
 
 	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
 	__bio_clone(clone, bio);
-	clone->bi_rw &= ~REQ_HARDBARRIER;
 	clone->bi_destructor = dm_bio_destructor;
 	clone->bi_sector = sector;
 	clone->bi_idx = idx;
@@ -1225,7 +1219,7 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
 		__issue_target_request(ci, ti, request_nr, len);
 }
 
-static int __clone_and_map_empty_barrier(struct clone_info *ci)
+static int __clone_and_map_flush(struct clone_info *ci)
 {
 	unsigned target_nr = 0;
 	struct dm_target *ti;
@@ -1289,9 +1283,6 @@ static int __clone_and_map(struct clone_info *ci)
 	sector_t len = 0, max;
 	struct dm_target_io *tio;
 
-	if (unlikely(bio_empty_barrier(bio)))
-		return __clone_and_map_empty_barrier(ci);
-
 	if (unlikely(bio->bi_rw & REQ_DISCARD))
 		return __clone_and_map_discard(ci);
 
@@ -1383,11 +1374,11 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 
 	ci.map = dm_get_live_table(md);
 	if (unlikely(!ci.map)) {
-		if (!(bio->bi_rw & REQ_HARDBARRIER))
+		if (!(bio->bi_rw & REQ_FLUSH))
 			bio_io_error(bio);
 		else
-			if (!md->barrier_error)
-				md->barrier_error = -EIO;
+			if (!md->flush_error)
+				md->flush_error = -EIO;
 		return;
 	}
 
@@ -1400,14 +1391,22 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 	ci.io->md = md;
 	spin_lock_init(&ci.io->endio_lock);
 	ci.sector = bio->bi_sector;
-	ci.sector_count = bio_sectors(bio);
-	if (unlikely(bio_empty_barrier(bio)))
+	if (!(bio->bi_rw & REQ_FLUSH))
+		ci.sector_count = bio_sectors(bio);
+	else {
+		/* all FLUSH bio's reaching here should be empty */
+		WARN_ON_ONCE(bio_has_data(bio));
 		ci.sector_count = 1;
+	}
 	ci.idx = bio->bi_idx;
 
 	start_io_acct(ci.io);
-	while (ci.sector_count && !error)
-		error = __clone_and_map(&ci);
+	while (ci.sector_count && !error) {
+		if (!(bio->bi_rw & REQ_FLUSH))
+			error = __clone_and_map(&ci);
+		else
+			error = __clone_and_map_flush(&ci);
+	}
 
 	/* drop the extra reference count */
 	dec_pending(ci.io, error);
@@ -1492,11 +1491,11 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
 	part_stat_unlock();
 
 	/*
-	 * If we're suspended or the thread is processing barriers
+	 * If we're suspended or the thread is processing flushes
	 * we have to queue this io for later.
 	 */
 	if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
-	    unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
+	    (bio->bi_rw & REQ_FLUSH)) {
 		up_read(&md->io_lock);
 
 		if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
@@ -1940,6 +1939,7 @@ static void dm_init_md_queue(struct mapped_device *md)
 	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
 	md->queue->unplug_fn = dm_unplug_all;
 	blk_queue_merge_bvec(md->queue, dm_merge_bvec);
+	blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
 }
 
 /*
@@ -2245,7 +2245,8 @@ static int dm_init_request_based_queue(struct mapped_device *md)
 	blk_queue_softirq_done(md->queue, dm_softirq_done);
 	blk_queue_prep_rq(md->queue, dm_prep_fn);
 	blk_queue_lld_busy(md->queue, dm_lld_busy);
-	blk_queue_flush(md->queue, REQ_FLUSH);
+	/* no flush support for request based dm yet */
+	blk_queue_flush(md->queue, 0);
 
 	elv_register_queue(md->queue);
 
@@ -2406,41 +2407,35 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 	return r;
 }
 
-static void dm_flush(struct mapped_device *md)
+static void process_flush(struct mapped_device *md, struct bio *bio)
 {
-	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-
-	bio_init(&md->barrier_bio);
-	md->barrier_bio.bi_bdev = md->bdev;
-	md->barrier_bio.bi_rw = WRITE_BARRIER;
-	__split_and_process_bio(md, &md->barrier_bio);
+	md->flush_error = 0;
 
+	/* handle REQ_FLUSH */
 	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-}
 
-static void process_barrier(struct mapped_device *md, struct bio *bio)
-{
-	md->barrier_error = 0;
+	bio_init(&md->flush_bio);
+	md->flush_bio.bi_bdev = md->bdev;
+	md->flush_bio.bi_rw = WRITE_FLUSH;
+	__split_and_process_bio(md, &md->flush_bio);
 
-	dm_flush(md);
+	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
 
-	if (!bio_empty_barrier(bio)) {
-		__split_and_process_bio(md, bio);
-		/*
-		 * If the request isn't supported, don't waste time with
-		 * the second flush.
-		 */
-		if (md->barrier_error != -EOPNOTSUPP)
-			dm_flush(md);
+	/* if it's an empty flush or the preflush failed, we're done */
+	if (!bio_has_data(bio) || md->flush_error) {
+		if (md->flush_error != DM_ENDIO_REQUEUE)
+			bio_endio(bio, md->flush_error);
+		else {
+			spin_lock_irq(&md->deferred_lock);
+			bio_list_add_head(&md->deferred, bio);
+			spin_unlock_irq(&md->deferred_lock);
+		}
+		return;
 	}
 
-	if (md->barrier_error != DM_ENDIO_REQUEUE)
-		bio_endio(bio, md->barrier_error);
-	else {
-		spin_lock_irq(&md->deferred_lock);
-		bio_list_add_head(&md->deferred, bio);
-		spin_unlock_irq(&md->deferred_lock);
-	}
+	/* issue data + REQ_FUA */
+	bio->bi_rw &= ~REQ_FLUSH;
+	__split_and_process_bio(md, bio);
 }
 
 /*
@@ -2469,8 +2464,8 @@ static void dm_wq_work(struct work_struct *work)
 		if (dm_request_based(md))
 			generic_make_request(c);
 		else {
-			if (c->bi_rw & REQ_HARDBARRIER)
-				process_barrier(md, c);
+			if (c->bi_rw & REQ_FLUSH)
+				process_flush(md, c);
 			else
 				__split_and_process_bio(md, c);
 		}