| author | Tejun Heo <tj@kernel.org> | 2010-09-03 05:56:19 -0400 |
| committer | Jens Axboe <jaxboe@fusionio.com> | 2010-09-10 06:35:38 -0400 |
| commit | d87f4c14f27dc82d215108d8392a7d26687148a1 | |
| tree | 55f2a81f3df5d70fd85c4428089f6fe28540bcf4 | |
| parent | 3a2edd0d6ddbd5fa3b389ea6db811285415ce6c8 | |
dm: implement REQ_FLUSH/FUA support for bio-based dm
This patch converts bio-based dm to support REQ_FLUSH/FUA instead of the
now-deprecated REQ_HARDBARRIER.
* -EOPNOTSUPP handling logic dropped.
* Preflush is handled as before, but the postflush is dropped and replaced
with passing down REQ_FUA to member request_queues. This replaces one
array-wide cache flush with member-specific FUA writes (a simplified
sketch of the resulting flush sequencing follows this list).
* __split_and_process_bio() now calls __clone_and_map_flush() directly
for flushes and guarantees that all FLUSH bios going to targets are
zero length.
* Since every FLUSH bio passed on to dm targets is therefore empty,
bio_empty_barrier() tests are replaced with REQ_FLUSH tests (a
per-target map() sketch appears after the message).
* Empty WRITE_BARRIERs are replaced with WRITE_FLUSHes.
* Dropped unlikely() around REQ_FLUSH tests. Flushes are not unlikely
enough to be marked with unlikely().
* The block layer now filters out REQ_FLUSH/FUA bios if the request_queue
doesn't support cache flushing, so bio-based dm advertises REQ_FLUSH |
REQ_FUA capability.
* Request-based dm isn't converted yet. dm_init_request_based_queue()
resets flush support to 0 for now. To avoid disturbing the request-based
dm code, dm->flush_error is added for bio-based dm while request-based
dm continues to use dm->barrier_error.
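
For orientation, the bio-based flush sequencing described above (implemented
by process_flush() in the dm.c hunk further down) works out to roughly the
following. This is a simplified, non-authoritative sketch: it omits the
DM_ENDIO_REQUEUE/deferred-list requeue path and the locking that the real
code in the diff keeps.

```c
/*
 * Simplified sketch of the new bio-based flush sequencing; see
 * process_flush() in the drivers/md/dm.c hunk below for the real code.
 */
static void process_flush_sketch(struct mapped_device *md, struct bio *bio)
{
        md->flush_error = 0;

        /* preflush: drain in-flight I/O, then clone an empty WRITE_FLUSH
         * bio to every target and wait for it to complete */
        dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
        bio_init(&md->flush_bio);
        md->flush_bio.bi_bdev = md->bdev;
        md->flush_bio.bi_rw = WRITE_FLUSH;
        __split_and_process_bio(md, &md->flush_bio);
        dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);

        /* an empty flush, or a failed preflush, is finished here */
        if (!bio_has_data(bio) || md->flush_error) {
                bio_endio(bio, md->flush_error);
                return;
        }

        /* data phase: drop REQ_FLUSH, let REQ_FUA propagate to the members */
        bio->bi_rw &= ~REQ_FLUSH;
        __split_and_process_bio(md, bio);
}
```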
Lightly tested with the linear, stripe, raid1, snap and crypt targets.
Please proceed with caution, as I'm not familiar with the code base.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: dm-devel@redhat.com
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
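
As a reading aid for the per-target hunks below: after this patch a
bio-based target's map function can assume that any REQ_FLUSH bio it sees
is empty and only needs to be pointed at the right member device. A minimal
sketch of that pattern, using a hypothetical target and context structure
(modeled on the crypt_map() and snapshot_map() changes in the diff):

```c
/* Hypothetical bio-based target: REQ_FLUSH handling after this patch. */
static int example_map(struct dm_target *ti, struct bio *bio,
                       union map_info *map_context)
{
        struct example_c *ec = ti->private;     /* assumed per-target state */

        if (bio->bi_rw & REQ_FLUSH) {
                /* zero length is guaranteed by __split_and_process_bio() */
                bio->bi_bdev = ec->dev->bdev;
                return DM_MAPIO_REMAPPED;
        }

        /* ... normal read/write remapping for the data path ... */
        bio->bi_bdev = ec->dev->bdev;
        return DM_MAPIO_REMAPPED;
}
```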
| -rw-r--r-- | drivers/md/dm-crypt.c | 2 |
| -rw-r--r-- | drivers/md/dm-io.c | 20 |
| -rw-r--r-- | drivers/md/dm-log.c | 2 |
| -rw-r--r-- | drivers/md/dm-raid1.c | 8 |
| -rw-r--r-- | drivers/md/dm-region-hash.c | 16 |
| -rw-r--r-- | drivers/md/dm-snap-persistent.c | 2 |
| -rw-r--r-- | drivers/md/dm-snap.c | 6 |
| -rw-r--r-- | drivers/md/dm-stripe.c | 2 |
| -rw-r--r-- | drivers/md/dm.c | 119 |
9 files changed, 80 insertions, 97 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 368e8e98f705..d5b0e4c0e702 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1278,7 +1278,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
         struct dm_crypt_io *io;
         struct crypt_config *cc;
 
-        if (unlikely(bio_empty_barrier(bio))) {
+        if (bio->bi_rw & REQ_FLUSH) {
                 cc = ti->private;
                 bio->bi_bdev = cc->dev->bdev;
                 return DM_MAPIO_REMAPPED;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 0590c75b0ab6..136d4f71a116 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -31,7 +31,6 @@ struct dm_io_client {
  */
 struct io {
         unsigned long error_bits;
-        unsigned long eopnotsupp_bits;
         atomic_t count;
         struct task_struct *sleeper;
         struct dm_io_client *client;
@@ -130,11 +129,8 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
  *---------------------------------------------------------------*/
 static void dec_count(struct io *io, unsigned int region, int error)
 {
-        if (error) {
+        if (error)
                 set_bit(region, &io->error_bits);
-                if (error == -EOPNOTSUPP)
-                        set_bit(region, &io->eopnotsupp_bits);
-        }
 
         if (atomic_dec_and_test(&io->count)) {
                 if (io->sleeper)
@@ -310,8 +306,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
         sector_t remaining = where->count;
 
         /*
-         * where->count may be zero if rw holds a write barrier and we
-         * need to send a zero-sized barrier.
+         * where->count may be zero if rw holds a flush and we need to
+         * send a zero-sized flush.
         */
         do {
                 /*
@@ -364,7 +360,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
         */
         for (i = 0; i < num_regions; i++) {
                 *dp = old_pages;
-                if (where[i].count || (rw & REQ_HARDBARRIER))
+                if (where[i].count || (rw & REQ_FLUSH))
                         do_region(rw, i, where + i, dp, io);
         }
 
@@ -393,9 +389,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
                 return -EIO;
         }
 
-retry:
         io->error_bits = 0;
-        io->eopnotsupp_bits = 0;
         atomic_set(&io->count, 1); /* see dispatch_io() */
         io->sleeper = current;
         io->client = client;
@@ -412,11 +406,6 @@ retry:
         }
         set_current_state(TASK_RUNNING);
 
-        if (io->eopnotsupp_bits && (rw & REQ_HARDBARRIER)) {
-                rw &= ~REQ_HARDBARRIER;
-                goto retry;
-        }
-
         if (error_bits)
                 *error_bits = io->error_bits;
 
@@ -437,7 +426,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
 
         io = mempool_alloc(client->pool, GFP_NOIO);
         io->error_bits = 0;
-        io->eopnotsupp_bits = 0;
         atomic_set(&io->count, 1); /* see dispatch_io() */
         io->sleeper = NULL;
         io->client = client;
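
With the -EOPNOTSUPP retry gone from sync_io(), a dm_io() caller that wants
a member device's write cache flushed simply issues a zero-count WRITE_FLUSH
request and looks at the returned error bits. A minimal sketch with a
hypothetical helper, mirroring flush_header() in dm-log.c and mirror_flush()
in dm-raid1.c below:

```c
/* Hypothetical helper: flush one member device's write cache via dm-io. */
static int example_flush(struct dm_io_client *client, struct block_device *bdev)
{
        unsigned long error_bits = 0;
        int r;
        struct dm_io_region where = {
                .bdev = bdev,
                .sector = 0,
                .count = 0,             /* zero-sized flush */
        };
        struct dm_io_request io_req = {
                .bi_rw = WRITE_FLUSH,
                .mem.type = DM_IO_KMEM,
                .mem.ptr.addr = NULL,
                .client = client,       /* .notify.fn == NULL: synchronous */
        };

        r = dm_io(&io_req, 1, &where, &error_bits);
        if (r)
                return r;
        return error_bits ? -EIO : 0;
}
```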
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 5a08be0222db..33420e68d153 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -300,7 +300,7 @@ static int flush_header(struct log_c *lc)
                 .count = 0,
         };
 
-        lc->io_req.bi_rw = WRITE_BARRIER;
+        lc->io_req.bi_rw = WRITE_FLUSH;
 
         return dm_io(&lc->io_req, 1, &null_location, NULL);
 }
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 7c081bcbc3cf..19a59b041c27 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -259,7 +259,7 @@ static int mirror_flush(struct dm_target *ti)
         struct dm_io_region io[ms->nr_mirrors];
         struct mirror *m;
         struct dm_io_request io_req = {
-                .bi_rw = WRITE_BARRIER,
+                .bi_rw = WRITE_FLUSH,
                 .mem.type = DM_IO_KMEM,
                 .mem.ptr.bvec = NULL,
                 .client = ms->io_client,
@@ -629,7 +629,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
         struct dm_io_region io[ms->nr_mirrors], *dest = io;
         struct mirror *m;
         struct dm_io_request io_req = {
-                .bi_rw = WRITE | (bio->bi_rw & WRITE_BARRIER),
+                .bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA),
                 .mem.type = DM_IO_BVEC,
                 .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
                 .notify.fn = write_callback,
@@ -670,7 +670,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
         bio_list_init(&requeue);
 
         while ((bio = bio_list_pop(writes))) {
-                if (unlikely(bio_empty_barrier(bio))) {
+                if (bio->bi_rw & REQ_FLUSH) {
                         bio_list_add(&sync, bio);
                         continue;
                 }
@@ -1203,7 +1203,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
          * We need to dec pending if this was a write.
          */
         if (rw == WRITE) {
-                if (likely(!bio_empty_barrier(bio)))
+                if (!(bio->bi_rw & REQ_FLUSH))
                         dm_rh_dec(ms->rh, map_context->ll);
                 return error;
         }
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index bd5c58b28868..dad011aed0c9 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -81,9 +81,9 @@ struct dm_region_hash {
         struct list_head failed_recovered_regions;
 
         /*
-         * If there was a barrier failure no regions can be marked clean.
+         * If there was a flush failure no regions can be marked clean.
         */
-        int barrier_failure;
+        int flush_failure;
 
         void *context;
         sector_t target_begin;
@@ -217,7 +217,7 @@ struct dm_region_hash *dm_region_hash_create(
         INIT_LIST_HEAD(&rh->quiesced_regions);
         INIT_LIST_HEAD(&rh->recovered_regions);
         INIT_LIST_HEAD(&rh->failed_recovered_regions);
-        rh->barrier_failure = 0;
+        rh->flush_failure = 0;
 
         rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
                                                       sizeof(struct dm_region));
@@ -399,8 +399,8 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
         region_t region = dm_rh_bio_to_region(rh, bio);
         int recovering = 0;
 
-        if (bio_empty_barrier(bio)) {
-                rh->barrier_failure = 1;
+        if (bio->bi_rw & REQ_FLUSH) {
+                rh->flush_failure = 1;
                 return;
         }
 
@@ -524,7 +524,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
         struct bio *bio;
 
         for (bio = bios->head; bio; bio = bio->bi_next) {
-                if (bio_empty_barrier(bio))
+                if (bio->bi_rw & REQ_FLUSH)
                         continue;
                 rh_inc(rh, dm_rh_bio_to_region(rh, bio));
         }
@@ -555,9 +555,9 @@ void dm_rh_dec(struct dm_region_hash *rh, region_t region)
                 */
 
                /* do nothing for DM_RH_NOSYNC */
-                if (unlikely(rh->barrier_failure)) {
+                if (unlikely(rh->flush_failure)) {
                         /*
-                         * If a write barrier failed some time ago, we
+                         * If a write flush failed some time ago, we
                          * don't know whether or not this write made it
                          * to the disk, so we must resync the device.
                          */
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index cc2bdb83f9ad..0b61792a2780 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -687,7 +687,7 @@ static void persistent_commit_exception(struct dm_exception_store *store,
         /*
          * Commit exceptions to disk.
          */
-        if (ps->valid && area_io(ps, WRITE_BARRIER))
+        if (ps->valid && area_io(ps, WRITE_FLUSH_FUA))
                 ps->valid = 0;
 
         /*
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 5974d3094d97..eed210152b75 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1587,7 +1587,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
         chunk_t chunk;
         struct dm_snap_pending_exception *pe = NULL;
 
-        if (unlikely(bio_empty_barrier(bio))) {
+        if (bio->bi_rw & REQ_FLUSH) {
                 bio->bi_bdev = s->cow->bdev;
                 return DM_MAPIO_REMAPPED;
         }
@@ -1691,7 +1691,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
         int r = DM_MAPIO_REMAPPED;
         chunk_t chunk;
 
-        if (unlikely(bio_empty_barrier(bio))) {
+        if (bio->bi_rw & REQ_FLUSH) {
                 if (!map_context->target_request_nr)
                         bio->bi_bdev = s->origin->bdev;
                 else
@@ -2135,7 +2135,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio,
         struct dm_dev *dev = ti->private;
         bio->bi_bdev = dev->bdev;
 
-        if (unlikely(bio_empty_barrier(bio)))
+        if (bio->bi_rw & REQ_FLUSH)
                 return DM_MAPIO_REMAPPED;
 
         /* Only tell snapshots if this is a write */
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index c297f6da91ea..f0371b4c4fbf 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -271,7 +271,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio,
         uint32_t stripe;
         unsigned target_request_nr;
 
-        if (unlikely(bio_empty_barrier(bio))) {
+        if (bio->bi_rw & REQ_FLUSH) {
                 target_request_nr = map_context->target_request_nr;
                 BUG_ON(target_request_nr >= sc->stripes);
                 bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b1d92be8f990..32e6622767ad 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -144,15 +144,16 @@ struct mapped_device {
         spinlock_t deferred_lock;
 
         /*
-         * An error from the barrier request currently being processed.
+         * An error from the flush request currently being processed.
         */
-        int barrier_error;
+        int flush_error;
 
         /*
          * Protect barrier_error from concurrent endio processing
          * in request-based dm.
         */
         spinlock_t barrier_error_lock;
+        int barrier_error;
 
         /*
          * Processing queue (flush/barriers)
@@ -200,8 +201,8 @@ struct mapped_device {
         /* sysfs handle */
         struct kobject kobj;
 
-        /* zero-length barrier that will be cloned and submitted to targets */
-        struct bio barrier_bio;
+        /* zero-length flush that will be cloned and submitted to targets */
+        struct bio flush_bio;
 };
 
 /*
@@ -512,7 +513,7 @@ static void end_io_acct(struct dm_io *io)
 
         /*
          * After this is decremented the bio must not be touched if it is
-         * a barrier.
+         * a flush.
         */
         dm_disk(md)->part0.in_flight[rw] = pending =
                 atomic_dec_return(&md->pending[rw]);
@@ -626,7 +627,7 @@ static void dec_pending(struct dm_io *io, int error)
                 */
                 spin_lock_irqsave(&md->deferred_lock, flags);
                 if (__noflush_suspending(md)) {
-                        if (!(io->bio->bi_rw & REQ_HARDBARRIER))
+                        if (!(io->bio->bi_rw & REQ_FLUSH))
                                 bio_list_add_head(&md->deferred,
                                                   io->bio);
                 } else
@@ -638,20 +639,14 @@ static void dec_pending(struct dm_io *io, int error)
                 io_error = io->error;
                 bio = io->bio;
 
-                if (bio->bi_rw & REQ_HARDBARRIER) {
+                if (bio->bi_rw & REQ_FLUSH) {
                         /*
-                         * There can be just one barrier request so we use
+                         * There can be just one flush request so we use
                          * a per-device variable for error reporting.
                          * Note that you can't touch the bio after end_io_acct
-                         *
-                         * We ignore -EOPNOTSUPP for empty flush reported by
-                         * underlying devices. We assume that if the device
-                         * doesn't support empty barriers, it doesn't need
-                         * cache flushing commands.
                         */
-                        if (!md->barrier_error &&
-                            !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP))
-                                md->barrier_error = io_error;
+                        if (!md->flush_error)
+                                md->flush_error = io_error;
                         end_io_acct(io);
                         free_io(md, io);
                 } else {
@@ -1119,7 +1114,7 @@ static void dm_bio_destructor(struct bio *bio)
 }
 
 /*
- * Creates a little bio that is just does part of a bvec.
+ * Creates a little bio that just does part of a bvec.
 */
 static struct bio *split_bvec(struct bio *bio, sector_t sector,
                               unsigned short idx, unsigned int offset,
@@ -1134,7 +1129,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
 
         clone->bi_sector = sector;
         clone->bi_bdev = bio->bi_bdev;
-        clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER;
+        clone->bi_rw = bio->bi_rw;
         clone->bi_vcnt = 1;
         clone->bi_size = to_bytes(len);
         clone->bi_io_vec->bv_offset = offset;
@@ -1161,7 +1156,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
 
         clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
         __bio_clone(clone, bio);
-        clone->bi_rw &= ~REQ_HARDBARRIER;
         clone->bi_destructor = dm_bio_destructor;
         clone->bi_sector = sector;
         clone->bi_idx = idx;
@@ -1225,7 +1219,7 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
                 __issue_target_request(ci, ti, request_nr, len);
 }
 
-static int __clone_and_map_empty_barrier(struct clone_info *ci)
+static int __clone_and_map_flush(struct clone_info *ci)
 {
         unsigned target_nr = 0;
         struct dm_target *ti;
@@ -1289,9 +1283,6 @@ static int __clone_and_map(struct clone_info *ci)
         sector_t len = 0, max;
         struct dm_target_io *tio;
 
-        if (unlikely(bio_empty_barrier(bio)))
-                return __clone_and_map_empty_barrier(ci);
-
         if (unlikely(bio->bi_rw & REQ_DISCARD))
                 return __clone_and_map_discard(ci);
 
@@ -1383,11 +1374,11 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 
         ci.map = dm_get_live_table(md);
         if (unlikely(!ci.map)) {
-                if (!(bio->bi_rw & REQ_HARDBARRIER))
+                if (!(bio->bi_rw & REQ_FLUSH))
                         bio_io_error(bio);
                 else
-                        if (!md->barrier_error)
-                                md->barrier_error = -EIO;
+                        if (!md->flush_error)
+                                md->flush_error = -EIO;
                 return;
         }
 
@@ -1400,14 +1391,22 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
         ci.io->md = md;
         spin_lock_init(&ci.io->endio_lock);
         ci.sector = bio->bi_sector;
-        ci.sector_count = bio_sectors(bio);
-        if (unlikely(bio_empty_barrier(bio)))
+        if (!(bio->bi_rw & REQ_FLUSH))
+                ci.sector_count = bio_sectors(bio);
+        else {
+                /* all FLUSH bio's reaching here should be empty */
+                WARN_ON_ONCE(bio_has_data(bio));
                 ci.sector_count = 1;
+        }
         ci.idx = bio->bi_idx;
 
         start_io_acct(ci.io);
-        while (ci.sector_count && !error)
-                error = __clone_and_map(&ci);
+        while (ci.sector_count && !error) {
+                if (!(bio->bi_rw & REQ_FLUSH))
+                        error = __clone_and_map(&ci);
+                else
+                        error = __clone_and_map_flush(&ci);
+        }
 
         /* drop the extra reference count */
         dec_pending(ci.io, error);
@@ -1492,11 +1491,11 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
         part_stat_unlock();
 
         /*
-         * If we're suspended or the thread is processing barriers
+         * If we're suspended or the thread is processing flushes
          * we have to queue this io for later.
         */
         if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
-            unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
+            (bio->bi_rw & REQ_FLUSH)) {
                 up_read(&md->io_lock);
 
                 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
@@ -1940,6 +1939,7 @@ static void dm_init_md_queue(struct mapped_device *md)
         blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
         md->queue->unplug_fn = dm_unplug_all;
         blk_queue_merge_bvec(md->queue, dm_merge_bvec);
+        blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
 }
 
 /*
@@ -2245,7 +2245,8 @@ static int dm_init_request_based_queue(struct mapped_device *md)
         blk_queue_softirq_done(md->queue, dm_softirq_done);
         blk_queue_prep_rq(md->queue, dm_prep_fn);
         blk_queue_lld_busy(md->queue, dm_lld_busy);
-        blk_queue_flush(md->queue, REQ_FLUSH);
+        /* no flush support for request based dm yet */
+        blk_queue_flush(md->queue, 0);
 
         elv_register_queue(md->queue);
 
@@ -2406,41 +2407,35 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
         return r;
 }
 
-static void dm_flush(struct mapped_device *md)
+static void process_flush(struct mapped_device *md, struct bio *bio)
 {
-        dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-
-        bio_init(&md->barrier_bio);
-        md->barrier_bio.bi_bdev = md->bdev;
-        md->barrier_bio.bi_rw = WRITE_BARRIER;
-        __split_and_process_bio(md, &md->barrier_bio);
+        md->flush_error = 0;
 
+        /* handle REQ_FLUSH */
         dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-}
 
-static void process_barrier(struct mapped_device *md, struct bio *bio)
-{
-        md->barrier_error = 0;
+        bio_init(&md->flush_bio);
+        md->flush_bio.bi_bdev = md->bdev;
+        md->flush_bio.bi_rw = WRITE_FLUSH;
+        __split_and_process_bio(md, &md->flush_bio);
 
-        dm_flush(md);
+        dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
 
-        if (!bio_empty_barrier(bio)) {
-                __split_and_process_bio(md, bio);
-                /*
-                 * If the request isn't supported, don't waste time with
-                 * the second flush.
-                 */
-                if (md->barrier_error != -EOPNOTSUPP)
-                        dm_flush(md);
+        /* if it's an empty flush or the preflush failed, we're done */
+        if (!bio_has_data(bio) || md->flush_error) {
+                if (md->flush_error != DM_ENDIO_REQUEUE)
+                        bio_endio(bio, md->flush_error);
+                else {
+                        spin_lock_irq(&md->deferred_lock);
+                        bio_list_add_head(&md->deferred, bio);
+                        spin_unlock_irq(&md->deferred_lock);
+                }
+                return;
         }
 
-        if (md->barrier_error != DM_ENDIO_REQUEUE)
-                bio_endio(bio, md->barrier_error);
-        else {
-                spin_lock_irq(&md->deferred_lock);
-                bio_list_add_head(&md->deferred, bio);
-                spin_unlock_irq(&md->deferred_lock);
-        }
+        /* issue data + REQ_FUA */
+        bio->bi_rw &= ~REQ_FLUSH;
+        __split_and_process_bio(md, bio);
 }
 
 /*
@@ -2469,8 +2464,8 @@ static void dm_wq_work(struct work_struct *work)
                 if (dm_request_based(md))
                         generic_make_request(c);
                 else {
-                        if (c->bi_rw & REQ_HARDBARRIER)
-                                process_barrier(md, c);
+                        if (c->bi_rw & REQ_FLUSH)
+                                process_flush(md, c);
                         else
                                 __split_and_process_bio(md, c);
                 }
