aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author Tejun Heo <tj@kernel.org> 2010-09-03 05:56:19 -0400
committer Jens Axboe <jaxboe@fusionio.com> 2010-09-10 06:35:38 -0400
commit d87f4c14f27dc82d215108d8392a7d26687148a1 (patch)
tree 55f2a81f3df5d70fd85c4428089f6fe28540bcf4 /drivers
parent 3a2edd0d6ddbd5fa3b389ea6db811285415ce6c8 (diff)
dm: implement REQ_FLUSH/FUA support for bio-based dm
This patch converts bio-based dm to support REQ_FLUSH/FUA instead of the now-deprecated REQ_HARDBARRIER. * -EOPNOTSUPP handling logic dropped. * Preflush is handled as before but postflush is dropped and replaced with passing down REQ_FUA to member request_queues. This replaces one array wide cache flush w/ member specific FUA writes. * __split_and_process_bio() now calls __clone_and_map_flush() directly for flushes and guarantees all FLUSH bio's going to targets are zero length. * It's now guaranteed that all FLUSH bio's which are passed onto dm targets are zero length. bio_empty_barrier() tests are replaced with REQ_FLUSH tests. * Empty WRITE_BARRIERs are replaced with WRITE_FLUSHes. * Dropped unlikely() around REQ_FLUSH tests. Flushes are not unlikely enough to be marked with unlikely(). * Block layer now filters out REQ_FLUSH/FUA bio's if the request_queue doesn't support cache flushing. Advertise REQ_FLUSH | REQ_FUA capability. * Request based dm isn't converted yet. dm_init_request_based_queue() resets flush support to 0 for now. To avoid disturbing request based dm code, dm->flush_error is added for bio based dm while request based dm continues to use dm->barrier_error. Lightly tested linear, stripe, raid1, snap and crypt targets. Please proceed with caution as I'm not familiar with the code base. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: dm-devel@redhat.com Cc: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/md/dm-crypt.c2
-rw-r--r--drivers/md/dm-io.c20
-rw-r--r--drivers/md/dm-log.c2
-rw-r--r--drivers/md/dm-raid1.c8
-rw-r--r--drivers/md/dm-region-hash.c16
-rw-r--r--drivers/md/dm-snap-persistent.c2
-rw-r--r--drivers/md/dm-snap.c6
-rw-r--r--drivers/md/dm-stripe.c2
-rw-r--r--drivers/md/dm.c119
9 files changed, 80 insertions, 97 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 368e8e98f705..d5b0e4c0e702 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1278,7 +1278,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
1278 struct dm_crypt_io *io; 1278 struct dm_crypt_io *io;
1279 struct crypt_config *cc; 1279 struct crypt_config *cc;
1280 1280
1281 if (unlikely(bio_empty_barrier(bio))) { 1281 if (bio->bi_rw & REQ_FLUSH) {
1282 cc = ti->private; 1282 cc = ti->private;
1283 bio->bi_bdev = cc->dev->bdev; 1283 bio->bi_bdev = cc->dev->bdev;
1284 return DM_MAPIO_REMAPPED; 1284 return DM_MAPIO_REMAPPED;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 0590c75b0ab6..136d4f71a116 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -31,7 +31,6 @@ struct dm_io_client {
31 */ 31 */
32struct io { 32struct io {
33 unsigned long error_bits; 33 unsigned long error_bits;
34 unsigned long eopnotsupp_bits;
35 atomic_t count; 34 atomic_t count;
36 struct task_struct *sleeper; 35 struct task_struct *sleeper;
37 struct dm_io_client *client; 36 struct dm_io_client *client;
@@ -130,11 +129,8 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
130 *---------------------------------------------------------------*/ 129 *---------------------------------------------------------------*/
131static void dec_count(struct io *io, unsigned int region, int error) 130static void dec_count(struct io *io, unsigned int region, int error)
132{ 131{
133 if (error) { 132 if (error)
134 set_bit(region, &io->error_bits); 133 set_bit(region, &io->error_bits);
135 if (error == -EOPNOTSUPP)
136 set_bit(region, &io->eopnotsupp_bits);
137 }
138 134
139 if (atomic_dec_and_test(&io->count)) { 135 if (atomic_dec_and_test(&io->count)) {
140 if (io->sleeper) 136 if (io->sleeper)
@@ -310,8 +306,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
310 sector_t remaining = where->count; 306 sector_t remaining = where->count;
311 307
312 /* 308 /*
313 * where->count may be zero if rw holds a write barrier and we 309 * where->count may be zero if rw holds a flush and we need to
314 * need to send a zero-sized barrier. 310 * send a zero-sized flush.
315 */ 311 */
316 do { 312 do {
317 /* 313 /*
@@ -364,7 +360,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
364 */ 360 */
365 for (i = 0; i < num_regions; i++) { 361 for (i = 0; i < num_regions; i++) {
366 *dp = old_pages; 362 *dp = old_pages;
367 if (where[i].count || (rw & REQ_HARDBARRIER)) 363 if (where[i].count || (rw & REQ_FLUSH))
368 do_region(rw, i, where + i, dp, io); 364 do_region(rw, i, where + i, dp, io);
369 } 365 }
370 366
@@ -393,9 +389,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
393 return -EIO; 389 return -EIO;
394 } 390 }
395 391
396retry:
397 io->error_bits = 0; 392 io->error_bits = 0;
398 io->eopnotsupp_bits = 0;
399 atomic_set(&io->count, 1); /* see dispatch_io() */ 393 atomic_set(&io->count, 1); /* see dispatch_io() */
400 io->sleeper = current; 394 io->sleeper = current;
401 io->client = client; 395 io->client = client;
@@ -412,11 +406,6 @@ retry:
412 } 406 }
413 set_current_state(TASK_RUNNING); 407 set_current_state(TASK_RUNNING);
414 408
415 if (io->eopnotsupp_bits && (rw & REQ_HARDBARRIER)) {
416 rw &= ~REQ_HARDBARRIER;
417 goto retry;
418 }
419
420 if (error_bits) 409 if (error_bits)
421 *error_bits = io->error_bits; 410 *error_bits = io->error_bits;
422 411
@@ -437,7 +426,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
437 426
438 io = mempool_alloc(client->pool, GFP_NOIO); 427 io = mempool_alloc(client->pool, GFP_NOIO);
439 io->error_bits = 0; 428 io->error_bits = 0;
440 io->eopnotsupp_bits = 0;
441 atomic_set(&io->count, 1); /* see dispatch_io() */ 429 atomic_set(&io->count, 1); /* see dispatch_io() */
442 io->sleeper = NULL; 430 io->sleeper = NULL;
443 io->client = client; 431 io->client = client;
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 5a08be0222db..33420e68d153 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -300,7 +300,7 @@ static int flush_header(struct log_c *lc)
300 .count = 0, 300 .count = 0,
301 }; 301 };
302 302
303 lc->io_req.bi_rw = WRITE_BARRIER; 303 lc->io_req.bi_rw = WRITE_FLUSH;
304 304
305 return dm_io(&lc->io_req, 1, &null_location, NULL); 305 return dm_io(&lc->io_req, 1, &null_location, NULL);
306} 306}
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 7c081bcbc3cf..19a59b041c27 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -259,7 +259,7 @@ static int mirror_flush(struct dm_target *ti)
259 struct dm_io_region io[ms->nr_mirrors]; 259 struct dm_io_region io[ms->nr_mirrors];
260 struct mirror *m; 260 struct mirror *m;
261 struct dm_io_request io_req = { 261 struct dm_io_request io_req = {
262 .bi_rw = WRITE_BARRIER, 262 .bi_rw = WRITE_FLUSH,
263 .mem.type = DM_IO_KMEM, 263 .mem.type = DM_IO_KMEM,
264 .mem.ptr.bvec = NULL, 264 .mem.ptr.bvec = NULL,
265 .client = ms->io_client, 265 .client = ms->io_client,
@@ -629,7 +629,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
629 struct dm_io_region io[ms->nr_mirrors], *dest = io; 629 struct dm_io_region io[ms->nr_mirrors], *dest = io;
630 struct mirror *m; 630 struct mirror *m;
631 struct dm_io_request io_req = { 631 struct dm_io_request io_req = {
632 .bi_rw = WRITE | (bio->bi_rw & WRITE_BARRIER), 632 .bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA),
633 .mem.type = DM_IO_BVEC, 633 .mem.type = DM_IO_BVEC,
634 .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, 634 .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
635 .notify.fn = write_callback, 635 .notify.fn = write_callback,
@@ -670,7 +670,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
670 bio_list_init(&requeue); 670 bio_list_init(&requeue);
671 671
672 while ((bio = bio_list_pop(writes))) { 672 while ((bio = bio_list_pop(writes))) {
673 if (unlikely(bio_empty_barrier(bio))) { 673 if (bio->bi_rw & REQ_FLUSH) {
674 bio_list_add(&sync, bio); 674 bio_list_add(&sync, bio);
675 continue; 675 continue;
676 } 676 }
@@ -1203,7 +1203,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
1203 * We need to dec pending if this was a write. 1203 * We need to dec pending if this was a write.
1204 */ 1204 */
1205 if (rw == WRITE) { 1205 if (rw == WRITE) {
1206 if (likely(!bio_empty_barrier(bio))) 1206 if (!(bio->bi_rw & REQ_FLUSH))
1207 dm_rh_dec(ms->rh, map_context->ll); 1207 dm_rh_dec(ms->rh, map_context->ll);
1208 return error; 1208 return error;
1209 } 1209 }
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index bd5c58b28868..dad011aed0c9 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -81,9 +81,9 @@ struct dm_region_hash {
81 struct list_head failed_recovered_regions; 81 struct list_head failed_recovered_regions;
82 82
83 /* 83 /*
84 * If there was a barrier failure no regions can be marked clean. 84 * If there was a flush failure no regions can be marked clean.
85 */ 85 */
86 int barrier_failure; 86 int flush_failure;
87 87
88 void *context; 88 void *context;
89 sector_t target_begin; 89 sector_t target_begin;
@@ -217,7 +217,7 @@ struct dm_region_hash *dm_region_hash_create(
217 INIT_LIST_HEAD(&rh->quiesced_regions); 217 INIT_LIST_HEAD(&rh->quiesced_regions);
218 INIT_LIST_HEAD(&rh->recovered_regions); 218 INIT_LIST_HEAD(&rh->recovered_regions);
219 INIT_LIST_HEAD(&rh->failed_recovered_regions); 219 INIT_LIST_HEAD(&rh->failed_recovered_regions);
220 rh->barrier_failure = 0; 220 rh->flush_failure = 0;
221 221
222 rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, 222 rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
223 sizeof(struct dm_region)); 223 sizeof(struct dm_region));
@@ -399,8 +399,8 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
399 region_t region = dm_rh_bio_to_region(rh, bio); 399 region_t region = dm_rh_bio_to_region(rh, bio);
400 int recovering = 0; 400 int recovering = 0;
401 401
402 if (bio_empty_barrier(bio)) { 402 if (bio->bi_rw & REQ_FLUSH) {
403 rh->barrier_failure = 1; 403 rh->flush_failure = 1;
404 return; 404 return;
405 } 405 }
406 406
@@ -524,7 +524,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
524 struct bio *bio; 524 struct bio *bio;
525 525
526 for (bio = bios->head; bio; bio = bio->bi_next) { 526 for (bio = bios->head; bio; bio = bio->bi_next) {
527 if (bio_empty_barrier(bio)) 527 if (bio->bi_rw & REQ_FLUSH)
528 continue; 528 continue;
529 rh_inc(rh, dm_rh_bio_to_region(rh, bio)); 529 rh_inc(rh, dm_rh_bio_to_region(rh, bio));
530 } 530 }
@@ -555,9 +555,9 @@ void dm_rh_dec(struct dm_region_hash *rh, region_t region)
555 */ 555 */
556 556
557 /* do nothing for DM_RH_NOSYNC */ 557 /* do nothing for DM_RH_NOSYNC */
558 if (unlikely(rh->barrier_failure)) { 558 if (unlikely(rh->flush_failure)) {
559 /* 559 /*
560 * If a write barrier failed some time ago, we 560 * If a write flush failed some time ago, we
561 * don't know whether or not this write made it 561 * don't know whether or not this write made it
562 * to the disk, so we must resync the device. 562 * to the disk, so we must resync the device.
563 */ 563 */
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index cc2bdb83f9ad..0b61792a2780 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -687,7 +687,7 @@ static void persistent_commit_exception(struct dm_exception_store *store,
687 /* 687 /*
688 * Commit exceptions to disk. 688 * Commit exceptions to disk.
689 */ 689 */
690 if (ps->valid && area_io(ps, WRITE_BARRIER)) 690 if (ps->valid && area_io(ps, WRITE_FLUSH_FUA))
691 ps->valid = 0; 691 ps->valid = 0;
692 692
693 /* 693 /*
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 5974d3094d97..eed210152b75 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1587,7 +1587,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
1587 chunk_t chunk; 1587 chunk_t chunk;
1588 struct dm_snap_pending_exception *pe = NULL; 1588 struct dm_snap_pending_exception *pe = NULL;
1589 1589
1590 if (unlikely(bio_empty_barrier(bio))) { 1590 if (bio->bi_rw & REQ_FLUSH) {
1591 bio->bi_bdev = s->cow->bdev; 1591 bio->bi_bdev = s->cow->bdev;
1592 return DM_MAPIO_REMAPPED; 1592 return DM_MAPIO_REMAPPED;
1593 } 1593 }
@@ -1691,7 +1691,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
1691 int r = DM_MAPIO_REMAPPED; 1691 int r = DM_MAPIO_REMAPPED;
1692 chunk_t chunk; 1692 chunk_t chunk;
1693 1693
1694 if (unlikely(bio_empty_barrier(bio))) { 1694 if (bio->bi_rw & REQ_FLUSH) {
1695 if (!map_context->target_request_nr) 1695 if (!map_context->target_request_nr)
1696 bio->bi_bdev = s->origin->bdev; 1696 bio->bi_bdev = s->origin->bdev;
1697 else 1697 else
@@ -2135,7 +2135,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio,
2135 struct dm_dev *dev = ti->private; 2135 struct dm_dev *dev = ti->private;
2136 bio->bi_bdev = dev->bdev; 2136 bio->bi_bdev = dev->bdev;
2137 2137
2138 if (unlikely(bio_empty_barrier(bio))) 2138 if (bio->bi_rw & REQ_FLUSH)
2139 return DM_MAPIO_REMAPPED; 2139 return DM_MAPIO_REMAPPED;
2140 2140
2141 /* Only tell snapshots if this is a write */ 2141 /* Only tell snapshots if this is a write */
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index c297f6da91ea..f0371b4c4fbf 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -271,7 +271,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio,
271 uint32_t stripe; 271 uint32_t stripe;
272 unsigned target_request_nr; 272 unsigned target_request_nr;
273 273
274 if (unlikely(bio_empty_barrier(bio))) { 274 if (bio->bi_rw & REQ_FLUSH) {
275 target_request_nr = map_context->target_request_nr; 275 target_request_nr = map_context->target_request_nr;
276 BUG_ON(target_request_nr >= sc->stripes); 276 BUG_ON(target_request_nr >= sc->stripes);
277 bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; 277 bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b1d92be8f990..32e6622767ad 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -144,15 +144,16 @@ struct mapped_device {
144 spinlock_t deferred_lock; 144 spinlock_t deferred_lock;
145 145
146 /* 146 /*
147 * An error from the barrier request currently being processed. 147 * An error from the flush request currently being processed.
148 */ 148 */
149 int barrier_error; 149 int flush_error;
150 150
151 /* 151 /*
152 * Protect barrier_error from concurrent endio processing 152 * Protect barrier_error from concurrent endio processing
153 * in request-based dm. 153 * in request-based dm.
154 */ 154 */
155 spinlock_t barrier_error_lock; 155 spinlock_t barrier_error_lock;
156 int barrier_error;
156 157
157 /* 158 /*
158 * Processing queue (flush/barriers) 159 * Processing queue (flush/barriers)
@@ -200,8 +201,8 @@ struct mapped_device {
200 /* sysfs handle */ 201 /* sysfs handle */
201 struct kobject kobj; 202 struct kobject kobj;
202 203
203 /* zero-length barrier that will be cloned and submitted to targets */ 204 /* zero-length flush that will be cloned and submitted to targets */
204 struct bio barrier_bio; 205 struct bio flush_bio;
205}; 206};
206 207
207/* 208/*
@@ -512,7 +513,7 @@ static void end_io_acct(struct dm_io *io)
512 513
513 /* 514 /*
514 * After this is decremented the bio must not be touched if it is 515 * After this is decremented the bio must not be touched if it is
515 * a barrier. 516 * a flush.
516 */ 517 */
517 dm_disk(md)->part0.in_flight[rw] = pending = 518 dm_disk(md)->part0.in_flight[rw] = pending =
518 atomic_dec_return(&md->pending[rw]); 519 atomic_dec_return(&md->pending[rw]);
@@ -626,7 +627,7 @@ static void dec_pending(struct dm_io *io, int error)
626 */ 627 */
627 spin_lock_irqsave(&md->deferred_lock, flags); 628 spin_lock_irqsave(&md->deferred_lock, flags);
628 if (__noflush_suspending(md)) { 629 if (__noflush_suspending(md)) {
629 if (!(io->bio->bi_rw & REQ_HARDBARRIER)) 630 if (!(io->bio->bi_rw & REQ_FLUSH))
630 bio_list_add_head(&md->deferred, 631 bio_list_add_head(&md->deferred,
631 io->bio); 632 io->bio);
632 } else 633 } else
@@ -638,20 +639,14 @@ static void dec_pending(struct dm_io *io, int error)
638 io_error = io->error; 639 io_error = io->error;
639 bio = io->bio; 640 bio = io->bio;
640 641
641 if (bio->bi_rw & REQ_HARDBARRIER) { 642 if (bio->bi_rw & REQ_FLUSH) {
642 /* 643 /*
643 * There can be just one barrier request so we use 644 * There can be just one flush request so we use
644 * a per-device variable for error reporting. 645 * a per-device variable for error reporting.
645 * Note that you can't touch the bio after end_io_acct 646 * Note that you can't touch the bio after end_io_acct
646 *
647 * We ignore -EOPNOTSUPP for empty flush reported by
648 * underlying devices. We assume that if the device
649 * doesn't support empty barriers, it doesn't need
650 * cache flushing commands.
651 */ 647 */
652 if (!md->barrier_error && 648 if (!md->flush_error)
653 !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP)) 649 md->flush_error = io_error;
654 md->barrier_error = io_error;
655 end_io_acct(io); 650 end_io_acct(io);
656 free_io(md, io); 651 free_io(md, io);
657 } else { 652 } else {
@@ -1119,7 +1114,7 @@ static void dm_bio_destructor(struct bio *bio)
1119} 1114}
1120 1115
1121/* 1116/*
1122 * Creates a little bio that is just does part of a bvec. 1117 * Creates a little bio that just does part of a bvec.
1123 */ 1118 */
1124static struct bio *split_bvec(struct bio *bio, sector_t sector, 1119static struct bio *split_bvec(struct bio *bio, sector_t sector,
1125 unsigned short idx, unsigned int offset, 1120 unsigned short idx, unsigned int offset,
@@ -1134,7 +1129,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
1134 1129
1135 clone->bi_sector = sector; 1130 clone->bi_sector = sector;
1136 clone->bi_bdev = bio->bi_bdev; 1131 clone->bi_bdev = bio->bi_bdev;
1137 clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER; 1132 clone->bi_rw = bio->bi_rw;
1138 clone->bi_vcnt = 1; 1133 clone->bi_vcnt = 1;
1139 clone->bi_size = to_bytes(len); 1134 clone->bi_size = to_bytes(len);
1140 clone->bi_io_vec->bv_offset = offset; 1135 clone->bi_io_vec->bv_offset = offset;
@@ -1161,7 +1156,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
1161 1156
1162 clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); 1157 clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
1163 __bio_clone(clone, bio); 1158 __bio_clone(clone, bio);
1164 clone->bi_rw &= ~REQ_HARDBARRIER;
1165 clone->bi_destructor = dm_bio_destructor; 1159 clone->bi_destructor = dm_bio_destructor;
1166 clone->bi_sector = sector; 1160 clone->bi_sector = sector;
1167 clone->bi_idx = idx; 1161 clone->bi_idx = idx;
@@ -1225,7 +1219,7 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
1225 __issue_target_request(ci, ti, request_nr, len); 1219 __issue_target_request(ci, ti, request_nr, len);
1226} 1220}
1227 1221
1228static int __clone_and_map_empty_barrier(struct clone_info *ci) 1222static int __clone_and_map_flush(struct clone_info *ci)
1229{ 1223{
1230 unsigned target_nr = 0; 1224 unsigned target_nr = 0;
1231 struct dm_target *ti; 1225 struct dm_target *ti;
@@ -1289,9 +1283,6 @@ static int __clone_and_map(struct clone_info *ci)
1289 sector_t len = 0, max; 1283 sector_t len = 0, max;
1290 struct dm_target_io *tio; 1284 struct dm_target_io *tio;
1291 1285
1292 if (unlikely(bio_empty_barrier(bio)))
1293 return __clone_and_map_empty_barrier(ci);
1294
1295 if (unlikely(bio->bi_rw & REQ_DISCARD)) 1286 if (unlikely(bio->bi_rw & REQ_DISCARD))
1296 return __clone_and_map_discard(ci); 1287 return __clone_and_map_discard(ci);
1297 1288
@@ -1383,11 +1374,11 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
1383 1374
1384 ci.map = dm_get_live_table(md); 1375 ci.map = dm_get_live_table(md);
1385 if (unlikely(!ci.map)) { 1376 if (unlikely(!ci.map)) {
1386 if (!(bio->bi_rw & REQ_HARDBARRIER)) 1377 if (!(bio->bi_rw & REQ_FLUSH))
1387 bio_io_error(bio); 1378 bio_io_error(bio);
1388 else 1379 else
1389 if (!md->barrier_error) 1380 if (!md->flush_error)
1390 md->barrier_error = -EIO; 1381 md->flush_error = -EIO;
1391 return; 1382 return;
1392 } 1383 }
1393 1384
@@ -1400,14 +1391,22 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
1400 ci.io->md = md; 1391 ci.io->md = md;
1401 spin_lock_init(&ci.io->endio_lock); 1392 spin_lock_init(&ci.io->endio_lock);
1402 ci.sector = bio->bi_sector; 1393 ci.sector = bio->bi_sector;
1403 ci.sector_count = bio_sectors(bio); 1394 if (!(bio->bi_rw & REQ_FLUSH))
1404 if (unlikely(bio_empty_barrier(bio))) 1395 ci.sector_count = bio_sectors(bio);
1396 else {
1397 /* all FLUSH bio's reaching here should be empty */
1398 WARN_ON_ONCE(bio_has_data(bio));
1405 ci.sector_count = 1; 1399 ci.sector_count = 1;
1400 }
1406 ci.idx = bio->bi_idx; 1401 ci.idx = bio->bi_idx;
1407 1402
1408 start_io_acct(ci.io); 1403 start_io_acct(ci.io);
1409 while (ci.sector_count && !error) 1404 while (ci.sector_count && !error) {
1410 error = __clone_and_map(&ci); 1405 if (!(bio->bi_rw & REQ_FLUSH))
1406 error = __clone_and_map(&ci);
1407 else
1408 error = __clone_and_map_flush(&ci);
1409 }
1411 1410
1412 /* drop the extra reference count */ 1411 /* drop the extra reference count */
1413 dec_pending(ci.io, error); 1412 dec_pending(ci.io, error);
@@ -1492,11 +1491,11 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
1492 part_stat_unlock(); 1491 part_stat_unlock();
1493 1492
1494 /* 1493 /*
1495 * If we're suspended or the thread is processing barriers 1494 * If we're suspended or the thread is processing flushes
1496 * we have to queue this io for later. 1495 * we have to queue this io for later.
1497 */ 1496 */
1498 if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || 1497 if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
1499 unlikely(bio->bi_rw & REQ_HARDBARRIER)) { 1498 (bio->bi_rw & REQ_FLUSH)) {
1500 up_read(&md->io_lock); 1499 up_read(&md->io_lock);
1501 1500
1502 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && 1501 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
@@ -1940,6 +1939,7 @@ static void dm_init_md_queue(struct mapped_device *md)
1940 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); 1939 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
1941 md->queue->unplug_fn = dm_unplug_all; 1940 md->queue->unplug_fn = dm_unplug_all;
1942 blk_queue_merge_bvec(md->queue, dm_merge_bvec); 1941 blk_queue_merge_bvec(md->queue, dm_merge_bvec);
1942 blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
1943} 1943}
1944 1944
1945/* 1945/*
@@ -2245,7 +2245,8 @@ static int dm_init_request_based_queue(struct mapped_device *md)
2245 blk_queue_softirq_done(md->queue, dm_softirq_done); 2245 blk_queue_softirq_done(md->queue, dm_softirq_done);
2246 blk_queue_prep_rq(md->queue, dm_prep_fn); 2246 blk_queue_prep_rq(md->queue, dm_prep_fn);
2247 blk_queue_lld_busy(md->queue, dm_lld_busy); 2247 blk_queue_lld_busy(md->queue, dm_lld_busy);
2248 blk_queue_flush(md->queue, REQ_FLUSH); 2248 /* no flush support for request based dm yet */
2249 blk_queue_flush(md->queue, 0);
2249 2250
2250 elv_register_queue(md->queue); 2251 elv_register_queue(md->queue);
2251 2252
@@ -2406,41 +2407,35 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
2406 return r; 2407 return r;
2407} 2408}
2408 2409
2409static void dm_flush(struct mapped_device *md) 2410static void process_flush(struct mapped_device *md, struct bio *bio)
2410{ 2411{
2411 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); 2412 md->flush_error = 0;
2412
2413 bio_init(&md->barrier_bio);
2414 md->barrier_bio.bi_bdev = md->bdev;
2415 md->barrier_bio.bi_rw = WRITE_BARRIER;
2416 __split_and_process_bio(md, &md->barrier_bio);
2417 2413
2414 /* handle REQ_FLUSH */
2418 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); 2415 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2419}
2420 2416
2421static void process_barrier(struct mapped_device *md, struct bio *bio) 2417 bio_init(&md->flush_bio);
2422{ 2418 md->flush_bio.bi_bdev = md->bdev;
2423 md->barrier_error = 0; 2419 md->flush_bio.bi_rw = WRITE_FLUSH;
2420 __split_and_process_bio(md, &md->flush_bio);
2424 2421
2425 dm_flush(md); 2422 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2426 2423
2427 if (!bio_empty_barrier(bio)) { 2424 /* if it's an empty flush or the preflush failed, we're done */
2428 __split_and_process_bio(md, bio); 2425 if (!bio_has_data(bio) || md->flush_error) {
2429 /* 2426 if (md->flush_error != DM_ENDIO_REQUEUE)
2430 * If the request isn't supported, don't waste time with 2427 bio_endio(bio, md->flush_error);
2431 * the second flush. 2428 else {
2432 */ 2429 spin_lock_irq(&md->deferred_lock);
2433 if (md->barrier_error != -EOPNOTSUPP) 2430 bio_list_add_head(&md->deferred, bio);
2434 dm_flush(md); 2431 spin_unlock_irq(&md->deferred_lock);
2432 }
2433 return;
2435 } 2434 }
2436 2435
2437 if (md->barrier_error != DM_ENDIO_REQUEUE) 2436 /* issue data + REQ_FUA */
2438 bio_endio(bio, md->barrier_error); 2437 bio->bi_rw &= ~REQ_FLUSH;
2439 else { 2438 __split_and_process_bio(md, bio);
2440 spin_lock_irq(&md->deferred_lock);
2441 bio_list_add_head(&md->deferred, bio);
2442 spin_unlock_irq(&md->deferred_lock);
2443 }
2444} 2439}
2445 2440
2446/* 2441/*
@@ -2469,8 +2464,8 @@ static void dm_wq_work(struct work_struct *work)
2469 if (dm_request_based(md)) 2464 if (dm_request_based(md))
2470 generic_make_request(c); 2465 generic_make_request(c);
2471 else { 2466 else {
2472 if (c->bi_rw & REQ_HARDBARRIER) 2467 if (c->bi_rw & REQ_FLUSH)
2473 process_barrier(md, c); 2468 process_flush(md, c);
2474 else 2469 else
2475 __split_and_process_bio(md, c); 2470 __split_and_process_bio(md, c);
2476 } 2471 }