author     Linus Torvalds <torvalds@linux-foundation.org>    2010-10-22 20:07:18 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2010-10-22 20:07:18 -0400
commit     a2887097f25cd38cadfc11d10769e2b349fb5eca (patch)
tree       cd4adcb305365d6ba9acd2c02d4eb9d0125c6f8d /drivers/md
parent     8abfc6e7a45eb74e51904bbae676fae008b11366 (diff)
parent     005a1d15f5a6b2bb4ada80349513effbf22b4588 (diff)
Merge branch 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block: (46 commits)
xen-blkfront: disable barrier/flush write support
Added blk-lib.c and blk-barrier.c was renamed to blk-flush.c
block: remove BLKDEV_IFL_WAIT
aic7xxx_old: removed unused 'req' variable
block: remove the BH_Eopnotsupp flag
block: remove the BLKDEV_IFL_BARRIER flag
block: remove the WRITE_BARRIER flag
swap: do not send discards as barriers
fat: do not send discards as barriers
ext4: do not send discards as barriers
jbd2: replace barriers with explicit flush / FUA usage
jbd2: Modify ASYNC_COMMIT code to not rely on queue draining on barrier
jbd: replace barriers with explicit flush / FUA usage
nilfs2: replace barriers with explicit flush / FUA usage
reiserfs: replace barriers with explicit flush / FUA usage
gfs2: replace barriers with explicit flush / FUA usage
btrfs: replace barriers with explicit flush / FUA usage
xfs: replace barriers with explicit flush / FUA usage
block: pass gfp_mask and flags to sb_issue_discard
dm: convey that all flushes are processed as empty
...
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-crypt.c            |    2
-rw-r--r--  drivers/md/dm-io.c               |   20
-rw-r--r--  drivers/md/dm-log.c              |    2
-rw-r--r--  drivers/md/dm-raid1.c            |    8
-rw-r--r--  drivers/md/dm-region-hash.c      |   16
-rw-r--r--  drivers/md/dm-snap-persistent.c  |    2
-rw-r--r--  drivers/md/dm-snap.c             |    6
-rw-r--r--  drivers/md/dm-stripe.c           |    2
-rw-r--r--  drivers/md/dm.c                  |  398
-rw-r--r--  drivers/md/linear.c              |    4
-rw-r--r--  drivers/md/md.c                  |  117
-rw-r--r--  drivers/md/md.h                  |   23
-rw-r--r--  drivers/md/multipath.c           |    4
-rw-r--r--  drivers/md/raid0.c               |    4
-rw-r--r--  drivers/md/raid1.c               |  176
-rw-r--r--  drivers/md/raid1.h               |    2
-rw-r--r--  drivers/md/raid10.c              |    7
-rw-r--r--  drivers/md/raid5.c               |   43
-rw-r--r--  drivers/md/raid5.h               |    1
19 files changed, 226 insertions, 611 deletions
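
The common thread in these patches is that ordering-by-drain barriers (REQ_HARDBARRIER / WRITE_BARRIER) are gone; code that needs data on stable storage now issues an explicit cache flush, optionally combined with an FUA write. As a minimal hedged sketch of that convention (the helper names below are invented for illustration and do not appear in the series), an empty flush is just a zero-length bio submitted with WRITE_FLUSH:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/completion.h>

/*
 * Hedged sketch: issue an explicit cache flush the way the converted code
 * does, instead of relying on a WRITE_BARRIER request.
 * "issue_cache_flush" and "flush_end_io" are hypothetical names.
 */
static void flush_end_io(struct bio *bio, int error)
{
	/* Flush finished; wake the waiter (error handling omitted here). */
	complete(bio->bi_private);
	bio_put(bio);
}

static void issue_cache_flush(struct block_device *bdev)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct bio *bio = bio_alloc(GFP_NOIO, 0);	/* zero-length bio */

	bio->bi_bdev = bdev;
	bio->bi_private = &done;
	bio->bi_end_io = flush_end_io;
	submit_bio(WRITE_FLUSH, bio);	/* a WRITE with REQ_FLUSH set, no data */
	wait_for_completion(&done);
}

Callers that only need a synchronous flush would normally reach for blkdev_issue_flush() rather than open-coding this; the md hunk further down uses the asynchronous form of the same pattern in submit_flushes().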
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 368e8e98f705..d5b0e4c0e702 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1278,7 +1278,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, | |||
1278 | struct dm_crypt_io *io; | 1278 | struct dm_crypt_io *io; |
1279 | struct crypt_config *cc; | 1279 | struct crypt_config *cc; |
1280 | 1280 | ||
1281 | if (unlikely(bio_empty_barrier(bio))) { | 1281 | if (bio->bi_rw & REQ_FLUSH) { |
1282 | cc = ti->private; | 1282 | cc = ti->private; |
1283 | bio->bi_bdev = cc->dev->bdev; | 1283 | bio->bi_bdev = cc->dev->bdev; |
1284 | return DM_MAPIO_REMAPPED; | 1284 | return DM_MAPIO_REMAPPED; |
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 0590c75b0ab6..136d4f71a116 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -31,7 +31,6 @@ struct dm_io_client { | |||
31 | */ | 31 | */ |
32 | struct io { | 32 | struct io { |
33 | unsigned long error_bits; | 33 | unsigned long error_bits; |
34 | unsigned long eopnotsupp_bits; | ||
35 | atomic_t count; | 34 | atomic_t count; |
36 | struct task_struct *sleeper; | 35 | struct task_struct *sleeper; |
37 | struct dm_io_client *client; | 36 | struct dm_io_client *client; |
@@ -130,11 +129,8 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io, | |||
130 | *---------------------------------------------------------------*/ | 129 | *---------------------------------------------------------------*/ |
131 | static void dec_count(struct io *io, unsigned int region, int error) | 130 | static void dec_count(struct io *io, unsigned int region, int error) |
132 | { | 131 | { |
133 | if (error) { | 132 | if (error) |
134 | set_bit(region, &io->error_bits); | 133 | set_bit(region, &io->error_bits); |
135 | if (error == -EOPNOTSUPP) | ||
136 | set_bit(region, &io->eopnotsupp_bits); | ||
137 | } | ||
138 | 134 | ||
139 | if (atomic_dec_and_test(&io->count)) { | 135 | if (atomic_dec_and_test(&io->count)) { |
140 | if (io->sleeper) | 136 | if (io->sleeper) |
@@ -310,8 +306,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, | |||
310 | sector_t remaining = where->count; | 306 | sector_t remaining = where->count; |
311 | 307 | ||
312 | /* | 308 | /* |
313 | * where->count may be zero if rw holds a write barrier and we | 309 | * where->count may be zero if rw holds a flush and we need to |
314 | * need to send a zero-sized barrier. | 310 | * send a zero-sized flush. |
315 | */ | 311 | */ |
316 | do { | 312 | do { |
317 | /* | 313 | /* |
@@ -364,7 +360,7 @@ static void dispatch_io(int rw, unsigned int num_regions, | |||
364 | */ | 360 | */ |
365 | for (i = 0; i < num_regions; i++) { | 361 | for (i = 0; i < num_regions; i++) { |
366 | *dp = old_pages; | 362 | *dp = old_pages; |
367 | if (where[i].count || (rw & REQ_HARDBARRIER)) | 363 | if (where[i].count || (rw & REQ_FLUSH)) |
368 | do_region(rw, i, where + i, dp, io); | 364 | do_region(rw, i, where + i, dp, io); |
369 | } | 365 | } |
370 | 366 | ||
@@ -393,9 +389,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, | |||
393 | return -EIO; | 389 | return -EIO; |
394 | } | 390 | } |
395 | 391 | ||
396 | retry: | ||
397 | io->error_bits = 0; | 392 | io->error_bits = 0; |
398 | io->eopnotsupp_bits = 0; | ||
399 | atomic_set(&io->count, 1); /* see dispatch_io() */ | 393 | atomic_set(&io->count, 1); /* see dispatch_io() */ |
400 | io->sleeper = current; | 394 | io->sleeper = current; |
401 | io->client = client; | 395 | io->client = client; |
@@ -412,11 +406,6 @@ retry: | |||
412 | } | 406 | } |
413 | set_current_state(TASK_RUNNING); | 407 | set_current_state(TASK_RUNNING); |
414 | 408 | ||
415 | if (io->eopnotsupp_bits && (rw & REQ_HARDBARRIER)) { | ||
416 | rw &= ~REQ_HARDBARRIER; | ||
417 | goto retry; | ||
418 | } | ||
419 | |||
420 | if (error_bits) | 409 | if (error_bits) |
421 | *error_bits = io->error_bits; | 410 | *error_bits = io->error_bits; |
422 | 411 | ||
@@ -437,7 +426,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, | |||
437 | 426 | ||
438 | io = mempool_alloc(client->pool, GFP_NOIO); | 427 | io = mempool_alloc(client->pool, GFP_NOIO); |
439 | io->error_bits = 0; | 428 | io->error_bits = 0; |
440 | io->eopnotsupp_bits = 0; | ||
441 | atomic_set(&io->count, 1); /* see dispatch_io() */ | 429 | atomic_set(&io->count, 1); /* see dispatch_io() */ |
442 | io->sleeper = NULL; | 430 | io->sleeper = NULL; |
443 | io->client = client; | 431 | io->client = client; |
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 5a08be0222db..33420e68d153 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -300,7 +300,7 @@ static int flush_header(struct log_c *lc) | |||
300 | .count = 0, | 300 | .count = 0, |
301 | }; | 301 | }; |
302 | 302 | ||
303 | lc->io_req.bi_rw = WRITE_BARRIER; | 303 | lc->io_req.bi_rw = WRITE_FLUSH; |
304 | 304 | ||
305 | return dm_io(&lc->io_req, 1, &null_location, NULL); | 305 | return dm_io(&lc->io_req, 1, &null_location, NULL); |
306 | } | 306 | } |
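
flush_header() above now asks dm-io for a flush by setting bi_rw to WRITE_FLUSH on a zero-count region; the dm-raid1 hunks below do the same in mirror_flush(). A hedged, self-contained sketch of that dm-io call (the helper name and the client/bdev parameters are assumptions, not taken from the patch):

#include <linux/dm-io.h>

/*
 * Hedged sketch of an empty flush through the dm-io interface.
 * "issue_target_flush" is a hypothetical helper; the client is assumed to
 * come from dm_io_client_create() elsewhere.
 */
static int issue_target_flush(struct dm_io_client *client,
			      struct block_device *bdev)
{
	struct dm_io_region region = {
		.bdev   = bdev,
		.sector = 0,
		.count  = 0,		/* zero-sized region: flush only, no data */
	};
	struct dm_io_request io_req = {
		.bi_rw        = WRITE_FLUSH,	/* was WRITE_BARRIER */
		.mem.type     = DM_IO_KMEM,
		.mem.ptr.bvec = NULL,
		.client       = client,
		/* .notify.fn left NULL, so dm_io() completes synchronously */
	};

	return dm_io(&io_req, 1, &region, NULL);
}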
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 7c081bcbc3cf..19a59b041c27 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -259,7 +259,7 @@ static int mirror_flush(struct dm_target *ti) | |||
259 | struct dm_io_region io[ms->nr_mirrors]; | 259 | struct dm_io_region io[ms->nr_mirrors]; |
260 | struct mirror *m; | 260 | struct mirror *m; |
261 | struct dm_io_request io_req = { | 261 | struct dm_io_request io_req = { |
262 | .bi_rw = WRITE_BARRIER, | 262 | .bi_rw = WRITE_FLUSH, |
263 | .mem.type = DM_IO_KMEM, | 263 | .mem.type = DM_IO_KMEM, |
264 | .mem.ptr.bvec = NULL, | 264 | .mem.ptr.bvec = NULL, |
265 | .client = ms->io_client, | 265 | .client = ms->io_client, |
@@ -629,7 +629,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio) | |||
629 | struct dm_io_region io[ms->nr_mirrors], *dest = io; | 629 | struct dm_io_region io[ms->nr_mirrors], *dest = io; |
630 | struct mirror *m; | 630 | struct mirror *m; |
631 | struct dm_io_request io_req = { | 631 | struct dm_io_request io_req = { |
632 | .bi_rw = WRITE | (bio->bi_rw & WRITE_BARRIER), | 632 | .bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA), |
633 | .mem.type = DM_IO_BVEC, | 633 | .mem.type = DM_IO_BVEC, |
634 | .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, | 634 | .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, |
635 | .notify.fn = write_callback, | 635 | .notify.fn = write_callback, |
@@ -670,7 +670,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) | |||
670 | bio_list_init(&requeue); | 670 | bio_list_init(&requeue); |
671 | 671 | ||
672 | while ((bio = bio_list_pop(writes))) { | 672 | while ((bio = bio_list_pop(writes))) { |
673 | if (unlikely(bio_empty_barrier(bio))) { | 673 | if (bio->bi_rw & REQ_FLUSH) { |
674 | bio_list_add(&sync, bio); | 674 | bio_list_add(&sync, bio); |
675 | continue; | 675 | continue; |
676 | } | 676 | } |
@@ -1203,7 +1203,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, | |||
1203 | * We need to dec pending if this was a write. | 1203 | * We need to dec pending if this was a write. |
1204 | */ | 1204 | */ |
1205 | if (rw == WRITE) { | 1205 | if (rw == WRITE) { |
1206 | if (likely(!bio_empty_barrier(bio))) | 1206 | if (!(bio->bi_rw & REQ_FLUSH)) |
1207 | dm_rh_dec(ms->rh, map_context->ll); | 1207 | dm_rh_dec(ms->rh, map_context->ll); |
1208 | return error; | 1208 | return error; |
1209 | } | 1209 | } |
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index bd5c58b28868..dad011aed0c9 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -81,9 +81,9 @@ struct dm_region_hash { | |||
81 | struct list_head failed_recovered_regions; | 81 | struct list_head failed_recovered_regions; |
82 | 82 | ||
83 | /* | 83 | /* |
84 | * If there was a barrier failure no regions can be marked clean. | 84 | * If there was a flush failure no regions can be marked clean. |
85 | */ | 85 | */ |
86 | int barrier_failure; | 86 | int flush_failure; |
87 | 87 | ||
88 | void *context; | 88 | void *context; |
89 | sector_t target_begin; | 89 | sector_t target_begin; |
@@ -217,7 +217,7 @@ struct dm_region_hash *dm_region_hash_create( | |||
217 | INIT_LIST_HEAD(&rh->quiesced_regions); | 217 | INIT_LIST_HEAD(&rh->quiesced_regions); |
218 | INIT_LIST_HEAD(&rh->recovered_regions); | 218 | INIT_LIST_HEAD(&rh->recovered_regions); |
219 | INIT_LIST_HEAD(&rh->failed_recovered_regions); | 219 | INIT_LIST_HEAD(&rh->failed_recovered_regions); |
220 | rh->barrier_failure = 0; | 220 | rh->flush_failure = 0; |
221 | 221 | ||
222 | rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, | 222 | rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, |
223 | sizeof(struct dm_region)); | 223 | sizeof(struct dm_region)); |
@@ -399,8 +399,8 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio) | |||
399 | region_t region = dm_rh_bio_to_region(rh, bio); | 399 | region_t region = dm_rh_bio_to_region(rh, bio); |
400 | int recovering = 0; | 400 | int recovering = 0; |
401 | 401 | ||
402 | if (bio_empty_barrier(bio)) { | 402 | if (bio->bi_rw & REQ_FLUSH) { |
403 | rh->barrier_failure = 1; | 403 | rh->flush_failure = 1; |
404 | return; | 404 | return; |
405 | } | 405 | } |
406 | 406 | ||
@@ -524,7 +524,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios) | |||
524 | struct bio *bio; | 524 | struct bio *bio; |
525 | 525 | ||
526 | for (bio = bios->head; bio; bio = bio->bi_next) { | 526 | for (bio = bios->head; bio; bio = bio->bi_next) { |
527 | if (bio_empty_barrier(bio)) | 527 | if (bio->bi_rw & REQ_FLUSH) |
528 | continue; | 528 | continue; |
529 | rh_inc(rh, dm_rh_bio_to_region(rh, bio)); | 529 | rh_inc(rh, dm_rh_bio_to_region(rh, bio)); |
530 | } | 530 | } |
@@ -555,9 +555,9 @@ void dm_rh_dec(struct dm_region_hash *rh, region_t region) | |||
555 | */ | 555 | */ |
556 | 556 | ||
557 | /* do nothing for DM_RH_NOSYNC */ | 557 | /* do nothing for DM_RH_NOSYNC */ |
558 | if (unlikely(rh->barrier_failure)) { | 558 | if (unlikely(rh->flush_failure)) { |
559 | /* | 559 | /* |
560 | * If a write barrier failed some time ago, we | 560 | * If a write flush failed some time ago, we |
561 | * don't know whether or not this write made it | 561 | * don't know whether or not this write made it |
562 | * to the disk, so we must resync the device. | 562 | * to the disk, so we must resync the device. |
563 | */ | 563 | */ |
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index cc2bdb83f9ad..0b61792a2780 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -687,7 +687,7 @@ static void persistent_commit_exception(struct dm_exception_store *store, | |||
687 | /* | 687 | /* |
688 | * Commit exceptions to disk. | 688 | * Commit exceptions to disk. |
689 | */ | 689 | */ |
690 | if (ps->valid && area_io(ps, WRITE_BARRIER)) | 690 | if (ps->valid && area_io(ps, WRITE_FLUSH_FUA)) |
691 | ps->valid = 0; | 691 | ps->valid = 0; |
692 | 692 | ||
693 | /* | 693 | /* |
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index f30f6e8d594e..53cf79d8bcbc 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1585,7 +1585,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, | |||
1585 | chunk_t chunk; | 1585 | chunk_t chunk; |
1586 | struct dm_snap_pending_exception *pe = NULL; | 1586 | struct dm_snap_pending_exception *pe = NULL; |
1587 | 1587 | ||
1588 | if (unlikely(bio_empty_barrier(bio))) { | 1588 | if (bio->bi_rw & REQ_FLUSH) { |
1589 | bio->bi_bdev = s->cow->bdev; | 1589 | bio->bi_bdev = s->cow->bdev; |
1590 | return DM_MAPIO_REMAPPED; | 1590 | return DM_MAPIO_REMAPPED; |
1591 | } | 1591 | } |
@@ -1689,7 +1689,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, | |||
1689 | int r = DM_MAPIO_REMAPPED; | 1689 | int r = DM_MAPIO_REMAPPED; |
1690 | chunk_t chunk; | 1690 | chunk_t chunk; |
1691 | 1691 | ||
1692 | if (unlikely(bio_empty_barrier(bio))) { | 1692 | if (bio->bi_rw & REQ_FLUSH) { |
1693 | if (!map_context->target_request_nr) | 1693 | if (!map_context->target_request_nr) |
1694 | bio->bi_bdev = s->origin->bdev; | 1694 | bio->bi_bdev = s->origin->bdev; |
1695 | else | 1695 | else |
@@ -2133,7 +2133,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio, | |||
2133 | struct dm_dev *dev = ti->private; | 2133 | struct dm_dev *dev = ti->private; |
2134 | bio->bi_bdev = dev->bdev; | 2134 | bio->bi_bdev = dev->bdev; |
2135 | 2135 | ||
2136 | if (unlikely(bio_empty_barrier(bio))) | 2136 | if (bio->bi_rw & REQ_FLUSH) |
2137 | return DM_MAPIO_REMAPPED; | 2137 | return DM_MAPIO_REMAPPED; |
2138 | 2138 | ||
2139 | /* Only tell snapshots if this is a write */ | 2139 | /* Only tell snapshots if this is a write */ |
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index c297f6da91ea..f0371b4c4fbf 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -271,7 +271,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio, | |||
271 | uint32_t stripe; | 271 | uint32_t stripe; |
272 | unsigned target_request_nr; | 272 | unsigned target_request_nr; |
273 | 273 | ||
274 | if (unlikely(bio_empty_barrier(bio))) { | 274 | if (bio->bi_rw & REQ_FLUSH) { |
275 | target_request_nr = map_context->target_request_nr; | 275 | target_request_nr = map_context->target_request_nr; |
276 | BUG_ON(target_request_nr >= sc->stripes); | 276 | BUG_ON(target_request_nr >= sc->stripes); |
277 | bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; | 277 | bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; |
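
The crypt, snapshot, and stripe hunks above all converge on the same pattern: a bio with REQ_FLUSH set carries no data and is simply redirected at an underlying device. A hedged sketch of that pattern in a hypothetical single-device target (example_map and struct example_ctx are invented for illustration):

#include <linux/device-mapper.h>

struct example_ctx {			/* hypothetical per-target state */
	struct dm_dev *dev;
	sector_t start;
};

/*
 * Hedged sketch of the map() shape the bio-based targets above now share;
 * real targets pick the destination device per their own layout.
 */
static int example_map(struct dm_target *ti, struct bio *bio,
		       union map_info *map_context)
{
	struct example_ctx *ec = ti->private;

	if (bio->bi_rw & REQ_FLUSH) {
		/* Empty flush: no sector remapping needed. */
		bio->bi_bdev = ec->dev->bdev;
		return DM_MAPIO_REMAPPED;
	}

	bio->bi_bdev = ec->dev->bdev;
	bio->bi_sector = ec->start + (bio->bi_sector - ti->begin);
	return DM_MAPIO_REMAPPED;
}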
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7967eca5a2d5..7cb1352f7e7a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -110,7 +110,6 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); | |||
110 | #define DMF_FREEING 3 | 110 | #define DMF_FREEING 3 |
111 | #define DMF_DELETING 4 | 111 | #define DMF_DELETING 4 |
112 | #define DMF_NOFLUSH_SUSPENDING 5 | 112 | #define DMF_NOFLUSH_SUSPENDING 5 |
113 | #define DMF_QUEUE_IO_TO_THREAD 6 | ||
114 | 113 | ||
115 | /* | 114 | /* |
116 | * Work processed by per-device workqueue. | 115 | * Work processed by per-device workqueue. |
@@ -144,24 +143,9 @@ struct mapped_device { | |||
144 | spinlock_t deferred_lock; | 143 | spinlock_t deferred_lock; |
145 | 144 | ||
146 | /* | 145 | /* |
147 | * An error from the barrier request currently being processed. | 146 | * Processing queue (flush) |
148 | */ | ||
149 | int barrier_error; | ||
150 | |||
151 | /* | ||
152 | * Protect barrier_error from concurrent endio processing | ||
153 | * in request-based dm. | ||
154 | */ | ||
155 | spinlock_t barrier_error_lock; | ||
156 | |||
157 | /* | ||
158 | * Processing queue (flush/barriers) | ||
159 | */ | 147 | */ |
160 | struct workqueue_struct *wq; | 148 | struct workqueue_struct *wq; |
161 | struct work_struct barrier_work; | ||
162 | |||
163 | /* A pointer to the currently processing pre/post flush request */ | ||
164 | struct request *flush_request; | ||
165 | 149 | ||
166 | /* | 150 | /* |
167 | * The current mapping. | 151 | * The current mapping. |
@@ -200,8 +184,8 @@ struct mapped_device { | |||
200 | /* sysfs handle */ | 184 | /* sysfs handle */ |
201 | struct kobject kobj; | 185 | struct kobject kobj; |
202 | 186 | ||
203 | /* zero-length barrier that will be cloned and submitted to targets */ | 187 | /* zero-length flush that will be cloned and submitted to targets */ |
204 | struct bio barrier_bio; | 188 | struct bio flush_bio; |
205 | }; | 189 | }; |
206 | 190 | ||
207 | /* | 191 | /* |
@@ -512,7 +496,7 @@ static void end_io_acct(struct dm_io *io) | |||
512 | 496 | ||
513 | /* | 497 | /* |
514 | * After this is decremented the bio must not be touched if it is | 498 | * After this is decremented the bio must not be touched if it is |
515 | * a barrier. | 499 | * a flush. |
516 | */ | 500 | */ |
517 | dm_disk(md)->part0.in_flight[rw] = pending = | 501 | dm_disk(md)->part0.in_flight[rw] = pending = |
518 | atomic_dec_return(&md->pending[rw]); | 502 | atomic_dec_return(&md->pending[rw]); |
@@ -528,16 +512,12 @@ static void end_io_acct(struct dm_io *io) | |||
528 | */ | 512 | */ |
529 | static void queue_io(struct mapped_device *md, struct bio *bio) | 513 | static void queue_io(struct mapped_device *md, struct bio *bio) |
530 | { | 514 | { |
531 | down_write(&md->io_lock); | 515 | unsigned long flags; |
532 | 516 | ||
533 | spin_lock_irq(&md->deferred_lock); | 517 | spin_lock_irqsave(&md->deferred_lock, flags); |
534 | bio_list_add(&md->deferred, bio); | 518 | bio_list_add(&md->deferred, bio); |
535 | spin_unlock_irq(&md->deferred_lock); | 519 | spin_unlock_irqrestore(&md->deferred_lock, flags); |
536 | 520 | queue_work(md->wq, &md->work); | |
537 | if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) | ||
538 | queue_work(md->wq, &md->work); | ||
539 | |||
540 | up_write(&md->io_lock); | ||
541 | } | 521 | } |
542 | 522 | ||
543 | /* | 523 | /* |
@@ -625,11 +605,9 @@ static void dec_pending(struct dm_io *io, int error) | |||
625 | * Target requested pushing back the I/O. | 605 | * Target requested pushing back the I/O. |
626 | */ | 606 | */ |
627 | spin_lock_irqsave(&md->deferred_lock, flags); | 607 | spin_lock_irqsave(&md->deferred_lock, flags); |
628 | if (__noflush_suspending(md)) { | 608 | if (__noflush_suspending(md)) |
629 | if (!(io->bio->bi_rw & REQ_HARDBARRIER)) | 609 | bio_list_add_head(&md->deferred, io->bio); |
630 | bio_list_add_head(&md->deferred, | 610 | else |
631 | io->bio); | ||
632 | } else | ||
633 | /* noflush suspend was interrupted. */ | 611 | /* noflush suspend was interrupted. */ |
634 | io->error = -EIO; | 612 | io->error = -EIO; |
635 | spin_unlock_irqrestore(&md->deferred_lock, flags); | 613 | spin_unlock_irqrestore(&md->deferred_lock, flags); |
@@ -637,32 +615,23 @@ static void dec_pending(struct dm_io *io, int error) | |||
637 | 615 | ||
638 | io_error = io->error; | 616 | io_error = io->error; |
639 | bio = io->bio; | 617 | bio = io->bio; |
618 | end_io_acct(io); | ||
619 | free_io(md, io); | ||
620 | |||
621 | if (io_error == DM_ENDIO_REQUEUE) | ||
622 | return; | ||
640 | 623 | ||
641 | if (bio->bi_rw & REQ_HARDBARRIER) { | 624 | if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) { |
642 | /* | 625 | /* |
643 | * There can be just one barrier request so we use | 626 | * Preflush done for flush with data, reissue |
644 | * a per-device variable for error reporting. | 627 | * without REQ_FLUSH. |
645 | * Note that you can't touch the bio after end_io_acct | ||
646 | * | ||
647 | * We ignore -EOPNOTSUPP for empty flush reported by | ||
648 | * underlying devices. We assume that if the device | ||
649 | * doesn't support empty barriers, it doesn't need | ||
650 | * cache flushing commands. | ||
651 | */ | 628 | */ |
652 | if (!md->barrier_error && | 629 | bio->bi_rw &= ~REQ_FLUSH; |
653 | !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP)) | 630 | queue_io(md, bio); |
654 | md->barrier_error = io_error; | ||
655 | end_io_acct(io); | ||
656 | free_io(md, io); | ||
657 | } else { | 631 | } else { |
658 | end_io_acct(io); | 632 | /* done with normal IO or empty flush */ |
659 | free_io(md, io); | 633 | trace_block_bio_complete(md->queue, bio); |
660 | 634 | bio_endio(bio, io_error); | |
661 | if (io_error != DM_ENDIO_REQUEUE) { | ||
662 | trace_block_bio_complete(md->queue, bio); | ||
663 | |||
664 | bio_endio(bio, io_error); | ||
665 | } | ||
666 | } | 635 | } |
667 | } | 636 | } |
668 | } | 637 | } |
@@ -755,23 +724,6 @@ static void end_clone_bio(struct bio *clone, int error) | |||
755 | blk_update_request(tio->orig, 0, nr_bytes); | 724 | blk_update_request(tio->orig, 0, nr_bytes); |
756 | } | 725 | } |
757 | 726 | ||
758 | static void store_barrier_error(struct mapped_device *md, int error) | ||
759 | { | ||
760 | unsigned long flags; | ||
761 | |||
762 | spin_lock_irqsave(&md->barrier_error_lock, flags); | ||
763 | /* | ||
764 | * Basically, the first error is taken, but: | ||
765 | * -EOPNOTSUPP supersedes any I/O error. | ||
766 | * Requeue request supersedes any I/O error but -EOPNOTSUPP. | ||
767 | */ | ||
768 | if (!md->barrier_error || error == -EOPNOTSUPP || | ||
769 | (md->barrier_error != -EOPNOTSUPP && | ||
770 | error == DM_ENDIO_REQUEUE)) | ||
771 | md->barrier_error = error; | ||
772 | spin_unlock_irqrestore(&md->barrier_error_lock, flags); | ||
773 | } | ||
774 | |||
775 | /* | 727 | /* |
776 | * Don't touch any member of the md after calling this function because | 728 | * Don't touch any member of the md after calling this function because |
777 | * the md may be freed in dm_put() at the end of this function. | 729 | * the md may be freed in dm_put() at the end of this function. |
@@ -809,13 +761,11 @@ static void free_rq_clone(struct request *clone) | |||
809 | static void dm_end_request(struct request *clone, int error) | 761 | static void dm_end_request(struct request *clone, int error) |
810 | { | 762 | { |
811 | int rw = rq_data_dir(clone); | 763 | int rw = rq_data_dir(clone); |
812 | int run_queue = 1; | ||
813 | bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER; | ||
814 | struct dm_rq_target_io *tio = clone->end_io_data; | 764 | struct dm_rq_target_io *tio = clone->end_io_data; |
815 | struct mapped_device *md = tio->md; | 765 | struct mapped_device *md = tio->md; |
816 | struct request *rq = tio->orig; | 766 | struct request *rq = tio->orig; |
817 | 767 | ||
818 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) { | 768 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
819 | rq->errors = clone->errors; | 769 | rq->errors = clone->errors; |
820 | rq->resid_len = clone->resid_len; | 770 | rq->resid_len = clone->resid_len; |
821 | 771 | ||
@@ -829,15 +779,8 @@ static void dm_end_request(struct request *clone, int error) | |||
829 | } | 779 | } |
830 | 780 | ||
831 | free_rq_clone(clone); | 781 | free_rq_clone(clone); |
832 | 782 | blk_end_request_all(rq, error); | |
833 | if (unlikely(is_barrier)) { | 783 | rq_completed(md, rw, true); |
834 | if (unlikely(error)) | ||
835 | store_barrier_error(md, error); | ||
836 | run_queue = 0; | ||
837 | } else | ||
838 | blk_end_request_all(rq, error); | ||
839 | |||
840 | rq_completed(md, rw, run_queue); | ||
841 | } | 784 | } |
842 | 785 | ||
843 | static void dm_unprep_request(struct request *rq) | 786 | static void dm_unprep_request(struct request *rq) |
@@ -862,16 +805,6 @@ void dm_requeue_unmapped_request(struct request *clone) | |||
862 | struct request_queue *q = rq->q; | 805 | struct request_queue *q = rq->q; |
863 | unsigned long flags; | 806 | unsigned long flags; |
864 | 807 | ||
865 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { | ||
866 | /* | ||
867 | * Barrier clones share an original request. | ||
868 | * Leave it to dm_end_request(), which handles this special | ||
869 | * case. | ||
870 | */ | ||
871 | dm_end_request(clone, DM_ENDIO_REQUEUE); | ||
872 | return; | ||
873 | } | ||
874 | |||
875 | dm_unprep_request(rq); | 808 | dm_unprep_request(rq); |
876 | 809 | ||
877 | spin_lock_irqsave(q->queue_lock, flags); | 810 | spin_lock_irqsave(q->queue_lock, flags); |
@@ -961,19 +894,6 @@ static void dm_complete_request(struct request *clone, int error) | |||
961 | struct dm_rq_target_io *tio = clone->end_io_data; | 894 | struct dm_rq_target_io *tio = clone->end_io_data; |
962 | struct request *rq = tio->orig; | 895 | struct request *rq = tio->orig; |
963 | 896 | ||
964 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { | ||
965 | /* | ||
966 | * Barrier clones share an original request. So can't use | ||
967 | * softirq_done with the original. | ||
968 | * Pass the clone to dm_done() directly in this special case. | ||
969 | * It is safe (even if clone->q->queue_lock is held here) | ||
970 | * because there is no I/O dispatching during the completion | ||
971 | * of barrier clone. | ||
972 | */ | ||
973 | dm_done(clone, error, true); | ||
974 | return; | ||
975 | } | ||
976 | |||
977 | tio->error = error; | 897 | tio->error = error; |
978 | rq->completion_data = clone; | 898 | rq->completion_data = clone; |
979 | blk_complete_request(rq); | 899 | blk_complete_request(rq); |
@@ -990,17 +910,6 @@ void dm_kill_unmapped_request(struct request *clone, int error) | |||
990 | struct dm_rq_target_io *tio = clone->end_io_data; | 910 | struct dm_rq_target_io *tio = clone->end_io_data; |
991 | struct request *rq = tio->orig; | 911 | struct request *rq = tio->orig; |
992 | 912 | ||
993 | if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { | ||
994 | /* | ||
995 | * Barrier clones share an original request. | ||
996 | * Leave it to dm_end_request(), which handles this special | ||
997 | * case. | ||
998 | */ | ||
999 | BUG_ON(error > 0); | ||
1000 | dm_end_request(clone, error); | ||
1001 | return; | ||
1002 | } | ||
1003 | |||
1004 | rq->cmd_flags |= REQ_FAILED; | 913 | rq->cmd_flags |= REQ_FAILED; |
1005 | dm_complete_request(clone, error); | 914 | dm_complete_request(clone, error); |
1006 | } | 915 | } |
@@ -1119,7 +1028,7 @@ static void dm_bio_destructor(struct bio *bio) | |||
1119 | } | 1028 | } |
1120 | 1029 | ||
1121 | /* | 1030 | /* |
1122 | * Creates a little bio that is just does part of a bvec. | 1031 | * Creates a little bio that just does part of a bvec. |
1123 | */ | 1032 | */ |
1124 | static struct bio *split_bvec(struct bio *bio, sector_t sector, | 1033 | static struct bio *split_bvec(struct bio *bio, sector_t sector, |
1125 | unsigned short idx, unsigned int offset, | 1034 | unsigned short idx, unsigned int offset, |
@@ -1134,7 +1043,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, | |||
1134 | 1043 | ||
1135 | clone->bi_sector = sector; | 1044 | clone->bi_sector = sector; |
1136 | clone->bi_bdev = bio->bi_bdev; | 1045 | clone->bi_bdev = bio->bi_bdev; |
1137 | clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER; | 1046 | clone->bi_rw = bio->bi_rw; |
1138 | clone->bi_vcnt = 1; | 1047 | clone->bi_vcnt = 1; |
1139 | clone->bi_size = to_bytes(len); | 1048 | clone->bi_size = to_bytes(len); |
1140 | clone->bi_io_vec->bv_offset = offset; | 1049 | clone->bi_io_vec->bv_offset = offset; |
@@ -1161,7 +1070,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, | |||
1161 | 1070 | ||
1162 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); | 1071 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); |
1163 | __bio_clone(clone, bio); | 1072 | __bio_clone(clone, bio); |
1164 | clone->bi_rw &= ~REQ_HARDBARRIER; | ||
1165 | clone->bi_destructor = dm_bio_destructor; | 1073 | clone->bi_destructor = dm_bio_destructor; |
1166 | clone->bi_sector = sector; | 1074 | clone->bi_sector = sector; |
1167 | clone->bi_idx = idx; | 1075 | clone->bi_idx = idx; |
@@ -1225,16 +1133,15 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, | |||
1225 | __issue_target_request(ci, ti, request_nr, len); | 1133 | __issue_target_request(ci, ti, request_nr, len); |
1226 | } | 1134 | } |
1227 | 1135 | ||
1228 | static int __clone_and_map_empty_barrier(struct clone_info *ci) | 1136 | static int __clone_and_map_empty_flush(struct clone_info *ci) |
1229 | { | 1137 | { |
1230 | unsigned target_nr = 0; | 1138 | unsigned target_nr = 0; |
1231 | struct dm_target *ti; | 1139 | struct dm_target *ti; |
1232 | 1140 | ||
1141 | BUG_ON(bio_has_data(ci->bio)); | ||
1233 | while ((ti = dm_table_get_target(ci->map, target_nr++))) | 1142 | while ((ti = dm_table_get_target(ci->map, target_nr++))) |
1234 | __issue_target_requests(ci, ti, ti->num_flush_requests, 0); | 1143 | __issue_target_requests(ci, ti, ti->num_flush_requests, 0); |
1235 | 1144 | ||
1236 | ci->sector_count = 0; | ||
1237 | |||
1238 | return 0; | 1145 | return 0; |
1239 | } | 1146 | } |
1240 | 1147 | ||
@@ -1289,9 +1196,6 @@ static int __clone_and_map(struct clone_info *ci) | |||
1289 | sector_t len = 0, max; | 1196 | sector_t len = 0, max; |
1290 | struct dm_target_io *tio; | 1197 | struct dm_target_io *tio; |
1291 | 1198 | ||
1292 | if (unlikely(bio_empty_barrier(bio))) | ||
1293 | return __clone_and_map_empty_barrier(ci); | ||
1294 | |||
1295 | if (unlikely(bio->bi_rw & REQ_DISCARD)) | 1199 | if (unlikely(bio->bi_rw & REQ_DISCARD)) |
1296 | return __clone_and_map_discard(ci); | 1200 | return __clone_and_map_discard(ci); |
1297 | 1201 | ||
@@ -1383,16 +1287,11 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | |||
1383 | 1287 | ||
1384 | ci.map = dm_get_live_table(md); | 1288 | ci.map = dm_get_live_table(md); |
1385 | if (unlikely(!ci.map)) { | 1289 | if (unlikely(!ci.map)) { |
1386 | if (!(bio->bi_rw & REQ_HARDBARRIER)) | 1290 | bio_io_error(bio); |
1387 | bio_io_error(bio); | ||
1388 | else | ||
1389 | if (!md->barrier_error) | ||
1390 | md->barrier_error = -EIO; | ||
1391 | return; | 1291 | return; |
1392 | } | 1292 | } |
1393 | 1293 | ||
1394 | ci.md = md; | 1294 | ci.md = md; |
1395 | ci.bio = bio; | ||
1396 | ci.io = alloc_io(md); | 1295 | ci.io = alloc_io(md); |
1397 | ci.io->error = 0; | 1296 | ci.io->error = 0; |
1398 | atomic_set(&ci.io->io_count, 1); | 1297 | atomic_set(&ci.io->io_count, 1); |
@@ -1400,14 +1299,20 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) | |||
1400 | ci.io->md = md; | 1299 | ci.io->md = md; |
1401 | spin_lock_init(&ci.io->endio_lock); | 1300 | spin_lock_init(&ci.io->endio_lock); |
1402 | ci.sector = bio->bi_sector; | 1301 | ci.sector = bio->bi_sector; |
1403 | ci.sector_count = bio_sectors(bio); | ||
1404 | if (unlikely(bio_empty_barrier(bio))) | ||
1405 | ci.sector_count = 1; | ||
1406 | ci.idx = bio->bi_idx; | 1302 | ci.idx = bio->bi_idx; |
1407 | 1303 | ||
1408 | start_io_acct(ci.io); | 1304 | start_io_acct(ci.io); |
1409 | while (ci.sector_count && !error) | 1305 | if (bio->bi_rw & REQ_FLUSH) { |
1410 | error = __clone_and_map(&ci); | 1306 | ci.bio = &ci.md->flush_bio; |
1307 | ci.sector_count = 0; | ||
1308 | error = __clone_and_map_empty_flush(&ci); | ||
1309 | /* dec_pending submits any data associated with flush */ | ||
1310 | } else { | ||
1311 | ci.bio = bio; | ||
1312 | ci.sector_count = bio_sectors(bio); | ||
1313 | while (ci.sector_count && !error) | ||
1314 | error = __clone_and_map(&ci); | ||
1315 | } | ||
1411 | 1316 | ||
1412 | /* drop the extra reference count */ | 1317 | /* drop the extra reference count */ |
1413 | dec_pending(ci.io, error); | 1318 | dec_pending(ci.io, error); |
@@ -1491,22 +1396,14 @@ static int _dm_request(struct request_queue *q, struct bio *bio) | |||
1491 | part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); | 1396 | part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); |
1492 | part_stat_unlock(); | 1397 | part_stat_unlock(); |
1493 | 1398 | ||
1494 | /* | 1399 | /* if we're suspended, we have to queue this io for later */ |
1495 | * If we're suspended or the thread is processing barriers | 1400 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { |
1496 | * we have to queue this io for later. | ||
1497 | */ | ||
1498 | if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || | ||
1499 | unlikely(bio->bi_rw & REQ_HARDBARRIER)) { | ||
1500 | up_read(&md->io_lock); | 1401 | up_read(&md->io_lock); |
1501 | 1402 | ||
1502 | if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && | 1403 | if (bio_rw(bio) != READA) |
1503 | bio_rw(bio) == READA) { | 1404 | queue_io(md, bio); |
1405 | else | ||
1504 | bio_io_error(bio); | 1406 | bio_io_error(bio); |
1505 | return 0; | ||
1506 | } | ||
1507 | |||
1508 | queue_io(md, bio); | ||
1509 | |||
1510 | return 0; | 1407 | return 0; |
1511 | } | 1408 | } |
1512 | 1409 | ||
@@ -1537,14 +1434,6 @@ static int dm_request(struct request_queue *q, struct bio *bio) | |||
1537 | return _dm_request(q, bio); | 1434 | return _dm_request(q, bio); |
1538 | } | 1435 | } |
1539 | 1436 | ||
1540 | static bool dm_rq_is_flush_request(struct request *rq) | ||
1541 | { | ||
1542 | if (rq->cmd_flags & REQ_FLUSH) | ||
1543 | return true; | ||
1544 | else | ||
1545 | return false; | ||
1546 | } | ||
1547 | |||
1548 | void dm_dispatch_request(struct request *rq) | 1437 | void dm_dispatch_request(struct request *rq) |
1549 | { | 1438 | { |
1550 | int r; | 1439 | int r; |
@@ -1592,22 +1481,15 @@ static int setup_clone(struct request *clone, struct request *rq, | |||
1592 | { | 1481 | { |
1593 | int r; | 1482 | int r; |
1594 | 1483 | ||
1595 | if (dm_rq_is_flush_request(rq)) { | 1484 | r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, |
1596 | blk_rq_init(NULL, clone); | 1485 | dm_rq_bio_constructor, tio); |
1597 | clone->cmd_type = REQ_TYPE_FS; | 1486 | if (r) |
1598 | clone->cmd_flags |= (REQ_HARDBARRIER | WRITE); | 1487 | return r; |
1599 | } else { | ||
1600 | r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, | ||
1601 | dm_rq_bio_constructor, tio); | ||
1602 | if (r) | ||
1603 | return r; | ||
1604 | |||
1605 | clone->cmd = rq->cmd; | ||
1606 | clone->cmd_len = rq->cmd_len; | ||
1607 | clone->sense = rq->sense; | ||
1608 | clone->buffer = rq->buffer; | ||
1609 | } | ||
1610 | 1488 | ||
1489 | clone->cmd = rq->cmd; | ||
1490 | clone->cmd_len = rq->cmd_len; | ||
1491 | clone->sense = rq->sense; | ||
1492 | clone->buffer = rq->buffer; | ||
1611 | clone->end_io = end_clone_request; | 1493 | clone->end_io = end_clone_request; |
1612 | clone->end_io_data = tio; | 1494 | clone->end_io_data = tio; |
1613 | 1495 | ||
@@ -1648,9 +1530,6 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq) | |||
1648 | struct mapped_device *md = q->queuedata; | 1530 | struct mapped_device *md = q->queuedata; |
1649 | struct request *clone; | 1531 | struct request *clone; |
1650 | 1532 | ||
1651 | if (unlikely(dm_rq_is_flush_request(rq))) | ||
1652 | return BLKPREP_OK; | ||
1653 | |||
1654 | if (unlikely(rq->special)) { | 1533 | if (unlikely(rq->special)) { |
1655 | DMWARN("Already has something in rq->special."); | 1534 | DMWARN("Already has something in rq->special."); |
1656 | return BLKPREP_KILL; | 1535 | return BLKPREP_KILL; |
@@ -1727,6 +1606,7 @@ static void dm_request_fn(struct request_queue *q) | |||
1727 | struct dm_table *map = dm_get_live_table(md); | 1606 | struct dm_table *map = dm_get_live_table(md); |
1728 | struct dm_target *ti; | 1607 | struct dm_target *ti; |
1729 | struct request *rq, *clone; | 1608 | struct request *rq, *clone; |
1609 | sector_t pos; | ||
1730 | 1610 | ||
1731 | /* | 1611 | /* |
1732 | * For suspend, check blk_queue_stopped() and increment | 1612 | * For suspend, check blk_queue_stopped() and increment |
@@ -1739,15 +1619,14 @@ static void dm_request_fn(struct request_queue *q) | |||
1739 | if (!rq) | 1619 | if (!rq) |
1740 | goto plug_and_out; | 1620 | goto plug_and_out; |
1741 | 1621 | ||
1742 | if (unlikely(dm_rq_is_flush_request(rq))) { | 1622 | /* always use block 0 to find the target for flushes for now */ |
1743 | BUG_ON(md->flush_request); | 1623 | pos = 0; |
1744 | md->flush_request = rq; | 1624 | if (!(rq->cmd_flags & REQ_FLUSH)) |
1745 | blk_start_request(rq); | 1625 | pos = blk_rq_pos(rq); |
1746 | queue_work(md->wq, &md->barrier_work); | 1626 | |
1747 | goto out; | 1627 | ti = dm_table_find_target(map, pos); |
1748 | } | 1628 | BUG_ON(!dm_target_is_valid(ti)); |
1749 | 1629 | ||
1750 | ti = dm_table_find_target(map, blk_rq_pos(rq)); | ||
1751 | if (ti->type->busy && ti->type->busy(ti)) | 1630 | if (ti->type->busy && ti->type->busy(ti)) |
1752 | goto plug_and_out; | 1631 | goto plug_and_out; |
1753 | 1632 | ||
@@ -1918,7 +1797,6 @@ out: | |||
1918 | static const struct block_device_operations dm_blk_dops; | 1797 | static const struct block_device_operations dm_blk_dops; |
1919 | 1798 | ||
1920 | static void dm_wq_work(struct work_struct *work); | 1799 | static void dm_wq_work(struct work_struct *work); |
1921 | static void dm_rq_barrier_work(struct work_struct *work); | ||
1922 | 1800 | ||
1923 | static void dm_init_md_queue(struct mapped_device *md) | 1801 | static void dm_init_md_queue(struct mapped_device *md) |
1924 | { | 1802 | { |
@@ -1940,6 +1818,7 @@ static void dm_init_md_queue(struct mapped_device *md) | |||
1940 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); | 1818 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); |
1941 | md->queue->unplug_fn = dm_unplug_all; | 1819 | md->queue->unplug_fn = dm_unplug_all; |
1942 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); | 1820 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); |
1821 | blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA); | ||
1943 | } | 1822 | } |
1944 | 1823 | ||
1945 | /* | 1824 | /* |
@@ -1972,7 +1851,6 @@ static struct mapped_device *alloc_dev(int minor) | |||
1972 | mutex_init(&md->suspend_lock); | 1851 | mutex_init(&md->suspend_lock); |
1973 | mutex_init(&md->type_lock); | 1852 | mutex_init(&md->type_lock); |
1974 | spin_lock_init(&md->deferred_lock); | 1853 | spin_lock_init(&md->deferred_lock); |
1975 | spin_lock_init(&md->barrier_error_lock); | ||
1976 | rwlock_init(&md->map_lock); | 1854 | rwlock_init(&md->map_lock); |
1977 | atomic_set(&md->holders, 1); | 1855 | atomic_set(&md->holders, 1); |
1978 | atomic_set(&md->open_count, 0); | 1856 | atomic_set(&md->open_count, 0); |
@@ -1995,7 +1873,6 @@ static struct mapped_device *alloc_dev(int minor) | |||
1995 | atomic_set(&md->pending[1], 0); | 1873 | atomic_set(&md->pending[1], 0); |
1996 | init_waitqueue_head(&md->wait); | 1874 | init_waitqueue_head(&md->wait); |
1997 | INIT_WORK(&md->work, dm_wq_work); | 1875 | INIT_WORK(&md->work, dm_wq_work); |
1998 | INIT_WORK(&md->barrier_work, dm_rq_barrier_work); | ||
1999 | init_waitqueue_head(&md->eventq); | 1876 | init_waitqueue_head(&md->eventq); |
2000 | 1877 | ||
2001 | md->disk->major = _major; | 1878 | md->disk->major = _major; |
@@ -2015,6 +1892,10 @@ static struct mapped_device *alloc_dev(int minor) | |||
2015 | if (!md->bdev) | 1892 | if (!md->bdev) |
2016 | goto bad_bdev; | 1893 | goto bad_bdev; |
2017 | 1894 | ||
1895 | bio_init(&md->flush_bio); | ||
1896 | md->flush_bio.bi_bdev = md->bdev; | ||
1897 | md->flush_bio.bi_rw = WRITE_FLUSH; | ||
1898 | |||
2018 | /* Populate the mapping, nobody knows we exist yet */ | 1899 | /* Populate the mapping, nobody knows we exist yet */ |
2019 | spin_lock(&_minor_lock); | 1900 | spin_lock(&_minor_lock); |
2020 | old_md = idr_replace(&_minor_idr, md, minor); | 1901 | old_md = idr_replace(&_minor_idr, md, minor); |
@@ -2245,7 +2126,6 @@ static int dm_init_request_based_queue(struct mapped_device *md) | |||
2245 | blk_queue_softirq_done(md->queue, dm_softirq_done); | 2126 | blk_queue_softirq_done(md->queue, dm_softirq_done); |
2246 | blk_queue_prep_rq(md->queue, dm_prep_fn); | 2127 | blk_queue_prep_rq(md->queue, dm_prep_fn); |
2247 | blk_queue_lld_busy(md->queue, dm_lld_busy); | 2128 | blk_queue_lld_busy(md->queue, dm_lld_busy); |
2248 | blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH); | ||
2249 | 2129 | ||
2250 | elv_register_queue(md->queue); | 2130 | elv_register_queue(md->queue); |
2251 | 2131 | ||
@@ -2406,43 +2286,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) | |||
2406 | return r; | 2286 | return r; |
2407 | } | 2287 | } |
2408 | 2288 | ||
2409 | static void dm_flush(struct mapped_device *md) | ||
2410 | { | ||
2411 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | ||
2412 | |||
2413 | bio_init(&md->barrier_bio); | ||
2414 | md->barrier_bio.bi_bdev = md->bdev; | ||
2415 | md->barrier_bio.bi_rw = WRITE_BARRIER; | ||
2416 | __split_and_process_bio(md, &md->barrier_bio); | ||
2417 | |||
2418 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | ||
2419 | } | ||
2420 | |||
2421 | static void process_barrier(struct mapped_device *md, struct bio *bio) | ||
2422 | { | ||
2423 | md->barrier_error = 0; | ||
2424 | |||
2425 | dm_flush(md); | ||
2426 | |||
2427 | if (!bio_empty_barrier(bio)) { | ||
2428 | __split_and_process_bio(md, bio); | ||
2429 | /* | ||
2430 | * If the request isn't supported, don't waste time with | ||
2431 | * the second flush. | ||
2432 | */ | ||
2433 | if (md->barrier_error != -EOPNOTSUPP) | ||
2434 | dm_flush(md); | ||
2435 | } | ||
2436 | |||
2437 | if (md->barrier_error != DM_ENDIO_REQUEUE) | ||
2438 | bio_endio(bio, md->barrier_error); | ||
2439 | else { | ||
2440 | spin_lock_irq(&md->deferred_lock); | ||
2441 | bio_list_add_head(&md->deferred, bio); | ||
2442 | spin_unlock_irq(&md->deferred_lock); | ||
2443 | } | ||
2444 | } | ||
2445 | |||
2446 | /* | 2289 | /* |
2447 | * Process the deferred bios | 2290 | * Process the deferred bios |
2448 | */ | 2291 | */ |
@@ -2452,33 +2295,27 @@ static void dm_wq_work(struct work_struct *work) | |||
2452 | work); | 2295 | work); |
2453 | struct bio *c; | 2296 | struct bio *c; |
2454 | 2297 | ||
2455 | down_write(&md->io_lock); | 2298 | down_read(&md->io_lock); |
2456 | 2299 | ||
2457 | while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { | 2300 | while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { |
2458 | spin_lock_irq(&md->deferred_lock); | 2301 | spin_lock_irq(&md->deferred_lock); |
2459 | c = bio_list_pop(&md->deferred); | 2302 | c = bio_list_pop(&md->deferred); |
2460 | spin_unlock_irq(&md->deferred_lock); | 2303 | spin_unlock_irq(&md->deferred_lock); |
2461 | 2304 | ||
2462 | if (!c) { | 2305 | if (!c) |
2463 | clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); | ||
2464 | break; | 2306 | break; |
2465 | } | ||
2466 | 2307 | ||
2467 | up_write(&md->io_lock); | 2308 | up_read(&md->io_lock); |
2468 | 2309 | ||
2469 | if (dm_request_based(md)) | 2310 | if (dm_request_based(md)) |
2470 | generic_make_request(c); | 2311 | generic_make_request(c); |
2471 | else { | 2312 | else |
2472 | if (c->bi_rw & REQ_HARDBARRIER) | 2313 | __split_and_process_bio(md, c); |
2473 | process_barrier(md, c); | ||
2474 | else | ||
2475 | __split_and_process_bio(md, c); | ||
2476 | } | ||
2477 | 2314 | ||
2478 | down_write(&md->io_lock); | 2315 | down_read(&md->io_lock); |
2479 | } | 2316 | } |
2480 | 2317 | ||
2481 | up_write(&md->io_lock); | 2318 | up_read(&md->io_lock); |
2482 | } | 2319 | } |
2483 | 2320 | ||
2484 | static void dm_queue_flush(struct mapped_device *md) | 2321 | static void dm_queue_flush(struct mapped_device *md) |
@@ -2488,73 +2325,6 @@ static void dm_queue_flush(struct mapped_device *md) | |||
2488 | queue_work(md->wq, &md->work); | 2325 | queue_work(md->wq, &md->work); |
2489 | } | 2326 | } |
2490 | 2327 | ||
2491 | static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr) | ||
2492 | { | ||
2493 | struct dm_rq_target_io *tio = clone->end_io_data; | ||
2494 | |||
2495 | tio->info.target_request_nr = request_nr; | ||
2496 | } | ||
2497 | |||
2498 | /* Issue barrier requests to targets and wait for their completion. */ | ||
2499 | static int dm_rq_barrier(struct mapped_device *md) | ||
2500 | { | ||
2501 | int i, j; | ||
2502 | struct dm_table *map = dm_get_live_table(md); | ||
2503 | unsigned num_targets = dm_table_get_num_targets(map); | ||
2504 | struct dm_target *ti; | ||
2505 | struct request *clone; | ||
2506 | |||
2507 | md->barrier_error = 0; | ||
2508 | |||
2509 | for (i = 0; i < num_targets; i++) { | ||
2510 | ti = dm_table_get_target(map, i); | ||
2511 | for (j = 0; j < ti->num_flush_requests; j++) { | ||
2512 | clone = clone_rq(md->flush_request, md, GFP_NOIO); | ||
2513 | dm_rq_set_target_request_nr(clone, j); | ||
2514 | atomic_inc(&md->pending[rq_data_dir(clone)]); | ||
2515 | map_request(ti, clone, md); | ||
2516 | } | ||
2517 | } | ||
2518 | |||
2519 | dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); | ||
2520 | dm_table_put(map); | ||
2521 | |||
2522 | return md->barrier_error; | ||
2523 | } | ||
2524 | |||
2525 | static void dm_rq_barrier_work(struct work_struct *work) | ||
2526 | { | ||
2527 | int error; | ||
2528 | struct mapped_device *md = container_of(work, struct mapped_device, | ||
2529 | barrier_work); | ||
2530 | struct request_queue *q = md->queue; | ||
2531 | struct request *rq; | ||
2532 | unsigned long flags; | ||
2533 | |||
2534 | /* | ||
2535 | * Hold the md reference here and leave it at the last part so that | ||
2536 | * the md can't be deleted by device opener when the barrier request | ||
2537 | * completes. | ||
2538 | */ | ||
2539 | dm_get(md); | ||
2540 | |||
2541 | error = dm_rq_barrier(md); | ||
2542 | |||
2543 | rq = md->flush_request; | ||
2544 | md->flush_request = NULL; | ||
2545 | |||
2546 | if (error == DM_ENDIO_REQUEUE) { | ||
2547 | spin_lock_irqsave(q->queue_lock, flags); | ||
2548 | blk_requeue_request(q, rq); | ||
2549 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
2550 | } else | ||
2551 | blk_end_request_all(rq, error); | ||
2552 | |||
2553 | blk_run_queue(q); | ||
2554 | |||
2555 | dm_put(md); | ||
2556 | } | ||
2557 | |||
2558 | /* | 2328 | /* |
2559 | * Swap in a new table, returning the old one for the caller to destroy. | 2329 | * Swap in a new table, returning the old one for the caller to destroy. |
2560 | */ | 2330 | */ |
@@ -2677,23 +2447,17 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) | |||
2677 | * | 2447 | * |
2678 | * To get all processes out of __split_and_process_bio in dm_request, | 2448 | * To get all processes out of __split_and_process_bio in dm_request, |
2679 | * we take the write lock. To prevent any process from reentering | 2449 | * we take the write lock. To prevent any process from reentering |
2680 | * __split_and_process_bio from dm_request, we set | 2450 | * __split_and_process_bio from dm_request and quiesce the thread |
2681 | * DMF_QUEUE_IO_TO_THREAD. | 2451 | * (dm_wq_work), we set BMF_BLOCK_IO_FOR_SUSPEND and call |
2682 | * | 2452 | * flush_workqueue(md->wq). |
2683 | * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND | ||
2684 | * and call flush_workqueue(md->wq). flush_workqueue will wait until | ||
2685 | * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any | ||
2686 | * further calls to __split_and_process_bio from dm_wq_work. | ||
2687 | */ | 2453 | */ |
2688 | down_write(&md->io_lock); | 2454 | down_write(&md->io_lock); |
2689 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); | 2455 | set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); |
2690 | set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); | ||
2691 | up_write(&md->io_lock); | 2456 | up_write(&md->io_lock); |
2692 | 2457 | ||
2693 | /* | 2458 | /* |
2694 | * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which | 2459 | * Stop md->queue before flushing md->wq in case request-based |
2695 | * can be kicked until md->queue is stopped. So stop md->queue before | 2460 | * dm defers requests to md->wq from md->queue. |
2696 | * flushing md->wq. | ||
2697 | */ | 2461 | */ |
2698 | if (dm_request_based(md)) | 2462 | if (dm_request_based(md)) |
2699 | stop_queue(md->queue); | 2463 | stop_queue(md->queue); |
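
With the conversion above, bio-based dm handles a flush-with-data bio in two phases: the empty md->flush_bio is cloned to every target first, and dec_pending() then re-queues the data portion with REQ_FLUSH cleared. The remaining queue-level step is advertising what the device supports, as the dm_init_md_queue() hunk does. A hedged sketch of that call (the wrapper function name is invented; REQ_FLUSH/REQ_FUA are the real flags):

/*
 * Hedged sketch: advertising write-cache behaviour under the new scheme.
 */
static void example_init_queue(struct request_queue *q)
{
	/*
	 * Declare that the device has a volatile write cache (honours
	 * REQ_FLUSH) and supports forced-unit-access writes (REQ_FUA).
	 * The old blk_queue_ordered(q, QUEUE_ORDERED_*) setup is gone.
	 */
	blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
}

For request-based queues, the block layer's new flush machinery (blk-flush.c, per the shortlog) is expected to sequence a flush request into preflush, data, and postflush steps based on these flags, which is why the driver-side barrier bookkeeping removed above is no longer needed.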
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index ba19060bcf3f..8a2f767f26d8 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -294,8 +294,8 @@ static int linear_make_request (mddev_t *mddev, struct bio *bio) | |||
294 | dev_info_t *tmp_dev; | 294 | dev_info_t *tmp_dev; |
295 | sector_t start_sector; | 295 | sector_t start_sector; |
296 | 296 | ||
297 | if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { | 297 | if (unlikely(bio->bi_rw & REQ_FLUSH)) { |
298 | md_barrier_request(mddev, bio); | 298 | md_flush_request(mddev, bio); |
299 | return 0; | 299 | return 0; |
300 | } | 300 | } |
301 | 301 | ||
diff --git a/drivers/md/md.c b/drivers/md/md.c
index dbf822df942a..225815197a3d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -227,12 +227,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio) | |||
227 | return 0; | 227 | return 0; |
228 | } | 228 | } |
229 | rcu_read_lock(); | 229 | rcu_read_lock(); |
230 | if (mddev->suspended || mddev->barrier) { | 230 | if (mddev->suspended) { |
231 | DEFINE_WAIT(__wait); | 231 | DEFINE_WAIT(__wait); |
232 | for (;;) { | 232 | for (;;) { |
233 | prepare_to_wait(&mddev->sb_wait, &__wait, | 233 | prepare_to_wait(&mddev->sb_wait, &__wait, |
234 | TASK_UNINTERRUPTIBLE); | 234 | TASK_UNINTERRUPTIBLE); |
235 | if (!mddev->suspended && !mddev->barrier) | 235 | if (!mddev->suspended) |
236 | break; | 236 | break; |
237 | rcu_read_unlock(); | 237 | rcu_read_unlock(); |
238 | schedule(); | 238 | schedule(); |
@@ -283,40 +283,29 @@ EXPORT_SYMBOL_GPL(mddev_resume); | |||
283 | 283 | ||
284 | int mddev_congested(mddev_t *mddev, int bits) | 284 | int mddev_congested(mddev_t *mddev, int bits) |
285 | { | 285 | { |
286 | if (mddev->barrier) | ||
287 | return 1; | ||
288 | return mddev->suspended; | 286 | return mddev->suspended; |
289 | } | 287 | } |
290 | EXPORT_SYMBOL(mddev_congested); | 288 | EXPORT_SYMBOL(mddev_congested); |
291 | 289 | ||
292 | /* | 290 | /* |
293 | * Generic barrier handling for md | 291 | * Generic flush handling for md |
294 | */ | 292 | */ |
295 | 293 | ||
296 | #define POST_REQUEST_BARRIER ((void*)1) | 294 | static void md_end_flush(struct bio *bio, int err) |
297 | |||
298 | static void md_end_barrier(struct bio *bio, int err) | ||
299 | { | 295 | { |
300 | mdk_rdev_t *rdev = bio->bi_private; | 296 | mdk_rdev_t *rdev = bio->bi_private; |
301 | mddev_t *mddev = rdev->mddev; | 297 | mddev_t *mddev = rdev->mddev; |
302 | if (err == -EOPNOTSUPP && mddev->barrier != POST_REQUEST_BARRIER) | ||
303 | set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags); | ||
304 | 298 | ||
305 | rdev_dec_pending(rdev, mddev); | 299 | rdev_dec_pending(rdev, mddev); |
306 | 300 | ||
307 | if (atomic_dec_and_test(&mddev->flush_pending)) { | 301 | if (atomic_dec_and_test(&mddev->flush_pending)) { |
308 | if (mddev->barrier == POST_REQUEST_BARRIER) { | 302 | /* The pre-request flush has finished */ |
309 | /* This was a post-request barrier */ | 303 | schedule_work(&mddev->flush_work); |
310 | mddev->barrier = NULL; | ||
311 | wake_up(&mddev->sb_wait); | ||
312 | } else | ||
313 | /* The pre-request barrier has finished */ | ||
314 | schedule_work(&mddev->barrier_work); | ||
315 | } | 304 | } |
316 | bio_put(bio); | 305 | bio_put(bio); |
317 | } | 306 | } |
318 | 307 | ||
319 | static void submit_barriers(mddev_t *mddev) | 308 | static void submit_flushes(mddev_t *mddev) |
320 | { | 309 | { |
321 | mdk_rdev_t *rdev; | 310 | mdk_rdev_t *rdev; |
322 | 311 | ||
@@ -333,60 +322,56 @@ static void submit_barriers(mddev_t *mddev) | |||
333 | atomic_inc(&rdev->nr_pending); | 322 | atomic_inc(&rdev->nr_pending); |
334 | rcu_read_unlock(); | 323 | rcu_read_unlock(); |
335 | bi = bio_alloc(GFP_KERNEL, 0); | 324 | bi = bio_alloc(GFP_KERNEL, 0); |
336 | bi->bi_end_io = md_end_barrier; | 325 | bi->bi_end_io = md_end_flush; |
337 | bi->bi_private = rdev; | 326 | bi->bi_private = rdev; |
338 | bi->bi_bdev = rdev->bdev; | 327 | bi->bi_bdev = rdev->bdev; |
339 | atomic_inc(&mddev->flush_pending); | 328 | atomic_inc(&mddev->flush_pending); |
340 | submit_bio(WRITE_BARRIER, bi); | 329 | submit_bio(WRITE_FLUSH, bi); |
341 | rcu_read_lock(); | 330 | rcu_read_lock(); |
342 | rdev_dec_pending(rdev, mddev); | 331 | rdev_dec_pending(rdev, mddev); |
343 | } | 332 | } |
344 | rcu_read_unlock(); | 333 | rcu_read_unlock(); |
345 | } | 334 | } |
346 | 335 | ||
347 | static void md_submit_barrier(struct work_struct *ws) | 336 | static void md_submit_flush_data(struct work_struct *ws) |
348 | { | 337 | { |
349 | mddev_t *mddev = container_of(ws, mddev_t, barrier_work); | 338 | mddev_t *mddev = container_of(ws, mddev_t, flush_work); |
350 | struct bio *bio = mddev->barrier; | 339 | struct bio *bio = mddev->flush_bio; |
351 | 340 | ||
352 | atomic_set(&mddev->flush_pending, 1); | 341 | atomic_set(&mddev->flush_pending, 1); |
353 | 342 | ||
354 | if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) | 343 | if (bio->bi_size == 0) |
355 | bio_endio(bio, -EOPNOTSUPP); | ||
356 | else if (bio->bi_size == 0) | ||
357 | /* an empty barrier - all done */ | 344 | /* an empty barrier - all done */ |
358 | bio_endio(bio, 0); | 345 | bio_endio(bio, 0); |
359 | else { | 346 | else { |
360 | bio->bi_rw &= ~REQ_HARDBARRIER; | 347 | bio->bi_rw &= ~REQ_FLUSH; |
361 | if (mddev->pers->make_request(mddev, bio)) | 348 | if (mddev->pers->make_request(mddev, bio)) |
362 | generic_make_request(bio); | 349 | generic_make_request(bio); |
363 | mddev->barrier = POST_REQUEST_BARRIER; | ||
364 | submit_barriers(mddev); | ||
365 | } | 350 | } |
366 | if (atomic_dec_and_test(&mddev->flush_pending)) { | 351 | if (atomic_dec_and_test(&mddev->flush_pending)) { |
367 | mddev->barrier = NULL; | 352 | mddev->flush_bio = NULL; |
368 | wake_up(&mddev->sb_wait); | 353 | wake_up(&mddev->sb_wait); |
369 | } | 354 | } |
370 | } | 355 | } |
371 | 356 | ||
372 | void md_barrier_request(mddev_t *mddev, struct bio *bio) | 357 | void md_flush_request(mddev_t *mddev, struct bio *bio) |
373 | { | 358 | { |
374 | spin_lock_irq(&mddev->write_lock); | 359 | spin_lock_irq(&mddev->write_lock); |
375 | wait_event_lock_irq(mddev->sb_wait, | 360 | wait_event_lock_irq(mddev->sb_wait, |
376 | !mddev->barrier, | 361 | !mddev->flush_bio, |
377 | mddev->write_lock, /*nothing*/); | 362 | mddev->write_lock, /*nothing*/); |
378 | mddev->barrier = bio; | 363 | mddev->flush_bio = bio; |
379 | spin_unlock_irq(&mddev->write_lock); | 364 | spin_unlock_irq(&mddev->write_lock); |
380 | 365 | ||
381 | atomic_set(&mddev->flush_pending, 1); | 366 | atomic_set(&mddev->flush_pending, 1); |
382 | INIT_WORK(&mddev->barrier_work, md_submit_barrier); | 367 | INIT_WORK(&mddev->flush_work, md_submit_flush_data); |
383 | 368 | ||
384 | submit_barriers(mddev); | 369 | submit_flushes(mddev); |
385 | 370 | ||
386 | if (atomic_dec_and_test(&mddev->flush_pending)) | 371 | if (atomic_dec_and_test(&mddev->flush_pending)) |
387 | schedule_work(&mddev->barrier_work); | 372 | schedule_work(&mddev->flush_work); |
388 | } | 373 | } |
389 | EXPORT_SYMBOL(md_barrier_request); | 374 | EXPORT_SYMBOL(md_flush_request); |
390 | 375 | ||
391 | /* Support for plugging. | 376 | /* Support for plugging. |
392 | * This mirrors the plugging support in request_queue, but does not | 377 | * This mirrors the plugging support in request_queue, but does not |
@@ -697,31 +682,6 @@ static void super_written(struct bio *bio, int error) | |||
697 | bio_put(bio); | 682 | bio_put(bio); |
698 | } | 683 | } |
699 | 684 | ||
700 | static void super_written_barrier(struct bio *bio, int error) | ||
701 | { | ||
702 | struct bio *bio2 = bio->bi_private; | ||
703 | mdk_rdev_t *rdev = bio2->bi_private; | ||
704 | mddev_t *mddev = rdev->mddev; | ||
705 | |||
706 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && | ||
707 | error == -EOPNOTSUPP) { | ||
708 | unsigned long flags; | ||
709 | /* barriers don't appear to be supported :-( */ | ||
710 | set_bit(BarriersNotsupp, &rdev->flags); | ||
711 | mddev->barriers_work = 0; | ||
712 | spin_lock_irqsave(&mddev->write_lock, flags); | ||
713 | bio2->bi_next = mddev->biolist; | ||
714 | mddev->biolist = bio2; | ||
715 | spin_unlock_irqrestore(&mddev->write_lock, flags); | ||
716 | wake_up(&mddev->sb_wait); | ||
717 | bio_put(bio); | ||
718 | } else { | ||
719 | bio_put(bio2); | ||
720 | bio->bi_private = rdev; | ||
721 | super_written(bio, error); | ||
722 | } | ||
723 | } | ||
724 | |||
725 | void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | 685 | void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, |
726 | sector_t sector, int size, struct page *page) | 686 | sector_t sector, int size, struct page *page) |
727 | { | 687 | { |
@@ -730,51 +690,28 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | |||
730 | * and decrement it on completion, waking up sb_wait | 690 | * and decrement it on completion, waking up sb_wait |
731 | * if zero is reached. | 691 | * if zero is reached. |
732 | * If an error occurred, call md_error | 692 | * If an error occurred, call md_error |
733 | * | ||
734 | * As we might need to resubmit the request if REQ_HARDBARRIER | ||
735 | * causes ENOTSUPP, we allocate a spare bio... | ||
736 | */ | 693 | */ |
737 | struct bio *bio = bio_alloc(GFP_NOIO, 1); | 694 | struct bio *bio = bio_alloc(GFP_NOIO, 1); |
738 | int rw = REQ_WRITE | REQ_SYNC | REQ_UNPLUG; | ||
739 | 695 | ||
740 | bio->bi_bdev = rdev->bdev; | 696 | bio->bi_bdev = rdev->bdev; |
741 | bio->bi_sector = sector; | 697 | bio->bi_sector = sector; |
742 | bio_add_page(bio, page, size, 0); | 698 | bio_add_page(bio, page, size, 0); |
743 | bio->bi_private = rdev; | 699 | bio->bi_private = rdev; |
744 | bio->bi_end_io = super_written; | 700 | bio->bi_end_io = super_written; |
745 | bio->bi_rw = rw; | ||
746 | 701 | ||
747 | atomic_inc(&mddev->pending_writes); | 702 | atomic_inc(&mddev->pending_writes); |
748 | if (!test_bit(BarriersNotsupp, &rdev->flags)) { | 703 | submit_bio(REQ_WRITE | REQ_SYNC | REQ_UNPLUG | REQ_FLUSH | REQ_FUA, |
749 | struct bio *rbio; | 704 | bio); |
750 | rw |= REQ_HARDBARRIER; | ||
751 | rbio = bio_clone(bio, GFP_NOIO); | ||
752 | rbio->bi_private = bio; | ||
753 | rbio->bi_end_io = super_written_barrier; | ||
754 | submit_bio(rw, rbio); | ||
755 | } else | ||
756 | submit_bio(rw, bio); | ||
757 | } | 705 | } |
758 | 706 | ||
759 | void md_super_wait(mddev_t *mddev) | 707 | void md_super_wait(mddev_t *mddev) |
760 | { | 708 | { |
761 | /* wait for all superblock writes that were scheduled to complete. | 709 | /* wait for all superblock writes that were scheduled to complete */ |
762 | * if any had to be retried (due to BARRIER problems), retry them | ||
763 | */ | ||
764 | DEFINE_WAIT(wq); | 710 | DEFINE_WAIT(wq); |
765 | for(;;) { | 711 | for(;;) { |
766 | prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE); | 712 | prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE); |
767 | if (atomic_read(&mddev->pending_writes)==0) | 713 | if (atomic_read(&mddev->pending_writes)==0) |
768 | break; | 714 | break; |
769 | while (mddev->biolist) { | ||
770 | struct bio *bio; | ||
771 | spin_lock_irq(&mddev->write_lock); | ||
772 | bio = mddev->biolist; | ||
773 | mddev->biolist = bio->bi_next ; | ||
774 | bio->bi_next = NULL; | ||
775 | spin_unlock_irq(&mddev->write_lock); | ||
776 | submit_bio(bio->bi_rw, bio); | ||
777 | } | ||
778 | schedule(); | 715 | schedule(); |
779 | } | 716 | } |
780 | finish_wait(&mddev->sb_wait, &wq); | 717 | finish_wait(&mddev->sb_wait, &wq); |
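With the clone-and-retry machinery removed above, md_super_write() submits each superblock write exactly once, with REQ_FLUSH and REQ_FUA already set, and md_super_wait() shrinks to sleeping until pending_writes reaches zero. The pthread sketch below models only that simplified contract; the function names are invented and none of the actual I/O is represented.

/*
 * User-space model of the reduced md_super_wait() contract: wait until
 * the count of in-flight superblock writes drops to zero.  Compile with
 * -pthread.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done = PTHREAD_COND_INITIALIZER;
static int pending_writes;

static void super_write_begin(void)
{
	pthread_mutex_lock(&lock);
	pending_writes++;
	pthread_mutex_unlock(&lock);
}

static void super_write_end(void)		/* models super_written() */
{
	pthread_mutex_lock(&lock);
	if (--pending_writes == 0)
		pthread_cond_broadcast(&done);	/* models wake_up(&mddev->sb_wait) */
	pthread_mutex_unlock(&lock);
}

static void super_wait(void)			/* models md_super_wait() */
{
	pthread_mutex_lock(&lock);
	while (pending_writes != 0)
		pthread_cond_wait(&done, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	super_write_begin();
	super_write_end();
	super_wait();
	printf("superblock stable: flush+FUA was requested on the single submission\n");
	return 0;
}

Because a flush/FUA write fails or succeeds like any other write, there is no -EOPNOTSUPP outcome left to replay, which is why the biolist walk in md_super_wait() could be deleted.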
@@ -1071,7 +1008,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1071 | clear_bit(Faulty, &rdev->flags); | 1008 | clear_bit(Faulty, &rdev->flags); |
1072 | clear_bit(In_sync, &rdev->flags); | 1009 | clear_bit(In_sync, &rdev->flags); |
1073 | clear_bit(WriteMostly, &rdev->flags); | 1010 | clear_bit(WriteMostly, &rdev->flags); |
1074 | clear_bit(BarriersNotsupp, &rdev->flags); | ||
1075 | 1011 | ||
1076 | if (mddev->raid_disks == 0) { | 1012 | if (mddev->raid_disks == 0) { |
1077 | mddev->major_version = 0; | 1013 | mddev->major_version = 0; |
@@ -1486,7 +1422,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1486 | clear_bit(Faulty, &rdev->flags); | 1422 | clear_bit(Faulty, &rdev->flags); |
1487 | clear_bit(In_sync, &rdev->flags); | 1423 | clear_bit(In_sync, &rdev->flags); |
1488 | clear_bit(WriteMostly, &rdev->flags); | 1424 | clear_bit(WriteMostly, &rdev->flags); |
1489 | clear_bit(BarriersNotsupp, &rdev->flags); | ||
1490 | 1425 | ||
1491 | if (mddev->raid_disks == 0) { | 1426 | if (mddev->raid_disks == 0) { |
1492 | mddev->major_version = 1; | 1427 | mddev->major_version = 1; |
@@ -4505,7 +4440,6 @@ int md_run(mddev_t *mddev) | |||
4505 | /* may be over-ridden by personality */ | 4440 | /* may be over-ridden by personality */ |
4506 | mddev->resync_max_sectors = mddev->dev_sectors; | 4441 | mddev->resync_max_sectors = mddev->dev_sectors; |
4507 | 4442 | ||
4508 | mddev->barriers_work = 1; | ||
4509 | mddev->ok_start_degraded = start_dirty_degraded; | 4443 | mddev->ok_start_degraded = start_dirty_degraded; |
4510 | 4444 | ||
4511 | if (start_readonly && mddev->ro == 0) | 4445 | if (start_readonly && mddev->ro == 0) |
@@ -4684,7 +4618,6 @@ static void md_clean(mddev_t *mddev) | |||
4684 | mddev->recovery = 0; | 4618 | mddev->recovery = 0; |
4685 | mddev->in_sync = 0; | 4619 | mddev->in_sync = 0; |
4686 | mddev->degraded = 0; | 4620 | mddev->degraded = 0; |
4687 | mddev->barriers_work = 0; | ||
4688 | mddev->safemode = 0; | 4621 | mddev->safemode = 0; |
4689 | mddev->bitmap_info.offset = 0; | 4622 | mddev->bitmap_info.offset = 0; |
4690 | mddev->bitmap_info.default_offset = 0; | 4623 | mddev->bitmap_info.default_offset = 0; |
diff --git a/drivers/md/md.h b/drivers/md/md.h index 3931299788dc..112a2c32db0c 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -87,7 +87,6 @@ struct mdk_rdev_s | |||
87 | #define Faulty 1 /* device is known to have a fault */ | 87 | #define Faulty 1 /* device is known to have a fault */ |
88 | #define In_sync 2 /* device is in_sync with rest of array */ | 88 | #define In_sync 2 /* device is in_sync with rest of array */ |
89 | #define WriteMostly 4 /* Avoid reading if at all possible */ | 89 | #define WriteMostly 4 /* Avoid reading if at all possible */ |
90 | #define BarriersNotsupp 5 /* REQ_HARDBARRIER is not supported */ | ||
91 | #define AllReserved 6 /* If whole device is reserved for | 90 | #define AllReserved 6 /* If whole device is reserved for |
92 | * one array */ | 91 | * one array */ |
93 | #define AutoDetected 7 /* added by auto-detect */ | 92 | #define AutoDetected 7 /* added by auto-detect */ |
@@ -273,13 +272,6 @@ struct mddev_s | |||
273 | int degraded; /* whether md should consider | 272 | int degraded; /* whether md should consider |
274 | * adding a spare | 273 | * adding a spare |
275 | */ | 274 | */ |
276 | int barriers_work; /* initialised to true, cleared as soon | ||
277 | * as a barrier request to slave | ||
278 | * fails. Only supported | ||
279 | */ | ||
280 | struct bio *biolist; /* bios that need to be retried | ||
281 | * because REQ_HARDBARRIER is not supported | ||
282 | */ | ||
283 | 275 | ||
284 | atomic_t recovery_active; /* blocks scheduled, but not written */ | 276 | atomic_t recovery_active; /* blocks scheduled, but not written */ |
285 | wait_queue_head_t recovery_wait; | 277 | wait_queue_head_t recovery_wait; |
@@ -339,16 +331,13 @@ struct mddev_s | |||
339 | struct attribute_group *to_remove; | 331 | struct attribute_group *to_remove; |
340 | struct plug_handle *plug; /* if used by personality */ | 332 | struct plug_handle *plug; /* if used by personality */ |
341 | 333 | ||
342 | /* Generic barrier handling. | 334 | /* Generic flush handling. |
343 | * If there is a pending barrier request, all other | 335 | * The last to finish preflush schedules a worker to submit |
344 | * writes are blocked while the devices are flushed. | 336 | * the rest of the request (without the REQ_FLUSH flag). |
345 | * The last to finish a flush schedules a worker to | ||
346 | * submit the barrier request (without the barrier flag), | ||
347 | * then submit more flush requests. | ||
348 | */ | 337 | */ |
349 | struct bio *barrier; | 338 | struct bio *flush_bio; |
350 | atomic_t flush_pending; | 339 | atomic_t flush_pending; |
351 | struct work_struct barrier_work; | 340 | struct work_struct flush_work; |
352 | struct work_struct event_work; /* used by dm to report failure event */ | 341 | struct work_struct event_work; /* used by dm to report failure event */ |
353 | }; | 342 | }; |
354 | 343 | ||
@@ -502,7 +491,7 @@ extern void md_done_sync(mddev_t *mddev, int blocks, int ok); | |||
502 | extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); | 491 | extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); |
503 | 492 | ||
504 | extern int mddev_congested(mddev_t *mddev, int bits); | 493 | extern int mddev_congested(mddev_t *mddev, int bits); |
505 | extern void md_barrier_request(mddev_t *mddev, struct bio *bio); | 494 | extern void md_flush_request(mddev_t *mddev, struct bio *bio); |
506 | extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | 495 | extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, |
507 | sector_t sector, int size, struct page *page); | 496 | sector_t sector, int size, struct page *page); |
508 | extern void md_super_wait(mddev_t *mddev); | 497 | extern void md_super_wait(mddev_t *mddev); |
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 0307d217e7a4..6d7ddf32ef2e 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -142,8 +142,8 @@ static int multipath_make_request(mddev_t *mddev, struct bio * bio) | |||
142 | struct multipath_bh * mp_bh; | 142 | struct multipath_bh * mp_bh; |
143 | struct multipath_info *multipath; | 143 | struct multipath_info *multipath; |
144 | 144 | ||
145 | if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { | 145 | if (unlikely(bio->bi_rw & REQ_FLUSH)) { |
146 | md_barrier_request(mddev, bio); | 146 | md_flush_request(mddev, bio); |
147 | return 0; | 147 | return 0; |
148 | } | 148 | } |
149 | 149 | ||
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 6f7af46d623c..a39f4c355e55 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -483,8 +483,8 @@ static int raid0_make_request(mddev_t *mddev, struct bio *bio) | |||
483 | struct strip_zone *zone; | 483 | struct strip_zone *zone; |
484 | mdk_rdev_t *tmp_dev; | 484 | mdk_rdev_t *tmp_dev; |
485 | 485 | ||
486 | if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { | 486 | if (unlikely(bio->bi_rw & REQ_FLUSH)) { |
487 | md_barrier_request(mddev, bio); | 487 | md_flush_request(mddev, bio); |
488 | return 0; | 488 | return 0; |
489 | } | 489 | } |
490 | 490 | ||
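multipath and raid0 above, and raid10 and raid5 further down, now open make_request() with the same two-line test: a bio carrying REQ_FLUSH is handed to the generic md_flush_request() path and is never treated as an ordering barrier by the personality. A self-contained model of that dispatch follows; the flag value and the types are placeholders, not the kernel's.

#include <stdio.h>

#define REQ_FLUSH_TOY (1u << 0)		/* placeholder bit, not the kernel value */

struct toy_bio { unsigned int bi_rw; };

static void md_flush_request_model(struct toy_bio *bio)
{
	printf("generic md layer: preflush all members, then submit the payload\n");
}

static int personality_make_request(struct toy_bio *bio)
{
	if (bio->bi_rw & REQ_FLUSH_TOY) {
		md_flush_request_model(bio);
		return 0;		/* handled: nothing personality-specific to do */
	}
	printf("personality: map and submit a normal request\n");
	return 0;
}

int main(void)
{
	struct toy_bio flush = { .bi_rw = REQ_FLUSH_TOY };
	struct toy_bio plain = { .bi_rw = 0 };

	personality_make_request(&flush);
	personality_make_request(&plain);
	return 0;
}

raid1 is the one exception: rather than delegating, it keeps the flush/FUA bits on the request and fans them out to every mirror itself, as the next file shows.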
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0b830bbe1d8b..378a25894c57 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -319,83 +319,74 @@ static void raid1_end_write_request(struct bio *bio, int error) | |||
319 | if (r1_bio->bios[mirror] == bio) | 319 | if (r1_bio->bios[mirror] == bio) |
320 | break; | 320 | break; |
321 | 321 | ||
322 | if (error == -EOPNOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) { | 322 | /* |
323 | set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags); | 323 | * 'one mirror IO has finished' event handler: |
324 | set_bit(R1BIO_BarrierRetry, &r1_bio->state); | 324 | */ |
325 | r1_bio->mddev->barriers_work = 0; | 325 | r1_bio->bios[mirror] = NULL; |
326 | /* Don't rdev_dec_pending in this branch - keep it for the retry */ | 326 | to_put = bio; |
327 | } else { | 327 | if (!uptodate) { |
328 | md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); | ||
329 | /* an I/O failed, we can't clear the bitmap */ | ||
330 | set_bit(R1BIO_Degraded, &r1_bio->state); | ||
331 | } else | ||
328 | /* | 332 | /* |
329 | * this branch is our 'one mirror IO has finished' event handler: | 333 | * Set R1BIO_Uptodate in our master bio, so that we |
334 | * will return a good error code for to the higher | ||
335 | * levels even if IO on some other mirrored buffer | ||
336 | * fails. | ||
337 | * | ||
338 | * The 'master' represents the composite IO operation | ||
339 | * to user-side. So if something waits for IO, then it | ||
340 | * will wait for the 'master' bio. | ||
330 | */ | 341 | */ |
331 | r1_bio->bios[mirror] = NULL; | 342 | set_bit(R1BIO_Uptodate, &r1_bio->state); |
332 | to_put = bio; | 343 | |
333 | if (!uptodate) { | 344 | update_head_pos(mirror, r1_bio); |
334 | md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); | 345 | |
335 | /* an I/O failed, we can't clear the bitmap */ | 346 | if (behind) { |
336 | set_bit(R1BIO_Degraded, &r1_bio->state); | 347 | if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags)) |
337 | } else | 348 | atomic_dec(&r1_bio->behind_remaining); |
338 | /* | 349 | |
339 | * Set R1BIO_Uptodate in our master bio, so that | 350 | /* |
340 | * we will return a good error code for to the higher | 351 | * In behind mode, we ACK the master bio once the I/O |
341 | * levels even if IO on some other mirrored buffer fails. | 352 | * has safely reached all non-writemostly |
342 | * | 353 | * disks. Setting the Returned bit ensures that this |
343 | * The 'master' represents the composite IO operation to | 354 | * gets done only once -- we don't ever want to return |
344 | * user-side. So if something waits for IO, then it will | 355 | * -EIO here, instead we'll wait |
345 | * wait for the 'master' bio. | 356 | */ |
346 | */ | 357 | if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) && |
347 | set_bit(R1BIO_Uptodate, &r1_bio->state); | 358 | test_bit(R1BIO_Uptodate, &r1_bio->state)) { |
348 | 359 | /* Maybe we can return now */ | |
349 | update_head_pos(mirror, r1_bio); | 360 | if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { |
350 | 361 | struct bio *mbio = r1_bio->master_bio; | |
351 | if (behind) { | 362 | PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n", |
352 | if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags)) | 363 | (unsigned long long) mbio->bi_sector, |
353 | atomic_dec(&r1_bio->behind_remaining); | 364 | (unsigned long long) mbio->bi_sector + |
354 | 365 | (mbio->bi_size >> 9) - 1); | |
355 | /* In behind mode, we ACK the master bio once the I/O has safely | 366 | bio_endio(mbio, 0); |
356 | * reached all non-writemostly disks. Setting the Returned bit | ||
357 | * ensures that this gets done only once -- we don't ever want to | ||
358 | * return -EIO here, instead we'll wait */ | ||
359 | |||
360 | if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) && | ||
361 | test_bit(R1BIO_Uptodate, &r1_bio->state)) { | ||
362 | /* Maybe we can return now */ | ||
363 | if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { | ||
364 | struct bio *mbio = r1_bio->master_bio; | ||
365 | PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n", | ||
366 | (unsigned long long) mbio->bi_sector, | ||
367 | (unsigned long long) mbio->bi_sector + | ||
368 | (mbio->bi_size >> 9) - 1); | ||
369 | bio_endio(mbio, 0); | ||
370 | } | ||
371 | } | 367 | } |
372 | } | 368 | } |
373 | rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); | ||
374 | } | 369 | } |
370 | rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); | ||
371 | |||
375 | /* | 372 | /* |
376 | * | ||
377 | * Let's see if all mirrored write operations have finished | 373 | * Let's see if all mirrored write operations have finished |
378 | * already. | 374 | * already. |
379 | */ | 375 | */ |
380 | if (atomic_dec_and_test(&r1_bio->remaining)) { | 376 | if (atomic_dec_and_test(&r1_bio->remaining)) { |
381 | if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) | 377 | if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { |
382 | reschedule_retry(r1_bio); | 378 | /* free extra copy of the data pages */ |
383 | else { | 379 | int i = bio->bi_vcnt; |
384 | /* it really is the end of this request */ | 380 | while (i--) |
385 | if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { | 381 | safe_put_page(bio->bi_io_vec[i].bv_page); |
386 | /* free extra copy of the data pages */ | ||
387 | int i = bio->bi_vcnt; | ||
388 | while (i--) | ||
389 | safe_put_page(bio->bi_io_vec[i].bv_page); | ||
390 | } | ||
391 | /* clear the bitmap if all writes complete successfully */ | ||
392 | bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, | ||
393 | r1_bio->sectors, | ||
394 | !test_bit(R1BIO_Degraded, &r1_bio->state), | ||
395 | behind); | ||
396 | md_write_end(r1_bio->mddev); | ||
397 | raid_end_bio_io(r1_bio); | ||
398 | } | 382 | } |
383 | /* clear the bitmap if all writes complete successfully */ | ||
384 | bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, | ||
385 | r1_bio->sectors, | ||
386 | !test_bit(R1BIO_Degraded, &r1_bio->state), | ||
387 | behind); | ||
388 | md_write_end(r1_bio->mddev); | ||
389 | raid_end_bio_io(r1_bio); | ||
399 | } | 390 | } |
400 | 391 | ||
401 | if (to_put) | 392 | if (to_put) |
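With the barrier-retry branch gone, raid1_end_write_request() has a single path: record the per-mirror outcome, drop the rdev reference unconditionally, and finish the master bio once the last mirror completes. The toy model below captures only that completion accounting; it is simplified user-space C with invented names.

#include <stdbool.h>
#include <stdio.h>

#define NMIRRORS 2

struct toy_r1bio {
	int remaining;		/* one reference per submitted mirror write */
	bool uptodate;		/* any mirror succeeded (R1BIO_Uptodate) */
	bool degraded;		/* some mirror failed (R1BIO_Degraded) */
};

static void end_write(struct toy_r1bio *r1, bool mirror_ok)
{
	if (!mirror_ok)
		r1->degraded = true;	/* models md_error() + R1BIO_Degraded */
	else
		r1->uptodate = true;

	if (--r1->remaining == 0)	/* last mirror: finish the master bio */
		printf("master bio ends: %s, bitmap %s\n",
		       r1->uptodate ? "success" : "-EIO",
		       r1->degraded ? "kept dirty" : "cleared");
}

int main(void)
{
	struct toy_r1bio r1 = { .remaining = NMIRRORS };

	end_write(&r1, true);	/* first mirror succeeds */
	end_write(&r1, false);	/* second mirror fails: degraded, but the write is still good */
	return 0;
}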
@@ -788,16 +779,13 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
788 | struct page **behind_pages = NULL; | 779 | struct page **behind_pages = NULL; |
789 | const int rw = bio_data_dir(bio); | 780 | const int rw = bio_data_dir(bio); |
790 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); | 781 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); |
791 | unsigned long do_barriers; | 782 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); |
792 | mdk_rdev_t *blocked_rdev; | 783 | mdk_rdev_t *blocked_rdev; |
793 | 784 | ||
794 | /* | 785 | /* |
795 | * Register the new request and wait if the reconstruction | 786 | * Register the new request and wait if the reconstruction |
796 | * thread has put up a bar for new requests. | 787 | * thread has put up a bar for new requests. |
797 | * Continue immediately if no resync is active currently. | 788 | * Continue immediately if no resync is active currently. |
798 | * We test barriers_work *after* md_write_start as md_write_start | ||
799 | * may cause the first superblock write, and that will check out | ||
800 | * if barriers work. | ||
801 | */ | 789 | */ |
802 | 790 | ||
803 | md_write_start(mddev, bio); /* wait on superblock update early */ | 791 | md_write_start(mddev, bio); /* wait on superblock update early */ |
@@ -821,13 +809,6 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
821 | } | 809 | } |
822 | finish_wait(&conf->wait_barrier, &w); | 810 | finish_wait(&conf->wait_barrier, &w); |
823 | } | 811 | } |
824 | if (unlikely(!mddev->barriers_work && | ||
825 | (bio->bi_rw & REQ_HARDBARRIER))) { | ||
826 | if (rw == WRITE) | ||
827 | md_write_end(mddev); | ||
828 | bio_endio(bio, -EOPNOTSUPP); | ||
829 | return 0; | ||
830 | } | ||
831 | 812 | ||
832 | wait_barrier(conf); | 813 | wait_barrier(conf); |
833 | 814 | ||
@@ -959,10 +940,6 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
959 | atomic_set(&r1_bio->remaining, 0); | 940 | atomic_set(&r1_bio->remaining, 0); |
960 | atomic_set(&r1_bio->behind_remaining, 0); | 941 | atomic_set(&r1_bio->behind_remaining, 0); |
961 | 942 | ||
962 | do_barriers = bio->bi_rw & REQ_HARDBARRIER; | ||
963 | if (do_barriers) | ||
964 | set_bit(R1BIO_Barrier, &r1_bio->state); | ||
965 | |||
966 | bio_list_init(&bl); | 943 | bio_list_init(&bl); |
967 | for (i = 0; i < disks; i++) { | 944 | for (i = 0; i < disks; i++) { |
968 | struct bio *mbio; | 945 | struct bio *mbio; |
@@ -975,7 +952,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
975 | mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; | 952 | mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; |
976 | mbio->bi_bdev = conf->mirrors[i].rdev->bdev; | 953 | mbio->bi_bdev = conf->mirrors[i].rdev->bdev; |
977 | mbio->bi_end_io = raid1_end_write_request; | 954 | mbio->bi_end_io = raid1_end_write_request; |
978 | mbio->bi_rw = WRITE | do_barriers | do_sync; | 955 | mbio->bi_rw = WRITE | do_flush_fua | do_sync; |
979 | mbio->bi_private = r1_bio; | 956 | mbio->bi_private = r1_bio; |
980 | 957 | ||
981 | if (behind_pages) { | 958 | if (behind_pages) { |
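Unlike the other personalities, raid1 keeps REQ_FLUSH and REQ_FUA on the request and copies them onto every mirrored write, so each member device flushes its own cache and honours FUA for its own copy. The short program below illustrates just that bit masking; the flag values are placeholders rather than the kernel's.

#include <stdio.h>

#define TOY_SYNC  (1u << 0)
#define TOY_FLUSH (1u << 1)
#define TOY_FUA   (1u << 2)
#define TOY_WRITE (1u << 3)

int main(void)
{
	unsigned int master_rw = TOY_WRITE | TOY_SYNC | TOY_FUA;
	unsigned int do_sync = master_rw & TOY_SYNC;
	unsigned int do_flush_fua = master_rw & (TOY_FLUSH | TOY_FUA);

	for (int mirror = 0; mirror < 2; mirror++) {
		unsigned int mbio_rw = TOY_WRITE | do_flush_fua | do_sync;

		printf("mirror %d: rw=0x%x (FUA %s)\n", mirror, mbio_rw,
		       mbio_rw & TOY_FUA ? "set" : "clear");
	}
	return 0;
}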
@@ -1634,41 +1611,6 @@ static void raid1d(mddev_t *mddev) | |||
1634 | if (test_bit(R1BIO_IsSync, &r1_bio->state)) { | 1611 | if (test_bit(R1BIO_IsSync, &r1_bio->state)) { |
1635 | sync_request_write(mddev, r1_bio); | 1612 | sync_request_write(mddev, r1_bio); |
1636 | unplug = 1; | 1613 | unplug = 1; |
1637 | } else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) { | ||
1638 | /* some requests in the r1bio were REQ_HARDBARRIER | ||
1639 | * requests which failed with -EOPNOTSUPP. Hohumm.. | ||
1640 | * Better resubmit without the barrier. | ||
1641 | * We know which devices to resubmit for, because | ||
1642 | * all others have had their bios[] entry cleared. | ||
1643 | * We already have a nr_pending reference on these rdevs. | ||
1644 | */ | ||
1645 | int i; | ||
1646 | const unsigned long do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC); | ||
1647 | clear_bit(R1BIO_BarrierRetry, &r1_bio->state); | ||
1648 | clear_bit(R1BIO_Barrier, &r1_bio->state); | ||
1649 | for (i=0; i < conf->raid_disks; i++) | ||
1650 | if (r1_bio->bios[i]) | ||
1651 | atomic_inc(&r1_bio->remaining); | ||
1652 | for (i=0; i < conf->raid_disks; i++) | ||
1653 | if (r1_bio->bios[i]) { | ||
1654 | struct bio_vec *bvec; | ||
1655 | int j; | ||
1656 | |||
1657 | bio = bio_clone(r1_bio->master_bio, GFP_NOIO); | ||
1658 | /* copy pages from the failed bio, as | ||
1659 | * this might be a write-behind device */ | ||
1660 | __bio_for_each_segment(bvec, bio, j, 0) | ||
1661 | bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page; | ||
1662 | bio_put(r1_bio->bios[i]); | ||
1663 | bio->bi_sector = r1_bio->sector + | ||
1664 | conf->mirrors[i].rdev->data_offset; | ||
1665 | bio->bi_bdev = conf->mirrors[i].rdev->bdev; | ||
1666 | bio->bi_end_io = raid1_end_write_request; | ||
1667 | bio->bi_rw = WRITE | do_sync; | ||
1668 | bio->bi_private = r1_bio; | ||
1669 | r1_bio->bios[i] = bio; | ||
1670 | generic_make_request(bio); | ||
1671 | } | ||
1672 | } else { | 1614 | } else { |
1673 | int disk; | 1615 | int disk; |
1674 | 1616 | ||
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 5f2d443ae28a..adf8cfd73313 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h | |||
@@ -117,8 +117,6 @@ struct r1bio_s { | |||
117 | #define R1BIO_IsSync 1 | 117 | #define R1BIO_IsSync 1 |
118 | #define R1BIO_Degraded 2 | 118 | #define R1BIO_Degraded 2 |
119 | #define R1BIO_BehindIO 3 | 119 | #define R1BIO_BehindIO 3 |
120 | #define R1BIO_Barrier 4 | ||
121 | #define R1BIO_BarrierRetry 5 | ||
122 | /* For write-behind requests, we call bi_end_io when | 120 | /* For write-behind requests, we call bi_end_io when |
123 | * the last non-write-behind device completes, providing | 121 | * the last non-write-behind device completes, providing |
124 | * any write was successful. Otherwise we call when | 122 | * any write was successful. Otherwise we call when |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 84718383124d..f0d082f749be 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -800,12 +800,13 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
800 | int chunk_sects = conf->chunk_mask + 1; | 800 | int chunk_sects = conf->chunk_mask + 1; |
801 | const int rw = bio_data_dir(bio); | 801 | const int rw = bio_data_dir(bio); |
802 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); | 802 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); |
803 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); | ||
803 | struct bio_list bl; | 804 | struct bio_list bl; |
804 | unsigned long flags; | 805 | unsigned long flags; |
805 | mdk_rdev_t *blocked_rdev; | 806 | mdk_rdev_t *blocked_rdev; |
806 | 807 | ||
807 | if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { | 808 | if (unlikely(bio->bi_rw & REQ_FLUSH)) { |
808 | md_barrier_request(mddev, bio); | 809 | md_flush_request(mddev, bio); |
809 | return 0; | 810 | return 0; |
810 | } | 811 | } |
811 | 812 | ||
@@ -965,7 +966,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
965 | conf->mirrors[d].rdev->data_offset; | 966 | conf->mirrors[d].rdev->data_offset; |
966 | mbio->bi_bdev = conf->mirrors[d].rdev->bdev; | 967 | mbio->bi_bdev = conf->mirrors[d].rdev->bdev; |
967 | mbio->bi_end_io = raid10_end_write_request; | 968 | mbio->bi_end_io = raid10_end_write_request; |
968 | mbio->bi_rw = WRITE | do_sync; | 969 | mbio->bi_rw = WRITE | do_sync | do_fua; |
969 | mbio->bi_private = r10_bio; | 970 | mbio->bi_private = r10_bio; |
970 | 971 | ||
971 | atomic_inc(&r10_bio->remaining); | 972 | atomic_inc(&r10_bio->remaining); |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 69b0a169e43d..31140d1259dc 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -506,9 +506,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
506 | int rw; | 506 | int rw; |
507 | struct bio *bi; | 507 | struct bio *bi; |
508 | mdk_rdev_t *rdev; | 508 | mdk_rdev_t *rdev; |
509 | if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) | 509 | if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) { |
510 | rw = WRITE; | 510 | if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags)) |
511 | else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) | 511 | rw = WRITE_FUA; |
512 | else | ||
513 | rw = WRITE; | ||
514 | } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) | ||
512 | rw = READ; | 515 | rw = READ; |
513 | else | 516 | else |
514 | continue; | 517 | continue; |
@@ -1031,6 +1034,8 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
1031 | 1034 | ||
1032 | while (wbi && wbi->bi_sector < | 1035 | while (wbi && wbi->bi_sector < |
1033 | dev->sector + STRIPE_SECTORS) { | 1036 | dev->sector + STRIPE_SECTORS) { |
1037 | if (wbi->bi_rw & REQ_FUA) | ||
1038 | set_bit(R5_WantFUA, &dev->flags); | ||
1034 | tx = async_copy_data(1, wbi, dev->page, | 1039 | tx = async_copy_data(1, wbi, dev->page, |
1035 | dev->sector, tx); | 1040 | dev->sector, tx); |
1036 | wbi = r5_next_bio(wbi, dev->sector); | 1041 | wbi = r5_next_bio(wbi, dev->sector); |
@@ -1048,15 +1053,22 @@ static void ops_complete_reconstruct(void *stripe_head_ref) | |||
1048 | int pd_idx = sh->pd_idx; | 1053 | int pd_idx = sh->pd_idx; |
1049 | int qd_idx = sh->qd_idx; | 1054 | int qd_idx = sh->qd_idx; |
1050 | int i; | 1055 | int i; |
1056 | bool fua = false; | ||
1051 | 1057 | ||
1052 | pr_debug("%s: stripe %llu\n", __func__, | 1058 | pr_debug("%s: stripe %llu\n", __func__, |
1053 | (unsigned long long)sh->sector); | 1059 | (unsigned long long)sh->sector); |
1054 | 1060 | ||
1061 | for (i = disks; i--; ) | ||
1062 | fua |= test_bit(R5_WantFUA, &sh->dev[i].flags); | ||
1063 | |||
1055 | for (i = disks; i--; ) { | 1064 | for (i = disks; i--; ) { |
1056 | struct r5dev *dev = &sh->dev[i]; | 1065 | struct r5dev *dev = &sh->dev[i]; |
1057 | 1066 | ||
1058 | if (dev->written || i == pd_idx || i == qd_idx) | 1067 | if (dev->written || i == pd_idx || i == qd_idx) { |
1059 | set_bit(R5_UPTODATE, &dev->flags); | 1068 | set_bit(R5_UPTODATE, &dev->flags); |
1069 | if (fua) | ||
1070 | set_bit(R5_WantFUA, &dev->flags); | ||
1071 | } | ||
1060 | } | 1072 | } |
1061 | 1073 | ||
1062 | if (sh->reconstruct_state == reconstruct_state_drain_run) | 1074 | if (sh->reconstruct_state == reconstruct_state_drain_run) |
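In raid5 the durability hint has to survive the stripe cache: ops_run_biodrain() records REQ_FUA from the incoming bio as R5_WantFUA on the affected device, and ops_complete_reconstruct() above then spreads the flag to every block being written, parity included, so the whole stripe update is forced to media together. The model below is a simplified rendering of that propagation, with invented structure and names.

#include <stdbool.h>
#include <stdio.h>

#define NDISKS 4			/* three data disks plus one parity disk */

struct toy_dev { bool written; bool want_fua; };

static void complete_reconstruct(struct toy_dev *dev, int pd_idx)
{
	bool fua = false;

	for (int i = 0; i < NDISKS; i++)
		fua |= dev[i].want_fua;		/* did any data block ask for FUA? */

	for (int i = 0; i < NDISKS; i++)
		if (dev[i].written || i == pd_idx)
			dev[i].want_fua = fua;	/* parity inherits the FUA hint */
}

int main(void)
{
	struct toy_dev dev[NDISKS] = { { true, true }, { true, false } };
	int pd_idx = NDISKS - 1;

	complete_reconstruct(dev, pd_idx);
	for (int i = 0; i < NDISKS; i++)
		printf("disk %d: %s\n", i, dev[i].want_fua ? "WRITE_FUA" : "WRITE");
	return 0;
}

The earlier ops_run_io() hunk is where the flag finally becomes a WRITE_FUA submission.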
@@ -3281,7 +3293,7 @@ static void handle_stripe5(struct stripe_head *sh) | |||
3281 | 3293 | ||
3282 | if (dec_preread_active) { | 3294 | if (dec_preread_active) { |
3283 | /* We delay this until after ops_run_io so that if make_request | 3295 | /* We delay this until after ops_run_io so that if make_request |
3284 | * is waiting on a barrier, it won't continue until the writes | 3296 | * is waiting on a flush, it won't continue until the writes |
3285 | * have actually been submitted. | 3297 | * have actually been submitted. |
3286 | */ | 3298 | */ |
3287 | atomic_dec(&conf->preread_active_stripes); | 3299 | atomic_dec(&conf->preread_active_stripes); |
@@ -3583,7 +3595,7 @@ static void handle_stripe6(struct stripe_head *sh) | |||
3583 | 3595 | ||
3584 | if (dec_preread_active) { | 3596 | if (dec_preread_active) { |
3585 | /* We delay this until after ops_run_io so that if make_request | 3597 | /* We delay this until after ops_run_io so that if make_request |
3586 | * is waiting on a barrier, it won't continue until the writes | 3598 | * is waiting on a flush, it won't continue until the writes |
3587 | * have actually been submitted. | 3599 | * have actually been submitted. |
3588 | */ | 3600 | */ |
3589 | atomic_dec(&conf->preread_active_stripes); | 3601 | atomic_dec(&conf->preread_active_stripes); |
@@ -3978,14 +3990,8 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
3978 | const int rw = bio_data_dir(bi); | 3990 | const int rw = bio_data_dir(bi); |
3979 | int remaining; | 3991 | int remaining; |
3980 | 3992 | ||
3981 | if (unlikely(bi->bi_rw & REQ_HARDBARRIER)) { | 3993 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { |
3982 | /* Drain all pending writes. We only really need | 3994 | md_flush_request(mddev, bi); |
3983 | * to ensure they have been submitted, but this is | ||
3984 | * easier. | ||
3985 | */ | ||
3986 | mddev->pers->quiesce(mddev, 1); | ||
3987 | mddev->pers->quiesce(mddev, 0); | ||
3988 | md_barrier_request(mddev, bi); | ||
3989 | return 0; | 3995 | return 0; |
3990 | } | 3996 | } |
3991 | 3997 | ||
@@ -4103,7 +4109,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
4103 | finish_wait(&conf->wait_for_overlap, &w); | 4109 | finish_wait(&conf->wait_for_overlap, &w); |
4104 | set_bit(STRIPE_HANDLE, &sh->state); | 4110 | set_bit(STRIPE_HANDLE, &sh->state); |
4105 | clear_bit(STRIPE_DELAYED, &sh->state); | 4111 | clear_bit(STRIPE_DELAYED, &sh->state); |
4106 | if (mddev->barrier && | 4112 | if ((bi->bi_rw & REQ_SYNC) && |
4107 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | 4113 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
4108 | atomic_inc(&conf->preread_active_stripes); | 4114 | atomic_inc(&conf->preread_active_stripes); |
4109 | release_stripe(sh); | 4115 | release_stripe(sh); |
@@ -4126,13 +4132,6 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
4126 | bio_endio(bi, 0); | 4132 | bio_endio(bi, 0); |
4127 | } | 4133 | } |
4128 | 4134 | ||
4129 | if (mddev->barrier) { | ||
4130 | /* We need to wait for the stripes to all be handled. | ||
4131 | * So: wait for preread_active_stripes to drop to 0. | ||
4132 | */ | ||
4133 | wait_event(mddev->thread->wqueue, | ||
4134 | atomic_read(&conf->preread_active_stripes) == 0); | ||
4135 | } | ||
4136 | return 0; | 4135 | return 0; |
4137 | } | 4136 | } |
4138 | 4137 | ||
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 36eaed5dfd6e..2ace0582b409 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -275,6 +275,7 @@ struct r6_state { | |||
275 | * filling | 275 | * filling |
276 | */ | 276 | */ |
277 | #define R5_Wantdrain 13 /* dev->towrite needs to be drained */ | 277 | #define R5_Wantdrain 13 /* dev->towrite needs to be drained */ |
278 | #define R5_WantFUA 14 /* Write should be FUA */ | ||
278 | /* | 279 | /* |
279 | * Write method | 280 | * Write method |
280 | */ | 281 | */ |
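The new R5_WantFUA bit defined above closes the loop: when ops_run_io() decides how to submit each stripe member, a device flagged for FUA goes out as a FUA write and everything else keeps its plain read/write behaviour. The compact stand-alone rendering below mirrors that decision; the flag bits and the rw encoding are invented for the example.

#include <stdio.h>

/* placeholder per-device bits, in the spirit of R5_Wantwrite and friends */
#define WANT_WRITE (1u << 0)
#define WANT_READ  (1u << 1)
#define WANT_FUA   (1u << 2)

enum toy_rw { RW_NONE, RW_READ, RW_WRITE, RW_WRITE_FUA };

static enum toy_rw pick_rw(unsigned int *flags)
{
	if (*flags & WANT_WRITE) {
		*flags &= ~WANT_WRITE;			/* models test_and_clear_bit() */
		if (*flags & WANT_FUA) {
			*flags &= ~WANT_FUA;
			return RW_WRITE_FUA;		/* WRITE_FUA in the kernel */
		}
		return RW_WRITE;
	}
	if (*flags & WANT_READ) {
		*flags &= ~WANT_READ;
		return RW_READ;
	}
	return RW_NONE;					/* nothing to submit for this device */
}

int main(void)
{
	unsigned int fua_write = WANT_WRITE | WANT_FUA;
	unsigned int plain_write = WANT_WRITE;

	printf("fua_write -> %d, plain_write -> %d\n",
	       pick_rw(&fua_write), pick_rw(&plain_write));
	return 0;
}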