author     Mikulas Patocka <mpatocka@redhat.com>   2009-04-08 19:27:16 -0400
committer  Alasdair G Kergon <agk@redhat.com>      2009-04-08 19:27:16 -0400
commit     af7e466a1acededbc10beaba9eec8531d561c566 (patch)
tree       1d1d0655e2fc22e5b440202be4050fd083a66c04
parent     92c639021ca6e962645114f02e356e7feb131d0b (diff)
dm: implement basic barrier support
Barriers are submitted to a worker thread that issues them in-order.

The thread is modified so that when it sees a barrier request it waits for all pending IO before the request, then submits the barrier and waits for it. (We must wait, otherwise it could be intermixed with following requests.)

Errors from the barrier request are recorded in a per-device barrier_error variable. There may be only one barrier request in progress at once.

For now, the barrier request is converted to a non-barrier request when sending it to the underlying device.

This patch guarantees correct barrier behavior if the underlying device doesn't perform write-back caching. The same requirement existed before barriers were supported in dm.

Bottom layer barrier support (sending barriers by target drivers) and handling devices with write-back caches will be done in further patches.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
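For illustration, the flow described above can be modelled in a few lines of ordinary userspace C: at most one barrier is in flight, all pending I/O is flushed before and after the barrier's payload, and any error is reported through a single per-device field. This is only a sketch under those assumptions; struct model_device, wait_for_pending_io() and submit_payload() are hypothetical stand-ins, while the real code paths are dm_wait_for_completion(), __split_and_process_bio() and md->barrier_error in the diff below.

#include <stdio.h>

/* Hypothetical stand-in for struct bio: we only track the barrier bits. */
struct model_bio {
    int is_barrier;
    int is_empty_barrier;   /* barrier with no data payload */
};

/* Hypothetical stand-in for struct mapped_device. */
struct model_device {
    int pending;            /* in-flight non-barrier requests */
    int barrier_error;      /* per-device error slot: only one barrier at a time */
};

static int wait_for_pending_io(struct model_device *md)
{
    /* Stand-in for dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE). */
    md->pending = 0;
    return 0;
}

static void submit_payload(struct model_device *md, struct model_bio *bio)
{
    /* Stand-in for __split_and_process_bio(): the clones are sent to the
     * lower device with the barrier flag stripped, and any failure ends
     * up in md->barrier_error via dec_pending(). */
    (void)md;
    (void)bio;
}

/* Mirrors the shape of process_barrier(): flush, map the payload,
 * flush again, then complete with the recorded error. */
static int process_barrier_model(struct model_device *md, struct model_bio *bio)
{
    int error = wait_for_pending_io(md);    /* first flush */

    if (error)
        return error;
    if (bio->is_empty_barrier)
        return 0;                            /* nothing to write, only the flush */

    md->barrier_error = 0;
    submit_payload(md, bio);

    error = wait_for_pending_io(md);         /* second flush */
    if (!error && md->barrier_error)
        error = md->barrier_error;
    return error;
}

int main(void)
{
    struct model_device md = { .pending = 3, .barrier_error = 0 };
    struct model_bio barrier = { .is_barrier = 1, .is_empty_barrier = 0 };

    printf("barrier completed with error %d\n",
           process_barrier_model(&md, &barrier));
    return 0;
}

The second flush is what gives dec_pending() a chance to record a failure in the per-device error slot before the barrier bio itself is completed.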
-rw-r--r--  drivers/md/dm.c  88
1 file changed, 68 insertions(+), 20 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index db022e5f3912..8a994be035ba 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -125,6 +125,11 @@ struct mapped_device {
 	spinlock_t deferred_lock;
 
 	/*
+	 * An error from the barrier request currently being processed.
+	 */
+	int barrier_error;
+
+	/*
 	 * Processing queue (flush/barriers)
 	 */
 	struct workqueue_struct *wq;
@@ -425,6 +430,10 @@ static void end_io_acct(struct dm_io *io)
 	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
 	part_stat_unlock();
 
+	/*
+	 * After this is decremented the bio must not be touched if it is
+	 * a barrier.
+	 */
 	dm_disk(md)->part0.in_flight = pending =
 		atomic_dec_return(&md->pending);
 
@@ -531,25 +540,35 @@ static void dec_pending(struct dm_io *io, int error)
 			 */
 			spin_lock_irqsave(&md->deferred_lock, flags);
 			if (__noflush_suspending(md))
-				bio_list_add(&md->deferred, io->bio);
+				bio_list_add_head(&md->deferred, io->bio);
 			else
 				/* noflush suspend was interrupted. */
 				io->error = -EIO;
 			spin_unlock_irqrestore(&md->deferred_lock, flags);
 		}
 
-		end_io_acct(io);
-
 		io_error = io->error;
 		bio = io->bio;
 
-		free_io(md, io);
+		if (bio_barrier(bio)) {
+			/*
+			 * There can be just one barrier request so we use
+			 * a per-device variable for error reporting.
+			 * Note that you can't touch the bio after end_io_acct
+			 */
+			md->barrier_error = io_error;
+			end_io_acct(io);
+		} else {
+			end_io_acct(io);
 
-		if (io_error != DM_ENDIO_REQUEUE) {
-			trace_block_bio_complete(md->queue, bio);
+			if (io_error != DM_ENDIO_REQUEUE) {
+				trace_block_bio_complete(md->queue, bio);
 
-			bio_endio(bio, io_error);
+				bio_endio(bio, io_error);
+			}
 		}
+
+		free_io(md, io);
 	}
 }
 
@@ -691,7 +710,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
 
 	clone->bi_sector = sector;
 	clone->bi_bdev = bio->bi_bdev;
-	clone->bi_rw = bio->bi_rw;
+	clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
 	clone->bi_vcnt = 1;
 	clone->bi_size = to_bytes(len);
 	clone->bi_io_vec->bv_offset = offset;
@@ -718,6 +737,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
 
 	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
 	__bio_clone(clone, bio);
+	clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
 	clone->bi_destructor = dm_bio_destructor;
 	clone->bi_sector = sector;
 	clone->bi_idx = idx;
@@ -846,7 +866,10 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 
 	ci.map = dm_get_table(md);
 	if (unlikely(!ci.map)) {
-		bio_io_error(bio);
+		if (!bio_barrier(bio))
+			bio_io_error(bio);
+		else
+			md->barrier_error = -EIO;
 		return;
 	}
 
@@ -930,15 +953,6 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 	struct mapped_device *md = q->queuedata;
 	int cpu;
 
-	/*
-	 * There is no use in forwarding any barrier request since we can't
-	 * guarantee it is (or can be) handled by the targets correctly.
-	 */
-	if (unlikely(bio_barrier(bio))) {
-		bio_endio(bio, -EOPNOTSUPP);
-		return 0;
-	}
-
 	down_read(&md->io_lock);
 
 	cpu = part_stat_lock();
@@ -950,7 +964,8 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 	 * If we're suspended or the thread is processing barriers
 	 * we have to queue this io for later.
 	 */
-	if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))) {
+	if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
+	    unlikely(bio_barrier(bio))) {
 		up_read(&md->io_lock);
 
 		if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
@@ -1415,6 +1430,36 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 	return r;
 }
 
+static int dm_flush(struct mapped_device *md)
+{
+	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
+	return 0;
+}
+
+static void process_barrier(struct mapped_device *md, struct bio *bio)
+{
+	int error = dm_flush(md);
+
+	if (unlikely(error)) {
+		bio_endio(bio, error);
+		return;
+	}
+	if (bio_empty_barrier(bio)) {
+		bio_endio(bio, 0);
+		return;
+	}
+
+	__split_and_process_bio(md, bio);
+
+	error = dm_flush(md);
+
+	if (!error && md->barrier_error)
+		error = md->barrier_error;
+
+	if (md->barrier_error != DM_ENDIO_REQUEUE)
+		bio_endio(bio, error);
+}
+
 /*
  * Process the deferred bios
  */
@@ -1438,7 +1483,10 @@ static void dm_wq_work(struct work_struct *work)
 
 		up_write(&md->io_lock);
 
-		__split_and_process_bio(md, c);
+		if (bio_barrier(c))
+			process_barrier(md, c);
+		else
+			__split_and_process_bio(md, c);
 
 		down_write(&md->io_lock);
 	}