aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/dm-raid1.c 256
1 files changed, 211 insertions, 45 deletions
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ec6d675bf766..38efa7071dd7 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -6,6 +6,7 @@
6 6
7#include "dm.h" 7#include "dm.h"
8#include "dm-bio-list.h" 8#include "dm-bio-list.h"
9#include "dm-bio-record.h"
9#include "dm-io.h" 10#include "dm-io.h"
10#include "dm-log.h" 11#include "dm-log.h"
11#include "kcopyd.h" 12#include "kcopyd.h"
@@ -141,6 +142,7 @@ struct mirror_set {
141 struct bio_list failures; 142 struct bio_list failures;
142 143
143 struct dm_io_client *io_client; 144 struct dm_io_client *io_client;
145 mempool_t *read_record_pool;
144 146
145 /* recovery */ 147 /* recovery */
146 region_t nr_regions; 148 region_t nr_regions;
@@ -647,24 +649,30 @@ static void rh_start_recovery(struct region_hash *rh)
647 wake(rh->ms); 649 wake(rh->ms);
648} 650}
649 651
/* Minimum element count handed to mempool_create_kmalloc_pool() for the read record pool. */
652#define MIN_READ_RECORDS 20
/*
 * Bookkeeping for one directly-mapped read. mirror_map() allocates it from
 * ms->read_record_pool and parks it in map_context->ptr; mirror_end_io()
 * uses it to retry a failed read and then returns it to the pool.
 */
653struct dm_raid1_read_record {
/* Mirror the read was mapped to; the one to mark failed if the read errors. */
654 struct mirror *m;
/* Bio state saved by dm_bio_record() before remapping, for dm_bio_restore(). */
655 struct dm_bio_details details;
656};
657
650/* 658/*
651 * Every mirror should look like this one. 659 * Every mirror should look like this one.
652 */ 660 */
653#define DEFAULT_MIRROR 0 661#define DEFAULT_MIRROR 0
654 662
655/* 663/*
656 * This is yucky. We squirrel the mirror_set struct away inside 664 * This is yucky. We squirrel the mirror struct away inside
657 * bi_next for write buffers. This is safe since the bh 665 * bi_next for read/write buffers. This is safe since the bh
658 * doesn't get submitted to the lower levels of block layer. 666 * doesn't get submitted to the lower levels of block layer.
659 */ 667 */
660static struct mirror_set *bio_get_ms(struct bio *bio) 668static struct mirror *bio_get_m(struct bio *bio)
661{ 669{
662 return (struct mirror_set *) bio->bi_next; 670 return (struct mirror *) bio->bi_next;
663} 671}
664 672
665static void bio_set_ms(struct bio *bio, struct mirror_set *ms) 673static void bio_set_m(struct bio *bio, struct mirror *m)
666{ 674{
667 bio->bi_next = (struct bio *) ms; 675 bio->bi_next = (struct bio *) m;
668} 676}
669 677
670static struct mirror *get_default_mirror(struct mirror_set *ms) 678static struct mirror *get_default_mirror(struct mirror_set *ms)
@@ -857,17 +865,105 @@ static void do_recovery(struct mirror_set *ms)
857 *---------------------------------------------------------------*/ 865 *---------------------------------------------------------------*/
858static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector) 866static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector)
859{ 867{
860 /* FIXME: add read balancing */ 868 struct mirror *m = get_default_mirror(ms);
861 return get_default_mirror(ms); 869
870 do {
871 if (likely(!atomic_read(&m->error_count)))
872 return m;
873
874 if (m-- == ms->mirror)
875 m += ms->nr_mirrors;
876 } while (m != get_default_mirror(ms));
877
878 return NULL;
879}
880
881static int default_ok(struct mirror *m)
882{
883 struct mirror *default_mirror = get_default_mirror(m->ms);
884
885 return !atomic_read(&default_mirror->error_count);
886}
887
888static int mirror_available(struct mirror_set *ms, struct bio *bio)
889{
890 region_t region = bio_to_region(&ms->rh, bio);
891
892 if (ms->rh.log->type->in_sync(ms->rh.log, region, 0))
893 return choose_mirror(ms, bio->bi_sector) ? 1 : 0;
894
895 return 0;
862} 896}
863 897
864/* 898/*
865 * remap a buffer to a particular mirror. 899 * remap a buffer to a particular mirror.
866 */ 900 */
867static void map_bio(struct mirror_set *ms, struct mirror *m, struct bio *bio) 901static sector_t map_sector(struct mirror *m, struct bio *bio)
902{
903 return m->offset + (bio->bi_sector - m->ms->ti->begin);
904}
905
906static void map_bio(struct mirror *m, struct bio *bio)
868{ 907{
869 bio->bi_bdev = m->dev->bdev; 908 bio->bi_bdev = m->dev->bdev;
870 bio->bi_sector = m->offset + (bio->bi_sector - ms->ti->begin); 909 bio->bi_sector = map_sector(m, bio);
910}
911
912static void map_region(struct io_region *io, struct mirror *m,
913 struct bio *bio)
914{
915 io->bdev = m->dev->bdev;
916 io->sector = map_sector(m, bio);
917 io->count = bio->bi_size >> 9;
918}
919
920/*-----------------------------------------------------------------
921 * Reads
922 *---------------------------------------------------------------*/
923static void read_callback(unsigned long error, void *context)
924{
925 struct bio *bio = context;
926 struct mirror *m;
927
928 m = bio_get_m(bio);
929 bio_set_m(bio, NULL);
930
931 if (likely(!error)) {
932 bio_endio(bio, 0);
933 return;
934 }
935
936 fail_mirror(m, DM_RAID1_READ_ERROR);
937
938 if (likely(default_ok(m)) || mirror_available(m->ms, bio)) {
939 DMWARN_LIMIT("Read failure on mirror device %s. "
940 "Trying alternative device.",
941 m->dev->name);
942 queue_bio(m->ms, bio, bio_rw(bio));
943 return;
944 }
945
946 DMERR_LIMIT("Read failure on mirror device %s. Failing I/O.",
947 m->dev->name);
948 bio_endio(bio, -EIO);
949}
950
951/* Asynchronous read. */
952static void read_async_bio(struct mirror *m, struct bio *bio)
953{
954 struct io_region io;
955 struct dm_io_request io_req = {
956 .bi_rw = READ,
957 .mem.type = DM_IO_BVEC,
958 .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
959 .notify.fn = read_callback,
960 .notify.context = bio,
961 .client = m->ms->io_client,
962 };
963
964 map_region(&io, m, bio);
965 bio_set_m(bio, m);
966 (void) dm_io(&io_req, 1, &io, NULL);
871} 967}
872 968
873static void do_reads(struct mirror_set *ms, struct bio_list *reads) 969static void do_reads(struct mirror_set *ms, struct bio_list *reads)
@@ -878,17 +974,20 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads)
878 974
879 while ((bio = bio_list_pop(reads))) { 975 while ((bio = bio_list_pop(reads))) {
880 region = bio_to_region(&ms->rh, bio); 976 region = bio_to_region(&ms->rh, bio);
977 m = get_default_mirror(ms);
881 978
882 /* 979 /*
883 * We can only read balance if the region is in sync. 980 * We can only read balance if the region is in sync.
884 */ 981 */
885 if (rh_in_sync(&ms->rh, region, 1)) 982 if (likely(rh_in_sync(&ms->rh, region, 1)))
886 m = choose_mirror(ms, bio->bi_sector); 983 m = choose_mirror(ms, bio->bi_sector);
887 else 984 else if (m && atomic_read(&m->error_count))
888 m = get_default_mirror(ms); 985 m = NULL;
889 986
890 map_bio(ms, m, bio); 987 if (likely(m))
891 generic_make_request(bio); 988 read_async_bio(m, bio);
989 else
990 bio_endio(bio, -EIO);
892 } 991 }
893} 992}
894 993
@@ -964,8 +1063,8 @@ static void write_callback(unsigned long error, void *context)
964 int should_wake = 0; 1063 int should_wake = 0;
965 unsigned long flags; 1064 unsigned long flags;
966 1065
967 ms = bio_get_ms(bio); 1066 ms = bio_get_m(bio)->ms;
968 bio_set_ms(bio, NULL); 1067 bio_set_m(bio, NULL);
969 1068
970 /* 1069 /*
971 * NOTE: We don't decrement the pending count here, 1070 * NOTE: We don't decrement the pending count here,
@@ -1008,7 +1107,7 @@ out:
1008static void do_write(struct mirror_set *ms, struct bio *bio) 1107static void do_write(struct mirror_set *ms, struct bio *bio)
1009{ 1108{
1010 unsigned int i; 1109 unsigned int i;
1011 struct io_region io[KCOPYD_MAX_REGIONS+1]; 1110 struct io_region io[ms->nr_mirrors], *dest = io;
1012 struct mirror *m; 1111 struct mirror *m;
1013 struct dm_io_request io_req = { 1112 struct dm_io_request io_req = {
1014 .bi_rw = WRITE, 1113 .bi_rw = WRITE,
@@ -1019,15 +1118,14 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
1019 .client = ms->io_client, 1118 .client = ms->io_client,
1020 }; 1119 };
1021 1120
1022 for (i = 0; i < ms->nr_mirrors; i++) { 1121 for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++)
1023 m = ms->mirror + i; 1122 map_region(dest++, m, bio);
1024
1025 io[i].bdev = m->dev->bdev;
1026 io[i].sector = m->offset + (bio->bi_sector - ms->ti->begin);
1027 io[i].count = bio->bi_size >> 9;
1028 }
1029 1123
1030 bio_set_ms(bio, ms); 1124 /*
1125 * Use default mirror because we only need it to retrieve the reference
1126 * to the mirror set in write_callback().
1127 */
1128 bio_set_m(bio, get_default_mirror(ms));
1031 1129
1032 (void) dm_io(&io_req, ms->nr_mirrors, io, NULL); 1130 (void) dm_io(&io_req, ms->nr_mirrors, io, NULL);
1033} 1131}
@@ -1092,7 +1190,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
1092 rh_delay(&ms->rh, bio); 1190 rh_delay(&ms->rh, bio);
1093 1191
1094 while ((bio = bio_list_pop(&nosync))) { 1192 while ((bio = bio_list_pop(&nosync))) {
1095 map_bio(ms, get_default_mirror(ms), bio); 1193 map_bio(get_default_mirror(ms), bio);
1096 generic_make_request(bio); 1194 generic_make_request(bio);
1097 } 1195 }
1098} 1196}
@@ -1231,9 +1329,19 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
1231 atomic_set(&ms->suspend, 0); 1329 atomic_set(&ms->suspend, 0);
1232 atomic_set(&ms->default_mirror, DEFAULT_MIRROR); 1330 atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
1233 1331
1332 len = sizeof(struct dm_raid1_read_record);
1333 ms->read_record_pool = mempool_create_kmalloc_pool(MIN_READ_RECORDS,
1334 len);
1335 if (!ms->read_record_pool) {
1336 ti->error = "Error creating mirror read_record_pool";
1337 kfree(ms);
1338 return NULL;
1339 }
1340
1234 ms->io_client = dm_io_client_create(DM_IO_PAGES); 1341 ms->io_client = dm_io_client_create(DM_IO_PAGES);
1235 if (IS_ERR(ms->io_client)) { 1342 if (IS_ERR(ms->io_client)) {
1236 ti->error = "Error creating dm_io client"; 1343 ti->error = "Error creating dm_io client";
1344 mempool_destroy(ms->read_record_pool);
1237 kfree(ms); 1345 kfree(ms);
1238 return NULL; 1346 return NULL;
1239 } 1347 }
@@ -1241,6 +1349,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
1241 if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) { 1349 if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) {
1242 ti->error = "Error creating dirty region hash"; 1350 ti->error = "Error creating dirty region hash";
1243 dm_io_client_destroy(ms->io_client); 1351 dm_io_client_destroy(ms->io_client);
1352 mempool_destroy(ms->read_record_pool);
1244 kfree(ms); 1353 kfree(ms);
1245 return NULL; 1354 return NULL;
1246 } 1355 }
@@ -1256,6 +1365,7 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti,
1256 1365
1257 dm_io_client_destroy(ms->io_client); 1366 dm_io_client_destroy(ms->io_client);
1258 rh_exit(&ms->rh); 1367 rh_exit(&ms->rh);
1368 mempool_destroy(ms->read_record_pool);
1259 kfree(ms); 1369 kfree(ms);
1260} 1370}
1261 1371
@@ -1510,10 +1620,11 @@ static int mirror_map(struct dm_target *ti, struct bio *bio,
1510 int r, rw = bio_rw(bio); 1620 int r, rw = bio_rw(bio);
1511 struct mirror *m; 1621 struct mirror *m;
1512 struct mirror_set *ms = ti->private; 1622 struct mirror_set *ms = ti->private;
1513 1623 struct dm_raid1_read_record *read_record = NULL;
1514 map_context->ll = bio_to_region(&ms->rh, bio);
1515 1624
1516 if (rw == WRITE) { 1625 if (rw == WRITE) {
1626 /* Save region for mirror_end_io() handler */
1627 map_context->ll = bio_to_region(&ms->rh, bio);
1517 queue_bio(ms, bio, rw); 1628 queue_bio(ms, bio, rw);
1518 return DM_MAPIO_SUBMITTED; 1629 return DM_MAPIO_SUBMITTED;
1519 } 1630 }
@@ -1523,28 +1634,34 @@ static int mirror_map(struct dm_target *ti, struct bio *bio,
1523 if (r < 0 && r != -EWOULDBLOCK) 1634 if (r < 0 && r != -EWOULDBLOCK)
1524 return r; 1635 return r;
1525 1636
1526 if (r == -EWOULDBLOCK) /* FIXME: ugly */
1527 r = DM_MAPIO_SUBMITTED;
1528
1529 /* 1637 /*
1530 * We don't want to fast track a recovery just for a read 1638 * If region is not in-sync queue the bio.
1531 * ahead. So we just let it silently fail.
1532 * FIXME: get rid of this.
1533 */ 1639 */
1534 if (!r && rw == READA) 1640 if (!r || (r == -EWOULDBLOCK)) {
1535 return -EIO; 1641 if (rw == READA)
1642 return -EWOULDBLOCK;
1536 1643
1537 if (!r) {
1538 /* Pass this io over to the daemon */
1539 queue_bio(ms, bio, rw); 1644 queue_bio(ms, bio, rw);
1540 return DM_MAPIO_SUBMITTED; 1645 return DM_MAPIO_SUBMITTED;
1541 } 1646 }
1542 1647
1648 /*
1649 * The region is in-sync and we can perform reads directly.
1650 * Store enough information so we can retry if it fails.
1651 */
1543 m = choose_mirror(ms, bio->bi_sector); 1652 m = choose_mirror(ms, bio->bi_sector);
1544 if (!m) 1653 if (unlikely(!m))
1545 return -EIO; 1654 return -EIO;
1546 1655
1547 map_bio(ms, m, bio); 1656 read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO);
1657 if (likely(read_record)) {
1658 dm_bio_record(&read_record->details, bio);
1659 map_context->ptr = read_record;
1660 read_record->m = m;
1661 }
1662
1663 map_bio(m, bio);
1664
1548 return DM_MAPIO_REMAPPED; 1665 return DM_MAPIO_REMAPPED;
1549} 1666}
1550 1667
@@ -1553,15 +1670,64 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
1553{ 1670{
1554 int rw = bio_rw(bio); 1671 int rw = bio_rw(bio);
1555 struct mirror_set *ms = (struct mirror_set *) ti->private; 1672 struct mirror_set *ms = (struct mirror_set *) ti->private;
1556 region_t region = map_context->ll; 1673 struct mirror *m = NULL;
1674 struct dm_bio_details *bd = NULL;
1675 struct dm_raid1_read_record *read_record = map_context->ptr;
1557 1676
1558 /* 1677 /*
1559 * We need to dec pending if this was a write. 1678 * We need to dec pending if this was a write.
1560 */ 1679 */
1561 if (rw == WRITE) 1680 if (rw == WRITE) {
1562 rh_dec(&ms->rh, region); 1681 rh_dec(&ms->rh, map_context->ll);
1682 return error;
1683 }
1563 1684
1564 return 0; 1685 if (error == -EOPNOTSUPP)
1686 goto out;
1687
1688 if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
1689 goto out;
1690
1691 if (unlikely(error)) {
1692 if (!read_record) {
1693 /*
1694 * There wasn't enough memory to record necessary
1695 * information for a retry or there was no other
1696 * mirror in-sync.
1697 */
1698 DMERR_LIMIT("Mirror read failed from %s.",
1699 m->dev->name);
1700 return -EIO;
1701 }
1702 DMERR("Mirror read failed from %s. Trying alternative device.",
1703 m->dev->name);
1704
1705 m = read_record->m;
1706 fail_mirror(m, DM_RAID1_READ_ERROR);
1707
1708 /*
1709 * A failed read is requeued for another attempt using an intact
1710 * mirror.
1711 */
1712 if (default_ok(m) || mirror_available(ms, bio)) {
1713 bd = &read_record->details;
1714
1715 dm_bio_restore(bd, bio);
1716 mempool_free(read_record, ms->read_record_pool);
1717 map_context->ptr = NULL;
1718 queue_bio(ms, bio, rw);
1719 return 1;
1720 }
1721 DMERR("All replicated volumes dead, failing I/O");
1722 }
1723
1724out:
1725 if (read_record) {
1726 mempool_free(read_record, ms->read_record_pool);
1727 map_context->ptr = NULL;
1728 }
1729
1730 return error;
1565} 1731}
1566 1732
1567static void mirror_presuspend(struct dm_target *ti) 1733static void mirror_presuspend(struct dm_target *ti)