author	NeilBrown <neilb@suse.de>	2006-01-06 03:20:26 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-01-06 11:34:04 -0500
commit	d11c171e636cfd2df818cf3411d88222c2f4fcef (patch)
tree	1ca670d9eabad009cee23c44f6cb8cd1b3cccadb
parent	18f08819f42b647783e4f6ea99141623881bf182 (diff)
[PATCH] md: allow raid1 to check consistency
When performing a user-requested 'check' or 'repair', we read all readable devices and compare the contents. We only write to blocks which had read errors, or whose content differs from the first good device found.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
 drivers/md/raid1.c | 156 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 128 insertions(+), 28 deletions(-)
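Before the diff itself, the comparison rule the patch implements is easiest to see outside the bio plumbing. The following is a minimal stand-alone sketch, not part of the patch: all names here (mirror, pick_primary, check_block) are invented for illustration. The first successfully-read copy acts as the reference; any copy that failed to read, or whose contents differ from it, counts as a mismatch and, in 'repair' mode, is queued for a rewrite.

/* Illustrative user-space restatement of the check/repair rule. */
#include <stdio.h>
#include <string.h>

#define BLOCK_SIZE 4096

struct mirror {
	int read_ok;                    /* did the read succeed? */
	unsigned char data[BLOCK_SIZE]; /* block contents, valid if read_ok */
	int needs_rewrite;              /* output: schedule a write? */
};

/* Return the index of the first readable copy, or -1 if none. */
static int pick_primary(struct mirror *m, int nmirrors)
{
	int i;

	for (i = 0; i < nmirrors; i++)
		if (m[i].read_ok)
			return i;
	return -1;
}

/*
 * Compare every copy against the primary.  Returns the number of
 * differing copies (a rough analogue of resync_mismatches), or -1
 * if no copy could be read at all.  With repair == 0 ('check'),
 * mismatches are only counted, never rewritten.
 */
static int check_block(struct mirror *m, int nmirrors, int repair)
{
	int primary = pick_primary(m, nmirrors);
	int i, mismatches = 0;

	if (primary < 0)
		return -1;      /* no hope left: nothing readable */

	for (i = 0; i < nmirrors; i++) {
		if (i == primary)
			continue;
		if (!m[i].read_ok ||
		    memcmp(m[i].data, m[primary].data, BLOCK_SIZE)) {
			mismatches++;
			if (repair) {
				/* over-write with the primary's content */
				memcpy(m[i].data, m[primary].data, BLOCK_SIZE);
				m[i].needs_rewrite = 1;
			}
		}
	}
	return mismatches;
}

int main(void)
{
	struct mirror m[2] = { { .read_ok = 1 }, { .read_ok = 1 } };

	m[1].data[0] = 0xff;    /* inject a one-byte mismatch */
	printf("mismatching copies: %d\n", check_block(m, 2, 1));
	printf("copy 1 rewrite scheduled: %d\n", m[1].needs_rewrite);
	return 0;
}

The kernel version below does the same thing per RESYNC page with memcmp() on page_address(), skipping the write entirely when MD_RECOVERY_CHECK is set.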
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a8bc93d6ff63..7fbb60883280 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -106,15 +106,30 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 	}
 	/*
 	 * Allocate RESYNC_PAGES data pages and attach them to
-	 * the first bio;
+	 * the first bio.
+	 * If this is a user-requested check/repair, allocate
+	 * RESYNC_PAGES for each bio.
 	 */
-	bio = r1_bio->bios[0];
-	for (i = 0; i < RESYNC_PAGES; i++) {
-		page = alloc_page(gfp_flags);
-		if (unlikely(!page))
-			goto out_free_pages;
-
-		bio->bi_io_vec[i].bv_page = page;
+	if (test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery))
+		j = pi->raid_disks;
+	else
+		j = 1;
+	while (j--) {
+		bio = r1_bio->bios[j];
+		for (i = 0; i < RESYNC_PAGES; i++) {
+			page = alloc_page(gfp_flags);
+			if (unlikely(!page))
+				goto out_free_pages;
+
+			bio->bi_io_vec[i].bv_page = page;
+		}
+	}
+	/* If not user-requested, copy the page pointers to all bios */
+	if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
+		for (i=0; i<RESYNC_PAGES ; i++)
+			for (j=1; j<pi->raid_disks; j++)
+				r1_bio->bios[j]->bi_io_vec[i].bv_page =
+					r1_bio->bios[0]->bi_io_vec[i].bv_page;
 	}
 
 	r1_bio->master_bio = NULL;
@@ -122,8 +137,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 	return r1_bio;
 
 out_free_pages:
-	for ( ; i > 0 ; i--)
-		__free_page(bio->bi_io_vec[i-1].bv_page);
+	for (i=0; i < RESYNC_PAGES ; i++)
+		for (j=0 ; j < pi->raid_disks; j++)
+			__free_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
+	j = -1;
 out_free_bio:
 	while ( ++j < pi->raid_disks )
 		bio_put(r1_bio->bios[j]);
@@ -134,14 +151,16 @@ out_free_bio:
 static void r1buf_pool_free(void *__r1_bio, void *data)
 {
 	struct pool_info *pi = data;
-	int i;
+	int i,j;
 	r1bio_t *r1bio = __r1_bio;
-	struct bio *bio = r1bio->bios[0];
 
-	for (i = 0; i < RESYNC_PAGES; i++) {
-		__free_page(bio->bi_io_vec[i].bv_page);
-		bio->bi_io_vec[i].bv_page = NULL;
-	}
+	for (i = 0; i < RESYNC_PAGES; i++)
+		for (j = pi->raid_disks; j-- ;) {
+			if (j == 0 ||
+			    r1bio->bios[j]->bi_io_vec[i].bv_page !=
+			    r1bio->bios[0]->bi_io_vec[i].bv_page)
+				__free_page(r1bio->bios[j]->bi_io_vec[i].bv_page);
+		}
 	for (i=0 ; i < pi->raid_disks; i++)
 		bio_put(r1bio->bios[i]);
 
@@ -1077,13 +1096,16 @@ abort:
 static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 {
 	r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
+	int i;
 
 	if (bio->bi_size)
 		return 1;
 
-	if (r1_bio->bios[r1_bio->read_disk] != bio)
-		BUG();
-	update_head_pos(r1_bio->read_disk, r1_bio);
+	for (i=r1_bio->mddev->raid_disks; i--; )
+		if (r1_bio->bios[i] == bio)
+			break;
+	BUG_ON(i < 0);
+	update_head_pos(i, r1_bio);
 	/*
 	 * we have read a block, now it needs to be re-written,
 	 * or re-read if the read failed.
@@ -1091,7 +1113,9 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 	 */
 	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
-	reschedule_retry(r1_bio);
+
+	if (atomic_dec_and_test(&r1_bio->remaining))
+		reschedule_retry(r1_bio);
 	return 0;
 }
 
@@ -1134,9 +1158,65 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 	bio = r1_bio->bios[r1_bio->read_disk];
 
 
-	/*
-	 * schedule writes
-	 */
+	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+		/* We have read all readable devices.  If we haven't
+		 * got the block, then there is no hope left.
+		 * If we have, then we want to do a comparison
+		 * and skip the write if everything is the same.
+		 * If any blocks failed to read, then we need to
+		 * attempt an over-write
+		 */
+		int primary;
+		if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
+			for (i=0; i<mddev->raid_disks; i++)
+				if (r1_bio->bios[i]->bi_end_io == end_sync_read)
+					md_error(mddev, conf->mirrors[i].rdev);
+
+			md_done_sync(mddev, r1_bio->sectors, 1);
+			put_buf(r1_bio);
+			return;
+		}
+		for (primary=0; primary<mddev->raid_disks; primary++)
+			if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
+			    test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
+				r1_bio->bios[primary]->bi_end_io = NULL;
+				break;
+			}
+		r1_bio->read_disk = primary;
+		for (i=0; i<mddev->raid_disks; i++)
+			if (r1_bio->bios[i]->bi_end_io == end_sync_read &&
+			    test_bit(BIO_UPTODATE, &r1_bio->bios[i]->bi_flags)) {
+				int j;
+				int vcnt = r1_bio->sectors >> (PAGE_SHIFT - 9);
+				struct bio *pbio = r1_bio->bios[primary];
+				struct bio *sbio = r1_bio->bios[i];
+				for (j = vcnt; j-- ; )
+					if (memcmp(page_address(pbio->bi_io_vec[j].bv_page),
+						   page_address(sbio->bi_io_vec[j].bv_page),
+						   PAGE_SIZE))
+						break;
+				if (j >= 0)
+					mddev->resync_mismatches += r1_bio->sectors;
+				if (j < 0 || test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+					sbio->bi_end_io = NULL;
+				else {
+					/* fixup the bio for reuse */
+					sbio->bi_vcnt = vcnt;
+					sbio->bi_size = r1_bio->sectors << 9;
+					sbio->bi_idx = 0;
+					sbio->bi_phys_segments = 0;
+					sbio->bi_hw_segments = 0;
+					sbio->bi_hw_front_size = 0;
+					sbio->bi_hw_back_size = 0;
+					sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
+					sbio->bi_flags |= 1 << BIO_UPTODATE;
+					sbio->bi_next = NULL;
+					sbio->bi_sector = r1_bio->sector +
+						conf->mirrors[i].rdev->data_offset;
+					sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+				}
+			}
+	}
 	if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
 		/* ouch - failed to read all of that.
 		 * Try some synchronous reads of other devices to get
@@ -1216,6 +1296,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 			idx ++;
 		}
 	}
+
+	/*
+	 * schedule writes
+	 */
 	atomic_set(&r1_bio->remaining, 1);
 	for (i = 0; i < disks ; i++) {
 		wbio = r1_bio->bios[i];
@@ -1618,10 +1702,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	for (i=0 ; i < conf->raid_disks; i++) {
 		bio = r1_bio->bios[i];
 		if (bio->bi_end_io) {
-			page = r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page;
+			page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
 			if (bio_add_page(bio, page, len, 0) == 0) {
 				/* stop here */
-				r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page = page;
+				bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
 				while (i > 0) {
 					i--;
 					bio = r1_bio->bios[i];
@@ -1641,12 +1725,28 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		sync_blocks -= (len>>9);
 	} while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
  bio_full:
-	bio = r1_bio->bios[r1_bio->read_disk];
 	r1_bio->sectors = nr_sectors;
 
-	md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev, nr_sectors);
+	/* For a user-requested sync, we read all readable devices and do a
+	 * compare
+	 */
+	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+		atomic_set(&r1_bio->remaining, read_targets);
+		for (i=0; i<conf->raid_disks; i++) {
+			bio = r1_bio->bios[i];
+			if (bio->bi_end_io == end_sync_read) {
+				md_sync_acct(conf->mirrors[i].rdev->bdev, nr_sectors);
+				generic_make_request(bio);
+			}
+		}
+	} else {
+		atomic_set(&r1_bio->remaining, 1);
+		bio = r1_bio->bios[r1_bio->read_disk];
+		md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev,
+			     nr_sectors);
+		generic_make_request(bio);
 
-	generic_make_request(bio);
+	}
 
 	return nr_sectors;
 }
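For completeness, a sketch of how a check is driven from user space once this support is in place, assuming the md sysfs interface of the same era (the sync_action and mismatch_cnt attributes); treat the paths and attribute names as assumptions rather than guarantees of this patch:

/* Hypothetical driver program: request a check on md0, then read the
 * mismatch count fed by the resync_mismatches accounting added above. */
#include <stdio.h>

int main(void)
{
	FILE *f;
	unsigned long mismatches;

	/* "check" compares only; "repair" would also rewrite
	 * differing blocks. */
	f = fopen("/sys/block/md0/md/sync_action", "w");
	if (!f)
		return 1;
	fputs("check\n", f);
	fclose(f);

	/* ... wait until sync_action reads back "idle", then inspect
	 * the accumulated mismatch count. */
	f = fopen("/sys/block/md0/md/mismatch_cnt", "r");
	if (!f)
		return 1;
	if (fscanf(f, "%lu", &mismatches) == 1)
		printf("mismatch_cnt: %lu\n", mismatches);
	fclose(f);
	return 0;
}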