 drivers/md/raid1.c | 156 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 128 insertions(+), 28 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a8bc93d6ff63..7fbb60883280 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -106,15 +106,30 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 	}
 	/*
 	 * Allocate RESYNC_PAGES data pages and attach them to
-	 * the first bio;
+	 * the first bio.
+	 * If this is a user-requested check/repair, allocate
+	 * RESYNC_PAGES for each bio.
 	 */
-	bio = r1_bio->bios[0];
-	for (i = 0; i < RESYNC_PAGES; i++) {
-		page = alloc_page(gfp_flags);
-		if (unlikely(!page))
-			goto out_free_pages;
-
-		bio->bi_io_vec[i].bv_page = page;
+	if (test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery))
+		j = pi->raid_disks;
+	else
+		j = 1;
+	while(j--) {
+		bio = r1_bio->bios[j];
+		for (i = 0; i < RESYNC_PAGES; i++) {
+			page = alloc_page(gfp_flags);
+			if (unlikely(!page))
+				goto out_free_pages;
+
+			bio->bi_io_vec[i].bv_page = page;
+		}
+	}
+	/* If not user-requested, copy the page pointers to all bios */
+	if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
+		for (i=0; i<RESYNC_PAGES ; i++)
+			for (j=1; j<pi->raid_disks; j++)
+				r1_bio->bios[j]->bi_io_vec[i].bv_page =
+					r1_bio->bios[0]->bi_io_vec[i].bv_page;
 	}
 
 	r1_bio->master_bio = NULL;
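The hunk above is the heart of the change: a user-requested check/repair gives every bio its own RESYNC_PAGES pages so each disk's data can be compared later, while an ordinary resync keeps the old behaviour of one shared page set. A minimal userspace sketch of that policy, not part of the patch; RAID_DISKS, RESYNC_PAGES and the pages array are illustrative stand-ins for the driver's structures:

#include <stdlib.h>

#define RAID_DISKS   4		/* stand-in for pi->raid_disks */
#define RESYNC_PAGES 16
#define PAGE_SIZE    4096

static void *pages[RAID_DISKS][RESYNC_PAGES];

static int alloc_resync_pages(int user_requested)
{
	int i, j = user_requested ? RAID_DISKS : 1;

	/* check/repair: one page set per disk; resync: one set total */
	while (j--)
		for (i = 0; i < RESYNC_PAGES; i++)
			if (!(pages[j][i] = malloc(PAGE_SIZE)))
				return -1;	/* unwinding omitted in this sketch */

	/* resync: every other disk shares disk 0's pages */
	if (!user_requested)
		for (i = 0; i < RESYNC_PAGES; i++)
			for (j = 1; j < RAID_DISKS; j++)
				pages[j][i] = pages[0][i];
	return 0;
}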
@@ -122,8 +137,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 	return r1_bio;
 
 out_free_pages:
-	for ( ; i > 0 ; i--)
-		__free_page(bio->bi_io_vec[i-1].bv_page);
+	for (i=0; i < RESYNC_PAGES ; i++)
+		for (j=0 ; j < pi->raid_disks; j++)
+			__free_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
+	j = -1;
 out_free_bio:
 	while ( ++j < pi->raid_disks )
 		bio_put(r1_bio->bios[j]);
@@ -134,14 +151,16 @@ out_free_bio:
 static void r1buf_pool_free(void *__r1_bio, void *data)
 {
 	struct pool_info *pi = data;
-	int i;
+	int i,j;
 	r1bio_t *r1bio = __r1_bio;
-	struct bio *bio = r1bio->bios[0];
 
-	for (i = 0; i < RESYNC_PAGES; i++) {
-		__free_page(bio->bi_io_vec[i].bv_page);
-		bio->bi_io_vec[i].bv_page = NULL;
-	}
+	for (i = 0; i < RESYNC_PAGES; i++)
+		for (j = pi->raid_disks; j-- ;) {
+			if (j == 0 ||
+			    r1bio->bios[j]->bi_io_vec[i].bv_page !=
+			    r1bio->bios[0]->bi_io_vec[i].bv_page)
+				__free_page(r1bio->bios[j]->bi_io_vec[i].bv_page);
+		}
 	for (i=0 ; i < pi->raid_disks; i++)
 		bio_put(r1bio->bios[i]);
 
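The matching free path above must release each page exactly once whichever layout was used. Continuing the sketch after the first hunk: disk 0's page is always freed, and disk j's only when it is not the shared copy:

static void free_resync_pages(void)
{
	int i, j;

	for (i = 0; i < RESYNC_PAGES; i++)
		for (j = RAID_DISKS; j--;)
			/* free through disk 0, or through disk j when
			 * it owns a private (non-shared) page
			 */
			if (j == 0 || pages[j][i] != pages[0][i])
				free(pages[j][i]);
}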
@@ -1077,13 +1096,16 @@ abort:
 static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 {
 	r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
+	int i;
 
 	if (bio->bi_size)
 		return 1;
 
-	if (r1_bio->bios[r1_bio->read_disk] != bio)
-		BUG();
-	update_head_pos(r1_bio->read_disk, r1_bio);
+	for (i=r1_bio->mddev->raid_disks; i--; )
+		if (r1_bio->bios[i] == bio)
+			break;
+	BUG_ON(i < 0);
+	update_head_pos(i, r1_bio);
 	/*
 	 * we have read a block, now it needs to be re-written,
 	 * or re-read if the read failed.
@@ -1091,7 +1113,9 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 	 */
 	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
-	reschedule_retry(r1_bio);
+
+	if (atomic_dec_and_test(&r1_bio->remaining))
+		reschedule_retry(r1_bio);
 	return 0;
 }
 
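Because a user-requested pass now keeps reads in flight on several devices at once, end_sync_read can no longer assume it sees exactly one completion per r1_bio: it looks up which bio finished, and only the last completion (counted down via r1_bio->remaining) schedules the retry step. The countdown pattern in isolation, a sketch using C11 atomics in place of the kernel's atomic_t, with hypothetical function names:

#include <stdatomic.h>

static atomic_int remaining;

static void reschedule_retry(void)
{
	/* the real driver hands the r1_bio to the raid1d thread here */
}

static void submit_reads(int read_targets)
{
	atomic_store(&remaining, read_targets);
	/* ...generic_make_request() on each readable device... */
}

static void end_one_read(void)
{
	/* like atomic_dec_and_test(): true only for the last completion */
	if (atomic_fetch_sub(&remaining, 1) == 1)
		reschedule_retry();
}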
@@ -1134,9 +1158,65 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 	bio = r1_bio->bios[r1_bio->read_disk];
 
 
-	/*
-	 * schedule writes
-	 */
+	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+		/* We have read all readable devices. If we haven't
+		 * got the block, then there is no hope left.
+		 * If we have, then we want to do a comparison
+		 * and skip the write if everything is the same.
+		 * If any blocks failed to read, then we need to
+		 * attempt an over-write
+		 */
+		int primary;
+		if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
+			for (i=0; i<mddev->raid_disks; i++)
+				if (r1_bio->bios[i]->bi_end_io == end_sync_read)
+					md_error(mddev, conf->mirrors[i].rdev);
+
+			md_done_sync(mddev, r1_bio->sectors, 1);
+			put_buf(r1_bio);
+			return;
+		}
+		for (primary=0; primary<mddev->raid_disks; primary++)
+			if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
+			    test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
+				r1_bio->bios[primary]->bi_end_io = NULL;
+				break;
+			}
+		r1_bio->read_disk = primary;
+		for (i=0; i<mddev->raid_disks; i++)
+			if (r1_bio->bios[i]->bi_end_io == end_sync_read &&
+			    test_bit(BIO_UPTODATE, &r1_bio->bios[i]->bi_flags)) {
+				int j;
+				int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
+				struct bio *pbio = r1_bio->bios[primary];
+				struct bio *sbio = r1_bio->bios[i];
+				for (j = vcnt; j-- ; )
+					if (memcmp(page_address(pbio->bi_io_vec[j].bv_page),
+						   page_address(sbio->bi_io_vec[j].bv_page),
+						   PAGE_SIZE))
+						break;
+				if (j >= 0)
+					mddev->resync_mismatches += r1_bio->sectors;
+				if (j < 0 || test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+					sbio->bi_end_io = NULL;
+				else {
+					/* fixup the bio for reuse */
+					sbio->bi_vcnt = vcnt;
+					sbio->bi_size = r1_bio->sectors << 9;
+					sbio->bi_idx = 0;
+					sbio->bi_phys_segments = 0;
+					sbio->bi_hw_segments = 0;
+					sbio->bi_hw_front_size = 0;
+					sbio->bi_hw_back_size = 0;
+					sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
+					sbio->bi_flags |= 1 << BIO_UPTODATE;
+					sbio->bi_next = NULL;
+					sbio->bi_sector = r1_bio->sector +
+						conf->mirrors[i].rdev->data_offset;
+					sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+				}
+			}
+	}
 	if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
 		/* ouch - failed to read all of that.
 		 * Try some synchronous reads of other devices to get
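The comparison above is deliberately coarse: the first differing page charges the whole range, in sectors, to resync_mismatches, and when MD_RECOVERY_CHECK is set ("check" rather than "repair") the rewrite is suppressed even on a mismatch. A userspace model of the inner loop, not the driver code itself:

#include <string.h>

#define PAGE_SIZE 4096

static long mismatches;			/* models mddev->resync_mismatches */

static int compare_to_primary(void *primary[], void *other[],
			      int vcnt, int sectors)
{
	int j;

	for (j = vcnt; j--;)		/* scan pages from the top down */
		if (memcmp(primary[j], other[j], PAGE_SIZE))
			break;
	if (j >= 0)			/* broke out: at least one page differs */
		mismatches += sectors;
	return j >= 0;			/* nonzero: rewrite needed in repair mode */
}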
@@ -1216,6 +1296,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 			idx ++;
 		}
 	}
+
+	/*
+	 * schedule writes
+	 */
 	atomic_set(&r1_bio->remaining, 1);
 	for (i = 0; i < disks ; i++) {
 		wbio = r1_bio->bios[i];
@@ -1618,10 +1702,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
 	for (i=0 ; i < conf->raid_disks; i++) {
 		bio = r1_bio->bios[i];
 		if (bio->bi_end_io) {
-			page = r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page;
+			page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
 			if (bio_add_page(bio, page, len, 0) == 0) {
 				/* stop here */
-				r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page = page;
+				bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
 				while (i > 0) {
 					i--;
 					bio = r1_bio->bios[i];
@@ -1641,12 +1725,28 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
 		sync_blocks -= (len>>9);
 	} while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
  bio_full:
-	bio = r1_bio->bios[r1_bio->read_disk];
 	r1_bio->sectors = nr_sectors;
 
-	md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev, nr_sectors);
+	/* For a user-requested sync, we read all readable devices and do a
+	 * compare
+	 */
+	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+		atomic_set(&r1_bio->remaining, read_targets);
+		for (i=0; i<conf->raid_disks; i++) {
+			bio = r1_bio->bios[i];
+			if (bio->bi_end_io == end_sync_read) {
+				md_sync_acct(conf->mirrors[i].rdev->bdev, nr_sectors);
+				generic_make_request(bio);
+			}
+		}
+	} else {
+		atomic_set(&r1_bio->remaining, 1);
+		bio = r1_bio->bios[r1_bio->read_disk];
+		md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev,
+			     nr_sectors);
+		generic_make_request(bio);
 
-	generic_make_request(bio);
+	}
 
 	return nr_sectors;
 }
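For context on how this code is reached, an assumption about the md sysfs interface of the same era rather than anything shown in this patch: writing "check" or "repair" to an array's sync_action attribute is what sets MD_RECOVERY_REQUESTED (and, for "check", MD_RECOVERY_CHECK), and mismatch_cnt reports resync_mismatches afterwards. A hypothetical userspace example, with /sys/block/md0/md as a placeholder path:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/block/md0/md/sync_action", "w");
	char buf[32];

	if (!f)
		return 1;
	fputs("check", f);	/* "repair" would rewrite mismatched blocks */
	fclose(f);

	/* ...wait for the pass to finish (sync_action reads "idle")... */

	f = fopen("/sys/block/md0/md/mismatch_cnt", "r");
	if (f && fgets(buf, sizeof(buf), f))
		printf("mismatched sectors: %s", buf);
	if (f)
		fclose(f);
	return 0;
}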