diff options
-rw-r--r-- | drivers/md/raid1.c | 156 |
1 files changed, 128 insertions, 28 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a8bc93d6ff63..7fbb60883280 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -106,15 +106,30 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) | |||
106 | } | 106 | } |
107 | /* | 107 | /* |
108 | * Allocate RESYNC_PAGES data pages and attach them to | 108 | * Allocate RESYNC_PAGES data pages and attach them to |
109 | * the first bio; | 109 | * the first bio. |
110 | * If this is a user-requested check/repair, allocate | ||
111 | * RESYNC_PAGES for each bio. | ||
110 | */ | 112 | */ |
111 | bio = r1_bio->bios[0]; | 113 | if (test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) |
112 | for (i = 0; i < RESYNC_PAGES; i++) { | 114 | j = pi->raid_disks; |
113 | page = alloc_page(gfp_flags); | 115 | else |
114 | if (unlikely(!page)) | 116 | j = 1; |
115 | goto out_free_pages; | 117 | while(j--) { |
116 | 118 | bio = r1_bio->bios[j]; | |
117 | bio->bi_io_vec[i].bv_page = page; | 119 | for (i = 0; i < RESYNC_PAGES; i++) { |
120 | page = alloc_page(gfp_flags); | ||
121 | if (unlikely(!page)) | ||
122 | goto out_free_pages; | ||
123 | |||
124 | bio->bi_io_vec[i].bv_page = page; | ||
125 | } | ||
126 | } | ||
127 | /* If not user-requested, copy the page pointers to all bios */ | ||
128 | if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) { | ||
129 | for (i=0; i<RESYNC_PAGES ; i++) | ||
130 | for (j=1; j<pi->raid_disks; j++) | ||
131 | r1_bio->bios[j]->bi_io_vec[i].bv_page = | ||
132 | r1_bio->bios[0]->bi_io_vec[i].bv_page; | ||
118 | } | 133 | } |
119 | 134 | ||
120 | r1_bio->master_bio = NULL; | 135 | r1_bio->master_bio = NULL; |
@@ -122,8 +137,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) | |||
122 | return r1_bio; | 137 | return r1_bio; |
123 | 138 | ||
124 | out_free_pages: | 139 | out_free_pages: |
125 | for ( ; i > 0 ; i--) | 140 | for (i=0; i < RESYNC_PAGES ; i++) |
126 | __free_page(bio->bi_io_vec[i-1].bv_page); | 141 | for (j=0 ; j < pi->raid_disks; j++) |
142 | __free_page(r1_bio->bios[j]->bi_io_vec[i].bv_page); | ||
143 | j = -1; | ||
127 | out_free_bio: | 144 | out_free_bio: |
128 | while ( ++j < pi->raid_disks ) | 145 | while ( ++j < pi->raid_disks ) |
129 | bio_put(r1_bio->bios[j]); | 146 | bio_put(r1_bio->bios[j]); |
@@ -134,14 +151,16 @@ out_free_bio: | |||
134 | static void r1buf_pool_free(void *__r1_bio, void *data) | 151 | static void r1buf_pool_free(void *__r1_bio, void *data) |
135 | { | 152 | { |
136 | struct pool_info *pi = data; | 153 | struct pool_info *pi = data; |
137 | int i; | 154 | int i,j; |
138 | r1bio_t *r1bio = __r1_bio; | 155 | r1bio_t *r1bio = __r1_bio; |
139 | struct bio *bio = r1bio->bios[0]; | ||
140 | 156 | ||
141 | for (i = 0; i < RESYNC_PAGES; i++) { | 157 | for (i = 0; i < RESYNC_PAGES; i++) |
142 | __free_page(bio->bi_io_vec[i].bv_page); | 158 | for (j = pi->raid_disks; j-- ;) { |
143 | bio->bi_io_vec[i].bv_page = NULL; | 159 | if (j == 0 || |
144 | } | 160 | r1bio->bios[j]->bi_io_vec[i].bv_page != |
161 | r1bio->bios[0]->bi_io_vec[i].bv_page) | ||
162 | __free_page(r1bio->bios[j]->bi_io_vec[i].bv_page); | ||
163 | } | ||
145 | for (i=0 ; i < pi->raid_disks; i++) | 164 | for (i=0 ; i < pi->raid_disks; i++) |
146 | bio_put(r1bio->bios[i]); | 165 | bio_put(r1bio->bios[i]); |
147 | 166 | ||
@@ -1077,13 +1096,16 @@ abort: | |||
1077 | static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) | 1096 | static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) |
1078 | { | 1097 | { |
1079 | r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); | 1098 | r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); |
1099 | int i; | ||
1080 | 1100 | ||
1081 | if (bio->bi_size) | 1101 | if (bio->bi_size) |
1082 | return 1; | 1102 | return 1; |
1083 | 1103 | ||
1084 | if (r1_bio->bios[r1_bio->read_disk] != bio) | 1104 | for (i=r1_bio->mddev->raid_disks; i--; ) |
1085 | BUG(); | 1105 | if (r1_bio->bios[i] == bio) |
1086 | update_head_pos(r1_bio->read_disk, r1_bio); | 1106 | break; |
1107 | BUG_ON(i < 0); | ||
1108 | update_head_pos(i, r1_bio); | ||
1087 | /* | 1109 | /* |
1088 | * we have read a block, now it needs to be re-written, | 1110 | * we have read a block, now it needs to be re-written, |
1089 | * or re-read if the read failed. | 1111 | * or re-read if the read failed. |
@@ -1091,7 +1113,9 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) | |||
1091 | */ | 1113 | */ |
1092 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | 1114 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) |
1093 | set_bit(R1BIO_Uptodate, &r1_bio->state); | 1115 | set_bit(R1BIO_Uptodate, &r1_bio->state); |
1094 | reschedule_retry(r1_bio); | 1116 | |
1117 | if (atomic_dec_and_test(&r1_bio->remaining)) | ||
1118 | reschedule_retry(r1_bio); | ||
1095 | return 0; | 1119 | return 0; |
1096 | } | 1120 | } |
1097 | 1121 | ||
@@ -1134,9 +1158,65 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | |||
1134 | bio = r1_bio->bios[r1_bio->read_disk]; | 1158 | bio = r1_bio->bios[r1_bio->read_disk]; |
1135 | 1159 | ||
1136 | 1160 | ||
1137 | /* | 1161 | if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { |
1138 | * schedule writes | 1162 | /* We have read all readable devices. If we haven't |
1139 | */ | 1163 | * got the block, then there is no hope left. |
1164 | * If we have, then we want to do a comparison | ||
1165 | * and skip the write if everything is the same. | ||
1166 | * If any blocks failed to read, then we need to | ||
1167 | * attempt an over-write | ||
1168 | */ | ||
1169 | int primary; | ||
1170 | if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { | ||
1171 | for (i=0; i<mddev->raid_disks; i++) | ||
1172 | if (r1_bio->bios[i]->bi_end_io == end_sync_read) | ||
1173 | md_error(mddev, conf->mirrors[i].rdev); | ||
1174 | |||
1175 | md_done_sync(mddev, r1_bio->sectors, 1); | ||
1176 | put_buf(r1_bio); | ||
1177 | return; | ||
1178 | } | ||
1179 | for (primary=0; primary<mddev->raid_disks; primary++) | ||
1180 | if (r1_bio->bios[primary]->bi_end_io == end_sync_read && | ||
1181 | test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) { | ||
1182 | r1_bio->bios[primary]->bi_end_io = NULL; | ||
1183 | break; | ||
1184 | } | ||
1185 | r1_bio->read_disk = primary; | ||
1186 | for (i=0; i<mddev->raid_disks; i++) | ||
1187 | if (r1_bio->bios[i]->bi_end_io == end_sync_read && | ||
1188 | test_bit(BIO_UPTODATE, &r1_bio->bios[i]->bi_flags)) { | ||
1189 | int j; | ||
1190 | int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9); | ||
1191 | struct bio *pbio = r1_bio->bios[primary]; | ||
1192 | struct bio *sbio = r1_bio->bios[i]; | ||
1193 | for (j = vcnt; j-- ; ) | ||
1194 | if (memcmp(page_address(pbio->bi_io_vec[j].bv_page), | ||
1195 | page_address(sbio->bi_io_vec[j].bv_page), | ||
1196 | PAGE_SIZE)) | ||
1197 | break; | ||
1198 | if (j >= 0) | ||
1199 | mddev->resync_mismatches += r1_bio->sectors; | ||
1200 | if (j < 0 || test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) | ||
1201 | sbio->bi_end_io = NULL; | ||
1202 | else { | ||
1203 | /* fixup the bio for reuse */ | ||
1204 | sbio->bi_vcnt = vcnt; | ||
1205 | sbio->bi_size = r1_bio->sectors << 9; | ||
1206 | sbio->bi_idx = 0; | ||
1207 | sbio->bi_phys_segments = 0; | ||
1208 | sbio->bi_hw_segments = 0; | ||
1209 | sbio->bi_hw_front_size = 0; | ||
1210 | sbio->bi_hw_back_size = 0; | ||
1211 | sbio->bi_flags &= ~(BIO_POOL_MASK - 1); | ||
1212 | sbio->bi_flags |= 1 << BIO_UPTODATE; | ||
1213 | sbio->bi_next = NULL; | ||
1214 | sbio->bi_sector = r1_bio->sector + | ||
1215 | conf->mirrors[i].rdev->data_offset; | ||
1216 | sbio->bi_bdev = conf->mirrors[i].rdev->bdev; | ||
1217 | } | ||
1218 | } | ||
1219 | } | ||
1140 | if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { | 1220 | if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { |
1141 | /* ouch - failed to read all of that. | 1221 | /* ouch - failed to read all of that. |
1142 | * Try some synchronous reads of other devices to get | 1222 | * Try some synchronous reads of other devices to get |
@@ -1216,6 +1296,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | |||
1216 | idx ++; | 1296 | idx ++; |
1217 | } | 1297 | } |
1218 | } | 1298 | } |
1299 | |||
1300 | /* | ||
1301 | * schedule writes | ||
1302 | */ | ||
1219 | atomic_set(&r1_bio->remaining, 1); | 1303 | atomic_set(&r1_bio->remaining, 1); |
1220 | for (i = 0; i < disks ; i++) { | 1304 | for (i = 0; i < disks ; i++) { |
1221 | wbio = r1_bio->bios[i]; | 1305 | wbio = r1_bio->bios[i]; |
@@ -1618,10 +1702,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1618 | for (i=0 ; i < conf->raid_disks; i++) { | 1702 | for (i=0 ; i < conf->raid_disks; i++) { |
1619 | bio = r1_bio->bios[i]; | 1703 | bio = r1_bio->bios[i]; |
1620 | if (bio->bi_end_io) { | 1704 | if (bio->bi_end_io) { |
1621 | page = r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page; | 1705 | page = bio->bi_io_vec[bio->bi_vcnt].bv_page; |
1622 | if (bio_add_page(bio, page, len, 0) == 0) { | 1706 | if (bio_add_page(bio, page, len, 0) == 0) { |
1623 | /* stop here */ | 1707 | /* stop here */ |
1624 | r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page = page; | 1708 | bio->bi_io_vec[bio->bi_vcnt].bv_page = page; |
1625 | while (i > 0) { | 1709 | while (i > 0) { |
1626 | i--; | 1710 | i--; |
1627 | bio = r1_bio->bios[i]; | 1711 | bio = r1_bio->bios[i]; |
@@ -1641,12 +1725,28 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1641 | sync_blocks -= (len>>9); | 1725 | sync_blocks -= (len>>9); |
1642 | } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); | 1726 | } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); |
1643 | bio_full: | 1727 | bio_full: |
1644 | bio = r1_bio->bios[r1_bio->read_disk]; | ||
1645 | r1_bio->sectors = nr_sectors; | 1728 | r1_bio->sectors = nr_sectors; |
1646 | 1729 | ||
1647 | md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev, nr_sectors); | 1730 | /* For a user-requested sync, we read all readable devices and do a |
1731 | * compare | ||
1732 | */ | ||
1733 | if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { | ||
1734 | atomic_set(&r1_bio->remaining, read_targets); | ||
1735 | for (i=0; i<conf->raid_disks; i++) { | ||
1736 | bio = r1_bio->bios[i]; | ||
1737 | if (bio->bi_end_io == end_sync_read) { | ||
1738 | md_sync_acct(conf->mirrors[i].rdev->bdev, nr_sectors); | ||
1739 | generic_make_request(bio); | ||
1740 | } | ||
1741 | } | ||
1742 | } else { | ||
1743 | atomic_set(&r1_bio->remaining, 1); | ||
1744 | bio = r1_bio->bios[r1_bio->read_disk]; | ||
1745 | md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev, | ||
1746 | nr_sectors); | ||
1747 | generic_make_request(bio); | ||
1648 | 1748 | ||
1649 | generic_make_request(bio); | 1749 | } |
1650 | 1750 | ||
1651 | return nr_sectors; | 1751 | return nr_sectors; |
1652 | } | 1752 | } |