author	NeilBrown <neilb@suse.de>	2011-05-11 00:40:44 -0400
committer	NeilBrown <neilb@suse.de>	2011-05-11 00:40:44 -0400
commit	a68e58703575b9c03d610a818e0241564fc17f68 (patch)
tree	f60a433b20a4e297d010a23775e78493687c160b
parent	6f8d0c77cef5849433dd7beb0bd97e573cc4a6a3 (diff)
md/raid1: split out two sub-functions from sync_request_write
sync_request_write is too big and too deep.  So split out two self-contained bits of functionality into separate functions.

Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--	drivers/md/raid1.c	365
1 file changed, 192 insertions(+), 173 deletions(-)
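As context for the diff below, here is a minimal standalone sketch of the control-flow shape the split produces. The struct r1bio below, its two fields, and the printf bodies are invented stand-ins, not md code; only the helper names process_checks()/fix_sync_read_error(), their early-exit return conventions, and the slimmed-down caller mirror the actual patch.

/* Hypothetical stand-in for the md r1bio_t; only the two flags that
 * drive the refactored control flow are modelled. */
#include <stdio.h>

struct r1bio {
	int uptodate;        /* did every sync read succeed? */
	int check_requested; /* MD_RECOVERY_REQUESTED stand-in */
};

/* Stand-in for process_checks(): compares the mirrored copies; returns
 * -1 when the caller should stop (the real code has already called
 * md_done_sync() and put_buf() on that path). */
static int process_checks(struct r1bio *r1_bio)
{
	if (!r1_bio->uptodate) {
		printf("no readable copy: finish the sync request, stop\n");
		return -1;
	}
	printf("compare copies, queue over-writes for mismatches\n");
	return 0;
}

/* Stand-in for fix_sync_read_error(): retries the read on the other
 * mirrors; returns 0 when the block is unrecoverable, 1 on repair. */
static int fix_sync_read_error(struct r1bio *r1_bio)
{
	printf("re-read from other mirrors, write good data back\n");
	r1_bio->uptodate = 1;
	return 1;
}

/* The slimmed-down caller: two guarded early exits replace roughly
 * 170 lines of inline, deeply nested code, matching the shape of the
 * new sync_request_write() in the diff below. */
static void sync_request_write(struct r1bio *r1_bio)
{
	if (r1_bio->check_requested)
		if (process_checks(r1_bio) < 0)
			return;

	if (!r1_bio->uptodate)
		if (!fix_sync_read_error(r1_bio))
			return;

	printf("schedule writes\n");
}

int main(void)
{
	struct r1bio r1 = { .uptodate = 0, .check_requested = 0 };
	sync_request_write(&r1);
	return 0;
}

The design point survives the stubs: each helper owns exactly one failure path and signals it through its return value, so the caller reduces to two guards in front of the write-scheduling code.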
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index f0b0c79b3899..7fd7a4d6f5c7 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1181,194 +1181,213 @@ static void end_sync_write(struct bio *bio, int error)
 	}
 }
 
-static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
+static int fix_sync_read_error(r1bio_t *r1_bio)
 {
+	/* Try some synchronous reads of other devices to get
+	 * good data, much like with normal read errors. Only
+	 * read into the pages we already have so we don't
+	 * need to re-issue the read request.
+	 * We don't need to freeze the array, because being in an
+	 * active sync request, there is no normal IO, and
+	 * no overlapping syncs.
+	 */
+	mddev_t *mddev = r1_bio->mddev;
 	conf_t *conf = mddev->private;
-	int i;
-	int disks = conf->raid_disks;
-	struct bio *bio, *wbio;
+	struct bio *bio = r1_bio->bios[r1_bio->read_disk];
+	sector_t sect = r1_bio->sector;
+	int sectors = r1_bio->sectors;
+	int idx = 0;
 
-	bio = r1_bio->bios[r1_bio->read_disk];
-
-
-	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
-		/* We have read all readable devices. If we haven't
-		 * got the block, then there is no hope left.
-		 * If we have, then we want to do a comparison
-		 * and skip the write if everything is the same.
-		 * If any blocks failed to read, then we need to
-		 * attempt an over-write
-		 */
-		int primary;
-		if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
-			for (i=0; i<mddev->raid_disks; i++)
-				if (r1_bio->bios[i]->bi_end_io == end_sync_read)
-					md_error(mddev, conf->mirrors[i].rdev);
+	while(sectors) {
+		int s = sectors;
+		int d = r1_bio->read_disk;
+		int success = 0;
+		mdk_rdev_t *rdev;
 
-			md_done_sync(mddev, r1_bio->sectors, 1);
+		if (s > (PAGE_SIZE>>9))
+			s = PAGE_SIZE >> 9;
+		do {
+			if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
+				/* No rcu protection needed here devices
+				 * can only be removed when no resync is
+				 * active, and resync is currently active
+				 */
+				rdev = conf->mirrors[d].rdev;
+				if (sync_page_io(rdev,
+						 sect,
+						 s<<9,
+						 bio->bi_io_vec[idx].bv_page,
+						 READ, false)) {
+					success = 1;
+					break;
+				}
+			}
+			d++;
+			if (d == conf->raid_disks)
+				d = 0;
+		} while (!success && d != r1_bio->read_disk);
+
+		if (success) {
+			int start = d;
+			/* write it back and re-read */
+			set_bit(R1BIO_Uptodate, &r1_bio->state);
+			while (d != r1_bio->read_disk) {
+				if (d == 0)
+					d = conf->raid_disks;
+				d--;
+				if (r1_bio->bios[d]->bi_end_io != end_sync_read)
+					continue;
+				rdev = conf->mirrors[d].rdev;
+				atomic_add(s, &rdev->corrected_errors);
+				if (sync_page_io(rdev,
+						 sect,
+						 s<<9,
+						 bio->bi_io_vec[idx].bv_page,
+						 WRITE, false) == 0)
+					md_error(mddev, rdev);
+			}
+			d = start;
+			while (d != r1_bio->read_disk) {
+				if (d == 0)
+					d = conf->raid_disks;
+				d--;
+				if (r1_bio->bios[d]->bi_end_io != end_sync_read)
+					continue;
+				rdev = conf->mirrors[d].rdev;
+				if (sync_page_io(rdev,
+						 sect,
+						 s<<9,
+						 bio->bi_io_vec[idx].bv_page,
+						 READ, false) == 0)
+					md_error(mddev, rdev);
+			}
+		} else {
+			char b[BDEVNAME_SIZE];
+			/* Cannot read from anywhere, array is toast */
+			md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+			printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
+			       " for block %llu\n",
+			       mdname(mddev),
+			       bdevname(bio->bi_bdev, b),
+			       (unsigned long long)r1_bio->sector);
+			md_done_sync(mddev, r1_bio->sectors, 0);
 			put_buf(r1_bio);
-			return;
+			return 0;
 		}
-		for (primary=0; primary<mddev->raid_disks; primary++)
-			if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
-			    test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
-				r1_bio->bios[primary]->bi_end_io = NULL;
-				rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
-				break;
-			}
-		r1_bio->read_disk = primary;
+		sectors -= s;
+		sect += s;
+		idx ++;
+	}
+	return 1;
+}
+
+static int process_checks(r1bio_t *r1_bio)
+{
+	/* We have read all readable devices. If we haven't
+	 * got the block, then there is no hope left.
+	 * If we have, then we want to do a comparison
+	 * and skip the write if everything is the same.
+	 * If any blocks failed to read, then we need to
+	 * attempt an over-write
+	 */
+	mddev_t *mddev = r1_bio->mddev;
+	conf_t *conf = mddev->private;
+	int primary;
+	int i;
+
+	if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
 		for (i=0; i<mddev->raid_disks; i++)
-			if (r1_bio->bios[i]->bi_end_io == end_sync_read) {
-				int j;
-				int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
-				struct bio *pbio = r1_bio->bios[primary];
-				struct bio *sbio = r1_bio->bios[i];
-
-				if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
-					for (j = vcnt; j-- ; ) {
-						struct page *p, *s;
-						p = pbio->bi_io_vec[j].bv_page;
-						s = sbio->bi_io_vec[j].bv_page;
-						if (memcmp(page_address(p),
-							   page_address(s),
-							   PAGE_SIZE))
-							break;
-					}
-				} else
-					j = 0;
-				if (j >= 0)
-					mddev->resync_mismatches += r1_bio->sectors;
-				if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
-					      && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
-					sbio->bi_end_io = NULL;
-					rdev_dec_pending(conf->mirrors[i].rdev, mddev);
-				} else {
-					/* fixup the bio for reuse */
-					int size;
-					sbio->bi_vcnt = vcnt;
-					sbio->bi_size = r1_bio->sectors << 9;
-					sbio->bi_idx = 0;
-					sbio->bi_phys_segments = 0;
-					sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
-					sbio->bi_flags |= 1 << BIO_UPTODATE;
-					sbio->bi_next = NULL;
-					sbio->bi_sector = r1_bio->sector +
-						conf->mirrors[i].rdev->data_offset;
-					sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
-					size = sbio->bi_size;
-					for (j = 0; j < vcnt ; j++) {
-						struct bio_vec *bi;
-						bi = &sbio->bi_io_vec[j];
-						bi->bv_offset = 0;
-						if (size > PAGE_SIZE)
-							bi->bv_len = PAGE_SIZE;
-						else
-							bi->bv_len = size;
-						size -= PAGE_SIZE;
-						memcpy(page_address(bi->bv_page),
-						       page_address(pbio->bi_io_vec[j].bv_page),
-						       PAGE_SIZE);
-					}
+			if (r1_bio->bios[i]->bi_end_io == end_sync_read)
+				md_error(mddev, conf->mirrors[i].rdev);
 
-				}
-			}
+		md_done_sync(mddev, r1_bio->sectors, 1);
+		put_buf(r1_bio);
+		return -1;
 	}
-	if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
-		/* ouch - failed to read all of that.
-		 * Try some synchronous reads of other devices to get
-		 * good data, much like with normal read errors. Only
-		 * read into the pages we already have so we don't
-		 * need to re-issue the read request.
-		 * We don't need to freeze the array, because being in an
-		 * active sync request, there is no normal IO, and
-		 * no overlapping syncs.
-		 */
-		sector_t sect = r1_bio->sector;
-		int sectors = r1_bio->sectors;
-		int idx = 0;
-
-		while(sectors) {
-			int s = sectors;
-			int d = r1_bio->read_disk;
-			int success = 0;
-			mdk_rdev_t *rdev;
-
-			if (s > (PAGE_SIZE>>9))
-				s = PAGE_SIZE >> 9;
-			do {
-				if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
-					/* No rcu protection needed here devices
-					 * can only be removed when no resync is
-					 * active, and resync is currently active
-					 */
-					rdev = conf->mirrors[d].rdev;
-					if (sync_page_io(rdev,
-							 sect,
-							 s<<9,
-							 bio->bi_io_vec[idx].bv_page,
-							 READ, false)) {
-						success = 1;
+	for (primary=0; primary<mddev->raid_disks; primary++)
+		if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
+		    test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
+			r1_bio->bios[primary]->bi_end_io = NULL;
+			rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
+			break;
+		}
+	r1_bio->read_disk = primary;
+	for (i=0; i<mddev->raid_disks; i++)
+		if (r1_bio->bios[i]->bi_end_io == end_sync_read) {
+			int j;
+			int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
+			struct bio *pbio = r1_bio->bios[primary];
+			struct bio *sbio = r1_bio->bios[i];
+
+			if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
+				for (j = vcnt; j-- ; ) {
+					struct page *p, *s;
+					p = pbio->bi_io_vec[j].bv_page;
+					s = sbio->bi_io_vec[j].bv_page;
+					if (memcmp(page_address(p),
+						   page_address(s),
+						   PAGE_SIZE))
 						break;
-					}
 				}
-				d++;
-				if (d == conf->raid_disks)
-					d = 0;
-			} while (!success && d != r1_bio->read_disk);
-
-			if (success) {
-				int start = d;
-				/* write it back and re-read */
-				set_bit(R1BIO_Uptodate, &r1_bio->state);
-				while (d != r1_bio->read_disk) {
-					if (d == 0)
-						d = conf->raid_disks;
-					d--;
-					if (r1_bio->bios[d]->bi_end_io != end_sync_read)
-						continue;
-					rdev = conf->mirrors[d].rdev;
-					atomic_add(s, &rdev->corrected_errors);
-					if (sync_page_io(rdev,
-							 sect,
-							 s<<9,
-							 bio->bi_io_vec[idx].bv_page,
-							 WRITE, false) == 0)
-						md_error(mddev, rdev);
-				}
-				d = start;
-				while (d != r1_bio->read_disk) {
-					if (d == 0)
-						d = conf->raid_disks;
-					d--;
-					if (r1_bio->bios[d]->bi_end_io != end_sync_read)
-						continue;
-					rdev = conf->mirrors[d].rdev;
-					if (sync_page_io(rdev,
-							 sect,
-							 s<<9,
-							 bio->bi_io_vec[idx].bv_page,
-							 READ, false) == 0)
-						md_error(mddev, rdev);
-				}
-			} else {
-				char b[BDEVNAME_SIZE];
-				/* Cannot read from anywhere, array is toast */
-				md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
-				printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
-				       " for block %llu\n",
-				       mdname(mddev),
-				       bdevname(bio->bi_bdev, b),
-				       (unsigned long long)r1_bio->sector);
-				md_done_sync(mddev, r1_bio->sectors, 0);
-				put_buf(r1_bio);
-				return;
+			} else
+				j = 0;
+			if (j >= 0)
+				mddev->resync_mismatches += r1_bio->sectors;
+			if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
+				      && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
+				sbio->bi_end_io = NULL;
+				rdev_dec_pending(conf->mirrors[i].rdev, mddev);
+			} else {
+				/* fixup the bio for reuse */
+				int size;
+				sbio->bi_vcnt = vcnt;
+				sbio->bi_size = r1_bio->sectors << 9;
+				sbio->bi_idx = 0;
+				sbio->bi_phys_segments = 0;
+				sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
+				sbio->bi_flags |= 1 << BIO_UPTODATE;
+				sbio->bi_next = NULL;
+				sbio->bi_sector = r1_bio->sector +
+					conf->mirrors[i].rdev->data_offset;
+				sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+				size = sbio->bi_size;
+				for (j = 0; j < vcnt ; j++) {
+					struct bio_vec *bi;
+					bi = &sbio->bi_io_vec[j];
+					bi->bv_offset = 0;
+					if (size > PAGE_SIZE)
+						bi->bv_len = PAGE_SIZE;
+					else
+						bi->bv_len = size;
+					size -= PAGE_SIZE;
+					memcpy(page_address(bi->bv_page),
+					       page_address(pbio->bi_io_vec[j].bv_page),
+					       PAGE_SIZE);
+				}
+
 			}
-			sectors -= s;
-			sect += s;
-			idx ++;
 		}
-	}
+	return 0;
+}
+
+static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
+{
+	conf_t *conf = mddev->private;
+	int i;
+	int disks = conf->raid_disks;
+	struct bio *bio, *wbio;
+
+	bio = r1_bio->bios[r1_bio->read_disk];
+
 
+	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+		if (process_checks(r1_bio) < 0)
+			return;
+
+	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
+		/* ouch - failed to read all of that. */
+		if (!fix_sync_read_error(r1_bio))
+			return;
 	/*
 	 * schedule writes
 	 */