diff options
author | NeilBrown <neilb@suse.de> | 2011-05-11 00:40:44 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2011-05-11 00:40:44 -0400 |
commit | a68e58703575b9c03d610a818e0241564fc17f68 (patch) | |
tree | f60a433b20a4e297d010a23775e78493687c160b /drivers | |
parent | 6f8d0c77cef5849433dd7beb0bd97e573cc4a6a3 (diff) |
md/raid1: split out two sub-functions from sync_request_write
sync_request_write is too big and too deep.
So split out two self-contained bits of functionality into separate
functions.
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/raid1.c | 365 |
1 files changed, 192 insertions, 173 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f0b0c79b3899..7fd7a4d6f5c7 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -1181,194 +1181,213 @@ static void end_sync_write(struct bio *bio, int error) | |||
1181 | } | 1181 | } |
1182 | } | 1182 | } |
1183 | 1183 | ||
1184 | static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | 1184 | static int fix_sync_read_error(r1bio_t *r1_bio) |
1185 | { | 1185 | { |
1186 | /* Try some synchronous reads of other devices to get | ||
1187 | * good data, much like with normal read errors. Only | ||
1188 | * read into the pages we already have so we don't | ||
1189 | * need to re-issue the read request. | ||
1190 | * We don't need to freeze the array, because being in an | ||
1191 | * active sync request, there is no normal IO, and | ||
1192 | * no overlapping syncs. | ||
1193 | */ | ||
1194 | mddev_t *mddev = r1_bio->mddev; | ||
1186 | conf_t *conf = mddev->private; | 1195 | conf_t *conf = mddev->private; |
1187 | int i; | 1196 | struct bio *bio = r1_bio->bios[r1_bio->read_disk]; |
1188 | int disks = conf->raid_disks; | 1197 | sector_t sect = r1_bio->sector; |
1189 | struct bio *bio, *wbio; | 1198 | int sectors = r1_bio->sectors; |
1199 | int idx = 0; | ||
1190 | 1200 | ||
1191 | bio = r1_bio->bios[r1_bio->read_disk]; | 1201 | while(sectors) { |
1192 | 1202 | int s = sectors; | |
1193 | 1203 | int d = r1_bio->read_disk; | |
1194 | if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { | 1204 | int success = 0; |
1195 | /* We have read all readable devices. If we haven't | 1205 | mdk_rdev_t *rdev; |
1196 | * got the block, then there is no hope left. | ||
1197 | * If we have, then we want to do a comparison | ||
1198 | * and skip the write if everything is the same. | ||
1199 | * If any blocks failed to read, then we need to | ||
1200 | * attempt an over-write | ||
1201 | */ | ||
1202 | int primary; | ||
1203 | if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { | ||
1204 | for (i=0; i<mddev->raid_disks; i++) | ||
1205 | if (r1_bio->bios[i]->bi_end_io == end_sync_read) | ||
1206 | md_error(mddev, conf->mirrors[i].rdev); | ||
1207 | 1206 | ||
1208 | md_done_sync(mddev, r1_bio->sectors, 1); | 1207 | if (s > (PAGE_SIZE>>9)) |
1208 | s = PAGE_SIZE >> 9; | ||
1209 | do { | ||
1210 | if (r1_bio->bios[d]->bi_end_io == end_sync_read) { | ||
1211 | /* No rcu protection needed here devices | ||
1212 | * can only be removed when no resync is | ||
1213 | * active, and resync is currently active | ||
1214 | */ | ||
1215 | rdev = conf->mirrors[d].rdev; | ||
1216 | if (sync_page_io(rdev, | ||
1217 | sect, | ||
1218 | s<<9, | ||
1219 | bio->bi_io_vec[idx].bv_page, | ||
1220 | READ, false)) { | ||
1221 | success = 1; | ||
1222 | break; | ||
1223 | } | ||
1224 | } | ||
1225 | d++; | ||
1226 | if (d == conf->raid_disks) | ||
1227 | d = 0; | ||
1228 | } while (!success && d != r1_bio->read_disk); | ||
1229 | |||
1230 | if (success) { | ||
1231 | int start = d; | ||
1232 | /* write it back and re-read */ | ||
1233 | set_bit(R1BIO_Uptodate, &r1_bio->state); | ||
1234 | while (d != r1_bio->read_disk) { | ||
1235 | if (d == 0) | ||
1236 | d = conf->raid_disks; | ||
1237 | d--; | ||
1238 | if (r1_bio->bios[d]->bi_end_io != end_sync_read) | ||
1239 | continue; | ||
1240 | rdev = conf->mirrors[d].rdev; | ||
1241 | atomic_add(s, &rdev->corrected_errors); | ||
1242 | if (sync_page_io(rdev, | ||
1243 | sect, | ||
1244 | s<<9, | ||
1245 | bio->bi_io_vec[idx].bv_page, | ||
1246 | WRITE, false) == 0) | ||
1247 | md_error(mddev, rdev); | ||
1248 | } | ||
1249 | d = start; | ||
1250 | while (d != r1_bio->read_disk) { | ||
1251 | if (d == 0) | ||
1252 | d = conf->raid_disks; | ||
1253 | d--; | ||
1254 | if (r1_bio->bios[d]->bi_end_io != end_sync_read) | ||
1255 | continue; | ||
1256 | rdev = conf->mirrors[d].rdev; | ||
1257 | if (sync_page_io(rdev, | ||
1258 | sect, | ||
1259 | s<<9, | ||
1260 | bio->bi_io_vec[idx].bv_page, | ||
1261 | READ, false) == 0) | ||
1262 | md_error(mddev, rdev); | ||
1263 | } | ||
1264 | } else { | ||
1265 | char b[BDEVNAME_SIZE]; | ||
1266 | /* Cannot read from anywhere, array is toast */ | ||
1267 | md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); | ||
1268 | printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error" | ||
1269 | " for block %llu\n", | ||
1270 | mdname(mddev), | ||
1271 | bdevname(bio->bi_bdev, b), | ||
1272 | (unsigned long long)r1_bio->sector); | ||
1273 | md_done_sync(mddev, r1_bio->sectors, 0); | ||
1209 | put_buf(r1_bio); | 1274 | put_buf(r1_bio); |
1210 | return; | 1275 | return 0; |
1211 | } | 1276 | } |
1212 | for (primary=0; primary<mddev->raid_disks; primary++) | 1277 | sectors -= s; |
1213 | if (r1_bio->bios[primary]->bi_end_io == end_sync_read && | 1278 | sect += s; |
1214 | test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) { | 1279 | idx ++; |
1215 | r1_bio->bios[primary]->bi_end_io = NULL; | 1280 | } |
1216 | rdev_dec_pending(conf->mirrors[primary].rdev, mddev); | 1281 | return 1; |
1217 | break; | 1282 | } |
1218 | } | 1283 | |
1219 | r1_bio->read_disk = primary; | 1284 | static int process_checks(r1bio_t *r1_bio) |
1285 | { | ||
1286 | /* We have read all readable devices. If we haven't | ||
1287 | * got the block, then there is no hope left. | ||
1288 | * If we have, then we want to do a comparison | ||
1289 | * and skip the write if everything is the same. | ||
1290 | * If any blocks failed to read, then we need to | ||
1291 | * attempt an over-write | ||
1292 | */ | ||
1293 | mddev_t *mddev = r1_bio->mddev; | ||
1294 | conf_t *conf = mddev->private; | ||
1295 | int primary; | ||
1296 | int i; | ||
1297 | |||
1298 | if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { | ||
1220 | for (i=0; i<mddev->raid_disks; i++) | 1299 | for (i=0; i<mddev->raid_disks; i++) |
1221 | if (r1_bio->bios[i]->bi_end_io == end_sync_read) { | 1300 | if (r1_bio->bios[i]->bi_end_io == end_sync_read) |
1222 | int j; | 1301 | md_error(mddev, conf->mirrors[i].rdev); |
1223 | int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9); | ||
1224 | struct bio *pbio = r1_bio->bios[primary]; | ||
1225 | struct bio *sbio = r1_bio->bios[i]; | ||
1226 | |||
1227 | if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) { | ||
1228 | for (j = vcnt; j-- ; ) { | ||
1229 | struct page *p, *s; | ||
1230 | p = pbio->bi_io_vec[j].bv_page; | ||
1231 | s = sbio->bi_io_vec[j].bv_page; | ||
1232 | if (memcmp(page_address(p), | ||
1233 | page_address(s), | ||
1234 | PAGE_SIZE)) | ||
1235 | break; | ||
1236 | } | ||
1237 | } else | ||
1238 | j = 0; | ||
1239 | if (j >= 0) | ||
1240 | mddev->resync_mismatches += r1_bio->sectors; | ||
1241 | if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery) | ||
1242 | && test_bit(BIO_UPTODATE, &sbio->bi_flags))) { | ||
1243 | sbio->bi_end_io = NULL; | ||
1244 | rdev_dec_pending(conf->mirrors[i].rdev, mddev); | ||
1245 | } else { | ||
1246 | /* fixup the bio for reuse */ | ||
1247 | int size; | ||
1248 | sbio->bi_vcnt = vcnt; | ||
1249 | sbio->bi_size = r1_bio->sectors << 9; | ||
1250 | sbio->bi_idx = 0; | ||
1251 | sbio->bi_phys_segments = 0; | ||
1252 | sbio->bi_flags &= ~(BIO_POOL_MASK - 1); | ||
1253 | sbio->bi_flags |= 1 << BIO_UPTODATE; | ||
1254 | sbio->bi_next = NULL; | ||
1255 | sbio->bi_sector = r1_bio->sector + | ||
1256 | conf->mirrors[i].rdev->data_offset; | ||
1257 | sbio->bi_bdev = conf->mirrors[i].rdev->bdev; | ||
1258 | size = sbio->bi_size; | ||
1259 | for (j = 0; j < vcnt ; j++) { | ||
1260 | struct bio_vec *bi; | ||
1261 | bi = &sbio->bi_io_vec[j]; | ||
1262 | bi->bv_offset = 0; | ||
1263 | if (size > PAGE_SIZE) | ||
1264 | bi->bv_len = PAGE_SIZE; | ||
1265 | else | ||
1266 | bi->bv_len = size; | ||
1267 | size -= PAGE_SIZE; | ||
1268 | memcpy(page_address(bi->bv_page), | ||
1269 | page_address(pbio->bi_io_vec[j].bv_page), | ||
1270 | PAGE_SIZE); | ||
1271 | } | ||
1272 | 1302 | ||
1273 | } | 1303 | md_done_sync(mddev, r1_bio->sectors, 1); |
1274 | } | 1304 | put_buf(r1_bio); |
1305 | return -1; | ||
1275 | } | 1306 | } |
1276 | if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { | 1307 | for (primary=0; primary<mddev->raid_disks; primary++) |
1277 | /* ouch - failed to read all of that. | 1308 | if (r1_bio->bios[primary]->bi_end_io == end_sync_read && |
1278 | * Try some synchronous reads of other devices to get | 1309 | test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) { |
1279 | * good data, much like with normal read errors. Only | 1310 | r1_bio->bios[primary]->bi_end_io = NULL; |
1280 | * read into the pages we already have so we don't | 1311 | rdev_dec_pending(conf->mirrors[primary].rdev, mddev); |
1281 | * need to re-issue the read request. | 1312 | break; |
1282 | * We don't need to freeze the array, because being in an | 1313 | } |
1283 | * active sync request, there is no normal IO, and | 1314 | r1_bio->read_disk = primary; |
1284 | * no overlapping syncs. | 1315 | for (i=0; i<mddev->raid_disks; i++) |
1285 | */ | 1316 | if (r1_bio->bios[i]->bi_end_io == end_sync_read) { |
1286 | sector_t sect = r1_bio->sector; | 1317 | int j; |
1287 | int sectors = r1_bio->sectors; | 1318 | int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9); |
1288 | int idx = 0; | 1319 | struct bio *pbio = r1_bio->bios[primary]; |
1289 | 1320 | struct bio *sbio = r1_bio->bios[i]; | |
1290 | while(sectors) { | 1321 | |
1291 | int s = sectors; | 1322 | if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) { |
1292 | int d = r1_bio->read_disk; | 1323 | for (j = vcnt; j-- ; ) { |
1293 | int success = 0; | 1324 | struct page *p, *s; |
1294 | mdk_rdev_t *rdev; | 1325 | p = pbio->bi_io_vec[j].bv_page; |
1295 | 1326 | s = sbio->bi_io_vec[j].bv_page; | |
1296 | if (s > (PAGE_SIZE>>9)) | 1327 | if (memcmp(page_address(p), |
1297 | s = PAGE_SIZE >> 9; | 1328 | page_address(s), |
1298 | do { | 1329 | PAGE_SIZE)) |
1299 | if (r1_bio->bios[d]->bi_end_io == end_sync_read) { | ||
1300 | /* No rcu protection needed here devices | ||
1301 | * can only be removed when no resync is | ||
1302 | * active, and resync is currently active | ||
1303 | */ | ||
1304 | rdev = conf->mirrors[d].rdev; | ||
1305 | if (sync_page_io(rdev, | ||
1306 | sect, | ||
1307 | s<<9, | ||
1308 | bio->bi_io_vec[idx].bv_page, | ||
1309 | READ, false)) { | ||
1310 | success = 1; | ||
1311 | break; | 1330 | break; |
1312 | } | ||
1313 | } | ||
1314 | d++; | ||
1315 | if (d == conf->raid_disks) | ||
1316 | d = 0; | ||
1317 | } while (!success && d != r1_bio->read_disk); | ||
1318 | |||
1319 | if (success) { | ||
1320 | int start = d; | ||
1321 | /* write it back and re-read */ | ||
1322 | set_bit(R1BIO_Uptodate, &r1_bio->state); | ||
1323 | while (d != r1_bio->read_disk) { | ||
1324 | if (d == 0) | ||
1325 | d = conf->raid_disks; | ||
1326 | d--; | ||
1327 | if (r1_bio->bios[d]->bi_end_io != end_sync_read) | ||
1328 | continue; | ||
1329 | rdev = conf->mirrors[d].rdev; | ||
1330 | atomic_add(s, &rdev->corrected_errors); | ||
1331 | if (sync_page_io(rdev, | ||
1332 | sect, | ||
1333 | s<<9, | ||
1334 | bio->bi_io_vec[idx].bv_page, | ||
1335 | WRITE, false) == 0) | ||
1336 | md_error(mddev, rdev); | ||
1337 | } | ||
1338 | d = start; | ||
1339 | while (d != r1_bio->read_disk) { | ||
1340 | if (d == 0) | ||
1341 | d = conf->raid_disks; | ||
1342 | d--; | ||
1343 | if (r1_bio->bios[d]->bi_end_io != end_sync_read) | ||
1344 | continue; | ||
1345 | rdev = conf->mirrors[d].rdev; | ||
1346 | if (sync_page_io(rdev, | ||
1347 | sect, | ||
1348 | s<<9, | ||
1349 | bio->bi_io_vec[idx].bv_page, | ||
1350 | READ, false) == 0) | ||
1351 | md_error(mddev, rdev); | ||
1352 | } | 1331 | } |
1332 | } else | ||
1333 | j = 0; | ||
1334 | if (j >= 0) | ||
1335 | mddev->resync_mismatches += r1_bio->sectors; | ||
1336 | if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery) | ||
1337 | && test_bit(BIO_UPTODATE, &sbio->bi_flags))) { | ||
1338 | sbio->bi_end_io = NULL; | ||
1339 | rdev_dec_pending(conf->mirrors[i].rdev, mddev); | ||
1353 | } else { | 1340 | } else { |
1354 | char b[BDEVNAME_SIZE]; | 1341 | /* fixup the bio for reuse */ |
1355 | /* Cannot read from anywhere, array is toast */ | 1342 | int size; |
1356 | md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); | 1343 | sbio->bi_vcnt = vcnt; |
1357 | printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error" | 1344 | sbio->bi_size = r1_bio->sectors << 9; |
1358 | " for block %llu\n", | 1345 | sbio->bi_idx = 0; |
1359 | mdname(mddev), | 1346 | sbio->bi_phys_segments = 0; |
1360 | bdevname(bio->bi_bdev, b), | 1347 | sbio->bi_flags &= ~(BIO_POOL_MASK - 1); |
1361 | (unsigned long long)r1_bio->sector); | 1348 | sbio->bi_flags |= 1 << BIO_UPTODATE; |
1362 | md_done_sync(mddev, r1_bio->sectors, 0); | 1349 | sbio->bi_next = NULL; |
1363 | put_buf(r1_bio); | 1350 | sbio->bi_sector = r1_bio->sector + |
1364 | return; | 1351 | conf->mirrors[i].rdev->data_offset; |
1352 | sbio->bi_bdev = conf->mirrors[i].rdev->bdev; | ||
1353 | size = sbio->bi_size; | ||
1354 | for (j = 0; j < vcnt ; j++) { | ||
1355 | struct bio_vec *bi; | ||
1356 | bi = &sbio->bi_io_vec[j]; | ||
1357 | bi->bv_offset = 0; | ||
1358 | if (size > PAGE_SIZE) | ||
1359 | bi->bv_len = PAGE_SIZE; | ||
1360 | else | ||
1361 | bi->bv_len = size; | ||
1362 | size -= PAGE_SIZE; | ||
1363 | memcpy(page_address(bi->bv_page), | ||
1364 | page_address(pbio->bi_io_vec[j].bv_page), | ||
1365 | PAGE_SIZE); | ||
1366 | } | ||
1367 | |||
1365 | } | 1368 | } |
1366 | sectors -= s; | ||
1367 | sect += s; | ||
1368 | idx ++; | ||
1369 | } | 1369 | } |
1370 | } | 1370 | return 0; |
1371 | } | ||
1372 | |||
1373 | static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | ||
1374 | { | ||
1375 | conf_t *conf = mddev->private; | ||
1376 | int i; | ||
1377 | int disks = conf->raid_disks; | ||
1378 | struct bio *bio, *wbio; | ||
1379 | |||
1380 | bio = r1_bio->bios[r1_bio->read_disk]; | ||
1381 | |||
1371 | 1382 | ||
1383 | if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | ||
1384 | if (process_checks(r1_bio) < 0) | ||
1385 | return; | ||
1386 | |||
1387 | if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) | ||
1388 | /* ouch - failed to read all of that. */ | ||
1389 | if (!fix_sync_read_error(r1_bio)) | ||
1390 | return; | ||
1372 | /* | 1391 | /* |
1373 | * schedule writes | 1392 | * schedule writes |
1374 | */ | 1393 | */ |