path: root/drivers/md/raid1.c
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--	drivers/md/raid1.c	506
1 file changed, 252 insertions(+), 254 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 2b7a7ff401dc..5d096096f958 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -297,23 +297,24 @@ static void raid1_end_read_request(struct bio *bio, int error)
 	rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
 }
 
-static void r1_bio_write_done(r1bio_t *r1_bio, int vcnt, struct bio_vec *bv,
-			      int behind)
+static void r1_bio_write_done(r1bio_t *r1_bio)
 {
 	if (atomic_dec_and_test(&r1_bio->remaining))
 	{
 		/* it really is the end of this request */
 		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
 			/* free extra copy of the data pages */
-			int i = vcnt;
+			int i = r1_bio->behind_page_count;
 			while (i--)
-				safe_put_page(bv[i].bv_page);
+				safe_put_page(r1_bio->behind_pages[i]);
+			kfree(r1_bio->behind_pages);
+			r1_bio->behind_pages = NULL;
 		}
 		/* clear the bitmap if all writes complete successfully */
 		bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
 				r1_bio->sectors,
 				!test_bit(R1BIO_Degraded, &r1_bio->state),
-				behind);
+				test_bit(R1BIO_BehindIO, &r1_bio->state));
 		md_write_end(r1_bio->mddev);
 		raid_end_bio_io(r1_bio);
 	}
@@ -386,7 +387,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
 	 * Let's see if all mirrored write operations have finished
 	 * already.
 	 */
-	r1_bio_write_done(r1_bio, bio->bi_vcnt, bio->bi_io_vec, behind);
+	r1_bio_write_done(r1_bio);
 
 	if (to_put)
 		bio_put(to_put);
@@ -411,10 +412,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 {
 	const sector_t this_sector = r1_bio->sector;
 	const int sectors = r1_bio->sectors;
-	int new_disk = -1;
 	int start_disk;
+	int best_disk;
 	int i;
-	sector_t new_distance, current_distance;
+	sector_t best_dist;
 	mdk_rdev_t *rdev;
 	int choose_first;
 
@@ -425,6 +426,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 	 * We take the first readable disk when above the resync window.
 	 */
  retry:
+	best_disk = -1;
+	best_dist = MaxSector;
 	if (conf->mddev->recovery_cp < MaxSector &&
 	    (this_sector + sectors >= conf->next_resync)) {
 		choose_first = 1;
@@ -434,8 +437,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 		start_disk = conf->last_used;
 	}
 
-	/* make sure the disk is operational */
 	for (i = 0 ; i < conf->raid_disks ; i++) {
+		sector_t dist;
 		int disk = start_disk + i;
 		if (disk >= conf->raid_disks)
 			disk -= conf->raid_disks;
@@ -443,60 +446,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 		rdev = rcu_dereference(conf->mirrors[disk].rdev);
 		if (r1_bio->bios[disk] == IO_BLOCKED
 		    || rdev == NULL
-		    || !test_bit(In_sync, &rdev->flags))
+		    || test_bit(Faulty, &rdev->flags))
 			continue;
-
-		new_disk = disk;
-		if (!test_bit(WriteMostly, &rdev->flags))
-			break;
-	}
-
-	if (new_disk < 0 || choose_first)
-		goto rb_out;
-
-	/*
-	 * Don't change to another disk for sequential reads:
-	 */
-	if (conf->next_seq_sect == this_sector)
-		goto rb_out;
-	if (this_sector == conf->mirrors[new_disk].head_position)
-		goto rb_out;
-
-	current_distance = abs(this_sector
-			       - conf->mirrors[new_disk].head_position);
-
-	/* look for a better disk - i.e. head is closer */
-	start_disk = new_disk;
-	for (i = 1; i < conf->raid_disks; i++) {
-		int disk = start_disk + 1;
-		if (disk >= conf->raid_disks)
-			disk -= conf->raid_disks;
-
-		rdev = rcu_dereference(conf->mirrors[disk].rdev);
-		if (r1_bio->bios[disk] == IO_BLOCKED
-		    || rdev == NULL
-		    || !test_bit(In_sync, &rdev->flags)
-		    || test_bit(WriteMostly, &rdev->flags))
+		if (!test_bit(In_sync, &rdev->flags) &&
+		    rdev->recovery_offset < this_sector + sectors)
 			continue;
-
-		if (!atomic_read(&rdev->nr_pending)) {
-			new_disk = disk;
+		if (test_bit(WriteMostly, &rdev->flags)) {
+			/* Don't balance among write-mostly, just
+			 * use the first as a last resort */
+			if (best_disk < 0)
+				best_disk = disk;
+			continue;
+		}
+		/* This is a reasonable device to use.  It might
+		 * even be best.
+		 */
+		dist = abs(this_sector - conf->mirrors[disk].head_position);
+		if (choose_first
+		    /* Don't change to another disk for sequential reads */
+		    || conf->next_seq_sect == this_sector
+		    || dist == 0
+		    /* If device is idle, use it */
+		    || atomic_read(&rdev->nr_pending) == 0) {
+			best_disk = disk;
 			break;
 		}
-		new_distance = abs(this_sector - conf->mirrors[disk].head_position);
-		if (new_distance < current_distance) {
-			current_distance = new_distance;
-			new_disk = disk;
+		if (dist < best_dist) {
+			best_dist = dist;
+			best_disk = disk;
 		}
 	}
 
-rb_out:
-	if (new_disk >= 0) {
-		rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
+	if (best_disk >= 0) {
+		rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
 		if (!rdev)
 			goto retry;
 		atomic_inc(&rdev->nr_pending);
-		if (!test_bit(In_sync, &rdev->flags)) {
+		if (test_bit(Faulty, &rdev->flags)) {
 			/* cannot risk returning a device that failed
 			 * before we inc'ed nr_pending
 			 */
@@ -504,11 +490,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 			goto retry;
 		}
 		conf->next_seq_sect = this_sector + sectors;
-		conf->last_used = new_disk;
+		conf->last_used = best_disk;
 	}
 	rcu_read_unlock();
 
-	return new_disk;
+	return best_disk;
 }
 
 static int raid1_congested(void *data, int bits)
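Note: the hunks above replace the old two-pass new_disk scan in read_balance() with a single loop that tracks a best_disk/best_dist pair, prefers idle, zero-distance or sequential disks, skips Faulty and not-yet-recovered devices, and falls back to a WriteMostly device only as a last resort. The following userspace-only sketch restates that heuristic for illustration; the names (struct fake_mirror, pick_read_disk) are invented here and are not part of the patch.

/* Illustrative sketch only; not kernel code and not from this patch. */
#include <stdlib.h>

struct fake_mirror {
	long long head_position;	/* last known head position */
	int faulty;			/* plays the role of the Faulty flag */
	int write_mostly;		/* plays the role of WriteMostly */
	int pending;			/* in-flight requests, like nr_pending */
};

int pick_read_disk(struct fake_mirror *m, int ndisks,
		   long long sector, int choose_first)
{
	int best_disk = -1;
	long long best_dist = -1;	/* -1 stands in for "MaxSector" */
	int i;

	for (i = 0; i < ndisks; i++) {
		long long dist;

		if (m[i].faulty)
			continue;
		if (m[i].write_mostly) {
			/* never balanced against, only kept as a last resort */
			if (best_disk < 0)
				best_disk = i;
			continue;
		}
		dist = llabs(sector - m[i].head_position);
		/* sequential, zero-distance or idle disks win immediately */
		if (choose_first || dist == 0 || m[i].pending == 0) {
			best_disk = i;
			break;
		}
		if (best_dist < 0 || dist < best_dist) {
			best_dist = dist;
			best_disk = i;
		}
	}
	return best_disk;
}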
@@ -675,37 +661,36 @@ static void unfreeze_array(conf_t *conf)
 
 
 /* duplicate the data pages for behind I/O
- * We return a list of bio_vec rather than just page pointers
- * as it makes freeing easier
  */
-static struct bio_vec *alloc_behind_pages(struct bio *bio)
+static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio)
 {
 	int i;
 	struct bio_vec *bvec;
-	struct bio_vec *pages = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
+	struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*),
 					GFP_NOIO);
 	if (unlikely(!pages))
-		goto do_sync_io;
+		return;
 
 	bio_for_each_segment(bvec, bio, i) {
-		pages[i].bv_page = alloc_page(GFP_NOIO);
-		if (unlikely(!pages[i].bv_page))
+		pages[i] = alloc_page(GFP_NOIO);
+		if (unlikely(!pages[i]))
 			goto do_sync_io;
-		memcpy(kmap(pages[i].bv_page) + bvec->bv_offset,
+		memcpy(kmap(pages[i]) + bvec->bv_offset,
 		       kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
-		kunmap(pages[i].bv_page);
+		kunmap(pages[i]);
 		kunmap(bvec->bv_page);
 	}
-
-	return pages;
+	r1_bio->behind_pages = pages;
+	r1_bio->behind_page_count = bio->bi_vcnt;
+	set_bit(R1BIO_BehindIO, &r1_bio->state);
+	return;
 
 do_sync_io:
-	if (pages)
-		for (i = 0; i < bio->bi_vcnt && pages[i].bv_page; i++)
-			put_page(pages[i].bv_page);
+	for (i = 0; i < bio->bi_vcnt; i++)
+		if (pages[i])
+			put_page(pages[i]);
 	kfree(pages);
 	PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
-	return NULL;
 }
 
 static int make_request(mddev_t *mddev, struct bio * bio)
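Note: with this hunk the duplicated write-behind pages are owned by the r1_bio itself (behind_pages plus behind_page_count) instead of being returned to the caller as a bio_vec array, so the completion path in r1_bio_write_done() can free them without the submitter handing them back. A rough userspace analogy of that ownership model is sketched below; struct fake_r1bio, behind_alloc() and behind_free() are invented names, not kernel APIs.

/* Illustrative sketch only; not kernel code and not from this patch. */
#include <stdlib.h>
#include <string.h>

struct fake_r1bio {
	void **behind_pages;		/* plays the role of struct page ** */
	int behind_page_count;
};

int behind_alloc(struct fake_r1bio *r1, const void *src,
		 size_t page_size, int npages)
{
	void **pages = calloc(npages, sizeof(*pages));
	int i;

	if (!pages)
		return -1;		/* caller falls back to synchronous I/O */
	for (i = 0; i < npages; i++) {
		pages[i] = malloc(page_size);
		if (!pages[i])
			goto fail;
		memcpy(pages[i], (const char *)src + i * page_size, page_size);
	}
	/* the request object now owns the copies */
	r1->behind_pages = pages;
	r1->behind_page_count = npages;
	return 0;
fail:
	while (i--)
		free(pages[i]);
	free(pages);
	return -1;
}

void behind_free(struct fake_r1bio *r1)
{
	int i = r1->behind_page_count;

	while (i--)
		free(r1->behind_pages[i]);
	free(r1->behind_pages);
	r1->behind_pages = NULL;
}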
@@ -717,7 +702,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	int i, targets = 0, disks;
 	struct bitmap *bitmap;
 	unsigned long flags;
-	struct bio_vec *behind_pages = NULL;
 	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
@@ -870,9 +854,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	if (bitmap &&
 	    (atomic_read(&bitmap->behind_writes)
 	     < mddev->bitmap_info.max_write_behind) &&
-	    !waitqueue_active(&bitmap->behind_wait) &&
-	    (behind_pages = alloc_behind_pages(bio)) != NULL)
-		set_bit(R1BIO_BehindIO, &r1_bio->state);
+	    !waitqueue_active(&bitmap->behind_wait))
+		alloc_behind_pages(bio, r1_bio);
 
 	atomic_set(&r1_bio->remaining, 1);
 	atomic_set(&r1_bio->behind_remaining, 0);
@@ -893,7 +876,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 		mbio->bi_rw = WRITE | do_flush_fua | do_sync;
 		mbio->bi_private = r1_bio;
 
-		if (behind_pages) {
+		if (r1_bio->behind_pages) {
 			struct bio_vec *bvec;
 			int j;
 
@@ -905,7 +888,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 			 * them all
 			 */
 			__bio_for_each_segment(bvec, mbio, j, 0)
-				bvec->bv_page = behind_pages[j].bv_page;
+				bvec->bv_page = r1_bio->behind_pages[j];
 			if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
 				atomic_inc(&r1_bio->behind_remaining);
 		}
@@ -915,8 +898,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 		bio_list_add(&conf->pending_bio_list, mbio);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
-	r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
-	kfree(behind_pages); /* the behind pages are attached to the bios now */
+	r1_bio_write_done(r1_bio);
 
 	/* In case raid1d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
@@ -1196,194 +1178,210 @@ static void end_sync_write(struct bio *bio, int error)
 	}
 }
 
-static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
-{
-	conf_t *conf = mddev->private;
-	int i;
-	int disks = conf->raid_disks;
-	struct bio *bio, *wbio;
-
-	bio = r1_bio->bios[r1_bio->read_disk];
-
-
-	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
-		/* We have read all readable devices.  If we haven't
-		 * got the block, then there is no hope left.
-		 * If we have, then we want to do a comparison
-		 * and skip the write if everything is the same.
-		 * If any blocks failed to read, then we need to
-		 * attempt an over-write
-		 */
-		int primary;
-		if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
-			for (i=0; i<mddev->raid_disks; i++)
-				if (r1_bio->bios[i]->bi_end_io == end_sync_read)
-					md_error(mddev, conf->mirrors[i].rdev);
-
-			md_done_sync(mddev, r1_bio->sectors, 1);
-			put_buf(r1_bio);
-			return;
-		}
-		for (primary=0; primary<mddev->raid_disks; primary++)
-			if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
-			    test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
-				r1_bio->bios[primary]->bi_end_io = NULL;
-				rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
-				break;
-			}
-		r1_bio->read_disk = primary;
-		for (i=0; i<mddev->raid_disks; i++)
-			if (r1_bio->bios[i]->bi_end_io == end_sync_read) {
-				int j;
-				int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
-				struct bio *pbio = r1_bio->bios[primary];
-				struct bio *sbio = r1_bio->bios[i];
-
-				if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
-					for (j = vcnt; j-- ; ) {
-						struct page *p, *s;
-						p = pbio->bi_io_vec[j].bv_page;
-						s = sbio->bi_io_vec[j].bv_page;
-						if (memcmp(page_address(p),
-							   page_address(s),
-							   PAGE_SIZE))
-							break;
-					}
-				} else
-					j = 0;
-				if (j >= 0)
-					mddev->resync_mismatches += r1_bio->sectors;
-				if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
-					      && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
-					sbio->bi_end_io = NULL;
-					rdev_dec_pending(conf->mirrors[i].rdev, mddev);
-				} else {
-					/* fixup the bio for reuse */
-					int size;
-					sbio->bi_vcnt = vcnt;
-					sbio->bi_size = r1_bio->sectors << 9;
-					sbio->bi_idx = 0;
-					sbio->bi_phys_segments = 0;
-					sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
-					sbio->bi_flags |= 1 << BIO_UPTODATE;
-					sbio->bi_next = NULL;
-					sbio->bi_sector = r1_bio->sector +
-						conf->mirrors[i].rdev->data_offset;
-					sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
-					size = sbio->bi_size;
-					for (j = 0; j < vcnt ; j++) {
-						struct bio_vec *bi;
-						bi = &sbio->bi_io_vec[j];
-						bi->bv_offset = 0;
-						if (size > PAGE_SIZE)
-							bi->bv_len = PAGE_SIZE;
-						else
-							bi->bv_len = size;
-						size -= PAGE_SIZE;
-						memcpy(page_address(bi->bv_page),
-						       page_address(pbio->bi_io_vec[j].bv_page),
-						       PAGE_SIZE);
-					}
-
-				}
-			}
-	}
-	if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
-		/* ouch - failed to read all of that.
-		 * Try some synchronous reads of other devices to get
-		 * good data, much like with normal read errors.  Only
-		 * read into the pages we already have so we don't
-		 * need to re-issue the read request.
-		 * We don't need to freeze the array, because being in an
-		 * active sync request, there is no normal IO, and
-		 * no overlapping syncs.
-		 */
-		sector_t sect = r1_bio->sector;
-		int sectors = r1_bio->sectors;
-		int idx = 0;
-
-		while(sectors) {
-			int s = sectors;
-			int d = r1_bio->read_disk;
-			int success = 0;
-			mdk_rdev_t *rdev;
-
-			if (s > (PAGE_SIZE>>9))
-				s = PAGE_SIZE >> 9;
-			do {
-				if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
-					/* No rcu protection needed here devices
-					 * can only be removed when no resync is
-					 * active, and resync is currently active
-					 */
-					rdev = conf->mirrors[d].rdev;
-					if (sync_page_io(rdev,
-							 sect,
-							 s<<9,
-							 bio->bi_io_vec[idx].bv_page,
-							 READ, false)) {
-						success = 1;
-						break;
-					}
-				}
-				d++;
-				if (d == conf->raid_disks)
-					d = 0;
-			} while (!success && d != r1_bio->read_disk);
-
-			if (success) {
-				int start = d;
-				/* write it back and re-read */
-				set_bit(R1BIO_Uptodate, &r1_bio->state);
-				while (d != r1_bio->read_disk) {
-					if (d == 0)
-						d = conf->raid_disks;
-					d--;
-					if (r1_bio->bios[d]->bi_end_io != end_sync_read)
-						continue;
-					rdev = conf->mirrors[d].rdev;
-					atomic_add(s, &rdev->corrected_errors);
-					if (sync_page_io(rdev,
-							 sect,
-							 s<<9,
-							 bio->bi_io_vec[idx].bv_page,
-							 WRITE, false) == 0)
-						md_error(mddev, rdev);
-				}
-				d = start;
-				while (d != r1_bio->read_disk) {
-					if (d == 0)
-						d = conf->raid_disks;
-					d--;
-					if (r1_bio->bios[d]->bi_end_io != end_sync_read)
-						continue;
-					rdev = conf->mirrors[d].rdev;
-					if (sync_page_io(rdev,
-							 sect,
-							 s<<9,
-							 bio->bi_io_vec[idx].bv_page,
-							 READ, false) == 0)
-						md_error(mddev, rdev);
-				}
-			} else {
-				char b[BDEVNAME_SIZE];
-				/* Cannot read from anywhere, array is toast */
-				md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
-				printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
-				       " for block %llu\n",
-				       mdname(mddev),
-				       bdevname(bio->bi_bdev, b),
-				       (unsigned long long)r1_bio->sector);
-				md_done_sync(mddev, r1_bio->sectors, 0);
-				put_buf(r1_bio);
-				return;
-			}
-			sectors -= s;
-			sect += s;
-			idx ++;
-		}
-	}
+static int fix_sync_read_error(r1bio_t *r1_bio)
+{
+	/* Try some synchronous reads of other devices to get
+	 * good data, much like with normal read errors.  Only
+	 * read into the pages we already have so we don't
+	 * need to re-issue the read request.
+	 * We don't need to freeze the array, because being in an
+	 * active sync request, there is no normal IO, and
+	 * no overlapping syncs.
+	 */
+	mddev_t *mddev = r1_bio->mddev;
+	conf_t *conf = mddev->private;
+	struct bio *bio = r1_bio->bios[r1_bio->read_disk];
+	sector_t sect = r1_bio->sector;
+	int sectors = r1_bio->sectors;
+	int idx = 0;
+
+	while(sectors) {
+		int s = sectors;
+		int d = r1_bio->read_disk;
+		int success = 0;
+		mdk_rdev_t *rdev;
+		int start;
+
+		if (s > (PAGE_SIZE>>9))
+			s = PAGE_SIZE >> 9;
+		do {
+			if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
+				/* No rcu protection needed here devices
+				 * can only be removed when no resync is
+				 * active, and resync is currently active
+				 */
+				rdev = conf->mirrors[d].rdev;
+				if (sync_page_io(rdev,
+						 sect,
+						 s<<9,
+						 bio->bi_io_vec[idx].bv_page,
+						 READ, false)) {
+					success = 1;
+					break;
+				}
+			}
+			d++;
+			if (d == conf->raid_disks)
+				d = 0;
+		} while (!success && d != r1_bio->read_disk);
+
+		if (!success) {
+			char b[BDEVNAME_SIZE];
+			/* Cannot read from anywhere, array is toast */
+			md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+			printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
+			       " for block %llu\n",
+			       mdname(mddev),
+			       bdevname(bio->bi_bdev, b),
+			       (unsigned long long)r1_bio->sector);
+			md_done_sync(mddev, r1_bio->sectors, 0);
+			put_buf(r1_bio);
+			return 0;
+		}
+
+		start = d;
+		/* write it back and re-read */
+		while (d != r1_bio->read_disk) {
+			if (d == 0)
+				d = conf->raid_disks;
+			d--;
+			if (r1_bio->bios[d]->bi_end_io != end_sync_read)
+				continue;
+			rdev = conf->mirrors[d].rdev;
+			if (sync_page_io(rdev,
+					 sect,
+					 s<<9,
+					 bio->bi_io_vec[idx].bv_page,
+					 WRITE, false) == 0) {
+				r1_bio->bios[d]->bi_end_io = NULL;
+				rdev_dec_pending(rdev, mddev);
+				md_error(mddev, rdev);
+			} else
+				atomic_add(s, &rdev->corrected_errors);
+		}
+		d = start;
+		while (d != r1_bio->read_disk) {
+			if (d == 0)
+				d = conf->raid_disks;
+			d--;
+			if (r1_bio->bios[d]->bi_end_io != end_sync_read)
+				continue;
+			rdev = conf->mirrors[d].rdev;
+			if (sync_page_io(rdev,
+					 sect,
+					 s<<9,
+					 bio->bi_io_vec[idx].bv_page,
+					 READ, false) == 0)
+				md_error(mddev, rdev);
+		}
+		sectors -= s;
+		sect += s;
+		idx ++;
+	}
+	set_bit(R1BIO_Uptodate, &r1_bio->state);
+	set_bit(BIO_UPTODATE, &bio->bi_flags);
+	return 1;
+}
+
+static int process_checks(r1bio_t *r1_bio)
+{
+	/* We have read all readable devices.  If we haven't
+	 * got the block, then there is no hope left.
+	 * If we have, then we want to do a comparison
+	 * and skip the write if everything is the same.
+	 * If any blocks failed to read, then we need to
+	 * attempt an over-write
+	 */
+	mddev_t *mddev = r1_bio->mddev;
+	conf_t *conf = mddev->private;
+	int primary;
+	int i;
+
+	for (primary = 0; primary < conf->raid_disks; primary++)
+		if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
+		    test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
+			r1_bio->bios[primary]->bi_end_io = NULL;
+			rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
+			break;
+		}
+	r1_bio->read_disk = primary;
+	for (i = 0; i < conf->raid_disks; i++) {
+		int j;
+		int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
+		struct bio *pbio = r1_bio->bios[primary];
+		struct bio *sbio = r1_bio->bios[i];
+		int size;
+
+		if (r1_bio->bios[i]->bi_end_io != end_sync_read)
+			continue;
+
+		if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
+			for (j = vcnt; j-- ; ) {
+				struct page *p, *s;
+				p = pbio->bi_io_vec[j].bv_page;
+				s = sbio->bi_io_vec[j].bv_page;
+				if (memcmp(page_address(p),
+					   page_address(s),
+					   PAGE_SIZE))
+					break;
+			}
+		} else
+			j = 0;
+		if (j >= 0)
+			mddev->resync_mismatches += r1_bio->sectors;
+		if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
+			      && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
+			/* No need to write to this device. */
+			sbio->bi_end_io = NULL;
+			rdev_dec_pending(conf->mirrors[i].rdev, mddev);
+			continue;
+		}
+		/* fixup the bio for reuse */
+		sbio->bi_vcnt = vcnt;
+		sbio->bi_size = r1_bio->sectors << 9;
+		sbio->bi_idx = 0;
+		sbio->bi_phys_segments = 0;
+		sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
+		sbio->bi_flags |= 1 << BIO_UPTODATE;
+		sbio->bi_next = NULL;
+		sbio->bi_sector = r1_bio->sector +
+			conf->mirrors[i].rdev->data_offset;
+		sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+		size = sbio->bi_size;
+		for (j = 0; j < vcnt ; j++) {
+			struct bio_vec *bi;
+			bi = &sbio->bi_io_vec[j];
+			bi->bv_offset = 0;
+			if (size > PAGE_SIZE)
+				bi->bv_len = PAGE_SIZE;
+			else
+				bi->bv_len = size;
+			size -= PAGE_SIZE;
+			memcpy(page_address(bi->bv_page),
+			       page_address(pbio->bi_io_vec[j].bv_page),
+			       PAGE_SIZE);
+		}
+	}
+	return 0;
+}
 
+static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
+{
+	conf_t *conf = mddev->private;
+	int i;
+	int disks = conf->raid_disks;
+	struct bio *bio, *wbio;
+
+	bio = r1_bio->bios[r1_bio->read_disk];
+
+	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
+		/* ouch - failed to read all of that. */
+		if (!fix_sync_read_error(r1_bio))
+			return;
+
+	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+		if (process_checks(r1_bio) < 0)
+			return;
 	/*
 	 * schedule writes
 	 */
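Note: after this hunk sync_request_write() shrinks to a driver for the two new helpers: fix_sync_read_error() repairs a failed resync read (or fails the array), and process_checks() does the compare-and-rewrite pass only for requested checks, before the usual write scheduling. A compact restatement of that control flow, outside the kernel context and with stub helpers, is sketched below.

/* Illustrative restatement only; the helpers here are stand-in stubs. */
#include <stdbool.h>

static bool fix_sync_read_error(void) { return true; }	/* stub */
static int  process_checks(void)      { return 0; }	/* stub */

void sync_request_write_flow(bool read_ok, bool check_requested)
{
	if (!read_ok)
		if (!fix_sync_read_error())
			return;		/* nothing readable; request already failed */
	if (check_requested)
		if (process_checks() < 0)
			return;
	/* ... schedule the mirror writes, as before ... */
}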
@@ -2063,7 +2061,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
 	set_capacity(mddev->gendisk, mddev->array_sectors);
 	revalidate_disk(mddev->gendisk);
 	if (sectors > mddev->dev_sectors &&
-	    mddev->recovery_cp == MaxSector) {
+	    mddev->recovery_cp > mddev->dev_sectors) {
 		mddev->recovery_cp = mddev->dev_sectors;
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	}