-rw-r--r--	drivers/md/md.c		49
-rw-r--r--	drivers/md/md.h		1
-rw-r--r--	drivers/md/raid1.c	208
-rw-r--r--	drivers/md/raid1.h	4
4 files changed, 233 insertions, 29 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7ae3c5a18001..48217e8aa0eb 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -215,6 +215,55 @@ struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL_GPL(bio_clone_mddev);
 
+void md_trim_bio(struct bio *bio, int offset, int size)
+{
+	/* 'bio' is a cloned bio which we need to trim to match
+	 * the given offset and size.
+	 * This requires adjusting bi_sector, bi_size, and bi_io_vec
+	 */
+	int i;
+	struct bio_vec *bvec;
+	int sofar = 0;
+
+	size <<= 9;
+	if (offset == 0 && size == bio->bi_size)
+		return;
+
+	bio->bi_sector += offset;
+	bio->bi_size = size;
+	offset <<= 9;
+	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
+	while (bio->bi_idx < bio->bi_vcnt &&
+	       bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
+		/* remove this whole bio_vec */
+		offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
+		bio->bi_idx++;
+	}
+	if (bio->bi_idx < bio->bi_vcnt) {
+		bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
+		bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
+	}
+	/* avoid any complications with bi_idx being non-zero*/
+	if (bio->bi_idx) {
+		memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
+			(bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
+		bio->bi_vcnt -= bio->bi_idx;
+		bio->bi_idx = 0;
+	}
+	/* Make sure vcnt and last bv are not too big */
+	bio_for_each_segment(bvec, bio, i) {
+		if (sofar + bvec->bv_len > size)
+			bvec->bv_len = size - sofar;
+		if (bvec->bv_len == 0) {
+			bio->bi_vcnt = i;
+			break;
+		}
+		sofar += bvec->bv_len;
+	}
+}
+EXPORT_SYMBOL_GPL(md_trim_bio);
+
 /*
  * We have a system wide 'event count' that is incremented
  * on any 'interesting' event, and readers of /proc/mdstat
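The arithmetic in md_trim_bio() is easier to follow outside the kernel, so here is a stand-alone sketch that models the same steps on a simplified segment array. struct fake_bio, struct seg and trim() are invented stand-ins for struct bio, struct bio_vec and md_trim_bio(), not kernel APIs; offsets and sizes are converted from sectors to bytes with the same << 9 shift.

/* Illustrative user-space model of the md_trim_bio() arithmetic.
 * All names here are hypothetical; this is not the kernel bio API.
 */
#include <stdio.h>
#include <string.h>

struct seg { unsigned int off, len; };		/* like bv_offset / bv_len */

struct fake_bio {				/* simplified stand-in for struct bio */
	unsigned long long sector;		/* bi_sector */
	unsigned int size;			/* bi_size, in bytes */
	int idx, vcnt;				/* bi_idx, bi_vcnt */
	struct seg vec[8];			/* bi_io_vec */
};

static void trim(struct fake_bio *b, int offset_sectors, int size_sectors)
{
	unsigned int size = (unsigned int)size_sectors << 9;
	unsigned int offset = (unsigned int)offset_sectors << 9;
	unsigned int sofar = 0;
	int i;

	if (offset_sectors == 0 && size == b->size)
		return;

	b->sector += offset_sectors;
	b->size = size;

	/* drop whole leading segments covered by the offset */
	while (b->idx < b->vcnt && b->vec[b->idx].len <= offset) {
		offset -= b->vec[b->idx].len;
		b->idx++;
	}
	/* shrink the first remaining segment */
	if (b->idx < b->vcnt) {
		b->vec[b->idx].off += offset;
		b->vec[b->idx].len -= offset;
	}
	/* normalise so the vector starts at index 0 again */
	if (b->idx) {
		memmove(b->vec, b->vec + b->idx,
			(size_t)(b->vcnt - b->idx) * sizeof(struct seg));
		b->vcnt -= b->idx;
		b->idx = 0;
	}
	/* clamp the tail so the segments add up to exactly "size" bytes */
	for (i = 0; i < b->vcnt; i++) {
		if (sofar + b->vec[i].len > size)
			b->vec[i].len = size - sofar;
		if (b->vec[i].len == 0) {
			b->vcnt = i;
			break;
		}
		sofar += b->vec[i].len;
	}
}

int main(void)
{
	/* four one-page segments = 8 sectors; trim to sectors 3..6 */
	struct fake_bio b = { 100, 4 * 4096, 0, 4,
			      { {0, 4096}, {0, 4096}, {0, 4096}, {0, 4096} } };
	trim(&b, 3, 4);
	printf("sector=%llu size=%u vcnt=%d first seg off=%u len=%u\n",
	       b.sector, b.size, b.vcnt, b.vec[0].off, b.vec[0].len);
	return 0;
}

With that input the model prints sector=103 size=2048 vcnt=1 first seg off=1536 len=2048: the clone now describes only the four requested sectors.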
diff --git a/drivers/md/md.h b/drivers/md/md.h
index aea9e9ff8a33..7c3192c0a29a 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -575,4 +575,5 @@ extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
 				   mddev_t *mddev);
 extern int mddev_check_plugged(mddev_t *mddev);
+extern void md_trim_bio(struct bio *bio, int offset, int size);
 #endif /* _MD_MD_H */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 8db311d7cddc..cc3939dc9e3d 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -41,11 +41,7 @@
 #include "bitmap.h"
 
 #define DEBUG 0
-#if DEBUG
-#define PRINTK(x...) printk(x)
-#else
-#define PRINTK(x...)
-#endif
+#define PRINTK(x...) do { if (DEBUG) printk(x); } while (0)
 
 /*
  * Number of guaranteed r1bios in case of extreme VM load:
@@ -177,12 +173,6 @@ static void free_r1bio(r1bio_t *r1_bio)
 {
 	conf_t *conf = r1_bio->mddev->private;
 
-	/*
-	 * Wake up any possible resync thread that waits for the device
-	 * to go idle.
-	 */
-	allow_barrier(conf);
-
 	put_all_bios(conf, r1_bio);
 	mempool_free(r1_bio, conf->r1bio_pool);
 }
@@ -223,6 +213,33 @@ static void reschedule_retry(r1bio_t *r1_bio)
  * operation and are ready to return a success/failure code to the buffer
  * cache layer.
  */
+static void call_bio_endio(r1bio_t *r1_bio)
+{
+	struct bio *bio = r1_bio->master_bio;
+	int done;
+	conf_t *conf = r1_bio->mddev->private;
+
+	if (bio->bi_phys_segments) {
+		unsigned long flags;
+		spin_lock_irqsave(&conf->device_lock, flags);
+		bio->bi_phys_segments--;
+		done = (bio->bi_phys_segments == 0);
+		spin_unlock_irqrestore(&conf->device_lock, flags);
+	} else
+		done = 1;
+
+	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	if (done) {
+		bio_endio(bio, 0);
+		/*
+		 * Wake up any possible resync thread that waits for the device
+		 * to go idle.
+		 */
+		allow_barrier(conf);
+	}
+}
+
 static void raid_end_bio_io(r1bio_t *r1_bio)
 {
 	struct bio *bio = r1_bio->master_bio;
@@ -235,8 +252,7 @@ static void raid_end_bio_io(r1bio_t *r1_bio)
 			 (unsigned long long) bio->bi_sector +
 			 (bio->bi_size >> 9) - 1);
 
-		bio_endio(bio,
-			test_bit(R1BIO_Uptodate, &r1_bio->state) ? 0 : -EIO);
+		call_bio_endio(r1_bio);
 	}
 	free_r1bio(r1_bio);
 }
@@ -295,6 +311,7 @@ static void raid1_end_read_request(struct bio *bio, int error)
 				bdevname(conf->mirrors[mirror].rdev->bdev,
 					 b),
 				(unsigned long long)r1_bio->sector);
+		set_bit(R1BIO_ReadError, &r1_bio->state);
 		reschedule_retry(r1_bio);
 	}
 
@@ -381,7 +398,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
 				       (unsigned long long) mbio->bi_sector,
 				       (unsigned long long) mbio->bi_sector +
 				       (mbio->bi_size >> 9) - 1);
-				bio_endio(mbio, 0);
+				call_bio_endio(r1_bio);
 			}
 		}
 	}
@@ -412,10 +429,11 @@ static void raid1_end_write_request(struct bio *bio, int error)
  *
  * The rdev for the device selected will have nr_pending incremented.
  */
-static int read_balance(conf_t *conf, r1bio_t *r1_bio)
+static int read_balance(conf_t *conf, r1bio_t *r1_bio, int *max_sectors)
 {
 	const sector_t this_sector = r1_bio->sector;
-	const int sectors = r1_bio->sectors;
+	int sectors;
+	int best_good_sectors;
 	int start_disk;
 	int best_disk;
 	int i;
@@ -430,8 +448,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 	 * We take the first readable disk when above the resync window.
 	 */
 retry:
+	sectors = r1_bio->sectors;
 	best_disk = -1;
 	best_dist = MaxSector;
+	best_good_sectors = 0;
+
 	if (conf->mddev->recovery_cp < MaxSector &&
 	    (this_sector + sectors >= conf->next_resync)) {
 		choose_first = 1;
@@ -443,6 +464,9 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 
 	for (i = 0 ; i < conf->raid_disks ; i++) {
 		sector_t dist;
+		sector_t first_bad;
+		int bad_sectors;
+
 		int disk = start_disk + i;
 		if (disk >= conf->raid_disks)
 			disk -= conf->raid_disks;
@@ -465,6 +489,35 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 		/* This is a reasonable device to use. It might
 		 * even be best.
 		 */
+		if (is_badblock(rdev, this_sector, sectors,
+				&first_bad, &bad_sectors)) {
+			if (best_dist < MaxSector)
+				/* already have a better device */
+				continue;
+			if (first_bad <= this_sector) {
+				/* cannot read here. If this is the 'primary'
+				 * device, then we must not read beyond
+				 * bad_sectors from another device..
+				 */
+				bad_sectors -= (this_sector - first_bad);
+				if (choose_first && sectors > bad_sectors)
+					sectors = bad_sectors;
+				if (best_good_sectors > sectors)
+					best_good_sectors = sectors;
+
+			} else {
+				sector_t good_sectors = first_bad - this_sector;
+				if (good_sectors > best_good_sectors) {
+					best_good_sectors = good_sectors;
+					best_disk = disk;
+				}
+				if (choose_first)
+					break;
+			}
+			continue;
+		} else
+			best_good_sectors = sectors;
+
 		dist = abs(this_sector - conf->mirrors[disk].head_position);
 		if (choose_first
 		    /* Don't change to another disk for sequential reads */
@@ -493,10 +546,12 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 			rdev_dec_pending(rdev, conf->mddev);
 			goto retry;
 		}
+		sectors = best_good_sectors;
 		conf->next_seq_sect = this_sector + sectors;
 		conf->last_used = best_disk;
 	}
 	rcu_read_unlock();
+	*max_sectors = sectors;
 
 	return best_disk;
 }
@@ -763,11 +818,25 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	r1_bio->mddev = mddev;
 	r1_bio->sector = bio->bi_sector;
 
+	/* We might need to issue multiple reads to different
+	 * devices if there are bad blocks around, so we keep
+	 * track of the number of reads in bio->bi_phys_segments.
+	 * If this is 0, there is only one r1_bio and no locking
+	 * will be needed when requests complete. If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
 	if (rw == READ) {
 		/*
 		 * read balancing logic:
 		 */
-		int rdisk = read_balance(conf, r1_bio);
+		int max_sectors;
+		int rdisk;
+
+read_again:
+		rdisk = read_balance(conf, r1_bio, &max_sectors);
 
 		if (rdisk < 0) {
 			/* couldn't find anywhere to read from */
@@ -788,6 +857,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 		r1_bio->read_disk = rdisk;
 
 		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+		md_trim_bio(read_bio, r1_bio->sector - bio->bi_sector,
+			    max_sectors);
 
 		r1_bio->bios[rdisk] = read_bio;
 
@@ -797,7 +868,38 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 		read_bio->bi_rw = READ | do_sync;
 		read_bio->bi_private = r1_bio;
 
-		generic_make_request(read_bio);
+		if (max_sectors < r1_bio->sectors) {
+			/* could not read all from this device, so we will
+			 * need another r1_bio.
+			 */
+			int sectors_handled;
+
+			sectors_handled = (r1_bio->sector + max_sectors
+					   - bio->bi_sector);
+			r1_bio->sectors = max_sectors;
+			spin_lock_irq(&conf->device_lock);
+			if (bio->bi_phys_segments == 0)
+				bio->bi_phys_segments = 2;
+			else
+				bio->bi_phys_segments++;
+			spin_unlock_irq(&conf->device_lock);
+			/* Cannot call generic_make_request directly
+			 * as that will be queued in __make_request
+			 * and subsequent mempool_alloc might block waiting
+			 * for it. So hand bio over to raid1d.
+			 */
+			reschedule_retry(r1_bio);
+
+			r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+			r1_bio->master_bio = bio;
+			r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+			r1_bio->state = 0;
+			r1_bio->mddev = mddev;
+			r1_bio->sector = bio->bi_sector + sectors_handled;
+			goto read_again;
+		} else
+			generic_make_request(read_bio);
 		return 0;
 	}
 
@@ -849,8 +951,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 		goto retry_write;
 	}
 
-	BUG_ON(targets == 0); /* we never fail the last device */
-
 	if (targets < conf->raid_disks) {
 		/* array is degraded, we will not clear the bitmap
 		 * on I/O completion (see raid1_end_write_request) */
@@ -1425,7 +1525,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
  *
  * 1. Retries failed read operations on working mirrors.
  * 2. Updates the raid superblock when problems encounter.
- * 3. Performs writes following reads for array syncronising.
+ * 3. Performs writes following reads for array synchronising.
  */
 
 static void fix_read_error(conf_t *conf, int read_disk,
@@ -1448,9 +1548,14 @@ static void fix_read_error(conf_t *conf, int read_disk,
 			 * which is the thread that might remove
 			 * a device. If raid1d ever becomes multi-threaded....
 			 */
+			sector_t first_bad;
+			int bad_sectors;
+
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
 			    test_bit(In_sync, &rdev->flags) &&
+			    is_badblock(rdev, sect, s,
+					&first_bad, &bad_sectors) == 0 &&
 			    sync_page_io(rdev, sect, s<<9,
 					 conf->tmppage, READ, false))
 				success = 1;
@@ -1546,9 +1651,11 @@ static void raid1d(mddev_t *mddev)
 		conf = mddev->private;
 		if (test_bit(R1BIO_IsSync, &r1_bio->state))
 			sync_request_write(mddev, r1_bio);
-		else {
+		else if (test_bit(R1BIO_ReadError, &r1_bio->state)) {
 			int disk;
+			int max_sectors;
 
+			clear_bit(R1BIO_ReadError, &r1_bio->state);
 			/* we got a read error. Maybe the drive is bad. Maybe just
 			 * the block and we can fix it.
 			 * We freeze all other IO, and try reading the block from
@@ -1568,21 +1675,28 @@ static void raid1d(mddev_t *mddev)
 				 conf->mirrors[r1_bio->read_disk].rdev);
 
 			bio = r1_bio->bios[r1_bio->read_disk];
-			if ((disk=read_balance(conf, r1_bio)) == -1) {
+			bdevname(bio->bi_bdev, b);
+read_more:
+			disk = read_balance(conf, r1_bio, &max_sectors);
+			if (disk == -1) {
 				printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O"
 				       " read error for block %llu\n",
-				       mdname(mddev),
-				       bdevname(bio->bi_bdev,b),
+				       mdname(mddev), b,
 				       (unsigned long long)r1_bio->sector);
 				raid_end_bio_io(r1_bio);
 			} else {
 				const unsigned long do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC;
-				r1_bio->bios[r1_bio->read_disk] =
-					mddev->ro ? IO_BLOCKED : NULL;
+				if (bio) {
+					r1_bio->bios[r1_bio->read_disk] =
+						mddev->ro ? IO_BLOCKED : NULL;
+					bio_put(bio);
+				}
 				r1_bio->read_disk = disk;
-				bio_put(bio);
 				bio = bio_clone_mddev(r1_bio->master_bio,
 						      GFP_NOIO, mddev);
+				md_trim_bio(bio,
+					    r1_bio->sector - bio->bi_sector,
+					    max_sectors);
 				r1_bio->bios[r1_bio->read_disk] = bio;
 				rdev = conf->mirrors[disk].rdev;
 				printk_ratelimited(
@@ -1597,8 +1711,44 @@ static void raid1d(mddev_t *mddev)
 				bio->bi_end_io = raid1_end_read_request;
 				bio->bi_rw = READ | do_sync;
 				bio->bi_private = r1_bio;
-				generic_make_request(bio);
+				if (max_sectors < r1_bio->sectors) {
+					/* Drat - have to split this up more */
+					struct bio *mbio = r1_bio->master_bio;
+					int sectors_handled =
+						r1_bio->sector + max_sectors
+						- mbio->bi_sector;
+					r1_bio->sectors = max_sectors;
+					spin_lock_irq(&conf->device_lock);
+					if (mbio->bi_phys_segments == 0)
+						mbio->bi_phys_segments = 2;
+					else
+						mbio->bi_phys_segments++;
+					spin_unlock_irq(&conf->device_lock);
+					generic_make_request(bio);
+					bio = NULL;
+
+					r1_bio = mempool_alloc(conf->r1bio_pool,
+							       GFP_NOIO);
+
+					r1_bio->master_bio = mbio;
+					r1_bio->sectors = (mbio->bi_size >> 9)
+							  - sectors_handled;
+					r1_bio->state = 0;
+					set_bit(R1BIO_ReadError,
+						&r1_bio->state);
+					r1_bio->mddev = mddev;
+					r1_bio->sector = mbio->bi_sector
+						+ sectors_handled;
+
+					goto read_more;
+				} else
+					generic_make_request(bio);
 			}
+		} else {
+			/* just a partial read to be scheduled from separate
+			 * context
+			 */
+			generic_make_request(r1_bio->bios[r1_bio->read_disk]);
 		}
 		cond_resched();
 	}
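The split-read bookkeeping in make_request() and raid1d() above relies on bio->bi_phys_segments being free for reuse once BIO_SEG_VALID is cleared: 0 means the master bio was never split, otherwise it counts the sub-reads still outstanding, and call_bio_endio() completes the master bio only when that count reaches zero. The following is a minimal single-threaded model of that counter with invented names; the real code takes conf->device_lock around every update because sub-reads can complete concurrently.

/* Illustrative model of the bi_phys_segments accounting; not kernel code. */
#include <stdio.h>

struct master_req {
	int phys_segments;	/* 0 = unsplit, else pending sub-reads */
	int completed;
};

static void split(struct master_req *m)
{
	/* mirrors "if (bi_phys_segments == 0) ... = 2; else ...++" */
	if (m->phys_segments == 0)
		m->phys_segments = 2;
	else
		m->phys_segments++;
}

static void end_sub_read(struct master_req *m)
{
	int done;

	if (m->phys_segments) {
		m->phys_segments--;
		done = (m->phys_segments == 0);
	} else
		done = 1;	/* never split: the single read completes it */

	if (done)
		m->completed = 1;
}

int main(void)
{
	struct master_req m = { 0, 0 };
	int i;

	split(&m);	/* first bad block hit: 2 sub-reads outstanding */
	split(&m);	/* second bad block hit: 3 sub-reads outstanding */

	for (i = 0; i < 3; i++) {
		end_sub_read(&m);
		printf("after sub-read %d: pending=%d completed=%d\n",
		       i + 1, m.phys_segments, m.completed);
	}
	return 0;
}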
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index 3cd18cfda2ad..aa6af37ca01b 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -123,6 +123,10 @@ struct r1bio_s {
 #define R1BIO_IsSync 1
 #define R1BIO_Degraded 2
 #define R1BIO_BehindIO 3
+/* Set ReadError on bios that experience a readerror so that
+ * raid1d knows what to do with them.
+ */
+#define R1BIO_ReadError 4
 /* For write-behind requests, we call bi_end_io when
  * the last non-write-behind device completes, providing
  * any write was successful. Otherwise we call when
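The heart of the new read_balance() logic is deciding how many sectors can be served from a device before its first bad block. The sketch below reproduces that clipping arithmetic in isolation; good_sectors_before_bad() is a hypothetical helper, not the kernel's is_badblock() interface, and it omits the choose_first case in which a read near resync is allowed to start inside a bad range and is instead limited to bad_sectors.

/* Worked example of the bad-block clipping arithmetic; names are illustrative. */
#include <stdio.h>

typedef unsigned long long sector_t;

/* how many sectors starting at this_sector avoid the bad range entirely */
static int good_sectors_before_bad(sector_t this_sector, int sectors,
				   sector_t first_bad, int bad_sectors)
{
	if (first_bad + bad_sectors <= this_sector ||
	    first_bad >= this_sector + sectors)
		return sectors;			/* bad range does not overlap */
	if (first_bad <= this_sector)
		return 0;			/* cannot read from the start */
	return (int)(first_bad - this_sector);	/* read up to the bad block */
}

int main(void)
{
	/* request 64 sectors at 1000; the device has 8 bad sectors at 1040 */
	int n = good_sectors_before_bad(1000, 64, 1040, 8);

	printf("can read %d of 64 sectors before hitting the bad block\n", n);
	return 0;
}

In this example the device can supply only 40 sectors, so make_request() would trim the cloned read to 40 sectors with md_trim_bio(), bump bi_phys_segments, and queue a second r1_bio for the remaining 24 via reschedule_retry().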