aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid1.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--drivers/md/raid1.c114
1 files changed, 59 insertions, 55 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e59b10e66edb..a948da8012de 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -263,7 +263,7 @@ static inline void update_head_pos(int disk, r1bio_t *r1_bio)
263static void raid1_end_read_request(struct bio *bio, int error) 263static void raid1_end_read_request(struct bio *bio, int error)
264{ 264{
265 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 265 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
266 r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); 266 r1bio_t *r1_bio = bio->bi_private;
267 int mirror; 267 int mirror;
268 conf_t *conf = r1_bio->mddev->private; 268 conf_t *conf = r1_bio->mddev->private;
269 269
@@ -297,7 +297,8 @@ static void raid1_end_read_request(struct bio *bio, int error)
297 */ 297 */
298 char b[BDEVNAME_SIZE]; 298 char b[BDEVNAME_SIZE];
299 if (printk_ratelimit()) 299 if (printk_ratelimit())
300 printk(KERN_ERR "raid1: %s: rescheduling sector %llu\n", 300 printk(KERN_ERR "md/raid1:%s: %s: rescheduling sector %llu\n",
301 mdname(conf->mddev),
301 bdevname(conf->mirrors[mirror].rdev->bdev,b), (unsigned long long)r1_bio->sector); 302 bdevname(conf->mirrors[mirror].rdev->bdev,b), (unsigned long long)r1_bio->sector);
302 reschedule_retry(r1_bio); 303 reschedule_retry(r1_bio);
303 } 304 }
@@ -308,7 +309,7 @@ static void raid1_end_read_request(struct bio *bio, int error)
308static void raid1_end_write_request(struct bio *bio, int error) 309static void raid1_end_write_request(struct bio *bio, int error)
309{ 310{
310 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 311 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
311 r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); 312 r1bio_t *r1_bio = bio->bi_private;
312 int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state); 313 int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
313 conf_t *conf = r1_bio->mddev->private; 314 conf_t *conf = r1_bio->mddev->private;
314 struct bio *to_put = NULL; 315 struct bio *to_put = NULL;
@@ -418,7 +419,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
418 */ 419 */
419static int read_balance(conf_t *conf, r1bio_t *r1_bio) 420static int read_balance(conf_t *conf, r1bio_t *r1_bio)
420{ 421{
421 const unsigned long this_sector = r1_bio->sector; 422 const sector_t this_sector = r1_bio->sector;
422 int new_disk = conf->last_used, disk = new_disk; 423 int new_disk = conf->last_used, disk = new_disk;
423 int wonly_disk = -1; 424 int wonly_disk = -1;
424 const int sectors = r1_bio->sectors; 425 const int sectors = r1_bio->sectors;
@@ -434,7 +435,7 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
434 retry: 435 retry:
435 if (conf->mddev->recovery_cp < MaxSector && 436 if (conf->mddev->recovery_cp < MaxSector &&
436 (this_sector + sectors >= conf->next_resync)) { 437 (this_sector + sectors >= conf->next_resync)) {
437 /* Choose the first operation device, for consistancy */ 438 /* Choose the first operational device, for consistancy */
438 new_disk = 0; 439 new_disk = 0;
439 440
440 for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev); 441 for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
@@ -774,9 +775,8 @@ do_sync_io:
774 return NULL; 775 return NULL;
775} 776}
776 777
777static int make_request(struct request_queue *q, struct bio * bio) 778static int make_request(mddev_t *mddev, struct bio * bio)
778{ 779{
779 mddev_t *mddev = q->queuedata;
780 conf_t *conf = mddev->private; 780 conf_t *conf = mddev->private;
781 mirror_info_t *mirror; 781 mirror_info_t *mirror;
782 r1bio_t *r1_bio; 782 r1bio_t *r1_bio;
@@ -788,7 +788,6 @@ static int make_request(struct request_queue *q, struct bio * bio)
788 struct page **behind_pages = NULL; 788 struct page **behind_pages = NULL;
789 const int rw = bio_data_dir(bio); 789 const int rw = bio_data_dir(bio);
790 const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO); 790 const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
791 int cpu;
792 bool do_barriers; 791 bool do_barriers;
793 mdk_rdev_t *blocked_rdev; 792 mdk_rdev_t *blocked_rdev;
794 793
@@ -834,12 +833,6 @@ static int make_request(struct request_queue *q, struct bio * bio)
834 833
835 bitmap = mddev->bitmap; 834 bitmap = mddev->bitmap;
836 835
837 cpu = part_stat_lock();
838 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
839 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
840 bio_sectors(bio));
841 part_stat_unlock();
842
843 /* 836 /*
844 * make_request() can abort the operation when READA is being 837 * make_request() can abort the operation when READA is being
845 * used and no empty request is available. 838 * used and no empty request is available.
@@ -866,6 +859,15 @@ static int make_request(struct request_queue *q, struct bio * bio)
866 } 859 }
867 mirror = conf->mirrors + rdisk; 860 mirror = conf->mirrors + rdisk;
868 861
862 if (test_bit(WriteMostly, &mirror->rdev->flags) &&
863 bitmap) {
864 /* Reading from a write-mostly device must
865 * take care not to over-take any writes
866 * that are 'behind'
867 */
868 wait_event(bitmap->behind_wait,
869 atomic_read(&bitmap->behind_writes) == 0);
870 }
869 r1_bio->read_disk = rdisk; 871 r1_bio->read_disk = rdisk;
870 872
871 read_bio = bio_clone(bio, GFP_NOIO); 873 read_bio = bio_clone(bio, GFP_NOIO);
@@ -912,9 +914,10 @@ static int make_request(struct request_queue *q, struct bio * bio)
912 if (test_bit(Faulty, &rdev->flags)) { 914 if (test_bit(Faulty, &rdev->flags)) {
913 rdev_dec_pending(rdev, mddev); 915 rdev_dec_pending(rdev, mddev);
914 r1_bio->bios[i] = NULL; 916 r1_bio->bios[i] = NULL;
915 } else 917 } else {
916 r1_bio->bios[i] = bio; 918 r1_bio->bios[i] = bio;
917 targets++; 919 targets++;
920 }
918 } else 921 } else
919 r1_bio->bios[i] = NULL; 922 r1_bio->bios[i] = NULL;
920 } 923 }
@@ -942,10 +945,14 @@ static int make_request(struct request_queue *q, struct bio * bio)
942 set_bit(R1BIO_Degraded, &r1_bio->state); 945 set_bit(R1BIO_Degraded, &r1_bio->state);
943 } 946 }
944 947
945 /* do behind I/O ? */ 948 /* do behind I/O ?
949 * Not if there are too many, or cannot allocate memory,
950 * or a reader on WriteMostly is waiting for behind writes
951 * to flush */
946 if (bitmap && 952 if (bitmap &&
947 (atomic_read(&bitmap->behind_writes) 953 (atomic_read(&bitmap->behind_writes)
948 < mddev->bitmap_info.max_write_behind) && 954 < mddev->bitmap_info.max_write_behind) &&
955 !waitqueue_active(&bitmap->behind_wait) &&
949 (behind_pages = alloc_behind_pages(bio)) != NULL) 956 (behind_pages = alloc_behind_pages(bio)) != NULL)
950 set_bit(R1BIO_BehindIO, &r1_bio->state); 957 set_bit(R1BIO_BehindIO, &r1_bio->state);
951 958
@@ -1070,21 +1077,22 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
1070 } else 1077 } else
1071 set_bit(Faulty, &rdev->flags); 1078 set_bit(Faulty, &rdev->flags);
1072 set_bit(MD_CHANGE_DEVS, &mddev->flags); 1079 set_bit(MD_CHANGE_DEVS, &mddev->flags);
1073 printk(KERN_ALERT "raid1: Disk failure on %s, disabling device.\n" 1080 printk(KERN_ALERT "md/raid1:%s: Disk failure on %s, disabling device.\n"
1074 "raid1: Operation continuing on %d devices.\n", 1081 KERN_ALERT "md/raid1:%s: Operation continuing on %d devices.\n",
1075 bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); 1082 mdname(mddev), bdevname(rdev->bdev, b),
1083 mdname(mddev), conf->raid_disks - mddev->degraded);
1076} 1084}
1077 1085
1078static void print_conf(conf_t *conf) 1086static void print_conf(conf_t *conf)
1079{ 1087{
1080 int i; 1088 int i;
1081 1089
1082 printk("RAID1 conf printout:\n"); 1090 printk(KERN_DEBUG "RAID1 conf printout:\n");
1083 if (!conf) { 1091 if (!conf) {
1084 printk("(!conf)\n"); 1092 printk(KERN_DEBUG "(!conf)\n");
1085 return; 1093 return;
1086 } 1094 }
1087 printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, 1095 printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
1088 conf->raid_disks); 1096 conf->raid_disks);
1089 1097
1090 rcu_read_lock(); 1098 rcu_read_lock();
@@ -1092,7 +1100,7 @@ static void print_conf(conf_t *conf)
1092 char b[BDEVNAME_SIZE]; 1100 char b[BDEVNAME_SIZE];
1093 mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); 1101 mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
1094 if (rdev) 1102 if (rdev)
1095 printk(" disk %d, wo:%d, o:%d, dev:%s\n", 1103 printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s\n",
1096 i, !test_bit(In_sync, &rdev->flags), 1104 i, !test_bit(In_sync, &rdev->flags),
1097 !test_bit(Faulty, &rdev->flags), 1105 !test_bit(Faulty, &rdev->flags),
1098 bdevname(rdev->bdev,b)); 1106 bdevname(rdev->bdev,b));
@@ -1223,7 +1231,7 @@ abort:
1223 1231
1224static void end_sync_read(struct bio *bio, int error) 1232static void end_sync_read(struct bio *bio, int error)
1225{ 1233{
1226 r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); 1234 r1bio_t *r1_bio = bio->bi_private;
1227 int i; 1235 int i;
1228 1236
1229 for (i=r1_bio->mddev->raid_disks; i--; ) 1237 for (i=r1_bio->mddev->raid_disks; i--; )
@@ -1246,7 +1254,7 @@ static void end_sync_read(struct bio *bio, int error)
1246static void end_sync_write(struct bio *bio, int error) 1254static void end_sync_write(struct bio *bio, int error)
1247{ 1255{
1248 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 1256 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1249 r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); 1257 r1bio_t *r1_bio = bio->bi_private;
1250 mddev_t *mddev = r1_bio->mddev; 1258 mddev_t *mddev = r1_bio->mddev;
1251 conf_t *conf = mddev->private; 1259 conf_t *conf = mddev->private;
1252 int i; 1260 int i;
@@ -1453,9 +1461,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
1453 char b[BDEVNAME_SIZE]; 1461 char b[BDEVNAME_SIZE];
1454 /* Cannot read from anywhere, array is toast */ 1462 /* Cannot read from anywhere, array is toast */
1455 md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); 1463 md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
1456 printk(KERN_ALERT "raid1: %s: unrecoverable I/O read error" 1464 printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
1457 " for block %llu\n", 1465 " for block %llu\n",
1458 bdevname(bio->bi_bdev,b), 1466 mdname(mddev),
1467 bdevname(bio->bi_bdev, b),
1459 (unsigned long long)r1_bio->sector); 1468 (unsigned long long)r1_bio->sector);
1460 md_done_sync(mddev, r1_bio->sectors, 0); 1469 md_done_sync(mddev, r1_bio->sectors, 0);
1461 put_buf(r1_bio); 1470 put_buf(r1_bio);
@@ -1577,7 +1586,7 @@ static void fix_read_error(conf_t *conf, int read_disk,
1577 else { 1586 else {
1578 atomic_add(s, &rdev->corrected_errors); 1587 atomic_add(s, &rdev->corrected_errors);
1579 printk(KERN_INFO 1588 printk(KERN_INFO
1580 "raid1:%s: read error corrected " 1589 "md/raid1:%s: read error corrected "
1581 "(%d sectors at %llu on %s)\n", 1590 "(%d sectors at %llu on %s)\n",
1582 mdname(mddev), s, 1591 mdname(mddev), s,
1583 (unsigned long long)(sect + 1592 (unsigned long long)(sect +
@@ -1682,8 +1691,9 @@ static void raid1d(mddev_t *mddev)
1682 1691
1683 bio = r1_bio->bios[r1_bio->read_disk]; 1692 bio = r1_bio->bios[r1_bio->read_disk];
1684 if ((disk=read_balance(conf, r1_bio)) == -1) { 1693 if ((disk=read_balance(conf, r1_bio)) == -1) {
1685 printk(KERN_ALERT "raid1: %s: unrecoverable I/O" 1694 printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O"
1686 " read error for block %llu\n", 1695 " read error for block %llu\n",
1696 mdname(mddev),
1687 bdevname(bio->bi_bdev,b), 1697 bdevname(bio->bi_bdev,b),
1688 (unsigned long long)r1_bio->sector); 1698 (unsigned long long)r1_bio->sector);
1689 raid_end_bio_io(r1_bio); 1699 raid_end_bio_io(r1_bio);
@@ -1697,10 +1707,11 @@ static void raid1d(mddev_t *mddev)
1697 r1_bio->bios[r1_bio->read_disk] = bio; 1707 r1_bio->bios[r1_bio->read_disk] = bio;
1698 rdev = conf->mirrors[disk].rdev; 1708 rdev = conf->mirrors[disk].rdev;
1699 if (printk_ratelimit()) 1709 if (printk_ratelimit())
1700 printk(KERN_ERR "raid1: %s: redirecting sector %llu to" 1710 printk(KERN_ERR "md/raid1:%s: redirecting sector %llu to"
1701 " another mirror\n", 1711 " other mirror: %s\n",
1702 bdevname(rdev->bdev,b), 1712 mdname(mddev),
1703 (unsigned long long)r1_bio->sector); 1713 (unsigned long long)r1_bio->sector,
1714 bdevname(rdev->bdev,b));
1704 bio->bi_sector = r1_bio->sector + rdev->data_offset; 1715 bio->bi_sector = r1_bio->sector + rdev->data_offset;
1705 bio->bi_bdev = rdev->bdev; 1716 bio->bi_bdev = rdev->bdev;
1706 bio->bi_end_io = raid1_end_read_request; 1717 bio->bi_end_io = raid1_end_read_request;
@@ -1755,13 +1766,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1755 int still_degraded = 0; 1766 int still_degraded = 0;
1756 1767
1757 if (!conf->r1buf_pool) 1768 if (!conf->r1buf_pool)
1758 {
1759/*
1760 printk("sync start - bitmap %p\n", mddev->bitmap);
1761*/
1762 if (init_resync(conf)) 1769 if (init_resync(conf))
1763 return 0; 1770 return 0;
1764 }
1765 1771
1766 max_sector = mddev->dev_sectors; 1772 max_sector = mddev->dev_sectors;
1767 if (sector_nr >= max_sector) { 1773 if (sector_nr >= max_sector) {
@@ -2042,7 +2048,7 @@ static conf_t *setup_conf(mddev_t *mddev)
2042 2048
2043 err = -EIO; 2049 err = -EIO;
2044 if (conf->last_used < 0) { 2050 if (conf->last_used < 0) {
2045 printk(KERN_ERR "raid1: no operational mirrors for %s\n", 2051 printk(KERN_ERR "md/raid1:%s: no operational mirrors\n",
2046 mdname(mddev)); 2052 mdname(mddev));
2047 goto abort; 2053 goto abort;
2048 } 2054 }
@@ -2050,7 +2056,7 @@ static conf_t *setup_conf(mddev_t *mddev)
2050 conf->thread = md_register_thread(raid1d, mddev, NULL); 2056 conf->thread = md_register_thread(raid1d, mddev, NULL);
2051 if (!conf->thread) { 2057 if (!conf->thread) {
2052 printk(KERN_ERR 2058 printk(KERN_ERR
2053 "raid1: couldn't allocate thread for %s\n", 2059 "md/raid1:%s: couldn't allocate thread\n",
2054 mdname(mddev)); 2060 mdname(mddev));
2055 goto abort; 2061 goto abort;
2056 } 2062 }
@@ -2076,12 +2082,12 @@ static int run(mddev_t *mddev)
2076 mdk_rdev_t *rdev; 2082 mdk_rdev_t *rdev;
2077 2083
2078 if (mddev->level != 1) { 2084 if (mddev->level != 1) {
2079 printk("raid1: %s: raid level not set to mirroring (%d)\n", 2085 printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n",
2080 mdname(mddev), mddev->level); 2086 mdname(mddev), mddev->level);
2081 return -EIO; 2087 return -EIO;
2082 } 2088 }
2083 if (mddev->reshape_position != MaxSector) { 2089 if (mddev->reshape_position != MaxSector) {
2084 printk("raid1: %s: reshape_position set but not supported\n", 2090 printk(KERN_ERR "md/raid1:%s: reshape_position set but not supported\n",
2085 mdname(mddev)); 2091 mdname(mddev));
2086 return -EIO; 2092 return -EIO;
2087 } 2093 }
@@ -2124,11 +2130,11 @@ static int run(mddev_t *mddev)
2124 mddev->recovery_cp = MaxSector; 2130 mddev->recovery_cp = MaxSector;
2125 2131
2126 if (mddev->recovery_cp != MaxSector) 2132 if (mddev->recovery_cp != MaxSector)
2127 printk(KERN_NOTICE "raid1: %s is not clean" 2133 printk(KERN_NOTICE "md/raid1:%s: not clean"
2128 " -- starting background reconstruction\n", 2134 " -- starting background reconstruction\n",
2129 mdname(mddev)); 2135 mdname(mddev));
2130 printk(KERN_INFO 2136 printk(KERN_INFO
2131 "raid1: raid set %s active with %d out of %d mirrors\n", 2137 "md/raid1:%s: active with %d out of %d mirrors\n",
2132 mdname(mddev), mddev->raid_disks - mddev->degraded, 2138 mdname(mddev), mddev->raid_disks - mddev->degraded,
2133 mddev->raid_disks); 2139 mddev->raid_disks);
2134 2140
@@ -2152,15 +2158,14 @@ static int stop(mddev_t *mddev)
2152{ 2158{
2153 conf_t *conf = mddev->private; 2159 conf_t *conf = mddev->private;
2154 struct bitmap *bitmap = mddev->bitmap; 2160 struct bitmap *bitmap = mddev->bitmap;
2155 int behind_wait = 0;
2156 2161
2157 /* wait for behind writes to complete */ 2162 /* wait for behind writes to complete */
2158 while (bitmap && atomic_read(&bitmap->behind_writes) > 0) { 2163 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
2159 behind_wait++; 2164 printk(KERN_INFO "md/raid1:%s: behind writes in progress - waiting to stop.\n",
2160 printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop (%d)\n", mdname(mddev), behind_wait); 2165 mdname(mddev));
2161 set_current_state(TASK_UNINTERRUPTIBLE);
2162 schedule_timeout(HZ); /* wait a second */
2163 /* need to kick something here to make sure I/O goes? */ 2166 /* need to kick something here to make sure I/O goes? */
2167 wait_event(bitmap->behind_wait,
2168 atomic_read(&bitmap->behind_writes) == 0);
2164 } 2169 }
2165 2170
2166 raise_barrier(conf); 2171 raise_barrier(conf);
@@ -2191,7 +2196,6 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
2191 if (mddev->array_sectors > raid1_size(mddev, sectors, 0)) 2196 if (mddev->array_sectors > raid1_size(mddev, sectors, 0))
2192 return -EINVAL; 2197 return -EINVAL;
2193 set_capacity(mddev->gendisk, mddev->array_sectors); 2198 set_capacity(mddev->gendisk, mddev->array_sectors);
2194 mddev->changed = 1;
2195 revalidate_disk(mddev->gendisk); 2199 revalidate_disk(mddev->gendisk);
2196 if (sectors > mddev->dev_sectors && 2200 if (sectors > mddev->dev_sectors &&
2197 mddev->recovery_cp == MaxSector) { 2201 mddev->recovery_cp == MaxSector) {
@@ -2286,9 +2290,9 @@ static int raid1_reshape(mddev_t *mddev)
2286 if (sysfs_create_link(&mddev->kobj, 2290 if (sysfs_create_link(&mddev->kobj,
2287 &rdev->kobj, nm)) 2291 &rdev->kobj, nm))
2288 printk(KERN_WARNING 2292 printk(KERN_WARNING
2289 "md/raid1: cannot register " 2293 "md/raid1:%s: cannot register "
2290 "%s for %s\n", 2294 "%s\n",
2291 nm, mdname(mddev)); 2295 mdname(mddev), nm);
2292 } 2296 }
2293 if (rdev) 2297 if (rdev)
2294 newmirrors[d2++].rdev = rdev; 2298 newmirrors[d2++].rdev = rdev;