Diffstat (limited to 'drivers/md/raid1.c')
 drivers/md/raid1.c | 217 +++++++++++++++++++++++++++++++---------------
 1 file changed, 140 insertions(+), 77 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e07ce2e033a9..859bd3ffe435 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -677,6 +677,7 @@ static void raise_barrier(conf_t *conf)
 static void lower_barrier(conf_t *conf)
 {
 	unsigned long flags;
+	BUG_ON(conf->barrier <= 0);
 	spin_lock_irqsave(&conf->resync_lock, flags);
 	conf->barrier--;
 	spin_unlock_irqrestore(&conf->resync_lock, flags);
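
The new BUG_ON makes the pairing rule explicit: every lower_barrier() must be preceded by a matching raise_barrier(), so the counter can never legitimately be zero here. A minimal userspace sketch of that invariant (pthread-based; the names barrier_raise/barrier_lower and the plain int counter are illustrative, not from raid1.c):

#include <assert.h>
#include <pthread.h>

static pthread_mutex_t resync_lock = PTHREAD_MUTEX_INITIALIZER;
static int barrier;	/* count of outstanding raise calls */

static void barrier_raise(void)
{
	pthread_mutex_lock(&resync_lock);
	barrier++;
	pthread_mutex_unlock(&resync_lock);
}

static void barrier_lower(void)
{
	pthread_mutex_lock(&resync_lock);
	assert(barrier > 0);	/* userspace analog of the new BUG_ON */
	barrier--;
	pthread_mutex_unlock(&resync_lock);
}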
@@ -801,6 +802,25 @@ static int make_request(struct request_queue *q, struct bio * bio)
 
 	md_write_start(mddev, bio); /* wait on superblock update early */
 
+	if (bio_data_dir(bio) == WRITE &&
+	    bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo &&
+	    bio->bi_sector < mddev->suspend_hi) {
+		/* As the suspend_* range is controlled by
+		 * userspace, we want an interruptible
+		 * wait.
+		 */
+		DEFINE_WAIT(w);
+		for (;;) {
+			flush_signals(current);
+			prepare_to_wait(&conf->wait_barrier,
+					&w, TASK_INTERRUPTIBLE);
+			if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo ||
+			    bio->bi_sector >= mddev->suspend_hi)
+				break;
+			schedule();
+		}
+		finish_wait(&conf->wait_barrier, &w);
+	}
 	if (unlikely(!mddev->barriers_work &&
 		     bio_rw_flagged(bio, BIO_RW_BARRIER))) {
 		if (rw == WRITE)
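
The added block parks writes that overlap the userspace-controlled [suspend_lo, suspend_hi) window and re-checks the condition after every wakeup, using an interruptible sleep so a pending signal cannot wedge the writer. A rough userspace analog of the same pattern, using a condition variable in place of prepare_to_wait()/schedule() (all names below are illustrative, and the signal handling is omitted):

#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wait_barrier = PTHREAD_COND_INITIALIZER;
static uint64_t suspend_lo, suspend_hi;	/* sectors, set by a control thread */

/* Writer side: block while [sector, sector + nsectors) overlaps the window. */
static void wait_outside_suspend(uint64_t sector, uint64_t nsectors)
{
	pthread_mutex_lock(&lock);
	while (sector + nsectors > suspend_lo && sector < suspend_hi)
		pthread_cond_wait(&wait_barrier, &lock);	/* re-check on wakeup */
	pthread_mutex_unlock(&lock);
}

/* Control side: move the window, then wake all waiters (cf. quiesce state 2 below). */
static void set_suspend_range(uint64_t lo, uint64_t hi)
{
	pthread_mutex_lock(&lock);
	suspend_lo = lo;
	suspend_hi = hi;
	pthread_cond_broadcast(&wait_barrier);
	pthread_mutex_unlock(&lock);
}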
@@ -923,7 +943,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
 
 	/* do behind I/O ? */
 	if (bitmap &&
-	    atomic_read(&bitmap->behind_writes) < bitmap->max_write_behind &&
+	    (atomic_read(&bitmap->behind_writes)
+	     < mddev->bitmap_info.max_write_behind) &&
 	    (behind_pages = alloc_behind_pages(bio)) != NULL)
 		set_bit(R1BIO_BehindIO, &r1_bio->state);
 
@@ -1941,74 +1962,48 @@ static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks)
 	return mddev->dev_sectors;
 }
 
-static int run(mddev_t *mddev)
+static conf_t *setup_conf(mddev_t *mddev)
 {
 	conf_t *conf;
-	int i, j, disk_idx;
+	int i;
 	mirror_info_t *disk;
 	mdk_rdev_t *rdev;
+	int err = -ENOMEM;
 
-	if (mddev->level != 1) {
-		printk("raid1: %s: raid level not set to mirroring (%d)\n",
-		       mdname(mddev), mddev->level);
-		goto out;
-	}
-	if (mddev->reshape_position != MaxSector) {
-		printk("raid1: %s: reshape_position set but not supported\n",
-		       mdname(mddev));
-		goto out;
-	}
-	/*
-	 * copy the already verified devices into our private RAID1
-	 * bookkeeping area. [whatever we allocate in run(),
-	 * should be freed in stop()]
-	 */
 	conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
-	mddev->private = conf;
 	if (!conf)
-		goto out_no_mem;
+		goto abort;
 
 	conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
 				 GFP_KERNEL);
 	if (!conf->mirrors)
-		goto out_no_mem;
+		goto abort;
 
 	conf->tmppage = alloc_page(GFP_KERNEL);
 	if (!conf->tmppage)
-		goto out_no_mem;
+		goto abort;
 
-	conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
+	conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
 	if (!conf->poolinfo)
-		goto out_no_mem;
-	conf->poolinfo->mddev = NULL;
+		goto abort;
 	conf->poolinfo->raid_disks = mddev->raid_disks;
 	conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
 					  r1bio_pool_free,
 					  conf->poolinfo);
 	if (!conf->r1bio_pool)
-		goto out_no_mem;
+		goto abort;
+
 	conf->poolinfo->mddev = mddev;
 
 	spin_lock_init(&conf->device_lock);
-	mddev->queue->queue_lock = &conf->device_lock;
-
 	list_for_each_entry(rdev, &mddev->disks, same_set) {
-		disk_idx = rdev->raid_disk;
+		int disk_idx = rdev->raid_disk;
 		if (disk_idx >= mddev->raid_disks
 		    || disk_idx < 0)
 			continue;
 		disk = conf->mirrors + disk_idx;
 
 		disk->rdev = rdev;
-		disk_stack_limits(mddev->gendisk, rdev->bdev,
-				  rdev->data_offset << 9);
-		/* as we don't honour merge_bvec_fn, we must never risk
-		 * violating it, so limit ->max_sector to one PAGE, as
-		 * a one page request is never in violation.
-		 */
-		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		disk->head_position = 0;
 	}
@@ -2022,8 +2017,7 @@ static int run(mddev_t *mddev)
 	bio_list_init(&conf->pending_bio_list);
 	bio_list_init(&conf->flushing_bio_list);
 
-
-	mddev->degraded = 0;
+	conf->last_used = -1;
 	for (i = 0; i < conf->raid_disks; i++) {
 
 		disk = conf->mirrors + i;
@@ -2031,38 +2025,97 @@ static int run(mddev_t *mddev)
 		if (!disk->rdev ||
 		    !test_bit(In_sync, &disk->rdev->flags)) {
 			disk->head_position = 0;
-			mddev->degraded++;
 			if (disk->rdev)
 				conf->fullsync = 1;
-		}
+		} else if (conf->last_used < 0)
+			/*
+			 * The first working device is used as a
+			 * starting point to read balancing.
+			 */
+			conf->last_used = i;
 	}
-	if (mddev->degraded == conf->raid_disks) {
+
+	err = -EIO;
+	if (conf->last_used < 0) {
 		printk(KERN_ERR "raid1: no operational mirrors for %s\n",
 		       mdname(mddev));
-		goto out_free_conf;
+		goto abort;
 	}
-	if (conf->raid_disks - mddev->degraded == 1)
-		mddev->recovery_cp = MaxSector;
+	err = -ENOMEM;
+	conf->thread = md_register_thread(raid1d, mddev, NULL);
+	if (!conf->thread) {
+		printk(KERN_ERR
+		       "raid1: couldn't allocate thread for %s\n",
+		       mdname(mddev));
+		goto abort;
+	}
+
+	return conf;
+
+ abort:
+	if (conf) {
+		if (conf->r1bio_pool)
+			mempool_destroy(conf->r1bio_pool);
+		kfree(conf->mirrors);
+		safe_put_page(conf->tmppage);
+		kfree(conf->poolinfo);
+		kfree(conf);
+	}
+	return ERR_PTR(err);
+}
 
+static int run(mddev_t *mddev)
+{
+	conf_t *conf;
+	int i;
+	mdk_rdev_t *rdev;
+
+	if (mddev->level != 1) {
+		printk("raid1: %s: raid level not set to mirroring (%d)\n",
+		       mdname(mddev), mddev->level);
+		return -EIO;
+	}
+	if (mddev->reshape_position != MaxSector) {
+		printk("raid1: %s: reshape_position set but not supported\n",
+		       mdname(mddev));
+		return -EIO;
+	}
 	/*
-	 * find the first working one and use it as a starting point
-	 * to read balancing.
+	 * copy the already verified devices into our private RAID1
+	 * bookkeeping area. [whatever we allocate in run(),
+	 * should be freed in stop()]
 	 */
-	for (j = 0; j < conf->raid_disks &&
-		     (!conf->mirrors[j].rdev ||
-		      !test_bit(In_sync, &conf->mirrors[j].rdev->flags)) ; j++)
-		/* nothing */;
-	conf->last_used = j;
+	if (mddev->private == NULL)
+		conf = setup_conf(mddev);
+	else
+		conf = mddev->private;
 
+	if (IS_ERR(conf))
+		return PTR_ERR(conf);
 
-	mddev->thread = md_register_thread(raid1d, mddev, NULL);
-	if (!mddev->thread) {
-		printk(KERN_ERR
-		       "raid1: couldn't allocate thread for %s\n",
-		       mdname(mddev));
-		goto out_free_conf;
+	mddev->queue->queue_lock = &conf->device_lock;
+	list_for_each_entry(rdev, &mddev->disks, same_set) {
+		disk_stack_limits(mddev->gendisk, rdev->bdev,
+				  rdev->data_offset << 9);
+		/* as we don't honour merge_bvec_fn, we must never risk
+		 * violating it, so limit ->max_sector to one PAGE, as
+		 * a one page request is never in violation.
+		 */
+		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
+			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 	}
 
+	mddev->degraded = 0;
+	for (i=0; i < conf->raid_disks; i++)
+		if (conf->mirrors[i].rdev == NULL ||
+		    !test_bit(In_sync, &conf->mirrors[i].rdev->flags) ||
+		    test_bit(Faulty, &conf->mirrors[i].rdev->flags))
+			mddev->degraded++;
+
+	if (conf->raid_disks - mddev->degraded == 1)
+		mddev->recovery_cp = MaxSector;
+
 	if (mddev->recovery_cp != MaxSector)
 		printk(KERN_NOTICE "raid1: %s is not clean"
 		       " -- starting background reconstruction\n",
@@ -2071,9 +2124,14 @@ static int run(mddev_t *mddev)
2071 "raid1: raid set %s active with %d out of %d mirrors\n", 2124 "raid1: raid set %s active with %d out of %d mirrors\n",
2072 mdname(mddev), mddev->raid_disks - mddev->degraded, 2125 mdname(mddev), mddev->raid_disks - mddev->degraded,
2073 mddev->raid_disks); 2126 mddev->raid_disks);
2127
2074 /* 2128 /*
2075 * Ok, everything is just fine now 2129 * Ok, everything is just fine now
2076 */ 2130 */
2131 mddev->thread = conf->thread;
2132 conf->thread = NULL;
2133 mddev->private = conf;
2134
2077 md_set_array_sectors(mddev, raid1_size(mddev, 0, 0)); 2135 md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
2078 2136
2079 mddev->queue->unplug_fn = raid1_unplug; 2137 mddev->queue->unplug_fn = raid1_unplug;
@@ -2081,23 +2139,6 @@ static int run(mddev_t *mddev)
 	mddev->queue->backing_dev_info.congested_data = mddev;
 	md_integrity_register(mddev);
 	return 0;
-
-out_no_mem:
-	printk(KERN_ERR "raid1: couldn't allocate memory for %s\n",
-	       mdname(mddev));
-
-out_free_conf:
-	if (conf) {
-		if (conf->r1bio_pool)
-			mempool_destroy(conf->r1bio_pool);
-		kfree(conf->mirrors);
-		safe_put_page(conf->tmppage);
-		kfree(conf->poolinfo);
-		kfree(conf);
-		mddev->private = NULL;
-	}
-out:
-	return -EIO;
 }
 
 static int stop(mddev_t *mddev)
@@ -2271,6 +2312,9 @@ static void raid1_quiesce(mddev_t *mddev, int state)
 	conf_t *conf = mddev->private;
 
 	switch(state) {
+	case 2: /* wake for suspend */
+		wake_up(&conf->wait_barrier);
+		break;
 	case 1:
 		raise_barrier(conf);
 		break;
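
The new state-2 case pairs with the suspend-range wait added to make_request(): when userspace moves suspend_lo/suspend_hi (md core appears to call ->quiesce(mddev, 2) from the sysfs store handlers for exactly this purpose), waking conf->wait_barrier lets the parked writers re-test the range, much like set_suspend_range() in the earlier pthread sketch.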
@@ -2280,6 +2324,23 @@ static void raid1_quiesce(mddev_t *mddev, int state)
 	}
 }
 
+static void *raid1_takeover(mddev_t *mddev)
+{
+	/* raid1 can take over:
+	 *  raid5 with 2 devices, any layout or chunk size
+	 */
+	if (mddev->level == 5 && mddev->raid_disks == 2) {
+		conf_t *conf;
+		mddev->new_level = 1;
+		mddev->new_layout = 0;
+		mddev->new_chunk_sectors = 0;
+		conf = setup_conf(mddev);
+		if (!IS_ERR(conf))
+			conf->barrier = 1;
+		return conf;
+	}
+	return ERR_PTR(-EINVAL);
+}
 
 static struct mdk_personality raid1_personality =
 {
@@ -2299,6 +2360,7 @@ static struct mdk_personality raid1_personality =
 	.size		= raid1_size,
 	.check_reshape	= raid1_reshape,
 	.quiesce	= raid1_quiesce,
+	.takeover	= raid1_takeover,
 };
 
 static int __init raid_init(void)
@@ -2314,6 +2376,7 @@ static void raid_exit(void)
 module_init(raid_init);
 module_exit(raid_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD");
 MODULE_ALIAS("md-personality-3"); /* RAID1 */
 MODULE_ALIAS("md-raid1");
 MODULE_ALIAS("md-level-1");