aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/md/raid1.c191
-rw-r--r--drivers/md/raid1.h5
2 files changed, 120 insertions, 76 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 35b2d8646ae9..7549b0bad326 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -677,6 +677,7 @@ static void raise_barrier(conf_t *conf)
677static void lower_barrier(conf_t *conf) 677static void lower_barrier(conf_t *conf)
678{ 678{
679 unsigned long flags; 679 unsigned long flags;
680 BUG_ON(conf->barrier <= 0);
680 spin_lock_irqsave(&conf->resync_lock, flags); 681 spin_lock_irqsave(&conf->resync_lock, flags);
681 conf->barrier--; 682 conf->barrier--;
682 spin_unlock_irqrestore(&conf->resync_lock, flags); 683 spin_unlock_irqrestore(&conf->resync_lock, flags);
@@ -1960,74 +1961,48 @@ static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks)
1960 return mddev->dev_sectors; 1961 return mddev->dev_sectors;
1961} 1962}
1962 1963
1963static int run(mddev_t *mddev) 1964static conf_t *setup_conf(mddev_t *mddev)
1964{ 1965{
1965 conf_t *conf; 1966 conf_t *conf;
1966 int i, j, disk_idx; 1967 int i;
1967 mirror_info_t *disk; 1968 mirror_info_t *disk;
1968 mdk_rdev_t *rdev; 1969 mdk_rdev_t *rdev;
1970 int err = -ENOMEM;
1969 1971
1970 if (mddev->level != 1) {
1971 printk("raid1: %s: raid level not set to mirroring (%d)\n",
1972 mdname(mddev), mddev->level);
1973 goto out;
1974 }
1975 if (mddev->reshape_position != MaxSector) {
1976 printk("raid1: %s: reshape_position set but not supported\n",
1977 mdname(mddev));
1978 goto out;
1979 }
1980 /*
1981 * copy the already verified devices into our private RAID1
1982 * bookkeeping area. [whatever we allocate in run(),
1983 * should be freed in stop()]
1984 */
1985 conf = kzalloc(sizeof(conf_t), GFP_KERNEL); 1972 conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
1986 mddev->private = conf;
1987 if (!conf) 1973 if (!conf)
1988 goto out_no_mem; 1974 goto abort;
1989 1975
1990 conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, 1976 conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
1991 GFP_KERNEL); 1977 GFP_KERNEL);
1992 if (!conf->mirrors) 1978 if (!conf->mirrors)
1993 goto out_no_mem; 1979 goto abort;
1994 1980
1995 conf->tmppage = alloc_page(GFP_KERNEL); 1981 conf->tmppage = alloc_page(GFP_KERNEL);
1996 if (!conf->tmppage) 1982 if (!conf->tmppage)
1997 goto out_no_mem; 1983 goto abort;
1998 1984
1999 conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL); 1985 conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
2000 if (!conf->poolinfo) 1986 if (!conf->poolinfo)
2001 goto out_no_mem; 1987 goto abort;
2002 conf->poolinfo->mddev = NULL;
2003 conf->poolinfo->raid_disks = mddev->raid_disks; 1988 conf->poolinfo->raid_disks = mddev->raid_disks;
2004 conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc, 1989 conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
2005 r1bio_pool_free, 1990 r1bio_pool_free,
2006 conf->poolinfo); 1991 conf->poolinfo);
2007 if (!conf->r1bio_pool) 1992 if (!conf->r1bio_pool)
2008 goto out_no_mem; 1993 goto abort;
1994
2009 conf->poolinfo->mddev = mddev; 1995 conf->poolinfo->mddev = mddev;
2010 1996
2011 spin_lock_init(&conf->device_lock); 1997 spin_lock_init(&conf->device_lock);
2012 mddev->queue->queue_lock = &conf->device_lock;
2013
2014 list_for_each_entry(rdev, &mddev->disks, same_set) { 1998 list_for_each_entry(rdev, &mddev->disks, same_set) {
2015 disk_idx = rdev->raid_disk; 1999 int disk_idx = rdev->raid_disk;
2016 if (disk_idx >= mddev->raid_disks 2000 if (disk_idx >= mddev->raid_disks
2017 || disk_idx < 0) 2001 || disk_idx < 0)
2018 continue; 2002 continue;
2019 disk = conf->mirrors + disk_idx; 2003 disk = conf->mirrors + disk_idx;
2020 2004
2021 disk->rdev = rdev; 2005 disk->rdev = rdev;
2022 disk_stack_limits(mddev->gendisk, rdev->bdev,
2023 rdev->data_offset << 9);
2024 /* as we don't honour merge_bvec_fn, we must never risk
2025 * violating it, so limit ->max_sector to one PAGE, as
2026 * a one page request is never in violation.
2027 */
2028 if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
2029 queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
2030 blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
2031 2006
2032 disk->head_position = 0; 2007 disk->head_position = 0;
2033 } 2008 }
@@ -2041,8 +2016,7 @@ static int run(mddev_t *mddev)
2041 bio_list_init(&conf->pending_bio_list); 2016 bio_list_init(&conf->pending_bio_list);
2042 bio_list_init(&conf->flushing_bio_list); 2017 bio_list_init(&conf->flushing_bio_list);
2043 2018
2044 2019 conf->last_used = -1;
2045 mddev->degraded = 0;
2046 for (i = 0; i < conf->raid_disks; i++) { 2020 for (i = 0; i < conf->raid_disks; i++) {
2047 2021
2048 disk = conf->mirrors + i; 2022 disk = conf->mirrors + i;
@@ -2050,38 +2024,97 @@ static int run(mddev_t *mddev)
2050 if (!disk->rdev || 2024 if (!disk->rdev ||
2051 !test_bit(In_sync, &disk->rdev->flags)) { 2025 !test_bit(In_sync, &disk->rdev->flags)) {
2052 disk->head_position = 0; 2026 disk->head_position = 0;
2053 mddev->degraded++;
2054 if (disk->rdev) 2027 if (disk->rdev)
2055 conf->fullsync = 1; 2028 conf->fullsync = 1;
2056 } 2029 } else if (conf->last_used < 0)
2030 /*
2031 * The first working device is used as a
2032 * starting point to read balancing.
2033 */
2034 conf->last_used = i;
2057 } 2035 }
2058 if (mddev->degraded == conf->raid_disks) { 2036
2037 err = -EIO;
2038 if (conf->last_used < 0) {
2059 printk(KERN_ERR "raid1: no operational mirrors for %s\n", 2039 printk(KERN_ERR "raid1: no operational mirrors for %s\n",
2060 mdname(mddev)); 2040 mdname(mddev));
2061 goto out_free_conf; 2041 goto abort;
2042 }
2043 err = -ENOMEM;
2044 conf->thread = md_register_thread(raid1d, mddev, NULL);
2045 if (!conf->thread) {
2046 printk(KERN_ERR
2047 "raid1: couldn't allocate thread for %s\n",
2048 mdname(mddev));
2049 goto abort;
2062 } 2050 }
2063 if (conf->raid_disks - mddev->degraded == 1)
2064 mddev->recovery_cp = MaxSector;
2065 2051
2052 return conf;
2053
2054 abort:
2055 if (conf) {
2056 if (conf->r1bio_pool)
2057 mempool_destroy(conf->r1bio_pool);
2058 kfree(conf->mirrors);
2059 safe_put_page(conf->tmppage);
2060 kfree(conf->poolinfo);
2061 kfree(conf);
2062 }
2063 return ERR_PTR(err);
2064}
2065
2066static int run(mddev_t *mddev)
2067{
2068 conf_t *conf;
2069 int i;
2070 mdk_rdev_t *rdev;
2071
2072 if (mddev->level != 1) {
2073 printk("raid1: %s: raid level not set to mirroring (%d)\n",
2074 mdname(mddev), mddev->level);
2075 return -EIO;
2076 }
2077 if (mddev->reshape_position != MaxSector) {
2078 printk("raid1: %s: reshape_position set but not supported\n",
2079 mdname(mddev));
2080 return -EIO;
2081 }
2066 /* 2082 /*
2067 * find the first working one and use it as a starting point 2083 * copy the already verified devices into our private RAID1
2068 * to read balancing. 2084 * bookkeeping area. [whatever we allocate in run(),
2085 * should be freed in stop()]
2069 */ 2086 */
2070 for (j = 0; j < conf->raid_disks && 2087 if (mddev->private == NULL)
2071 (!conf->mirrors[j].rdev || 2088 conf = setup_conf(mddev);
2072 !test_bit(In_sync, &conf->mirrors[j].rdev->flags)) ; j++) 2089 else
2073 /* nothing */; 2090 conf = mddev->private;
2074 conf->last_used = j;
2075 2091
2092 if (IS_ERR(conf))
2093 return PTR_ERR(conf);
2076 2094
2077 mddev->thread = md_register_thread(raid1d, mddev, NULL); 2095 mddev->queue->queue_lock = &conf->device_lock;
2078 if (!mddev->thread) { 2096 list_for_each_entry(rdev, &mddev->disks, same_set) {
2079 printk(KERN_ERR 2097 disk_stack_limits(mddev->gendisk, rdev->bdev,
2080 "raid1: couldn't allocate thread for %s\n", 2098 rdev->data_offset << 9);
2081 mdname(mddev)); 2099 /* as we don't honour merge_bvec_fn, we must never risk
2082 goto out_free_conf; 2100 * violating it, so limit ->max_sector to one PAGE, as
2101 * a one page request is never in violation.
2102 */
2103 if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
2104 queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
2105 blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
2083 } 2106 }
2084 2107
2108 mddev->degraded = 0;
2109 for (i=0; i < conf->raid_disks; i++)
2110 if (conf->mirrors[i].rdev == NULL ||
2111 !test_bit(In_sync, &conf->mirrors[i].rdev->flags) ||
2112 test_bit(Faulty, &conf->mirrors[i].rdev->flags))
2113 mddev->degraded++;
2114
2115 if (conf->raid_disks - mddev->degraded == 1)
2116 mddev->recovery_cp = MaxSector;
2117
2085 if (mddev->recovery_cp != MaxSector) 2118 if (mddev->recovery_cp != MaxSector)
2086 printk(KERN_NOTICE "raid1: %s is not clean" 2119 printk(KERN_NOTICE "raid1: %s is not clean"
2087 " -- starting background reconstruction\n", 2120 " -- starting background reconstruction\n",
@@ -2090,9 +2123,14 @@ static int run(mddev_t *mddev)
2090 "raid1: raid set %s active with %d out of %d mirrors\n", 2123 "raid1: raid set %s active with %d out of %d mirrors\n",
2091 mdname(mddev), mddev->raid_disks - mddev->degraded, 2124 mdname(mddev), mddev->raid_disks - mddev->degraded,
2092 mddev->raid_disks); 2125 mddev->raid_disks);
2126
2093 /* 2127 /*
2094 * Ok, everything is just fine now 2128 * Ok, everything is just fine now
2095 */ 2129 */
2130 mddev->thread = conf->thread;
2131 conf->thread = NULL;
2132 mddev->private = conf;
2133
2096 md_set_array_sectors(mddev, raid1_size(mddev, 0, 0)); 2134 md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
2097 2135
2098 mddev->queue->unplug_fn = raid1_unplug; 2136 mddev->queue->unplug_fn = raid1_unplug;
@@ -2100,23 +2138,6 @@ static int run(mddev_t *mddev)
2100 mddev->queue->backing_dev_info.congested_data = mddev; 2138 mddev->queue->backing_dev_info.congested_data = mddev;
2101 md_integrity_register(mddev); 2139 md_integrity_register(mddev);
2102 return 0; 2140 return 0;
2103
2104out_no_mem:
2105 printk(KERN_ERR "raid1: couldn't allocate memory for %s\n",
2106 mdname(mddev));
2107
2108out_free_conf:
2109 if (conf) {
2110 if (conf->r1bio_pool)
2111 mempool_destroy(conf->r1bio_pool);
2112 kfree(conf->mirrors);
2113 safe_put_page(conf->tmppage);
2114 kfree(conf->poolinfo);
2115 kfree(conf);
2116 mddev->private = NULL;
2117 }
2118out:
2119 return -EIO;
2120} 2141}
2121 2142
2122static int stop(mddev_t *mddev) 2143static int stop(mddev_t *mddev)
@@ -2302,6 +2323,23 @@ static void raid1_quiesce(mddev_t *mddev, int state)
2302 } 2323 }
2303} 2324}
2304 2325
2326static void *raid1_takeover(mddev_t *mddev)
2327{
2328 /* raid1 can take over:
2329 * raid5 with 2 devices, any layout or chunk size
2330 */
2331 if (mddev->level == 5 && mddev->raid_disks == 2) {
2332 conf_t *conf;
2333 mddev->new_level = 1;
2334 mddev->new_layout = 0;
2335 mddev->new_chunk_sectors = 0;
2336 conf = setup_conf(mddev);
2337 if (!IS_ERR(conf))
2338 conf->barrier = 1;
2339 return conf;
2340 }
2341 return ERR_PTR(-EINVAL);
2342}
2305 2343
2306static struct mdk_personality raid1_personality = 2344static struct mdk_personality raid1_personality =
2307{ 2345{
@@ -2321,6 +2359,7 @@ static struct mdk_personality raid1_personality =
2321 .size = raid1_size, 2359 .size = raid1_size,
2322 .check_reshape = raid1_reshape, 2360 .check_reshape = raid1_reshape,
2323 .quiesce = raid1_quiesce, 2361 .quiesce = raid1_quiesce,
2362 .takeover = raid1_takeover,
2324}; 2363};
2325 2364
2326static int __init raid_init(void) 2365static int __init raid_init(void)
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index e87b84deff68..5f2d443ae28a 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -59,6 +59,11 @@ struct r1_private_data_s {
59 59
60 mempool_t *r1bio_pool; 60 mempool_t *r1bio_pool;
61 mempool_t *r1buf_pool; 61 mempool_t *r1buf_pool;
62
63 /* When taking over an array from a different personality, we store
64 * the new thread here until we fully activate the array.
65 */
66 struct mdk_thread_s *thread;
62}; 67};
63 68
64typedef struct r1_private_data_s conf_t; 69typedef struct r1_private_data_s conf_t;