diff options
-rw-r--r-- | drivers/md/raid1.c | 191 | ||||
-rw-r--r-- | drivers/md/raid1.h | 5 |
2 files changed, 120 insertions, 76 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 35b2d8646ae9..7549b0bad326 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -677,6 +677,7 @@ static void raise_barrier(conf_t *conf) | |||
677 | static void lower_barrier(conf_t *conf) | 677 | static void lower_barrier(conf_t *conf) |
678 | { | 678 | { |
679 | unsigned long flags; | 679 | unsigned long flags; |
680 | BUG_ON(conf->barrier <= 0); | ||
680 | spin_lock_irqsave(&conf->resync_lock, flags); | 681 | spin_lock_irqsave(&conf->resync_lock, flags); |
681 | conf->barrier--; | 682 | conf->barrier--; |
682 | spin_unlock_irqrestore(&conf->resync_lock, flags); | 683 | spin_unlock_irqrestore(&conf->resync_lock, flags); |
@@ -1960,74 +1961,48 @@ static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
1960 | return mddev->dev_sectors; | 1961 | return mddev->dev_sectors; |
1961 | } | 1962 | } |
1962 | 1963 | ||
1963 | static int run(mddev_t *mddev) | 1964 | static conf_t *setup_conf(mddev_t *mddev) |
1964 | { | 1965 | { |
1965 | conf_t *conf; | 1966 | conf_t *conf; |
1966 | int i, j, disk_idx; | 1967 | int i; |
1967 | mirror_info_t *disk; | 1968 | mirror_info_t *disk; |
1968 | mdk_rdev_t *rdev; | 1969 | mdk_rdev_t *rdev; |
1970 | int err = -ENOMEM; | ||
1969 | 1971 | ||
1970 | if (mddev->level != 1) { | ||
1971 | printk("raid1: %s: raid level not set to mirroring (%d)\n", | ||
1972 | mdname(mddev), mddev->level); | ||
1973 | goto out; | ||
1974 | } | ||
1975 | if (mddev->reshape_position != MaxSector) { | ||
1976 | printk("raid1: %s: reshape_position set but not supported\n", | ||
1977 | mdname(mddev)); | ||
1978 | goto out; | ||
1979 | } | ||
1980 | /* | ||
1981 | * copy the already verified devices into our private RAID1 | ||
1982 | * bookkeeping area. [whatever we allocate in run(), | ||
1983 | * should be freed in stop()] | ||
1984 | */ | ||
1985 | conf = kzalloc(sizeof(conf_t), GFP_KERNEL); | 1972 | conf = kzalloc(sizeof(conf_t), GFP_KERNEL); |
1986 | mddev->private = conf; | ||
1987 | if (!conf) | 1973 | if (!conf) |
1988 | goto out_no_mem; | 1974 | goto abort; |
1989 | 1975 | ||
1990 | conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, | 1976 | conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, |
1991 | GFP_KERNEL); | 1977 | GFP_KERNEL); |
1992 | if (!conf->mirrors) | 1978 | if (!conf->mirrors) |
1993 | goto out_no_mem; | 1979 | goto abort; |
1994 | 1980 | ||
1995 | conf->tmppage = alloc_page(GFP_KERNEL); | 1981 | conf->tmppage = alloc_page(GFP_KERNEL); |
1996 | if (!conf->tmppage) | 1982 | if (!conf->tmppage) |
1997 | goto out_no_mem; | 1983 | goto abort; |
1998 | 1984 | ||
1999 | conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL); | 1985 | conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL); |
2000 | if (!conf->poolinfo) | 1986 | if (!conf->poolinfo) |
2001 | goto out_no_mem; | 1987 | goto abort; |
2002 | conf->poolinfo->mddev = NULL; | ||
2003 | conf->poolinfo->raid_disks = mddev->raid_disks; | 1988 | conf->poolinfo->raid_disks = mddev->raid_disks; |
2004 | conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc, | 1989 | conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc, |
2005 | r1bio_pool_free, | 1990 | r1bio_pool_free, |
2006 | conf->poolinfo); | 1991 | conf->poolinfo); |
2007 | if (!conf->r1bio_pool) | 1992 | if (!conf->r1bio_pool) |
2008 | goto out_no_mem; | 1993 | goto abort; |
1994 | |||
2009 | conf->poolinfo->mddev = mddev; | 1995 | conf->poolinfo->mddev = mddev; |
2010 | 1996 | ||
2011 | spin_lock_init(&conf->device_lock); | 1997 | spin_lock_init(&conf->device_lock); |
2012 | mddev->queue->queue_lock = &conf->device_lock; | ||
2013 | |||
2014 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 1998 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
2015 | disk_idx = rdev->raid_disk; | 1999 | int disk_idx = rdev->raid_disk; |
2016 | if (disk_idx >= mddev->raid_disks | 2000 | if (disk_idx >= mddev->raid_disks |
2017 | || disk_idx < 0) | 2001 | || disk_idx < 0) |
2018 | continue; | 2002 | continue; |
2019 | disk = conf->mirrors + disk_idx; | 2003 | disk = conf->mirrors + disk_idx; |
2020 | 2004 | ||
2021 | disk->rdev = rdev; | 2005 | disk->rdev = rdev; |
2022 | disk_stack_limits(mddev->gendisk, rdev->bdev, | ||
2023 | rdev->data_offset << 9); | ||
2024 | /* as we don't honour merge_bvec_fn, we must never risk | ||
2025 | * violating it, so limit ->max_sector to one PAGE, as | ||
2026 | * a one page request is never in violation. | ||
2027 | */ | ||
2028 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn && | ||
2029 | queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) | ||
2030 | blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); | ||
2031 | 2006 | ||
2032 | disk->head_position = 0; | 2007 | disk->head_position = 0; |
2033 | } | 2008 | } |
@@ -2041,8 +2016,7 @@ static int run(mddev_t *mddev) | |||
2041 | bio_list_init(&conf->pending_bio_list); | 2016 | bio_list_init(&conf->pending_bio_list); |
2042 | bio_list_init(&conf->flushing_bio_list); | 2017 | bio_list_init(&conf->flushing_bio_list); |
2043 | 2018 | ||
2044 | 2019 | conf->last_used = -1; | |
2045 | mddev->degraded = 0; | ||
2046 | for (i = 0; i < conf->raid_disks; i++) { | 2020 | for (i = 0; i < conf->raid_disks; i++) { |
2047 | 2021 | ||
2048 | disk = conf->mirrors + i; | 2022 | disk = conf->mirrors + i; |
@@ -2050,38 +2024,97 @@ static int run(mddev_t *mddev) | |||
2050 | if (!disk->rdev || | 2024 | if (!disk->rdev || |
2051 | !test_bit(In_sync, &disk->rdev->flags)) { | 2025 | !test_bit(In_sync, &disk->rdev->flags)) { |
2052 | disk->head_position = 0; | 2026 | disk->head_position = 0; |
2053 | mddev->degraded++; | ||
2054 | if (disk->rdev) | 2027 | if (disk->rdev) |
2055 | conf->fullsync = 1; | 2028 | conf->fullsync = 1; |
2056 | } | 2029 | } else if (conf->last_used < 0) |
2030 | /* | ||
2031 | * The first working device is used as a | ||
2032 | * starting point to read balancing. | ||
2033 | */ | ||
2034 | conf->last_used = i; | ||
2057 | } | 2035 | } |
2058 | if (mddev->degraded == conf->raid_disks) { | 2036 | |
2037 | err = -EIO; | ||
2038 | if (conf->last_used < 0) { | ||
2059 | printk(KERN_ERR "raid1: no operational mirrors for %s\n", | 2039 | printk(KERN_ERR "raid1: no operational mirrors for %s\n", |
2060 | mdname(mddev)); | 2040 | mdname(mddev)); |
2061 | goto out_free_conf; | 2041 | goto abort; |
2042 | } | ||
2043 | err = -ENOMEM; | ||
2044 | conf->thread = md_register_thread(raid1d, mddev, NULL); | ||
2045 | if (!conf->thread) { | ||
2046 | printk(KERN_ERR | ||
2047 | "raid1: couldn't allocate thread for %s\n", | ||
2048 | mdname(mddev)); | ||
2049 | goto abort; | ||
2062 | } | 2050 | } |
2063 | if (conf->raid_disks - mddev->degraded == 1) | ||
2064 | mddev->recovery_cp = MaxSector; | ||
2065 | 2051 | ||
2052 | return conf; | ||
2053 | |||
2054 | abort: | ||
2055 | if (conf) { | ||
2056 | if (conf->r1bio_pool) | ||
2057 | mempool_destroy(conf->r1bio_pool); | ||
2058 | kfree(conf->mirrors); | ||
2059 | safe_put_page(conf->tmppage); | ||
2060 | kfree(conf->poolinfo); | ||
2061 | kfree(conf); | ||
2062 | } | ||
2063 | return ERR_PTR(err); | ||
2064 | } | ||
2065 | |||
2066 | static int run(mddev_t *mddev) | ||
2067 | { | ||
2068 | conf_t *conf; | ||
2069 | int i; | ||
2070 | mdk_rdev_t *rdev; | ||
2071 | |||
2072 | if (mddev->level != 1) { | ||
2073 | printk("raid1: %s: raid level not set to mirroring (%d)\n", | ||
2074 | mdname(mddev), mddev->level); | ||
2075 | return -EIO; | ||
2076 | } | ||
2077 | if (mddev->reshape_position != MaxSector) { | ||
2078 | printk("raid1: %s: reshape_position set but not supported\n", | ||
2079 | mdname(mddev)); | ||
2080 | return -EIO; | ||
2081 | } | ||
2066 | /* | 2082 | /* |
2067 | * find the first working one and use it as a starting point | 2083 | * copy the already verified devices into our private RAID1 |
2068 | * to read balancing. | 2084 | * bookkeeping area. [whatever we allocate in run(), |
2085 | * should be freed in stop()] | ||
2069 | */ | 2086 | */ |
2070 | for (j = 0; j < conf->raid_disks && | 2087 | if (mddev->private == NULL) |
2071 | (!conf->mirrors[j].rdev || | 2088 | conf = setup_conf(mddev); |
2072 | !test_bit(In_sync, &conf->mirrors[j].rdev->flags)) ; j++) | 2089 | else |
2073 | /* nothing */; | 2090 | conf = mddev->private; |
2074 | conf->last_used = j; | ||
2075 | 2091 | ||
2092 | if (IS_ERR(conf)) | ||
2093 | return PTR_ERR(conf); | ||
2076 | 2094 | ||
2077 | mddev->thread = md_register_thread(raid1d, mddev, NULL); | 2095 | mddev->queue->queue_lock = &conf->device_lock; |
2078 | if (!mddev->thread) { | 2096 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
2079 | printk(KERN_ERR | 2097 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
2080 | "raid1: couldn't allocate thread for %s\n", | 2098 | rdev->data_offset << 9); |
2081 | mdname(mddev)); | 2099 | /* as we don't honour merge_bvec_fn, we must never risk |
2082 | goto out_free_conf; | 2100 | * violating it, so limit ->max_sector to one PAGE, as |
2101 | * a one page request is never in violation. | ||
2102 | */ | ||
2103 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn && | ||
2104 | queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) | ||
2105 | blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); | ||
2083 | } | 2106 | } |
2084 | 2107 | ||
2108 | mddev->degraded = 0; | ||
2109 | for (i=0; i < conf->raid_disks; i++) | ||
2110 | if (conf->mirrors[i].rdev == NULL || | ||
2111 | !test_bit(In_sync, &conf->mirrors[i].rdev->flags) || | ||
2112 | test_bit(Faulty, &conf->mirrors[i].rdev->flags)) | ||
2113 | mddev->degraded++; | ||
2114 | |||
2115 | if (conf->raid_disks - mddev->degraded == 1) | ||
2116 | mddev->recovery_cp = MaxSector; | ||
2117 | |||
2085 | if (mddev->recovery_cp != MaxSector) | 2118 | if (mddev->recovery_cp != MaxSector) |
2086 | printk(KERN_NOTICE "raid1: %s is not clean" | 2119 | printk(KERN_NOTICE "raid1: %s is not clean" |
2087 | " -- starting background reconstruction\n", | 2120 | " -- starting background reconstruction\n", |
@@ -2090,9 +2123,14 @@ static int run(mddev_t *mddev) | |||
2090 | "raid1: raid set %s active with %d out of %d mirrors\n", | 2123 | "raid1: raid set %s active with %d out of %d mirrors\n", |
2091 | mdname(mddev), mddev->raid_disks - mddev->degraded, | 2124 | mdname(mddev), mddev->raid_disks - mddev->degraded, |
2092 | mddev->raid_disks); | 2125 | mddev->raid_disks); |
2126 | |||
2093 | /* | 2127 | /* |
2094 | * Ok, everything is just fine now | 2128 | * Ok, everything is just fine now |
2095 | */ | 2129 | */ |
2130 | mddev->thread = conf->thread; | ||
2131 | conf->thread = NULL; | ||
2132 | mddev->private = conf; | ||
2133 | |||
2096 | md_set_array_sectors(mddev, raid1_size(mddev, 0, 0)); | 2134 | md_set_array_sectors(mddev, raid1_size(mddev, 0, 0)); |
2097 | 2135 | ||
2098 | mddev->queue->unplug_fn = raid1_unplug; | 2136 | mddev->queue->unplug_fn = raid1_unplug; |
@@ -2100,23 +2138,6 @@ static int run(mddev_t *mddev) | |||
2100 | mddev->queue->backing_dev_info.congested_data = mddev; | 2138 | mddev->queue->backing_dev_info.congested_data = mddev; |
2101 | md_integrity_register(mddev); | 2139 | md_integrity_register(mddev); |
2102 | return 0; | 2140 | return 0; |
2103 | |||
2104 | out_no_mem: | ||
2105 | printk(KERN_ERR "raid1: couldn't allocate memory for %s\n", | ||
2106 | mdname(mddev)); | ||
2107 | |||
2108 | out_free_conf: | ||
2109 | if (conf) { | ||
2110 | if (conf->r1bio_pool) | ||
2111 | mempool_destroy(conf->r1bio_pool); | ||
2112 | kfree(conf->mirrors); | ||
2113 | safe_put_page(conf->tmppage); | ||
2114 | kfree(conf->poolinfo); | ||
2115 | kfree(conf); | ||
2116 | mddev->private = NULL; | ||
2117 | } | ||
2118 | out: | ||
2119 | return -EIO; | ||
2120 | } | 2141 | } |
2121 | 2142 | ||
2122 | static int stop(mddev_t *mddev) | 2143 | static int stop(mddev_t *mddev) |
@@ -2302,6 +2323,23 @@ static void raid1_quiesce(mddev_t *mddev, int state) | |||
2302 | } | 2323 | } |
2303 | } | 2324 | } |
2304 | 2325 | ||
2326 | static void *raid1_takeover(mddev_t *mddev) | ||
2327 | { | ||
2328 | /* raid1 can take over: | ||
2329 | * raid5 with 2 devices, any layout or chunk size | ||
2330 | */ | ||
2331 | if (mddev->level == 5 && mddev->raid_disks == 2) { | ||
2332 | conf_t *conf; | ||
2333 | mddev->new_level = 1; | ||
2334 | mddev->new_layout = 0; | ||
2335 | mddev->new_chunk_sectors = 0; | ||
2336 | conf = setup_conf(mddev); | ||
2337 | if (!IS_ERR(conf)) | ||
2338 | conf->barrier = 1; | ||
2339 | return conf; | ||
2340 | } | ||
2341 | return ERR_PTR(-EINVAL); | ||
2342 | } | ||
2305 | 2343 | ||
2306 | static struct mdk_personality raid1_personality = | 2344 | static struct mdk_personality raid1_personality = |
2307 | { | 2345 | { |
@@ -2321,6 +2359,7 @@ static struct mdk_personality raid1_personality = | |||
2321 | .size = raid1_size, | 2359 | .size = raid1_size, |
2322 | .check_reshape = raid1_reshape, | 2360 | .check_reshape = raid1_reshape, |
2323 | .quiesce = raid1_quiesce, | 2361 | .quiesce = raid1_quiesce, |
2362 | .takeover = raid1_takeover, | ||
2324 | }; | 2363 | }; |
2325 | 2364 | ||
2326 | static int __init raid_init(void) | 2365 | static int __init raid_init(void) |
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index e87b84deff68..5f2d443ae28a 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h | |||
@@ -59,6 +59,11 @@ struct r1_private_data_s { | |||
59 | 59 | ||
60 | mempool_t *r1bio_pool; | 60 | mempool_t *r1bio_pool; |
61 | mempool_t *r1buf_pool; | 61 | mempool_t *r1buf_pool; |
62 | |||
63 | /* When taking over an array from a different personality, we store | ||
64 | * the new thread here until we fully activate the array. | ||
65 | */ | ||
66 | struct mdk_thread_s *thread; | ||
62 | }; | 67 | }; |
63 | 68 | ||
64 | typedef struct r1_private_data_s conf_t; | 69 | typedef struct r1_private_data_s conf_t; |