Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--	drivers/md/raid1.c	217
1 file changed, 140 insertions(+), 77 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e07ce2e033a9..859bd3ffe435 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -677,6 +677,7 @@ static void raise_barrier(conf_t *conf)
 static void lower_barrier(conf_t *conf)
 {
 	unsigned long flags;
+	BUG_ON(conf->barrier <= 0);
 	spin_lock_irqsave(&conf->resync_lock, flags);
 	conf->barrier--;
 	spin_unlock_irqrestore(&conf->resync_lock, flags);
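The BUG_ON added above asserts that lower_barrier() is only ever called with the barrier actually raised, i.e. strictly paired with an earlier raise_barrier() or with the barrier pre-set to 1 by the takeover path added later in this diff; an unbalanced call would otherwise silently drive the counter negative. A minimal sketch of the invariant (illustrative only, not driver code):

	raise_barrier(conf);	/* ->barrier goes 0 -> 1; normal I/O is held off */
	/* ... resync/reshape work runs exclusively here ... */
	lower_barrier(conf);	/* ->barrier goes 1 -> 0; normal I/O resumes */
	/* a second, unpaired lower_barrier() here would now trip the BUG_ON */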
@@ -801,6 +802,25 @@ static int make_request(struct request_queue *q, struct bio * bio)
 
 	md_write_start(mddev, bio); /* wait on superblock update early */
 
+	if (bio_data_dir(bio) == WRITE &&
+	    bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo &&
+	    bio->bi_sector < mddev->suspend_hi) {
+		/* As the suspend_* range is controlled by
+		 * userspace, we want an interruptible
+		 * wait.
+		 */
+		DEFINE_WAIT(w);
+		for (;;) {
+			flush_signals(current);
+			prepare_to_wait(&conf->wait_barrier,
+					&w, TASK_INTERRUPTIBLE);
+			if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo ||
+			    bio->bi_sector >= mddev->suspend_hi)
+				break;
+			schedule();
+		}
+		finish_wait(&conf->wait_barrier, &w);
+	}
 	if (unlikely(!mddev->barriers_work &&
 		     bio_rw_flagged(bio, BIO_RW_BARRIER))) {
 		if (rw == WRITE)
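The open-coded loop above is the standard prepare_to_wait()/finish_wait() idiom rather than wait_event_interruptible(), because the wait must tolerate (and discard) pending signals yet still re-test a condition that userspace can move at any time via the suspend_lo/suspend_hi sysfs files. Stripped of the RAID-specific condition, the pattern is (a generic sketch, not driver code; `wq` and `condition` are placeholders):

	DEFINE_WAIT(w);
	for (;;) {
		flush_signals(current);		/* drop pending signals so schedule() really sleeps */
		prepare_to_wait(&wq, &w, TASK_INTERRUPTIBLE);
		if (condition)			/* re-test after every wakeup */
			break;
		schedule();			/* sleep until wake_up(&wq) or a signal */
	}
	finish_wait(&wq, &w);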
@@ -923,7 +943,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
 
 	/* do behind I/O ? */
 	if (bitmap &&
-	    atomic_read(&bitmap->behind_writes) < bitmap->max_write_behind &&
+	    (atomic_read(&bitmap->behind_writes)
+	     < mddev->bitmap_info.max_write_behind) &&
 	    (behind_pages = alloc_behind_pages(bio)) != NULL)
 		set_bit(R1BIO_BehindIO, &r1_bio->state);
 
@@ -1941,74 +1962,48 @@ static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks)
 	return mddev->dev_sectors;
 }
 
-static int run(mddev_t *mddev)
+static conf_t *setup_conf(mddev_t *mddev)
 {
 	conf_t *conf;
-	int i, j, disk_idx;
+	int i;
 	mirror_info_t *disk;
 	mdk_rdev_t *rdev;
+	int err = -ENOMEM;
 
-	if (mddev->level != 1) {
-		printk("raid1: %s: raid level not set to mirroring (%d)\n",
-		       mdname(mddev), mddev->level);
-		goto out;
-	}
-	if (mddev->reshape_position != MaxSector) {
-		printk("raid1: %s: reshape_position set but not supported\n",
-		       mdname(mddev));
-		goto out;
-	}
-	/*
-	 * copy the already verified devices into our private RAID1
-	 * bookkeeping area. [whatever we allocate in run(),
-	 * should be freed in stop()]
-	 */
 	conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
-	mddev->private = conf;
 	if (!conf)
-		goto out_no_mem;
+		goto abort;
 
 	conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
 				 GFP_KERNEL);
 	if (!conf->mirrors)
-		goto out_no_mem;
+		goto abort;
 
 	conf->tmppage = alloc_page(GFP_KERNEL);
 	if (!conf->tmppage)
-		goto out_no_mem;
+		goto abort;
 
-	conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
+	conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
 	if (!conf->poolinfo)
-		goto out_no_mem;
-	conf->poolinfo->mddev = NULL;
+		goto abort;
 	conf->poolinfo->raid_disks = mddev->raid_disks;
 	conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
 					  r1bio_pool_free,
 					  conf->poolinfo);
 	if (!conf->r1bio_pool)
-		goto out_no_mem;
+		goto abort;
+
 	conf->poolinfo->mddev = mddev;
 
 	spin_lock_init(&conf->device_lock);
-	mddev->queue->queue_lock = &conf->device_lock;
-
 	list_for_each_entry(rdev, &mddev->disks, same_set) {
-		disk_idx = rdev->raid_disk;
+		int disk_idx = rdev->raid_disk;
 		if (disk_idx >= mddev->raid_disks
 		    || disk_idx < 0)
 			continue;
 		disk = conf->mirrors + disk_idx;
 
 		disk->rdev = rdev;
-		disk_stack_limits(mddev->gendisk, rdev->bdev,
-				  rdev->data_offset << 9);
-		/* as we don't honour merge_bvec_fn, we must never risk
-		 * violating it, so limit ->max_sector to one PAGE, as
-		 * a one page request is never in violation.
-		 */
-		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		disk->head_position = 0;
 	}
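Two things happen in the hunk above. First, everything that touches mddev rather than conf (mddev->private, queue_lock, disk_stack_limits(), the merge_bvec_fn clamp) moves out of this function into run(), so setup_conf() builds only the personality's private state and can safely be called on an array still running another personality. Second, poolinfo is now allocated with kzalloc() instead of kmalloc(), which is what lets the explicit `conf->poolinfo->mddev = NULL` line disappear; kzalloc() behaves roughly like this sketch:

	/* kzalloc(size, flags) is equivalent to: */
	void *p = kmalloc(size, flags);
	if (p)
		memset(p, 0, size);	/* every field starts as 0/NULL */
	return p;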
@@ -2022,8 +2017,7 @@ static int run(mddev_t *mddev)
 	bio_list_init(&conf->pending_bio_list);
 	bio_list_init(&conf->flushing_bio_list);
 
-
-	mddev->degraded = 0;
+	conf->last_used = -1;
 	for (i = 0; i < conf->raid_disks; i++) {
 
 		disk = conf->mirrors + i;
@@ -2031,38 +2025,97 @@ static int run(mddev_t *mddev)
 		if (!disk->rdev ||
 		    !test_bit(In_sync, &disk->rdev->flags)) {
 			disk->head_position = 0;
-			mddev->degraded++;
 			if (disk->rdev)
 				conf->fullsync = 1;
-		}
+		} else if (conf->last_used < 0)
+			/*
+			 * The first working device is used as a
+			 * starting point to read balancing.
+			 */
+			conf->last_used = i;
 	}
-	if (mddev->degraded == conf->raid_disks) {
+
+	err = -EIO;
+	if (conf->last_used < 0) {
 		printk(KERN_ERR "raid1: no operational mirrors for %s\n",
 		       mdname(mddev));
-		goto out_free_conf;
+		goto abort;
 	}
-	if (conf->raid_disks - mddev->degraded == 1)
-		mddev->recovery_cp = MaxSector;
+	err = -ENOMEM;
+	conf->thread = md_register_thread(raid1d, mddev, NULL);
+	if (!conf->thread) {
+		printk(KERN_ERR
+		       "raid1: couldn't allocate thread for %s\n",
+		       mdname(mddev));
+		goto abort;
+	}
+
+	return conf;
+
+ abort:
+	if (conf) {
+		if (conf->r1bio_pool)
+			mempool_destroy(conf->r1bio_pool);
+		kfree(conf->mirrors);
+		safe_put_page(conf->tmppage);
+		kfree(conf->poolinfo);
+		kfree(conf);
+	}
+	return ERR_PTR(err);
+}
 
+static int run(mddev_t *mddev)
+{
+	conf_t *conf;
+	int i;
+	mdk_rdev_t *rdev;
+
+	if (mddev->level != 1) {
+		printk("raid1: %s: raid level not set to mirroring (%d)\n",
+		       mdname(mddev), mddev->level);
+		return -EIO;
+	}
+	if (mddev->reshape_position != MaxSector) {
+		printk("raid1: %s: reshape_position set but not supported\n",
+		       mdname(mddev));
+		return -EIO;
+	}
 	/*
-	 * find the first working one and use it as a starting point
-	 * to read balancing.
+	 * copy the already verified devices into our private RAID1
+	 * bookkeeping area. [whatever we allocate in run(),
+	 * should be freed in stop()]
 	 */
-	for (j = 0; j < conf->raid_disks &&
-		     (!conf->mirrors[j].rdev ||
-		      !test_bit(In_sync, &conf->mirrors[j].rdev->flags)) ; j++)
-		/* nothing */;
-	conf->last_used = j;
+	if (mddev->private == NULL)
+		conf = setup_conf(mddev);
+	else
+		conf = mddev->private;
 
+	if (IS_ERR(conf))
+		return PTR_ERR(conf);
 
-	mddev->thread = md_register_thread(raid1d, mddev, NULL);
-	if (!mddev->thread) {
-		printk(KERN_ERR
-		       "raid1: couldn't allocate thread for %s\n",
-		       mdname(mddev));
-		goto out_free_conf;
+	mddev->queue->queue_lock = &conf->device_lock;
+	list_for_each_entry(rdev, &mddev->disks, same_set) {
+		disk_stack_limits(mddev->gendisk, rdev->bdev,
+				  rdev->data_offset << 9);
+		/* as we don't honour merge_bvec_fn, we must never risk
+		 * violating it, so limit ->max_sector to one PAGE, as
+		 * a one page request is never in violation.
+		 */
+		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
+			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 	}
 
+	mddev->degraded = 0;
+	for (i=0; i < conf->raid_disks; i++)
+		if (conf->mirrors[i].rdev == NULL ||
+		    !test_bit(In_sync, &conf->mirrors[i].rdev->flags) ||
+		    test_bit(Faulty, &conf->mirrors[i].rdev->flags))
+			mddev->degraded++;
+
+	if (conf->raid_disks - mddev->degraded == 1)
+		mddev->recovery_cp = MaxSector;
+
 	if (mddev->recovery_cp != MaxSector)
 		printk(KERN_NOTICE "raid1: %s is not clean"
 		       " -- starting background reconstruction\n",
@@ -2071,9 +2124,14 @@ static int run(mddev_t *mddev)
 	       "raid1: raid set %s active with %d out of %d mirrors\n",
 	       mdname(mddev), mddev->raid_disks - mddev->degraded,
 	       mddev->raid_disks);
+
 	/*
 	 * Ok, everything is just fine now
 	 */
+	mddev->thread = conf->thread;
+	conf->thread = NULL;
+	mddev->private = conf;
+
 	md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
 
 	mddev->queue->unplug_fn = raid1_unplug;
@@ -2081,23 +2139,6 @@ static int run(mddev_t *mddev)
 	mddev->queue->backing_dev_info.congested_data = mddev;
 	md_integrity_register(mddev);
 	return 0;
-
-out_no_mem:
-	printk(KERN_ERR "raid1: couldn't allocate memory for %s\n",
-	       mdname(mddev));
-
-out_free_conf:
-	if (conf) {
-		if (conf->r1bio_pool)
-			mempool_destroy(conf->r1bio_pool);
-		kfree(conf->mirrors);
-		safe_put_page(conf->tmppage);
-		kfree(conf->poolinfo);
-		kfree(conf);
-		mddev->private = NULL;
-	}
-out:
-	return -EIO;
 }
 
 static int stop(mddev_t *mddev)
@@ -2271,6 +2312,9 @@ static void raid1_quiesce(mddev_t *mddev, int state)
 	conf_t *conf = mddev->private;
 
 	switch(state) {
+	case 2: /* wake for suspend */
+		wake_up(&conf->wait_barrier);
+		break;
 	case 1:
 		raise_barrier(conf);
 		break;
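The new quiesce state 2 closes the loop with the suspend wait added to make_request() above: when userspace moves the suspend_lo/suspend_hi window, the md core is expected to call ->quiesce(mddev, 2) so that writers parked on conf->wait_barrier wake up and re-test the range. A hedged sketch of that caller side (assumed and simplified; the md core call site is not part of this diff):

	/* after updating the range that make_request() tests ... */
	mddev->suspend_hi = new_hi;
	/* ... nudge any writer sleeping in the suspend loop: */
	mddev->pers->quiesce(mddev, 2);	/* raid1: wake_up(&conf->wait_barrier) */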
@@ -2280,6 +2324,23 @@ static void raid1_quiesce(mddev_t *mddev, int state)
 	}
 }
 
+static void *raid1_takeover(mddev_t *mddev)
+{
+	/* raid1 can take over:
+	 *  raid5 with 2 devices, any layout or chunk size
+	 */
+	if (mddev->level == 5 && mddev->raid_disks == 2) {
+		conf_t *conf;
+		mddev->new_level = 1;
+		mddev->new_layout = 0;
+		mddev->new_chunk_sectors = 0;
+		conf = setup_conf(mddev);
+		if (!IS_ERR(conf))
+			conf->barrier = 1;
+		return conf;
+	}
+	return ERR_PTR(-EINVAL);
+}
 
 static struct mdk_personality raid1_personality =
 {
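raid1_takeover() builds the replacement conf while the array is still running its old personality, which is exactly why setup_conf() was split out of run(): it must not touch mddev-wide state. Returning the conf with `barrier = 1` leaves the new array in the quiesced state the takeover was performed under, so normal I/O stays blocked until it is explicitly resumed, at which point the lower_barrier() BUG_ON above verifies the pairing. A simplified sketch of the handshake as assumed here (not verbatim md.c):

	void *priv = pers->takeover(mddev);	/* e.g. raid1_takeover() */
	if (IS_ERR(priv))
		return PTR_ERR(priv);		/* -EINVAL: combination refused */
	mddev->private = priv;			/* conf arrives with ->barrier == 1 */
	pers->run(mddev);			/* run() sees mddev->private and reuses it */
	pers->quiesce(mddev, 0);		/* lower_barrier(): I/O flows again */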
@@ -2299,6 +2360,7 @@ static struct mdk_personality raid1_personality =
 	.size		= raid1_size,
 	.check_reshape	= raid1_reshape,
 	.quiesce	= raid1_quiesce,
+	.takeover	= raid1_takeover,
 };
 
 static int __init raid_init(void)
@@ -2314,6 +2376,7 @@ static void raid_exit(void)
 module_init(raid_init);
 module_exit(raid_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD");
 MODULE_ALIAS("md-personality-3"); /* RAID1 */
 MODULE_ALIAS("md-raid1");
 MODULE_ALIAS("md-level-1");