Diffstat (limited to 'drivers/md/raid1.c'):
 drivers/md/raid1.c | 236 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 153 insertions(+), 83 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e07ce2e033a9..e59b10e66edb 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -31,6 +31,7 @@
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/blkdev.h>
 #include <linux/seq_file.h>
@@ -677,6 +678,7 @@ static void raise_barrier(conf_t *conf)
 static void lower_barrier(conf_t *conf)
 {
 	unsigned long flags;
+	BUG_ON(conf->barrier <= 0);
 	spin_lock_irqsave(&conf->resync_lock, flags);
 	conf->barrier--;
 	spin_unlock_irqrestore(&conf->resync_lock, flags);
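
The BUG_ON() added here asserts the pairing contract: every lower_barrier()
must be preceded by a matching raise_barrier(), so conf->barrier can never be
driven below zero. A minimal sketch of a caller honouring that contract
(simplified illustration, not the actual resync code):

	static void resync_one_window(conf_t *conf)
	{
		raise_barrier(conf);	/* conf->barrier++, blocks new normal I/O */
		/* ... service one resync window ... */
		lower_barrier(conf);	/* conf->barrier--, wakes blocked writers */
	}
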
@@ -801,6 +803,25 @@ static int make_request(struct request_queue *q, struct bio * bio)
 
 	md_write_start(mddev, bio); /* wait on superblock update early */
 
+	if (bio_data_dir(bio) == WRITE &&
+	    bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo &&
+	    bio->bi_sector < mddev->suspend_hi) {
+		/* As the suspend_* range is controlled by
+		 * userspace, we want an interruptible
+		 * wait.
+		 */
+		DEFINE_WAIT(w);
+		for (;;) {
+			flush_signals(current);
+			prepare_to_wait(&conf->wait_barrier,
+					&w, TASK_INTERRUPTIBLE);
+			if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo ||
+			    bio->bi_sector >= mddev->suspend_hi)
+				break;
+			schedule();
+		}
+		finish_wait(&conf->wait_barrier, &w);
+	}
 	if (unlikely(!mddev->barriers_work &&
 		     bio_rw_flagged(bio, BIO_RW_BARRIER))) {
 		if (rw == WRITE)
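
Two things are worth noting in the hunk above. The wait is open-coded rather
than built on wait_event_interruptible() because a signal must not abort the
write: flush_signals() discards pending signals on each pass and the loop
simply re-tests the range. The test itself is a half-open interval overlap
check; restated as a helper (hypothetical name, not part of the patch):

	/* The bio spans [start, start + sectors); writes must wait while
	 * that intersects the suspended range [lo, hi). */
	static inline int overlaps_suspended(sector_t start, sector_t sectors,
					     sector_t lo, sector_t hi)
	{
		return start + sectors > lo && start < hi;
	}
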
@@ -923,7 +944,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
 
 	/* do behind I/O ? */
 	if (bitmap &&
-	    atomic_read(&bitmap->behind_writes) < bitmap->max_write_behind &&
+	    (atomic_read(&bitmap->behind_writes)
+	     < mddev->bitmap_info.max_write_behind) &&
 	    (behind_pages = alloc_behind_pages(bio)) != NULL)
 		set_bit(R1BIO_BehindIO, &r1_bio->state);
 
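
The write-behind limit moves from the bitmap to mddev->bitmap_info, so the
setting lives on the array rather than on the bitmap object itself. The gate
is otherwise unchanged; as a standalone predicate (hypothetical helper,
assuming only the fields visible in the hunk):

	static int may_write_behind(mddev_t *mddev, struct bitmap *bitmap)
	{
		return bitmap &&
		       atomic_read(&bitmap->behind_writes) <
				mddev->bitmap_info.max_write_behind;
	}
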
@@ -1131,13 +1153,17 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	disk_stack_limits(mddev->gendisk, rdev->bdev,
 			  rdev->data_offset << 9);
-	/* as we don't honour merge_bvec_fn, we must never risk
-	 * violating it, so limit ->max_sector to one PAGE, as
-	 * a one page request is never in violation.
+	/* as we don't honour merge_bvec_fn, we must
+	 * never risk violating it, so limit
+	 * ->max_segments to one lying within a single
+	 * page, as a one page request is never in
+	 * violation.
 	 */
-	if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-	    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-		blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
+	if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
+		blk_queue_max_segments(mddev->queue, 1);
+		blk_queue_segment_boundary(mddev->queue,
+					   PAGE_CACHE_SIZE - 1);
+	}
 
 	p->head_position = 0;
 	rdev->raid_disk = mirror;
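
The restriction changes form but not intent: raid1 never consults a member
queue's merge_bvec_fn, so it must only accept bios that no merge_bvec_fn
could reject. Instead of capping max_sectors, the queue is now limited to a
single segment that cannot cross a page boundary, which guarantees every
incoming bio fits within one page. The constraint in isolation (hypothetical
wrapper):

	static void limit_to_single_page(struct request_queue *q)
	{
		blk_queue_max_segments(q, 1);	/* one bio_vec per request */
		/* and that segment may not straddle a page boundary */
		blk_queue_segment_boundary(q, PAGE_CACHE_SIZE - 1);
	}
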
@@ -1941,74 +1967,48 @@ static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks)
 	return mddev->dev_sectors;
 }
 
-static int run(mddev_t *mddev)
+static conf_t *setup_conf(mddev_t *mddev)
 {
 	conf_t *conf;
-	int i, j, disk_idx;
+	int i;
 	mirror_info_t *disk;
 	mdk_rdev_t *rdev;
+	int err = -ENOMEM;
 
-	if (mddev->level != 1) {
-		printk("raid1: %s: raid level not set to mirroring (%d)\n",
-		       mdname(mddev), mddev->level);
-		goto out;
-	}
-	if (mddev->reshape_position != MaxSector) {
-		printk("raid1: %s: reshape_position set but not supported\n",
-		       mdname(mddev));
-		goto out;
-	}
-	/*
-	 * copy the already verified devices into our private RAID1
-	 * bookkeeping area. [whatever we allocate in run(),
-	 * should be freed in stop()]
-	 */
 	conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
-	mddev->private = conf;
 	if (!conf)
-		goto out_no_mem;
+		goto abort;
 
 	conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
 				GFP_KERNEL);
 	if (!conf->mirrors)
-		goto out_no_mem;
+		goto abort;
 
 	conf->tmppage = alloc_page(GFP_KERNEL);
 	if (!conf->tmppage)
-		goto out_no_mem;
+		goto abort;
 
-	conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
+	conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
 	if (!conf->poolinfo)
-		goto out_no_mem;
-	conf->poolinfo->mddev = NULL;
+		goto abort;
 	conf->poolinfo->raid_disks = mddev->raid_disks;
 	conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
 					  r1bio_pool_free,
 					  conf->poolinfo);
 	if (!conf->r1bio_pool)
-		goto out_no_mem;
+		goto abort;
+
 	conf->poolinfo->mddev = mddev;
 
 	spin_lock_init(&conf->device_lock);
-	mddev->queue->queue_lock = &conf->device_lock;
-
 	list_for_each_entry(rdev, &mddev->disks, same_set) {
-		disk_idx = rdev->raid_disk;
+		int disk_idx = rdev->raid_disk;
 		if (disk_idx >= mddev->raid_disks
 		    || disk_idx < 0)
 			continue;
 		disk = conf->mirrors + disk_idx;
 
 		disk->rdev = rdev;
-		disk_stack_limits(mddev->gendisk, rdev->bdev,
-				  rdev->data_offset << 9);
-		/* as we don't honour merge_bvec_fn, we must never risk
-		 * violating it, so limit ->max_sector to one PAGE, as
-		 * a one page request is never in violation.
-		 */
-		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		disk->head_position = 0;
 	}
@@ -2022,8 +2022,7 @@ static int run(mddev_t *mddev)
 	bio_list_init(&conf->pending_bio_list);
 	bio_list_init(&conf->flushing_bio_list);
 
-
-	mddev->degraded = 0;
+	conf->last_used = -1;
 	for (i = 0; i < conf->raid_disks; i++) {
 
 		disk = conf->mirrors + i;
@@ -2031,38 +2030,99 @@ static int run(mddev_t *mddev)
 		if (!disk->rdev ||
 		    !test_bit(In_sync, &disk->rdev->flags)) {
 			disk->head_position = 0;
-			mddev->degraded++;
 			if (disk->rdev)
 				conf->fullsync = 1;
-		}
+		} else if (conf->last_used < 0)
+			/*
+			 * The first working device is used as a
+			 * starting point to read balancing.
+			 */
+			conf->last_used = i;
 	}
-	if (mddev->degraded == conf->raid_disks) {
+
+	err = -EIO;
+	if (conf->last_used < 0) {
 		printk(KERN_ERR "raid1: no operational mirrors for %s\n",
 		       mdname(mddev));
-		goto out_free_conf;
+		goto abort;
 	}
-	if (conf->raid_disks - mddev->degraded == 1)
-		mddev->recovery_cp = MaxSector;
+	err = -ENOMEM;
+	conf->thread = md_register_thread(raid1d, mddev, NULL);
+	if (!conf->thread) {
+		printk(KERN_ERR
+		       "raid1: couldn't allocate thread for %s\n",
+		       mdname(mddev));
+		goto abort;
+	}
+
+	return conf;
+
+ abort:
+	if (conf) {
+		if (conf->r1bio_pool)
+			mempool_destroy(conf->r1bio_pool);
+		kfree(conf->mirrors);
+		safe_put_page(conf->tmppage);
+		kfree(conf->poolinfo);
+		kfree(conf);
+	}
+	return ERR_PTR(err);
+}
 
+static int run(mddev_t *mddev)
+{
+	conf_t *conf;
+	int i;
+	mdk_rdev_t *rdev;
+
+	if (mddev->level != 1) {
+		printk("raid1: %s: raid level not set to mirroring (%d)\n",
+		       mdname(mddev), mddev->level);
+		return -EIO;
+	}
+	if (mddev->reshape_position != MaxSector) {
+		printk("raid1: %s: reshape_position set but not supported\n",
+		       mdname(mddev));
+		return -EIO;
+	}
 	/*
-	 * find the first working one and use it as a starting point
-	 * to read balancing.
+	 * copy the already verified devices into our private RAID1
+	 * bookkeeping area. [whatever we allocate in run(),
+	 * should be freed in stop()]
 	 */
-	for (j = 0; j < conf->raid_disks &&
-		     (!conf->mirrors[j].rdev ||
-		      !test_bit(In_sync, &conf->mirrors[j].rdev->flags)) ; j++)
-		/* nothing */;
-	conf->last_used = j;
+	if (mddev->private == NULL)
+		conf = setup_conf(mddev);
+	else
+		conf = mddev->private;
 
+	if (IS_ERR(conf))
+		return PTR_ERR(conf);
 
-	mddev->thread = md_register_thread(raid1d, mddev, NULL);
-	if (!mddev->thread) {
-		printk(KERN_ERR
-		       "raid1: couldn't allocate thread for %s\n",
-		       mdname(mddev));
-		goto out_free_conf;
+	mddev->queue->queue_lock = &conf->device_lock;
+	list_for_each_entry(rdev, &mddev->disks, same_set) {
+		disk_stack_limits(mddev->gendisk, rdev->bdev,
+				  rdev->data_offset << 9);
+		/* as we don't honour merge_bvec_fn, we must never risk
+		 * violating it, so limit ->max_segments to 1 lying within
+		 * a single page, as a one page request is never in violation.
+		 */
+		if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
+			blk_queue_max_segments(mddev->queue, 1);
+			blk_queue_segment_boundary(mddev->queue,
+						   PAGE_CACHE_SIZE - 1);
+		}
 	}
 
+	mddev->degraded = 0;
+	for (i=0; i < conf->raid_disks; i++)
+		if (conf->mirrors[i].rdev == NULL ||
+		    !test_bit(In_sync, &conf->mirrors[i].rdev->flags) ||
+		    test_bit(Faulty, &conf->mirrors[i].rdev->flags))
+			mddev->degraded++;
+
+	if (conf->raid_disks - mddev->degraded == 1)
+		mddev->recovery_cp = MaxSector;
+
 	if (mddev->recovery_cp != MaxSector)
 		printk(KERN_NOTICE "raid1: %s is not clean"
 		       " -- starting background reconstruction\n",
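
setup_conf() now owns all allocation and reports failure through the pointer
itself via ERR_PTR(), replacing run()'s old out_no_mem/out_free_conf labels
with a single abort path. Callers decode the result with IS_ERR()/PTR_ERR(),
as run() does above; the convention in miniature:

	conf = setup_conf(mddev);
	if (IS_ERR(conf))		/* not a usable pointer */
		return PTR_ERR(conf);	/* -ENOMEM or -EIO from setup_conf() */
	/* conf is valid from here on */
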
@@ -2071,9 +2131,14 @@ static int run(mddev_t *mddev)
 	       "raid1: raid set %s active with %d out of %d mirrors\n",
 	       mdname(mddev), mddev->raid_disks - mddev->degraded,
 	       mddev->raid_disks);
+
 	/*
 	 * Ok, everything is just fine now
 	 */
+	mddev->thread = conf->thread;
+	conf->thread = NULL;
+	mddev->private = conf;
+
 	md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
 
 	mddev->queue->unplug_fn = raid1_unplug;
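
Note the thread handoff: setup_conf() registers raid1d but parks it in
conf->thread, and only run() moves it to mddev->thread. That ordering matters
for takeover, where the new conf (thread included) is built while the
previous personality still owns the array. A sketch of the sequence, assuming
md core's level-change flow:

	conf = setup_conf(mddev);	/* raid1d registered, parked in conf */
	/* ... old personality is torn down ... */
	mddev->thread = conf->thread;	/* run(): the array adopts raid1d */
	conf->thread = NULL;
	mddev->private = conf;
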
@@ -2081,23 +2146,6 @@ static int run(mddev_t *mddev)
 	mddev->queue->backing_dev_info.congested_data = mddev;
 	md_integrity_register(mddev);
 	return 0;
-
-out_no_mem:
-	printk(KERN_ERR "raid1: couldn't allocate memory for %s\n",
-	       mdname(mddev));
-
-out_free_conf:
-	if (conf) {
-		if (conf->r1bio_pool)
-			mempool_destroy(conf->r1bio_pool);
-		kfree(conf->mirrors);
-		safe_put_page(conf->tmppage);
-		kfree(conf->poolinfo);
-		kfree(conf);
-		mddev->private = NULL;
-	}
-out:
-	return -EIO;
 }
 
 static int stop(mddev_t *mddev)
@@ -2271,6 +2319,9 @@ static void raid1_quiesce(mddev_t *mddev, int state)
 	conf_t *conf = mddev->private;
 
 	switch(state) {
+	case 2: /* wake for suspend */
+		wake_up(&conf->wait_barrier);
+		break;
 	case 1:
 		raise_barrier(conf);
 		break;
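
State 2 is a lightweight wakeup: it neither raises nor lowers the barrier, it
only wakes sleepers on conf->wait_barrier so that writers parked in
make_request()'s suspend-range loop re-test suspend_lo/suspend_hi. An
illustrative caller sequence (assumed md-core usage when the suspended range
shrinks; not part of this diff):

	mddev->suspend_lo = new_lo;	/* hypothetical new bound */
	mddev->pers->quiesce(mddev, 2);	/* wake waiters to re-check the range */
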
@@ -2280,6 +2331,23 @@ static void raid1_quiesce(mddev_t *mddev, int state)
 	}
 }
 
+static void *raid1_takeover(mddev_t *mddev)
+{
+	/* raid1 can take over:
+	 * raid5 with 2 devices, any layout or chunk size
+	 */
+	if (mddev->level == 5 && mddev->raid_disks == 2) {
+		conf_t *conf;
+		mddev->new_level = 1;
+		mddev->new_layout = 0;
+		mddev->new_chunk_sectors = 0;
+		conf = setup_conf(mddev);
+		if (!IS_ERR(conf))
+			conf->barrier = 1;
+		return conf;
+	}
+	return ERR_PTR(-EINVAL);
+}
 
 static struct mdk_personality raid1_personality =
 {
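
The takeover is safe because a two-device raid5 is already a mirror on disk:
XOR parity computed over a single data block is the block itself, so both
members hold identical data and only the bookkeeping changes. The new conf is
created with ->barrier = 1 since the array is quiesced during the level
change; the eventual quiesce(mddev, 0) releases it through lower_barrier(),
which the BUG_ON added earlier keeps honest. A hedged sketch of how md core
consumes takeover() (assumed calling pattern, not shown in this diff):

	priv = pers->takeover(mddev);	/* raid1_takeover() -> setup_conf() */
	if (IS_ERR(priv))
		return PTR_ERR(priv);	/* e.g. -EINVAL for an unsupported source */
	mddev->private = priv;
	mddev->pers = pers;		/* pers->run(mddev) then finishes the switch */
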
@@ -2299,6 +2367,7 @@ static struct mdk_personality raid1_personality =
 	.size		= raid1_size,
 	.check_reshape	= raid1_reshape,
 	.quiesce	= raid1_quiesce,
+	.takeover	= raid1_takeover,
 };
 
 static int __init raid_init(void)
@@ -2314,6 +2383,7 @@ static void raid_exit(void)
 module_init(raid_init);
 module_exit(raid_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD");
 MODULE_ALIAS("md-personality-3"); /* RAID1 */
 MODULE_ALIAS("md-raid1");
 MODULE_ALIAS("md-level-1");