about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2008-02-06 04:39:54 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-06 13:41:18 -0500
commit c5d79adba7ced41d7ac097c2ab74759d10522dd5 (patch)
tree ab5a45046244602f2a27be0b34fb26155a3c8dee
parent 1ec4a9398dc05061b6258061676fede733458893 (diff)
md: allow devices to be shared between md arrays
Currently, a given device is "claimed" by a particular array so that it cannot be used by other arrays.

This is not ideal for DDF and other metadata schemes which have their own partitioning concept.

So for externally managed metadata, just claim the device for md in general, require that "offset" and "size" are set properly for each device, and make sure that if a device is included in different arrays then the active sections do not overlap.

This involves adding another flag to the rdev which makes it awkward to set "->flags = 0" to clear certain flags. So now clear flags explicitly by name when we want to clear things.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- drivers/md/md.c 88
-rw-r--r-- include/linux/raid/md_k.h 2
2 files changed, 80 insertions, 10 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 78fe3e97ff99..7c9a87b02e77 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -774,7 +774,11 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
774 __u64 ev1 = md_event(sb); 774 __u64 ev1 = md_event(sb);
775 775
776 rdev->raid_disk = -1; 776 rdev->raid_disk = -1;
777 rdev->flags = 0; 777 clear_bit(Faulty, &rdev->flags);
778 clear_bit(In_sync, &rdev->flags);
779 clear_bit(WriteMostly, &rdev->flags);
780 clear_bit(BarriersNotsupp, &rdev->flags);
781
778 if (mddev->raid_disks == 0) { 782 if (mddev->raid_disks == 0) {
779 mddev->major_version = 0; 783 mddev->major_version = 0;
780 mddev->minor_version = sb->minor_version; 784 mddev->minor_version = sb->minor_version;
@@ -1154,7 +1158,11 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1154 __u64 ev1 = le64_to_cpu(sb->events); 1158 __u64 ev1 = le64_to_cpu(sb->events);
1155 1159
1156 rdev->raid_disk = -1; 1160 rdev->raid_disk = -1;
1157 rdev->flags = 0; 1161 clear_bit(Faulty, &rdev->flags);
1162 clear_bit(In_sync, &rdev->flags);
1163 clear_bit(WriteMostly, &rdev->flags);
1164 clear_bit(BarriersNotsupp, &rdev->flags);
1165
1158 if (mddev->raid_disks == 0) { 1166 if (mddev->raid_disks == 0) {
1159 mddev->major_version = 1; 1167 mddev->major_version = 1;
1160 mddev->patch_version = 0; 1168 mddev->patch_version = 0;
@@ -1402,7 +1410,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1402 goto fail; 1410 goto fail;
1403 } 1411 }
1404 list_add(&rdev->same_set, &mddev->disks); 1412 list_add(&rdev->same_set, &mddev->disks);
1405 bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk); 1413 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
1406 return 0; 1414 return 0;
1407 1415
1408 fail: 1416 fail:
@@ -1442,7 +1450,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
1442 * otherwise reused by a RAID array (or any other kernel 1450 * otherwise reused by a RAID array (or any other kernel
1443 * subsystem), by bd_claiming the device. 1451 * subsystem), by bd_claiming the device.
1444 */ 1452 */
1445static int lock_rdev(mdk_rdev_t *rdev, dev_t dev) 1453static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
1446{ 1454{
1447 int err = 0; 1455 int err = 0;
1448 struct block_device *bdev; 1456 struct block_device *bdev;
@@ -1454,13 +1462,15 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
1454 __bdevname(dev, b)); 1462 __bdevname(dev, b));
1455 return PTR_ERR(bdev); 1463 return PTR_ERR(bdev);
1456 } 1464 }
1457 err = bd_claim(bdev, rdev); 1465 err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
1458 if (err) { 1466 if (err) {
1459 printk(KERN_ERR "md: could not bd_claim %s.\n", 1467 printk(KERN_ERR "md: could not bd_claim %s.\n",
1460 bdevname(bdev, b)); 1468 bdevname(bdev, b));
1461 blkdev_put(bdev); 1469 blkdev_put(bdev);
1462 return err; 1470 return err;
1463 } 1471 }
1472 if (!shared)
1473 set_bit(AllReserved, &rdev->flags);
1464 rdev->bdev = bdev; 1474 rdev->bdev = bdev;
1465 return err; 1475 return err;
1466} 1476}
@@ -1925,7 +1935,8 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1925 return -ENOSPC; 1935 return -ENOSPC;
1926 rdev->raid_disk = slot; 1936 rdev->raid_disk = slot;
1927 /* assume it is working */ 1937 /* assume it is working */
1928 rdev->flags = 0; 1938 clear_bit(Faulty, &rdev->flags);
1939 clear_bit(WriteMostly, &rdev->flags);
1929 set_bit(In_sync, &rdev->flags); 1940 set_bit(In_sync, &rdev->flags);
1930 } 1941 }
1931 return len; 1942 return len;
@@ -1950,6 +1961,10 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1950 return -EINVAL; 1961 return -EINVAL;
1951 if (rdev->mddev->pers) 1962 if (rdev->mddev->pers)
1952 return -EBUSY; 1963 return -EBUSY;
1964 if (rdev->size && rdev->mddev->external)
1965 /* Must set offset before size, so overlap checks
1966 * can be sane */
1967 return -EBUSY;
1953 rdev->data_offset = offset; 1968 rdev->data_offset = offset;
1954 return len; 1969 return len;
1955} 1970}
@@ -1963,16 +1978,69 @@ rdev_size_show(mdk_rdev_t *rdev, char *page)
1963 return sprintf(page, "%llu\n", (unsigned long long)rdev->size); 1978 return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
1964} 1979}
1965 1980
1981static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
1982{
1983 /* check if two start/length pairs overlap */
1984 if (s1+l1 <= s2)
1985 return 0;
1986 if (s2+l2 <= s1)
1987 return 0;
1988 return 1;
1989}
1990
1966static ssize_t 1991static ssize_t
1967rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) 1992rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
1968{ 1993{
1969 char *e; 1994 char *e;
1970 unsigned long long size = simple_strtoull(buf, &e, 10); 1995 unsigned long long size = simple_strtoull(buf, &e, 10);
1996 unsigned long long oldsize = rdev->size;
1971 if (e==buf || (*e && *e != '\n')) 1997 if (e==buf || (*e && *e != '\n'))
1972 return -EINVAL; 1998 return -EINVAL;
1973 if (rdev->mddev->pers) 1999 if (rdev->mddev->pers)
1974 return -EBUSY; 2000 return -EBUSY;
1975 rdev->size = size; 2001 rdev->size = size;
2002 if (size > oldsize && rdev->mddev->external) {
2003 /* need to check that all other rdevs with the same ->bdev
2004 * do not overlap. We need to unlock the mddev to avoid
2005 * a deadlock. We have already changed rdev->size, and if
2006 * we have to change it back, we will have the lock again.
2007 */
2008 mddev_t *mddev;
2009 int overlap = 0;
2010 struct list_head *tmp, *tmp2;
2011
2012 mddev_unlock(rdev->mddev);
2013 ITERATE_MDDEV(mddev, tmp) {
2014 mdk_rdev_t *rdev2;
2015
2016 mddev_lock(mddev);
2017 ITERATE_RDEV(mddev, rdev2, tmp2)
2018 if (test_bit(AllReserved, &rdev2->flags) ||
2019 (rdev->bdev == rdev2->bdev &&
2020 rdev != rdev2 &&
2021 overlaps(rdev->data_offset, rdev->size,
2022 rdev2->data_offset, rdev2->size))) {
2023 overlap = 1;
2024 break;
2025 }
2026 mddev_unlock(mddev);
2027 if (overlap) {
2028 mddev_put(mddev);
2029 break;
2030 }
2031 }
2032 mddev_lock(rdev->mddev);
2033 if (overlap) {
2034 /* Someone else could have slipped in a size
2035 * change here, but doing so is just silly.
2036 * We put oldsize back because we *know* it is
2037 * safe, and trust userspace not to race with
2038 * itself
2039 */
2040 rdev->size = oldsize;
2041 return -EBUSY;
2042 }
2043 }
1976 if (size < rdev->mddev->size || rdev->mddev->size == 0) 2044 if (size < rdev->mddev->size || rdev->mddev->size == 0)
1977 rdev->mddev->size = size; 2045 rdev->mddev->size = size;
1978 return len; 2046 return len;
@@ -2056,7 +2124,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
2056 if ((err = alloc_disk_sb(rdev))) 2124 if ((err = alloc_disk_sb(rdev)))
2057 goto abort_free; 2125 goto abort_free;
2058 2126
2059 err = lock_rdev(rdev, newdev); 2127 err = lock_rdev(rdev, newdev, super_format == -2);
2060 if (err) 2128 if (err)
2061 goto abort_free; 2129 goto abort_free;
2062 2130
@@ -2609,7 +2677,9 @@ new_dev_store(mddev_t *mddev, const char *buf, size_t len)
2609 if (err < 0) 2677 if (err < 0)
2610 goto out; 2678 goto out;
2611 } 2679 }
2612 } else 2680 } else if (mddev->external)
2681 rdev = md_import_device(dev, -2, -1);
2682 else
2613 rdev = md_import_device(dev, -1, -1); 2683 rdev = md_import_device(dev, -1, -1);
2614 2684
2615 if (IS_ERR(rdev)) 2685 if (IS_ERR(rdev))
@@ -4019,8 +4089,6 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
4019 else 4089 else
4020 rdev->raid_disk = -1; 4090 rdev->raid_disk = -1;
4021 4091
4022 rdev->flags = 0;
4023
4024 if (rdev->raid_disk < mddev->raid_disks) 4092 if (rdev->raid_disk < mddev->raid_disks)
4025 if (info->state & (1<<MD_DISK_SYNC)) 4093 if (info->state & (1<<MD_DISK_SYNC))
4026 set_bit(In_sync, &rdev->flags); 4094 set_bit(In_sync, &rdev->flags);
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index c77dca3221ed..5b2102e40286 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -81,6 +81,8 @@ struct mdk_rdev_s
81#define In_sync 2 /* device is in_sync with rest of array */ 81#define In_sync 2 /* device is in_sync with rest of array */
82#define WriteMostly 4 /* Avoid reading if at all possible */ 82#define WriteMostly 4 /* Avoid reading if at all possible */
83#define BarriersNotsupp 5 /* BIO_RW_BARRIER is not supported */ 83#define BarriersNotsupp 5 /* BIO_RW_BARRIER is not supported */
84#define AllReserved 6 /* If whole device is reserved for
85 * one array */
84 86
85 int desc_nr; /* descriptor index in the superblock */ 87 int desc_nr; /* descriptor index in the superblock */
86 int raid_disk; /* role of device in array */ 88 int raid_disk; /* role of device in array */