aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2009-12-13 20:49:56 -0500
committer NeilBrown <neilb@suse.de> 2009-12-13 20:51:41 -0500
commit ece5cff0da9e696c360fff592cb5f51b6419e4d6 (patch)
tree 1c5cab0e89ae466be9edb476ceb91e5c898fafa6 /drivers
parent 624ce4f5658fa3e0303c1217bba2706142fe7568 (diff)
md: Support write-intent bitmaps with externally managed metadata.
In this case, the metadata must not be in the same sector as the bitmap. md will not read/write any bitmap metadata. Config must be done via sysfs, and when a recovery makes the array non-degraded again, writing 'true' to 'bitmap/can_clear' will allow bits in the bitmap to be cleared again.

Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/md/bitmap.c 142
-rw-r--r-- drivers/md/bitmap.h 11
-rw-r--r-- drivers/md/md.h 1
3 files changed, 121 insertions, 33 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 62958491f329..de5c42df8d17 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -497,6 +497,8 @@ void bitmap_update_sb(struct bitmap *bitmap)
497 497
498 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ 498 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
499 return; 499 return;
500 if (bitmap->mddev->bitmap_info.external)
501 return;
500 spin_lock_irqsave(&bitmap->lock, flags); 502 spin_lock_irqsave(&bitmap->lock, flags);
501 if (!bitmap->sb_page) { /* no superblock */ 503 if (!bitmap->sb_page) { /* no superblock */
502 spin_unlock_irqrestore(&bitmap->lock, flags); 504 spin_unlock_irqrestore(&bitmap->lock, flags);
@@ -676,16 +678,26 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
676 * general bitmap file operations 678 * general bitmap file operations
677 */ 679 */
678 680
681/*
682 * on-disk bitmap:
683 *
684 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
685 * file a page at a time. There's a superblock at the start of the file.
686 */
679/* calculate the index of the page that contains this bit */ 687/* calculate the index of the page that contains this bit */
680static inline unsigned long file_page_index(unsigned long chunk) 688static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk)
681{ 689{
682 return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT; 690 if (!bitmap->mddev->bitmap_info.external)
691 chunk += sizeof(bitmap_super_t) << 3;
692 return chunk >> PAGE_BIT_SHIFT;
683} 693}
684 694
685/* calculate the (bit) offset of this bit within a page */ 695/* calculate the (bit) offset of this bit within a page */
686static inline unsigned long file_page_offset(unsigned long chunk) 696static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk)
687{ 697{
688 return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1); 698 if (!bitmap->mddev->bitmap_info.external)
699 chunk += sizeof(bitmap_super_t) << 3;
700 return chunk & (PAGE_BITS - 1);
689} 701}
690 702
691/* 703/*
@@ -698,8 +710,9 @@ static inline unsigned long file_page_offset(unsigned long chunk)
698static inline struct page *filemap_get_page(struct bitmap *bitmap, 710static inline struct page *filemap_get_page(struct bitmap *bitmap,
699 unsigned long chunk) 711 unsigned long chunk)
700{ 712{
701 if (file_page_index(chunk) >= bitmap->file_pages) return NULL; 713 if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL;
702 return bitmap->filemap[file_page_index(chunk) - file_page_index(0)]; 714 return bitmap->filemap[file_page_index(bitmap, chunk)
715 - file_page_index(bitmap, 0)];
703} 716}
704 717
705 718
@@ -722,7 +735,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap)
722 spin_unlock_irqrestore(&bitmap->lock, flags); 735 spin_unlock_irqrestore(&bitmap->lock, flags);
723 736
724 while (pages--) 737 while (pages--)
725 if (map[pages]->index != 0) /* 0 is sb_page, release it below */ 738 if (map[pages] != sb_page) /* 0 is sb_page, release it below */
726 free_buffers(map[pages]); 739 free_buffers(map[pages]);
727 kfree(map); 740 kfree(map);
728 kfree(attr); 741 kfree(attr);
@@ -833,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
833 846
834 page = filemap_get_page(bitmap, chunk); 847 page = filemap_get_page(bitmap, chunk);
835 if (!page) return; 848 if (!page) return;
836 bit = file_page_offset(chunk); 849 bit = file_page_offset(bitmap, chunk);
837 850
838 /* set the bit */ 851 /* set the bit */
839 kaddr = kmap_atomic(page, KM_USER0); 852 kaddr = kmap_atomic(page, KM_USER0);
@@ -931,14 +944,17 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
931 "recovery\n", bmname(bitmap)); 944 "recovery\n", bmname(bitmap));
932 945
933 bytes = (chunks + 7) / 8; 946 bytes = (chunks + 7) / 8;
947 if (!bitmap->mddev->bitmap_info.external)
948 bytes += sizeof(bitmap_super_t);
934 949
935 num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE; 950
951 num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
936 952
937 if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) { 953 if (file && i_size_read(file->f_mapping->host) < bytes) {
938 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", 954 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
939 bmname(bitmap), 955 bmname(bitmap),
940 (unsigned long) i_size_read(file->f_mapping->host), 956 (unsigned long) i_size_read(file->f_mapping->host),
941 bytes + sizeof(bitmap_super_t)); 957 bytes);
942 goto err; 958 goto err;
943 } 959 }
944 960
@@ -959,17 +975,16 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
959 975
960 for (i = 0; i < chunks; i++) { 976 for (i = 0; i < chunks; i++) {
961 int b; 977 int b;
962 index = file_page_index(i); 978 index = file_page_index(bitmap, i);
963 bit = file_page_offset(i); 979 bit = file_page_offset(bitmap, i);
964 if (index != oldindex) { /* this is a new page, read it in */ 980 if (index != oldindex) { /* this is a new page, read it in */
965 int count; 981 int count;
966 /* unmap the old page, we're done with it */ 982 /* unmap the old page, we're done with it */
967 if (index == num_pages-1) 983 if (index == num_pages-1)
968 count = bytes + sizeof(bitmap_super_t) 984 count = bytes - index * PAGE_SIZE;
969 - index * PAGE_SIZE;
970 else 985 else
971 count = PAGE_SIZE; 986 count = PAGE_SIZE;
972 if (index == 0) { 987 if (index == 0 && bitmap->sb_page) {
973 /* 988 /*
974 * if we're here then the superblock page 989 * if we're here then the superblock page
975 * contains some bits (PAGE_SIZE != sizeof sb) 990 * contains some bits (PAGE_SIZE != sizeof sb)
@@ -1164,7 +1179,8 @@ void bitmap_daemon_work(mddev_t *mddev)
1164 /* We are possibly going to clear some bits, so make 1179 /* We are possibly going to clear some bits, so make
1165 * sure that events_cleared is up-to-date. 1180 * sure that events_cleared is up-to-date.
1166 */ 1181 */
1167 if (bitmap->need_sync) { 1182 if (bitmap->need_sync &&
1183 bitmap->mddev->bitmap_info.external == 0) {
1168 bitmap_super_t *sb; 1184 bitmap_super_t *sb;
1169 bitmap->need_sync = 0; 1185 bitmap->need_sync = 0;
1170 sb = kmap_atomic(bitmap->sb_page, KM_USER0); 1186 sb = kmap_atomic(bitmap->sb_page, KM_USER0);
@@ -1174,7 +1190,8 @@ void bitmap_daemon_work(mddev_t *mddev)
1174 write_page(bitmap, bitmap->sb_page, 1); 1190 write_page(bitmap, bitmap->sb_page, 1);
1175 } 1191 }
1176 spin_lock_irqsave(&bitmap->lock, flags); 1192 spin_lock_irqsave(&bitmap->lock, flags);
1177 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1193 if (!bitmap->need_sync)
1194 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1178 } 1195 }
1179 bmc = bitmap_get_counter(bitmap, 1196 bmc = bitmap_get_counter(bitmap,
1180 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), 1197 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
@@ -1189,7 +1206,7 @@ void bitmap_daemon_work(mddev_t *mddev)
1189 if (*bmc == 2) { 1206 if (*bmc == 2) {
1190 *bmc=1; /* maybe clear the bit next time */ 1207 *bmc=1; /* maybe clear the bit next time */
1191 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1208 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1192 } else if (*bmc == 1) { 1209 } else if (*bmc == 1 && !bitmap->need_sync) {
1193 /* we can clear the bit */ 1210 /* we can clear the bit */
1194 *bmc = 0; 1211 *bmc = 0;
1195 bitmap_count_page(bitmap, 1212 bitmap_count_page(bitmap,
@@ -1199,9 +1216,11 @@ void bitmap_daemon_work(mddev_t *mddev)
1199 /* clear the bit */ 1216 /* clear the bit */
1200 paddr = kmap_atomic(page, KM_USER0); 1217 paddr = kmap_atomic(page, KM_USER0);
1201 if (bitmap->flags & BITMAP_HOSTENDIAN) 1218 if (bitmap->flags & BITMAP_HOSTENDIAN)
1202 clear_bit(file_page_offset(j), paddr); 1219 clear_bit(file_page_offset(bitmap, j),
1220 paddr);
1203 else 1221 else
1204 ext2_clear_bit(file_page_offset(j), paddr); 1222 ext2_clear_bit(file_page_offset(bitmap, j),
1223 paddr);
1205 kunmap_atomic(paddr, KM_USER0); 1224 kunmap_atomic(paddr, KM_USER0);
1206 } 1225 }
1207 } else 1226 } else
@@ -1356,6 +1375,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
1356 bitmap->events_cleared < bitmap->mddev->events) { 1375 bitmap->events_cleared < bitmap->mddev->events) {
1357 bitmap->events_cleared = bitmap->mddev->events; 1376 bitmap->events_cleared = bitmap->mddev->events;
1358 bitmap->need_sync = 1; 1377 bitmap->need_sync = 1;
1378 sysfs_notify_dirent(bitmap->sysfs_can_clear);
1359 } 1379 }
1360 1380
1361 if (!success && ! (*bmc & NEEDED_MASK)) 1381 if (!success && ! (*bmc & NEEDED_MASK))
@@ -1613,6 +1633,9 @@ void bitmap_destroy(mddev_t *mddev)
1613 if (mddev->thread) 1633 if (mddev->thread)
1614 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; 1634 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1615 1635
1636 if (bitmap->sysfs_can_clear)
1637 sysfs_put(bitmap->sysfs_can_clear);
1638
1616 bitmap_free(bitmap); 1639 bitmap_free(bitmap);
1617} 1640}
1618 1641
@@ -1629,6 +1652,7 @@ int bitmap_create(mddev_t *mddev)
1629 struct file *file = mddev->bitmap_info.file; 1652 struct file *file = mddev->bitmap_info.file;
1630 int err; 1653 int err;
1631 sector_t start; 1654 sector_t start;
1655 struct sysfs_dirent *bm;
1632 1656
1633 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); 1657 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
1634 1658
@@ -1648,6 +1672,13 @@ int bitmap_create(mddev_t *mddev)
1648 1672
1649 bitmap->mddev = mddev; 1673 bitmap->mddev = mddev;
1650 1674
1675 bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
1676 if (bm) {
1677 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
1678 sysfs_put(bm);
1679 } else
1680 bitmap->sysfs_can_clear = NULL;
1681
1651 bitmap->file = file; 1682 bitmap->file = file;
1652 if (file) { 1683 if (file) {
1653 get_file(file); 1684 get_file(file);
@@ -1658,7 +1689,16 @@ int bitmap_create(mddev_t *mddev)
1658 vfs_fsync(file, file->f_dentry, 1); 1689 vfs_fsync(file, file->f_dentry, 1);
1659 } 1690 }
1660 /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ 1691 /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
1661 err = bitmap_read_sb(bitmap); 1692 if (!mddev->bitmap_info.external)
1693 err = bitmap_read_sb(bitmap);
1694 else {
1695 err = 0;
1696 if (mddev->bitmap_info.chunksize == 0 ||
1697 mddev->bitmap_info.daemon_sleep == 0)
1698 /* chunksize and time_base need to be
1699 * set first. */
1700 err = -EINVAL;
1701 }
1662 if (err) 1702 if (err)
1663 goto error; 1703 goto error;
1664 1704
@@ -1777,7 +1817,8 @@ location_store(mddev_t *mddev, const char *buf, size_t len)
1777 return rv; 1817 return rv;
1778 if (offset == 0) 1818 if (offset == 0)
1779 return -EINVAL; 1819 return -EINVAL;
1780 if (mddev->major_version == 0 && 1820 if (mddev->bitmap_info.external == 0 &&
1821 mddev->major_version == 0 &&
1781 offset != mddev->bitmap_info.default_offset) 1822 offset != mddev->bitmap_info.default_offset)
1782 return -EINVAL; 1823 return -EINVAL;
1783 mddev->bitmap_info.offset = offset; 1824 mddev->bitmap_info.offset = offset;
@@ -1906,11 +1947,66 @@ chunksize_store(mddev_t *mddev, const char *buf, size_t len)
1906static struct md_sysfs_entry bitmap_chunksize = 1947static struct md_sysfs_entry bitmap_chunksize =
1907__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store); 1948__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
1908 1949
1950static ssize_t metadata_show(mddev_t *mddev, char *page)
1951{
1952 return sprintf(page, "%s\n", (mddev->bitmap_info.external
1953 ? "external" : "internal"));
1954}
1955
1956static ssize_t metadata_store(mddev_t *mddev, const char *buf, size_t len)
1957{
1958 if (mddev->bitmap ||
1959 mddev->bitmap_info.file ||
1960 mddev->bitmap_info.offset)
1961 return -EBUSY;
1962 if (strncmp(buf, "external", 8) == 0)
1963 mddev->bitmap_info.external = 1;
1964 else if (strncmp(buf, "internal", 8) == 0)
1965 mddev->bitmap_info.external = 0;
1966 else
1967 return -EINVAL;
1968 return len;
1969}
1970
1971static struct md_sysfs_entry bitmap_metadata =
1972__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
1973
1974static ssize_t can_clear_show(mddev_t *mddev, char *page)
1975{
1976 int len;
1977 if (mddev->bitmap)
1978 len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
1979 "false" : "true"));
1980 else
1981 len = sprintf(page, "\n");
1982 return len;
1983}
1984
1985static ssize_t can_clear_store(mddev_t *mddev, const char *buf, size_t len)
1986{
1987 if (mddev->bitmap == NULL)
1988 return -ENOENT;
1989 if (strncmp(buf, "false", 5) == 0)
1990 mddev->bitmap->need_sync = 1;
1991 else if (strncmp(buf, "true", 4) == 0) {
1992 if (mddev->degraded)
1993 return -EBUSY;
1994 mddev->bitmap->need_sync = 0;
1995 } else
1996 return -EINVAL;
1997 return len;
1998}
1999
2000static struct md_sysfs_entry bitmap_can_clear =
2001__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
2002
1909static struct attribute *md_bitmap_attrs[] = { 2003static struct attribute *md_bitmap_attrs[] = {
1910 &bitmap_location.attr, 2004 &bitmap_location.attr,
1911 &bitmap_timeout.attr, 2005 &bitmap_timeout.attr,
1912 &bitmap_backlog.attr, 2006 &bitmap_backlog.attr,
1913 &bitmap_chunksize.attr, 2007 &bitmap_chunksize.attr,
2008 &bitmap_metadata.attr,
2009 &bitmap_can_clear.attr,
1914 NULL 2010 NULL
1915}; 2011};
1916struct attribute_group md_bitmap_group = { 2012struct attribute_group md_bitmap_group = {
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index 50ee4240f5db..cb821d76d1b4 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -118,16 +118,6 @@ typedef __u16 bitmap_counter_t;
118 (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1) 118 (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
119#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1) 119#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
120 120
121/*
122 * on-disk bitmap:
123 *
124 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
125 * file a page at a time. There's a superblock at the start of the file.
126 */
127
128/* map chunks (bits) to file pages - offset by the size of the superblock */
129#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))
130
131#endif 121#endif
132 122
133/* 123/*
@@ -250,6 +240,7 @@ struct bitmap {
250 wait_queue_head_t write_wait; 240 wait_queue_head_t write_wait;
251 wait_queue_head_t overflow_wait; 241 wait_queue_head_t overflow_wait;
252 242
243 struct sysfs_dirent *sysfs_can_clear;
253}; 244};
254 245
255/* the bitmap API */ 246/* the bitmap API */
diff --git a/drivers/md/md.h b/drivers/md/md.h
index fce02073f1a4..d9138885b87f 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -296,6 +296,7 @@ struct mddev_s
296 unsigned long chunksize; 296 unsigned long chunksize;
297 unsigned long daemon_sleep; /* how many seconds between updates? */ 297 unsigned long daemon_sleep; /* how many seconds between updates? */
298 unsigned long max_write_behind; /* write-behind mode */ 298 unsigned long max_write_behind; /* write-behind mode */
299 int external;
299 } bitmap_info; 300 } bitmap_info;
300 301
301 struct list_head all_mddevs; 302 struct list_head all_mddevs;