about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2009-12-13 20:49:56 -0500
committer NeilBrown <neilb@suse.de> 2009-12-13 20:51:41 -0500
commit ece5cff0da9e696c360fff592cb5f51b6419e4d6 (patch)
tree 1c5cab0e89ae466be9edb476ceb91e5c898fafa6
parent 624ce4f5658fa3e0303c1217bba2706142fe7568 (diff)
md: Support write-intent bitmaps with externally managed metadata.
In this case, the metadata must not be in the same sector as the bitmap. md will not read or write any bitmap metadata. Configuration must be done via sysfs, and when a recovery makes the array non-degraded again, writing 'true' to 'bitmap/can_clear' will allow bits in the bitmap to be cleared again.
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- Documentation/md.txt 16
-rw-r--r-- drivers/md/bitmap.c 142
-rw-r--r-- drivers/md/bitmap.h 11
-rw-r--r-- drivers/md/md.h 1
4 files changed, 137 insertions, 33 deletions
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 18fad6876228..21d26fb5d02b 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -322,6 +322,22 @@ All md devices contain:
322 'backlog' sets a limit on the number of concurrent background 322 'backlog' sets a limit on the number of concurrent background
323 writes. If there are more than this, new writes will be 323 writes. If there are more than this, new writes will be
324 synchronous. 324 synchronous.
325 bitmap/metadata
326 This can be either 'internal' or 'external'.
327 'internal' is the default and means the metadata for the bitmap
328 is stored in the first 256 bytes of the allocated space and is
329 managed by the md module.
330 'external' means that bitmap metadata is managed externally to
331 the kernel (i.e. by some userspace program).
332 bitmap/can_clear
333 This is either 'true' or 'false'. If 'true', then bits in the
334 bitmap will be cleared when the corresponding blocks are thought
335 to be in-sync. If 'false', bits will never be cleared.
336 This is automatically set to 'false' if a write happens on a
337 degraded array, or if the array becomes degraded during a write.
338 When metadata is managed externally, it should be set to true
339 once the array becomes non-degraded, and this fact has been
340 recorded in the metadata.
325 341
326 342
327 343
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 62958491f329..de5c42df8d17 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -497,6 +497,8 @@ void bitmap_update_sb(struct bitmap *bitmap)
497 497
498 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ 498 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
499 return; 499 return;
500 if (bitmap->mddev->bitmap_info.external)
501 return;
500 spin_lock_irqsave(&bitmap->lock, flags); 502 spin_lock_irqsave(&bitmap->lock, flags);
501 if (!bitmap->sb_page) { /* no superblock */ 503 if (!bitmap->sb_page) { /* no superblock */
502 spin_unlock_irqrestore(&bitmap->lock, flags); 504 spin_unlock_irqrestore(&bitmap->lock, flags);
@@ -676,16 +678,26 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
676 * general bitmap file operations 678 * general bitmap file operations
677 */ 679 */
678 680
681/*
682 * on-disk bitmap:
683 *
684 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
685 * file a page at a time. There's a superblock at the start of the file.
686 */
679/* calculate the index of the page that contains this bit */ 687/* calculate the index of the page that contains this bit */
680static inline unsigned long file_page_index(unsigned long chunk) 688static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk)
681{ 689{
682 return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT; 690 if (!bitmap->mddev->bitmap_info.external)
691 chunk += sizeof(bitmap_super_t) << 3;
692 return chunk >> PAGE_BIT_SHIFT;
683} 693}
684 694
685/* calculate the (bit) offset of this bit within a page */ 695/* calculate the (bit) offset of this bit within a page */
686static inline unsigned long file_page_offset(unsigned long chunk) 696static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk)
687{ 697{
688 return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1); 698 if (!bitmap->mddev->bitmap_info.external)
699 chunk += sizeof(bitmap_super_t) << 3;
700 return chunk & (PAGE_BITS - 1);
689} 701}
690 702
691/* 703/*
@@ -698,8 +710,9 @@ static inline unsigned long file_page_offset(unsigned long chunk)
698static inline struct page *filemap_get_page(struct bitmap *bitmap, 710static inline struct page *filemap_get_page(struct bitmap *bitmap,
699 unsigned long chunk) 711 unsigned long chunk)
700{ 712{
701 if (file_page_index(chunk) >= bitmap->file_pages) return NULL; 713 if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL;
702 return bitmap->filemap[file_page_index(chunk) - file_page_index(0)]; 714 return bitmap->filemap[file_page_index(bitmap, chunk)
715 - file_page_index(bitmap, 0)];
703} 716}
704 717
705 718
@@ -722,7 +735,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap)
722 spin_unlock_irqrestore(&bitmap->lock, flags); 735 spin_unlock_irqrestore(&bitmap->lock, flags);
723 736
724 while (pages--) 737 while (pages--)
725 if (map[pages]->index != 0) /* 0 is sb_page, release it below */ 738 if (map[pages] != sb_page) /* 0 is sb_page, release it below */
726 free_buffers(map[pages]); 739 free_buffers(map[pages]);
727 kfree(map); 740 kfree(map);
728 kfree(attr); 741 kfree(attr);
@@ -833,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
833 846
834 page = filemap_get_page(bitmap, chunk); 847 page = filemap_get_page(bitmap, chunk);
835 if (!page) return; 848 if (!page) return;
836 bit = file_page_offset(chunk); 849 bit = file_page_offset(bitmap, chunk);
837 850
838 /* set the bit */ 851 /* set the bit */
839 kaddr = kmap_atomic(page, KM_USER0); 852 kaddr = kmap_atomic(page, KM_USER0);
@@ -931,14 +944,17 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
931 "recovery\n", bmname(bitmap)); 944 "recovery\n", bmname(bitmap));
932 945
933 bytes = (chunks + 7) / 8; 946 bytes = (chunks + 7) / 8;
947 if (!bitmap->mddev->bitmap_info.external)
948 bytes += sizeof(bitmap_super_t);
934 949
935 num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE; 950
951 num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
936 952
937 if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) { 953 if (file && i_size_read(file->f_mapping->host) < bytes) {
938 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", 954 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
939 bmname(bitmap), 955 bmname(bitmap),
940 (unsigned long) i_size_read(file->f_mapping->host), 956 (unsigned long) i_size_read(file->f_mapping->host),
941 bytes + sizeof(bitmap_super_t)); 957 bytes);
942 goto err; 958 goto err;
943 } 959 }
944 960
@@ -959,17 +975,16 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
959 975
960 for (i = 0; i < chunks; i++) { 976 for (i = 0; i < chunks; i++) {
961 int b; 977 int b;
962 index = file_page_index(i); 978 index = file_page_index(bitmap, i);
963 bit = file_page_offset(i); 979 bit = file_page_offset(bitmap, i);
964 if (index != oldindex) { /* this is a new page, read it in */ 980 if (index != oldindex) { /* this is a new page, read it in */
965 int count; 981 int count;
966 /* unmap the old page, we're done with it */ 982 /* unmap the old page, we're done with it */
967 if (index == num_pages-1) 983 if (index == num_pages-1)
968 count = bytes + sizeof(bitmap_super_t) 984 count = bytes - index * PAGE_SIZE;
969 - index * PAGE_SIZE;
970 else 985 else
971 count = PAGE_SIZE; 986 count = PAGE_SIZE;
972 if (index == 0) { 987 if (index == 0 && bitmap->sb_page) {
973 /* 988 /*
974 * if we're here then the superblock page 989 * if we're here then the superblock page
975 * contains some bits (PAGE_SIZE != sizeof sb) 990 * contains some bits (PAGE_SIZE != sizeof sb)
@@ -1164,7 +1179,8 @@ void bitmap_daemon_work(mddev_t *mddev)
1164 /* We are possibly going to clear some bits, so make 1179 /* We are possibly going to clear some bits, so make
1165 * sure that events_cleared is up-to-date. 1180 * sure that events_cleared is up-to-date.
1166 */ 1181 */
1167 if (bitmap->need_sync) { 1182 if (bitmap->need_sync &&
1183 bitmap->mddev->bitmap_info.external == 0) {
1168 bitmap_super_t *sb; 1184 bitmap_super_t *sb;
1169 bitmap->need_sync = 0; 1185 bitmap->need_sync = 0;
1170 sb = kmap_atomic(bitmap->sb_page, KM_USER0); 1186 sb = kmap_atomic(bitmap->sb_page, KM_USER0);
@@ -1174,7 +1190,8 @@ void bitmap_daemon_work(mddev_t *mddev)
1174 write_page(bitmap, bitmap->sb_page, 1); 1190 write_page(bitmap, bitmap->sb_page, 1);
1175 } 1191 }
1176 spin_lock_irqsave(&bitmap->lock, flags); 1192 spin_lock_irqsave(&bitmap->lock, flags);
1177 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1193 if (!bitmap->need_sync)
1194 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1178 } 1195 }
1179 bmc = bitmap_get_counter(bitmap, 1196 bmc = bitmap_get_counter(bitmap,
1180 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), 1197 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
@@ -1189,7 +1206,7 @@ void bitmap_daemon_work(mddev_t *mddev)
1189 if (*bmc == 2) { 1206 if (*bmc == 2) {
1190 *bmc=1; /* maybe clear the bit next time */ 1207 *bmc=1; /* maybe clear the bit next time */
1191 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1208 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1192 } else if (*bmc == 1) { 1209 } else if (*bmc == 1 && !bitmap->need_sync) {
1193 /* we can clear the bit */ 1210 /* we can clear the bit */
1194 *bmc = 0; 1211 *bmc = 0;
1195 bitmap_count_page(bitmap, 1212 bitmap_count_page(bitmap,
@@ -1199,9 +1216,11 @@ void bitmap_daemon_work(mddev_t *mddev)
1199 /* clear the bit */ 1216 /* clear the bit */
1200 paddr = kmap_atomic(page, KM_USER0); 1217 paddr = kmap_atomic(page, KM_USER0);
1201 if (bitmap->flags & BITMAP_HOSTENDIAN) 1218 if (bitmap->flags & BITMAP_HOSTENDIAN)
1202 clear_bit(file_page_offset(j), paddr); 1219 clear_bit(file_page_offset(bitmap, j),
1220 paddr);
1203 else 1221 else
1204 ext2_clear_bit(file_page_offset(j), paddr); 1222 ext2_clear_bit(file_page_offset(bitmap, j),
1223 paddr);
1205 kunmap_atomic(paddr, KM_USER0); 1224 kunmap_atomic(paddr, KM_USER0);
1206 } 1225 }
1207 } else 1226 } else
@@ -1356,6 +1375,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
1356 bitmap->events_cleared < bitmap->mddev->events) { 1375 bitmap->events_cleared < bitmap->mddev->events) {
1357 bitmap->events_cleared = bitmap->mddev->events; 1376 bitmap->events_cleared = bitmap->mddev->events;
1358 bitmap->need_sync = 1; 1377 bitmap->need_sync = 1;
1378 sysfs_notify_dirent(bitmap->sysfs_can_clear);
1359 } 1379 }
1360 1380
1361 if (!success && ! (*bmc & NEEDED_MASK)) 1381 if (!success && ! (*bmc & NEEDED_MASK))
@@ -1613,6 +1633,9 @@ void bitmap_destroy(mddev_t *mddev)
1613 if (mddev->thread) 1633 if (mddev->thread)
1614 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; 1634 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1615 1635
1636 if (bitmap->sysfs_can_clear)
1637 sysfs_put(bitmap->sysfs_can_clear);
1638
1616 bitmap_free(bitmap); 1639 bitmap_free(bitmap);
1617} 1640}
1618 1641
@@ -1629,6 +1652,7 @@ int bitmap_create(mddev_t *mddev)
1629 struct file *file = mddev->bitmap_info.file; 1652 struct file *file = mddev->bitmap_info.file;
1630 int err; 1653 int err;
1631 sector_t start; 1654 sector_t start;
1655 struct sysfs_dirent *bm;
1632 1656
1633 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); 1657 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
1634 1658
@@ -1648,6 +1672,13 @@ int bitmap_create(mddev_t *mddev)
1648 1672
1649 bitmap->mddev = mddev; 1673 bitmap->mddev = mddev;
1650 1674
1675 bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
1676 if (bm) {
1677 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
1678 sysfs_put(bm);
1679 } else
1680 bitmap->sysfs_can_clear = NULL;
1681
1651 bitmap->file = file; 1682 bitmap->file = file;
1652 if (file) { 1683 if (file) {
1653 get_file(file); 1684 get_file(file);
@@ -1658,7 +1689,16 @@ int bitmap_create(mddev_t *mddev)
1658 vfs_fsync(file, file->f_dentry, 1); 1689 vfs_fsync(file, file->f_dentry, 1);
1659 } 1690 }
1660 /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ 1691 /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
1661 err = bitmap_read_sb(bitmap); 1692 if (!mddev->bitmap_info.external)
1693 err = bitmap_read_sb(bitmap);
1694 else {
1695 err = 0;
1696 if (mddev->bitmap_info.chunksize == 0 ||
1697 mddev->bitmap_info.daemon_sleep == 0)
1698 /* chunksize and time_base need to be
1699 * set first. */
1700 err = -EINVAL;
1701 }
1662 if (err) 1702 if (err)
1663 goto error; 1703 goto error;
1664 1704
@@ -1777,7 +1817,8 @@ location_store(mddev_t *mddev, const char *buf, size_t len)
1777 return rv; 1817 return rv;
1778 if (offset == 0) 1818 if (offset == 0)
1779 return -EINVAL; 1819 return -EINVAL;
1780 if (mddev->major_version == 0 && 1820 if (mddev->bitmap_info.external == 0 &&
1821 mddev->major_version == 0 &&
1781 offset != mddev->bitmap_info.default_offset) 1822 offset != mddev->bitmap_info.default_offset)
1782 return -EINVAL; 1823 return -EINVAL;
1783 mddev->bitmap_info.offset = offset; 1824 mddev->bitmap_info.offset = offset;
@@ -1906,11 +1947,66 @@ chunksize_store(mddev_t *mddev, const char *buf, size_t len)
1906static struct md_sysfs_entry bitmap_chunksize = 1947static struct md_sysfs_entry bitmap_chunksize =
1907__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store); 1948__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
1908 1949
1950static ssize_t metadata_show(mddev_t *mddev, char *page)
1951{
1952 return sprintf(page, "%s\n", (mddev->bitmap_info.external
1953 ? "external" : "internal"));
1954}
1955
1956static ssize_t metadata_store(mddev_t *mddev, const char *buf, size_t len)
1957{
1958 if (mddev->bitmap ||
1959 mddev->bitmap_info.file ||
1960 mddev->bitmap_info.offset)
1961 return -EBUSY;
1962 if (strncmp(buf, "external", 8) == 0)
1963 mddev->bitmap_info.external = 1;
1964 else if (strncmp(buf, "internal", 8) == 0)
1965 mddev->bitmap_info.external = 0;
1966 else
1967 return -EINVAL;
1968 return len;
1969}
1970
1971static struct md_sysfs_entry bitmap_metadata =
1972__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
1973
1974static ssize_t can_clear_show(mddev_t *mddev, char *page)
1975{
1976 int len;
1977 if (mddev->bitmap)
1978 len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
1979 "false" : "true"));
1980 else
1981 len = sprintf(page, "\n");
1982 return len;
1983}
1984
1985static ssize_t can_clear_store(mddev_t *mddev, const char *buf, size_t len)
1986{
1987 if (mddev->bitmap == NULL)
1988 return -ENOENT;
1989 if (strncmp(buf, "false", 5) == 0)
1990 mddev->bitmap->need_sync = 1;
1991 else if (strncmp(buf, "true", 4) == 0) {
1992 if (mddev->degraded)
1993 return -EBUSY;
1994 mddev->bitmap->need_sync = 0;
1995 } else
1996 return -EINVAL;
1997 return len;
1998}
1999
2000static struct md_sysfs_entry bitmap_can_clear =
2001__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
2002
1909static struct attribute *md_bitmap_attrs[] = { 2003static struct attribute *md_bitmap_attrs[] = {
1910 &bitmap_location.attr, 2004 &bitmap_location.attr,
1911 &bitmap_timeout.attr, 2005 &bitmap_timeout.attr,
1912 &bitmap_backlog.attr, 2006 &bitmap_backlog.attr,
1913 &bitmap_chunksize.attr, 2007 &bitmap_chunksize.attr,
2008 &bitmap_metadata.attr,
2009 &bitmap_can_clear.attr,
1914 NULL 2010 NULL
1915}; 2011};
1916struct attribute_group md_bitmap_group = { 2012struct attribute_group md_bitmap_group = {
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index 50ee4240f5db..cb821d76d1b4 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -118,16 +118,6 @@ typedef __u16 bitmap_counter_t;
118 (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1) 118 (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
119#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1) 119#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
120 120
121/*
122 * on-disk bitmap:
123 *
124 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
125 * file a page at a time. There's a superblock at the start of the file.
126 */
127
128/* map chunks (bits) to file pages - offset by the size of the superblock */
129#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))
130
131#endif 121#endif
132 122
133/* 123/*
@@ -250,6 +240,7 @@ struct bitmap {
250 wait_queue_head_t write_wait; 240 wait_queue_head_t write_wait;
251 wait_queue_head_t overflow_wait; 241 wait_queue_head_t overflow_wait;
252 242
243 struct sysfs_dirent *sysfs_can_clear;
253}; 244};
254 245
255/* the bitmap API */ 246/* the bitmap API */
diff --git a/drivers/md/md.h b/drivers/md/md.h
index fce02073f1a4..d9138885b87f 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -296,6 +296,7 @@ struct mddev_s
296 unsigned long chunksize; 296 unsigned long chunksize;
297 unsigned long daemon_sleep; /* how many seconds between updates? */ 297 unsigned long daemon_sleep; /* how many seconds between updates? */
298 unsigned long max_write_behind; /* write-behind mode */ 298 unsigned long max_write_behind; /* write-behind mode */
299 int external;
299 } bitmap_info; 300 } bitmap_info;
300 301
301 struct list_head all_mddevs; 302 struct list_head all_mddevs;