diff options
author | NeilBrown <neilb@suse.de> | 2009-12-13 20:49:56 -0500 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2009-12-13 20:51:41 -0500 |
commit | ece5cff0da9e696c360fff592cb5f51b6419e4d6 (patch) | |
tree | 1c5cab0e89ae466be9edb476ceb91e5c898fafa6 /drivers | |
parent | 624ce4f5658fa3e0303c1217bba2706142fe7568 (diff) |
md: Support write-intent bitmaps with externally managed metadata.
In this case, the metadata must not be in the same
sector as the bitmap.
md will not read or write any bitmap metadata. Configuration must be
done via sysfs. When a recovery makes the array non-degraded
again, writing 'true' to 'bitmap/can_clear' will allow bits in
the bitmap to be cleared again.
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/bitmap.c | 142 | ||||
-rw-r--r-- | drivers/md/bitmap.h | 11 | ||||
-rw-r--r-- | drivers/md/md.h | 1 |
3 files changed, 121 insertions, 33 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 62958491f329..de5c42df8d17 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c | |||
@@ -497,6 +497,8 @@ void bitmap_update_sb(struct bitmap *bitmap) | |||
497 | 497 | ||
498 | if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ | 498 | if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ |
499 | return; | 499 | return; |
500 | if (bitmap->mddev->bitmap_info.external) | ||
501 | return; | ||
500 | spin_lock_irqsave(&bitmap->lock, flags); | 502 | spin_lock_irqsave(&bitmap->lock, flags); |
501 | if (!bitmap->sb_page) { /* no superblock */ | 503 | if (!bitmap->sb_page) { /* no superblock */ |
502 | spin_unlock_irqrestore(&bitmap->lock, flags); | 504 | spin_unlock_irqrestore(&bitmap->lock, flags); |
@@ -676,16 +678,26 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, | |||
676 | * general bitmap file operations | 678 | * general bitmap file operations |
677 | */ | 679 | */ |
678 | 680 | ||
681 | /* | ||
682 | * on-disk bitmap: | ||
683 | * | ||
684 | * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap | ||
685 | * file a page at a time. There's a superblock at the start of the file. | ||
686 | */ | ||
679 | /* calculate the index of the page that contains this bit */ | 687 | /* calculate the index of the page that contains this bit */ |
680 | static inline unsigned long file_page_index(unsigned long chunk) | 688 | static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk) |
681 | { | 689 | { |
682 | return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT; | 690 | if (!bitmap->mddev->bitmap_info.external) |
691 | chunk += sizeof(bitmap_super_t) << 3; | ||
692 | return chunk >> PAGE_BIT_SHIFT; | ||
683 | } | 693 | } |
684 | 694 | ||
685 | /* calculate the (bit) offset of this bit within a page */ | 695 | /* calculate the (bit) offset of this bit within a page */ |
686 | static inline unsigned long file_page_offset(unsigned long chunk) | 696 | static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk) |
687 | { | 697 | { |
688 | return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1); | 698 | if (!bitmap->mddev->bitmap_info.external) |
699 | chunk += sizeof(bitmap_super_t) << 3; | ||
700 | return chunk & (PAGE_BITS - 1); | ||
689 | } | 701 | } |
690 | 702 | ||
691 | /* | 703 | /* |
@@ -698,8 +710,9 @@ static inline unsigned long file_page_offset(unsigned long chunk) | |||
698 | static inline struct page *filemap_get_page(struct bitmap *bitmap, | 710 | static inline struct page *filemap_get_page(struct bitmap *bitmap, |
699 | unsigned long chunk) | 711 | unsigned long chunk) |
700 | { | 712 | { |
701 | if (file_page_index(chunk) >= bitmap->file_pages) return NULL; | 713 | if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL; |
702 | return bitmap->filemap[file_page_index(chunk) - file_page_index(0)]; | 714 | return bitmap->filemap[file_page_index(bitmap, chunk) |
715 | - file_page_index(bitmap, 0)]; | ||
703 | } | 716 | } |
704 | 717 | ||
705 | 718 | ||
@@ -722,7 +735,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap) | |||
722 | spin_unlock_irqrestore(&bitmap->lock, flags); | 735 | spin_unlock_irqrestore(&bitmap->lock, flags); |
723 | 736 | ||
724 | while (pages--) | 737 | while (pages--) |
725 | if (map[pages]->index != 0) /* 0 is sb_page, release it below */ | 738 | if (map[pages] != sb_page) /* 0 is sb_page, release it below */ |
726 | free_buffers(map[pages]); | 739 | free_buffers(map[pages]); |
727 | kfree(map); | 740 | kfree(map); |
728 | kfree(attr); | 741 | kfree(attr); |
@@ -833,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) | |||
833 | 846 | ||
834 | page = filemap_get_page(bitmap, chunk); | 847 | page = filemap_get_page(bitmap, chunk); |
835 | if (!page) return; | 848 | if (!page) return; |
836 | bit = file_page_offset(chunk); | 849 | bit = file_page_offset(bitmap, chunk); |
837 | 850 | ||
838 | /* set the bit */ | 851 | /* set the bit */ |
839 | kaddr = kmap_atomic(page, KM_USER0); | 852 | kaddr = kmap_atomic(page, KM_USER0); |
@@ -931,14 +944,17 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
931 | "recovery\n", bmname(bitmap)); | 944 | "recovery\n", bmname(bitmap)); |
932 | 945 | ||
933 | bytes = (chunks + 7) / 8; | 946 | bytes = (chunks + 7) / 8; |
947 | if (!bitmap->mddev->bitmap_info.external) | ||
948 | bytes += sizeof(bitmap_super_t); | ||
934 | 949 | ||
935 | num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE; | 950 | |
951 | num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE; | ||
936 | 952 | ||
937 | if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) { | 953 | if (file && i_size_read(file->f_mapping->host) < bytes) { |
938 | printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", | 954 | printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", |
939 | bmname(bitmap), | 955 | bmname(bitmap), |
940 | (unsigned long) i_size_read(file->f_mapping->host), | 956 | (unsigned long) i_size_read(file->f_mapping->host), |
941 | bytes + sizeof(bitmap_super_t)); | 957 | bytes); |
942 | goto err; | 958 | goto err; |
943 | } | 959 | } |
944 | 960 | ||
@@ -959,17 +975,16 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
959 | 975 | ||
960 | for (i = 0; i < chunks; i++) { | 976 | for (i = 0; i < chunks; i++) { |
961 | int b; | 977 | int b; |
962 | index = file_page_index(i); | 978 | index = file_page_index(bitmap, i); |
963 | bit = file_page_offset(i); | 979 | bit = file_page_offset(bitmap, i); |
964 | if (index != oldindex) { /* this is a new page, read it in */ | 980 | if (index != oldindex) { /* this is a new page, read it in */ |
965 | int count; | 981 | int count; |
966 | /* unmap the old page, we're done with it */ | 982 | /* unmap the old page, we're done with it */ |
967 | if (index == num_pages-1) | 983 | if (index == num_pages-1) |
968 | count = bytes + sizeof(bitmap_super_t) | 984 | count = bytes - index * PAGE_SIZE; |
969 | - index * PAGE_SIZE; | ||
970 | else | 985 | else |
971 | count = PAGE_SIZE; | 986 | count = PAGE_SIZE; |
972 | if (index == 0) { | 987 | if (index == 0 && bitmap->sb_page) { |
973 | /* | 988 | /* |
974 | * if we're here then the superblock page | 989 | * if we're here then the superblock page |
975 | * contains some bits (PAGE_SIZE != sizeof sb) | 990 | * contains some bits (PAGE_SIZE != sizeof sb) |
@@ -1164,7 +1179,8 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1164 | /* We are possibly going to clear some bits, so make | 1179 | /* We are possibly going to clear some bits, so make |
1165 | * sure that events_cleared is up-to-date. | 1180 | * sure that events_cleared is up-to-date. |
1166 | */ | 1181 | */ |
1167 | if (bitmap->need_sync) { | 1182 | if (bitmap->need_sync && |
1183 | bitmap->mddev->bitmap_info.external == 0) { | ||
1168 | bitmap_super_t *sb; | 1184 | bitmap_super_t *sb; |
1169 | bitmap->need_sync = 0; | 1185 | bitmap->need_sync = 0; |
1170 | sb = kmap_atomic(bitmap->sb_page, KM_USER0); | 1186 | sb = kmap_atomic(bitmap->sb_page, KM_USER0); |
@@ -1174,7 +1190,8 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1174 | write_page(bitmap, bitmap->sb_page, 1); | 1190 | write_page(bitmap, bitmap->sb_page, 1); |
1175 | } | 1191 | } |
1176 | spin_lock_irqsave(&bitmap->lock, flags); | 1192 | spin_lock_irqsave(&bitmap->lock, flags); |
1177 | clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); | 1193 | if (!bitmap->need_sync) |
1194 | clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); | ||
1178 | } | 1195 | } |
1179 | bmc = bitmap_get_counter(bitmap, | 1196 | bmc = bitmap_get_counter(bitmap, |
1180 | (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), | 1197 | (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), |
@@ -1189,7 +1206,7 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1189 | if (*bmc == 2) { | 1206 | if (*bmc == 2) { |
1190 | *bmc=1; /* maybe clear the bit next time */ | 1207 | *bmc=1; /* maybe clear the bit next time */ |
1191 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); | 1208 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); |
1192 | } else if (*bmc == 1) { | 1209 | } else if (*bmc == 1 && !bitmap->need_sync) { |
1193 | /* we can clear the bit */ | 1210 | /* we can clear the bit */ |
1194 | *bmc = 0; | 1211 | *bmc = 0; |
1195 | bitmap_count_page(bitmap, | 1212 | bitmap_count_page(bitmap, |
@@ -1199,9 +1216,11 @@ void bitmap_daemon_work(mddev_t *mddev) | |||
1199 | /* clear the bit */ | 1216 | /* clear the bit */ |
1200 | paddr = kmap_atomic(page, KM_USER0); | 1217 | paddr = kmap_atomic(page, KM_USER0); |
1201 | if (bitmap->flags & BITMAP_HOSTENDIAN) | 1218 | if (bitmap->flags & BITMAP_HOSTENDIAN) |
1202 | clear_bit(file_page_offset(j), paddr); | 1219 | clear_bit(file_page_offset(bitmap, j), |
1220 | paddr); | ||
1203 | else | 1221 | else |
1204 | ext2_clear_bit(file_page_offset(j), paddr); | 1222 | ext2_clear_bit(file_page_offset(bitmap, j), |
1223 | paddr); | ||
1205 | kunmap_atomic(paddr, KM_USER0); | 1224 | kunmap_atomic(paddr, KM_USER0); |
1206 | } | 1225 | } |
1207 | } else | 1226 | } else |
@@ -1356,6 +1375,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto | |||
1356 | bitmap->events_cleared < bitmap->mddev->events) { | 1375 | bitmap->events_cleared < bitmap->mddev->events) { |
1357 | bitmap->events_cleared = bitmap->mddev->events; | 1376 | bitmap->events_cleared = bitmap->mddev->events; |
1358 | bitmap->need_sync = 1; | 1377 | bitmap->need_sync = 1; |
1378 | sysfs_notify_dirent(bitmap->sysfs_can_clear); | ||
1359 | } | 1379 | } |
1360 | 1380 | ||
1361 | if (!success && ! (*bmc & NEEDED_MASK)) | 1381 | if (!success && ! (*bmc & NEEDED_MASK)) |
@@ -1613,6 +1633,9 @@ void bitmap_destroy(mddev_t *mddev) | |||
1613 | if (mddev->thread) | 1633 | if (mddev->thread) |
1614 | mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; | 1634 | mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; |
1615 | 1635 | ||
1636 | if (bitmap->sysfs_can_clear) | ||
1637 | sysfs_put(bitmap->sysfs_can_clear); | ||
1638 | |||
1616 | bitmap_free(bitmap); | 1639 | bitmap_free(bitmap); |
1617 | } | 1640 | } |
1618 | 1641 | ||
@@ -1629,6 +1652,7 @@ int bitmap_create(mddev_t *mddev) | |||
1629 | struct file *file = mddev->bitmap_info.file; | 1652 | struct file *file = mddev->bitmap_info.file; |
1630 | int err; | 1653 | int err; |
1631 | sector_t start; | 1654 | sector_t start; |
1655 | struct sysfs_dirent *bm; | ||
1632 | 1656 | ||
1633 | BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); | 1657 | BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); |
1634 | 1658 | ||
@@ -1648,6 +1672,13 @@ int bitmap_create(mddev_t *mddev) | |||
1648 | 1672 | ||
1649 | bitmap->mddev = mddev; | 1673 | bitmap->mddev = mddev; |
1650 | 1674 | ||
1675 | bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap"); | ||
1676 | if (bm) { | ||
1677 | bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear"); | ||
1678 | sysfs_put(bm); | ||
1679 | } else | ||
1680 | bitmap->sysfs_can_clear = NULL; | ||
1681 | |||
1651 | bitmap->file = file; | 1682 | bitmap->file = file; |
1652 | if (file) { | 1683 | if (file) { |
1653 | get_file(file); | 1684 | get_file(file); |
@@ -1658,7 +1689,16 @@ int bitmap_create(mddev_t *mddev) | |||
1658 | vfs_fsync(file, file->f_dentry, 1); | 1689 | vfs_fsync(file, file->f_dentry, 1); |
1659 | } | 1690 | } |
1660 | /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ | 1691 | /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ |
1661 | err = bitmap_read_sb(bitmap); | 1692 | if (!mddev->bitmap_info.external) |
1693 | err = bitmap_read_sb(bitmap); | ||
1694 | else { | ||
1695 | err = 0; | ||
1696 | if (mddev->bitmap_info.chunksize == 0 || | ||
1697 | mddev->bitmap_info.daemon_sleep == 0) | ||
1698 | /* chunksize and time_base need to be | ||
1699 | * set first. */ | ||
1700 | err = -EINVAL; | ||
1701 | } | ||
1662 | if (err) | 1702 | if (err) |
1663 | goto error; | 1703 | goto error; |
1664 | 1704 | ||
@@ -1777,7 +1817,8 @@ location_store(mddev_t *mddev, const char *buf, size_t len) | |||
1777 | return rv; | 1817 | return rv; |
1778 | if (offset == 0) | 1818 | if (offset == 0) |
1779 | return -EINVAL; | 1819 | return -EINVAL; |
1780 | if (mddev->major_version == 0 && | 1820 | if (mddev->bitmap_info.external == 0 && |
1821 | mddev->major_version == 0 && | ||
1781 | offset != mddev->bitmap_info.default_offset) | 1822 | offset != mddev->bitmap_info.default_offset) |
1782 | return -EINVAL; | 1823 | return -EINVAL; |
1783 | mddev->bitmap_info.offset = offset; | 1824 | mddev->bitmap_info.offset = offset; |
@@ -1906,11 +1947,66 @@ chunksize_store(mddev_t *mddev, const char *buf, size_t len) | |||
1906 | static struct md_sysfs_entry bitmap_chunksize = | 1947 | static struct md_sysfs_entry bitmap_chunksize = |
1907 | __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store); | 1948 | __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store); |
1908 | 1949 | ||
1950 | static ssize_t metadata_show(mddev_t *mddev, char *page) | ||
1951 | { | ||
1952 | return sprintf(page, "%s\n", (mddev->bitmap_info.external | ||
1953 | ? "external" : "internal")); | ||
1954 | } | ||
1955 | |||
1956 | static ssize_t metadata_store(mddev_t *mddev, const char *buf, size_t len) | ||
1957 | { | ||
1958 | if (mddev->bitmap || | ||
1959 | mddev->bitmap_info.file || | ||
1960 | mddev->bitmap_info.offset) | ||
1961 | return -EBUSY; | ||
1962 | if (strncmp(buf, "external", 8) == 0) | ||
1963 | mddev->bitmap_info.external = 1; | ||
1964 | else if (strncmp(buf, "internal", 8) == 0) | ||
1965 | mddev->bitmap_info.external = 0; | ||
1966 | else | ||
1967 | return -EINVAL; | ||
1968 | return len; | ||
1969 | } | ||
1970 | |||
1971 | static struct md_sysfs_entry bitmap_metadata = | ||
1972 | __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store); | ||
1973 | |||
1974 | static ssize_t can_clear_show(mddev_t *mddev, char *page) | ||
1975 | { | ||
1976 | int len; | ||
1977 | if (mddev->bitmap) | ||
1978 | len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ? | ||
1979 | "false" : "true")); | ||
1980 | else | ||
1981 | len = sprintf(page, "\n"); | ||
1982 | return len; | ||
1983 | } | ||
1984 | |||
1985 | static ssize_t can_clear_store(mddev_t *mddev, const char *buf, size_t len) | ||
1986 | { | ||
1987 | if (mddev->bitmap == NULL) | ||
1988 | return -ENOENT; | ||
1989 | if (strncmp(buf, "false", 5) == 0) | ||
1990 | mddev->bitmap->need_sync = 1; | ||
1991 | else if (strncmp(buf, "true", 4) == 0) { | ||
1992 | if (mddev->degraded) | ||
1993 | return -EBUSY; | ||
1994 | mddev->bitmap->need_sync = 0; | ||
1995 | } else | ||
1996 | return -EINVAL; | ||
1997 | return len; | ||
1998 | } | ||
1999 | |||
2000 | static struct md_sysfs_entry bitmap_can_clear = | ||
2001 | __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store); | ||
2002 | |||
1909 | static struct attribute *md_bitmap_attrs[] = { | 2003 | static struct attribute *md_bitmap_attrs[] = { |
1910 | &bitmap_location.attr, | 2004 | &bitmap_location.attr, |
1911 | &bitmap_timeout.attr, | 2005 | &bitmap_timeout.attr, |
1912 | &bitmap_backlog.attr, | 2006 | &bitmap_backlog.attr, |
1913 | &bitmap_chunksize.attr, | 2007 | &bitmap_chunksize.attr, |
2008 | &bitmap_metadata.attr, | ||
2009 | &bitmap_can_clear.attr, | ||
1914 | NULL | 2010 | NULL |
1915 | }; | 2011 | }; |
1916 | struct attribute_group md_bitmap_group = { | 2012 | struct attribute_group md_bitmap_group = { |
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index 50ee4240f5db..cb821d76d1b4 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h | |||
@@ -118,16 +118,6 @@ typedef __u16 bitmap_counter_t; | |||
118 | (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1) | 118 | (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1) |
119 | #define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1) | 119 | #define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1) |
120 | 120 | ||
121 | /* | ||
122 | * on-disk bitmap: | ||
123 | * | ||
124 | * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap | ||
125 | * file a page at a time. There's a superblock at the start of the file. | ||
126 | */ | ||
127 | |||
128 | /* map chunks (bits) to file pages - offset by the size of the superblock */ | ||
129 | #define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3)) | ||
130 | |||
131 | #endif | 121 | #endif |
132 | 122 | ||
133 | /* | 123 | /* |
@@ -250,6 +240,7 @@ struct bitmap { | |||
250 | wait_queue_head_t write_wait; | 240 | wait_queue_head_t write_wait; |
251 | wait_queue_head_t overflow_wait; | 241 | wait_queue_head_t overflow_wait; |
252 | 242 | ||
243 | struct sysfs_dirent *sysfs_can_clear; | ||
253 | }; | 244 | }; |
254 | 245 | ||
255 | /* the bitmap API */ | 246 | /* the bitmap API */ |
diff --git a/drivers/md/md.h b/drivers/md/md.h index fce02073f1a4..d9138885b87f 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -296,6 +296,7 @@ struct mddev_s | |||
296 | unsigned long chunksize; | 296 | unsigned long chunksize; |
297 | unsigned long daemon_sleep; /* how many seconds between updates? */ | 297 | unsigned long daemon_sleep; /* how many seconds between updates? */ |
298 | unsigned long max_write_behind; /* write-behind mode */ | 298 | unsigned long max_write_behind; /* write-behind mode */ |
299 | int external; | ||
299 | } bitmap_info; | 300 | } bitmap_info; |
300 | 301 | ||
301 | struct list_head all_mddevs; | 302 | struct list_head all_mddevs; |