diff options
| author | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2014-06-06 12:50:56 -0400 |
|---|---|---|
| committer | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2015-02-23 08:30:11 -0500 |
| commit | b97e92574c0bf335db1cd2ec491d8ff5cd5d0b49 (patch) | |
| tree | acb492fceb25430e7a9c1b8404fe70f17062c3b9 /drivers/md | |
| parent | cf921cc19cf7c1e99f730a2faa02d80817d684a2 (diff) | |
Use separate bitmaps for each node in the cluster
On-disk format:
0                     4k                    8k                    12k
-------------------------------------------------------------------
| idle                | md super            | bm super [0] + bits |
| bm bits[0, contd]   | bm super[1] + bits  | bm bits[1, contd]   |
| bm super[2] + bits  | bm bits [2, contd]  | bm super[3] + bits  |
| bm bits [3, contd]  |                     |                     |
Bitmap super has a field nodes, which defines the maximum number
of nodes the device can use. While reading the bitmap super, if
the cluster finds out that the number of nodes is > 0:
1. Requests the md-cluster module.
2. Calls md_cluster_ops->join(), which sets up clustering such as
joining DLM lockspace.
The first time through, the first bitmap is read. After the call
to md_setup_cluster(), the bitmap offset is adjusted and the
superblock is re-read. This also ensures the bitmap is read under
the bitmap lock (once the bitmap lock is introduced in later patches).
Questions:
1. cluster name is repeated in all bitmap supers. Is that okay?
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Diffstat (limited to 'drivers/md')
| -rw-r--r-- | drivers/md/bitmap.c | 67 | ||||
| -rw-r--r-- | drivers/md/bitmap.h | 1 | ||||
| -rw-r--r-- | drivers/md/md-cluster.c | 6 |
3 files changed, 64 insertions, 10 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index b43a75a246e7..b1d94eee3346 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c | |||
| @@ -205,6 +205,10 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) | |||
| 205 | struct block_device *bdev; | 205 | struct block_device *bdev; |
| 206 | struct mddev *mddev = bitmap->mddev; | 206 | struct mddev *mddev = bitmap->mddev; |
| 207 | struct bitmap_storage *store = &bitmap->storage; | 207 | struct bitmap_storage *store = &bitmap->storage; |
| 208 | int node_offset = 0; | ||
| 209 | |||
| 210 | if (mddev_is_clustered(bitmap->mddev)) | ||
| 211 | node_offset = bitmap->cluster_slot * store->file_pages; | ||
| 208 | 212 | ||
| 209 | while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { | 213 | while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { |
| 210 | int size = PAGE_SIZE; | 214 | int size = PAGE_SIZE; |
| @@ -549,6 +553,7 @@ static int bitmap_read_sb(struct bitmap *bitmap) | |||
| 549 | unsigned long sectors_reserved = 0; | 553 | unsigned long sectors_reserved = 0; |
| 550 | int err = -EINVAL; | 554 | int err = -EINVAL; |
| 551 | struct page *sb_page; | 555 | struct page *sb_page; |
| 556 | int cluster_setup_done = 0; | ||
| 552 | 557 | ||
| 553 | if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) { | 558 | if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) { |
| 554 | chunksize = 128 * 1024 * 1024; | 559 | chunksize = 128 * 1024 * 1024; |
| @@ -564,6 +569,7 @@ static int bitmap_read_sb(struct bitmap *bitmap) | |||
| 564 | return -ENOMEM; | 569 | return -ENOMEM; |
| 565 | bitmap->storage.sb_page = sb_page; | 570 | bitmap->storage.sb_page = sb_page; |
| 566 | 571 | ||
| 572 | re_read: | ||
| 567 | if (bitmap->storage.file) { | 573 | if (bitmap->storage.file) { |
| 568 | loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host); | 574 | loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host); |
| 569 | int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize; | 575 | int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize; |
| @@ -579,6 +585,7 @@ static int bitmap_read_sb(struct bitmap *bitmap) | |||
| 579 | if (err) | 585 | if (err) |
| 580 | return err; | 586 | return err; |
| 581 | 587 | ||
| 588 | err = -EINVAL; | ||
| 582 | sb = kmap_atomic(sb_page); | 589 | sb = kmap_atomic(sb_page); |
| 583 | 590 | ||
| 584 | chunksize = le32_to_cpu(sb->chunksize); | 591 | chunksize = le32_to_cpu(sb->chunksize); |
| @@ -586,6 +593,7 @@ static int bitmap_read_sb(struct bitmap *bitmap) | |||
| 586 | write_behind = le32_to_cpu(sb->write_behind); | 593 | write_behind = le32_to_cpu(sb->write_behind); |
| 587 | sectors_reserved = le32_to_cpu(sb->sectors_reserved); | 594 | sectors_reserved = le32_to_cpu(sb->sectors_reserved); |
| 588 | nodes = le32_to_cpu(sb->nodes); | 595 | nodes = le32_to_cpu(sb->nodes); |
| 596 | strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64); | ||
| 589 | 597 | ||
| 590 | /* verify that the bitmap-specific fields are valid */ | 598 | /* verify that the bitmap-specific fields are valid */ |
| 591 | if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) | 599 | if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) |
| @@ -622,7 +630,7 @@ static int bitmap_read_sb(struct bitmap *bitmap) | |||
| 622 | goto out; | 630 | goto out; |
| 623 | } | 631 | } |
| 624 | events = le64_to_cpu(sb->events); | 632 | events = le64_to_cpu(sb->events); |
| 625 | if (events < bitmap->mddev->events) { | 633 | if (!nodes && (events < bitmap->mddev->events)) { |
| 626 | printk(KERN_INFO | 634 | printk(KERN_INFO |
| 627 | "%s: bitmap file is out of date (%llu < %llu) " | 635 | "%s: bitmap file is out of date (%llu < %llu) " |
| 628 | "-- forcing full recovery\n", | 636 | "-- forcing full recovery\n", |
| @@ -639,8 +647,34 @@ static int bitmap_read_sb(struct bitmap *bitmap) | |||
| 639 | bitmap->events_cleared = le64_to_cpu(sb->events_cleared); | 647 | bitmap->events_cleared = le64_to_cpu(sb->events_cleared); |
| 640 | strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64); | 648 | strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64); |
| 641 | err = 0; | 649 | err = 0; |
| 650 | |||
| 642 | out: | 651 | out: |
| 643 | kunmap_atomic(sb); | 652 | kunmap_atomic(sb); |
| 653 | if (nodes && !cluster_setup_done) { | ||
| 654 | sector_t bm_blocks; | ||
| 655 | |||
| 656 | bm_blocks = sector_div(bitmap->mddev->resync_max_sectors, (chunksize >> 9)); | ||
| 657 | bm_blocks = bm_blocks << 3; | ||
| 658 | /* We have bitmap supers at 4k boundaries, hence this | ||
| 659 | * is hardcoded */ | ||
| 660 | bm_blocks = DIV_ROUND_UP(bm_blocks, 4096); | ||
| 661 | err = md_setup_cluster(bitmap->mddev, nodes); | ||
| 662 | if (err) { | ||
| 663 | pr_err("%s: Could not setup cluster service (%d)\n", | ||
| 664 | bmname(bitmap), err); | ||
| 665 | goto out_no_sb; | ||
| 666 | } | ||
| 667 | bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev); | ||
| 668 | bitmap->mddev->bitmap_info.offset += | ||
| 669 | bitmap->cluster_slot * (bm_blocks << 3); | ||
| 670 | pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__, | ||
| 671 | bitmap->cluster_slot, | ||
| 672 | (unsigned long long)bitmap->mddev->bitmap_info.offset); | ||
| 673 | cluster_setup_done = 1; | ||
| 674 | goto re_read; | ||
| 675 | } | ||
| 676 | |||
| 677 | |||
| 644 | out_no_sb: | 678 | out_no_sb: |
| 645 | if (test_bit(BITMAP_STALE, &bitmap->flags)) | 679 | if (test_bit(BITMAP_STALE, &bitmap->flags)) |
| 646 | bitmap->events_cleared = bitmap->mddev->events; | 680 | bitmap->events_cleared = bitmap->mddev->events; |
| @@ -651,8 +685,11 @@ out_no_sb: | |||
| 651 | if (bitmap->mddev->bitmap_info.space == 0 || | 685 | if (bitmap->mddev->bitmap_info.space == 0 || |
| 652 | bitmap->mddev->bitmap_info.space > sectors_reserved) | 686 | bitmap->mddev->bitmap_info.space > sectors_reserved) |
| 653 | bitmap->mddev->bitmap_info.space = sectors_reserved; | 687 | bitmap->mddev->bitmap_info.space = sectors_reserved; |
| 654 | if (err) | 688 | if (err) { |
| 655 | bitmap_print_sb(bitmap); | 689 | bitmap_print_sb(bitmap); |
| 690 | if (cluster_setup_done) | ||
| 691 | md_cluster_stop(bitmap->mddev); | ||
| 692 | } | ||
| 656 | return err; | 693 | return err; |
| 657 | } | 694 | } |
| 658 | 695 | ||
| @@ -697,9 +734,10 @@ static inline struct page *filemap_get_page(struct bitmap_storage *store, | |||
| 697 | } | 734 | } |
| 698 | 735 | ||
| 699 | static int bitmap_storage_alloc(struct bitmap_storage *store, | 736 | static int bitmap_storage_alloc(struct bitmap_storage *store, |
| 700 | unsigned long chunks, int with_super) | 737 | unsigned long chunks, int with_super, |
| 738 | int slot_number) | ||
| 701 | { | 739 | { |
| 702 | int pnum; | 740 | int pnum, offset = 0; |
| 703 | unsigned long num_pages; | 741 | unsigned long num_pages; |
| 704 | unsigned long bytes; | 742 | unsigned long bytes; |
| 705 | 743 | ||
| @@ -708,6 +746,7 @@ static int bitmap_storage_alloc(struct bitmap_storage *store, | |||
| 708 | bytes += sizeof(bitmap_super_t); | 746 | bytes += sizeof(bitmap_super_t); |
| 709 | 747 | ||
| 710 | num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); | 748 | num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); |
| 749 | offset = slot_number * (num_pages - 1); | ||
| 711 | 750 | ||
| 712 | store->filemap = kmalloc(sizeof(struct page *) | 751 | store->filemap = kmalloc(sizeof(struct page *) |
| 713 | * num_pages, GFP_KERNEL); | 752 | * num_pages, GFP_KERNEL); |
| @@ -718,20 +757,22 @@ static int bitmap_storage_alloc(struct bitmap_storage *store, | |||
| 718 | store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO); | 757 | store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO); |
| 719 | if (store->sb_page == NULL) | 758 | if (store->sb_page == NULL) |
| 720 | return -ENOMEM; | 759 | return -ENOMEM; |
| 721 | store->sb_page->index = 0; | ||
| 722 | } | 760 | } |
| 761 | |||
| 723 | pnum = 0; | 762 | pnum = 0; |
| 724 | if (store->sb_page) { | 763 | if (store->sb_page) { |
| 725 | store->filemap[0] = store->sb_page; | 764 | store->filemap[0] = store->sb_page; |
| 726 | pnum = 1; | 765 | pnum = 1; |
| 766 | store->sb_page->index = offset; | ||
| 727 | } | 767 | } |
| 768 | |||
| 728 | for ( ; pnum < num_pages; pnum++) { | 769 | for ( ; pnum < num_pages; pnum++) { |
| 729 | store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO); | 770 | store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO); |
| 730 | if (!store->filemap[pnum]) { | 771 | if (!store->filemap[pnum]) { |
| 731 | store->file_pages = pnum; | 772 | store->file_pages = pnum; |
| 732 | return -ENOMEM; | 773 | return -ENOMEM; |
| 733 | } | 774 | } |
| 734 | store->filemap[pnum]->index = pnum; | 775 | store->filemap[pnum]->index = pnum + offset; |
| 735 | } | 776 | } |
| 736 | store->file_pages = pnum; | 777 | store->file_pages = pnum; |
| 737 | 778 | ||
| @@ -940,7 +981,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n | |||
| 940 | */ | 981 | */ |
| 941 | static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | 982 | static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) |
| 942 | { | 983 | { |
| 943 | unsigned long i, chunks, index, oldindex, bit; | 984 | unsigned long i, chunks, index, oldindex, bit, node_offset = 0; |
| 944 | struct page *page = NULL; | 985 | struct page *page = NULL; |
| 945 | unsigned long bit_cnt = 0; | 986 | unsigned long bit_cnt = 0; |
| 946 | struct file *file; | 987 | struct file *file; |
| @@ -986,6 +1027,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
| 986 | if (!bitmap->mddev->bitmap_info.external) | 1027 | if (!bitmap->mddev->bitmap_info.external) |
| 987 | offset = sizeof(bitmap_super_t); | 1028 | offset = sizeof(bitmap_super_t); |
| 988 | 1029 | ||
| 1030 | if (mddev_is_clustered(bitmap->mddev)) | ||
| 1031 | node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE)); | ||
| 1032 | |||
| 989 | for (i = 0; i < chunks; i++) { | 1033 | for (i = 0; i < chunks; i++) { |
| 990 | int b; | 1034 | int b; |
| 991 | index = file_page_index(&bitmap->storage, i); | 1035 | index = file_page_index(&bitmap->storage, i); |
| @@ -1006,7 +1050,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
| 1006 | bitmap->mddev, | 1050 | bitmap->mddev, |
| 1007 | bitmap->mddev->bitmap_info.offset, | 1051 | bitmap->mddev->bitmap_info.offset, |
| 1008 | page, | 1052 | page, |
| 1009 | index, count); | 1053 | index + node_offset, count); |
| 1010 | 1054 | ||
| 1011 | if (ret) | 1055 | if (ret) |
| 1012 | goto err; | 1056 | goto err; |
| @@ -1212,7 +1256,6 @@ void bitmap_daemon_work(struct mddev *mddev) | |||
| 1212 | j < bitmap->storage.file_pages | 1256 | j < bitmap->storage.file_pages |
| 1213 | && !test_bit(BITMAP_STALE, &bitmap->flags); | 1257 | && !test_bit(BITMAP_STALE, &bitmap->flags); |
| 1214 | j++) { | 1258 | j++) { |
| 1215 | |||
| 1216 | if (test_page_attr(bitmap, j, | 1259 | if (test_page_attr(bitmap, j, |
| 1217 | BITMAP_PAGE_DIRTY)) | 1260 | BITMAP_PAGE_DIRTY)) |
| 1218 | /* bitmap_unplug will handle the rest */ | 1261 | /* bitmap_unplug will handle the rest */ |
| @@ -1596,6 +1639,9 @@ static void bitmap_free(struct bitmap *bitmap) | |||
| 1596 | if (!bitmap) /* there was no bitmap */ | 1639 | if (!bitmap) /* there was no bitmap */ |
| 1597 | return; | 1640 | return; |
| 1598 | 1641 | ||
| 1642 | if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info) | ||
| 1643 | md_cluster_stop(bitmap->mddev); | ||
| 1644 | |||
| 1599 | /* Shouldn't be needed - but just in case.... */ | 1645 | /* Shouldn't be needed - but just in case.... */ |
| 1600 | wait_event(bitmap->write_wait, | 1646 | wait_event(bitmap->write_wait, |
| 1601 | atomic_read(&bitmap->pending_writes) == 0); | 1647 | atomic_read(&bitmap->pending_writes) == 0); |
| @@ -1854,7 +1900,8 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, | |||
| 1854 | memset(&store, 0, sizeof(store)); | 1900 | memset(&store, 0, sizeof(store)); |
| 1855 | if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file) | 1901 | if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file) |
| 1856 | ret = bitmap_storage_alloc(&store, chunks, | 1902 | ret = bitmap_storage_alloc(&store, chunks, |
| 1857 | !bitmap->mddev->bitmap_info.external); | 1903 | !bitmap->mddev->bitmap_info.external, |
| 1904 | bitmap->cluster_slot); | ||
| 1858 | if (ret) | 1905 | if (ret) |
| 1859 | goto err; | 1906 | goto err; |
| 1860 | 1907 | ||
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index ec9032f105b8..4e9acb08bbe0 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h | |||
| @@ -227,6 +227,7 @@ struct bitmap { | |||
| 227 | wait_queue_head_t behind_wait; | 227 | wait_queue_head_t behind_wait; |
| 228 | 228 | ||
| 229 | struct kernfs_node *sysfs_can_clear; | 229 | struct kernfs_node *sysfs_can_clear; |
| 230 | int cluster_slot; /* Slot offset for clustered env */ | ||
| 230 | }; | 231 | }; |
| 231 | 232 | ||
| 232 | /* the bitmap API */ | 233 | /* the bitmap API */ |
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 1f3c8f39ecb2..66700e244a40 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c | |||
| @@ -196,6 +196,12 @@ static int join(struct mddev *mddev, int nodes) | |||
| 196 | if (ret) | 196 | if (ret) |
| 197 | goto err; | 197 | goto err; |
| 198 | wait_for_completion(&cinfo->completion); | 198 | wait_for_completion(&cinfo->completion); |
| 199 | if (nodes <= cinfo->slot_number) { | ||
| 200 | pr_err("md-cluster: Slot allotted(%d) greater than available slots(%d)", cinfo->slot_number - 1, | ||
| 201 | nodes); | ||
| 202 | ret = -ERANGE; | ||
| 203 | goto err; | ||
| 204 | } | ||
| 199 | cinfo->sb_lock = lockres_init(mddev, "cmd-super", | 205 | cinfo->sb_lock = lockres_init(mddev, "cmd-super", |
| 200 | NULL, 0); | 206 | NULL, 0); |
| 201 | if (!cinfo->sb_lock) { | 207 | if (!cinfo->sb_lock) { |
