diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-03-25 16:50:33 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:01 -0400 |
commit | 593060d756e0c2382d59cf00d5f3b9e3a336c408 (patch) | |
tree | c102f8abd9210eeba8ae4e7f6626b80cfed1b8d8 /fs/btrfs/volumes.c | |
parent | 8a4b83cc8bd75fca29ac68615896d9e92820e7c2 (diff) |
Btrfs: Implement raid0 when multiple devices are present
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 130 |
1 files changed, 100 insertions, 30 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 263f01cc3db4..d8fce32a3bbc 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/bio.h> | 19 | #include <linux/bio.h> |
20 | #include <linux/buffer_head.h> | 20 | #include <linux/buffer_head.h> |
21 | #include <asm/div64.h> | ||
21 | #include "ctree.h" | 22 | #include "ctree.h" |
22 | #include "extent_map.h" | 23 | #include "extent_map.h" |
23 | #include "disk-io.h" | 24 | #include "disk-io.h" |
@@ -25,10 +26,24 @@ | |||
25 | #include "print-tree.h" | 26 | #include "print-tree.h" |
26 | #include "volumes.h" | 27 | #include "volumes.h" |
27 | 28 | ||
28 | struct map_lookup { | 29 | struct stripe { |
29 | struct btrfs_device *dev; | 30 | struct btrfs_device *dev; |
30 | u64 physical; | 31 | u64 physical; |
31 | }; | 32 | }; |
33 | |||
34 | struct map_lookup { | ||
35 | u64 type; | ||
36 | int io_align; | ||
37 | int io_width; | ||
38 | int stripe_len; | ||
39 | int sector_size; | ||
40 | int num_stripes; | ||
41 | struct stripe stripes[]; | ||
42 | }; | ||
43 | |||
44 | #define map_lookup_size(n) (sizeof(struct map_lookup) + \ | ||
45 | (sizeof(struct stripe) * (n))) | ||
46 | |||
32 | static DEFINE_MUTEX(uuid_mutex); | 47 | static DEFINE_MUTEX(uuid_mutex); |
33 | static LIST_HEAD(fs_uuids); | 48 | static LIST_HEAD(fs_uuids); |
34 | 49 | ||
@@ -592,6 +607,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
592 | u64 *num_bytes, u64 type) | 607 | u64 *num_bytes, u64 type) |
593 | { | 608 | { |
594 | u64 dev_offset; | 609 | u64 dev_offset; |
610 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
595 | struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; | 611 | struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; |
596 | struct btrfs_stripe *stripes; | 612 | struct btrfs_stripe *stripes; |
597 | struct btrfs_device *device = NULL; | 613 | struct btrfs_device *device = NULL; |
@@ -610,10 +626,18 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
610 | int looped = 0; | 626 | int looped = 0; |
611 | int ret; | 627 | int ret; |
612 | int index; | 628 | int index; |
629 | int stripe_len = 64 * 1024; | ||
613 | struct btrfs_key key; | 630 | struct btrfs_key key; |
614 | 631 | ||
615 | if (list_empty(dev_list)) | 632 | if (list_empty(dev_list)) |
616 | return -ENOSPC; | 633 | return -ENOSPC; |
634 | |||
635 | if (type & BTRFS_BLOCK_GROUP_RAID0) | ||
636 | num_stripes = btrfs_super_num_devices(&info->super_copy); | ||
637 | if (type & BTRFS_BLOCK_GROUP_DATA) | ||
638 | stripe_len = 64 * 1024; | ||
639 | if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) | ||
640 | stripe_len = 32 * 1024; | ||
617 | again: | 641 | again: |
618 | INIT_LIST_HEAD(&private_devs); | 642 | INIT_LIST_HEAD(&private_devs); |
619 | cur = dev_list->next; | 643 | cur = dev_list->next; |
@@ -650,9 +674,15 @@ again: | |||
650 | if (!chunk) | 674 | if (!chunk) |
651 | return -ENOMEM; | 675 | return -ENOMEM; |
652 | 676 | ||
677 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
678 | if (!map) { | ||
679 | kfree(chunk); | ||
680 | return -ENOMEM; | ||
681 | } | ||
682 | |||
653 | stripes = &chunk->stripe; | 683 | stripes = &chunk->stripe; |
654 | 684 | ||
655 | *num_bytes = calc_size; | 685 | *num_bytes = calc_size * num_stripes; |
656 | index = 0; | 686 | index = 0; |
657 | while(index < num_stripes) { | 687 | while(index < num_stripes) { |
658 | BUG_ON(list_empty(&private_devs)); | 688 | BUG_ON(list_empty(&private_devs)); |
@@ -669,6 +699,8 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); | |||
669 | ret = btrfs_update_device(trans, device); | 699 | ret = btrfs_update_device(trans, device); |
670 | BUG_ON(ret); | 700 | BUG_ON(ret); |
671 | 701 | ||
702 | map->stripes[index].dev = device; | ||
703 | map->stripes[index].physical = dev_offset; | ||
672 | btrfs_set_stack_stripe_devid(stripes + index, device->devid); | 704 | btrfs_set_stack_stripe_devid(stripes + index, device->devid); |
673 | btrfs_set_stack_stripe_offset(stripes + index, dev_offset); | 705 | btrfs_set_stack_stripe_offset(stripes + index, dev_offset); |
674 | physical = dev_offset; | 706 | physical = dev_offset; |
@@ -680,12 +712,18 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); | |||
680 | key.offset = *num_bytes; | 712 | key.offset = *num_bytes; |
681 | key.type = BTRFS_CHUNK_ITEM_KEY; | 713 | key.type = BTRFS_CHUNK_ITEM_KEY; |
682 | btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); | 714 | btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); |
683 | btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024); | 715 | btrfs_set_stack_chunk_stripe_len(chunk, stripe_len); |
684 | btrfs_set_stack_chunk_type(chunk, type); | 716 | btrfs_set_stack_chunk_type(chunk, type); |
685 | btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); | 717 | btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); |
686 | btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize); | 718 | btrfs_set_stack_chunk_io_align(chunk, stripe_len); |
687 | btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize); | 719 | btrfs_set_stack_chunk_io_width(chunk, stripe_len); |
688 | btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); | 720 | btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); |
721 | map->sector_size = extent_root->sectorsize; | ||
722 | map->stripe_len = stripe_len; | ||
723 | map->io_align = stripe_len; | ||
724 | map->io_width = stripe_len; | ||
725 | map->type = type; | ||
726 | map->num_stripes = num_stripes; | ||
689 | 727 | ||
690 | ret = btrfs_insert_item(trans, chunk_root, &key, chunk, | 728 | ret = btrfs_insert_item(trans, chunk_root, &key, chunk, |
691 | btrfs_chunk_item_size(num_stripes)); | 729 | btrfs_chunk_item_size(num_stripes)); |
@@ -695,25 +733,11 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); | |||
695 | em = alloc_extent_map(GFP_NOFS); | 733 | em = alloc_extent_map(GFP_NOFS); |
696 | if (!em) | 734 | if (!em) |
697 | return -ENOMEM; | 735 | return -ENOMEM; |
698 | map = kmalloc(sizeof(*map), GFP_NOFS); | ||
699 | if (!map) { | ||
700 | free_extent_map(em); | ||
701 | return -ENOMEM; | ||
702 | } | ||
703 | |||
704 | em->bdev = (struct block_device *)map; | 736 | em->bdev = (struct block_device *)map; |
705 | em->start = key.objectid; | 737 | em->start = key.objectid; |
706 | em->len = key.offset; | 738 | em->len = key.offset; |
707 | em->block_start = 0; | 739 | em->block_start = 0; |
708 | 740 | ||
709 | map->physical = physical; | ||
710 | map->dev = device; | ||
711 | |||
712 | if (!map->dev) { | ||
713 | kfree(map); | ||
714 | free_extent_map(em); | ||
715 | return -EIO; | ||
716 | } | ||
717 | kfree(chunk); | 741 | kfree(chunk); |
718 | 742 | ||
719 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; | 743 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; |
@@ -758,6 +782,9 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, | |||
758 | struct map_lookup *map; | 782 | struct map_lookup *map; |
759 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 783 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
760 | u64 offset; | 784 | u64 offset; |
785 | u64 stripe_offset; | ||
786 | u64 stripe_nr; | ||
787 | int stripe_index; | ||
761 | 788 | ||
762 | 789 | ||
763 | spin_lock(&em_tree->lock); | 790 | spin_lock(&em_tree->lock); |
@@ -767,9 +794,40 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, | |||
767 | BUG_ON(em->start > logical || em->start + em->len < logical); | 794 | BUG_ON(em->start > logical || em->start + em->len < logical); |
768 | map = (struct map_lookup *)em->bdev; | 795 | map = (struct map_lookup *)em->bdev; |
769 | offset = logical - em->start; | 796 | offset = logical - em->start; |
770 | *phys = map->physical + offset; | 797 | |
771 | *length = em->len - offset; | 798 | stripe_nr = offset; |
772 | *dev = map->dev; | 799 | /* |
800 | * stripe_nr counts the total number of stripes we have to stride | ||
801 | * to get to this block | ||
802 | */ | ||
803 | do_div(stripe_nr, map->stripe_len); | ||
804 | |||
805 | stripe_offset = stripe_nr * map->stripe_len; | ||
806 | BUG_ON(offset < stripe_offset); | ||
807 | |||
808 | /* stripe_offset is the offset of this block in its stripe */ | ||
809 | stripe_offset = offset - stripe_offset; | ||
810 | |||
811 | /* | ||
812 | * after this do_div call, stripe_nr is the number of stripes | ||
813 | * on this device we have to walk to find the data, and | ||
814 | * stripe_index is the number of our device in the stripe array | ||
815 | */ | ||
816 | stripe_index = do_div(stripe_nr, map->num_stripes); | ||
817 | |||
818 | BUG_ON(stripe_index >= map->num_stripes); | ||
819 | |||
820 | *phys = map->stripes[stripe_index].physical + stripe_offset + | ||
821 | stripe_nr * map->stripe_len; | ||
822 | |||
823 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | ||
824 | /* we limit the length of each bio to what fits in a stripe */ | ||
825 | *length = min_t(u64, em->len - offset, | ||
826 | map->stripe_len - stripe_offset); | ||
827 | } else { | ||
828 | *length = em->len - offset; | ||
829 | } | ||
830 | *dev = map->stripes[stripe_index].dev; | ||
773 | free_extent_map(em); | 831 | free_extent_map(em); |
774 | spin_unlock(&em_tree->lock); | 832 | spin_unlock(&em_tree->lock); |
775 | return 0; | 833 | return 0; |
@@ -822,7 +880,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
822 | u64 logical; | 880 | u64 logical; |
823 | u64 length; | 881 | u64 length; |
824 | u64 devid; | 882 | u64 devid; |
883 | int num_stripes; | ||
825 | int ret; | 884 | int ret; |
885 | int i; | ||
826 | 886 | ||
827 | logical = key->objectid; | 887 | logical = key->objectid; |
828 | length = key->offset; | 888 | length = key->offset; |
@@ -846,7 +906,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
846 | em = alloc_extent_map(GFP_NOFS); | 906 | em = alloc_extent_map(GFP_NOFS); |
847 | if (!em) | 907 | if (!em) |
848 | return -ENOMEM; | 908 | return -ENOMEM; |
849 | map = kmalloc(sizeof(*map), GFP_NOFS); | 909 | num_stripes = btrfs_chunk_num_stripes(leaf, chunk); |
910 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
850 | if (!map) { | 911 | if (!map) { |
851 | free_extent_map(em); | 912 | free_extent_map(em); |
852 | return -ENOMEM; | 913 | return -ENOMEM; |
@@ -857,13 +918,22 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
857 | em->len = length; | 918 | em->len = length; |
858 | em->block_start = 0; | 919 | em->block_start = 0; |
859 | 920 | ||
860 | map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0); | 921 | map->num_stripes = num_stripes; |
861 | devid = btrfs_stripe_devid_nr(leaf, chunk, 0); | 922 | map->io_width = btrfs_chunk_io_width(leaf, chunk); |
862 | map->dev = btrfs_find_device(root, devid); | 923 | map->io_align = btrfs_chunk_io_align(leaf, chunk); |
863 | if (!map->dev) { | 924 | map->sector_size = btrfs_chunk_sector_size(leaf, chunk); |
864 | kfree(map); | 925 | map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); |
865 | free_extent_map(em); | 926 | map->type = btrfs_chunk_type(leaf, chunk); |
866 | return -EIO; | 927 | for (i = 0; i < num_stripes; i++) { |
928 | map->stripes[i].physical = | ||
929 | btrfs_stripe_offset_nr(leaf, chunk, i); | ||
930 | devid = btrfs_stripe_devid_nr(leaf, chunk, i); | ||
931 | map->stripes[i].dev = btrfs_find_device(root, devid); | ||
932 | if (!map->stripes[i].dev) { | ||
933 | kfree(map); | ||
934 | free_extent_map(em); | ||
935 | return -EIO; | ||
936 | } | ||
867 | } | 937 | } |
868 | 938 | ||
869 | spin_lock(&map_tree->map_tree.lock); | 939 | spin_lock(&map_tree->map_tree.lock); |