diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-04-03 16:29:03 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:01 -0400 |
commit | 8790d502e4401a4a3a4175b83a3a47e8d595c771 (patch) | |
tree | 7e01c053d6a99c3c2a6fb71a7338b083e953f0b3 /fs/btrfs/volumes.c | |
parent | 0ef8b2428a339e83fe225e82ef8bd0ea3f62b180 (diff) |
Btrfs: Add support for mirroring across drives
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 154 |
1 file changed, 126 insertions, 28 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 169be0f7285b..bc3c0b97588e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -31,6 +31,13 @@ struct stripe { | |||
31 | u64 physical; | 31 | u64 physical; |
32 | }; | 32 | }; |
33 | 33 | ||
34 | struct multi_bio { | ||
35 | atomic_t stripes; | ||
36 | bio_end_io_t *end_io; | ||
37 | void *private; | ||
38 | int error; | ||
39 | }; | ||
40 | |||
34 | struct map_lookup { | 41 | struct map_lookup { |
35 | u64 type; | 42 | u64 type; |
36 | int io_align; | 43 | int io_align; |
@@ -632,12 +639,12 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
632 | if (list_empty(dev_list)) | 639 | if (list_empty(dev_list)) |
633 | return -ENOSPC; | 640 | return -ENOSPC; |
634 | 641 | ||
635 | if (type & BTRFS_BLOCK_GROUP_RAID0) | 642 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) |
636 | num_stripes = btrfs_super_num_devices(&info->super_copy); | 643 | num_stripes = btrfs_super_num_devices(&info->super_copy); |
637 | if (type & BTRFS_BLOCK_GROUP_DATA) | 644 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { |
638 | stripe_len = 64 * 1024; | 645 | num_stripes = min_t(u64, 2, |
639 | if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) | 646 | btrfs_super_num_devices(&info->super_copy)); |
640 | stripe_len = 32 * 1024; | 647 | } |
641 | again: | 648 | again: |
642 | INIT_LIST_HEAD(&private_devs); | 649 | INIT_LIST_HEAD(&private_devs); |
643 | cur = dev_list->next; | 650 | cur = dev_list->next; |
@@ -682,7 +689,11 @@ again: | |||
682 | 689 | ||
683 | stripes = &chunk->stripe; | 690 | stripes = &chunk->stripe; |
684 | 691 | ||
685 | *num_bytes = calc_size * num_stripes; | 692 | if (type & BTRFS_BLOCK_GROUP_RAID1) |
693 | *num_bytes = calc_size; | ||
694 | else | ||
695 | *num_bytes = calc_size * num_stripes; | ||
696 | |||
686 | index = 0; | 697 | index = 0; |
687 | while(index < num_stripes) { | 698 | while(index < num_stripes) { |
688 | BUG_ON(list_empty(&private_devs)); | 699 | BUG_ON(list_empty(&private_devs)); |
@@ -694,7 +705,7 @@ again: | |||
694 | key.objectid, | 705 | key.objectid, |
695 | calc_size, &dev_offset); | 706 | calc_size, &dev_offset); |
696 | BUG_ON(ret); | 707 | BUG_ON(ret); |
697 | printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); | 708 | printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.objectid, calc_size, device->devid, type); |
698 | device->bytes_used += calc_size; | 709 | device->bytes_used += calc_size; |
699 | ret = btrfs_update_device(trans, device); | 710 | ret = btrfs_update_device(trans, device); |
700 | BUG_ON(ret); | 711 | BUG_ON(ret); |
@@ -774,9 +785,9 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) | |||
774 | } | 785 | } |
775 | } | 786 | } |
776 | 787 | ||
777 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, | 788 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, |
778 | u64 logical, u64 *phys, u64 *length, | 789 | int dev_nr, u64 logical, u64 *phys, u64 *length, |
779 | struct btrfs_device **dev) | 790 | struct btrfs_device **dev, int *total_devs) |
780 | { | 791 | { |
781 | struct extent_map *em; | 792 | struct extent_map *em; |
782 | struct map_lookup *map; | 793 | struct map_lookup *map; |
@@ -808,19 +819,39 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, | |||
808 | /* stripe_offset is the offset of this block in its stripe*/ | 819 | /* stripe_offset is the offset of this block in its stripe*/ |
809 | stripe_offset = offset - stripe_offset; | 820 | stripe_offset = offset - stripe_offset; |
810 | 821 | ||
811 | /* | 822 | if (map->type & BTRFS_BLOCK_GROUP_RAID1) { |
812 | * after this do_div call, stripe_nr is the number of stripes | 823 | stripe_index = dev_nr; |
813 | * on this device we have to walk to find the data, and | 824 | if (rw & (1 << BIO_RW)) |
814 | * stripe_index is the number of our device in the stripe array | 825 | *total_devs = map->num_stripes; |
815 | */ | 826 | else { |
816 | stripe_index = do_div(stripe_nr, map->num_stripes); | 827 | int i; |
817 | 828 | u64 least = (u64)-1; | |
829 | struct btrfs_device *cur; | ||
830 | |||
831 | for (i = 0; i < map->num_stripes; i++) { | ||
832 | cur = map->stripes[i].dev; | ||
833 | spin_lock(&cur->io_lock); | ||
834 | if (cur->total_ios < least) { | ||
835 | least = cur->total_ios; | ||
836 | stripe_index = i; | ||
837 | } | ||
838 | spin_unlock(&cur->io_lock); | ||
839 | } | ||
840 | *total_devs = 1; | ||
841 | } | ||
842 | } else { | ||
843 | /* | ||
844 | * after this do_div call, stripe_nr is the number of stripes | ||
845 | * on this device we have to walk to find the data, and | ||
846 | * stripe_index is the number of our device in the stripe array | ||
847 | */ | ||
848 | stripe_index = do_div(stripe_nr, map->num_stripes); | ||
849 | } | ||
818 | BUG_ON(stripe_index >= map->num_stripes); | 850 | BUG_ON(stripe_index >= map->num_stripes); |
819 | |||
820 | *phys = map->stripes[stripe_index].physical + stripe_offset + | 851 | *phys = map->stripes[stripe_index].physical + stripe_offset + |
821 | stripe_nr * map->stripe_len; | 852 | stripe_nr * map->stripe_len; |
822 | 853 | ||
823 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | 854 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1)) { |
824 | /* we limit the length of each bio to what fits in a stripe */ | 855 | /* we limit the length of each bio to what fits in a stripe */ |
825 | *length = min_t(u64, em->len - offset, | 856 | *length = min_t(u64, em->len - offset, |
826 | map->stripe_len - stripe_offset); | 857 | map->stripe_len - stripe_offset); |
@@ -833,33 +864,98 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, | |||
833 | return 0; | 864 | return 0; |
834 | } | 865 | } |
835 | 866 | ||
867 | #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) | ||
868 | static void end_bio_multi_stripe(struct bio *bio, int err) | ||
869 | #else | ||
870 | static int end_bio_multi_stripe(struct bio *bio, | ||
871 | unsigned int bytes_done, int err) | ||
872 | #endif | ||
873 | { | ||
874 | struct multi_bio *multi = bio->bi_private; | ||
875 | |||
876 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) | ||
877 | if (bio->bi_size) | ||
878 | return 1; | ||
879 | #endif | ||
880 | if (err) | ||
881 | multi->error = err; | ||
882 | |||
883 | if (atomic_dec_and_test(&multi->stripes)) { | ||
884 | bio->bi_private = multi->private; | ||
885 | bio->bi_end_io = multi->end_io; | ||
886 | |||
887 | if (!err && multi->error) | ||
888 | err = multi->error; | ||
889 | kfree(multi); | ||
890 | |||
891 | bio_endio(bio, err); | ||
892 | } else { | ||
893 | bio_put(bio); | ||
894 | } | ||
895 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) | ||
896 | return 0; | ||
897 | #endif | ||
898 | } | ||
899 | |||
836 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) | 900 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) |
837 | { | 901 | { |
838 | struct btrfs_mapping_tree *map_tree; | 902 | struct btrfs_mapping_tree *map_tree; |
839 | struct btrfs_device *dev; | 903 | struct btrfs_device *dev; |
904 | struct bio *first_bio = bio; | ||
840 | u64 logical = bio->bi_sector << 9; | 905 | u64 logical = bio->bi_sector << 9; |
841 | u64 physical; | 906 | u64 physical; |
842 | u64 length = 0; | 907 | u64 length = 0; |
843 | u64 map_length; | 908 | u64 map_length; |
844 | struct bio_vec *bvec; | 909 | struct bio_vec *bvec; |
910 | struct multi_bio *multi = NULL; | ||
845 | int i; | 911 | int i; |
846 | int ret; | 912 | int ret; |
913 | int dev_nr = 0; | ||
914 | int total_devs = 1; | ||
847 | 915 | ||
848 | bio_for_each_segment(bvec, bio, i) { | 916 | bio_for_each_segment(bvec, bio, i) { |
849 | length += bvec->bv_len; | 917 | length += bvec->bv_len; |
850 | } | 918 | } |
919 | |||
851 | map_tree = &root->fs_info->mapping_tree; | 920 | map_tree = &root->fs_info->mapping_tree; |
852 | map_length = length; | 921 | map_length = length; |
853 | ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); | 922 | while(dev_nr < total_devs) { |
854 | if (map_length < length) { | 923 | ret = btrfs_map_block(map_tree, rw, dev_nr, logical, |
855 | printk("mapping failed logical %Lu bio len %Lu physical %Lu " | 924 | &physical, &map_length, &dev, |
856 | "len %Lu\n", logical, length, physical, map_length); | 925 | &total_devs); |
857 | BUG(); | 926 | if (map_length < length) { |
927 | printk("mapping failed logical %Lu bio len %Lu physical %Lu " | ||
928 | "len %Lu\n", logical, length, physical, map_length); | ||
929 | BUG(); | ||
930 | } | ||
931 | BUG_ON(map_length < length); | ||
932 | if (total_devs > 1) { | ||
933 | if (!multi) { | ||
934 | multi = kmalloc(sizeof(*multi), GFP_NOFS); | ||
935 | atomic_set(&multi->stripes, 1); | ||
936 | multi->end_io = bio->bi_end_io; | ||
937 | multi->private = first_bio->bi_private; | ||
938 | multi->error = 0; | ||
939 | } else { | ||
940 | atomic_inc(&multi->stripes); | ||
941 | } | ||
942 | if (dev_nr < total_devs - 1) { | ||
943 | bio = bio_clone(first_bio, GFP_NOFS); | ||
944 | BUG_ON(!bio); | ||
945 | } else { | ||
946 | bio = first_bio; | ||
947 | } | ||
948 | bio->bi_private = multi; | ||
949 | bio->bi_end_io = end_bio_multi_stripe; | ||
950 | } | ||
951 | bio->bi_sector = physical >> 9; | ||
952 | bio->bi_bdev = dev->bdev; | ||
953 | spin_lock(&dev->io_lock); | ||
954 | dev->total_ios++; | ||
955 | spin_unlock(&dev->io_lock); | ||
956 | submit_bio(rw, bio); | ||
957 | dev_nr++; | ||
858 | } | 958 | } |
859 | BUG_ON(map_length < length); | ||
860 | bio->bi_sector = physical >> 9; | ||
861 | bio->bi_bdev = dev->bdev; | ||
862 | submit_bio(rw, bio); | ||
863 | return 0; | 959 | return 0; |
864 | } | 960 | } |
865 | 961 | ||
@@ -982,6 +1078,8 @@ static int read_one_dev(struct btrfs_root *root, | |||
982 | return -ENOMEM; | 1078 | return -ENOMEM; |
983 | list_add(&device->dev_list, | 1079 | list_add(&device->dev_list, |
984 | &root->fs_info->fs_devices->devices); | 1080 | &root->fs_info->fs_devices->devices); |
1081 | device->total_ios = 0; | ||
1082 | spin_lock_init(&device->io_lock); | ||
985 | } | 1083 | } |
986 | 1084 | ||
987 | fill_device_from_item(leaf, dev_item, device); | 1085 | fill_device_from_item(leaf, dev_item, device); |