diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-04-03 16:29:03 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:01 -0400 |
commit | 8790d502e4401a4a3a4175b83a3a47e8d595c771 (patch) | |
tree | 7e01c053d6a99c3c2a6fb71a7338b083e953f0b3 | |
parent | 0ef8b2428a339e83fe225e82ef8bd0ea3f62b180 (diff) |
Btrfs: Add support for mirroring across drives
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/ctree.h | 9 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 20 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 93 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 4 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 154 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 11 |
6 files changed, 198 insertions, 93 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 67d533cf8f47..0a207861472e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -418,6 +418,7 @@ struct btrfs_csum_item { | |||
418 | #define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) | 418 | #define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) |
419 | #define BTRFS_BLOCK_GROUP_METADATA (1 << 2) | 419 | #define BTRFS_BLOCK_GROUP_METADATA (1 << 2) |
420 | #define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) | 420 | #define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) |
421 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) | ||
421 | 422 | ||
422 | 423 | ||
423 | struct btrfs_block_group_item { | 424 | struct btrfs_block_group_item { |
@@ -504,8 +505,12 @@ struct btrfs_fs_info { | |||
504 | u64 delalloc_bytes; | 505 | u64 delalloc_bytes; |
505 | u64 last_alloc; | 506 | u64 last_alloc; |
506 | u64 last_data_alloc; | 507 | u64 last_data_alloc; |
507 | int extra_data_alloc_bits; | 508 | int avail_data_alloc_bits; |
508 | int extra_alloc_bits; | 509 | int avail_metadata_alloc_bits; |
510 | int avail_system_alloc_bits; | ||
511 | int data_alloc_profile; | ||
512 | int metadata_alloc_profile; | ||
513 | int system_alloc_profile; | ||
509 | }; | 514 | }; |
510 | 515 | ||
511 | /* | 516 | /* |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e40fb318ad99..ff75ad586767 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -735,7 +735,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
735 | GFP_NOFS); | 735 | GFP_NOFS); |
736 | struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), | 736 | struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), |
737 | GFP_NOFS); | 737 | GFP_NOFS); |
738 | struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), | 738 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), |
739 | GFP_NOFS); | 739 | GFP_NOFS); |
740 | struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root), | 740 | struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root), |
741 | GFP_NOFS); | 741 | GFP_NOFS); |
@@ -744,6 +744,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
744 | int ret; | 744 | int ret; |
745 | int err = -EINVAL; | 745 | int err = -EINVAL; |
746 | struct btrfs_super_block *disk_super; | 746 | struct btrfs_super_block *disk_super; |
747 | |||
747 | if (!extent_root || !tree_root || !fs_info) { | 748 | if (!extent_root || !tree_root || !fs_info) { |
748 | err = -ENOMEM; | 749 | err = -ENOMEM; |
749 | goto fail; | 750 | goto fail; |
@@ -756,11 +757,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
756 | spin_lock_init(&fs_info->delalloc_lock); | 757 | spin_lock_init(&fs_info->delalloc_lock); |
757 | spin_lock_init(&fs_info->new_trans_lock); | 758 | spin_lock_init(&fs_info->new_trans_lock); |
758 | 759 | ||
759 | memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); | ||
760 | init_completion(&fs_info->kobj_unregister); | 760 | init_completion(&fs_info->kobj_unregister); |
761 | sb_set_blocksize(sb, 4096); | 761 | sb_set_blocksize(sb, 4096); |
762 | fs_info->running_transaction = NULL; | ||
763 | fs_info->last_trans_committed = 0; | ||
764 | fs_info->tree_root = tree_root; | 762 | fs_info->tree_root = tree_root; |
765 | fs_info->extent_root = extent_root; | 763 | fs_info->extent_root = extent_root; |
766 | fs_info->chunk_root = chunk_root; | 764 | fs_info->chunk_root = chunk_root; |
@@ -770,11 +768,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
770 | INIT_LIST_HEAD(&fs_info->space_info); | 768 | INIT_LIST_HEAD(&fs_info->space_info); |
771 | btrfs_mapping_init(&fs_info->mapping_tree); | 769 | btrfs_mapping_init(&fs_info->mapping_tree); |
772 | fs_info->sb = sb; | 770 | fs_info->sb = sb; |
773 | fs_info->throttles = 0; | ||
774 | fs_info->mount_opt = 0; | ||
775 | fs_info->max_extent = (u64)-1; | 771 | fs_info->max_extent = (u64)-1; |
776 | fs_info->max_inline = 8192 * 1024; | 772 | fs_info->max_inline = 8192 * 1024; |
777 | fs_info->delalloc_bytes = 0; | ||
778 | setup_bdi(fs_info, &fs_info->bdi); | 773 | setup_bdi(fs_info, &fs_info->bdi); |
779 | fs_info->btree_inode = new_inode(sb); | 774 | fs_info->btree_inode = new_inode(sb); |
780 | fs_info->btree_inode->i_ino = 1; | 775 | fs_info->btree_inode->i_ino = 1; |
@@ -802,12 +797,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
802 | extent_io_tree_init(&fs_info->extent_ins, | 797 | extent_io_tree_init(&fs_info->extent_ins, |
803 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 798 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
804 | fs_info->do_barriers = 1; | 799 | fs_info->do_barriers = 1; |
805 | fs_info->closing = 0; | ||
806 | fs_info->total_pinned = 0; | ||
807 | fs_info->last_alloc = 0; | ||
808 | fs_info->last_data_alloc = 0; | ||
809 | fs_info->extra_alloc_bits = 0; | ||
810 | fs_info->extra_data_alloc_bits = 0; | ||
811 | 800 | ||
812 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) | 801 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) |
813 | INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); | 802 | INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); |
@@ -923,6 +912,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
923 | btrfs_read_block_groups(extent_root); | 912 | btrfs_read_block_groups(extent_root); |
924 | 913 | ||
925 | fs_info->generation = btrfs_super_generation(disk_super) + 1; | 914 | fs_info->generation = btrfs_super_generation(disk_super) + 1; |
915 | if (btrfs_super_num_devices(disk_super) > 0) { | ||
916 | fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0; | ||
917 | fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1; | ||
918 | fs_info->system_alloc_profile = BTRFS_BLOCK_GROUP_RAID0; | ||
919 | } | ||
926 | mutex_unlock(&fs_info->fs_mutex); | 920 | mutex_unlock(&fs_info->fs_mutex); |
927 | return tree_root; | 921 | return tree_root; |
928 | 922 | ||
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a34c289aec21..4ab98d8b73fa 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -230,9 +230,13 @@ again: | |||
230 | goto new_group; | 230 | goto new_group; |
231 | if (start + num > total_fs_bytes) | 231 | if (start + num > total_fs_bytes) |
232 | goto new_group; | 232 | goto new_group; |
233 | if (!block_group_bits(cache, data)) { | ||
234 | printk("block group bits don't match %Lu %Lu\n", cache->flags, data); | ||
235 | } | ||
233 | *start_ret = start; | 236 | *start_ret = start; |
234 | return 0; | 237 | return 0; |
235 | } out: | 238 | } |
239 | out: | ||
236 | cache = btrfs_lookup_block_group(root->fs_info, search_start); | 240 | cache = btrfs_lookup_block_group(root->fs_info, search_start); |
237 | if (!cache) { | 241 | if (!cache) { |
238 | printk("Unable to find block group for %Lu\n", search_start); | 242 | printk("Unable to find block group for %Lu\n", search_start); |
@@ -365,14 +369,17 @@ again: | |||
365 | if (cache->key.objectid > total_fs_bytes) | 369 | if (cache->key.objectid > total_fs_bytes) |
366 | break; | 370 | break; |
367 | 371 | ||
368 | if (full_search) | 372 | if (block_group_bits(cache, data)) { |
369 | free_check = cache->key.offset; | 373 | if (full_search) |
370 | else | 374 | free_check = cache->key.offset; |
371 | free_check = div_factor(cache->key.offset, factor); | 375 | else |
376 | free_check = div_factor(cache->key.offset, | ||
377 | factor); | ||
372 | 378 | ||
373 | if (used + cache->pinned < free_check) { | 379 | if (used + cache->pinned < free_check) { |
374 | found_group = cache; | 380 | found_group = cache; |
375 | goto found; | 381 | goto found; |
382 | } | ||
376 | } | 383 | } |
377 | cond_resched(); | 384 | cond_resched(); |
378 | } | 385 | } |
@@ -1038,6 +1045,19 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
1038 | return 0; | 1045 | return 0; |
1039 | } | 1046 | } |
1040 | 1047 | ||
1048 | static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | ||
1049 | { | ||
1050 | u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | | ||
1051 | BTRFS_BLOCK_GROUP_RAID1); | ||
1052 | if (extra_flags) { | ||
1053 | if (flags & BTRFS_BLOCK_GROUP_DATA) | ||
1054 | fs_info->avail_data_alloc_bits |= extra_flags; | ||
1055 | if (flags & BTRFS_BLOCK_GROUP_METADATA) | ||
1056 | fs_info->avail_metadata_alloc_bits |= extra_flags; | ||
1057 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) | ||
1058 | fs_info->avail_system_alloc_bits |= extra_flags; | ||
1059 | } | ||
1060 | } | ||
1041 | 1061 | ||
1042 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 1062 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
1043 | struct btrfs_root *extent_root, u64 alloc_bytes, | 1063 | struct btrfs_root *extent_root, u64 alloc_bytes, |
@@ -1060,7 +1080,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
1060 | if (space_info->full) | 1080 | if (space_info->full) |
1061 | return 0; | 1081 | return 0; |
1062 | 1082 | ||
1063 | thresh = div_factor(space_info->total_bytes, 7); | 1083 | thresh = div_factor(space_info->total_bytes, 6); |
1064 | if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < | 1084 | if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < |
1065 | thresh) | 1085 | thresh) |
1066 | return 0; | 1086 | return 0; |
@@ -1079,16 +1099,7 @@ printk("space info full %Lu\n", flags); | |||
1079 | start, num_bytes); | 1099 | start, num_bytes); |
1080 | BUG_ON(ret); | 1100 | BUG_ON(ret); |
1081 | 1101 | ||
1082 | if (flags & BTRFS_BLOCK_GROUP_RAID0) { | 1102 | set_avail_alloc_bits(extent_root->fs_info, flags); |
1083 | if (flags & BTRFS_BLOCK_GROUP_DATA) { | ||
1084 | extent_root->fs_info->extra_data_alloc_bits = | ||
1085 | BTRFS_BLOCK_GROUP_RAID0; | ||
1086 | } | ||
1087 | if (flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
1088 | extent_root->fs_info->extra_alloc_bits = | ||
1089 | BTRFS_BLOCK_GROUP_RAID0; | ||
1090 | } | ||
1091 | } | ||
1092 | return 0; | 1103 | return 0; |
1093 | } | 1104 | } |
1094 | 1105 | ||
@@ -1529,6 +1540,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, | |||
1529 | 1540 | ||
1530 | if (data & BTRFS_BLOCK_GROUP_METADATA) { | 1541 | if (data & BTRFS_BLOCK_GROUP_METADATA) { |
1531 | last_ptr = &root->fs_info->last_alloc; | 1542 | last_ptr = &root->fs_info->last_alloc; |
1543 | empty_cluster = 256 * 1024; | ||
1532 | } | 1544 | } |
1533 | 1545 | ||
1534 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { | 1546 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { |
@@ -1693,6 +1705,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | |||
1693 | u64 root_used; | 1705 | u64 root_used; |
1694 | u64 search_start = 0; | 1706 | u64 search_start = 0; |
1695 | u64 new_hint; | 1707 | u64 new_hint; |
1708 | u64 alloc_profile; | ||
1696 | u32 sizes[2]; | 1709 | u32 sizes[2]; |
1697 | struct btrfs_fs_info *info = root->fs_info; | 1710 | struct btrfs_fs_info *info = root->fs_info; |
1698 | struct btrfs_root *extent_root = info->extent_root; | 1711 | struct btrfs_root *extent_root = info->extent_root; |
@@ -1700,31 +1713,32 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | |||
1700 | struct btrfs_extent_ref *ref; | 1713 | struct btrfs_extent_ref *ref; |
1701 | struct btrfs_path *path; | 1714 | struct btrfs_path *path; |
1702 | struct btrfs_key keys[2]; | 1715 | struct btrfs_key keys[2]; |
1703 | int extra_chunk_alloc_bits = 0; | ||
1704 | 1716 | ||
1705 | if (data) { | 1717 | if (data) { |
1706 | data = BTRFS_BLOCK_GROUP_DATA | info->extra_data_alloc_bits; | 1718 | alloc_profile = info->avail_data_alloc_bits & |
1719 | info->data_alloc_profile; | ||
1720 | data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; | ||
1707 | } else if (root == root->fs_info->chunk_root) { | 1721 | } else if (root == root->fs_info->chunk_root) { |
1708 | data = BTRFS_BLOCK_GROUP_SYSTEM; | 1722 | alloc_profile = info->avail_system_alloc_bits & |
1723 | info->system_alloc_profile; | ||
1724 | data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; | ||
1709 | } else { | 1725 | } else { |
1710 | data = BTRFS_BLOCK_GROUP_METADATA | info->extra_alloc_bits; | 1726 | alloc_profile = info->avail_metadata_alloc_bits & |
1727 | info->metadata_alloc_profile; | ||
1728 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; | ||
1711 | } | 1729 | } |
1712 | if (btrfs_super_num_devices(&info->super_copy) > 1 && | ||
1713 | !(data & BTRFS_BLOCK_GROUP_SYSTEM)) | ||
1714 | extra_chunk_alloc_bits = BTRFS_BLOCK_GROUP_RAID0; | ||
1715 | 1730 | ||
1716 | if (root->ref_cows) { | 1731 | if (root->ref_cows) { |
1717 | if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { | 1732 | if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { |
1718 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 1733 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
1719 | 2 * 1024 * 1024, | 1734 | 2 * 1024 * 1024, |
1720 | BTRFS_BLOCK_GROUP_METADATA | | 1735 | BTRFS_BLOCK_GROUP_METADATA | |
1721 | info->extra_alloc_bits | | 1736 | (info->metadata_alloc_profile & |
1722 | extra_chunk_alloc_bits); | 1737 | info->avail_metadata_alloc_bits)); |
1723 | BUG_ON(ret); | 1738 | BUG_ON(ret); |
1724 | } | 1739 | } |
1725 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 1740 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
1726 | num_bytes + 2 * 1024 * 1024, data | | 1741 | num_bytes + 2 * 1024 * 1024, data); |
1727 | extra_chunk_alloc_bits); | ||
1728 | BUG_ON(ret); | 1742 | BUG_ON(ret); |
1729 | } | 1743 | } |
1730 | 1744 | ||
@@ -2046,12 +2060,12 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, | |||
2046 | if (!next || !btrfs_buffer_uptodate(next)) { | 2060 | if (!next || !btrfs_buffer_uptodate(next)) { |
2047 | free_extent_buffer(next); | 2061 | free_extent_buffer(next); |
2048 | reada_walk_down(root, cur, path->slots[*level]); | 2062 | reada_walk_down(root, cur, path->slots[*level]); |
2063 | |||
2064 | mutex_unlock(&root->fs_info->fs_mutex); | ||
2049 | next = read_tree_block(root, bytenr, blocksize); | 2065 | next = read_tree_block(root, bytenr, blocksize); |
2066 | mutex_lock(&root->fs_info->fs_mutex); | ||
2050 | 2067 | ||
2051 | /* we used to drop the lock above, keep the | 2068 | /* we've dropped the lock, double check */ |
2052 | * code to double check so that we won't forget | ||
2053 | * when we drop the lock again in the future | ||
2054 | */ | ||
2055 | ret = lookup_extent_ref(trans, root, bytenr, | 2069 | ret = lookup_extent_ref(trans, root, bytenr, |
2056 | blocksize, &refs); | 2070 | blocksize, &refs); |
2057 | BUG_ON(ret); | 2071 | BUG_ON(ret); |
@@ -2739,16 +2753,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
2739 | } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { | 2753 | } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { |
2740 | bit = BLOCK_GROUP_METADATA; | 2754 | bit = BLOCK_GROUP_METADATA; |
2741 | } | 2755 | } |
2742 | if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) { | 2756 | set_avail_alloc_bits(info, cache->flags); |
2743 | if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { | ||
2744 | info->extra_data_alloc_bits = | ||
2745 | BTRFS_BLOCK_GROUP_RAID0; | ||
2746 | } | ||
2747 | if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
2748 | info->extra_alloc_bits = | ||
2749 | BTRFS_BLOCK_GROUP_RAID0; | ||
2750 | } | ||
2751 | } | ||
2752 | 2757 | ||
2753 | ret = update_space_info(info, cache->flags, found_key.offset, | 2758 | ret = update_space_info(info, cache->flags, found_key.offset, |
2754 | btrfs_block_group_used(&cache->item), | 2759 | btrfs_block_group_used(&cache->item), |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0fa7cf227f1a..a8ae68c6fbb8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -306,6 +306,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
306 | u64 physical; | 306 | u64 physical; |
307 | u64 length = 0; | 307 | u64 length = 0; |
308 | u64 map_length; | 308 | u64 map_length; |
309 | int total_devs; | ||
309 | struct bio_vec *bvec; | 310 | struct bio_vec *bvec; |
310 | int i; | 311 | int i; |
311 | int ret; | 312 | int ret; |
@@ -315,7 +316,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
315 | } | 316 | } |
316 | map_tree = &root->fs_info->mapping_tree; | 317 | map_tree = &root->fs_info->mapping_tree; |
317 | map_length = length; | 318 | map_length = length; |
318 | ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); | 319 | ret = btrfs_map_block(map_tree, READ, 0, logical, &physical, |
320 | &map_length, &dev, &total_devs); | ||
319 | if (map_length < length + size) { | 321 | if (map_length < length + size) { |
320 | return 1; | 322 | return 1; |
321 | } | 323 | } |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 169be0f7285b..bc3c0b97588e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -31,6 +31,13 @@ struct stripe { | |||
31 | u64 physical; | 31 | u64 physical; |
32 | }; | 32 | }; |
33 | 33 | ||
34 | struct multi_bio { | ||
35 | atomic_t stripes; | ||
36 | bio_end_io_t *end_io; | ||
37 | void *private; | ||
38 | int error; | ||
39 | }; | ||
40 | |||
34 | struct map_lookup { | 41 | struct map_lookup { |
35 | u64 type; | 42 | u64 type; |
36 | int io_align; | 43 | int io_align; |
@@ -632,12 +639,12 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
632 | if (list_empty(dev_list)) | 639 | if (list_empty(dev_list)) |
633 | return -ENOSPC; | 640 | return -ENOSPC; |
634 | 641 | ||
635 | if (type & BTRFS_BLOCK_GROUP_RAID0) | 642 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) |
636 | num_stripes = btrfs_super_num_devices(&info->super_copy); | 643 | num_stripes = btrfs_super_num_devices(&info->super_copy); |
637 | if (type & BTRFS_BLOCK_GROUP_DATA) | 644 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { |
638 | stripe_len = 64 * 1024; | 645 | num_stripes = min_t(u64, 2, |
639 | if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) | 646 | btrfs_super_num_devices(&info->super_copy)); |
640 | stripe_len = 32 * 1024; | 647 | } |
641 | again: | 648 | again: |
642 | INIT_LIST_HEAD(&private_devs); | 649 | INIT_LIST_HEAD(&private_devs); |
643 | cur = dev_list->next; | 650 | cur = dev_list->next; |
@@ -682,7 +689,11 @@ again: | |||
682 | 689 | ||
683 | stripes = &chunk->stripe; | 690 | stripes = &chunk->stripe; |
684 | 691 | ||
685 | *num_bytes = calc_size * num_stripes; | 692 | if (type & BTRFS_BLOCK_GROUP_RAID1) |
693 | *num_bytes = calc_size; | ||
694 | else | ||
695 | *num_bytes = calc_size * num_stripes; | ||
696 | |||
686 | index = 0; | 697 | index = 0; |
687 | while(index < num_stripes) { | 698 | while(index < num_stripes) { |
688 | BUG_ON(list_empty(&private_devs)); | 699 | BUG_ON(list_empty(&private_devs)); |
@@ -694,7 +705,7 @@ again: | |||
694 | key.objectid, | 705 | key.objectid, |
695 | calc_size, &dev_offset); | 706 | calc_size, &dev_offset); |
696 | BUG_ON(ret); | 707 | BUG_ON(ret); |
697 | printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); | 708 | printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.objectid, calc_size, device->devid, type); |
698 | device->bytes_used += calc_size; | 709 | device->bytes_used += calc_size; |
699 | ret = btrfs_update_device(trans, device); | 710 | ret = btrfs_update_device(trans, device); |
700 | BUG_ON(ret); | 711 | BUG_ON(ret); |
@@ -774,9 +785,9 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) | |||
774 | } | 785 | } |
775 | } | 786 | } |
776 | 787 | ||
777 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, | 788 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, |
778 | u64 logical, u64 *phys, u64 *length, | 789 | int dev_nr, u64 logical, u64 *phys, u64 *length, |
779 | struct btrfs_device **dev) | 790 | struct btrfs_device **dev, int *total_devs) |
780 | { | 791 | { |
781 | struct extent_map *em; | 792 | struct extent_map *em; |
782 | struct map_lookup *map; | 793 | struct map_lookup *map; |
@@ -808,19 +819,39 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, | |||
808 | /* stripe_offset is the offset of this block in its stripe*/ | 819 | /* stripe_offset is the offset of this block in its stripe*/ |
809 | stripe_offset = offset - stripe_offset; | 820 | stripe_offset = offset - stripe_offset; |
810 | 821 | ||
811 | /* | 822 | if (map->type & BTRFS_BLOCK_GROUP_RAID1) { |
812 | * after this do_div call, stripe_nr is the number of stripes | 823 | stripe_index = dev_nr; |
813 | * on this device we have to walk to find the data, and | 824 | if (rw & (1 << BIO_RW)) |
814 | * stripe_index is the number of our device in the stripe array | 825 | *total_devs = map->num_stripes; |
815 | */ | 826 | else { |
816 | stripe_index = do_div(stripe_nr, map->num_stripes); | 827 | int i; |
817 | 828 | u64 least = (u64)-1; | |
829 | struct btrfs_device *cur; | ||
830 | |||
831 | for (i = 0; i < map->num_stripes; i++) { | ||
832 | cur = map->stripes[i].dev; | ||
833 | spin_lock(&cur->io_lock); | ||
834 | if (cur->total_ios < least) { | ||
835 | least = cur->total_ios; | ||
836 | stripe_index = i; | ||
837 | } | ||
838 | spin_unlock(&cur->io_lock); | ||
839 | } | ||
840 | *total_devs = 1; | ||
841 | } | ||
842 | } else { | ||
843 | /* | ||
844 | * after this do_div call, stripe_nr is the number of stripes | ||
845 | * on this device we have to walk to find the data, and | ||
846 | * stripe_index is the number of our device in the stripe array | ||
847 | */ | ||
848 | stripe_index = do_div(stripe_nr, map->num_stripes); | ||
849 | } | ||
818 | BUG_ON(stripe_index >= map->num_stripes); | 850 | BUG_ON(stripe_index >= map->num_stripes); |
819 | |||
820 | *phys = map->stripes[stripe_index].physical + stripe_offset + | 851 | *phys = map->stripes[stripe_index].physical + stripe_offset + |
821 | stripe_nr * map->stripe_len; | 852 | stripe_nr * map->stripe_len; |
822 | 853 | ||
823 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | 854 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1)) { |
824 | /* we limit the length of each bio to what fits in a stripe */ | 855 | /* we limit the length of each bio to what fits in a stripe */ |
825 | *length = min_t(u64, em->len - offset, | 856 | *length = min_t(u64, em->len - offset, |
826 | map->stripe_len - stripe_offset); | 857 | map->stripe_len - stripe_offset); |
@@ -833,33 +864,98 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, | |||
833 | return 0; | 864 | return 0; |
834 | } | 865 | } |
835 | 866 | ||
867 | #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) | ||
868 | static void end_bio_multi_stripe(struct bio *bio, int err) | ||
869 | #else | ||
870 | static int end_bio_multi_stripe(struct bio *bio, | ||
871 | unsigned int bytes_done, int err) | ||
872 | #endif | ||
873 | { | ||
874 | struct multi_bio *multi = bio->bi_private; | ||
875 | |||
876 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) | ||
877 | if (bio->bi_size) | ||
878 | return 1; | ||
879 | #endif | ||
880 | if (err) | ||
881 | multi->error = err; | ||
882 | |||
883 | if (atomic_dec_and_test(&multi->stripes)) { | ||
884 | bio->bi_private = multi->private; | ||
885 | bio->bi_end_io = multi->end_io; | ||
886 | |||
887 | if (!err && multi->error) | ||
888 | err = multi->error; | ||
889 | kfree(multi); | ||
890 | |||
891 | bio_endio(bio, err); | ||
892 | } else { | ||
893 | bio_put(bio); | ||
894 | } | ||
895 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) | ||
896 | return 0; | ||
897 | #endif | ||
898 | } | ||
899 | |||
836 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) | 900 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) |
837 | { | 901 | { |
838 | struct btrfs_mapping_tree *map_tree; | 902 | struct btrfs_mapping_tree *map_tree; |
839 | struct btrfs_device *dev; | 903 | struct btrfs_device *dev; |
904 | struct bio *first_bio = bio; | ||
840 | u64 logical = bio->bi_sector << 9; | 905 | u64 logical = bio->bi_sector << 9; |
841 | u64 physical; | 906 | u64 physical; |
842 | u64 length = 0; | 907 | u64 length = 0; |
843 | u64 map_length; | 908 | u64 map_length; |
844 | struct bio_vec *bvec; | 909 | struct bio_vec *bvec; |
910 | struct multi_bio *multi = NULL; | ||
845 | int i; | 911 | int i; |
846 | int ret; | 912 | int ret; |
913 | int dev_nr = 0; | ||
914 | int total_devs = 1; | ||
847 | 915 | ||
848 | bio_for_each_segment(bvec, bio, i) { | 916 | bio_for_each_segment(bvec, bio, i) { |
849 | length += bvec->bv_len; | 917 | length += bvec->bv_len; |
850 | } | 918 | } |
919 | |||
851 | map_tree = &root->fs_info->mapping_tree; | 920 | map_tree = &root->fs_info->mapping_tree; |
852 | map_length = length; | 921 | map_length = length; |
853 | ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); | 922 | while(dev_nr < total_devs) { |
854 | if (map_length < length) { | 923 | ret = btrfs_map_block(map_tree, rw, dev_nr, logical, |
855 | printk("mapping failed logical %Lu bio len %Lu physical %Lu " | 924 | &physical, &map_length, &dev, |
856 | "len %Lu\n", logical, length, physical, map_length); | 925 | &total_devs); |
857 | BUG(); | 926 | if (map_length < length) { |
927 | printk("mapping failed logical %Lu bio len %Lu physical %Lu " | ||
928 | "len %Lu\n", logical, length, physical, map_length); | ||
929 | BUG(); | ||
930 | } | ||
931 | BUG_ON(map_length < length); | ||
932 | if (total_devs > 1) { | ||
933 | if (!multi) { | ||
934 | multi = kmalloc(sizeof(*multi), GFP_NOFS); | ||
935 | atomic_set(&multi->stripes, 1); | ||
936 | multi->end_io = bio->bi_end_io; | ||
937 | multi->private = first_bio->bi_private; | ||
938 | multi->error = 0; | ||
939 | } else { | ||
940 | atomic_inc(&multi->stripes); | ||
941 | } | ||
942 | if (dev_nr < total_devs - 1) { | ||
943 | bio = bio_clone(first_bio, GFP_NOFS); | ||
944 | BUG_ON(!bio); | ||
945 | } else { | ||
946 | bio = first_bio; | ||
947 | } | ||
948 | bio->bi_private = multi; | ||
949 | bio->bi_end_io = end_bio_multi_stripe; | ||
950 | } | ||
951 | bio->bi_sector = physical >> 9; | ||
952 | bio->bi_bdev = dev->bdev; | ||
953 | spin_lock(&dev->io_lock); | ||
954 | dev->total_ios++; | ||
955 | spin_unlock(&dev->io_lock); | ||
956 | submit_bio(rw, bio); | ||
957 | dev_nr++; | ||
858 | } | 958 | } |
859 | BUG_ON(map_length < length); | ||
860 | bio->bi_sector = physical >> 9; | ||
861 | bio->bi_bdev = dev->bdev; | ||
862 | submit_bio(rw, bio); | ||
863 | return 0; | 959 | return 0; |
864 | } | 960 | } |
865 | 961 | ||
@@ -982,6 +1078,8 @@ static int read_one_dev(struct btrfs_root *root, | |||
982 | return -ENOMEM; | 1078 | return -ENOMEM; |
983 | list_add(&device->dev_list, | 1079 | list_add(&device->dev_list, |
984 | &root->fs_info->fs_devices->devices); | 1080 | &root->fs_info->fs_devices->devices); |
1081 | device->total_ios = 0; | ||
1082 | spin_lock_init(&device->io_lock); | ||
985 | } | 1083 | } |
986 | 1084 | ||
987 | fill_device_from_item(leaf, dev_item, device); | 1085 | fill_device_from_item(leaf, dev_item, device); |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 12f297eb0559..35dec3efd78c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -18,12 +18,16 @@ | |||
18 | 18 | ||
19 | #ifndef __BTRFS_VOLUMES_ | 19 | #ifndef __BTRFS_VOLUMES_ |
20 | #define __BTRFS_VOLUMES_ | 20 | #define __BTRFS_VOLUMES_ |
21 | |||
21 | struct btrfs_device { | 22 | struct btrfs_device { |
22 | struct list_head dev_list; | 23 | struct list_head dev_list; |
23 | struct btrfs_root *dev_root; | 24 | struct btrfs_root *dev_root; |
25 | spinlock_t io_lock; | ||
24 | 26 | ||
25 | struct block_device *bdev; | 27 | struct block_device *bdev; |
26 | 28 | ||
29 | u64 total_ios; | ||
30 | |||
27 | char *name; | 31 | char *name; |
28 | 32 | ||
29 | /* the internal btrfs device id */ | 33 | /* the internal btrfs device id */ |
@@ -68,9 +72,9 @@ struct btrfs_fs_devices { | |||
68 | int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, | 72 | int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, |
69 | struct btrfs_device *device, | 73 | struct btrfs_device *device, |
70 | u64 owner, u64 num_bytes, u64 *start); | 74 | u64 owner, u64 num_bytes, u64 *start); |
71 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, | 75 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int stripe_nr, |
72 | u64 logical, u64 *phys, u64 *length, | 76 | u64 logical, u64 *phys, u64 *length, |
73 | struct btrfs_device **dev); | 77 | struct btrfs_device **dev, int *total_stripes); |
74 | int btrfs_read_sys_array(struct btrfs_root *root); | 78 | int btrfs_read_sys_array(struct btrfs_root *root); |
75 | int btrfs_read_chunk_tree(struct btrfs_root *root); | 79 | int btrfs_read_chunk_tree(struct btrfs_root *root); |
76 | int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 80 | int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, |
@@ -80,9 +84,6 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree); | |||
80 | void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); | 84 | void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); |
81 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio); | 85 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio); |
82 | int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); | 86 | int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); |
83 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, | ||
84 | u64 logical, u64 *phys, u64 *length, | ||
85 | struct btrfs_device **dev); | ||
86 | int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | 87 | int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, |
87 | int flags, void *holder); | 88 | int flags, void *holder); |
88 | int btrfs_scan_one_device(const char *path, int flags, void *holder, | 89 | int btrfs_scan_one_device(const char *path, int flags, void *holder, |