author     Chris Mason <chris.mason@oracle.com>    2008-04-03 16:29:03 -0400
committer  Chris Mason <chris.mason@oracle.com>    2008-09-25 11:04:01 -0400
commit     8790d502e4401a4a3a4175b83a3a47e8d595c771 (patch)
tree       7e01c053d6a99c3c2a6fb71a7338b083e953f0b3
parent     0ef8b2428a339e83fe225e82ef8bd0ea3f62b180 (diff)
Btrfs: Add support for mirroring across drives
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--   fs/btrfs/ctree.h         9
-rw-r--r--   fs/btrfs/disk-io.c      20
-rw-r--r--   fs/btrfs/extent-tree.c  93
-rw-r--r--   fs/btrfs/inode.c         4
-rw-r--r--   fs/btrfs/volumes.c     154
-rw-r--r--   fs/btrfs/volumes.h      11
6 files changed, 198 insertions(+), 93 deletions(-)
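The allocation-side changes below (ctree.h, disk-io.c, extent-tree.c) keep two bitmasks per block-group type: avail_*_alloc_bits records which RAID bits have actually been seen in existing block groups (set_avail_alloc_bits), while *_alloc_profile is the preference open_ctree sets when the superblock reports devices (RAID0 for data and system, RAID1 for metadata). btrfs_alloc_extent ANDs the two together before ORing in the plain type flag, so a profile is only requested once it is actually available. A minimal user-space sketch of that masking, using the flag values visible in the patch; everything else here is illustrative:

#include <stdint.h>
#include <stdio.h>

#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2)
#define BTRFS_BLOCK_GROUP_RAID0    (1ULL << 3)
#define BTRFS_BLOCK_GROUP_RAID1    (1ULL << 4)

int main(void)
{
        /* bits seen in existing block groups (what set_avail_alloc_bits tracks) */
        uint64_t avail_metadata_alloc_bits = BTRFS_BLOCK_GROUP_RAID1;
        /* preference chosen at mount time for a multi-device filesystem */
        uint64_t metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1;

        /* btrfs_alloc_extent(): only request RAID bits that are both
         * available and wanted, then tag the allocation type on top */
        uint64_t alloc_profile = avail_metadata_alloc_bits &
                                 metadata_alloc_profile;
        uint64_t flags = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;

        printf("metadata allocation flags: 0x%llx\n",
               (unsigned long long)flags);
        return 0;
}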
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 67d533cf8f47..0a207861472e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -418,6 +418,7 @@ struct btrfs_csum_item {
 #define BTRFS_BLOCK_GROUP_SYSTEM   (1 << 1)
 #define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
 #define BTRFS_BLOCK_GROUP_RAID0    (1 << 3)
+#define BTRFS_BLOCK_GROUP_RAID1    (1 << 4)
 
 
 struct btrfs_block_group_item {
@@ -504,8 +505,12 @@ struct btrfs_fs_info {
         u64 delalloc_bytes;
         u64 last_alloc;
         u64 last_data_alloc;
-        int extra_data_alloc_bits;
-        int extra_alloc_bits;
+        int avail_data_alloc_bits;
+        int avail_metadata_alloc_bits;
+        int avail_system_alloc_bits;
+        int data_alloc_profile;
+        int metadata_alloc_profile;
+        int system_alloc_profile;
 };
 
 /*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e40fb318ad99..ff75ad586767 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -735,7 +735,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                                                    GFP_NOFS);
         struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
                                                GFP_NOFS);
-        struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
+        struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
                                                 GFP_NOFS);
         struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root),
                                                 GFP_NOFS);
@@ -744,6 +744,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
         int ret;
         int err = -EINVAL;
         struct btrfs_super_block *disk_super;
+
         if (!extent_root || !tree_root || !fs_info) {
                 err = -ENOMEM;
                 goto fail;
@@ -756,11 +757,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
         spin_lock_init(&fs_info->delalloc_lock);
         spin_lock_init(&fs_info->new_trans_lock);
 
-        memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj));
         init_completion(&fs_info->kobj_unregister);
         sb_set_blocksize(sb, 4096);
-        fs_info->running_transaction = NULL;
-        fs_info->last_trans_committed = 0;
         fs_info->tree_root = tree_root;
         fs_info->extent_root = extent_root;
         fs_info->chunk_root = chunk_root;
@@ -770,11 +768,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
         INIT_LIST_HEAD(&fs_info->space_info);
         btrfs_mapping_init(&fs_info->mapping_tree);
         fs_info->sb = sb;
-        fs_info->throttles = 0;
-        fs_info->mount_opt = 0;
         fs_info->max_extent = (u64)-1;
         fs_info->max_inline = 8192 * 1024;
-        fs_info->delalloc_bytes = 0;
         setup_bdi(fs_info, &fs_info->bdi);
         fs_info->btree_inode = new_inode(sb);
         fs_info->btree_inode->i_ino = 1;
@@ -802,12 +797,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
         extent_io_tree_init(&fs_info->extent_ins,
                             fs_info->btree_inode->i_mapping, GFP_NOFS);
         fs_info->do_barriers = 1;
-        fs_info->closing = 0;
-        fs_info->total_pinned = 0;
-        fs_info->last_alloc = 0;
-        fs_info->last_data_alloc = 0;
-        fs_info->extra_alloc_bits = 0;
-        fs_info->extra_data_alloc_bits = 0;
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
         INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
@@ -923,6 +912,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
         btrfs_read_block_groups(extent_root);
 
         fs_info->generation = btrfs_super_generation(disk_super) + 1;
+        if (btrfs_super_num_devices(disk_super) > 0) {
+                fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0;
+                fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1;
+                fs_info->system_alloc_profile = BTRFS_BLOCK_GROUP_RAID0;
+        }
         mutex_unlock(&fs_info->fs_mutex);
         return tree_root;
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a34c289aec21..4ab98d8b73fa 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -230,9 +230,13 @@ again:
                         goto new_group;
                 if (start + num > total_fs_bytes)
                         goto new_group;
+                if (!block_group_bits(cache, data)) {
+                        printk("block group bits don't match %Lu %Lu\n", cache->flags, data);
+                }
                 *start_ret = start;
                 return 0;
-        } out:
+        }
+out:
         cache = btrfs_lookup_block_group(root->fs_info, search_start);
         if (!cache) {
                 printk("Unable to find block group for %Lu\n", search_start);
@@ -365,14 +369,17 @@ again:
                 if (cache->key.objectid > total_fs_bytes)
                         break;
 
-                if (full_search)
-                        free_check = cache->key.offset;
-                else
-                        free_check = div_factor(cache->key.offset, factor);
+                if (block_group_bits(cache, data)) {
+                        if (full_search)
+                                free_check = cache->key.offset;
+                        else
+                                free_check = div_factor(cache->key.offset,
+                                                        factor);
 
-                if (used + cache->pinned < free_check) {
-                        found_group = cache;
-                        goto found;
+                        if (used + cache->pinned < free_check) {
+                                found_group = cache;
+                                goto found;
+                        }
                 }
                 cond_resched();
         }
@@ -1038,6 +1045,19 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
         return 0;
 }
 
+static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
+{
+        u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
+                                   BTRFS_BLOCK_GROUP_RAID1);
+        if (extra_flags) {
+                if (flags & BTRFS_BLOCK_GROUP_DATA)
+                        fs_info->avail_data_alloc_bits |= extra_flags;
+                if (flags & BTRFS_BLOCK_GROUP_METADATA)
+                        fs_info->avail_metadata_alloc_bits |= extra_flags;
+                if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+                        fs_info->avail_system_alloc_bits |= extra_flags;
+        }
+}
 
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                           struct btrfs_root *extent_root, u64 alloc_bytes,
@@ -1060,7 +1080,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
         if (space_info->full)
                 return 0;
 
-        thresh = div_factor(space_info->total_bytes, 7);
+        thresh = div_factor(space_info->total_bytes, 6);
         if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
             thresh)
                 return 0;
@@ -1079,16 +1099,7 @@ printk("space info full %Lu\n", flags);
                              start, num_bytes);
         BUG_ON(ret);
 
-        if (flags & BTRFS_BLOCK_GROUP_RAID0) {
-                if (flags & BTRFS_BLOCK_GROUP_DATA) {
-                        extent_root->fs_info->extra_data_alloc_bits =
-                                BTRFS_BLOCK_GROUP_RAID0;
-                }
-                if (flags & BTRFS_BLOCK_GROUP_METADATA) {
-                        extent_root->fs_info->extra_alloc_bits =
-                                BTRFS_BLOCK_GROUP_RAID0;
-                }
-        }
+        set_avail_alloc_bits(extent_root->fs_info, flags);
         return 0;
 }
 
@@ -1529,6 +1540,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
 
         if (data & BTRFS_BLOCK_GROUP_METADATA) {
                 last_ptr = &root->fs_info->last_alloc;
+                empty_cluster = 256 * 1024;
         }
 
         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
@@ -1693,6 +1705,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
         u64 root_used;
         u64 search_start = 0;
         u64 new_hint;
+        u64 alloc_profile;
         u32 sizes[2];
         struct btrfs_fs_info *info = root->fs_info;
         struct btrfs_root *extent_root = info->extent_root;
@@ -1700,31 +1713,32 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
         struct btrfs_extent_ref *ref;
         struct btrfs_path *path;
         struct btrfs_key keys[2];
-        int extra_chunk_alloc_bits = 0;
 
         if (data) {
-                data = BTRFS_BLOCK_GROUP_DATA | info->extra_data_alloc_bits;
+                alloc_profile = info->avail_data_alloc_bits &
+                                info->data_alloc_profile;
+                data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
         } else if (root == root->fs_info->chunk_root) {
-                data = BTRFS_BLOCK_GROUP_SYSTEM;
+                alloc_profile = info->avail_system_alloc_bits &
+                                info->system_alloc_profile;
+                data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
         } else {
-                data = BTRFS_BLOCK_GROUP_METADATA | info->extra_alloc_bits;
+                alloc_profile = info->avail_metadata_alloc_bits &
+                                info->metadata_alloc_profile;
+                data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
         }
-        if (btrfs_super_num_devices(&info->super_copy) > 1 &&
-            !(data & BTRFS_BLOCK_GROUP_SYSTEM))
-                extra_chunk_alloc_bits = BTRFS_BLOCK_GROUP_RAID0;
 
         if (root->ref_cows) {
                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                                              2 * 1024 * 1024,
                                              BTRFS_BLOCK_GROUP_METADATA |
-                                             info->extra_alloc_bits |
-                                             extra_chunk_alloc_bits);
+                                             (info->metadata_alloc_profile &
+                                              info->avail_metadata_alloc_bits));
                         BUG_ON(ret);
                 }
                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-                                     num_bytes + 2 * 1024 * 1024, data |
-                                     extra_chunk_alloc_bits);
+                                     num_bytes + 2 * 1024 * 1024, data);
                 BUG_ON(ret);
         }
 
@@ -2046,12 +2060,12 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                 if (!next || !btrfs_buffer_uptodate(next)) {
                         free_extent_buffer(next);
                         reada_walk_down(root, cur, path->slots[*level]);
+
+                        mutex_unlock(&root->fs_info->fs_mutex);
                         next = read_tree_block(root, bytenr, blocksize);
+                        mutex_lock(&root->fs_info->fs_mutex);
 
-                        /* we used to drop the lock above, keep the
-                         * code to double check so that we won't forget
-                         * when we drop the lock again in the future
-                         */
+                        /* we've dropped the lock, double check */
                         ret = lookup_extent_ref(trans, root, bytenr,
                                                 blocksize, &refs);
                         BUG_ON(ret);
@@ -2739,16 +2753,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
                         bit = BLOCK_GROUP_METADATA;
                 }
-                if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) {
-                        if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
-                                info->extra_data_alloc_bits =
-                                        BTRFS_BLOCK_GROUP_RAID0;
-                        }
-                        if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
-                                info->extra_alloc_bits =
-                                        BTRFS_BLOCK_GROUP_RAID0;
-                        }
-                }
+                set_avail_alloc_bits(info, cache->flags);
 
                 ret = update_space_info(info, cache->flags, found_key.offset,
                                         btrfs_block_group_used(&cache->item),
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0fa7cf227f1a..a8ae68c6fbb8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -306,6 +306,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
         u64 physical;
         u64 length = 0;
         u64 map_length;
+        int total_devs;
         struct bio_vec *bvec;
         int i;
         int ret;
@@ -315,7 +316,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
         }
         map_tree = &root->fs_info->mapping_tree;
         map_length = length;
-        ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev);
+        ret = btrfs_map_block(map_tree, READ, 0, logical, &physical,
+                              &map_length, &dev, &total_devs);
         if (map_length < length + size) {
                 return 1;
         }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 169be0f7285b..bc3c0b97588e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -31,6 +31,13 @@ struct stripe {
         u64 physical;
 };
 
+struct multi_bio {
+        atomic_t stripes;
+        bio_end_io_t *end_io;
+        void *private;
+        int error;
+};
+
 struct map_lookup {
         u64 type;
         int io_align;
@@ -632,12 +639,12 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
         if (list_empty(dev_list))
                 return -ENOSPC;
 
-        if (type & BTRFS_BLOCK_GROUP_RAID0)
+        if (type & (BTRFS_BLOCK_GROUP_RAID0))
                 num_stripes = btrfs_super_num_devices(&info->super_copy);
-        if (type & BTRFS_BLOCK_GROUP_DATA)
-                stripe_len = 64 * 1024;
-        if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))
-                stripe_len = 32 * 1024;
+        if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
+                num_stripes = min_t(u64, 2,
+                                    btrfs_super_num_devices(&info->super_copy));
+        }
 again:
         INIT_LIST_HEAD(&private_devs);
         cur = dev_list->next;
@@ -682,7 +689,11 @@ again:
 
         stripes = &chunk->stripe;
 
-        *num_bytes = calc_size * num_stripes;
+        if (type & BTRFS_BLOCK_GROUP_RAID1)
+                *num_bytes = calc_size;
+        else
+                *num_bytes = calc_size * num_stripes;
+
         index = 0;
         while(index < num_stripes) {
                 BUG_ON(list_empty(&private_devs));
@@ -694,7 +705,7 @@ again:
                                               key.objectid,
                                               calc_size, &dev_offset);
                 BUG_ON(ret);
-printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid);
+printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.objectid, calc_size, device->devid, type);
                 device->bytes_used += calc_size;
                 ret = btrfs_update_device(trans, device);
                 BUG_ON(ret);
@@ -774,9 +785,9 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
         }
 }
 
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
-                    u64 logical, u64 *phys, u64 *length,
-                    struct btrfs_device **dev)
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+                    int dev_nr, u64 logical, u64 *phys, u64 *length,
+                    struct btrfs_device **dev, int *total_devs)
 {
         struct extent_map *em;
         struct map_lookup *map;
@@ -808,19 +819,39 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
         /* stripe_offset is the offset of this block in its stripe*/
         stripe_offset = offset - stripe_offset;
 
-        /*
-         * after this do_div call, stripe_nr is the number of stripes
-         * on this device we have to walk to find the data, and
-         * stripe_index is the number of our device in the stripe array
-         */
-        stripe_index = do_div(stripe_nr, map->num_stripes);
-
+        if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+                stripe_index = dev_nr;
+                if (rw & (1 << BIO_RW))
+                        *total_devs = map->num_stripes;
+                else {
+                        int i;
+                        u64 least = (u64)-1;
+                        struct btrfs_device *cur;
+
+                        for (i = 0; i < map->num_stripes; i++) {
+                                cur = map->stripes[i].dev;
+                                spin_lock(&cur->io_lock);
+                                if (cur->total_ios < least) {
+                                        least = cur->total_ios;
+                                        stripe_index = i;
+                                }
+                                spin_unlock(&cur->io_lock);
+                        }
+                        *total_devs = 1;
+                }
+        } else {
+                /*
+                 * after this do_div call, stripe_nr is the number of stripes
+                 * on this device we have to walk to find the data, and
+                 * stripe_index is the number of our device in the stripe array
+                 */
+                stripe_index = do_div(stripe_nr, map->num_stripes);
+        }
         BUG_ON(stripe_index >= map->num_stripes);
-
         *phys = map->stripes[stripe_index].physical + stripe_offset +
                 stripe_nr * map->stripe_len;
 
-        if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+        if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1)) {
                 /* we limit the length of each bio to what fits in a stripe */
                 *length = min_t(u64, em->len - offset,
                                 map->stripe_len - stripe_offset);
@@ -833,33 +864,98 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
         return 0;
 }
 
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_multi_stripe(struct bio *bio, int err)
+#else
+static int end_bio_multi_stripe(struct bio *bio,
+                                unsigned int bytes_done, int err)
+#endif
+{
+        struct multi_bio *multi = bio->bi_private;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+        if (bio->bi_size)
+                return 1;
+#endif
+        if (err)
+                multi->error = err;
+
+        if (atomic_dec_and_test(&multi->stripes)) {
+                bio->bi_private = multi->private;
+                bio->bi_end_io = multi->end_io;
+
+                if (!err && multi->error)
+                        err = multi->error;
+                kfree(multi);
+
+                bio_endio(bio, err);
+        } else {
+                bio_put(bio);
+        }
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+        return 0;
+#endif
+}
+
 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio)
 {
         struct btrfs_mapping_tree *map_tree;
         struct btrfs_device *dev;
+        struct bio *first_bio = bio;
         u64 logical = bio->bi_sector << 9;
         u64 physical;
         u64 length = 0;
         u64 map_length;
         struct bio_vec *bvec;
+        struct multi_bio *multi = NULL;
         int i;
         int ret;
+        int dev_nr = 0;
+        int total_devs = 1;
 
         bio_for_each_segment(bvec, bio, i) {
                 length += bvec->bv_len;
         }
+
         map_tree = &root->fs_info->mapping_tree;
         map_length = length;
-        ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev);
-        if (map_length < length) {
-                printk("mapping failed logical %Lu bio len %Lu physical %Lu "
-                       "len %Lu\n", logical, length, physical, map_length);
-                BUG();
+        while(dev_nr < total_devs) {
+                ret = btrfs_map_block(map_tree, rw, dev_nr, logical,
+                                      &physical, &map_length, &dev,
+                                      &total_devs);
+                if (map_length < length) {
+                        printk("mapping failed logical %Lu bio len %Lu physical %Lu "
+                               "len %Lu\n", logical, length, physical, map_length);
+                        BUG();
+                }
+                BUG_ON(map_length < length);
+                if (total_devs > 1) {
+                        if (!multi) {
+                                multi = kmalloc(sizeof(*multi), GFP_NOFS);
+                                atomic_set(&multi->stripes, 1);
+                                multi->end_io = bio->bi_end_io;
+                                multi->private = first_bio->bi_private;
+                                multi->error = 0;
+                        } else {
+                                atomic_inc(&multi->stripes);
+                        }
+                        if (dev_nr < total_devs - 1) {
+                                bio = bio_clone(first_bio, GFP_NOFS);
+                                BUG_ON(!bio);
+                        } else {
+                                bio = first_bio;
+                        }
+                        bio->bi_private = multi;
+                        bio->bi_end_io = end_bio_multi_stripe;
+                }
+                bio->bi_sector = physical >> 9;
+                bio->bi_bdev = dev->bdev;
+                spin_lock(&dev->io_lock);
+                dev->total_ios++;
+                spin_unlock(&dev->io_lock);
+                submit_bio(rw, bio);
+                dev_nr++;
         }
-        BUG_ON(map_length < length);
-        bio->bi_sector = physical >> 9;
-        bio->bi_bdev = dev->bdev;
-        submit_bio(rw, bio);
         return 0;
 }
 
@@ -982,6 +1078,8 @@ static int read_one_dev(struct btrfs_root *root,
                         return -ENOMEM;
                 list_add(&device->dev_list,
                          &root->fs_info->fs_devices->devices);
+                device->total_ios = 0;
+                spin_lock_init(&device->io_lock);
         }
 
         fill_device_from_item(leaf, dev_item, device);
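On the write side, btrfs_map_bio above loops dev_nr over total_devs: with RAID1 every mirror gets a copy of the bio (a bio_clone for all but the last), each copy's completion is routed through end_bio_multi_stripe, and the shared multi_bio stripe counter makes sure the caller's original end_io runs exactly once, after the last mirror finishes, carrying any error that was seen. A stripped-down user-space sketch of that complete-once pattern follows; the names multi_done, mirror_end_io, and caller_end_io are illustrative, not from the patch:

#include <stdio.h>
#include <stdlib.h>

/* stand-in for struct multi_bio: one shared context per mirrored write */
struct multi_done {
        int stripes;                 /* outstanding mirror I/Os (atomic_t in the patch) */
        int error;                   /* error reported by a mirror, if one failed       */
        void (*end_io)(int error);   /* the caller's original completion                */
};

static void caller_end_io(int error)
{
        printf("write finished, error=%d\n", error);
}

/* plays the role of end_bio_multi_stripe: called once per mirror I/O */
static void mirror_end_io(struct multi_done *multi, int err)
{
        if (err)
                multi->error = err;

        if (--multi->stripes == 0) {            /* atomic_dec_and_test() */
                multi->end_io(multi->error);    /* complete the original request once */
                free(multi);
        }
        /* otherwise just drop this clone, as bio_put() does in the patch */
}

int main(void)
{
        int total_devs = 2;                     /* RAID1: write both mirrors */
        struct multi_done *multi = malloc(sizeof(*multi));

        multi->stripes = total_devs;
        multi->error = 0;
        multi->end_io = caller_end_io;

        /* simulate both mirror I/Os completing; the second reports an error */
        mirror_end_io(multi, 0);
        mirror_end_io(multi, -5);
        return 0;
}

For reads, total_devs stays 1 and btrfs_map_block instead picks the mirror whose total_ios counter is lowest, so read load spreads across the two copies.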
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 12f297eb0559..35dec3efd78c 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -18,12 +18,16 @@
 
 #ifndef __BTRFS_VOLUMES_
 #define __BTRFS_VOLUMES_
+
 struct btrfs_device {
         struct list_head dev_list;
         struct btrfs_root *dev_root;
+        spinlock_t io_lock;
 
         struct block_device *bdev;
 
+        u64 total_ios;
+
         char *name;
 
         /* the internal btrfs device id */
@@ -68,9 +72,9 @@ struct btrfs_fs_devices {
 int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
                            struct btrfs_device *device,
                            u64 owner, u64 num_bytes, u64 *start);
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
-                    u64 logical, u64 *phys, u64 *length,
-                    struct btrfs_device **dev);
+int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int stripe_nr,
+                    u64 logical, u64 *phys, u64 *length,
+                    struct btrfs_device **dev, int *total_stripes);
 int btrfs_read_sys_array(struct btrfs_root *root);
 int btrfs_read_chunk_tree(struct btrfs_root *root);
 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
@@ -80,9 +84,6 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
 void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio);
 int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf);
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
-                    u64 logical, u64 *phys, u64 *length,
-                    struct btrfs_device **dev);
 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                        int flags, void *holder);
 int btrfs_scan_one_device(const char *path, int flags, void *holder,