Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r--   fs/btrfs/volumes.c   626
1 file changed, 459 insertions, 167 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1718e1a5c320..d158530233b7 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
23 | #include <linux/random.h> | 23 | #include <linux/random.h> |
24 | #include <linux/iocontext.h> | 24 | #include <linux/iocontext.h> |
25 | #include <linux/capability.h> | ||
25 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
26 | #include "compat.h" | 27 | #include "compat.h" |
27 | #include "ctree.h" | 28 | #include "ctree.h" |
@@ -600,8 +601,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
600 | set_blocksize(bdev, 4096); | 601 | set_blocksize(bdev, 4096); |
601 | 602 | ||
602 | bh = btrfs_read_dev_super(bdev); | 603 | bh = btrfs_read_dev_super(bdev); |
603 | if (!bh) | 604 | if (!bh) { |
605 | ret = -EINVAL; | ||
604 | goto error_close; | 606 | goto error_close; |
607 | } | ||
605 | 608 | ||
606 | disk_super = (struct btrfs_super_block *)bh->b_data; | 609 | disk_super = (struct btrfs_super_block *)bh->b_data; |
607 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 610 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
@@ -703,7 +706,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
703 | goto error_close; | 706 | goto error_close; |
704 | bh = btrfs_read_dev_super(bdev); | 707 | bh = btrfs_read_dev_super(bdev); |
705 | if (!bh) { | 708 | if (!bh) { |
706 | ret = -EIO; | 709 | ret = -EINVAL; |
707 | goto error_close; | 710 | goto error_close; |
708 | } | 711 | } |
709 | disk_super = (struct btrfs_super_block *)bh->b_data; | 712 | disk_super = (struct btrfs_super_block *)bh->b_data; |
@@ -729,59 +732,167 @@ error: | |||
729 | return ret; | 732 | return ret; |
730 | } | 733 | } |
731 | 734 | ||
735 | /* helper to account the device space used in the range [start, end] */ | ||
736 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
737 | u64 end, u64 *length) | ||
738 | { | ||
739 | struct btrfs_key key; | ||
740 | struct btrfs_root *root = device->dev_root; | ||
741 | struct btrfs_dev_extent *dev_extent; | ||
742 | struct btrfs_path *path; | ||
743 | u64 extent_end; | ||
744 | int ret; | ||
745 | int slot; | ||
746 | struct extent_buffer *l; | ||
747 | |||
748 | *length = 0; | ||
749 | |||
750 | if (start >= device->total_bytes) | ||
751 | return 0; | ||
752 | |||
753 | path = btrfs_alloc_path(); | ||
754 | if (!path) | ||
755 | return -ENOMEM; | ||
756 | path->reada = 2; | ||
757 | |||
758 | key.objectid = device->devid; | ||
759 | key.offset = start; | ||
760 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
761 | |||
762 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
763 | if (ret < 0) | ||
764 | goto out; | ||
765 | if (ret > 0) { | ||
766 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | ||
767 | if (ret < 0) | ||
768 | goto out; | ||
769 | } | ||
770 | |||
771 | while (1) { | ||
772 | l = path->nodes[0]; | ||
773 | slot = path->slots[0]; | ||
774 | if (slot >= btrfs_header_nritems(l)) { | ||
775 | ret = btrfs_next_leaf(root, path); | ||
776 | if (ret == 0) | ||
777 | continue; | ||
778 | if (ret < 0) | ||
779 | goto out; | ||
780 | |||
781 | break; | ||
782 | } | ||
783 | btrfs_item_key_to_cpu(l, &key, slot); | ||
784 | |||
785 | if (key.objectid < device->devid) | ||
786 | goto next; | ||
787 | |||
788 | if (key.objectid > device->devid) | ||
789 | break; | ||
790 | |||
791 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
792 | goto next; | ||
793 | |||
794 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | ||
795 | extent_end = key.offset + btrfs_dev_extent_length(l, | ||
796 | dev_extent); | ||
797 | if (key.offset <= start && extent_end > end) { | ||
798 | *length = end - start + 1; | ||
799 | break; | ||
800 | } else if (key.offset <= start && extent_end > start) | ||
801 | *length += extent_end - start; | ||
802 | else if (key.offset > start && extent_end <= end) | ||
803 | *length += extent_end - key.offset; | ||
804 | else if (key.offset > start && key.offset <= end) { | ||
805 | *length += end - key.offset + 1; | ||
806 | break; | ||
807 | } else if (key.offset > end) | ||
808 | break; | ||
809 | |||
810 | next: | ||
811 | path->slots[0]++; | ||
812 | } | ||
813 | ret = 0; | ||
814 | out: | ||
815 | btrfs_free_path(path); | ||
816 | return ret; | ||
817 | } | ||
818 | |||
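The new btrfs_account_dev_extents_size() helper walks the DEV_EXTENT items of one device and sums how many bytes of the queried range [start, end] they cover, distinguishing four overlap cases: an extent that covers the whole range, one that overlaps its head, one that lies fully inside it, and one that overlaps its tail. A minimal user-space sketch of the same overlap arithmetic (illustrative types only, not the kernel API):

/*
 * Stand-alone sketch of the overlap accounting in
 * btrfs_account_dev_extents_size(): extents are half-open [start, end),
 * the queried range [rstart, rend] is inclusive, and the extents are
 * visited in ascending order, as they are in the dev tree.
 */
#include <stdio.h>
#include <stdint.h>

struct extent { uint64_t start, end; };		/* allocated, [start, end) */

static uint64_t account_used(const struct extent *ext, int nr,
			     uint64_t rstart, uint64_t rend)
{
	uint64_t length = 0;
	int i;

	for (i = 0; i < nr; i++) {
		uint64_t off = ext[i].start, ext_end = ext[i].end;

		if (off <= rstart && ext_end > rend)		/* covers it all */
			return rend - rstart + 1;
		else if (off <= rstart && ext_end > rstart)	/* overlaps the head */
			length += ext_end - rstart;
		else if (off > rstart && ext_end <= rend)	/* fully inside */
			length += ext_end - off;
		else if (off > rstart && off <= rend)		/* overlaps the tail */
			return length + rend - off + 1;
		else if (off > rend)				/* past the range, done */
			break;
	}
	return length;
}

int main(void)
{
	struct extent ext[] = { { 0, 4096 }, { 8192, 16384 } };

	/* 4096 bytes from the first extent + 8192 from the second = 12288 */
	printf("%llu\n", (unsigned long long)account_used(ext, 2, 0, 20480));
	return 0;
}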
732 | /* | 819 | /* |
820 | * find_free_dev_extent - find free space in the specified device | ||
821 | * @trans: transaction handle | ||
822 | * @device: the device in which we search for the free space | ||
823 | * @num_bytes: the size of the free space that we need | ||
824 | * @start: stores the start of the free space | ||
825 | * @len: the size of the free space that we find, or the size of the | ||
826 | * largest free space if we don't find one that is big enough | ||
827 | * | ||
733 | * this uses a pretty simple search, the expectation is that it is | 828 | * this uses a pretty simple search, the expectation is that it is |
734 | * called very infrequently and that a given device has a small number | 829 | * called very infrequently and that a given device has a small number |
735 | * of extents | 830 | * of extents |
831 | * | ||
832 | * @start is used to store the start of the free space that we find. If we | ||
833 | * don't find suitable free space, it stores the start of the largest free | ||
834 | * space instead. | ||
835 | * | ||
836 | * @len is used to store the size of the free space that we find. If we | ||
837 | * don't find suitable free space, it stores the size of the largest free | ||
838 | * space instead. | ||
736 | */ | 839 | */ |
737 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | 840 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
738 | struct btrfs_device *device, u64 num_bytes, | 841 | struct btrfs_device *device, u64 num_bytes, |
739 | u64 *start, u64 *max_avail) | 842 | u64 *start, u64 *len) |
740 | { | 843 | { |
741 | struct btrfs_key key; | 844 | struct btrfs_key key; |
742 | struct btrfs_root *root = device->dev_root; | 845 | struct btrfs_root *root = device->dev_root; |
743 | struct btrfs_dev_extent *dev_extent = NULL; | 846 | struct btrfs_dev_extent *dev_extent; |
744 | struct btrfs_path *path; | 847 | struct btrfs_path *path; |
745 | u64 hole_size = 0; | 848 | u64 hole_size; |
746 | u64 last_byte = 0; | 849 | u64 max_hole_start; |
747 | u64 search_start = 0; | 850 | u64 max_hole_size; |
851 | u64 extent_end; | ||
852 | u64 search_start; | ||
748 | u64 search_end = device->total_bytes; | 853 | u64 search_end = device->total_bytes; |
749 | int ret; | 854 | int ret; |
750 | int slot = 0; | 855 | int slot; |
751 | int start_found; | ||
752 | struct extent_buffer *l; | 856 | struct extent_buffer *l; |
753 | 857 | ||
754 | path = btrfs_alloc_path(); | ||
755 | if (!path) | ||
756 | return -ENOMEM; | ||
757 | path->reada = 2; | ||
758 | start_found = 0; | ||
759 | |||
760 | /* FIXME use last free of some kind */ | 858 | /* FIXME use last free of some kind */ |
761 | 859 | ||
762 | /* we don't want to overwrite the superblock on the drive, | 860 | /* we don't want to overwrite the superblock on the drive, |
763 | * so we make sure to start at an offset of at least 1MB | 861 | * so we make sure to start at an offset of at least 1MB |
764 | */ | 862 | */ |
765 | search_start = max((u64)1024 * 1024, search_start); | 863 | search_start = 1024 * 1024; |
766 | 864 | ||
767 | if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) | 865 | if (root->fs_info->alloc_start + num_bytes <= search_end) |
768 | search_start = max(root->fs_info->alloc_start, search_start); | 866 | search_start = max(root->fs_info->alloc_start, search_start); |
769 | 867 | ||
868 | max_hole_start = search_start; | ||
869 | max_hole_size = 0; | ||
870 | |||
871 | if (search_start >= search_end) { | ||
872 | ret = -ENOSPC; | ||
873 | goto error; | ||
874 | } | ||
875 | |||
876 | path = btrfs_alloc_path(); | ||
877 | if (!path) { | ||
878 | ret = -ENOMEM; | ||
879 | goto error; | ||
880 | } | ||
881 | path->reada = 2; | ||
882 | |||
770 | key.objectid = device->devid; | 883 | key.objectid = device->devid; |
771 | key.offset = search_start; | 884 | key.offset = search_start; |
772 | key.type = BTRFS_DEV_EXTENT_KEY; | 885 | key.type = BTRFS_DEV_EXTENT_KEY; |
886 | |||
773 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 887 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); |
774 | if (ret < 0) | 888 | if (ret < 0) |
775 | goto error; | 889 | goto out; |
776 | if (ret > 0) { | 890 | if (ret > 0) { |
777 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | 891 | ret = btrfs_previous_item(root, path, key.objectid, key.type); |
778 | if (ret < 0) | 892 | if (ret < 0) |
779 | goto error; | 893 | goto out; |
780 | if (ret > 0) | ||
781 | start_found = 1; | ||
782 | } | 894 | } |
783 | l = path->nodes[0]; | 895 | |
784 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | ||
785 | while (1) { | 896 | while (1) { |
786 | l = path->nodes[0]; | 897 | l = path->nodes[0]; |
787 | slot = path->slots[0]; | 898 | slot = path->slots[0]; |
@@ -790,24 +901,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
790 | if (ret == 0) | 901 | if (ret == 0) |
791 | continue; | 902 | continue; |
792 | if (ret < 0) | 903 | if (ret < 0) |
793 | goto error; | 904 | goto out; |
794 | no_more_items: | 905 | |
795 | if (!start_found) { | 906 | break; |
796 | if (search_start >= search_end) { | ||
797 | ret = -ENOSPC; | ||
798 | goto error; | ||
799 | } | ||
800 | *start = search_start; | ||
801 | start_found = 1; | ||
802 | goto check_pending; | ||
803 | } | ||
804 | *start = last_byte > search_start ? | ||
805 | last_byte : search_start; | ||
806 | if (search_end <= *start) { | ||
807 | ret = -ENOSPC; | ||
808 | goto error; | ||
809 | } | ||
810 | goto check_pending; | ||
811 | } | 907 | } |
812 | btrfs_item_key_to_cpu(l, &key, slot); | 908 | btrfs_item_key_to_cpu(l, &key, slot); |
813 | 909 | ||
@@ -815,48 +911,62 @@ no_more_items: | |||
815 | goto next; | 911 | goto next; |
816 | 912 | ||
817 | if (key.objectid > device->devid) | 913 | if (key.objectid > device->devid) |
818 | goto no_more_items; | 914 | break; |
819 | 915 | ||
820 | if (key.offset >= search_start && key.offset > last_byte && | 916 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) |
821 | start_found) { | 917 | goto next; |
822 | if (last_byte < search_start) | ||
823 | last_byte = search_start; | ||
824 | hole_size = key.offset - last_byte; | ||
825 | 918 | ||
826 | if (hole_size > *max_avail) | 919 | if (key.offset > search_start) { |
827 | *max_avail = hole_size; | 920 | hole_size = key.offset - search_start; |
828 | 921 | ||
829 | if (key.offset > last_byte && | 922 | if (hole_size > max_hole_size) { |
830 | hole_size >= num_bytes) { | 923 | max_hole_start = search_start; |
831 | *start = last_byte; | 924 | max_hole_size = hole_size; |
832 | goto check_pending; | 925 | } |
926 | |||
927 | /* | ||
928 | * If this free space is greater than which we need, | ||
929 | * it must be the max free space that we have found | ||
930 | * until now, so max_hole_start must point to the start | ||
931 | * of this free space and the length of this free space | ||
932 | * is stored in max_hole_size. Thus, we return | ||
933 | * max_hole_start and max_hole_size and go back to the | ||
934 | * caller. | ||
935 | */ | ||
936 | if (hole_size >= num_bytes) { | ||
937 | ret = 0; | ||
938 | goto out; | ||
833 | } | 939 | } |
834 | } | 940 | } |
835 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
836 | goto next; | ||
837 | 941 | ||
838 | start_found = 1; | ||
839 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 942 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
840 | last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); | 943 | extent_end = key.offset + btrfs_dev_extent_length(l, |
944 | dev_extent); | ||
945 | if (extent_end > search_start) | ||
946 | search_start = extent_end; | ||
841 | next: | 947 | next: |
842 | path->slots[0]++; | 948 | path->slots[0]++; |
843 | cond_resched(); | 949 | cond_resched(); |
844 | } | 950 | } |
845 | check_pending: | ||
846 | /* we have to make sure we didn't find an extent that has already | ||
847 | * been allocated by the map tree or the original allocation | ||
848 | */ | ||
849 | BUG_ON(*start < search_start); | ||
850 | 951 | ||
851 | if (*start + num_bytes > search_end) { | 952 | hole_size = search_end - search_start; |
852 | ret = -ENOSPC; | 953 | if (hole_size > max_hole_size) { |
853 | goto error; | 954 | max_hole_start = search_start; |
955 | max_hole_size = hole_size; | ||
854 | } | 956 | } |
855 | /* check for pending inserts here */ | ||
856 | ret = 0; | ||
857 | 957 | ||
858 | error: | 958 | /* See above. */ |
959 | if (hole_size < num_bytes) | ||
960 | ret = -ENOSPC; | ||
961 | else | ||
962 | ret = 0; | ||
963 | |||
964 | out: | ||
859 | btrfs_free_path(path); | 965 | btrfs_free_path(path); |
966 | error: | ||
967 | *start = max_hole_start; | ||
968 | if (len) | ||
969 | *len = max_hole_size; | ||
860 | return ret; | 970 | return ret; |
861 | } | 971 | } |
862 | 972 | ||
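The reworked find_free_dev_extent() keeps the largest hole it has seen in max_hole_start/max_hole_size, returns early once a hole of at least num_bytes turns up, accounts for the trailing hole between the last dev extent and search_end, and reports the best hole even when it has to return -ENOSPC. A minimal user-space sketch of that max-hole scan over a sorted extent list (illustrative types, not the kernel implementation):

/*
 * Stand-alone sketch of the max-hole scan in the reworked
 * find_free_dev_extent(): walk the allocated extents of a device in
 * ascending order, remember the largest hole, and stop early once a
 * hole of at least num_bytes is found.
 */
#include <stdio.h>
#include <stdint.h>

struct extent { uint64_t start, end; };		/* allocated, sorted, [start, end) */

static int find_hole(const struct extent *ext, int nr,
		     uint64_t search_start, uint64_t search_end,
		     uint64_t num_bytes, uint64_t *start, uint64_t *len)
{
	uint64_t max_hole_start = search_start;
	uint64_t max_hole_size = 0;
	uint64_t hole_size;
	int i;

	for (i = 0; i < nr; i++) {
		if (ext[i].start > search_start) {
			hole_size = ext[i].start - search_start;
			if (hole_size > max_hole_size) {
				max_hole_start = search_start;
				max_hole_size = hole_size;
			}
			if (hole_size >= num_bytes)
				goto out;
		}
		if (ext[i].end > search_start)
			search_start = ext[i].end;
	}

	/* the hole between the last extent and the end of the device */
	hole_size = search_end - search_start;
	if (hole_size > max_hole_size) {
		max_hole_start = search_start;
		max_hole_size = hole_size;
	}
out:
	*start = max_hole_start;
	*len = max_hole_size;
	return max_hole_size >= num_bytes ? 0 : -1;	/* -1 stands in for -ENOSPC */
}

int main(void)
{
	struct extent ext[] = { { 1 << 20, 2 << 20 }, { 3 << 20, 4 << 20 } };
	uint64_t start, len;
	int ret;

	/* need 2MiB on an 8MiB device whose first MiB is reserved */
	ret = find_hole(ext, 2, 1 << 20, 8 << 20, 2 << 20, &start, &len);

	/* prints ret=0 start=4194304 len=4194304 (the trailing hole) */
	printf("ret=%d start=%llu len=%llu\n", ret,
	       (unsigned long long)start, (unsigned long long)len);
	return 0;
}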
@@ -1196,7 +1306,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1196 | set_blocksize(bdev, 4096); | 1306 | set_blocksize(bdev, 4096); |
1197 | bh = btrfs_read_dev_super(bdev); | 1307 | bh = btrfs_read_dev_super(bdev); |
1198 | if (!bh) { | 1308 | if (!bh) { |
1199 | ret = -EIO; | 1309 | ret = -EINVAL; |
1200 | goto error_close; | 1310 | goto error_close; |
1201 | } | 1311 | } |
1202 | disk_super = (struct btrfs_super_block *)bh->b_data; | 1312 | disk_super = (struct btrfs_super_block *)bh->b_data; |
@@ -1916,6 +2026,9 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1916 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) | 2026 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) |
1917 | return -EROFS; | 2027 | return -EROFS; |
1918 | 2028 | ||
2029 | if (!capable(CAP_SYS_ADMIN)) | ||
2030 | return -EPERM; | ||
2031 | |||
1919 | mutex_lock(&dev_root->fs_info->volume_mutex); | 2032 | mutex_lock(&dev_root->fs_info->volume_mutex); |
1920 | dev_root = dev_root->fs_info->dev_root; | 2033 | dev_root = dev_root->fs_info->dev_root; |
1921 | 2034 | ||
@@ -2154,66 +2267,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, | |||
2154 | return calc_size * num_stripes; | 2267 | return calc_size * num_stripes; |
2155 | } | 2268 | } |
2156 | 2269 | ||
2157 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 2270 | /* Used to sort the devices by max_avail (descending sort) */ |
2158 | struct btrfs_root *extent_root, | 2271 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) |
2159 | struct map_lookup **map_ret, | ||
2160 | u64 *num_bytes, u64 *stripe_size, | ||
2161 | u64 start, u64 type) | ||
2162 | { | 2272 | { |
2163 | struct btrfs_fs_info *info = extent_root->fs_info; | 2273 | if (((struct btrfs_device_info *)dev_info1)->max_avail > |
2164 | struct btrfs_device *device = NULL; | 2274 | ((struct btrfs_device_info *)dev_info2)->max_avail) |
2165 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | 2275 | return -1; |
2166 | struct list_head *cur; | 2276 | else if (((struct btrfs_device_info *)dev_info1)->max_avail < |
2167 | struct map_lookup *map = NULL; | 2277 | ((struct btrfs_device_info *)dev_info2)->max_avail) |
2168 | struct extent_map_tree *em_tree; | 2278 | return 1; |
2169 | struct extent_map *em; | 2279 | else |
2170 | struct list_head private_devs; | 2280 | return 0; |
2171 | int min_stripe_size = 1 * 1024 * 1024; | 2281 | } |
2172 | u64 calc_size = 1024 * 1024 * 1024; | ||
2173 | u64 max_chunk_size = calc_size; | ||
2174 | u64 min_free; | ||
2175 | u64 avail; | ||
2176 | u64 max_avail = 0; | ||
2177 | u64 dev_offset; | ||
2178 | int num_stripes = 1; | ||
2179 | int min_stripes = 1; | ||
2180 | int sub_stripes = 0; | ||
2181 | int looped = 0; | ||
2182 | int ret; | ||
2183 | int index; | ||
2184 | int stripe_len = 64 * 1024; | ||
2185 | 2282 | ||
2186 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | 2283 | static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, |
2187 | (type & BTRFS_BLOCK_GROUP_DUP)) { | 2284 | int *num_stripes, int *min_stripes, |
2188 | WARN_ON(1); | 2285 | int *sub_stripes) |
2189 | type &= ~BTRFS_BLOCK_GROUP_DUP; | 2286 | { |
2190 | } | 2287 | *num_stripes = 1; |
2191 | if (list_empty(&fs_devices->alloc_list)) | 2288 | *min_stripes = 1; |
2192 | return -ENOSPC; | 2289 | *sub_stripes = 0; |
2193 | 2290 | ||
2194 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { | 2291 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { |
2195 | num_stripes = fs_devices->rw_devices; | 2292 | *num_stripes = fs_devices->rw_devices; |
2196 | min_stripes = 2; | 2293 | *min_stripes = 2; |
2197 | } | 2294 | } |
2198 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { | 2295 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { |
2199 | num_stripes = 2; | 2296 | *num_stripes = 2; |
2200 | min_stripes = 2; | 2297 | *min_stripes = 2; |
2201 | } | 2298 | } |
2202 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { | 2299 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { |
2203 | if (fs_devices->rw_devices < 2) | 2300 | if (fs_devices->rw_devices < 2) |
2204 | return -ENOSPC; | 2301 | return -ENOSPC; |
2205 | num_stripes = 2; | 2302 | *num_stripes = 2; |
2206 | min_stripes = 2; | 2303 | *min_stripes = 2; |
2207 | } | 2304 | } |
2208 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | 2305 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { |
2209 | num_stripes = fs_devices->rw_devices; | 2306 | *num_stripes = fs_devices->rw_devices; |
2210 | if (num_stripes < 4) | 2307 | if (*num_stripes < 4) |
2211 | return -ENOSPC; | 2308 | return -ENOSPC; |
2212 | num_stripes &= ~(u32)1; | 2309 | *num_stripes &= ~(u32)1; |
2213 | sub_stripes = 2; | 2310 | *sub_stripes = 2; |
2214 | min_stripes = 4; | 2311 | *min_stripes = 4; |
2215 | } | 2312 | } |
2216 | 2313 | ||
2314 | return 0; | ||
2315 | } | ||
2316 | |||
2317 | static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, | ||
2318 | u64 proposed_size, u64 type, | ||
2319 | int num_stripes, int small_stripe) | ||
2320 | { | ||
2321 | int min_stripe_size = 1 * 1024 * 1024; | ||
2322 | u64 calc_size = proposed_size; | ||
2323 | u64 max_chunk_size = calc_size; | ||
2324 | int ncopies = 1; | ||
2325 | |||
2326 | if (type & (BTRFS_BLOCK_GROUP_RAID1 | | ||
2327 | BTRFS_BLOCK_GROUP_DUP | | ||
2328 | BTRFS_BLOCK_GROUP_RAID10)) | ||
2329 | ncopies = 2; | ||
2330 | |||
2217 | if (type & BTRFS_BLOCK_GROUP_DATA) { | 2331 | if (type & BTRFS_BLOCK_GROUP_DATA) { |
2218 | max_chunk_size = 10 * calc_size; | 2332 | max_chunk_size = 10 * calc_size; |
2219 | min_stripe_size = 64 * 1024 * 1024; | 2333 | min_stripe_size = 64 * 1024 * 1024; |
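btrfs_cmp_device_free_bytes(), added in this hunk, compares two btrfs_device_info entries by max_avail and is meant to drive a descending sort of the candidate devices (btrfs_descending_sort_devices later in the patch). It compares explicitly rather than subtracting the two u64 values, which would truncate when narrowed to the int return type. A user-space analogue using qsort(), with an illustrative struct rather than the kernel type:

/* Descending sort by max_avail, mirroring btrfs_cmp_device_free_bytes(). */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct dev_info {
	int devid;
	uint64_t max_avail;
};

static int cmp_free_bytes(const void *a, const void *b)
{
	const struct dev_info *d1 = a, *d2 = b;

	/* compare explicitly: subtracting u64s would overflow the int return */
	if (d1->max_avail > d2->max_avail)
		return -1;
	else if (d1->max_avail < d2->max_avail)
		return 1;
	return 0;
}

int main(void)
{
	struct dev_info devs[] = {
		{ 1, 10ULL << 30 }, { 2, 40ULL << 30 }, { 3, 20ULL << 30 },
	};
	int i;

	qsort(devs, 3, sizeof(devs[0]), cmp_free_bytes);

	/* prints devid 2, 3, 1: the device with the most free space first */
	for (i = 0; i < 3; i++)
		printf("devid %d: %llu bytes free\n", devs[i].devid,
		       (unsigned long long)devs[i].max_avail);
	return 0;
}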
@@ -2230,51 +2344,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2230 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), | 2344 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), |
2231 | max_chunk_size); | 2345 | max_chunk_size); |
2232 | 2346 | ||
2233 | again: | 2347 | if (calc_size * num_stripes > max_chunk_size * ncopies) { |
2234 | max_avail = 0; | 2348 | calc_size = max_chunk_size * ncopies; |
2235 | if (!map || map->num_stripes != num_stripes) { | ||
2236 | kfree(map); | ||
2237 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
2238 | if (!map) | ||
2239 | return -ENOMEM; | ||
2240 | map->num_stripes = num_stripes; | ||
2241 | } | ||
2242 | |||
2243 | if (calc_size * num_stripes > max_chunk_size) { | ||
2244 | calc_size = max_chunk_size; | ||
2245 | do_div(calc_size, num_stripes); | 2349 | do_div(calc_size, num_stripes); |
2246 | do_div(calc_size, stripe_len); | 2350 | do_div(calc_size, BTRFS_STRIPE_LEN); |
2247 | calc_size *= stripe_len; | 2351 | calc_size *= BTRFS_STRIPE_LEN; |
2248 | } | 2352 | } |
2249 | 2353 | ||
2250 | /* we don't want tiny stripes */ | 2354 | /* we don't want tiny stripes */ |
2251 | if (!looped) | 2355 | if (!small_stripe) |
2252 | calc_size = max_t(u64, min_stripe_size, calc_size); | 2356 | calc_size = max_t(u64, min_stripe_size, calc_size); |
2253 | 2357 | ||
2254 | /* | 2358 | /* |
2255 | * we're about to do_div by the stripe_len so lets make sure | 2359 | * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure |
2256 | * we end up with something bigger than a stripe | 2360 | * we end up with something bigger than a stripe |
2257 | */ | 2361 | */ |
2258 | calc_size = max_t(u64, calc_size, stripe_len * 4); | 2362 | calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); |
2363 | |||
2364 | do_div(calc_size, BTRFS_STRIPE_LEN); | ||
2365 | calc_size *= BTRFS_STRIPE_LEN; | ||
2366 | |||
2367 | return calc_size; | ||
2368 | } | ||
2369 | |||
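__btrfs_calc_stripe_size() clamps the proposed per-device stripe so that the whole chunk stays within max_chunk_size times the number of copies, rounds the result down to a multiple of BTRFS_STRIPE_LEN, and enforces a minimum stripe size. A user-space sketch of that arithmetic, with plain division standing in for do_div() and illustrative constants:

/*
 * Sketch of the stripe-size clamping in __btrfs_calc_stripe_size().
 * STRIPE_LEN and the values in main() are illustrative.
 */
#include <stdio.h>
#include <stdint.h>

#define STRIPE_LEN	(64 * 1024)

static uint64_t calc_stripe_size(uint64_t proposed, uint64_t max_chunk_size,
				 uint64_t min_stripe_size, int num_stripes,
				 int ncopies, int small_stripe)
{
	uint64_t calc_size = proposed;

	/* keep the whole chunk below max_chunk_size * ncopies */
	if (calc_size * num_stripes > max_chunk_size * ncopies) {
		calc_size = max_chunk_size * ncopies;
		calc_size /= num_stripes;
		calc_size /= STRIPE_LEN;
		calc_size *= STRIPE_LEN;
	}

	/* we don't want tiny stripes */
	if (!small_stripe && calc_size < min_stripe_size)
		calc_size = min_stripe_size;

	/* and we need at least one full stripe */
	if (calc_size < STRIPE_LEN)
		calc_size = STRIPE_LEN;

	/* final round down to a stripe boundary */
	calc_size /= STRIPE_LEN;
	calc_size *= STRIPE_LEN;
	return calc_size;
}

int main(void)
{
	/* data-like case: 1GiB proposed, 10GiB chunk cap, 2 copies, 2 stripes */
	uint64_t sz = calc_stripe_size(1ULL << 30, 10ULL << 30, 64ULL << 20,
				       2, 2, 0);

	printf("%llu\n", (unsigned long long)sz);	/* 1073741824: cap not hit */
	return 0;
}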
2370 | static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map, | ||
2371 | int num_stripes) | ||
2372 | { | ||
2373 | struct map_lookup *new; | ||
2374 | size_t len = map_lookup_size(num_stripes); | ||
2375 | |||
2376 | BUG_ON(map->num_stripes < num_stripes); | ||
2377 | |||
2378 | if (map->num_stripes == num_stripes) | ||
2379 | return map; | ||
2380 | |||
2381 | new = kmalloc(len, GFP_NOFS); | ||
2382 | if (!new) { | ||
2383 | /* just change map->num_stripes */ | ||
2384 | map->num_stripes = num_stripes; | ||
2385 | return map; | ||
2386 | } | ||
2387 | |||
2388 | memcpy(new, map, len); | ||
2389 | new->num_stripes = num_stripes; | ||
2390 | kfree(map); | ||
2391 | return new; | ||
2392 | } | ||
2393 | |||
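__shrink_map_lookup_stripes() relies on struct map_lookup ending in a flexible array of stripes: shrinking the map means allocating a smaller copy sized by map_lookup_size() for the new stripe count, or, if that allocation fails, simply lowering num_stripes in place and leaving slack at the end of the old buffer. A user-space sketch of the same pattern (illustrative struct layout, not the kernel one):

/* Sketch of shrinking a structure that ends in a flexible array member. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct stripe { int dev; unsigned long long physical; };

struct map_lookup {
	int num_stripes;
	struct stripe stripes[];	/* flexible array member */
};

#define map_lookup_size(n) \
	(sizeof(struct map_lookup) + (n) * sizeof(struct stripe))

static struct map_lookup *shrink_map(struct map_lookup *map, int num_stripes)
{
	struct map_lookup *new;
	size_t len = map_lookup_size(num_stripes);

	assert(map->num_stripes >= num_stripes);	/* mirrors the BUG_ON */

	if (map->num_stripes == num_stripes)
		return map;

	new = malloc(len);
	if (!new) {
		/* fallback: just shrink in place, wasting a little memory */
		map->num_stripes = num_stripes;
		return map;
	}

	memcpy(new, map, len);
	new->num_stripes = num_stripes;
	free(map);
	return new;
}

int main(void)
{
	struct map_lookup *map = calloc(1, map_lookup_size(4));

	if (!map)
		return 1;
	map->num_stripes = 4;
	map = shrink_map(map, 2);
	printf("num_stripes = %d\n", map->num_stripes);	/* 2 */
	free(map);
	return 0;
}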
2394 | /* | ||
2395 | * helper to allocate device space from btrfs_device_info, in which we store | ||
2396 | * the max free space information of every device. It is used when we cannot | ||
2397 | * allocate chunks of the default size. | ||
2398 | * | ||
2399 | * With this helper, we can allocate a new chunk that is as large as possible. | ||
2400 | */ | ||
2401 | static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans, | ||
2402 | struct btrfs_fs_devices *fs_devices, | ||
2403 | struct btrfs_device_info *devices, | ||
2404 | int nr_device, u64 type, | ||
2405 | struct map_lookup **map_lookup, | ||
2406 | int min_stripes, u64 *stripe_size) | ||
2407 | { | ||
2408 | int i, index, sort_again = 0; | ||
2409 | int min_devices = min_stripes; | ||
2410 | u64 max_avail, min_free; | ||
2411 | struct map_lookup *map = *map_lookup; | ||
2412 | int ret; | ||
2413 | |||
2414 | if (nr_device < min_stripes) | ||
2415 | return -ENOSPC; | ||
2416 | |||
2417 | btrfs_descending_sort_devices(devices, nr_device); | ||
2418 | |||
2419 | max_avail = devices[0].max_avail; | ||
2420 | if (!max_avail) | ||
2421 | return -ENOSPC; | ||
2422 | |||
2423 | for (i = 0; i < nr_device; i++) { | ||
2424 | /* | ||
2425 | * if dev_offset is 0, the free space of this device was | ||
2426 | * less than what we needed, so we have not yet searched for | ||
2427 | * its largest free extent; do that search now. | ||
2428 | */ | ||
2429 | if (!devices[i].dev_offset) { | ||
2430 | ret = find_free_dev_extent(trans, devices[i].dev, | ||
2431 | max_avail, | ||
2432 | &devices[i].dev_offset, | ||
2433 | &devices[i].max_avail); | ||
2434 | if (ret != 0 && ret != -ENOSPC) | ||
2435 | return ret; | ||
2436 | sort_again = 1; | ||
2437 | } | ||
2438 | } | ||
2439 | |||
2440 | /* we updated the max avail free extent of each device, so sort again */ | ||
2441 | if (sort_again) | ||
2442 | btrfs_descending_sort_devices(devices, nr_device); | ||
2443 | |||
2444 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2445 | min_devices = 1; | ||
2446 | |||
2447 | if (!devices[min_devices - 1].max_avail) | ||
2448 | return -ENOSPC; | ||
2449 | |||
2450 | max_avail = devices[min_devices - 1].max_avail; | ||
2451 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2452 | do_div(max_avail, 2); | ||
2453 | |||
2454 | max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, | ||
2455 | min_stripes, 1); | ||
2456 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2457 | min_free = max_avail * 2; | ||
2458 | else | ||
2459 | min_free = max_avail; | ||
2460 | |||
2461 | if (min_free > devices[min_devices - 1].max_avail) | ||
2462 | return -ENOSPC; | ||
2463 | |||
2464 | map = __shrink_map_lookup_stripes(map, min_stripes); | ||
2465 | *stripe_size = max_avail; | ||
2466 | |||
2467 | index = 0; | ||
2468 | for (i = 0; i < min_stripes; i++) { | ||
2469 | map->stripes[i].dev = devices[index].dev; | ||
2470 | map->stripes[i].physical = devices[index].dev_offset; | ||
2471 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
2472 | i++; | ||
2473 | map->stripes[i].dev = devices[index].dev; | ||
2474 | map->stripes[i].physical = devices[index].dev_offset + | ||
2475 | max_avail; | ||
2476 | } | ||
2477 | index++; | ||
2478 | } | ||
2479 | *map_lookup = map; | ||
2480 | |||
2481 | return 0; | ||
2482 | } | ||
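__btrfs_alloc_tiny_space() is the fallback for when no device has min_free bytes available: the devices are sorted by max_avail (largest first), devices that were never probed get a find_free_dev_extent() pass, and the achievable stripe size is then bounded by the min_devices-th largest max_avail, halved for DUP because both copies land on the same device. A user-space sketch of just that sizing bound (illustrative, not the kernel code):

/* Sketch of the sizing bound used by the tiny-space fallback. */
#include <stdio.h>
#include <stdint.h>

static uint64_t tiny_stripe_bound(const uint64_t *max_avail_sorted_desc,
				  int nr, int min_devices, int is_dup)
{
	uint64_t bound;

	if (nr < min_devices || max_avail_sorted_desc[min_devices - 1] == 0)
		return 0;			/* stands in for -ENOSPC */

	bound = max_avail_sorted_desc[min_devices - 1];
	if (is_dup)
		bound /= 2;			/* both stripes go to one device */
	return bound;
}

int main(void)
{
	uint64_t avail[] = { 900ULL << 20, 500ULL << 20, 100ULL << 20 };

	/* a RAID1-like profile needs 2 devices: bound is the 2nd largest, 500MiB */
	printf("%llu\n", (unsigned long long)tiny_stripe_bound(avail, 3, 2, 0));
	return 0;
}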
2259 | 2483 | ||
2260 | do_div(calc_size, stripe_len); | 2484 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, |
2261 | calc_size *= stripe_len; | 2485 | struct btrfs_root *extent_root, |
2486 | struct map_lookup **map_ret, | ||
2487 | u64 *num_bytes, u64 *stripe_size, | ||
2488 | u64 start, u64 type) | ||
2489 | { | ||
2490 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
2491 | struct btrfs_device *device = NULL; | ||
2492 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | ||
2493 | struct list_head *cur; | ||
2494 | struct map_lookup *map; | ||
2495 | struct extent_map_tree *em_tree; | ||
2496 | struct extent_map *em; | ||
2497 | struct btrfs_device_info *devices_info; | ||
2498 | struct list_head private_devs; | ||
2499 | u64 calc_size = 1024 * 1024 * 1024; | ||
2500 | u64 min_free; | ||
2501 | u64 avail; | ||
2502 | u64 dev_offset; | ||
2503 | int num_stripes; | ||
2504 | int min_stripes; | ||
2505 | int sub_stripes; | ||
2506 | int min_devices; /* the min number of devices we need */ | ||
2507 | int i; | ||
2508 | int ret; | ||
2509 | int index; | ||
2510 | |||
2511 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | ||
2512 | (type & BTRFS_BLOCK_GROUP_DUP)) { | ||
2513 | WARN_ON(1); | ||
2514 | type &= ~BTRFS_BLOCK_GROUP_DUP; | ||
2515 | } | ||
2516 | if (list_empty(&fs_devices->alloc_list)) | ||
2517 | return -ENOSPC; | ||
2518 | |||
2519 | ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes, | ||
2520 | &min_stripes, &sub_stripes); | ||
2521 | if (ret) | ||
2522 | return ret; | ||
2523 | |||
2524 | devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, | ||
2525 | GFP_NOFS); | ||
2526 | if (!devices_info) | ||
2527 | return -ENOMEM; | ||
2528 | |||
2529 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
2530 | if (!map) { | ||
2531 | ret = -ENOMEM; | ||
2532 | goto error; | ||
2533 | } | ||
2534 | map->num_stripes = num_stripes; | ||
2262 | 2535 | ||
2263 | cur = fs_devices->alloc_list.next; | 2536 | cur = fs_devices->alloc_list.next; |
2264 | index = 0; | 2537 | index = 0; |
2538 | i = 0; | ||
2265 | 2539 | ||
2266 | if (type & BTRFS_BLOCK_GROUP_DUP) | 2540 | calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, |
2541 | num_stripes, 0); | ||
2542 | |||
2543 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
2267 | min_free = calc_size * 2; | 2544 | min_free = calc_size * 2; |
2268 | else | 2545 | min_devices = 1; |
2546 | } else { | ||
2269 | min_free = calc_size; | 2547 | min_free = calc_size; |
2270 | 2548 | min_devices = min_stripes; | |
2271 | /* | 2549 | } |
2272 | * we add 1MB because we never use the first 1MB of the device, unless | ||
2273 | * we've looped, then we are likely allocating the maximum amount of | ||
2274 | * space left already | ||
2275 | */ | ||
2276 | if (!looped) | ||
2277 | min_free += 1024 * 1024; | ||
2278 | 2550 | ||
2279 | INIT_LIST_HEAD(&private_devs); | 2551 | INIT_LIST_HEAD(&private_devs); |
2280 | while (index < num_stripes) { | 2552 | while (index < num_stripes) { |
@@ -2287,27 +2559,39 @@ again: | |||
2287 | cur = cur->next; | 2559 | cur = cur->next; |
2288 | 2560 | ||
2289 | if (device->in_fs_metadata && avail >= min_free) { | 2561 | if (device->in_fs_metadata && avail >= min_free) { |
2290 | ret = find_free_dev_extent(trans, device, | 2562 | ret = find_free_dev_extent(trans, device, min_free, |
2291 | min_free, &dev_offset, | 2563 | &devices_info[i].dev_offset, |
2292 | &max_avail); | 2564 | &devices_info[i].max_avail); |
2293 | if (ret == 0) { | 2565 | if (ret == 0) { |
2294 | list_move_tail(&device->dev_alloc_list, | 2566 | list_move_tail(&device->dev_alloc_list, |
2295 | &private_devs); | 2567 | &private_devs); |
2296 | map->stripes[index].dev = device; | 2568 | map->stripes[index].dev = device; |
2297 | map->stripes[index].physical = dev_offset; | 2569 | map->stripes[index].physical = |
2570 | devices_info[i].dev_offset; | ||
2298 | index++; | 2571 | index++; |
2299 | if (type & BTRFS_BLOCK_GROUP_DUP) { | 2572 | if (type & BTRFS_BLOCK_GROUP_DUP) { |
2300 | map->stripes[index].dev = device; | 2573 | map->stripes[index].dev = device; |
2301 | map->stripes[index].physical = | 2574 | map->stripes[index].physical = |
2302 | dev_offset + calc_size; | 2575 | devices_info[i].dev_offset + |
2576 | calc_size; | ||
2303 | index++; | 2577 | index++; |
2304 | } | 2578 | } |
2305 | } | 2579 | } else if (ret != -ENOSPC) |
2306 | } else if (device->in_fs_metadata && avail > max_avail) | 2580 | goto error; |
2307 | max_avail = avail; | 2581 | |
2582 | devices_info[i].dev = device; | ||
2583 | i++; | ||
2584 | } else if (device->in_fs_metadata && | ||
2585 | avail >= BTRFS_STRIPE_LEN) { | ||
2586 | devices_info[i].dev = device; | ||
2587 | devices_info[i].max_avail = avail; | ||
2588 | i++; | ||
2589 | } | ||
2590 | |||
2308 | if (cur == &fs_devices->alloc_list) | 2591 | if (cur == &fs_devices->alloc_list) |
2309 | break; | 2592 | break; |
2310 | } | 2593 | } |
2594 | |||
2311 | list_splice(&private_devs, &fs_devices->alloc_list); | 2595 | list_splice(&private_devs, &fs_devices->alloc_list); |
2312 | if (index < num_stripes) { | 2596 | if (index < num_stripes) { |
2313 | if (index >= min_stripes) { | 2597 | if (index >= min_stripes) { |
@@ -2316,34 +2600,36 @@ again: | |||
2316 | num_stripes /= sub_stripes; | 2600 | num_stripes /= sub_stripes; |
2317 | num_stripes *= sub_stripes; | 2601 | num_stripes *= sub_stripes; |
2318 | } | 2602 | } |
2319 | looped = 1; | 2603 | |
2320 | goto again; | 2604 | map = __shrink_map_lookup_stripes(map, num_stripes); |
2321 | } | 2605 | } else if (i >= min_devices) { |
2322 | if (!looped && max_avail > 0) { | 2606 | ret = __btrfs_alloc_tiny_space(trans, fs_devices, |
2323 | looped = 1; | 2607 | devices_info, i, type, |
2324 | calc_size = max_avail; | 2608 | &map, min_stripes, |
2325 | goto again; | 2609 | &calc_size); |
2610 | if (ret) | ||
2611 | goto error; | ||
2612 | } else { | ||
2613 | ret = -ENOSPC; | ||
2614 | goto error; | ||
2326 | } | 2615 | } |
2327 | kfree(map); | ||
2328 | return -ENOSPC; | ||
2329 | } | 2616 | } |
2330 | map->sector_size = extent_root->sectorsize; | 2617 | map->sector_size = extent_root->sectorsize; |
2331 | map->stripe_len = stripe_len; | 2618 | map->stripe_len = BTRFS_STRIPE_LEN; |
2332 | map->io_align = stripe_len; | 2619 | map->io_align = BTRFS_STRIPE_LEN; |
2333 | map->io_width = stripe_len; | 2620 | map->io_width = BTRFS_STRIPE_LEN; |
2334 | map->type = type; | 2621 | map->type = type; |
2335 | map->num_stripes = num_stripes; | ||
2336 | map->sub_stripes = sub_stripes; | 2622 | map->sub_stripes = sub_stripes; |
2337 | 2623 | ||
2338 | *map_ret = map; | 2624 | *map_ret = map; |
2339 | *stripe_size = calc_size; | 2625 | *stripe_size = calc_size; |
2340 | *num_bytes = chunk_bytes_by_type(type, calc_size, | 2626 | *num_bytes = chunk_bytes_by_type(type, calc_size, |
2341 | num_stripes, sub_stripes); | 2627 | map->num_stripes, sub_stripes); |
2342 | 2628 | ||
2343 | em = alloc_extent_map(GFP_NOFS); | 2629 | em = alloc_extent_map(GFP_NOFS); |
2344 | if (!em) { | 2630 | if (!em) { |
2345 | kfree(map); | 2631 | ret = -ENOMEM; |
2346 | return -ENOMEM; | 2632 | goto error; |
2347 | } | 2633 | } |
2348 | em->bdev = (struct block_device *)map; | 2634 | em->bdev = (struct block_device *)map; |
2349 | em->start = start; | 2635 | em->start = start; |
@@ -2376,7 +2662,13 @@ again: | |||
2376 | index++; | 2662 | index++; |
2377 | } | 2663 | } |
2378 | 2664 | ||
2665 | kfree(devices_info); | ||
2379 | return 0; | 2666 | return 0; |
2667 | |||
2668 | error: | ||
2669 | kfree(map); | ||
2670 | kfree(devices_info); | ||
2671 | return ret; | ||
2380 | } | 2672 | } |
2381 | 2673 | ||
2382 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | 2674 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, |