aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/volumes.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r--fs/btrfs/volumes.c626
1 files changed, 459 insertions, 167 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1718e1a5c320..d158530233b7 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -22,6 +22,7 @@
22#include <linux/blkdev.h> 22#include <linux/blkdev.h>
23#include <linux/random.h> 23#include <linux/random.h>
24#include <linux/iocontext.h> 24#include <linux/iocontext.h>
25#include <linux/capability.h>
25#include <asm/div64.h> 26#include <asm/div64.h>
26#include "compat.h" 27#include "compat.h"
27#include "ctree.h" 28#include "ctree.h"
@@ -600,8 +601,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
600 set_blocksize(bdev, 4096); 601 set_blocksize(bdev, 4096);
601 602
602 bh = btrfs_read_dev_super(bdev); 603 bh = btrfs_read_dev_super(bdev);
603 if (!bh) 604 if (!bh) {
605 ret = -EINVAL;
604 goto error_close; 606 goto error_close;
607 }
605 608
606 disk_super = (struct btrfs_super_block *)bh->b_data; 609 disk_super = (struct btrfs_super_block *)bh->b_data;
607 devid = btrfs_stack_device_id(&disk_super->dev_item); 610 devid = btrfs_stack_device_id(&disk_super->dev_item);
@@ -703,7 +706,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
703 goto error_close; 706 goto error_close;
704 bh = btrfs_read_dev_super(bdev); 707 bh = btrfs_read_dev_super(bdev);
705 if (!bh) { 708 if (!bh) {
706 ret = -EIO; 709 ret = -EINVAL;
707 goto error_close; 710 goto error_close;
708 } 711 }
709 disk_super = (struct btrfs_super_block *)bh->b_data; 712 disk_super = (struct btrfs_super_block *)bh->b_data;
@@ -729,59 +732,167 @@ error:
729 return ret; 732 return ret;
730} 733}
731 734
735/* helper to account the used device space in the range */
736int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
737 u64 end, u64 *length)
738{
739 struct btrfs_key key;
740 struct btrfs_root *root = device->dev_root;
741 struct btrfs_dev_extent *dev_extent;
742 struct btrfs_path *path;
743 u64 extent_end;
744 int ret;
745 int slot;
746 struct extent_buffer *l;
747
748 *length = 0;
749
750 if (start >= device->total_bytes)
751 return 0;
752
753 path = btrfs_alloc_path();
754 if (!path)
755 return -ENOMEM;
756 path->reada = 2;
757
758 key.objectid = device->devid;
759 key.offset = start;
760 key.type = BTRFS_DEV_EXTENT_KEY;
761
762 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
763 if (ret < 0)
764 goto out;
765 if (ret > 0) {
766 ret = btrfs_previous_item(root, path, key.objectid, key.type);
767 if (ret < 0)
768 goto out;
769 }
770
771 while (1) {
772 l = path->nodes[0];
773 slot = path->slots[0];
774 if (slot >= btrfs_header_nritems(l)) {
775 ret = btrfs_next_leaf(root, path);
776 if (ret == 0)
777 continue;
778 if (ret < 0)
779 goto out;
780
781 break;
782 }
783 btrfs_item_key_to_cpu(l, &key, slot);
784
785 if (key.objectid < device->devid)
786 goto next;
787
788 if (key.objectid > device->devid)
789 break;
790
791 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
792 goto next;
793
794 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
795 extent_end = key.offset + btrfs_dev_extent_length(l,
796 dev_extent);
797 if (key.offset <= start && extent_end > end) {
798 *length = end - start + 1;
799 break;
800 } else if (key.offset <= start && extent_end > start)
801 *length += extent_end - start;
802 else if (key.offset > start && extent_end <= end)
803 *length += extent_end - key.offset;
804 else if (key.offset > start && key.offset <= end) {
805 *length += end - key.offset + 1;
806 break;
807 } else if (key.offset > end)
808 break;
809
810next:
811 path->slots[0]++;
812 }
813 ret = 0;
814out:
815 btrfs_free_path(path);
816 return ret;
817}
818
732/* 819/*
820 * find_free_dev_extent - find free space in the specified device
821 * @trans: transaction handler
822 * @device: the device which we search the free space in
823 * @num_bytes: the size of the free space that we need
824 * @start: store the start of the free space.
825 * @len: the size of the free space. that we find, or the size of the max
826 * free space if we don't find suitable free space
827 *
733 * this uses a pretty simple search, the expectation is that it is 828 * this uses a pretty simple search, the expectation is that it is
734 * called very infrequently and that a given device has a small number 829 * called very infrequently and that a given device has a small number
735 * of extents 830 * of extents
831 *
832 * @start is used to store the start of the free space if we find. But if we
833 * don't find suitable free space, it will be used to store the start position
834 * of the max free space.
835 *
836 * @len is used to store the size of the free space that we find.
837 * But if we don't find suitable free space, it is used to store the size of
838 * the max free space.
736 */ 839 */
737int find_free_dev_extent(struct btrfs_trans_handle *trans, 840int find_free_dev_extent(struct btrfs_trans_handle *trans,
738 struct btrfs_device *device, u64 num_bytes, 841 struct btrfs_device *device, u64 num_bytes,
739 u64 *start, u64 *max_avail) 842 u64 *start, u64 *len)
740{ 843{
741 struct btrfs_key key; 844 struct btrfs_key key;
742 struct btrfs_root *root = device->dev_root; 845 struct btrfs_root *root = device->dev_root;
743 struct btrfs_dev_extent *dev_extent = NULL; 846 struct btrfs_dev_extent *dev_extent;
744 struct btrfs_path *path; 847 struct btrfs_path *path;
745 u64 hole_size = 0; 848 u64 hole_size;
746 u64 last_byte = 0; 849 u64 max_hole_start;
747 u64 search_start = 0; 850 u64 max_hole_size;
851 u64 extent_end;
852 u64 search_start;
748 u64 search_end = device->total_bytes; 853 u64 search_end = device->total_bytes;
749 int ret; 854 int ret;
750 int slot = 0; 855 int slot;
751 int start_found;
752 struct extent_buffer *l; 856 struct extent_buffer *l;
753 857
754 path = btrfs_alloc_path();
755 if (!path)
756 return -ENOMEM;
757 path->reada = 2;
758 start_found = 0;
759
760 /* FIXME use last free of some kind */ 858 /* FIXME use last free of some kind */
761 859
762 /* we don't want to overwrite the superblock on the drive, 860 /* we don't want to overwrite the superblock on the drive,
763 * so we make sure to start at an offset of at least 1MB 861 * so we make sure to start at an offset of at least 1MB
764 */ 862 */
765 search_start = max((u64)1024 * 1024, search_start); 863 search_start = 1024 * 1024;
766 864
767 if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) 865 if (root->fs_info->alloc_start + num_bytes <= search_end)
768 search_start = max(root->fs_info->alloc_start, search_start); 866 search_start = max(root->fs_info->alloc_start, search_start);
769 867
868 max_hole_start = search_start;
869 max_hole_size = 0;
870
871 if (search_start >= search_end) {
872 ret = -ENOSPC;
873 goto error;
874 }
875
876 path = btrfs_alloc_path();
877 if (!path) {
878 ret = -ENOMEM;
879 goto error;
880 }
881 path->reada = 2;
882
770 key.objectid = device->devid; 883 key.objectid = device->devid;
771 key.offset = search_start; 884 key.offset = search_start;
772 key.type = BTRFS_DEV_EXTENT_KEY; 885 key.type = BTRFS_DEV_EXTENT_KEY;
886
773 ret = btrfs_search_slot(trans, root, &key, path, 0, 0); 887 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
774 if (ret < 0) 888 if (ret < 0)
775 goto error; 889 goto out;
776 if (ret > 0) { 890 if (ret > 0) {
777 ret = btrfs_previous_item(root, path, key.objectid, key.type); 891 ret = btrfs_previous_item(root, path, key.objectid, key.type);
778 if (ret < 0) 892 if (ret < 0)
779 goto error; 893 goto out;
780 if (ret > 0)
781 start_found = 1;
782 } 894 }
783 l = path->nodes[0]; 895
784 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
785 while (1) { 896 while (1) {
786 l = path->nodes[0]; 897 l = path->nodes[0];
787 slot = path->slots[0]; 898 slot = path->slots[0];
@@ -790,24 +901,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
790 if (ret == 0) 901 if (ret == 0)
791 continue; 902 continue;
792 if (ret < 0) 903 if (ret < 0)
793 goto error; 904 goto out;
794no_more_items: 905
795 if (!start_found) { 906 break;
796 if (search_start >= search_end) {
797 ret = -ENOSPC;
798 goto error;
799 }
800 *start = search_start;
801 start_found = 1;
802 goto check_pending;
803 }
804 *start = last_byte > search_start ?
805 last_byte : search_start;
806 if (search_end <= *start) {
807 ret = -ENOSPC;
808 goto error;
809 }
810 goto check_pending;
811 } 907 }
812 btrfs_item_key_to_cpu(l, &key, slot); 908 btrfs_item_key_to_cpu(l, &key, slot);
813 909
@@ -815,48 +911,62 @@ no_more_items:
815 goto next; 911 goto next;
816 912
817 if (key.objectid > device->devid) 913 if (key.objectid > device->devid)
818 goto no_more_items; 914 break;
819 915
820 if (key.offset >= search_start && key.offset > last_byte && 916 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
821 start_found) { 917 goto next;
822 if (last_byte < search_start)
823 last_byte = search_start;
824 hole_size = key.offset - last_byte;
825 918
826 if (hole_size > *max_avail) 919 if (key.offset > search_start) {
827 *max_avail = hole_size; 920 hole_size = key.offset - search_start;
828 921
829 if (key.offset > last_byte && 922 if (hole_size > max_hole_size) {
830 hole_size >= num_bytes) { 923 max_hole_start = search_start;
831 *start = last_byte; 924 max_hole_size = hole_size;
832 goto check_pending; 925 }
926
927 /*
928 * If this free space is greater than which we need,
929 * it must be the max free space that we have found
930 * until now, so max_hole_start must point to the start
931 * of this free space and the length of this free space
932 * is stored in max_hole_size. Thus, we return
933 * max_hole_start and max_hole_size and go back to the
934 * caller.
935 */
936 if (hole_size >= num_bytes) {
937 ret = 0;
938 goto out;
833 } 939 }
834 } 940 }
835 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
836 goto next;
837 941
838 start_found = 1;
839 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); 942 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
840 last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); 943 extent_end = key.offset + btrfs_dev_extent_length(l,
944 dev_extent);
945 if (extent_end > search_start)
946 search_start = extent_end;
841next: 947next:
842 path->slots[0]++; 948 path->slots[0]++;
843 cond_resched(); 949 cond_resched();
844 } 950 }
845check_pending:
846 /* we have to make sure we didn't find an extent that has already
847 * been allocated by the map tree or the original allocation
848 */
849 BUG_ON(*start < search_start);
850 951
851 if (*start + num_bytes > search_end) { 952 hole_size = search_end- search_start;
852 ret = -ENOSPC; 953 if (hole_size > max_hole_size) {
853 goto error; 954 max_hole_start = search_start;
955 max_hole_size = hole_size;
854 } 956 }
855 /* check for pending inserts here */
856 ret = 0;
857 957
858error: 958 /* See above. */
959 if (hole_size < num_bytes)
960 ret = -ENOSPC;
961 else
962 ret = 0;
963
964out:
859 btrfs_free_path(path); 965 btrfs_free_path(path);
966error:
967 *start = max_hole_start;
968 if (len)
969 *len = max_hole_size;
860 return ret; 970 return ret;
861} 971}
862 972
@@ -1196,7 +1306,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1196 set_blocksize(bdev, 4096); 1306 set_blocksize(bdev, 4096);
1197 bh = btrfs_read_dev_super(bdev); 1307 bh = btrfs_read_dev_super(bdev);
1198 if (!bh) { 1308 if (!bh) {
1199 ret = -EIO; 1309 ret = -EINVAL;
1200 goto error_close; 1310 goto error_close;
1201 } 1311 }
1202 disk_super = (struct btrfs_super_block *)bh->b_data; 1312 disk_super = (struct btrfs_super_block *)bh->b_data;
@@ -1916,6 +2026,9 @@ int btrfs_balance(struct btrfs_root *dev_root)
1916 if (dev_root->fs_info->sb->s_flags & MS_RDONLY) 2026 if (dev_root->fs_info->sb->s_flags & MS_RDONLY)
1917 return -EROFS; 2027 return -EROFS;
1918 2028
2029 if (!capable(CAP_SYS_ADMIN))
2030 return -EPERM;
2031
1919 mutex_lock(&dev_root->fs_info->volume_mutex); 2032 mutex_lock(&dev_root->fs_info->volume_mutex);
1920 dev_root = dev_root->fs_info->dev_root; 2033 dev_root = dev_root->fs_info->dev_root;
1921 2034
@@ -2154,66 +2267,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size,
2154 return calc_size * num_stripes; 2267 return calc_size * num_stripes;
2155} 2268}
2156 2269
2157static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 2270/* Used to sort the devices by max_avail(descending sort) */
2158 struct btrfs_root *extent_root, 2271int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2)
2159 struct map_lookup **map_ret,
2160 u64 *num_bytes, u64 *stripe_size,
2161 u64 start, u64 type)
2162{ 2272{
2163 struct btrfs_fs_info *info = extent_root->fs_info; 2273 if (((struct btrfs_device_info *)dev_info1)->max_avail >
2164 struct btrfs_device *device = NULL; 2274 ((struct btrfs_device_info *)dev_info2)->max_avail)
2165 struct btrfs_fs_devices *fs_devices = info->fs_devices; 2275 return -1;
2166 struct list_head *cur; 2276 else if (((struct btrfs_device_info *)dev_info1)->max_avail <
2167 struct map_lookup *map = NULL; 2277 ((struct btrfs_device_info *)dev_info2)->max_avail)
2168 struct extent_map_tree *em_tree; 2278 return 1;
2169 struct extent_map *em; 2279 else
2170 struct list_head private_devs; 2280 return 0;
2171 int min_stripe_size = 1 * 1024 * 1024; 2281}
2172 u64 calc_size = 1024 * 1024 * 1024;
2173 u64 max_chunk_size = calc_size;
2174 u64 min_free;
2175 u64 avail;
2176 u64 max_avail = 0;
2177 u64 dev_offset;
2178 int num_stripes = 1;
2179 int min_stripes = 1;
2180 int sub_stripes = 0;
2181 int looped = 0;
2182 int ret;
2183 int index;
2184 int stripe_len = 64 * 1024;
2185 2282
2186 if ((type & BTRFS_BLOCK_GROUP_RAID1) && 2283static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type,
2187 (type & BTRFS_BLOCK_GROUP_DUP)) { 2284 int *num_stripes, int *min_stripes,
2188 WARN_ON(1); 2285 int *sub_stripes)
2189 type &= ~BTRFS_BLOCK_GROUP_DUP; 2286{
2190 } 2287 *num_stripes = 1;
2191 if (list_empty(&fs_devices->alloc_list)) 2288 *min_stripes = 1;
2192 return -ENOSPC; 2289 *sub_stripes = 0;
2193 2290
2194 if (type & (BTRFS_BLOCK_GROUP_RAID0)) { 2291 if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
2195 num_stripes = fs_devices->rw_devices; 2292 *num_stripes = fs_devices->rw_devices;
2196 min_stripes = 2; 2293 *min_stripes = 2;
2197 } 2294 }
2198 if (type & (BTRFS_BLOCK_GROUP_DUP)) { 2295 if (type & (BTRFS_BLOCK_GROUP_DUP)) {
2199 num_stripes = 2; 2296 *num_stripes = 2;
2200 min_stripes = 2; 2297 *min_stripes = 2;
2201 } 2298 }
2202 if (type & (BTRFS_BLOCK_GROUP_RAID1)) { 2299 if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
2203 if (fs_devices->rw_devices < 2) 2300 if (fs_devices->rw_devices < 2)
2204 return -ENOSPC; 2301 return -ENOSPC;
2205 num_stripes = 2; 2302 *num_stripes = 2;
2206 min_stripes = 2; 2303 *min_stripes = 2;
2207 } 2304 }
2208 if (type & (BTRFS_BLOCK_GROUP_RAID10)) { 2305 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
2209 num_stripes = fs_devices->rw_devices; 2306 *num_stripes = fs_devices->rw_devices;
2210 if (num_stripes < 4) 2307 if (*num_stripes < 4)
2211 return -ENOSPC; 2308 return -ENOSPC;
2212 num_stripes &= ~(u32)1; 2309 *num_stripes &= ~(u32)1;
2213 sub_stripes = 2; 2310 *sub_stripes = 2;
2214 min_stripes = 4; 2311 *min_stripes = 4;
2215 } 2312 }
2216 2313
2314 return 0;
2315}
2316
2317static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices,
2318 u64 proposed_size, u64 type,
2319 int num_stripes, int small_stripe)
2320{
2321 int min_stripe_size = 1 * 1024 * 1024;
2322 u64 calc_size = proposed_size;
2323 u64 max_chunk_size = calc_size;
2324 int ncopies = 1;
2325
2326 if (type & (BTRFS_BLOCK_GROUP_RAID1 |
2327 BTRFS_BLOCK_GROUP_DUP |
2328 BTRFS_BLOCK_GROUP_RAID10))
2329 ncopies = 2;
2330
2217 if (type & BTRFS_BLOCK_GROUP_DATA) { 2331 if (type & BTRFS_BLOCK_GROUP_DATA) {
2218 max_chunk_size = 10 * calc_size; 2332 max_chunk_size = 10 * calc_size;
2219 min_stripe_size = 64 * 1024 * 1024; 2333 min_stripe_size = 64 * 1024 * 1024;
@@ -2230,51 +2344,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2230 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), 2344 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
2231 max_chunk_size); 2345 max_chunk_size);
2232 2346
2233again: 2347 if (calc_size * num_stripes > max_chunk_size * ncopies) {
2234 max_avail = 0; 2348 calc_size = max_chunk_size * ncopies;
2235 if (!map || map->num_stripes != num_stripes) {
2236 kfree(map);
2237 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
2238 if (!map)
2239 return -ENOMEM;
2240 map->num_stripes = num_stripes;
2241 }
2242
2243 if (calc_size * num_stripes > max_chunk_size) {
2244 calc_size = max_chunk_size;
2245 do_div(calc_size, num_stripes); 2349 do_div(calc_size, num_stripes);
2246 do_div(calc_size, stripe_len); 2350 do_div(calc_size, BTRFS_STRIPE_LEN);
2247 calc_size *= stripe_len; 2351 calc_size *= BTRFS_STRIPE_LEN;
2248 } 2352 }
2249 2353
2250 /* we don't want tiny stripes */ 2354 /* we don't want tiny stripes */
2251 if (!looped) 2355 if (!small_stripe)
2252 calc_size = max_t(u64, min_stripe_size, calc_size); 2356 calc_size = max_t(u64, min_stripe_size, calc_size);
2253 2357
2254 /* 2358 /*
2255 * we're about to do_div by the stripe_len so lets make sure 2359 * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure
2256 * we end up with something bigger than a stripe 2360 * we end up with something bigger than a stripe
2257 */ 2361 */
2258 calc_size = max_t(u64, calc_size, stripe_len * 4); 2362 calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN);
2363
2364 do_div(calc_size, BTRFS_STRIPE_LEN);
2365 calc_size *= BTRFS_STRIPE_LEN;
2366
2367 return calc_size;
2368}
2369
2370static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map,
2371 int num_stripes)
2372{
2373 struct map_lookup *new;
2374 size_t len = map_lookup_size(num_stripes);
2375
2376 BUG_ON(map->num_stripes < num_stripes);
2377
2378 if (map->num_stripes == num_stripes)
2379 return map;
2380
2381 new = kmalloc(len, GFP_NOFS);
2382 if (!new) {
2383 /* just change map->num_stripes */
2384 map->num_stripes = num_stripes;
2385 return map;
2386 }
2387
2388 memcpy(new, map, len);
2389 new->num_stripes = num_stripes;
2390 kfree(map);
2391 return new;
2392}
2393
2394/*
2395 * helper to allocate device space from btrfs_device_info, in which we stored
2396 * max free space information of every device. It is used when we can not
2397 * allocate chunks by default size.
2398 *
2399 * By this helper, we can allocate a new chunk as larger as possible.
2400 */
2401static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans,
2402 struct btrfs_fs_devices *fs_devices,
2403 struct btrfs_device_info *devices,
2404 int nr_device, u64 type,
2405 struct map_lookup **map_lookup,
2406 int min_stripes, u64 *stripe_size)
2407{
2408 int i, index, sort_again = 0;
2409 int min_devices = min_stripes;
2410 u64 max_avail, min_free;
2411 struct map_lookup *map = *map_lookup;
2412 int ret;
2413
2414 if (nr_device < min_stripes)
2415 return -ENOSPC;
2416
2417 btrfs_descending_sort_devices(devices, nr_device);
2418
2419 max_avail = devices[0].max_avail;
2420 if (!max_avail)
2421 return -ENOSPC;
2422
2423 for (i = 0; i < nr_device; i++) {
2424 /*
2425 * if dev_offset = 0, it means the free space of this device
2426 * is less than what we need, and we didn't search max avail
2427 * extent on this device, so do it now.
2428 */
2429 if (!devices[i].dev_offset) {
2430 ret = find_free_dev_extent(trans, devices[i].dev,
2431 max_avail,
2432 &devices[i].dev_offset,
2433 &devices[i].max_avail);
2434 if (ret != 0 && ret != -ENOSPC)
2435 return ret;
2436 sort_again = 1;
2437 }
2438 }
2439
2440 /* we update the max avail free extent of each devices, sort again */
2441 if (sort_again)
2442 btrfs_descending_sort_devices(devices, nr_device);
2443
2444 if (type & BTRFS_BLOCK_GROUP_DUP)
2445 min_devices = 1;
2446
2447 if (!devices[min_devices - 1].max_avail)
2448 return -ENOSPC;
2449
2450 max_avail = devices[min_devices - 1].max_avail;
2451 if (type & BTRFS_BLOCK_GROUP_DUP)
2452 do_div(max_avail, 2);
2453
2454 max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type,
2455 min_stripes, 1);
2456 if (type & BTRFS_BLOCK_GROUP_DUP)
2457 min_free = max_avail * 2;
2458 else
2459 min_free = max_avail;
2460
2461 if (min_free > devices[min_devices - 1].max_avail)
2462 return -ENOSPC;
2463
2464 map = __shrink_map_lookup_stripes(map, min_stripes);
2465 *stripe_size = max_avail;
2466
2467 index = 0;
2468 for (i = 0; i < min_stripes; i++) {
2469 map->stripes[i].dev = devices[index].dev;
2470 map->stripes[i].physical = devices[index].dev_offset;
2471 if (type & BTRFS_BLOCK_GROUP_DUP) {
2472 i++;
2473 map->stripes[i].dev = devices[index].dev;
2474 map->stripes[i].physical = devices[index].dev_offset +
2475 max_avail;
2476 }
2477 index++;
2478 }
2479 *map_lookup = map;
2480
2481 return 0;
2482}
2259 2483
2260 do_div(calc_size, stripe_len); 2484static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2261 calc_size *= stripe_len; 2485 struct btrfs_root *extent_root,
2486 struct map_lookup **map_ret,
2487 u64 *num_bytes, u64 *stripe_size,
2488 u64 start, u64 type)
2489{
2490 struct btrfs_fs_info *info = extent_root->fs_info;
2491 struct btrfs_device *device = NULL;
2492 struct btrfs_fs_devices *fs_devices = info->fs_devices;
2493 struct list_head *cur;
2494 struct map_lookup *map;
2495 struct extent_map_tree *em_tree;
2496 struct extent_map *em;
2497 struct btrfs_device_info *devices_info;
2498 struct list_head private_devs;
2499 u64 calc_size = 1024 * 1024 * 1024;
2500 u64 min_free;
2501 u64 avail;
2502 u64 dev_offset;
2503 int num_stripes;
2504 int min_stripes;
2505 int sub_stripes;
2506 int min_devices; /* the min number of devices we need */
2507 int i;
2508 int ret;
2509 int index;
2510
2511 if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
2512 (type & BTRFS_BLOCK_GROUP_DUP)) {
2513 WARN_ON(1);
2514 type &= ~BTRFS_BLOCK_GROUP_DUP;
2515 }
2516 if (list_empty(&fs_devices->alloc_list))
2517 return -ENOSPC;
2518
2519 ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes,
2520 &min_stripes, &sub_stripes);
2521 if (ret)
2522 return ret;
2523
2524 devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
2525 GFP_NOFS);
2526 if (!devices_info)
2527 return -ENOMEM;
2528
2529 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
2530 if (!map) {
2531 ret = -ENOMEM;
2532 goto error;
2533 }
2534 map->num_stripes = num_stripes;
2262 2535
2263 cur = fs_devices->alloc_list.next; 2536 cur = fs_devices->alloc_list.next;
2264 index = 0; 2537 index = 0;
2538 i = 0;
2265 2539
2266 if (type & BTRFS_BLOCK_GROUP_DUP) 2540 calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type,
2541 num_stripes, 0);
2542
2543 if (type & BTRFS_BLOCK_GROUP_DUP) {
2267 min_free = calc_size * 2; 2544 min_free = calc_size * 2;
2268 else 2545 min_devices = 1;
2546 } else {
2269 min_free = calc_size; 2547 min_free = calc_size;
2270 2548 min_devices = min_stripes;
2271 /* 2549 }
2272 * we add 1MB because we never use the first 1MB of the device, unless
2273 * we've looped, then we are likely allocating the maximum amount of
2274 * space left already
2275 */
2276 if (!looped)
2277 min_free += 1024 * 1024;
2278 2550
2279 INIT_LIST_HEAD(&private_devs); 2551 INIT_LIST_HEAD(&private_devs);
2280 while (index < num_stripes) { 2552 while (index < num_stripes) {
@@ -2287,27 +2559,39 @@ again:
2287 cur = cur->next; 2559 cur = cur->next;
2288 2560
2289 if (device->in_fs_metadata && avail >= min_free) { 2561 if (device->in_fs_metadata && avail >= min_free) {
2290 ret = find_free_dev_extent(trans, device, 2562 ret = find_free_dev_extent(trans, device, min_free,
2291 min_free, &dev_offset, 2563 &devices_info[i].dev_offset,
2292 &max_avail); 2564 &devices_info[i].max_avail);
2293 if (ret == 0) { 2565 if (ret == 0) {
2294 list_move_tail(&device->dev_alloc_list, 2566 list_move_tail(&device->dev_alloc_list,
2295 &private_devs); 2567 &private_devs);
2296 map->stripes[index].dev = device; 2568 map->stripes[index].dev = device;
2297 map->stripes[index].physical = dev_offset; 2569 map->stripes[index].physical =
2570 devices_info[i].dev_offset;
2298 index++; 2571 index++;
2299 if (type & BTRFS_BLOCK_GROUP_DUP) { 2572 if (type & BTRFS_BLOCK_GROUP_DUP) {
2300 map->stripes[index].dev = device; 2573 map->stripes[index].dev = device;
2301 map->stripes[index].physical = 2574 map->stripes[index].physical =
2302 dev_offset + calc_size; 2575 devices_info[i].dev_offset +
2576 calc_size;
2303 index++; 2577 index++;
2304 } 2578 }
2305 } 2579 } else if (ret != -ENOSPC)
2306 } else if (device->in_fs_metadata && avail > max_avail) 2580 goto error;
2307 max_avail = avail; 2581
2582 devices_info[i].dev = device;
2583 i++;
2584 } else if (device->in_fs_metadata &&
2585 avail >= BTRFS_STRIPE_LEN) {
2586 devices_info[i].dev = device;
2587 devices_info[i].max_avail = avail;
2588 i++;
2589 }
2590
2308 if (cur == &fs_devices->alloc_list) 2591 if (cur == &fs_devices->alloc_list)
2309 break; 2592 break;
2310 } 2593 }
2594
2311 list_splice(&private_devs, &fs_devices->alloc_list); 2595 list_splice(&private_devs, &fs_devices->alloc_list);
2312 if (index < num_stripes) { 2596 if (index < num_stripes) {
2313 if (index >= min_stripes) { 2597 if (index >= min_stripes) {
@@ -2316,34 +2600,36 @@ again:
2316 num_stripes /= sub_stripes; 2600 num_stripes /= sub_stripes;
2317 num_stripes *= sub_stripes; 2601 num_stripes *= sub_stripes;
2318 } 2602 }
2319 looped = 1; 2603
2320 goto again; 2604 map = __shrink_map_lookup_stripes(map, num_stripes);
2321 } 2605 } else if (i >= min_devices) {
2322 if (!looped && max_avail > 0) { 2606 ret = __btrfs_alloc_tiny_space(trans, fs_devices,
2323 looped = 1; 2607 devices_info, i, type,
2324 calc_size = max_avail; 2608 &map, min_stripes,
2325 goto again; 2609 &calc_size);
2610 if (ret)
2611 goto error;
2612 } else {
2613 ret = -ENOSPC;
2614 goto error;
2326 } 2615 }
2327 kfree(map);
2328 return -ENOSPC;
2329 } 2616 }
2330 map->sector_size = extent_root->sectorsize; 2617 map->sector_size = extent_root->sectorsize;
2331 map->stripe_len = stripe_len; 2618 map->stripe_len = BTRFS_STRIPE_LEN;
2332 map->io_align = stripe_len; 2619 map->io_align = BTRFS_STRIPE_LEN;
2333 map->io_width = stripe_len; 2620 map->io_width = BTRFS_STRIPE_LEN;
2334 map->type = type; 2621 map->type = type;
2335 map->num_stripes = num_stripes;
2336 map->sub_stripes = sub_stripes; 2622 map->sub_stripes = sub_stripes;
2337 2623
2338 *map_ret = map; 2624 *map_ret = map;
2339 *stripe_size = calc_size; 2625 *stripe_size = calc_size;
2340 *num_bytes = chunk_bytes_by_type(type, calc_size, 2626 *num_bytes = chunk_bytes_by_type(type, calc_size,
2341 num_stripes, sub_stripes); 2627 map->num_stripes, sub_stripes);
2342 2628
2343 em = alloc_extent_map(GFP_NOFS); 2629 em = alloc_extent_map(GFP_NOFS);
2344 if (!em) { 2630 if (!em) {
2345 kfree(map); 2631 ret = -ENOMEM;
2346 return -ENOMEM; 2632 goto error;
2347 } 2633 }
2348 em->bdev = (struct block_device *)map; 2634 em->bdev = (struct block_device *)map;
2349 em->start = start; 2635 em->start = start;
@@ -2376,7 +2662,13 @@ again:
2376 index++; 2662 index++;
2377 } 2663 }
2378 2664
2665 kfree(devices_info);
2379 return 0; 2666 return 0;
2667
2668error:
2669 kfree(map);
2670 kfree(devices_info);
2671 return ret;
2380} 2672}
2381 2673
2382static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, 2674static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,