diff options
| author | Chris Mason <chris.mason@oracle.com> | 2011-05-22 12:36:34 -0400 |
|---|---|---|
| committer | Chris Mason <chris.mason@oracle.com> | 2011-05-22 12:36:34 -0400 |
| commit | aa2dfb372a2a647beedac163ce6f8b0fcbefac29 (patch) | |
| tree | ff64f4d4921df2f0fbe5b356dc9b2384c7957dc1 /fs | |
| parent | 945d8962ceee6bb273365d0bdf42f763225b290f (diff) | |
| parent | 73c5de0051533cbdf2bb656586c3eb21a475aa7d (diff) | |
Merge branch 'allocator' of git://git.kernel.org/pub/scm/linux/kernel/git/arne/btrfs-unstable-arne into inode_numbers
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/btrfs/super.c | 26 | ||||
| -rw-r--r-- | fs/btrfs/volumes.c | 493 | ||||
| -rw-r--r-- | fs/btrfs/volumes.h | 16 |
3 files changed, 201 insertions, 334 deletions
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index fb72e2bea882..006655c1d1f7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -914,6 +914,32 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 914 | return 0; | 914 | return 0; |
| 915 | } | 915 | } |
| 916 | 916 | ||
| 917 | /* Used to sort the devices by max_avail(descending sort) */ | ||
| 918 | static int btrfs_cmp_device_free_bytes(const void *dev_info1, | ||
| 919 | const void *dev_info2) | ||
| 920 | { | ||
| 921 | if (((struct btrfs_device_info *)dev_info1)->max_avail > | ||
| 922 | ((struct btrfs_device_info *)dev_info2)->max_avail) | ||
| 923 | return -1; | ||
| 924 | else if (((struct btrfs_device_info *)dev_info1)->max_avail < | ||
| 925 | ((struct btrfs_device_info *)dev_info2)->max_avail) | ||
| 926 | return 1; | ||
| 927 | else | ||
| 928 | return 0; | ||
| 929 | } | ||
| 930 | |||
| 931 | /* | ||
| 932 | * sort the devices by max_avail, in which max free extent size of each device | ||
| 933 | * is stored.(Descending Sort) | ||
| 934 | */ | ||
| 935 | static inline void btrfs_descending_sort_devices( | ||
| 936 | struct btrfs_device_info *devices, | ||
| 937 | size_t nr_devices) | ||
| 938 | { | ||
| 939 | sort(devices, nr_devices, sizeof(struct btrfs_device_info), | ||
| 940 | btrfs_cmp_device_free_bytes, NULL); | ||
| 941 | } | ||
| 942 | |||
| 917 | /* | 943 | /* |
| 918 | * The helper to calc the free space on the devices that can be used to store | 944 | * The helper to calc the free space on the devices that can be used to store |
| 919 | * file data. | 945 | * file data. |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cd0b31a9ba3d..f777145203df 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -805,10 +805,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
| 805 | /* we don't want to overwrite the superblock on the drive, | 805 | /* we don't want to overwrite the superblock on the drive, |
| 806 | * so we make sure to start at an offset of at least 1MB | 806 | * so we make sure to start at an offset of at least 1MB |
| 807 | */ | 807 | */ |
| 808 | search_start = 1024 * 1024; | 808 | search_start = max(root->fs_info->alloc_start, 1024ull * 1024); |
| 809 | |||
| 810 | if (root->fs_info->alloc_start + num_bytes <= search_end) | ||
| 811 | search_start = max(root->fs_info->alloc_start, search_start); | ||
| 812 | 809 | ||
| 813 | max_hole_start = search_start; | 810 | max_hole_start = search_start; |
| 814 | max_hole_size = 0; | 811 | max_hole_size = 0; |
| @@ -2227,275 +2224,204 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, | |||
| 2227 | return 0; | 2224 | return 0; |
| 2228 | } | 2225 | } |
| 2229 | 2226 | ||
| 2230 | static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, | 2227 | /* |
| 2231 | int num_stripes, int sub_stripes) | 2228 | * sort the devices in descending order by max_avail, total_avail |
| 2229 | */ | ||
| 2230 | static int btrfs_cmp_device_info(const void *a, const void *b) | ||
| 2232 | { | 2231 | { |
| 2233 | if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) | 2232 | const struct btrfs_device_info *di_a = a; |
| 2234 | return calc_size; | 2233 | const struct btrfs_device_info *di_b = b; |
| 2235 | else if (type & BTRFS_BLOCK_GROUP_RAID10) | ||
| 2236 | return calc_size * (num_stripes / sub_stripes); | ||
| 2237 | else | ||
| 2238 | return calc_size * num_stripes; | ||
| 2239 | } | ||
| 2240 | 2234 | ||
| 2241 | /* Used to sort the devices by max_avail(descending sort) */ | 2235 | if (di_a->max_avail > di_b->max_avail) |
| 2242 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) | ||
| 2243 | { | ||
| 2244 | if (((struct btrfs_device_info *)dev_info1)->max_avail > | ||
| 2245 | ((struct btrfs_device_info *)dev_info2)->max_avail) | ||
| 2246 | return -1; | 2236 | return -1; |
| 2247 | else if (((struct btrfs_device_info *)dev_info1)->max_avail < | 2237 | if (di_a->max_avail < di_b->max_avail) |
| 2248 | ((struct btrfs_device_info *)dev_info2)->max_avail) | ||
| 2249 | return 1; | 2238 | return 1; |
| 2250 | else | 2239 | if (di_a->total_avail > di_b->total_avail) |
| 2251 | return 0; | 2240 | return -1; |
| 2241 | if (di_a->total_avail < di_b->total_avail) | ||
| 2242 | return 1; | ||
| 2243 | return 0; | ||
| 2252 | } | 2244 | } |
| 2253 | 2245 | ||
| 2254 | static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, | 2246 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, |
| 2255 | int *num_stripes, int *min_stripes, | 2247 | struct btrfs_root *extent_root, |
| 2256 | int *sub_stripes) | 2248 | struct map_lookup **map_ret, |
| 2249 | u64 *num_bytes_out, u64 *stripe_size_out, | ||
| 2250 | u64 start, u64 type) | ||
| 2257 | { | 2251 | { |
| 2258 | *num_stripes = 1; | 2252 | struct btrfs_fs_info *info = extent_root->fs_info; |
| 2259 | *min_stripes = 1; | 2253 | struct btrfs_fs_devices *fs_devices = info->fs_devices; |
| 2260 | *sub_stripes = 0; | 2254 | struct list_head *cur; |
| 2255 | struct map_lookup *map = NULL; | ||
| 2256 | struct extent_map_tree *em_tree; | ||
| 2257 | struct extent_map *em; | ||
| 2258 | struct btrfs_device_info *devices_info = NULL; | ||
| 2259 | u64 total_avail; | ||
| 2260 | int num_stripes; /* total number of stripes to allocate */ | ||
| 2261 | int sub_stripes; /* sub_stripes info for map */ | ||
| 2262 | int dev_stripes; /* stripes per dev */ | ||
| 2263 | int devs_max; /* max devs to use */ | ||
| 2264 | int devs_min; /* min devs needed */ | ||
| 2265 | int devs_increment; /* ndevs has to be a multiple of this */ | ||
| 2266 | int ncopies; /* how many copies to data has */ | ||
| 2267 | int ret; | ||
| 2268 | u64 max_stripe_size; | ||
| 2269 | u64 max_chunk_size; | ||
| 2270 | u64 stripe_size; | ||
| 2271 | u64 num_bytes; | ||
| 2272 | int ndevs; | ||
| 2273 | int i; | ||
| 2274 | int j; | ||
| 2261 | 2275 | ||
| 2262 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { | 2276 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && |
| 2263 | *num_stripes = fs_devices->rw_devices; | 2277 | (type & BTRFS_BLOCK_GROUP_DUP)) { |
| 2264 | *min_stripes = 2; | 2278 | WARN_ON(1); |
| 2265 | } | 2279 | type &= ~BTRFS_BLOCK_GROUP_DUP; |
| 2266 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { | ||
| 2267 | *num_stripes = 2; | ||
| 2268 | *min_stripes = 2; | ||
| 2269 | } | ||
| 2270 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { | ||
| 2271 | if (fs_devices->rw_devices < 2) | ||
| 2272 | return -ENOSPC; | ||
| 2273 | *num_stripes = 2; | ||
| 2274 | *min_stripes = 2; | ||
| 2275 | } | ||
| 2276 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | ||
| 2277 | *num_stripes = fs_devices->rw_devices; | ||
| 2278 | if (*num_stripes < 4) | ||
| 2279 | return -ENOSPC; | ||
| 2280 | *num_stripes &= ~(u32)1; | ||
| 2281 | *sub_stripes = 2; | ||
| 2282 | *min_stripes = 4; | ||
| 2283 | } | 2280 | } |
| 2284 | 2281 | ||
| 2285 | return 0; | 2282 | if (list_empty(&fs_devices->alloc_list)) |
| 2286 | } | 2283 | return -ENOSPC; |
| 2287 | 2284 | ||
| 2288 | static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, | 2285 | sub_stripes = 1; |
| 2289 | u64 proposed_size, u64 type, | 2286 | dev_stripes = 1; |
| 2290 | int num_stripes, int small_stripe) | 2287 | devs_increment = 1; |
| 2291 | { | 2288 | ncopies = 1; |
| 2292 | int min_stripe_size = 1 * 1024 * 1024; | 2289 | devs_max = 0; /* 0 == as many as possible */ |
| 2293 | u64 calc_size = proposed_size; | 2290 | devs_min = 1; |
| 2294 | u64 max_chunk_size = calc_size; | ||
| 2295 | int ncopies = 1; | ||
| 2296 | 2291 | ||
| 2297 | if (type & (BTRFS_BLOCK_GROUP_RAID1 | | 2292 | /* |
| 2298 | BTRFS_BLOCK_GROUP_DUP | | 2293 | * define the properties of each RAID type. |
| 2299 | BTRFS_BLOCK_GROUP_RAID10)) | 2294 | * FIXME: move this to a global table and use it in all RAID |
| 2295 | * calculation code | ||
| 2296 | */ | ||
| 2297 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { | ||
| 2298 | dev_stripes = 2; | ||
| 2299 | ncopies = 2; | ||
| 2300 | devs_max = 1; | ||
| 2301 | } else if (type & (BTRFS_BLOCK_GROUP_RAID0)) { | ||
| 2302 | devs_min = 2; | ||
| 2303 | } else if (type & (BTRFS_BLOCK_GROUP_RAID1)) { | ||
| 2304 | devs_increment = 2; | ||
| 2300 | ncopies = 2; | 2305 | ncopies = 2; |
| 2306 | devs_max = 2; | ||
| 2307 | devs_min = 2; | ||
| 2308 | } else if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | ||
| 2309 | sub_stripes = 2; | ||
| 2310 | devs_increment = 2; | ||
| 2311 | ncopies = 2; | ||
| 2312 | devs_min = 4; | ||
| 2313 | } else { | ||
| 2314 | devs_max = 1; | ||
| 2315 | } | ||
| 2301 | 2316 | ||
| 2302 | if (type & BTRFS_BLOCK_GROUP_DATA) { | 2317 | if (type & BTRFS_BLOCK_GROUP_DATA) { |
| 2303 | max_chunk_size = 10 * calc_size; | 2318 | max_stripe_size = 1024 * 1024 * 1024; |
| 2304 | min_stripe_size = 64 * 1024 * 1024; | 2319 | max_chunk_size = 10 * max_stripe_size; |
| 2305 | } else if (type & BTRFS_BLOCK_GROUP_METADATA) { | 2320 | } else if (type & BTRFS_BLOCK_GROUP_METADATA) { |
| 2306 | max_chunk_size = 256 * 1024 * 1024; | 2321 | max_stripe_size = 256 * 1024 * 1024; |
| 2307 | min_stripe_size = 32 * 1024 * 1024; | 2322 | max_chunk_size = max_stripe_size; |
| 2308 | } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { | 2323 | } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { |
| 2309 | calc_size = 8 * 1024 * 1024; | 2324 | max_stripe_size = 8 * 1024 * 1024; |
| 2310 | max_chunk_size = calc_size * 2; | 2325 | max_chunk_size = 2 * max_stripe_size; |
| 2311 | min_stripe_size = 1 * 1024 * 1024; | 2326 | } else { |
| 2327 | printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n", | ||
| 2328 | type); | ||
| 2329 | BUG_ON(1); | ||
| 2312 | } | 2330 | } |
| 2313 | 2331 | ||
| 2314 | /* we don't want a chunk larger than 10% of writeable space */ | 2332 | /* we don't want a chunk larger than 10% of writeable space */ |
| 2315 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), | 2333 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), |
| 2316 | max_chunk_size); | 2334 | max_chunk_size); |
| 2317 | 2335 | ||
| 2318 | if (calc_size * num_stripes > max_chunk_size * ncopies) { | 2336 | devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, |
| 2319 | calc_size = max_chunk_size * ncopies; | 2337 | GFP_NOFS); |
| 2320 | do_div(calc_size, num_stripes); | 2338 | if (!devices_info) |
| 2321 | do_div(calc_size, BTRFS_STRIPE_LEN); | 2339 | return -ENOMEM; |
| 2322 | calc_size *= BTRFS_STRIPE_LEN; | ||
| 2323 | } | ||
| 2324 | 2340 | ||
| 2325 | /* we don't want tiny stripes */ | 2341 | cur = fs_devices->alloc_list.next; |
| 2326 | if (!small_stripe) | ||
| 2327 | calc_size = max_t(u64, min_stripe_size, calc_size); | ||
| 2328 | 2342 | ||
| 2329 | /* | 2343 | /* |
| 2330 | * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure | 2344 | * in the first pass through the devices list, we gather information |
| 2331 | * we end up with something bigger than a stripe | 2345 | * about the available holes on each device. |
| 2332 | */ | 2346 | */ |
| 2333 | calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); | 2347 | ndevs = 0; |
| 2334 | 2348 | while (cur != &fs_devices->alloc_list) { | |
| 2335 | do_div(calc_size, BTRFS_STRIPE_LEN); | 2349 | struct btrfs_device *device; |
| 2336 | calc_size *= BTRFS_STRIPE_LEN; | 2350 | u64 max_avail; |
| 2337 | 2351 | u64 dev_offset; | |
| 2338 | return calc_size; | ||
| 2339 | } | ||
| 2340 | |||
| 2341 | static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map, | ||
| 2342 | int num_stripes) | ||
| 2343 | { | ||
| 2344 | struct map_lookup *new; | ||
| 2345 | size_t len = map_lookup_size(num_stripes); | ||
| 2346 | |||
| 2347 | BUG_ON(map->num_stripes < num_stripes); | ||
| 2348 | |||
| 2349 | if (map->num_stripes == num_stripes) | ||
| 2350 | return map; | ||
| 2351 | |||
| 2352 | new = kmalloc(len, GFP_NOFS); | ||
| 2353 | if (!new) { | ||
| 2354 | /* just change map->num_stripes */ | ||
| 2355 | map->num_stripes = num_stripes; | ||
| 2356 | return map; | ||
| 2357 | } | ||
| 2358 | |||
| 2359 | memcpy(new, map, len); | ||
| 2360 | new->num_stripes = num_stripes; | ||
| 2361 | kfree(map); | ||
| 2362 | return new; | ||
| 2363 | } | ||
| 2364 | 2352 | ||
| 2365 | /* | 2353 | device = list_entry(cur, struct btrfs_device, dev_alloc_list); |
| 2366 | * helper to allocate device space from btrfs_device_info, in which we stored | ||
| 2367 | * max free space information of every device. It is used when we can not | ||
| 2368 | * allocate chunks by default size. | ||
| 2369 | * | ||
| 2370 | * By this helper, we can allocate a new chunk as larger as possible. | ||
| 2371 | */ | ||
| 2372 | static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans, | ||
| 2373 | struct btrfs_fs_devices *fs_devices, | ||
| 2374 | struct btrfs_device_info *devices, | ||
| 2375 | int nr_device, u64 type, | ||
| 2376 | struct map_lookup **map_lookup, | ||
| 2377 | int min_stripes, u64 *stripe_size) | ||
| 2378 | { | ||
| 2379 | int i, index, sort_again = 0; | ||
| 2380 | int min_devices = min_stripes; | ||
| 2381 | u64 max_avail, min_free; | ||
| 2382 | struct map_lookup *map = *map_lookup; | ||
| 2383 | int ret; | ||
| 2384 | 2354 | ||
| 2385 | if (nr_device < min_stripes) | 2355 | cur = cur->next; |
| 2386 | return -ENOSPC; | ||
| 2387 | 2356 | ||
| 2388 | btrfs_descending_sort_devices(devices, nr_device); | 2357 | if (!device->writeable) { |
| 2358 | printk(KERN_ERR | ||
| 2359 | "btrfs: read-only device in alloc_list\n"); | ||
| 2360 | WARN_ON(1); | ||
| 2361 | continue; | ||
| 2362 | } | ||
| 2389 | 2363 | ||
| 2390 | max_avail = devices[0].max_avail; | 2364 | if (!device->in_fs_metadata) |
| 2391 | if (!max_avail) | 2365 | continue; |
| 2392 | return -ENOSPC; | ||
| 2393 | 2366 | ||
| 2394 | for (i = 0; i < nr_device; i++) { | 2367 | if (device->total_bytes > device->bytes_used) |
| 2395 | /* | 2368 | total_avail = device->total_bytes - device->bytes_used; |
| 2396 | * if dev_offset = 0, it means the free space of this device | 2369 | else |
| 2397 | * is less than what we need, and we didn't search max avail | 2370 | total_avail = 0; |
| 2398 | * extent on this device, so do it now. | 2371 | /* avail is off by max(alloc_start, 1MB), but that is the same |
| 2372 | * for all devices, so it doesn't hurt the sorting later on | ||
| 2399 | */ | 2373 | */ |
| 2400 | if (!devices[i].dev_offset) { | ||
| 2401 | ret = find_free_dev_extent(trans, devices[i].dev, | ||
| 2402 | max_avail, | ||
| 2403 | &devices[i].dev_offset, | ||
| 2404 | &devices[i].max_avail); | ||
| 2405 | if (ret != 0 && ret != -ENOSPC) | ||
| 2406 | return ret; | ||
| 2407 | sort_again = 1; | ||
| 2408 | } | ||
| 2409 | } | ||
| 2410 | 2374 | ||
| 2411 | /* we update the max avail free extent of each devices, sort again */ | 2375 | ret = find_free_dev_extent(trans, device, |
| 2412 | if (sort_again) | 2376 | max_stripe_size * dev_stripes, |
| 2413 | btrfs_descending_sort_devices(devices, nr_device); | 2377 | &dev_offset, &max_avail); |
| 2414 | 2378 | if (ret && ret != -ENOSPC) | |
| 2415 | if (type & BTRFS_BLOCK_GROUP_DUP) | 2379 | goto error; |
| 2416 | min_devices = 1; | ||
| 2417 | 2380 | ||
| 2418 | if (!devices[min_devices - 1].max_avail) | 2381 | if (ret == 0) |
| 2419 | return -ENOSPC; | 2382 | max_avail = max_stripe_size * dev_stripes; |
| 2420 | 2383 | ||
| 2421 | max_avail = devices[min_devices - 1].max_avail; | 2384 | if (max_avail < BTRFS_STRIPE_LEN * dev_stripes) |
| 2422 | if (type & BTRFS_BLOCK_GROUP_DUP) | 2385 | continue; |
| 2423 | do_div(max_avail, 2); | ||
| 2424 | 2386 | ||
| 2425 | max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, | 2387 | devices_info[ndevs].dev_offset = dev_offset; |
| 2426 | min_stripes, 1); | 2388 | devices_info[ndevs].max_avail = max_avail; |
| 2427 | if (type & BTRFS_BLOCK_GROUP_DUP) | 2389 | devices_info[ndevs].total_avail = total_avail; |
| 2428 | min_free = max_avail * 2; | 2390 | devices_info[ndevs].dev = device; |
| 2429 | else | 2391 | ++ndevs; |
| 2430 | min_free = max_avail; | 2392 | } |
| 2431 | 2393 | ||
| 2432 | if (min_free > devices[min_devices - 1].max_avail) | 2394 | /* |
| 2433 | return -ENOSPC; | 2395 | * now sort the devices by hole size / available space |
| 2396 | */ | ||
| 2397 | sort(devices_info, ndevs, sizeof(struct btrfs_device_info), | ||
| 2398 | btrfs_cmp_device_info, NULL); | ||
| 2434 | 2399 | ||
| 2435 | map = __shrink_map_lookup_stripes(map, min_stripes); | 2400 | /* round down to number of usable stripes */ |
| 2436 | *stripe_size = max_avail; | 2401 | ndevs -= ndevs % devs_increment; |
| 2437 | 2402 | ||
| 2438 | index = 0; | 2403 | if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) { |
| 2439 | for (i = 0; i < min_stripes; i++) { | 2404 | ret = -ENOSPC; |
| 2440 | map->stripes[i].dev = devices[index].dev; | 2405 | goto error; |
| 2441 | map->stripes[i].physical = devices[index].dev_offset; | ||
| 2442 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
| 2443 | i++; | ||
| 2444 | map->stripes[i].dev = devices[index].dev; | ||
| 2445 | map->stripes[i].physical = devices[index].dev_offset + | ||
| 2446 | max_avail; | ||
| 2447 | } | ||
| 2448 | index++; | ||
| 2449 | } | 2406 | } |
| 2450 | *map_lookup = map; | ||
| 2451 | |||
| 2452 | return 0; | ||
| 2453 | } | ||
| 2454 | 2407 | ||
| 2455 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 2408 | if (devs_max && ndevs > devs_max) |
| 2456 | struct btrfs_root *extent_root, | 2409 | ndevs = devs_max; |
| 2457 | struct map_lookup **map_ret, | 2410 | /* |
| 2458 | u64 *num_bytes, u64 *stripe_size, | 2411 | * the primary goal is to maximize the number of stripes, so use as many |
| 2459 | u64 start, u64 type) | 2412 | * devices as possible, even if the stripes are not maximum sized. |
| 2460 | { | 2413 | */ |
| 2461 | struct btrfs_fs_info *info = extent_root->fs_info; | 2414 | stripe_size = devices_info[ndevs-1].max_avail; |
| 2462 | struct btrfs_device *device = NULL; | 2415 | num_stripes = ndevs * dev_stripes; |
| 2463 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | ||
| 2464 | struct list_head *cur; | ||
| 2465 | struct map_lookup *map; | ||
| 2466 | struct extent_map_tree *em_tree; | ||
| 2467 | struct extent_map *em; | ||
| 2468 | struct btrfs_device_info *devices_info; | ||
| 2469 | struct list_head private_devs; | ||
| 2470 | u64 calc_size = 1024 * 1024 * 1024; | ||
| 2471 | u64 min_free; | ||
| 2472 | u64 avail; | ||
| 2473 | u64 dev_offset; | ||
| 2474 | int num_stripes; | ||
| 2475 | int min_stripes; | ||
| 2476 | int sub_stripes; | ||
| 2477 | int min_devices; /* the min number of devices we need */ | ||
| 2478 | int i; | ||
| 2479 | int ret; | ||
| 2480 | int index; | ||
| 2481 | 2416 | ||
| 2482 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | 2417 | if (stripe_size * num_stripes > max_chunk_size * ncopies) { |
| 2483 | (type & BTRFS_BLOCK_GROUP_DUP)) { | 2418 | stripe_size = max_chunk_size * ncopies; |
| 2484 | WARN_ON(1); | 2419 | do_div(stripe_size, num_stripes); |
| 2485 | type &= ~BTRFS_BLOCK_GROUP_DUP; | ||
| 2486 | } | 2420 | } |
| 2487 | if (list_empty(&fs_devices->alloc_list)) | ||
| 2488 | return -ENOSPC; | ||
| 2489 | 2421 | ||
| 2490 | ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes, | 2422 | do_div(stripe_size, dev_stripes); |
| 2491 | &min_stripes, &sub_stripes); | 2423 | do_div(stripe_size, BTRFS_STRIPE_LEN); |
| 2492 | if (ret) | 2424 | stripe_size *= BTRFS_STRIPE_LEN; |
| 2493 | return ret; | ||
| 2494 | |||
| 2495 | devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, | ||
| 2496 | GFP_NOFS); | ||
| 2497 | if (!devices_info) | ||
| 2498 | return -ENOMEM; | ||
| 2499 | 2425 | ||
| 2500 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | 2426 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); |
| 2501 | if (!map) { | 2427 | if (!map) { |
| @@ -2504,85 +2430,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 2504 | } | 2430 | } |
| 2505 | map->num_stripes = num_stripes; | 2431 | map->num_stripes = num_stripes; |
| 2506 | 2432 | ||
| 2507 | cur = fs_devices->alloc_list.next; | 2433 | for (i = 0; i < ndevs; ++i) { |
| 2508 | index = 0; | 2434 | for (j = 0; j < dev_stripes; ++j) { |
| 2509 | i = 0; | 2435 | int s = i * dev_stripes + j; |
| 2510 | 2436 | map->stripes[s].dev = devices_info[i].dev; | |
| 2511 | calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, | 2437 | map->stripes[s].physical = devices_info[i].dev_offset + |
| 2512 | num_stripes, 0); | 2438 | j * stripe_size; |
| 2513 | |||
| 2514 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
| 2515 | min_free = calc_size * 2; | ||
| 2516 | min_devices = 1; | ||
| 2517 | } else { | ||
| 2518 | min_free = calc_size; | ||
| 2519 | min_devices = min_stripes; | ||
| 2520 | } | ||
| 2521 | |||
| 2522 | INIT_LIST_HEAD(&private_devs); | ||
| 2523 | while (index < num_stripes) { | ||
| 2524 | device = list_entry(cur, struct btrfs_device, dev_alloc_list); | ||
| 2525 | BUG_ON(!device->writeable); | ||
| 2526 | if (device->total_bytes > device->bytes_used) | ||
| 2527 | avail = device->total_bytes - device->bytes_used; | ||
| 2528 | else | ||
| 2529 | avail = 0; | ||
| 2530 | cur = cur->next; | ||
| 2531 | |||
| 2532 | if (device->in_fs_metadata && avail >= min_free) { | ||
| 2533 | ret = find_free_dev_extent(trans, device, min_free, | ||
| 2534 | &devices_info[i].dev_offset, | ||
| 2535 | &devices_info[i].max_avail); | ||
| 2536 | if (ret == 0) { | ||
| 2537 | list_move_tail(&device->dev_alloc_list, | ||
| 2538 | &private_devs); | ||
| 2539 | map->stripes[index].dev = device; | ||
| 2540 | map->stripes[index].physical = | ||
| 2541 | devices_info[i].dev_offset; | ||
| 2542 | index++; | ||
| 2543 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
| 2544 | map->stripes[index].dev = device; | ||
| 2545 | map->stripes[index].physical = | ||
| 2546 | devices_info[i].dev_offset + | ||
| 2547 | calc_size; | ||
| 2548 | index++; | ||
| 2549 | } | ||
| 2550 | } else if (ret != -ENOSPC) | ||
| 2551 | goto error; | ||
| 2552 | |||
| 2553 | devices_info[i].dev = device; | ||
| 2554 | i++; | ||
| 2555 | } else if (device->in_fs_metadata && | ||
| 2556 | avail >= BTRFS_STRIPE_LEN) { | ||
| 2557 | devices_info[i].dev = device; | ||
| 2558 | devices_info[i].max_avail = avail; | ||
| 2559 | i++; | ||
| 2560 | } | ||
| 2561 | |||
| 2562 | if (cur == &fs_devices->alloc_list) | ||
| 2563 | break; | ||
| 2564 | } | ||
| 2565 | |||
| 2566 | list_splice(&private_devs, &fs_devices->alloc_list); | ||
| 2567 | if (index < num_stripes) { | ||
| 2568 | if (index >= min_stripes) { | ||
| 2569 | num_stripes = index; | ||
| 2570 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | ||
| 2571 | num_stripes /= sub_stripes; | ||
| 2572 | num_stripes *= sub_stripes; | ||
| 2573 | } | ||
| 2574 | |||
| 2575 | map = __shrink_map_lookup_stripes(map, num_stripes); | ||
| 2576 | } else if (i >= min_devices) { | ||
| 2577 | ret = __btrfs_alloc_tiny_space(trans, fs_devices, | ||
| 2578 | devices_info, i, type, | ||
| 2579 | &map, min_stripes, | ||
| 2580 | &calc_size); | ||
| 2581 | if (ret) | ||
| 2582 | goto error; | ||
| 2583 | } else { | ||
| 2584 | ret = -ENOSPC; | ||
| 2585 | goto error; | ||
| 2586 | } | 2439 | } |
| 2587 | } | 2440 | } |
| 2588 | map->sector_size = extent_root->sectorsize; | 2441 | map->sector_size = extent_root->sectorsize; |
| @@ -2593,11 +2446,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 2593 | map->sub_stripes = sub_stripes; | 2446 | map->sub_stripes = sub_stripes; |
| 2594 | 2447 | ||
| 2595 | *map_ret = map; | 2448 | *map_ret = map; |
| 2596 | *stripe_size = calc_size; | 2449 | num_bytes = stripe_size * (num_stripes / ncopies); |
| 2597 | *num_bytes = chunk_bytes_by_type(type, calc_size, | ||
| 2598 | map->num_stripes, sub_stripes); | ||
| 2599 | 2450 | ||
| 2600 | trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes); | 2451 | *stripe_size_out = stripe_size; |
| 2452 | *num_bytes_out = num_bytes; | ||
| 2453 | |||
| 2454 | trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes); | ||
| 2601 | 2455 | ||
| 2602 | em = alloc_extent_map(); | 2456 | em = alloc_extent_map(); |
| 2603 | if (!em) { | 2457 | if (!em) { |
| @@ -2606,7 +2460,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 2606 | } | 2460 | } |
| 2607 | em->bdev = (struct block_device *)map; | 2461 | em->bdev = (struct block_device *)map; |
| 2608 | em->start = start; | 2462 | em->start = start; |
| 2609 | em->len = *num_bytes; | 2463 | em->len = num_bytes; |
| 2610 | em->block_start = 0; | 2464 | em->block_start = 0; |
| 2611 | em->block_len = em->len; | 2465 | em->block_len = em->len; |
| 2612 | 2466 | ||
| @@ -2619,20 +2473,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 2619 | 2473 | ||
| 2620 | ret = btrfs_make_block_group(trans, extent_root, 0, type, | 2474 | ret = btrfs_make_block_group(trans, extent_root, 0, type, |
| 2621 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, | 2475 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, |
| 2622 | start, *num_bytes); | 2476 | start, num_bytes); |
| 2623 | BUG_ON(ret); | 2477 | BUG_ON(ret); |
| 2624 | 2478 | ||
| 2625 | index = 0; | 2479 | for (i = 0; i < map->num_stripes; ++i) { |
| 2626 | while (index < map->num_stripes) { | 2480 | struct btrfs_device *device; |
| 2627 | device = map->stripes[index].dev; | 2481 | u64 dev_offset; |
| 2628 | dev_offset = map->stripes[index].physical; | 2482 | |
| 2483 | device = map->stripes[i].dev; | ||
| 2484 | dev_offset = map->stripes[i].physical; | ||
| 2629 | 2485 | ||
| 2630 | ret = btrfs_alloc_dev_extent(trans, device, | 2486 | ret = btrfs_alloc_dev_extent(trans, device, |
| 2631 | info->chunk_root->root_key.objectid, | 2487 | info->chunk_root->root_key.objectid, |
| 2632 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, | 2488 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, |
| 2633 | start, dev_offset, calc_size); | 2489 | start, dev_offset, stripe_size); |
| 2634 | BUG_ON(ret); | 2490 | BUG_ON(ret); |
| 2635 | index++; | ||
| 2636 | } | 2491 | } |
| 2637 | 2492 | ||
| 2638 | kfree(devices_info); | 2493 | kfree(devices_info); |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5669ae8ea1c9..05d5d199381a 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
| @@ -144,6 +144,7 @@ struct btrfs_device_info { | |||
| 144 | struct btrfs_device *dev; | 144 | struct btrfs_device *dev; |
| 145 | u64 dev_offset; | 145 | u64 dev_offset; |
| 146 | u64 max_avail; | 146 | u64 max_avail; |
| 147 | u64 total_avail; | ||
| 147 | }; | 148 | }; |
| 148 | 149 | ||
| 149 | struct map_lookup { | 150 | struct map_lookup { |
| @@ -157,21 +158,6 @@ struct map_lookup { | |||
| 157 | struct btrfs_bio_stripe stripes[]; | 158 | struct btrfs_bio_stripe stripes[]; |
| 158 | }; | 159 | }; |
| 159 | 160 | ||
| 160 | /* Used to sort the devices by max_avail(descending sort) */ | ||
| 161 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); | ||
| 162 | |||
| 163 | /* | ||
| 164 | * sort the devices by max_avail, in which max free extent size of each device | ||
| 165 | * is stored.(Descending Sort) | ||
| 166 | */ | ||
| 167 | static inline void btrfs_descending_sort_devices( | ||
| 168 | struct btrfs_device_info *devices, | ||
| 169 | size_t nr_devices) | ||
| 170 | { | ||
| 171 | sort(devices, nr_devices, sizeof(struct btrfs_device_info), | ||
| 172 | btrfs_cmp_device_free_bytes, NULL); | ||
| 173 | } | ||
| 174 | |||
| 175 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | 161 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, |
| 176 | u64 end, u64 *length); | 162 | u64 end, u64 *length); |
| 177 | 163 | ||
