aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/volumes.c379
-rw-r--r--fs/btrfs/volumes.h24
2 files changed, 300 insertions, 103 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 4838bd395e49..c22784b989b7 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -877,7 +877,7 @@ out:
877 btrfs_free_path(path); 877 btrfs_free_path(path);
878error: 878error:
879 *start = max_hole_start; 879 *start = max_hole_start;
880 if (len && max_hole_size > *len) 880 if (len)
881 *len = max_hole_size; 881 *len = max_hole_size;
882 return ret; 882 return ret;
883} 883}
@@ -2176,70 +2176,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size,
2176 return calc_size * num_stripes; 2176 return calc_size * num_stripes;
2177} 2177}
2178 2178
2179static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 2179/* Used to sort the devices by max_avail(descending sort) */
2180 struct btrfs_root *extent_root, 2180int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2)
2181 struct map_lookup **map_ret,
2182 u64 *num_bytes, u64 *stripe_size,
2183 u64 start, u64 type)
2184{ 2181{
2185 struct btrfs_fs_info *info = extent_root->fs_info; 2182 if (((struct btrfs_device_info *)dev_info1)->max_avail >
2186 struct btrfs_device *device = NULL; 2183 ((struct btrfs_device_info *)dev_info2)->max_avail)
2187 struct btrfs_fs_devices *fs_devices = info->fs_devices; 2184 return -1;
2188 struct list_head *cur; 2185 else if (((struct btrfs_device_info *)dev_info1)->max_avail <
2189 struct map_lookup *map = NULL; 2186 ((struct btrfs_device_info *)dev_info2)->max_avail)
2190 struct extent_map_tree *em_tree; 2187 return 1;
2191 struct extent_map *em; 2188 else
2192 struct list_head private_devs; 2189 return 0;
2193 int min_stripe_size = 1 * 1024 * 1024; 2190}
2194 u64 calc_size = 1024 * 1024 * 1024;
2195 u64 max_chunk_size = calc_size;
2196 u64 min_free;
2197 u64 avail;
2198 u64 max_avail = 0;
2199 u64 dev_offset;
2200 int num_stripes = 1;
2201 int min_stripes = 1;
2202 int sub_stripes = 0;
2203 int ncopies = 1;
2204 int looped = 0;
2205 int ret;
2206 int index;
2207 int stripe_len = 64 * 1024;
2208 2191
2209 if ((type & BTRFS_BLOCK_GROUP_RAID1) && 2192static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type,
2210 (type & BTRFS_BLOCK_GROUP_DUP)) { 2193 int *num_stripes, int *min_stripes,
2211 WARN_ON(1); 2194 int *sub_stripes)
2212 type &= ~BTRFS_BLOCK_GROUP_DUP; 2195{
2213 } 2196 *num_stripes = 1;
2214 if (list_empty(&fs_devices->alloc_list)) 2197 *min_stripes = 1;
2215 return -ENOSPC; 2198 *sub_stripes = 0;
2216 2199
2217 if (type & (BTRFS_BLOCK_GROUP_RAID0)) { 2200 if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
2218 num_stripes = fs_devices->rw_devices; 2201 *num_stripes = fs_devices->rw_devices;
2219 min_stripes = 2; 2202 *min_stripes = 2;
2220 } 2203 }
2221 if (type & (BTRFS_BLOCK_GROUP_DUP)) { 2204 if (type & (BTRFS_BLOCK_GROUP_DUP)) {
2222 num_stripes = 2; 2205 *num_stripes = 2;
2223 min_stripes = 2; 2206 *min_stripes = 2;
2224 ncopies = 2;
2225 } 2207 }
2226 if (type & (BTRFS_BLOCK_GROUP_RAID1)) { 2208 if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
2227 if (fs_devices->rw_devices < 2) 2209 if (fs_devices->rw_devices < 2)
2228 return -ENOSPC; 2210 return -ENOSPC;
2229 num_stripes = 2; 2211 *num_stripes = 2;
2230 min_stripes = 2; 2212 *min_stripes = 2;
2231 ncopies = 2;
2232 } 2213 }
2233 if (type & (BTRFS_BLOCK_GROUP_RAID10)) { 2214 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
2234 num_stripes = fs_devices->rw_devices; 2215 *num_stripes = fs_devices->rw_devices;
2235 if (num_stripes < 4) 2216 if (*num_stripes < 4)
2236 return -ENOSPC; 2217 return -ENOSPC;
2237 num_stripes &= ~(u32)1; 2218 *num_stripes &= ~(u32)1;
2238 sub_stripes = 2; 2219 *sub_stripes = 2;
2239 ncopies = 2; 2220 *min_stripes = 4;
2240 min_stripes = 4;
2241 } 2221 }
2242 2222
2223 return 0;
2224}
2225
2226static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices,
2227 u64 proposed_size, u64 type,
2228 int num_stripes, int small_stripe)
2229{
2230 int min_stripe_size = 1 * 1024 * 1024;
2231 u64 calc_size = proposed_size;
2232 u64 max_chunk_size = calc_size;
2233 int ncopies = 1;
2234
2235 if (type & (BTRFS_BLOCK_GROUP_RAID1 |
2236 BTRFS_BLOCK_GROUP_DUP |
2237 BTRFS_BLOCK_GROUP_RAID10))
2238 ncopies = 2;
2239
2243 if (type & BTRFS_BLOCK_GROUP_DATA) { 2240 if (type & BTRFS_BLOCK_GROUP_DATA) {
2244 max_chunk_size = 10 * calc_size; 2241 max_chunk_size = 10 * calc_size;
2245 min_stripe_size = 64 * 1024 * 1024; 2242 min_stripe_size = 64 * 1024 * 1024;
@@ -2256,51 +2253,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2256 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), 2253 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
2257 max_chunk_size); 2254 max_chunk_size);
2258 2255
2259again:
2260 max_avail = 0;
2261 if (!map || map->num_stripes != num_stripes) {
2262 kfree(map);
2263 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
2264 if (!map)
2265 return -ENOMEM;
2266 map->num_stripes = num_stripes;
2267 }
2268
2269 if (calc_size * num_stripes > max_chunk_size * ncopies) { 2256 if (calc_size * num_stripes > max_chunk_size * ncopies) {
2270 calc_size = max_chunk_size * ncopies; 2257 calc_size = max_chunk_size * ncopies;
2271 do_div(calc_size, num_stripes); 2258 do_div(calc_size, num_stripes);
2272 do_div(calc_size, stripe_len); 2259 do_div(calc_size, BTRFS_STRIPE_LEN);
2273 calc_size *= stripe_len; 2260 calc_size *= BTRFS_STRIPE_LEN;
2274 } 2261 }
2275 2262
2276 /* we don't want tiny stripes */ 2263 /* we don't want tiny stripes */
2277 if (!looped) 2264 if (!small_stripe)
2278 calc_size = max_t(u64, min_stripe_size, calc_size); 2265 calc_size = max_t(u64, min_stripe_size, calc_size);
2279 2266
2280 /* 2267 /*
2281 * we're about to do_div by the stripe_len so lets make sure 2268 * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure
2282 * we end up with something bigger than a stripe 2269 * we end up with something bigger than a stripe
2283 */ 2270 */
2284 calc_size = max_t(u64, calc_size, stripe_len * 4); 2271 calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN);
2272
2273 do_div(calc_size, BTRFS_STRIPE_LEN);
2274 calc_size *= BTRFS_STRIPE_LEN;
2275
2276 return calc_size;
2277}
2278
2279static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map,
2280 int num_stripes)
2281{
2282 struct map_lookup *new;
2283 size_t len = map_lookup_size(num_stripes);
2284
2285 BUG_ON(map->num_stripes < num_stripes);
2286
2287 if (map->num_stripes == num_stripes)
2288 return map;
2289
2290 new = kmalloc(len, GFP_NOFS);
2291 if (!new) {
2292 /* just change map->num_stripes */
2293 map->num_stripes = num_stripes;
2294 return map;
2295 }
2296
2297 memcpy(new, map, len);
2298 new->num_stripes = num_stripes;
2299 kfree(map);
2300 return new;
2301}
2302
2303/*
2304 * helper to allocate device space from btrfs_device_info, in which we stored
2305 * max free space information of every device. It is used when we can not
2306 * allocate chunks by default size.
2307 *
2308 * By this helper, we can allocate a new chunk as larger as possible.
2309 */
2310static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans,
2311 struct btrfs_fs_devices *fs_devices,
2312 struct btrfs_device_info *devices,
2313 int nr_device, u64 type,
2314 struct map_lookup **map_lookup,
2315 int min_stripes, u64 *stripe_size)
2316{
2317 int i, index, sort_again = 0;
2318 int min_devices = min_stripes;
2319 u64 max_avail, min_free;
2320 struct map_lookup *map = *map_lookup;
2321 int ret;
2322
2323 if (nr_device < min_stripes)
2324 return -ENOSPC;
2325
2326 btrfs_descending_sort_devices(devices, nr_device);
2327
2328 max_avail = devices[0].max_avail;
2329 if (!max_avail)
2330 return -ENOSPC;
2331
2332 for (i = 0; i < nr_device; i++) {
2333 /*
2334 * if dev_offset = 0, it means the free space of this device
2335 * is less than what we need, and we didn't search max avail
2336 * extent on this device, so do it now.
2337 */
2338 if (!devices[i].dev_offset) {
2339 ret = find_free_dev_extent(trans, devices[i].dev,
2340 max_avail,
2341 &devices[i].dev_offset,
2342 &devices[i].max_avail);
2343 if (ret != 0 && ret != -ENOSPC)
2344 return ret;
2345 sort_again = 1;
2346 }
2347 }
2348
2349 /* we update the max avail free extent of each devices, sort again */
2350 if (sort_again)
2351 btrfs_descending_sort_devices(devices, nr_device);
2352
2353 if (type & BTRFS_BLOCK_GROUP_DUP)
2354 min_devices = 1;
2355
2356 if (!devices[min_devices - 1].max_avail)
2357 return -ENOSPC;
2358
2359 max_avail = devices[min_devices - 1].max_avail;
2360 if (type & BTRFS_BLOCK_GROUP_DUP)
2361 do_div(max_avail, 2);
2285 2362
2286 do_div(calc_size, stripe_len); 2363 max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type,
2287 calc_size *= stripe_len; 2364 min_stripes, 1);
2365 if (type & BTRFS_BLOCK_GROUP_DUP)
2366 min_free = max_avail * 2;
2367 else
2368 min_free = max_avail;
2369
2370 if (min_free > devices[min_devices - 1].max_avail)
2371 return -ENOSPC;
2372
2373 map = __shrink_map_lookup_stripes(map, min_stripes);
2374 *stripe_size = max_avail;
2375
2376 index = 0;
2377 for (i = 0; i < min_stripes; i++) {
2378 map->stripes[i].dev = devices[index].dev;
2379 map->stripes[i].physical = devices[index].dev_offset;
2380 if (type & BTRFS_BLOCK_GROUP_DUP) {
2381 i++;
2382 map->stripes[i].dev = devices[index].dev;
2383 map->stripes[i].physical = devices[index].dev_offset +
2384 max_avail;
2385 }
2386 index++;
2387 }
2388 *map_lookup = map;
2389
2390 return 0;
2391}
2392
2393static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2394 struct btrfs_root *extent_root,
2395 struct map_lookup **map_ret,
2396 u64 *num_bytes, u64 *stripe_size,
2397 u64 start, u64 type)
2398{
2399 struct btrfs_fs_info *info = extent_root->fs_info;
2400 struct btrfs_device *device = NULL;
2401 struct btrfs_fs_devices *fs_devices = info->fs_devices;
2402 struct list_head *cur;
2403 struct map_lookup *map;
2404 struct extent_map_tree *em_tree;
2405 struct extent_map *em;
2406 struct btrfs_device_info *devices_info;
2407 struct list_head private_devs;
2408 u64 calc_size = 1024 * 1024 * 1024;
2409 u64 min_free;
2410 u64 avail;
2411 u64 dev_offset;
2412 int num_stripes;
2413 int min_stripes;
2414 int sub_stripes;
2415 int min_devices; /* the min number of devices we need */
2416 int i;
2417 int ret;
2418 int index;
2419
2420 if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
2421 (type & BTRFS_BLOCK_GROUP_DUP)) {
2422 WARN_ON(1);
2423 type &= ~BTRFS_BLOCK_GROUP_DUP;
2424 }
2425 if (list_empty(&fs_devices->alloc_list))
2426 return -ENOSPC;
2427
2428 ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes,
2429 &min_stripes, &sub_stripes);
2430 if (ret)
2431 return ret;
2432
2433 devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
2434 GFP_NOFS);
2435 if (!devices_info)
2436 return -ENOMEM;
2437
2438 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
2439 if (!map) {
2440 ret = -ENOMEM;
2441 goto error;
2442 }
2443 map->num_stripes = num_stripes;
2288 2444
2289 cur = fs_devices->alloc_list.next; 2445 cur = fs_devices->alloc_list.next;
2290 index = 0; 2446 index = 0;
2447 i = 0;
2291 2448
2292 if (type & BTRFS_BLOCK_GROUP_DUP) 2449 calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type,
2450 num_stripes, 0);
2451
2452 if (type & BTRFS_BLOCK_GROUP_DUP) {
2293 min_free = calc_size * 2; 2453 min_free = calc_size * 2;
2294 else 2454 min_devices = 1;
2455 } else {
2295 min_free = calc_size; 2456 min_free = calc_size;
2296 2457 min_devices = min_stripes;
2297 /* 2458 }
2298 * we add 1MB because we never use the first 1MB of the device, unless
2299 * we've looped, then we are likely allocating the maximum amount of
2300 * space left already
2301 */
2302 if (!looped)
2303 min_free += 1024 * 1024;
2304 2459
2305 INIT_LIST_HEAD(&private_devs); 2460 INIT_LIST_HEAD(&private_devs);
2306 while (index < num_stripes) { 2461 while (index < num_stripes) {
@@ -2313,27 +2468,39 @@ again:
2313 cur = cur->next; 2468 cur = cur->next;
2314 2469
2315 if (device->in_fs_metadata && avail >= min_free) { 2470 if (device->in_fs_metadata && avail >= min_free) {
2316 ret = find_free_dev_extent(trans, device, 2471 ret = find_free_dev_extent(trans, device, min_free,
2317 min_free, &dev_offset, 2472 &devices_info[i].dev_offset,
2318 &max_avail); 2473 &devices_info[i].max_avail);
2319 if (ret == 0) { 2474 if (ret == 0) {
2320 list_move_tail(&device->dev_alloc_list, 2475 list_move_tail(&device->dev_alloc_list,
2321 &private_devs); 2476 &private_devs);
2322 map->stripes[index].dev = device; 2477 map->stripes[index].dev = device;
2323 map->stripes[index].physical = dev_offset; 2478 map->stripes[index].physical =
2479 devices_info[i].dev_offset;
2324 index++; 2480 index++;
2325 if (type & BTRFS_BLOCK_GROUP_DUP) { 2481 if (type & BTRFS_BLOCK_GROUP_DUP) {
2326 map->stripes[index].dev = device; 2482 map->stripes[index].dev = device;
2327 map->stripes[index].physical = 2483 map->stripes[index].physical =
2328 dev_offset + calc_size; 2484 devices_info[i].dev_offset +
2485 calc_size;
2329 index++; 2486 index++;
2330 } 2487 }
2331 } 2488 } else if (ret != -ENOSPC)
2332 } else if (device->in_fs_metadata && avail > max_avail) 2489 goto error;
2333 max_avail = avail; 2490
2491 devices_info[i].dev = device;
2492 i++;
2493 } else if (device->in_fs_metadata &&
2494 avail >= BTRFS_STRIPE_LEN) {
2495 devices_info[i].dev = device;
2496 devices_info[i].max_avail = avail;
2497 i++;
2498 }
2499
2334 if (cur == &fs_devices->alloc_list) 2500 if (cur == &fs_devices->alloc_list)
2335 break; 2501 break;
2336 } 2502 }
2503
2337 list_splice(&private_devs, &fs_devices->alloc_list); 2504 list_splice(&private_devs, &fs_devices->alloc_list);
2338 if (index < num_stripes) { 2505 if (index < num_stripes) {
2339 if (index >= min_stripes) { 2506 if (index >= min_stripes) {
@@ -2342,36 +2509,36 @@ again:
2342 num_stripes /= sub_stripes; 2509 num_stripes /= sub_stripes;
2343 num_stripes *= sub_stripes; 2510 num_stripes *= sub_stripes;
2344 } 2511 }
2345 looped = 1; 2512
2346 goto again; 2513 map = __shrink_map_lookup_stripes(map, num_stripes);
2347 } 2514 } else if (i >= min_devices) {
2348 if (!looped && max_avail > 0) { 2515 ret = __btrfs_alloc_tiny_space(trans, fs_devices,
2349 looped = 1; 2516 devices_info, i, type,
2350 calc_size = max_avail; 2517 &map, min_stripes,
2351 if (type & BTRFS_BLOCK_GROUP_DUP) 2518 &calc_size);
2352 do_div(calc_size, 2); 2519 if (ret)
2353 goto again; 2520 goto error;
2521 } else {
2522 ret = -ENOSPC;
2523 goto error;
2354 } 2524 }
2355 kfree(map);
2356 return -ENOSPC;
2357 } 2525 }
2358 map->sector_size = extent_root->sectorsize; 2526 map->sector_size = extent_root->sectorsize;
2359 map->stripe_len = stripe_len; 2527 map->stripe_len = BTRFS_STRIPE_LEN;
2360 map->io_align = stripe_len; 2528 map->io_align = BTRFS_STRIPE_LEN;
2361 map->io_width = stripe_len; 2529 map->io_width = BTRFS_STRIPE_LEN;
2362 map->type = type; 2530 map->type = type;
2363 map->num_stripes = num_stripes;
2364 map->sub_stripes = sub_stripes; 2531 map->sub_stripes = sub_stripes;
2365 2532
2366 *map_ret = map; 2533 *map_ret = map;
2367 *stripe_size = calc_size; 2534 *stripe_size = calc_size;
2368 *num_bytes = chunk_bytes_by_type(type, calc_size, 2535 *num_bytes = chunk_bytes_by_type(type, calc_size,
2369 num_stripes, sub_stripes); 2536 map->num_stripes, sub_stripes);
2370 2537
2371 em = alloc_extent_map(GFP_NOFS); 2538 em = alloc_extent_map(GFP_NOFS);
2372 if (!em) { 2539 if (!em) {
2373 kfree(map); 2540 ret = -ENOMEM;
2374 return -ENOMEM; 2541 goto error;
2375 } 2542 }
2376 em->bdev = (struct block_device *)map; 2543 em->bdev = (struct block_device *)map;
2377 em->start = start; 2544 em->start = start;
@@ -2404,7 +2571,13 @@ again:
2404 index++; 2571 index++;
2405 } 2572 }
2406 2573
2574 kfree(devices_info);
2407 return 0; 2575 return 0;
2576
2577error:
2578 kfree(map);
2579 kfree(devices_info);
2580 return ret;
2408} 2581}
2409 2582
2410static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, 2583static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index a668c0116982..a5cfedf393f9 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -20,8 +20,11 @@
20#define __BTRFS_VOLUMES_ 20#define __BTRFS_VOLUMES_
21 21
22#include <linux/bio.h> 22#include <linux/bio.h>
23#include <linux/sort.h>
23#include "async-thread.h" 24#include "async-thread.h"
24 25
26#define BTRFS_STRIPE_LEN (64 * 1024)
27
25struct buffer_head; 28struct buffer_head;
26struct btrfs_pending_bios { 29struct btrfs_pending_bios {
27 struct bio *head; 30 struct bio *head;
@@ -137,6 +140,27 @@ struct btrfs_multi_bio {
137 struct btrfs_bio_stripe stripes[]; 140 struct btrfs_bio_stripe stripes[];
138}; 141};
139 142
143struct btrfs_device_info {
144 struct btrfs_device *dev;
145 u64 dev_offset;
146 u64 max_avail;
147};
148
149/* Used to sort the devices by max_avail(descending sort) */
150int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2);
151
152/*
153 * sort the devices by max_avail, in which max free extent size of each device
154 * is stored.(Descending Sort)
155 */
156static inline void btrfs_descending_sort_devices(
157 struct btrfs_device_info *devices,
158 size_t nr_devices)
159{
160 sort(devices, nr_devices, sizeof(struct btrfs_device_info),
161 btrfs_cmp_device_free_bytes, NULL);
162}
163
140#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ 164#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
141 (sizeof(struct btrfs_bio_stripe) * (n))) 165 (sizeof(struct btrfs_bio_stripe) * (n)))
142 166