aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Zheng Yan <zheng.yan@oracle.com> 2008-09-26 10:05:48 -0400
committer Chris Mason <chris.mason@oracle.com> 2008-09-26 10:05:48 -0400
commit e8569813849b5da394a195c7e76b4faa452b12d1 (patch)
tree 87b09cd5bfc3dda16e181c247fdd7f77b6c463ea
parent 24ab9cd85c11bccacbd0cce7f8e1aebd4930404c (diff)
Btrfs: allocator fixes for space balancing update

* Reserved extent accounting: reserved extents have been allocated in the
  rbtrees that track free space but have not been allocated on disk. They
  were never properly accounted for in the past, making it hard to know how
  much space was really free.

* btrfs_find_block_group used to return NULL for block groups that had been
  removed by the space balancing code. This made it hard to account for
  space during the final stages of a balance run.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

-rw-r--r-- fs/btrfs/ctree.h 2
-rw-r--r-- fs/btrfs/extent-tree.c 136
2 files changed, 67 insertions, 71 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3b3c1ca50c5d..c683aaa925fa 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -498,6 +498,7 @@ struct btrfs_space_info {
498 u64 total_bytes; 498 u64 total_bytes;
499 u64 bytes_used; 499 u64 bytes_used;
500 u64 bytes_pinned; 500 u64 bytes_pinned;
501 u64 bytes_reserved;
501 int full; 502 int full;
502 int force_alloc; 503 int force_alloc;
503 struct list_head list; 504 struct list_head list;
@@ -519,6 +520,7 @@ struct btrfs_block_group_cache {
519 struct btrfs_block_group_item item; 520 struct btrfs_block_group_item item;
520 spinlock_t lock; 521 spinlock_t lock;
521 u64 pinned; 522 u64 pinned;
523 u64 reserved;
522 u64 flags; 524 u64 flags;
523 int cached; 525 int cached;
524 int ro; 526 int ro;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fe4e11b31a43..3e2f969de42d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -325,12 +325,9 @@ static int noinline find_free_space(struct btrfs_root *root,
325 struct btrfs_block_group_cache *cache = *cache_ret; 325 struct btrfs_block_group_cache *cache = *cache_ret;
326 struct btrfs_free_space *info = NULL; 326 struct btrfs_free_space *info = NULL;
327 u64 last; 327 u64 last;
328 u64 total_fs_bytes;
329 u64 search_start = *start_ret; 328 u64 search_start = *start_ret;
330 329
331 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); 330 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
332 total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
333
334 if (!cache) 331 if (!cache)
335 goto out; 332 goto out;
336 333
@@ -354,7 +351,7 @@ new_group:
354 last = cache->key.objectid + cache->key.offset; 351 last = cache->key.objectid + cache->key.offset;
355 352
356 cache = btrfs_lookup_first_block_group(root->fs_info, last); 353 cache = btrfs_lookup_first_block_group(root->fs_info, last);
357 if (!cache || cache->key.objectid >= total_fs_bytes) 354 if (!cache)
358 goto out; 355 goto out;
359 356
360 *cache_ret = cache; 357 *cache_ret = cache;
@@ -385,7 +382,6 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
385 return found; 382 return found;
386 } 383 }
387 return NULL; 384 return NULL;
388
389} 385}
390 386
391static struct btrfs_block_group_cache * 387static struct btrfs_block_group_cache *
@@ -396,7 +392,6 @@ __btrfs_find_block_group(struct btrfs_root *root,
396 struct btrfs_block_group_cache *cache; 392 struct btrfs_block_group_cache *cache;
397 struct btrfs_block_group_cache *found_group = NULL; 393 struct btrfs_block_group_cache *found_group = NULL;
398 struct btrfs_fs_info *info = root->fs_info; 394 struct btrfs_fs_info *info = root->fs_info;
399 struct btrfs_space_info *sinfo;
400 u64 used; 395 u64 used;
401 u64 last = 0; 396 u64 last = 0;
402 u64 free_check; 397 u64 free_check;
@@ -413,7 +408,7 @@ __btrfs_find_block_group(struct btrfs_root *root,
413 if (shint && block_group_bits(shint, data) && !shint->ro) { 408 if (shint && block_group_bits(shint, data) && !shint->ro) {
414 spin_lock(&shint->lock); 409 spin_lock(&shint->lock);
415 used = btrfs_block_group_used(&shint->item); 410 used = btrfs_block_group_used(&shint->item);
416 if (used + shint->pinned < 411 if (used + shint->pinned + shint->reserved <
417 div_factor(shint->key.offset, factor)) { 412 div_factor(shint->key.offset, factor)) {
418 spin_unlock(&shint->lock); 413 spin_unlock(&shint->lock);
419 return shint; 414 return shint;
@@ -424,7 +419,7 @@ __btrfs_find_block_group(struct btrfs_root *root,
424 if (hint && !hint->ro && block_group_bits(hint, data)) { 419 if (hint && !hint->ro && block_group_bits(hint, data)) {
425 spin_lock(&hint->lock); 420 spin_lock(&hint->lock);
426 used = btrfs_block_group_used(&hint->item); 421 used = btrfs_block_group_used(&hint->item);
427 if (used + hint->pinned < 422 if (used + hint->pinned + hint->reserved <
428 div_factor(hint->key.offset, factor)) { 423 div_factor(hint->key.offset, factor)) {
429 spin_unlock(&hint->lock); 424 spin_unlock(&hint->lock);
430 return hint; 425 return hint;
@@ -437,27 +432,9 @@ __btrfs_find_block_group(struct btrfs_root *root,
437 else 432 else
438 last = search_start; 433 last = search_start;
439 } 434 }
440 sinfo = __find_space_info(root->fs_info, data);
441 if (!sinfo)
442 goto found;
443again: 435again:
444 while(1) { 436 while (1) {
445 struct list_head *l; 437 cache = btrfs_lookup_first_block_group(root->fs_info, last);
446
447 cache = NULL;
448
449 spin_lock(&sinfo->lock);
450 list_for_each(l, &sinfo->block_groups) {
451 struct btrfs_block_group_cache *entry;
452 entry = list_entry(l, struct btrfs_block_group_cache,
453 list);
454 if ((entry->key.objectid >= last) &&
455 (!cache || (entry->key.objectid <
456 cache->key.objectid)))
457 cache = entry;
458 }
459 spin_unlock(&sinfo->lock);
460
461 if (!cache) 438 if (!cache)
462 break; 439 break;
463 440
@@ -467,7 +444,8 @@ again:
467 444
468 if (!cache->ro && block_group_bits(cache, data)) { 445 if (!cache->ro && block_group_bits(cache, data)) {
469 free_check = div_factor(cache->key.offset, factor); 446 free_check = div_factor(cache->key.offset, factor);
470 if (used + cache->pinned < free_check) { 447 if (used + cache->pinned + cache->reserved <
448 free_check) {
471 found_group = cache; 449 found_group = cache;
472 spin_unlock(&cache->lock); 450 spin_unlock(&cache->lock);
473 goto found; 451 goto found;
@@ -1414,6 +1392,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1414 if (!cache) 1392 if (!cache)
1415 break; 1393 break;
1416 1394
1395 cache->dirty = 0;
1417 last += cache->key.offset; 1396 last += cache->key.offset;
1418 1397
1419 err = write_one_cache_group(trans, root, 1398 err = write_one_cache_group(trans, root,
@@ -1427,8 +1406,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1427 werr = err; 1406 werr = err;
1428 continue; 1407 continue;
1429 } 1408 }
1430
1431 cache->dirty = 0;
1432 } 1409 }
1433 btrfs_free_path(path); 1410 btrfs_free_path(path);
1434 mutex_unlock(&root->fs_info->alloc_mutex); 1411 mutex_unlock(&root->fs_info->alloc_mutex);
@@ -1460,6 +1437,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1460 found->total_bytes = total_bytes; 1437 found->total_bytes = total_bytes;
1461 found->bytes_used = bytes_used; 1438 found->bytes_used = bytes_used;
1462 found->bytes_pinned = 0; 1439 found->bytes_pinned = 0;
1440 found->bytes_reserved = 0;
1463 found->full = 0; 1441 found->full = 0;
1464 found->force_alloc = 0; 1442 found->force_alloc = 0;
1465 *space_info = found; 1443 *space_info = found;
@@ -1539,8 +1517,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1539 1517
1540 thresh = div_factor(space_info->total_bytes, 6); 1518 thresh = div_factor(space_info->total_bytes, 6);
1541 if (!force && 1519 if (!force &&
1542 (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < 1520 (space_info->bytes_used + space_info->bytes_pinned +
1543 thresh) 1521 space_info->bytes_reserved + alloc_bytes) < thresh)
1544 goto out; 1522 goto out;
1545 1523
1546 mutex_lock(&extent_root->fs_info->chunk_mutex); 1524 mutex_lock(&extent_root->fs_info->chunk_mutex);
@@ -1621,7 +1599,6 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1621 return cache->key.objectid; 1599 return cache->key.objectid;
1622} 1600}
1623 1601
1624
1625int btrfs_update_pinned_extents(struct btrfs_root *root, 1602int btrfs_update_pinned_extents(struct btrfs_root *root,
1626 u64 bytenr, u64 num, int pin) 1603 u64 bytenr, u64 num, int pin)
1627{ 1604{
@@ -1639,29 +1616,20 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
1639 } 1616 }
1640 while (num > 0) { 1617 while (num > 0) {
1641 cache = btrfs_lookup_block_group(fs_info, bytenr); 1618 cache = btrfs_lookup_block_group(fs_info, bytenr);
1642 if (!cache) { 1619 BUG_ON(!cache);
1643 u64 first = first_logical_byte(root, bytenr); 1620 len = min(num, cache->key.offset -
1644 WARN_ON(first < bytenr); 1621 (bytenr - cache->key.objectid));
1645 len = min(first - bytenr, num);
1646 } else {
1647 len = min(num, cache->key.offset -
1648 (bytenr - cache->key.objectid));
1649 }
1650 if (pin) { 1622 if (pin) {
1651 if (cache) { 1623 spin_lock(&cache->lock);
1652 spin_lock(&cache->lock); 1624 cache->pinned += len;
1653 cache->pinned += len; 1625 cache->space_info->bytes_pinned += len;
1654 cache->space_info->bytes_pinned += len; 1626 spin_unlock(&cache->lock);
1655 spin_unlock(&cache->lock);
1656 }
1657 fs_info->total_pinned += len; 1627 fs_info->total_pinned += len;
1658 } else { 1628 } else {
1659 if (cache) { 1629 spin_lock(&cache->lock);
1660 spin_lock(&cache->lock); 1630 cache->pinned -= len;
1661 cache->pinned -= len; 1631 cache->space_info->bytes_pinned -= len;
1662 cache->space_info->bytes_pinned -= len; 1632 spin_unlock(&cache->lock);
1663 spin_unlock(&cache->lock);
1664 }
1665 fs_info->total_pinned -= len; 1633 fs_info->total_pinned -= len;
1666 } 1634 }
1667 bytenr += len; 1635 bytenr += len;
@@ -1670,6 +1638,36 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
1670 return 0; 1638 return 0;
1671} 1639}
1672 1640
1641static int update_reserved_extents(struct btrfs_root *root,
1642 u64 bytenr, u64 num, int reserve)
1643{
1644 u64 len;
1645 struct btrfs_block_group_cache *cache;
1646 struct btrfs_fs_info *fs_info = root->fs_info;
1647
1648 WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1649 while (num > 0) {
1650 cache = btrfs_lookup_block_group(fs_info, bytenr);
1651 BUG_ON(!cache);
1652 len = min(num, cache->key.offset -
1653 (bytenr - cache->key.objectid));
1654 if (reserve) {
1655 spin_lock(&cache->lock);
1656 cache->reserved += len;
1657 cache->space_info->bytes_reserved += len;
1658 spin_unlock(&cache->lock);
1659 } else {
1660 spin_lock(&cache->lock);
1661 cache->reserved -= len;
1662 cache->space_info->bytes_reserved -= len;
1663 spin_unlock(&cache->lock);
1664 }
1665 bytenr += len;
1666 num -= len;
1667 }
1668 return 0;
1669}
1670
1673int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) 1671int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1674{ 1672{
1675 u64 last = 0; 1673 u64 last = 0;
@@ -2126,6 +2124,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2126 cache = btrfs_lookup_block_group(root->fs_info, bytenr); 2124 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
2127 BUG_ON(!cache); 2125 BUG_ON(!cache);
2128 btrfs_add_free_space(cache, bytenr, num_bytes); 2126 btrfs_add_free_space(cache, bytenr, num_bytes);
2127 update_reserved_extents(root, bytenr, num_bytes, 0);
2129 return 0; 2128 return 0;
2130 } 2129 }
2131 pin = 1; 2130 pin = 1;
@@ -2225,14 +2224,11 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
2225 search_start = max(search_start, first_logical_byte(root, 0)); 2224 search_start = max(search_start, first_logical_byte(root, 0));
2226 orig_search_start = search_start; 2225 orig_search_start = search_start;
2227 2226
2228 if (search_end == (u64)-1)
2229 search_end = btrfs_super_total_bytes(&info->super_copy);
2230
2231 search_start = max(search_start, hint_byte); 2227 search_start = max(search_start, hint_byte);
2232 total_needed += empty_size; 2228 total_needed += empty_size;
2233 2229
2234new_group: 2230new_group:
2235 block_group = btrfs_lookup_block_group(info, search_start); 2231 block_group = btrfs_lookup_first_block_group(info, search_start);
2236 2232
2237 /* 2233 /*
2238 * Ok this looks a little tricky, buts its really simple. First if we 2234 * Ok this looks a little tricky, buts its really simple. First if we
@@ -2257,12 +2253,8 @@ new_group:
2257 ret = do_chunk_alloc(trans, root, 2253 ret = do_chunk_alloc(trans, root,
2258 num_bytes + 2 * 1024 * 1024, 2254 num_bytes + 2 * 1024 * 1024,
2259 data, 1); 2255 data, 1);
2260 if (ret < 0) { 2256 if (ret < 0)
2261 struct btrfs_space_info *info;
2262
2263 info = __find_space_info(root->fs_info, data);
2264 goto error; 2257 goto error;
2265 }
2266 BUG_ON(ret); 2258 BUG_ON(ret);
2267 chunk_alloc_done = 1; 2259 chunk_alloc_done = 1;
2268 search_start = orig_search_start; 2260 search_start = orig_search_start;
@@ -2378,22 +2370,24 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
2378 struct list_head *l; 2370 struct list_head *l;
2379 2371
2380 printk(KERN_INFO "space_info has %Lu free, is %sfull\n", 2372 printk(KERN_INFO "space_info has %Lu free, is %sfull\n",
2381 info->total_bytes - info->bytes_used - info->bytes_pinned, 2373 info->total_bytes - info->bytes_used - info->bytes_pinned -
2382 (info->full) ? "" : "not "); 2374 info->bytes_reserved, (info->full) ? "" : "not ");
2383 2375
2384 spin_lock(&info->lock); 2376 spin_lock(&info->lock);
2385 list_for_each(l, &info->block_groups) { 2377 list_for_each(l, &info->block_groups) {
2386 cache = list_entry(l, struct btrfs_block_group_cache, list); 2378 cache = list_entry(l, struct btrfs_block_group_cache, list);
2387 spin_lock(&cache->lock); 2379 spin_lock(&cache->lock);
2388 printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used " 2380 printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used "
2389 "%Lu pinned\n", 2381 "%Lu pinned %Lu reserved\n",
2390 cache->key.objectid, cache->key.offset, 2382 cache->key.objectid, cache->key.offset,
2391 btrfs_block_group_used(&cache->item), cache->pinned); 2383 btrfs_block_group_used(&cache->item),
2384 cache->pinned, cache->reserved);
2392 btrfs_dump_free_space(cache, bytes); 2385 btrfs_dump_free_space(cache, bytes);
2393 spin_unlock(&cache->lock); 2386 spin_unlock(&cache->lock);
2394 } 2387 }
2395 spin_unlock(&info->lock); 2388 spin_unlock(&info->lock);
2396} 2389}
2390
2397static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, 2391static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2398 struct btrfs_root *root, 2392 struct btrfs_root *root,
2399 u64 num_bytes, u64 min_alloc_size, 2393 u64 num_bytes, u64 min_alloc_size,
@@ -2500,6 +2494,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2500 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, 2494 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
2501 empty_size, hint_byte, search_end, ins, 2495 empty_size, hint_byte, search_end, ins,
2502 data); 2496 data);
2497 update_reserved_extents(root, ins->objectid, ins->offset, 1);
2503 maybe_unlock_mutex(root); 2498 maybe_unlock_mutex(root);
2504 return ret; 2499 return ret;
2505} 2500}
@@ -2625,6 +2620,7 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2625 ret = __btrfs_alloc_reserved_extent(trans, root, parent, 2620 ret = __btrfs_alloc_reserved_extent(trans, root, parent,
2626 root_objectid, ref_generation, 2621 root_objectid, ref_generation,
2627 owner, owner_offset, ins); 2622 owner, owner_offset, ins);
2623 update_reserved_extents(root, ins->objectid, ins->offset, 0);
2628 maybe_unlock_mutex(root); 2624 maybe_unlock_mutex(root);
2629 return ret; 2625 return ret;
2630} 2626}
@@ -2685,6 +2681,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2685 owner_objectid, owner_offset, ins); 2681 owner_objectid, owner_offset, ins);
2686 BUG_ON(ret); 2682 BUG_ON(ret);
2687 2683
2684 } else {
2685 update_reserved_extents(root, ins->objectid, ins->offset, 1);
2688 } 2686 }
2689 maybe_unlock_mutex(root); 2687 maybe_unlock_mutex(root);
2690 return ret; 2688 return ret;
@@ -3974,10 +3972,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
3974 3972
3975 ret = btrfs_add_block_group_cache(root->fs_info, cache); 3973 ret = btrfs_add_block_group_cache(root->fs_info, cache);
3976 BUG_ON(ret); 3974 BUG_ON(ret);
3977
3978 if (key.objectid >=
3979 btrfs_super_total_bytes(&info->super_copy))
3980 break;
3981 } 3975 }
3982 ret = 0; 3976 ret = 0;
3983error: 3977error: