aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
authorYan, Zheng <zheng.yan@oracle.com>2010-05-16 10:46:25 -0400
committerChris Mason <chris.mason@oracle.com>2010-05-25 10:34:50 -0400
commitf0486c68e4bd9a06a5904d3eeb3a0d73a83befb8 (patch)
tree509428ef400ef45e875a3c448b63b86cbea36aea /fs/btrfs/extent-tree.c
parent2ead6ae770d9f9dec9f4286bf0fd9001b4388c4b (diff)
Btrfs: Introduce contexts for metadata reservation
Introducing metadata reservation contexts has two major advantages. First, it makes metadata reservation more traceable. Second, a context can reclaim freed space and re-add it to itself after the transaction is committed. Besides adding the btrfs_block_rsv structure and related helper functions, this patch contains the following changes: Move the code that decides whether a freed tree block should be pinned into btrfs_free_tree_block(). Make space accounting more accurate, mainly for handling read-only block groups. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c1029
1 files changed, 701 insertions, 328 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f32b1618ee6d..3367278ac6a1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -35,10 +35,9 @@
35 35
36static int update_block_group(struct btrfs_trans_handle *trans, 36static int update_block_group(struct btrfs_trans_handle *trans,
37 struct btrfs_root *root, 37 struct btrfs_root *root,
38 u64 bytenr, u64 num_bytes, int alloc, 38 u64 bytenr, u64 num_bytes, int alloc);
39 int mark_free); 39static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
40static int update_reserved_extents(struct btrfs_block_group_cache *cache, 40 u64 num_bytes, int reserve, int sinfo);
41 u64 num_bytes, int reserve);
42static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 41static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
43 struct btrfs_root *root, 42 struct btrfs_root *root,
44 u64 bytenr, u64 num_bytes, u64 parent, 43 u64 bytenr, u64 num_bytes, u64 parent,
@@ -61,12 +60,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
61static int do_chunk_alloc(struct btrfs_trans_handle *trans, 60static int do_chunk_alloc(struct btrfs_trans_handle *trans,
62 struct btrfs_root *extent_root, u64 alloc_bytes, 61 struct btrfs_root *extent_root, u64 alloc_bytes,
63 u64 flags, int force); 62 u64 flags, int force);
64static int pin_down_bytes(struct btrfs_trans_handle *trans,
65 struct btrfs_root *root,
66 struct btrfs_path *path,
67 u64 bytenr, u64 num_bytes,
68 int is_data, int reserved,
69 struct extent_buffer **must_clean);
70static int find_next_key(struct btrfs_path *path, int level, 63static int find_next_key(struct btrfs_path *path, int level,
71 struct btrfs_key *key); 64 struct btrfs_key *key);
72static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 65static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
@@ -97,8 +90,12 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
97 90
98void btrfs_put_block_group(struct btrfs_block_group_cache *cache) 91void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
99{ 92{
100 if (atomic_dec_and_test(&cache->count)) 93 if (atomic_dec_and_test(&cache->count)) {
94 WARN_ON(cache->pinned > 0);
95 WARN_ON(cache->reserved > 0);
96 WARN_ON(cache->reserved_pinned > 0);
101 kfree(cache); 97 kfree(cache);
98 }
102} 99}
103 100
104/* 101/*
@@ -325,7 +322,7 @@ static int caching_kthread(void *data)
325 322
326 exclude_super_stripes(extent_root, block_group); 323 exclude_super_stripes(extent_root, block_group);
327 spin_lock(&block_group->space_info->lock); 324 spin_lock(&block_group->space_info->lock);
328 block_group->space_info->bytes_super += block_group->bytes_super; 325 block_group->space_info->bytes_readonly += block_group->bytes_super;
329 spin_unlock(&block_group->space_info->lock); 326 spin_unlock(&block_group->space_info->lock);
330 327
331 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 328 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
@@ -1880,7 +1877,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1880 return ret; 1877 return ret;
1881} 1878}
1882 1879
1883
1884/* helper function to actually process a single delayed ref entry */ 1880/* helper function to actually process a single delayed ref entry */
1885static int run_one_delayed_ref(struct btrfs_trans_handle *trans, 1881static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1886 struct btrfs_root *root, 1882 struct btrfs_root *root,
@@ -1900,32 +1896,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1900 BUG_ON(extent_op); 1896 BUG_ON(extent_op);
1901 head = btrfs_delayed_node_to_head(node); 1897 head = btrfs_delayed_node_to_head(node);
1902 if (insert_reserved) { 1898 if (insert_reserved) {
1903 int mark_free = 0; 1899 btrfs_pin_extent(root, node->bytenr,
1904 struct extent_buffer *must_clean = NULL; 1900 node->num_bytes, 1);
1905
1906 ret = pin_down_bytes(trans, root, NULL,
1907 node->bytenr, node->num_bytes,
1908 head->is_data, 1, &must_clean);
1909 if (ret > 0)
1910 mark_free = 1;
1911
1912 if (must_clean) {
1913 clean_tree_block(NULL, root, must_clean);
1914 btrfs_tree_unlock(must_clean);
1915 free_extent_buffer(must_clean);
1916 }
1917 if (head->is_data) { 1901 if (head->is_data) {
1918 ret = btrfs_del_csums(trans, root, 1902 ret = btrfs_del_csums(trans, root,
1919 node->bytenr, 1903 node->bytenr,
1920 node->num_bytes); 1904 node->num_bytes);
1921 BUG_ON(ret); 1905 BUG_ON(ret);
1922 } 1906 }
1923 if (mark_free) {
1924 ret = btrfs_free_reserved_extent(root,
1925 node->bytenr,
1926 node->num_bytes);
1927 BUG_ON(ret);
1928 }
1929 } 1907 }
1930 mutex_unlock(&head->mutex); 1908 mutex_unlock(&head->mutex);
1931 return 0; 1909 return 0;
@@ -2356,6 +2334,8 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
2356 ret = 0; 2334 ret = 0;
2357out: 2335out:
2358 btrfs_free_path(path); 2336 btrfs_free_path(path);
2337 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2338 WARN_ON(ret > 0);
2359 return ret; 2339 return ret;
2360} 2340}
2361 2341
@@ -2706,7 +2686,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2706 found->bytes_pinned = 0; 2686 found->bytes_pinned = 0;
2707 found->bytes_reserved = 0; 2687 found->bytes_reserved = 0;
2708 found->bytes_readonly = 0; 2688 found->bytes_readonly = 0;
2709 found->bytes_delalloc = 0; 2689 found->bytes_may_use = 0;
2710 found->full = 0; 2690 found->full = 0;
2711 found->force_alloc = 0; 2691 found->force_alloc = 0;
2712 *space_info = found; 2692 *space_info = found;
@@ -2731,19 +2711,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
2731 } 2711 }
2732} 2712}
2733 2713
2734static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
2735{
2736 spin_lock(&cache->space_info->lock);
2737 spin_lock(&cache->lock);
2738 if (!cache->ro) {
2739 cache->space_info->bytes_readonly += cache->key.offset -
2740 btrfs_block_group_used(&cache->item);
2741 cache->ro = 1;
2742 }
2743 spin_unlock(&cache->lock);
2744 spin_unlock(&cache->space_info->lock);
2745}
2746
2747u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 2714u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
2748{ 2715{
2749 u64 num_devices = root->fs_info->fs_devices->rw_devices; 2716 u64 num_devices = root->fs_info->fs_devices->rw_devices;
@@ -2802,11 +2769,8 @@ static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
2802 2769
2803void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) 2770void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
2804{ 2771{
2805 u64 alloc_target;
2806
2807 alloc_target = btrfs_get_alloc_profile(root, 1);
2808 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, 2772 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
2809 alloc_target); 2773 BTRFS_BLOCK_GROUP_DATA);
2810} 2774}
2811 2775
2812static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) 2776static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
@@ -3412,10 +3376,334 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3412 return reclaimed >= to_reclaim; 3376 return reclaimed >= to_reclaim;
3413} 3377}
3414 3378
3379static int should_retry_reserve(struct btrfs_trans_handle *trans,
3380 struct btrfs_root *root,
3381 struct btrfs_block_rsv *block_rsv,
3382 u64 num_bytes, int *retries)
3383{
3384 struct btrfs_space_info *space_info = block_rsv->space_info;
3385 int ret;
3386
3387 if ((*retries) > 2)
3388 return -ENOSPC;
3389
3390 ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
3391 if (ret)
3392 return 1;
3393
3394 if (trans && trans->transaction->in_commit)
3395 return -ENOSPC;
3396
3397 ret = shrink_delalloc(trans, root, space_info, num_bytes);
3398 if (ret)
3399 return ret;
3400
3401 spin_lock(&space_info->lock);
3402 if (space_info->bytes_pinned < num_bytes)
3403 ret = 1;
3404 spin_unlock(&space_info->lock);
3405 if (ret)
3406 return -ENOSPC;
3407
3408 (*retries)++;
3409
3410 if (trans)
3411 return -EAGAIN;
3412
3413 trans = btrfs_join_transaction(root, 1);
3414 BUG_ON(IS_ERR(trans));
3415 ret = btrfs_commit_transaction(trans, root);
3416 BUG_ON(ret);
3417
3418 return 1;
3419}
3420
3421static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
3422 u64 num_bytes)
3423{
3424 struct btrfs_space_info *space_info = block_rsv->space_info;
3425 u64 unused;
3426 int ret = -ENOSPC;
3427
3428 spin_lock(&space_info->lock);
3429 unused = space_info->bytes_used + space_info->bytes_reserved +
3430 space_info->bytes_pinned + space_info->bytes_readonly;
3431
3432 if (unused < space_info->total_bytes)
3433 unused = space_info->total_bytes - unused;
3434 else
3435 unused = 0;
3436
3437 if (unused >= num_bytes) {
3438 if (block_rsv->priority >= 10) {
3439 space_info->bytes_reserved += num_bytes;
3440 ret = 0;
3441 } else {
3442 if ((unused + block_rsv->reserved) *
3443 block_rsv->priority >=
3444 (num_bytes + block_rsv->reserved) * 10) {
3445 space_info->bytes_reserved += num_bytes;
3446 ret = 0;
3447 }
3448 }
3449 }
3450 spin_unlock(&space_info->lock);
3451
3452 return ret;
3453}
3454
3455static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
3456 struct btrfs_root *root)
3457{
3458 struct btrfs_block_rsv *block_rsv;
3459 if (root->ref_cows)
3460 block_rsv = trans->block_rsv;
3461 else
3462 block_rsv = root->block_rsv;
3463
3464 if (!block_rsv)
3465 block_rsv = &root->fs_info->empty_block_rsv;
3466
3467 return block_rsv;
3468}
3469
3470static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
3471 u64 num_bytes)
3472{
3473 int ret = -ENOSPC;
3474 spin_lock(&block_rsv->lock);
3475 if (block_rsv->reserved >= num_bytes) {
3476 block_rsv->reserved -= num_bytes;
3477 if (block_rsv->reserved < block_rsv->size)
3478 block_rsv->full = 0;
3479 ret = 0;
3480 }
3481 spin_unlock(&block_rsv->lock);
3482 return ret;
3483}
3484
3485static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
3486 u64 num_bytes, int update_size)
3487{
3488 spin_lock(&block_rsv->lock);
3489 block_rsv->reserved += num_bytes;
3490 if (update_size)
3491 block_rsv->size += num_bytes;
3492 else if (block_rsv->reserved >= block_rsv->size)
3493 block_rsv->full = 1;
3494 spin_unlock(&block_rsv->lock);
3495}
3496
3497void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3498 struct btrfs_block_rsv *dest, u64 num_bytes)
3499{
3500 struct btrfs_space_info *space_info = block_rsv->space_info;
3501
3502 spin_lock(&block_rsv->lock);
3503 if (num_bytes == (u64)-1)
3504 num_bytes = block_rsv->size;
3505 block_rsv->size -= num_bytes;
3506 if (block_rsv->reserved >= block_rsv->size) {
3507 num_bytes = block_rsv->reserved - block_rsv->size;
3508 block_rsv->reserved = block_rsv->size;
3509 block_rsv->full = 1;
3510 } else {
3511 num_bytes = 0;
3512 }
3513 spin_unlock(&block_rsv->lock);
3514
3515 if (num_bytes > 0) {
3516 if (dest) {
3517 block_rsv_add_bytes(dest, num_bytes, 0);
3518 } else {
3519 spin_lock(&space_info->lock);
3520 space_info->bytes_reserved -= num_bytes;
3521 spin_unlock(&space_info->lock);
3522 }
3523 }
3524}
3525
3526static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
3527 struct btrfs_block_rsv *dst, u64 num_bytes)
3528{
3529 int ret;
3530
3531 ret = block_rsv_use_bytes(src, num_bytes);
3532 if (ret)
3533 return ret;
3534
3535 block_rsv_add_bytes(dst, num_bytes, 1);
3536 return 0;
3537}
3538
3539void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
3540{
3541 memset(rsv, 0, sizeof(*rsv));
3542 spin_lock_init(&rsv->lock);
3543 atomic_set(&rsv->usage, 1);
3544 rsv->priority = 6;
3545 INIT_LIST_HEAD(&rsv->list);
3546}
3547
3548struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
3549{
3550 struct btrfs_block_rsv *block_rsv;
3551 struct btrfs_fs_info *fs_info = root->fs_info;
3552 u64 alloc_target;
3553
3554 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
3555 if (!block_rsv)
3556 return NULL;
3557
3558 btrfs_init_block_rsv(block_rsv);
3559
3560 alloc_target = btrfs_get_alloc_profile(root, 0);
3561 block_rsv->space_info = __find_space_info(fs_info,
3562 BTRFS_BLOCK_GROUP_METADATA);
3563
3564 return block_rsv;
3565}
3566
3567void btrfs_free_block_rsv(struct btrfs_root *root,
3568 struct btrfs_block_rsv *rsv)
3569{
3570 if (rsv && atomic_dec_and_test(&rsv->usage)) {
3571 btrfs_block_rsv_release(root, rsv, (u64)-1);
3572 if (!rsv->durable)
3573 kfree(rsv);
3574 }
3575}
3576
3577/*
3578 * make the block_rsv struct be able to capture freed space.
3579 * the captured space will re-add to the the block_rsv struct
3580 * after transaction commit
3581 */
3582void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
3583 struct btrfs_block_rsv *block_rsv)
3584{
3585 block_rsv->durable = 1;
3586 mutex_lock(&fs_info->durable_block_rsv_mutex);
3587 list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list);
3588 mutex_unlock(&fs_info->durable_block_rsv_mutex);
3589}
3590
3591int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
3592 struct btrfs_root *root,
3593 struct btrfs_block_rsv *block_rsv,
3594 u64 num_bytes, int *retries)
3595{
3596 int ret;
3597
3598 if (num_bytes == 0)
3599 return 0;
3600again:
3601 ret = reserve_metadata_bytes(block_rsv, num_bytes);
3602 if (!ret) {
3603 block_rsv_add_bytes(block_rsv, num_bytes, 1);
3604 return 0;
3605 }
3606
3607 ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
3608 if (ret > 0)
3609 goto again;
3610
3611 return ret;
3612}
3613
3614int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
3615 struct btrfs_root *root,
3616 struct btrfs_block_rsv *block_rsv,
3617 u64 min_reserved, int min_factor)
3618{
3619 u64 num_bytes = 0;
3620 int commit_trans = 0;
3621 int ret = -ENOSPC;
3622
3623 if (!block_rsv)
3624 return 0;
3625
3626 spin_lock(&block_rsv->lock);
3627 if (min_factor > 0)
3628 num_bytes = div_factor(block_rsv->size, min_factor);
3629 if (min_reserved > num_bytes)
3630 num_bytes = min_reserved;
3631
3632 if (block_rsv->reserved >= num_bytes) {
3633 ret = 0;
3634 } else {
3635 num_bytes -= block_rsv->reserved;
3636 if (block_rsv->durable &&
3637 block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes)
3638 commit_trans = 1;
3639 }
3640 spin_unlock(&block_rsv->lock);
3641 if (!ret)
3642 return 0;
3643
3644 if (block_rsv->refill_used) {
3645 ret = reserve_metadata_bytes(block_rsv, num_bytes);
3646 if (!ret) {
3647 block_rsv_add_bytes(block_rsv, num_bytes, 0);
3648 return 0;
3649 }
3650 }
3651
3652 if (commit_trans) {
3653 if (trans)
3654 return -EAGAIN;
3655
3656 trans = btrfs_join_transaction(root, 1);
3657 BUG_ON(IS_ERR(trans));
3658 ret = btrfs_commit_transaction(trans, root);
3659 return 0;
3660 }
3661
3662 WARN_ON(1);
3663 printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
3664 block_rsv->size, block_rsv->reserved,
3665 block_rsv->freed[0], block_rsv->freed[1]);
3666
3667 return -ENOSPC;
3668}
3669
3670int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
3671 struct btrfs_block_rsv *dst_rsv,
3672 u64 num_bytes)
3673{
3674 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3675}
3676
3677void btrfs_block_rsv_release(struct btrfs_root *root,
3678 struct btrfs_block_rsv *block_rsv,
3679 u64 num_bytes)
3680{
3681 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
3682 if (global_rsv->full || global_rsv == block_rsv ||
3683 block_rsv->space_info != global_rsv->space_info)
3684 global_rsv = NULL;
3685 block_rsv_release_bytes(block_rsv, global_rsv, num_bytes);
3686}
3687
3688static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
3689{
3690 struct btrfs_space_info *space_info;
3691
3692 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
3693 fs_info->chunk_block_rsv.space_info = space_info;
3694 fs_info->chunk_block_rsv.priority = 10;
3695
3696 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
3697 fs_info->trans_block_rsv.space_info = space_info;
3698 fs_info->empty_block_rsv.space_info = space_info;
3699 fs_info->empty_block_rsv.priority = 10;
3700
3701 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
3702}
3703
3415static int update_block_group(struct btrfs_trans_handle *trans, 3704static int update_block_group(struct btrfs_trans_handle *trans,
3416 struct btrfs_root *root, 3705 struct btrfs_root *root,
3417 u64 bytenr, u64 num_bytes, int alloc, 3706 u64 bytenr, u64 num_bytes, int alloc)
3418 int mark_free)
3419{ 3707{
3420 struct btrfs_block_group_cache *cache; 3708 struct btrfs_block_group_cache *cache;
3421 struct btrfs_fs_info *info = root->fs_info; 3709 struct btrfs_fs_info *info = root->fs_info;
@@ -3459,30 +3747,21 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3459 cache->space_info->bytes_reserved -= num_bytes; 3747 cache->space_info->bytes_reserved -= num_bytes;
3460 cache->space_info->bytes_used += num_bytes; 3748 cache->space_info->bytes_used += num_bytes;
3461 cache->space_info->disk_used += num_bytes * factor; 3749 cache->space_info->disk_used += num_bytes * factor;
3462 if (cache->ro)
3463 cache->space_info->bytes_readonly -= num_bytes;
3464 spin_unlock(&cache->lock); 3750 spin_unlock(&cache->lock);
3465 spin_unlock(&cache->space_info->lock); 3751 spin_unlock(&cache->space_info->lock);
3466 } else { 3752 } else {
3467 old_val -= num_bytes; 3753 old_val -= num_bytes;
3468 btrfs_set_block_group_used(&cache->item, old_val); 3754 btrfs_set_block_group_used(&cache->item, old_val);
3755 cache->pinned += num_bytes;
3756 cache->space_info->bytes_pinned += num_bytes;
3469 cache->space_info->bytes_used -= num_bytes; 3757 cache->space_info->bytes_used -= num_bytes;
3470 cache->space_info->disk_used -= num_bytes * factor; 3758 cache->space_info->disk_used -= num_bytes * factor;
3471 if (cache->ro)
3472 cache->space_info->bytes_readonly += num_bytes;
3473 spin_unlock(&cache->lock); 3759 spin_unlock(&cache->lock);
3474 spin_unlock(&cache->space_info->lock); 3760 spin_unlock(&cache->space_info->lock);
3475 if (mark_free) {
3476 int ret;
3477 3761
3478 ret = btrfs_discard_extent(root, bytenr, 3762 set_extent_dirty(info->pinned_extents,
3479 num_bytes); 3763 bytenr, bytenr + num_bytes - 1,
3480 WARN_ON(ret); 3764 GFP_NOFS | __GFP_NOFAIL);
3481
3482 ret = btrfs_add_free_space(cache, bytenr,
3483 num_bytes);
3484 WARN_ON(ret);
3485 }
3486 } 3765 }
3487 btrfs_put_block_group(cache); 3766 btrfs_put_block_group(cache);
3488 total -= num_bytes; 3767 total -= num_bytes;
@@ -3506,18 +3785,10 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
3506 return bytenr; 3785 return bytenr;
3507} 3786}
3508 3787
3509/* 3788static int pin_down_extent(struct btrfs_root *root,
3510 * this function must be called within transaction 3789 struct btrfs_block_group_cache *cache,
3511 */ 3790 u64 bytenr, u64 num_bytes, int reserved)
3512int btrfs_pin_extent(struct btrfs_root *root,
3513 u64 bytenr, u64 num_bytes, int reserved)
3514{ 3791{
3515 struct btrfs_fs_info *fs_info = root->fs_info;
3516 struct btrfs_block_group_cache *cache;
3517
3518 cache = btrfs_lookup_block_group(fs_info, bytenr);
3519 BUG_ON(!cache);
3520
3521 spin_lock(&cache->space_info->lock); 3792 spin_lock(&cache->space_info->lock);
3522 spin_lock(&cache->lock); 3793 spin_lock(&cache->lock);
3523 cache->pinned += num_bytes; 3794 cache->pinned += num_bytes;
@@ -3529,28 +3800,68 @@ int btrfs_pin_extent(struct btrfs_root *root,
3529 spin_unlock(&cache->lock); 3800 spin_unlock(&cache->lock);
3530 spin_unlock(&cache->space_info->lock); 3801 spin_unlock(&cache->space_info->lock);
3531 3802
3532 btrfs_put_block_group(cache); 3803 set_extent_dirty(root->fs_info->pinned_extents, bytenr,
3804 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
3805 return 0;
3806}
3533 3807
3534 set_extent_dirty(fs_info->pinned_extents, 3808/*
3535 bytenr, bytenr + num_bytes - 1, GFP_NOFS); 3809 * this function must be called within transaction
3810 */
3811int btrfs_pin_extent(struct btrfs_root *root,
3812 u64 bytenr, u64 num_bytes, int reserved)
3813{
3814 struct btrfs_block_group_cache *cache;
3815
3816 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
3817 BUG_ON(!cache);
3818
3819 pin_down_extent(root, cache, bytenr, num_bytes, reserved);
3820
3821 btrfs_put_block_group(cache);
3536 return 0; 3822 return 0;
3537} 3823}
3538 3824
3539static int update_reserved_extents(struct btrfs_block_group_cache *cache, 3825/*
3540 u64 num_bytes, int reserve) 3826 * update size of reserved extents. this function may return -EAGAIN
3827 * if 'reserve' is true or 'sinfo' is false.
3828 */
3829static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
3830 u64 num_bytes, int reserve, int sinfo)
3541{ 3831{
3542 spin_lock(&cache->space_info->lock); 3832 int ret = 0;
3543 spin_lock(&cache->lock); 3833 if (sinfo) {
3544 if (reserve) { 3834 struct btrfs_space_info *space_info = cache->space_info;
3545 cache->reserved += num_bytes; 3835 spin_lock(&space_info->lock);
3546 cache->space_info->bytes_reserved += num_bytes; 3836 spin_lock(&cache->lock);
3837 if (reserve) {
3838 if (cache->ro) {
3839 ret = -EAGAIN;
3840 } else {
3841 cache->reserved += num_bytes;
3842 space_info->bytes_reserved += num_bytes;
3843 }
3844 } else {
3845 if (cache->ro)
3846 space_info->bytes_readonly += num_bytes;
3847 cache->reserved -= num_bytes;
3848 space_info->bytes_reserved -= num_bytes;
3849 }
3850 spin_unlock(&cache->lock);
3851 spin_unlock(&space_info->lock);
3547 } else { 3852 } else {
3548 cache->reserved -= num_bytes; 3853 spin_lock(&cache->lock);
3549 cache->space_info->bytes_reserved -= num_bytes; 3854 if (cache->ro) {
3855 ret = -EAGAIN;
3856 } else {
3857 if (reserve)
3858 cache->reserved += num_bytes;
3859 else
3860 cache->reserved -= num_bytes;
3861 }
3862 spin_unlock(&cache->lock);
3550 } 3863 }
3551 spin_unlock(&cache->lock); 3864 return ret;
3552 spin_unlock(&cache->space_info->lock);
3553 return 0;
3554} 3865}
3555 3866
3556int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 3867int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
@@ -3607,14 +3918,21 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
3607 btrfs_add_free_space(cache, start, len); 3918 btrfs_add_free_space(cache, start, len);
3608 } 3919 }
3609 3920
3921 start += len;
3922
3610 spin_lock(&cache->space_info->lock); 3923 spin_lock(&cache->space_info->lock);
3611 spin_lock(&cache->lock); 3924 spin_lock(&cache->lock);
3612 cache->pinned -= len; 3925 cache->pinned -= len;
3613 cache->space_info->bytes_pinned -= len; 3926 cache->space_info->bytes_pinned -= len;
3927 if (cache->ro) {
3928 cache->space_info->bytes_readonly += len;
3929 } else if (cache->reserved_pinned > 0) {
3930 len = min(len, cache->reserved_pinned);
3931 cache->reserved_pinned -= len;
3932 cache->space_info->bytes_reserved += len;
3933 }
3614 spin_unlock(&cache->lock); 3934 spin_unlock(&cache->lock);
3615 spin_unlock(&cache->space_info->lock); 3935 spin_unlock(&cache->space_info->lock);
3616
3617 start += len;
3618 } 3936 }
3619 3937
3620 if (cache) 3938 if (cache)
@@ -3627,8 +3945,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3627{ 3945{
3628 struct btrfs_fs_info *fs_info = root->fs_info; 3946 struct btrfs_fs_info *fs_info = root->fs_info;
3629 struct extent_io_tree *unpin; 3947 struct extent_io_tree *unpin;
3948 struct btrfs_block_rsv *block_rsv;
3949 struct btrfs_block_rsv *next_rsv;
3630 u64 start; 3950 u64 start;
3631 u64 end; 3951 u64 end;
3952 int idx;
3632 int ret; 3953 int ret;
3633 3954
3634 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 3955 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
@@ -3649,59 +3970,30 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3649 cond_resched(); 3970 cond_resched();
3650 } 3971 }
3651 3972
3652 return ret; 3973 mutex_lock(&fs_info->durable_block_rsv_mutex);
3653} 3974 list_for_each_entry_safe(block_rsv, next_rsv,
3654 3975 &fs_info->durable_block_rsv_list, list) {
3655static int pin_down_bytes(struct btrfs_trans_handle *trans,
3656 struct btrfs_root *root,
3657 struct btrfs_path *path,
3658 u64 bytenr, u64 num_bytes,
3659 int is_data, int reserved,
3660 struct extent_buffer **must_clean)
3661{
3662 int err = 0;
3663 struct extent_buffer *buf;
3664
3665 if (is_data)
3666 goto pinit;
3667 3976
3668 /* 3977 idx = trans->transid & 0x1;
3669 * discard is sloooow, and so triggering discards on 3978 if (block_rsv->freed[idx] > 0) {
3670 * individual btree blocks isn't a good plan. Just 3979 block_rsv_add_bytes(block_rsv,
3671 * pin everything in discard mode. 3980 block_rsv->freed[idx], 0);
3672 */ 3981 block_rsv->freed[idx] = 0;
3673 if (btrfs_test_opt(root, DISCARD)) 3982 }
3674 goto pinit; 3983 if (atomic_read(&block_rsv->usage) == 0) {
3675 3984 btrfs_block_rsv_release(root, block_rsv, (u64)-1);
3676 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
3677 if (!buf)
3678 goto pinit;
3679 3985
3680 /* we can reuse a block if it hasn't been written 3986 if (block_rsv->freed[0] == 0 &&
3681 * and it is from this transaction. We can't 3987 block_rsv->freed[1] == 0) {
3682 * reuse anything from the tree log root because 3988 list_del_init(&block_rsv->list);
3683 * it has tiny sub-transactions. 3989 kfree(block_rsv);
3684 */ 3990 }
3685 if (btrfs_buffer_uptodate(buf, 0) && 3991 } else {
3686 btrfs_try_tree_lock(buf)) { 3992 btrfs_block_rsv_release(root, block_rsv, 0);
3687 u64 header_owner = btrfs_header_owner(buf);
3688 u64 header_transid = btrfs_header_generation(buf);
3689 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
3690 header_transid == trans->transid &&
3691 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
3692 *must_clean = buf;
3693 return 1;
3694 } 3993 }
3695 btrfs_tree_unlock(buf);
3696 } 3994 }
3697 free_extent_buffer(buf); 3995 mutex_unlock(&fs_info->durable_block_rsv_mutex);
3698pinit:
3699 if (path)
3700 btrfs_set_path_blocking(path);
3701 /* unlocks the pinned mutex */
3702 btrfs_pin_extent(root, bytenr, num_bytes, reserved);
3703 3996
3704 BUG_ON(err < 0);
3705 return 0; 3997 return 0;
3706} 3998}
3707 3999
@@ -3862,9 +4154,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3862 BUG_ON(ret); 4154 BUG_ON(ret);
3863 } 4155 }
3864 } else { 4156 } else {
3865 int mark_free = 0;
3866 struct extent_buffer *must_clean = NULL;
3867
3868 if (found_extent) { 4157 if (found_extent) {
3869 BUG_ON(is_data && refs_to_drop != 4158 BUG_ON(is_data && refs_to_drop !=
3870 extent_data_ref_count(root, path, iref)); 4159 extent_data_ref_count(root, path, iref));
@@ -3877,31 +4166,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3877 } 4166 }
3878 } 4167 }
3879 4168
3880 ret = pin_down_bytes(trans, root, path, bytenr,
3881 num_bytes, is_data, 0, &must_clean);
3882 if (ret > 0)
3883 mark_free = 1;
3884 BUG_ON(ret < 0);
3885 /*
3886 * it is going to be very rare for someone to be waiting
3887 * on the block we're freeing. del_items might need to
3888 * schedule, so rather than get fancy, just force it
3889 * to blocking here
3890 */
3891 if (must_clean)
3892 btrfs_set_lock_blocking(must_clean);
3893
3894 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 4169 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
3895 num_to_del); 4170 num_to_del);
3896 BUG_ON(ret); 4171 BUG_ON(ret);
3897 btrfs_release_path(extent_root, path); 4172 btrfs_release_path(extent_root, path);
3898 4173
3899 if (must_clean) {
3900 clean_tree_block(NULL, root, must_clean);
3901 btrfs_tree_unlock(must_clean);
3902 free_extent_buffer(must_clean);
3903 }
3904
3905 if (is_data) { 4174 if (is_data) {
3906 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 4175 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
3907 BUG_ON(ret); 4176 BUG_ON(ret);
@@ -3911,8 +4180,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3911 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); 4180 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
3912 } 4181 }
3913 4182
3914 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 4183 ret = update_block_group(trans, root, bytenr, num_bytes, 0);
3915 mark_free);
3916 BUG_ON(ret); 4184 BUG_ON(ret);
3917 } 4185 }
3918 btrfs_free_path(path); 4186 btrfs_free_path(path);
@@ -3920,7 +4188,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3920} 4188}
3921 4189
3922/* 4190/*
3923 * when we free an extent, it is possible (and likely) that we free the last 4191 * when we free a block, it is possible (and likely) that we free the last
3924 * delayed ref for that extent as well. This searches the delayed ref tree for 4192 * delayed ref for that extent as well. This searches the delayed ref tree for
3925 * a given extent, and if there are no other delayed refs to be processed, it 4193 * a given extent, and if there are no other delayed refs to be processed, it
3926 * removes it from the tree. 4194 * removes it from the tree.
@@ -3932,7 +4200,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
3932 struct btrfs_delayed_ref_root *delayed_refs; 4200 struct btrfs_delayed_ref_root *delayed_refs;
3933 struct btrfs_delayed_ref_node *ref; 4201 struct btrfs_delayed_ref_node *ref;
3934 struct rb_node *node; 4202 struct rb_node *node;
3935 int ret; 4203 int ret = 0;
3936 4204
3937 delayed_refs = &trans->transaction->delayed_refs; 4205 delayed_refs = &trans->transaction->delayed_refs;
3938 spin_lock(&delayed_refs->lock); 4206 spin_lock(&delayed_refs->lock);
@@ -3984,17 +4252,99 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
3984 list_del_init(&head->cluster); 4252 list_del_init(&head->cluster);
3985 spin_unlock(&delayed_refs->lock); 4253 spin_unlock(&delayed_refs->lock);
3986 4254
3987 ret = run_one_delayed_ref(trans, root->fs_info->tree_root, 4255 BUG_ON(head->extent_op);
3988 &head->node, head->extent_op, 4256 if (head->must_insert_reserved)
3989 head->must_insert_reserved); 4257 ret = 1;
3990 BUG_ON(ret); 4258
4259 mutex_unlock(&head->mutex);
3991 btrfs_put_delayed_ref(&head->node); 4260 btrfs_put_delayed_ref(&head->node);
3992 return 0; 4261 return ret;
3993out: 4262out:
3994 spin_unlock(&delayed_refs->lock); 4263 spin_unlock(&delayed_refs->lock);
3995 return 0; 4264 return 0;
3996} 4265}
3997 4266
4267void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4268 struct btrfs_root *root,
4269 struct extent_buffer *buf,
4270 u64 parent, int last_ref)
4271{
4272 struct btrfs_block_rsv *block_rsv;
4273 struct btrfs_block_group_cache *cache = NULL;
4274 int ret;
4275
4276 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
4277 ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
4278 parent, root->root_key.objectid,
4279 btrfs_header_level(buf),
4280 BTRFS_DROP_DELAYED_REF, NULL);
4281 BUG_ON(ret);
4282 }
4283
4284 if (!last_ref)
4285 return;
4286
4287 block_rsv = get_block_rsv(trans, root);
4288 cache = btrfs_lookup_block_group(root->fs_info, buf->start);
4289 BUG_ON(block_rsv->space_info != cache->space_info);
4290
4291 if (btrfs_header_generation(buf) == trans->transid) {
4292 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
4293 ret = check_ref_cleanup(trans, root, buf->start);
4294 if (!ret)
4295 goto pin;
4296 }
4297
4298 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
4299 pin_down_extent(root, cache, buf->start, buf->len, 1);
4300 goto pin;
4301 }
4302
4303 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
4304
4305 btrfs_add_free_space(cache, buf->start, buf->len);
4306 ret = update_reserved_bytes(cache, buf->len, 0, 0);
4307 if (ret == -EAGAIN) {
4308 /* block group became read-only */
4309 update_reserved_bytes(cache, buf->len, 0, 1);
4310 goto out;
4311 }
4312
4313 ret = 1;
4314 spin_lock(&block_rsv->lock);
4315 if (block_rsv->reserved < block_rsv->size) {
4316 block_rsv->reserved += buf->len;
4317 ret = 0;
4318 }
4319 spin_unlock(&block_rsv->lock);
4320
4321 if (ret) {
4322 spin_lock(&cache->space_info->lock);
4323 cache->space_info->bytes_reserved -= buf->len;
4324 spin_unlock(&cache->space_info->lock);
4325 }
4326 goto out;
4327 }
4328pin:
4329 if (block_rsv->durable && !cache->ro) {
4330 ret = 0;
4331 spin_lock(&cache->lock);
4332 if (!cache->ro) {
4333 cache->reserved_pinned += buf->len;
4334 ret = 1;
4335 }
4336 spin_unlock(&cache->lock);
4337
4338 if (ret) {
4339 spin_lock(&block_rsv->lock);
4340 block_rsv->freed[trans->transid & 0x1] += buf->len;
4341 spin_unlock(&block_rsv->lock);
4342 }
4343 }
4344out:
4345 btrfs_put_block_group(cache);
4346}
4347
3998int btrfs_free_extent(struct btrfs_trans_handle *trans, 4348int btrfs_free_extent(struct btrfs_trans_handle *trans,
3999 struct btrfs_root *root, 4349 struct btrfs_root *root,
4000 u64 bytenr, u64 num_bytes, u64 parent, 4350 u64 bytenr, u64 num_bytes, u64 parent,
@@ -4016,8 +4366,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
4016 parent, root_objectid, (int)owner, 4366 parent, root_objectid, (int)owner,
4017 BTRFS_DROP_DELAYED_REF, NULL); 4367 BTRFS_DROP_DELAYED_REF, NULL);
4018 BUG_ON(ret); 4368 BUG_ON(ret);
4019 ret = check_ref_cleanup(trans, root, bytenr);
4020 BUG_ON(ret);
4021 } else { 4369 } else {
4022 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 4370 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
4023 parent, root_objectid, owner, 4371 parent, root_objectid, owner,
@@ -4027,21 +4375,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
4027 return ret; 4375 return ret;
4028} 4376}
4029 4377
4030int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4031 struct btrfs_root *root,
4032 u64 bytenr, u32 blocksize,
4033 u64 parent, u64 root_objectid, int level)
4034{
4035 u64 used;
4036 spin_lock(&root->node_lock);
4037 used = btrfs_root_used(&root->root_item) - blocksize;
4038 btrfs_set_root_used(&root->root_item, used);
4039 spin_unlock(&root->node_lock);
4040
4041 return btrfs_free_extent(trans, root, bytenr, blocksize,
4042 parent, root_objectid, level, 0);
4043}
4044
4045static u64 stripe_align(struct btrfs_root *root, u64 val) 4378static u64 stripe_align(struct btrfs_root *root, u64 val)
4046{ 4379{
4047 u64 mask = ((u64)root->stripesize - 1); 4380 u64 mask = ((u64)root->stripesize - 1);
@@ -4131,7 +4464,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4131 u64 num_bytes, u64 empty_size, 4464 u64 num_bytes, u64 empty_size,
4132 u64 search_start, u64 search_end, 4465 u64 search_start, u64 search_end,
4133 u64 hint_byte, struct btrfs_key *ins, 4466 u64 hint_byte, struct btrfs_key *ins,
4134 u64 exclude_start, u64 exclude_nr,
4135 int data) 4467 int data)
4136{ 4468{
4137 int ret = 0; 4469 int ret = 0;
@@ -4143,8 +4475,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4143 int done_chunk_alloc = 0; 4475 int done_chunk_alloc = 0;
4144 struct btrfs_space_info *space_info; 4476 struct btrfs_space_info *space_info;
4145 int last_ptr_loop = 0; 4477 int last_ptr_loop = 0;
4146 int index = 0;
4147 int loop = 0; 4478 int loop = 0;
4479 int index = 0;
4148 bool found_uncached_bg = false; 4480 bool found_uncached_bg = false;
4149 bool failed_cluster_refill = false; 4481 bool failed_cluster_refill = false;
4150 bool failed_alloc = false; 4482 bool failed_alloc = false;
@@ -4415,23 +4747,22 @@ checks:
4415 goto loop; 4747 goto loop;
4416 } 4748 }
4417 4749
4418 if (exclude_nr > 0 && 4750 ins->objectid = search_start;
4419 (search_start + num_bytes > exclude_start && 4751 ins->offset = num_bytes;
4420 search_start < exclude_start + exclude_nr)) {
4421 search_start = exclude_start + exclude_nr;
4422 4752
4753 if (offset < search_start)
4754 btrfs_add_free_space(block_group, offset,
4755 search_start - offset);
4756 BUG_ON(offset > search_start);
4757
4758 ret = update_reserved_bytes(block_group, num_bytes, 1,
4759 (data & BTRFS_BLOCK_GROUP_DATA));
4760 if (ret == -EAGAIN) {
4423 btrfs_add_free_space(block_group, offset, num_bytes); 4761 btrfs_add_free_space(block_group, offset, num_bytes);
4424 /*
4425 * if search_start is still in this block group
4426 * then we just re-search this block group
4427 */
4428 if (search_start >= block_group->key.objectid &&
4429 search_start < (block_group->key.objectid +
4430 block_group->key.offset))
4431 goto have_block_group;
4432 goto loop; 4762 goto loop;
4433 } 4763 }
4434 4764
4765 /* we are all good, lets return */
4435 ins->objectid = search_start; 4766 ins->objectid = search_start;
4436 ins->offset = num_bytes; 4767 ins->offset = num_bytes;
4437 4768
@@ -4439,10 +4770,6 @@ checks:
4439 btrfs_add_free_space(block_group, offset, 4770 btrfs_add_free_space(block_group, offset,
4440 search_start - offset); 4771 search_start - offset);
4441 BUG_ON(offset > search_start); 4772 BUG_ON(offset > search_start);
4442
4443 update_reserved_extents(block_group, num_bytes, 1);
4444
4445 /* we are all good, lets return */
4446 break; 4773 break;
4447loop: 4774loop:
4448 failed_cluster_refill = false; 4775 failed_cluster_refill = false;
@@ -4616,9 +4943,8 @@ again:
4616 4943
4617 WARN_ON(num_bytes < root->sectorsize); 4944 WARN_ON(num_bytes < root->sectorsize);
4618 ret = find_free_extent(trans, root, num_bytes, empty_size, 4945 ret = find_free_extent(trans, root, num_bytes, empty_size,
4619 search_start, search_end, hint_byte, ins, 4946 search_start, search_end, hint_byte,
4620 trans->alloc_exclude_start, 4947 ins, data);
4621 trans->alloc_exclude_nr, data);
4622 4948
4623 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 4949 if (ret == -ENOSPC && num_bytes > min_alloc_size) {
4624 num_bytes = num_bytes >> 1; 4950 num_bytes = num_bytes >> 1;
@@ -4656,7 +4982,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
4656 ret = btrfs_discard_extent(root, start, len); 4982 ret = btrfs_discard_extent(root, start, len);
4657 4983
4658 btrfs_add_free_space(cache, start, len); 4984 btrfs_add_free_space(cache, start, len);
4659 update_reserved_extents(cache, len, 0); 4985 update_reserved_bytes(cache, len, 0, 1);
4660 btrfs_put_block_group(cache); 4986 btrfs_put_block_group(cache);
4661 4987
4662 return ret; 4988 return ret;
@@ -4719,8 +5045,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4719 btrfs_mark_buffer_dirty(path->nodes[0]); 5045 btrfs_mark_buffer_dirty(path->nodes[0]);
4720 btrfs_free_path(path); 5046 btrfs_free_path(path);
4721 5047
4722 ret = update_block_group(trans, root, ins->objectid, ins->offset, 5048 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
4723 1, 0);
4724 if (ret) { 5049 if (ret) {
4725 printk(KERN_ERR "btrfs update block group failed for %llu " 5050 printk(KERN_ERR "btrfs update block group failed for %llu "
4726 "%llu\n", (unsigned long long)ins->objectid, 5051 "%llu\n", (unsigned long long)ins->objectid,
@@ -4780,8 +5105,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
4780 btrfs_mark_buffer_dirty(leaf); 5105 btrfs_mark_buffer_dirty(leaf);
4781 btrfs_free_path(path); 5106 btrfs_free_path(path);
4782 5107
4783 ret = update_block_group(trans, root, ins->objectid, ins->offset, 5108 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
4784 1, 0);
4785 if (ret) { 5109 if (ret) {
4786 printk(KERN_ERR "btrfs update block group failed for %llu " 5110 printk(KERN_ERR "btrfs update block group failed for %llu "
4787 "%llu\n", (unsigned long long)ins->objectid, 5111 "%llu\n", (unsigned long long)ins->objectid,
@@ -4857,73 +5181,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4857 put_caching_control(caching_ctl); 5181 put_caching_control(caching_ctl);
4858 } 5182 }
4859 5183
4860 update_reserved_extents(block_group, ins->offset, 1); 5184 ret = update_reserved_bytes(block_group, ins->offset, 1, 1);
5185 BUG_ON(ret);
4861 btrfs_put_block_group(block_group); 5186 btrfs_put_block_group(block_group);
4862 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 5187 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
4863 0, owner, offset, ins, 1); 5188 0, owner, offset, ins, 1);
4864 return ret; 5189 return ret;
4865} 5190}
4866 5191
4867/*
4868 * finds a free extent and does all the dirty work required for allocation
4869 * returns the key for the extent through ins, and a tree buffer for
4870 * the first block of the extent through buf.
4871 *
4872 * returns 0 if everything worked, non-zero otherwise.
4873 */
4874static int alloc_tree_block(struct btrfs_trans_handle *trans,
4875 struct btrfs_root *root,
4876 u64 num_bytes, u64 parent, u64 root_objectid,
4877 struct btrfs_disk_key *key, int level,
4878 u64 empty_size, u64 hint_byte, u64 search_end,
4879 struct btrfs_key *ins)
4880{
4881 int ret;
4882 u64 flags = 0;
4883
4884 ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
4885 empty_size, hint_byte, search_end,
4886 ins, 0);
4887 if (ret)
4888 return ret;
4889
4890 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4891 if (parent == 0)
4892 parent = ins->objectid;
4893 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4894 } else
4895 BUG_ON(parent > 0);
4896
4897 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4898 struct btrfs_delayed_extent_op *extent_op;
4899 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
4900 BUG_ON(!extent_op);
4901 if (key)
4902 memcpy(&extent_op->key, key, sizeof(extent_op->key));
4903 else
4904 memset(&extent_op->key, 0, sizeof(extent_op->key));
4905 extent_op->flags_to_set = flags;
4906 extent_op->update_key = 1;
4907 extent_op->update_flags = 1;
4908 extent_op->is_data = 0;
4909
4910 ret = btrfs_add_delayed_tree_ref(trans, ins->objectid,
4911 ins->offset, parent, root_objectid,
4912 level, BTRFS_ADD_DELAYED_EXTENT,
4913 extent_op);
4914 BUG_ON(ret);
4915 }
4916
4917 if (root_objectid == root->root_key.objectid) {
4918 u64 used;
4919 spin_lock(&root->node_lock);
4920 used = btrfs_root_used(&root->root_item) + num_bytes;
4921 btrfs_set_root_used(&root->root_item, used);
4922 spin_unlock(&root->node_lock);
4923 }
4924 return ret;
4925}
4926
4927struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 5192struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
4928 struct btrfs_root *root, 5193 struct btrfs_root *root,
4929 u64 bytenr, u32 blocksize, 5194 u64 bytenr, u32 blocksize,
@@ -4962,8 +5227,45 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
4962 return buf; 5227 return buf;
4963} 5228}
4964 5229
5230static struct btrfs_block_rsv *
5231use_block_rsv(struct btrfs_trans_handle *trans,
5232 struct btrfs_root *root, u32 blocksize)
5233{
5234 struct btrfs_block_rsv *block_rsv;
5235 int ret;
5236
5237 block_rsv = get_block_rsv(trans, root);
5238
5239 if (block_rsv->size == 0) {
5240 ret = reserve_metadata_bytes(block_rsv, blocksize);
5241 if (ret)
5242 return ERR_PTR(ret);
5243 return block_rsv;
5244 }
5245
5246 ret = block_rsv_use_bytes(block_rsv, blocksize);
5247 if (!ret)
5248 return block_rsv;
5249
5250 WARN_ON(1);
5251 printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
5252 block_rsv->size, block_rsv->reserved,
5253 block_rsv->freed[0], block_rsv->freed[1]);
5254
5255 return ERR_PTR(-ENOSPC);
5256}
5257
5258static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize)
5259{
5260 block_rsv_add_bytes(block_rsv, blocksize, 0);
5261 block_rsv_release_bytes(block_rsv, NULL, 0);
5262}
5263
4965/* 5264/*
4966 * helper function to allocate a block for a given tree 5265 * finds a free extent and does all the dirty work required for allocation
5266 * returns the key for the extent through ins, and a tree buffer for
5267 * the first block of the extent through buf.
5268 *
4967 * returns the tree buffer or NULL. 5269 * returns the tree buffer or NULL.
4968 */ 5270 */
4969struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 5271struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
@@ -4973,18 +5275,53 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
4973 u64 hint, u64 empty_size) 5275 u64 hint, u64 empty_size)
4974{ 5276{
4975 struct btrfs_key ins; 5277 struct btrfs_key ins;
4976 int ret; 5278 struct btrfs_block_rsv *block_rsv;
4977 struct extent_buffer *buf; 5279 struct extent_buffer *buf;
5280 u64 flags = 0;
5281 int ret;
5282
4978 5283
4979 ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, 5284 block_rsv = use_block_rsv(trans, root, blocksize);
4980 key, level, empty_size, hint, (u64)-1, &ins); 5285 if (IS_ERR(block_rsv))
5286 return ERR_CAST(block_rsv);
5287
5288 ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
5289 empty_size, hint, (u64)-1, &ins, 0);
4981 if (ret) { 5290 if (ret) {
4982 BUG_ON(ret > 0); 5291 unuse_block_rsv(block_rsv, blocksize);
4983 return ERR_PTR(ret); 5292 return ERR_PTR(ret);
4984 } 5293 }
4985 5294
4986 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 5295 buf = btrfs_init_new_buffer(trans, root, ins.objectid,
4987 blocksize, level); 5296 blocksize, level);
5297 BUG_ON(IS_ERR(buf));
5298
5299 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
5300 if (parent == 0)
5301 parent = ins.objectid;
5302 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5303 } else
5304 BUG_ON(parent > 0);
5305
5306 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
5307 struct btrfs_delayed_extent_op *extent_op;
5308 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
5309 BUG_ON(!extent_op);
5310 if (key)
5311 memcpy(&extent_op->key, key, sizeof(extent_op->key));
5312 else
5313 memset(&extent_op->key, 0, sizeof(extent_op->key));
5314 extent_op->flags_to_set = flags;
5315 extent_op->update_key = 1;
5316 extent_op->update_flags = 1;
5317 extent_op->is_data = 0;
5318
5319 ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
5320 ins.offset, parent, root_objectid,
5321 level, BTRFS_ADD_DELAYED_EXTENT,
5322 extent_op);
5323 BUG_ON(ret);
5324 }
4988 return buf; 5325 return buf;
4989} 5326}
4990 5327
@@ -5309,7 +5646,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
5309 struct btrfs_path *path, 5646 struct btrfs_path *path,
5310 struct walk_control *wc) 5647 struct walk_control *wc)
5311{ 5648{
5312 int ret = 0; 5649 int ret;
5313 int level = wc->level; 5650 int level = wc->level;
5314 struct extent_buffer *eb = path->nodes[level]; 5651 struct extent_buffer *eb = path->nodes[level];
5315 u64 parent = 0; 5652 u64 parent = 0;
@@ -5387,13 +5724,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
5387 btrfs_header_owner(path->nodes[level + 1])); 5724 btrfs_header_owner(path->nodes[level + 1]));
5388 } 5725 }
5389 5726
5390 ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, 5727 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
5391 root->root_key.objectid, level, 0);
5392 BUG_ON(ret);
5393out: 5728out:
5394 wc->refs[level] = 0; 5729 wc->refs[level] = 0;
5395 wc->flags[level] = 0; 5730 wc->flags[level] = 0;
5396 return ret; 5731 return 0;
5397} 5732}
5398 5733
5399static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 5734static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
@@ -7216,48 +7551,80 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7216 return flags; 7551 return flags;
7217} 7552}
7218 7553
7219static int __alloc_chunk_for_shrink(struct btrfs_root *root, 7554static int set_block_group_ro(struct btrfs_block_group_cache *cache)
7220 struct btrfs_block_group_cache *shrink_block_group,
7221 int force)
7222{ 7555{
7223 struct btrfs_trans_handle *trans; 7556 struct btrfs_space_info *sinfo = cache->space_info;
7224 u64 new_alloc_flags; 7557 u64 num_bytes;
7225 u64 calc; 7558 int ret = -ENOSPC;
7226 7559
7227 spin_lock(&shrink_block_group->lock); 7560 if (cache->ro)
7228 if (btrfs_block_group_used(&shrink_block_group->item) + 7561 return 0;
7229 shrink_block_group->reserved > 0) {
7230 spin_unlock(&shrink_block_group->lock);
7231 7562
7232 trans = btrfs_start_transaction(root, 1); 7563 spin_lock(&sinfo->lock);
7233 spin_lock(&shrink_block_group->lock); 7564 spin_lock(&cache->lock);
7565 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
7566 cache->bytes_super - btrfs_block_group_used(&cache->item);
7567
7568 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
7569 sinfo->bytes_may_use + sinfo->bytes_readonly +
7570 cache->reserved_pinned + num_bytes < sinfo->total_bytes) {
7571 sinfo->bytes_readonly += num_bytes;
7572 sinfo->bytes_reserved += cache->reserved_pinned;
7573 cache->reserved_pinned = 0;
7574 cache->ro = 1;
7575 ret = 0;
7576 }
7577 spin_unlock(&cache->lock);
7578 spin_unlock(&sinfo->lock);
7579 return ret;
7580}
7234 7581
7235 new_alloc_flags = update_block_group_flags(root, 7582int btrfs_set_block_group_ro(struct btrfs_root *root,
7236 shrink_block_group->flags); 7583 struct btrfs_block_group_cache *cache)
7237 if (new_alloc_flags != shrink_block_group->flags) {
7238 calc =
7239 btrfs_block_group_used(&shrink_block_group->item);
7240 } else {
7241 calc = shrink_block_group->key.offset;
7242 }
7243 spin_unlock(&shrink_block_group->lock);
7244 7584
7245 do_chunk_alloc(trans, root->fs_info->extent_root, 7585{
7246 calc + 2 * 1024 * 1024, new_alloc_flags, force); 7586 struct btrfs_trans_handle *trans;
7587 u64 alloc_flags;
7588 int ret;
7247 7589
7248 btrfs_end_transaction(trans, root); 7590 BUG_ON(cache->ro);
7249 } else
7250 spin_unlock(&shrink_block_group->lock);
7251 return 0;
7252}
7253 7591
7592 trans = btrfs_join_transaction(root, 1);
7593 BUG_ON(IS_ERR(trans));
7254 7594
7255int btrfs_prepare_block_group_relocation(struct btrfs_root *root, 7595 alloc_flags = update_block_group_flags(root, cache->flags);
7256 struct btrfs_block_group_cache *group) 7596 if (alloc_flags != cache->flags)
7597 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
7257 7598
7599 ret = set_block_group_ro(cache);
7600 if (!ret)
7601 goto out;
7602 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
7603 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
7604 if (ret < 0)
7605 goto out;
7606 ret = set_block_group_ro(cache);
7607out:
7608 btrfs_end_transaction(trans, root);
7609 return ret;
7610}
7611
7612int btrfs_set_block_group_rw(struct btrfs_root *root,
7613 struct btrfs_block_group_cache *cache)
7258{ 7614{
7259 __alloc_chunk_for_shrink(root, group, 1); 7615 struct btrfs_space_info *sinfo = cache->space_info;
7260 set_block_group_readonly(group); 7616 u64 num_bytes;
7617
7618 BUG_ON(!cache->ro);
7619
7620 spin_lock(&sinfo->lock);
7621 spin_lock(&cache->lock);
7622 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
7623 cache->bytes_super - btrfs_block_group_used(&cache->item);
7624 sinfo->bytes_readonly -= num_bytes;
7625 cache->ro = 0;
7626 spin_unlock(&cache->lock);
7627 spin_unlock(&sinfo->lock);
7261 return 0; 7628 return 0;
7262} 7629}
7263 7630
@@ -7428,7 +7795,11 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7428 space_info = list_entry(info->space_info.next, 7795 space_info = list_entry(info->space_info.next,
7429 struct btrfs_space_info, 7796 struct btrfs_space_info,
7430 list); 7797 list);
7431 7798 if (space_info->bytes_pinned > 0 ||
7799 space_info->bytes_reserved > 0) {
7800 WARN_ON(1);
7801 dump_space_info(space_info, 0, 0);
7802 }
7432 list_del(&space_info->list); 7803 list_del(&space_info->list);
7433 kfree(space_info); 7804 kfree(space_info);
7434 } 7805 }
@@ -7476,7 +7847,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7476 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7847 cache = kzalloc(sizeof(*cache), GFP_NOFS);
7477 if (!cache) { 7848 if (!cache) {
7478 ret = -ENOMEM; 7849 ret = -ENOMEM;
7479 break; 7850 goto error;
7480 } 7851 }
7481 7852
7482 atomic_set(&cache->count, 1); 7853 atomic_set(&cache->count, 1);
@@ -7533,7 +7904,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7533 BUG_ON(ret); 7904 BUG_ON(ret);
7534 cache->space_info = space_info; 7905 cache->space_info = space_info;
7535 spin_lock(&cache->space_info->lock); 7906 spin_lock(&cache->space_info->lock);
7536 cache->space_info->bytes_super += cache->bytes_super; 7907 cache->space_info->bytes_readonly += cache->bytes_super;
7537 spin_unlock(&cache->space_info->lock); 7908 spin_unlock(&cache->space_info->lock);
7538 7909
7539 __link_block_group(space_info, cache); 7910 __link_block_group(space_info, cache);
@@ -7543,7 +7914,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7543 7914
7544 set_avail_alloc_bits(root->fs_info, cache->flags); 7915 set_avail_alloc_bits(root->fs_info, cache->flags);
7545 if (btrfs_chunk_readonly(root, cache->key.objectid)) 7916 if (btrfs_chunk_readonly(root, cache->key.objectid))
7546 set_block_group_readonly(cache); 7917 set_block_group_ro(cache);
7547 } 7918 }
7548 7919
7549 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { 7920 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
@@ -7557,10 +7928,12 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7557 * mirrored block groups. 7928 * mirrored block groups.
7558 */ 7929 */
7559 list_for_each_entry(cache, &space_info->block_groups[3], list) 7930 list_for_each_entry(cache, &space_info->block_groups[3], list)
7560 set_block_group_readonly(cache); 7931 set_block_group_ro(cache);
7561 list_for_each_entry(cache, &space_info->block_groups[4], list) 7932 list_for_each_entry(cache, &space_info->block_groups[4], list)
7562 set_block_group_readonly(cache); 7933 set_block_group_ro(cache);
7563 } 7934 }
7935
7936 init_global_block_rsv(info);
7564 ret = 0; 7937 ret = 0;
7565error: 7938error:
7566 btrfs_free_path(path); 7939 btrfs_free_path(path);
@@ -7621,7 +7994,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7621 BUG_ON(ret); 7994 BUG_ON(ret);
7622 7995
7623 spin_lock(&cache->space_info->lock); 7996 spin_lock(&cache->space_info->lock);
7624 cache->space_info->bytes_super += cache->bytes_super; 7997 cache->space_info->bytes_readonly += cache->bytes_super;
7625 spin_unlock(&cache->space_info->lock); 7998 spin_unlock(&cache->space_info->lock);
7626 7999
7627 __link_block_group(cache->space_info, cache); 8000 __link_block_group(cache->space_info, cache);