diff options
author | Yan, Zheng <zheng.yan@oracle.com> | 2010-05-16 10:48:46 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2010-05-25 10:34:50 -0400 |
commit | a22285a6a32390195235171b89d157ed1a1fe932 (patch) | |
tree | 3fabc88a029e1af4f2fdcc708e7b62ef3cf3703a /fs/btrfs/extent-tree.c | |
parent | f0486c68e4bd9a06a5904d3eeb3a0d73a83befb8 (diff) |
Btrfs: Integrate metadata reservation with start_transaction
Besides simplifying the code, this change makes sure all metadata
reservations for normal metadata operations are released after
committing the transaction.
Changes since V1:
Add code that checks if unlink and rmdir will free space.
Add ENOSPC handling for clone ioctl.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 283 |
1 files changed, 169 insertions, 114 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3367278ac6a1..657df6e002d3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -616,6 +616,113 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | |||
616 | } | 616 | } |
617 | 617 | ||
/*
 * helper function to lookup reference count and flags of extent.
 *
 * the head node for delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree. the head
 * node may also store the extent flags to set. This way you can check
 * to see what the reference count and extent flags would be if all of
 * the delayed refs are not processed.
 *
 * @trans:     transaction handle, or NULL to read only the committed state
 * @root:      any root; the extent root is taken from root->fs_info
 * @bytenr:    start of the extent
 * @num_bytes: length of the extent (used as the key offset)
 * @refs:      out, may be NULL: reference count including pending delayed refs
 * @flags:     out, may be NULL: extent flags including pending flag updates
 *
 * Returns 0 on success (including "extent item not found", which reports
 * zero refs/flags) or a negative errno from the tree search.
 */
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 bytenr,
			     u64 num_bytes, u64 *refs, u64 *flags)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;
	/*
	 * Without a transaction there can be no delayed refs to merge in,
	 * so search the commit root lock-free for the on-disk state.
	 */
	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}
again:
	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
			/* smaller item: must be the old v0 extent format */
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			struct btrfs_extent_item_v0 *ei0;
			BUG_ON(item_size != sizeof(*ei0));
			ei0 = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_item_v0);
			num_refs = btrfs_extent_refs_v0(leaf, ei0);
			/* FIXME: this isn't correct for data */
			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
			BUG();
#endif
		}
		BUG_ON(num_refs == 0);
	} else {
		/* no on-disk item; delayed refs below may still contribute */
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	/* fold in the pending modifications queued as delayed refs */
	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			/*
			 * Someone is processing this head.  Pin it with a
			 * ref, drop our locks and the path, wait for the
			 * processor to finish (lock/unlock the mutex), then
			 * redo the whole lookup from scratch.
			 */
			atomic_inc(&head->node.refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(root->fs_info->extent_root, path);

			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref(&head->node);
			goto again;
		}
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		/* ref_mod is the signed sum of queued ref count changes */
		num_refs += head->node.ref_mod;
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}
724 | |||
725 | /* | ||
619 | * Back reference rules. Back refs have three main goals: | 726 | * Back reference rules. Back refs have three main goals: |
620 | * | 727 | * |
621 | * 1) differentiate between all holders of references to an extent so that | 728 | * 1) differentiate between all holders of references to an extent so that |
@@ -2949,113 +3056,6 @@ again: | |||
2949 | } | 3056 | } |
2950 | 3057 | ||
2951 | /* | 3058 | /* |
2952 | * unreserve num_items number of items worth of metadata space. This needs to | ||
2953 | * be paired with btrfs_reserve_metadata_space. | ||
2954 | * | ||
2955 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
2956 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
2957 | * oprations which will result in more used metadata, so we want to make sure we | ||
2958 | * can do that without issue. | ||
2959 | */ | ||
2960 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
2961 | { | ||
2962 | struct btrfs_fs_info *info = root->fs_info; | ||
2963 | struct btrfs_space_info *meta_sinfo; | ||
2964 | u64 num_bytes; | ||
2965 | u64 alloc_target; | ||
2966 | bool bug = false; | ||
2967 | |||
2968 | /* get the space info for where the metadata will live */ | ||
2969 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
2970 | meta_sinfo = __find_space_info(info, alloc_target); | ||
2971 | |||
2972 | num_bytes = calculate_bytes_needed(root, num_items); | ||
2973 | |||
2974 | spin_lock(&meta_sinfo->lock); | ||
2975 | if (meta_sinfo->bytes_may_use < num_bytes) { | ||
2976 | bug = true; | ||
2977 | meta_sinfo->bytes_may_use = 0; | ||
2978 | } else { | ||
2979 | meta_sinfo->bytes_may_use -= num_bytes; | ||
2980 | } | ||
2981 | spin_unlock(&meta_sinfo->lock); | ||
2982 | |||
2983 | BUG_ON(bug); | ||
2984 | |||
2985 | return 0; | ||
2986 | } | ||
2987 | |||
/*
 * Reserve some metadata space for use. We'll calculate the worst case number
 * of bytes that would be needed to modify num_items number of items. If we
 * have space, fantastic, if not, you get -ENOSPC. Please call
 * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of
 * items you reserved, since whatever metadata you needed should have already
 * been allocated.
 *
 * This will commit the transaction to make more space if we don't have enough
 * metadata space. The only time we don't do this is if we're reserving space
 * inside of a transaction, then we will just return -ENOSPC and it is the
 * callers responsibility to handle it properly.
 */
int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
{
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_space_info *meta_sinfo;
	u64 num_bytes;
	u64 used;
	u64 alloc_target;
	int retries = 0;

	/* get the space info for where the metadata will live */
	alloc_target = btrfs_get_alloc_profile(root, 0);
	meta_sinfo = __find_space_info(info, alloc_target);

	/* worst-case bytes needed to modify num_items tree items */
	num_bytes = calculate_bytes_needed(root, num_items);
again:
	spin_lock(&meta_sinfo->lock);

	/* lazily set aside a cushion for the root tree (6 items worth) */
	if (unlikely(!meta_sinfo->bytes_root))
		meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);

	/*
	 * Optimistically charge bytes_may_use on the first pass only; on
	 * retries the charge from the first pass is still in place.
	 */
	if (!retries)
		meta_sinfo->bytes_may_use += num_bytes;

	used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
		meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;

	if (used > meta_sinfo->total_bytes) {
		retries++;
		if (retries == 1) {
			/*
			 * First attempt at recovery: allocate a new chunk.
			 * NOTE(review): this is reached with meta_sinfo->lock
			 * held and "goto again" re-takes it — presumably
			 * maybe_allocate_chunk drops the lock internally;
			 * confirm against its definition before relying on
			 * the lock state here.
			 */
			if (maybe_allocate_chunk(NULL, root, meta_sinfo,
						 num_bytes))
				goto again;
			retries++;
		} else {
			spin_unlock(&meta_sinfo->lock);
		}

		if (retries == 2) {
			/* second attempt: flush delalloc to free up space */
			shrink_delalloc(NULL, root, meta_sinfo, num_bytes);
			goto again;
		}
		/* out of options: undo the optimistic charge and fail */
		spin_lock(&meta_sinfo->lock);
		meta_sinfo->bytes_may_use -= num_bytes;
		spin_unlock(&meta_sinfo->lock);

		dump_space_info(meta_sinfo, 0, 0);
		return -ENOSPC;
	}

	check_force_delalloc(meta_sinfo);
	spin_unlock(&meta_sinfo->lock);

	return 0;
}
3057 | |||
3058 | /* | ||
3059 | * This will check the space that the inode allocates from to make sure we have | 3059 | * This will check the space that the inode allocates from to make sure we have |
3060 | * enough space for bytes. | 3060 | * enough space for bytes. |
3061 | */ | 3061 | */ |
@@ -3095,9 +3095,9 @@ again: | |||
3095 | spin_unlock(&data_sinfo->lock); | 3095 | spin_unlock(&data_sinfo->lock); |
3096 | alloc: | 3096 | alloc: |
3097 | alloc_target = btrfs_get_alloc_profile(root, 1); | 3097 | alloc_target = btrfs_get_alloc_profile(root, 1); |
3098 | trans = btrfs_start_transaction(root, 1); | 3098 | trans = btrfs_join_transaction(root, 1); |
3099 | if (!trans) | 3099 | if (IS_ERR(trans)) |
3100 | return -ENOMEM; | 3100 | return PTR_ERR(trans); |
3101 | 3101 | ||
3102 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 3102 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
3103 | bytes + 2 * 1024 * 1024, | 3103 | bytes + 2 * 1024 * 1024, |
@@ -3118,8 +3118,8 @@ alloc: | |||
3118 | if (!committed && !root->fs_info->open_ioctl_trans) { | 3118 | if (!committed && !root->fs_info->open_ioctl_trans) { |
3119 | committed = 1; | 3119 | committed = 1; |
3120 | trans = btrfs_join_transaction(root, 1); | 3120 | trans = btrfs_join_transaction(root, 1); |
3121 | if (!trans) | 3121 | if (IS_ERR(trans)) |
3122 | return -ENOMEM; | 3122 | return PTR_ERR(trans); |
3123 | ret = btrfs_commit_transaction(trans, root); | 3123 | ret = btrfs_commit_transaction(trans, root); |
3124 | if (ret) | 3124 | if (ret) |
3125 | return ret; | 3125 | return ret; |
@@ -3701,6 +3701,59 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
3701 | fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; | 3701 | fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; |
3702 | } | 3702 | } |
3703 | 3703 | ||
3704 | static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items) | ||
3705 | { | ||
3706 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | ||
3707 | 3 * num_items; | ||
3708 | } | ||
3709 | |||
3710 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3711 | struct btrfs_root *root, | ||
3712 | int num_items, int *retries) | ||
3713 | { | ||
3714 | u64 num_bytes; | ||
3715 | int ret; | ||
3716 | |||
3717 | if (num_items == 0 || root->fs_info->chunk_root == root) | ||
3718 | return 0; | ||
3719 | |||
3720 | num_bytes = calc_trans_metadata_size(root, num_items); | ||
3721 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, | ||
3722 | num_bytes, retries); | ||
3723 | if (!ret) { | ||
3724 | trans->bytes_reserved += num_bytes; | ||
3725 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
3726 | } | ||
3727 | return ret; | ||
3728 | } | ||
3729 | |||
3730 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | ||
3731 | struct btrfs_root *root) | ||
3732 | { | ||
3733 | if (!trans->bytes_reserved) | ||
3734 | return; | ||
3735 | |||
3736 | BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); | ||
3737 | btrfs_block_rsv_release(root, trans->block_rsv, | ||
3738 | trans->bytes_reserved); | ||
3739 | trans->bytes_reserved = 0; | ||
3740 | } | ||
3741 | |||
3742 | int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3743 | struct btrfs_pending_snapshot *pending) | ||
3744 | { | ||
3745 | struct btrfs_root *root = pending->root; | ||
3746 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | ||
3747 | struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; | ||
3748 | /* | ||
3749 | * two for root back/forward refs, two for directory entries | ||
3750 | * and one for root of the snapshot. | ||
3751 | */ | ||
3752 | u64 num_bytes = calc_trans_metadata_size(root, 5); | ||
3753 | dst_rsv->space_info = src_rsv->space_info; | ||
3754 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
3755 | } | ||
3756 | |||
3704 | static int update_block_group(struct btrfs_trans_handle *trans, | 3757 | static int update_block_group(struct btrfs_trans_handle *trans, |
3705 | struct btrfs_root *root, | 3758 | struct btrfs_root *root, |
3706 | u64 bytenr, u64 num_bytes, int alloc) | 3759 | u64 bytenr, u64 num_bytes, int alloc) |
@@ -5824,7 +5877,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5824 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 5877 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
5825 | BUG_ON(!wc); | 5878 | BUG_ON(!wc); |
5826 | 5879 | ||
5827 | trans = btrfs_start_transaction(tree_root, 1); | 5880 | trans = btrfs_start_transaction(tree_root, 0); |
5828 | 5881 | ||
5829 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { | 5882 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { |
5830 | level = btrfs_header_level(root->node); | 5883 | level = btrfs_header_level(root->node); |
@@ -5920,7 +5973,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5920 | BUG_ON(ret); | 5973 | BUG_ON(ret); |
5921 | 5974 | ||
5922 | btrfs_end_transaction(trans, tree_root); | 5975 | btrfs_end_transaction(trans, tree_root); |
5923 | trans = btrfs_start_transaction(tree_root, 1); | 5976 | trans = btrfs_start_transaction(tree_root, 0); |
5977 | if (IS_ERR(trans)) | ||
5978 | return PTR_ERR(trans); | ||
5924 | } else { | 5979 | } else { |
5925 | unsigned long update; | 5980 | unsigned long update; |
5926 | update = trans->delayed_ref_updates; | 5981 | update = trans->delayed_ref_updates; |