diff options
author | Josef Bacik <jbacik@redhat.com> | 2008-11-12 14:19:50 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-11-12 14:19:50 -0500 |
commit | f3465ca44e2a51fd647c167045768a8ab5a96603 (patch) | |
tree | 3d08ed21a29374321469d4f43391fec7f34d8214 /fs/btrfs/ctree.c | |
parent | c5c9cd4d1b827fe545ed2a945e91e3a6909f3886 (diff) |
Btrfs: batch extent inserts/updates/deletions on the extent root
While profiling the allocator I noticed a good amount of time was being spent in
finish_current_insert and del_pending_extents, and as the filesystem filled up
more and more time was being spent in those functions. This patch aims to try
and reduce that problem. This happens in two ways:
1) track if we tried to delete an extent that we are going to update or insert.
Once we get into finish_current_insert we discard any of the extents that were
marked for deletion. This saves us from doing unnecessary work almost every
time finish_current_insert runs.
2) Batch insertion/updates/deletions. Instead of doing a btrfs_search_slot for
each individual extent and doing the needed operation, we instead keep the leaf
around and see if there is anything else we can do on that leaf. On the insert
case I introduced a btrfs_insert_some_items, which will take an array of keys
with an array of data_sizes and try and squeeze in as many of those keys as
possible, and then return how many keys it was able to insert. In the update
case we search for an extent ref, update the ref and then loop through the leaf
to see if any of the other refs we are looking to update are on that leaf, and
then once we are done we release the path and search for the next ref we need to
update. And finally for the deletion we try and delete the extent+ref in pairs,
so we will try to find extent+ref pairs next to the extent we are trying to free
and free them in bulk if possible.
This along with the other cluster fix that Chris pushed out a bit ago helps make
the allocator perform more uniformly as it fills up the disk. There is still a
slight drop as we fill up the disk since we start having to stick new blocks in
odd places, which results in more COWs than on an empty fs, but the drop is not
nearly as severe as it was before.
Signed-off-by: Josef Bacik <jbacik@redhat.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r-- | fs/btrfs/ctree.c | 170 |
1 files changed, 170 insertions, 0 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ac61c50a3311..8bb452456d90 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -431,6 +431,25 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) | |||
431 | return 0; | 431 | return 0; |
432 | } | 432 | } |
433 | 433 | ||
434 | /* | ||
435 | * same as comp_keys only with two btrfs_key's | ||
436 | */ | ||
437 | static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) | ||
438 | { | ||
439 | if (k1->objectid > k2->objectid) | ||
440 | return 1; | ||
441 | if (k1->objectid < k2->objectid) | ||
442 | return -1; | ||
443 | if (k1->type > k2->type) | ||
444 | return 1; | ||
445 | if (k1->type < k2->type) | ||
446 | return -1; | ||
447 | if (k1->offset > k2->offset) | ||
448 | return 1; | ||
449 | if (k1->offset < k2->offset) | ||
450 | return -1; | ||
451 | return 0; | ||
452 | } | ||
434 | 453 | ||
435 | /* | 454 | /* |
436 | * this is used by the defrag code to go through all the | 455 | * this is used by the defrag code to go through all the |
@@ -3002,6 +3021,157 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, | |||
3002 | /* | 3021 | /* |
3003 | * Given a key and some data, insert items into the tree. | 3022 | * Given a key and some data, insert items into the tree. |
3004 | * This does all the path init required, making room in the tree if needed. | 3023 | * This does all the path init required, making room in the tree if needed. |
3024 | * Returns the number of keys that were inserted. | ||
3025 | */ | ||
3026 | int btrfs_insert_some_items(struct btrfs_trans_handle *trans, | ||
3027 | struct btrfs_root *root, | ||
3028 | struct btrfs_path *path, | ||
3029 | struct btrfs_key *cpu_key, u32 *data_size, | ||
3030 | int nr) | ||
3031 | { | ||
3032 | struct extent_buffer *leaf; | ||
3033 | struct btrfs_item *item; | ||
3034 | int ret = 0; | ||
3035 | int slot; | ||
3036 | int slot_orig; | ||
3037 | int i; | ||
3038 | u32 nritems; | ||
3039 | u32 total_data = 0; | ||
3040 | u32 total_size = 0; | ||
3041 | unsigned int data_end; | ||
3042 | struct btrfs_disk_key disk_key; | ||
3043 | struct btrfs_key found_key; | ||
3044 | |||
3045 | found_key.objectid = 0; | ||
3046 | nr = min_t(int, nr, BTRFS_NODEPTRS_PER_BLOCK(root)); | ||
3047 | |||
3048 | for (i = 0; i < nr; i++) | ||
3049 | total_data += data_size[i]; | ||
3050 | |||
3051 | total_data = min_t(u32, total_data, BTRFS_LEAF_DATA_SIZE(root)); | ||
3052 | total_size = total_data + (nr * sizeof(struct btrfs_item)); | ||
3053 | ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); | ||
3054 | if (ret == 0) | ||
3055 | return -EEXIST; | ||
3056 | if (ret < 0) | ||
3057 | goto out; | ||
3058 | |||
3059 | slot_orig = path->slots[0]; | ||
3060 | leaf = path->nodes[0]; | ||
3061 | |||
3062 | nritems = btrfs_header_nritems(leaf); | ||
3063 | data_end = leaf_data_end(root, leaf); | ||
3064 | |||
3065 | if (btrfs_leaf_free_space(root, leaf) < total_size) { | ||
3066 | for (i = nr; i >= 0; i--) { | ||
3067 | total_data -= data_size[i]; | ||
3068 | total_size -= data_size[i] + sizeof(struct btrfs_item); | ||
3069 | if (total_size < btrfs_leaf_free_space(root, leaf)) | ||
3070 | break; | ||
3071 | } | ||
3072 | nr = i; | ||
3073 | } | ||
3074 | |||
3075 | slot = path->slots[0]; | ||
3076 | BUG_ON(slot < 0); | ||
3077 | |||
3078 | if (slot != nritems) { | ||
3079 | unsigned int old_data = btrfs_item_end_nr(leaf, slot); | ||
3080 | |||
3081 | item = btrfs_item_nr(leaf, slot); | ||
3082 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
3083 | |||
3084 | /* figure out how many keys we can insert in here */ | ||
3085 | total_data = data_size[0]; | ||
3086 | for (i = 1; i < nr; i++) { | ||
3087 | if (comp_cpu_keys(&found_key, cpu_key + i) <= 0) | ||
3088 | break; | ||
3089 | total_data += data_size[i]; | ||
3090 | } | ||
3091 | nr = i; | ||
3092 | |||
3093 | if (old_data < data_end) { | ||
3094 | btrfs_print_leaf(root, leaf); | ||
3095 | printk("slot %d old_data %d data_end %d\n", | ||
3096 | slot, old_data, data_end); | ||
3097 | BUG_ON(1); | ||
3098 | } | ||
3099 | /* | ||
3100 | * item0..itemN ... dataN.offset..dataN.size .. data0.size | ||
3101 | */ | ||
3102 | /* first correct the data pointers */ | ||
3103 | WARN_ON(leaf->map_token); | ||
3104 | for (i = slot; i < nritems; i++) { | ||
3105 | u32 ioff; | ||
3106 | |||
3107 | item = btrfs_item_nr(leaf, i); | ||
3108 | if (!leaf->map_token) { | ||
3109 | map_extent_buffer(leaf, (unsigned long)item, | ||
3110 | sizeof(struct btrfs_item), | ||
3111 | &leaf->map_token, &leaf->kaddr, | ||
3112 | &leaf->map_start, &leaf->map_len, | ||
3113 | KM_USER1); | ||
3114 | } | ||
3115 | |||
3116 | ioff = btrfs_item_offset(leaf, item); | ||
3117 | btrfs_set_item_offset(leaf, item, ioff - total_data); | ||
3118 | } | ||
3119 | if (leaf->map_token) { | ||
3120 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
3121 | leaf->map_token = NULL; | ||
3122 | } | ||
3123 | |||
3124 | /* shift the items */ | ||
3125 | memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), | ||
3126 | btrfs_item_nr_offset(slot), | ||
3127 | (nritems - slot) * sizeof(struct btrfs_item)); | ||
3128 | |||
3129 | /* shift the data */ | ||
3130 | memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + | ||
3131 | data_end - total_data, btrfs_leaf_data(leaf) + | ||
3132 | data_end, old_data - data_end); | ||
3133 | data_end = old_data; | ||
3134 | } else { | ||
3135 | /* | ||
3136 | * this sucks but it has to be done, if we are inserting at | ||
3137 | * the end of the leaf only insert 1 of the items, since we | ||
3138 | * have no way of knowing whats on the next leaf and we'd have | ||
3139 | * to drop our current locks to figure it out | ||
3140 | */ | ||
3141 | nr = 1; | ||
3142 | } | ||
3143 | |||
3144 | /* setup the item for the new data */ | ||
3145 | for (i = 0; i < nr; i++) { | ||
3146 | btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); | ||
3147 | btrfs_set_item_key(leaf, &disk_key, slot + i); | ||
3148 | item = btrfs_item_nr(leaf, slot + i); | ||
3149 | btrfs_set_item_offset(leaf, item, data_end - data_size[i]); | ||
3150 | data_end -= data_size[i]; | ||
3151 | btrfs_set_item_size(leaf, item, data_size[i]); | ||
3152 | } | ||
3153 | btrfs_set_header_nritems(leaf, nritems + nr); | ||
3154 | btrfs_mark_buffer_dirty(leaf); | ||
3155 | |||
3156 | ret = 0; | ||
3157 | if (slot == 0) { | ||
3158 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); | ||
3159 | ret = fixup_low_keys(trans, root, path, &disk_key, 1); | ||
3160 | } | ||
3161 | |||
3162 | if (btrfs_leaf_free_space(root, leaf) < 0) { | ||
3163 | btrfs_print_leaf(root, leaf); | ||
3164 | BUG(); | ||
3165 | } | ||
3166 | out: | ||
3167 | if (!ret) | ||
3168 | ret = nr; | ||
3169 | return ret; | ||
3170 | } | ||
3171 | |||
3172 | /* | ||
3173 | * Given a key and some data, insert items into the tree. | ||
3174 | * This does all the path init required, making room in the tree if needed. | ||
3005 | */ | 3175 | */ |
3006 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | 3176 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, |
3007 | struct btrfs_root *root, | 3177 | struct btrfs_root *root, |