diff options
author | Josef Bacik <josef@redhat.com> | 2010-06-21 14:48:16 -0400 |
---|---|---|
committer | Josef Bacik <josef@redhat.com> | 2010-10-28 15:59:09 -0400 |
commit | 0af3d00bad38d3bb9912a60928ad0669f17bdb76 (patch) | |
tree | abbf4c773138a33dcde483ac60f016c4b5e55dcc /fs/btrfs/extent-tree.c | |
parent | f6f94e2ab1b33f0082ac22d71f66385a60d8157f (diff) |
Btrfs: create special free space cache inode
In order to save free space cache, we need an inode to hold the data, and we
need a special item to point at the right inode for the right block group. So
first, create a special item that will point to the right inode, and the number
of extent entries we will have and the number of bitmaps we will have. We
truncate and pre-allocate space every time to make sure it's up to date.
This feature will be turned on as soon as you mount with -o space_cache; however,
it is safe to boot into old kernels — they will just generate the cache the
old-fashioned way. When you boot back into a newer kernel we will notice that the
filesystem was modified but the cache was not, and automatically discard the cache.
Signed-off-by: Josef Bacik <josef@redhat.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 231 |
1 files changed, 225 insertions, 6 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32d094002a57..aab40fb3faed 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -2688,6 +2688,109 @@ next_block_group(struct btrfs_root *root, | |||
2688 | return cache; | 2688 | return cache; |
2689 | } | 2689 | } |
2690 | 2690 | ||
2691 | static int cache_save_setup(struct btrfs_block_group_cache *block_group, | ||
2692 | struct btrfs_trans_handle *trans, | ||
2693 | struct btrfs_path *path) | ||
2694 | { | ||
2695 | struct btrfs_root *root = block_group->fs_info->tree_root; | ||
2696 | struct inode *inode = NULL; | ||
2697 | u64 alloc_hint = 0; | ||
2698 | int num_pages = 0; | ||
2699 | int retries = 0; | ||
2700 | int ret = 0; | ||
2701 | |||
2702 | /* | ||
2703 | * If this block group is smaller than 100 megs don't bother caching the | ||
2704 | * block group. | ||
2705 | */ | ||
2706 | if (block_group->key.offset < (100 * 1024 * 1024)) { | ||
2707 | spin_lock(&block_group->lock); | ||
2708 | block_group->disk_cache_state = BTRFS_DC_WRITTEN; | ||
2709 | spin_unlock(&block_group->lock); | ||
2710 | return 0; | ||
2711 | } | ||
2712 | |||
2713 | again: | ||
2714 | inode = lookup_free_space_inode(root, block_group, path); | ||
2715 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { | ||
2716 | ret = PTR_ERR(inode); | ||
2717 | btrfs_release_path(root, path); | ||
2718 | goto out; | ||
2719 | } | ||
2720 | |||
2721 | if (IS_ERR(inode)) { | ||
2722 | BUG_ON(retries); | ||
2723 | retries++; | ||
2724 | |||
2725 | if (block_group->ro) | ||
2726 | goto out_free; | ||
2727 | |||
2728 | ret = create_free_space_inode(root, trans, block_group, path); | ||
2729 | if (ret) | ||
2730 | goto out_free; | ||
2731 | goto again; | ||
2732 | } | ||
2733 | |||
2734 | /* | ||
2735 | * We want to set the generation to 0, that way if anything goes wrong | ||
2736 | * from here on out we know not to trust this cache when we load up next | ||
2737 | * time. | ||
2738 | */ | ||
2739 | BTRFS_I(inode)->generation = 0; | ||
2740 | ret = btrfs_update_inode(trans, root, inode); | ||
2741 | WARN_ON(ret); | ||
2742 | |||
2743 | if (i_size_read(inode) > 0) { | ||
2744 | ret = btrfs_truncate_free_space_cache(root, trans, path, | ||
2745 | inode); | ||
2746 | if (ret) | ||
2747 | goto out_put; | ||
2748 | } | ||
2749 | |||
2750 | spin_lock(&block_group->lock); | ||
2751 | if (block_group->cached != BTRFS_CACHE_FINISHED) { | ||
2752 | spin_unlock(&block_group->lock); | ||
2753 | goto out_put; | ||
2754 | } | ||
2755 | spin_unlock(&block_group->lock); | ||
2756 | |||
2757 | num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); | ||
2758 | if (!num_pages) | ||
2759 | num_pages = 1; | ||
2760 | |||
2761 | /* | ||
2762 | * Just to make absolutely sure we have enough space, we're going to | ||
2763 | * preallocate 12 pages worth of space for each block group. In | ||
2764 | * practice we ought to use at most 8, but we need extra space so we can | ||
2765 | * add our header and have a terminator between the extents and the | ||
2766 | * bitmaps. | ||
2767 | */ | ||
2768 | num_pages *= 16; | ||
2769 | num_pages *= PAGE_CACHE_SIZE; | ||
2770 | |||
2771 | ret = btrfs_check_data_free_space(inode, num_pages); | ||
2772 | if (ret) | ||
2773 | goto out_put; | ||
2774 | |||
2775 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, | ||
2776 | num_pages, num_pages, | ||
2777 | &alloc_hint); | ||
2778 | btrfs_free_reserved_data_space(inode, num_pages); | ||
2779 | out_put: | ||
2780 | iput(inode); | ||
2781 | out_free: | ||
2782 | btrfs_release_path(root, path); | ||
2783 | out: | ||
2784 | spin_lock(&block_group->lock); | ||
2785 | if (ret) | ||
2786 | block_group->disk_cache_state = BTRFS_DC_ERROR; | ||
2787 | else | ||
2788 | block_group->disk_cache_state = BTRFS_DC_SETUP; | ||
2789 | spin_unlock(&block_group->lock); | ||
2790 | |||
2791 | return ret; | ||
2792 | } | ||
2793 | |||
2691 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | 2794 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, |
2692 | struct btrfs_root *root) | 2795 | struct btrfs_root *root) |
2693 | { | 2796 | { |
@@ -2700,6 +2803,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2700 | if (!path) | 2803 | if (!path) |
2701 | return -ENOMEM; | 2804 | return -ENOMEM; |
2702 | 2805 | ||
2806 | again: | ||
2807 | while (1) { | ||
2808 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | ||
2809 | while (cache) { | ||
2810 | if (cache->disk_cache_state == BTRFS_DC_CLEAR) | ||
2811 | break; | ||
2812 | cache = next_block_group(root, cache); | ||
2813 | } | ||
2814 | if (!cache) { | ||
2815 | if (last == 0) | ||
2816 | break; | ||
2817 | last = 0; | ||
2818 | continue; | ||
2819 | } | ||
2820 | err = cache_save_setup(cache, trans, path); | ||
2821 | last = cache->key.objectid + cache->key.offset; | ||
2822 | btrfs_put_block_group(cache); | ||
2823 | } | ||
2824 | |||
2703 | while (1) { | 2825 | while (1) { |
2704 | if (last == 0) { | 2826 | if (last == 0) { |
2705 | err = btrfs_run_delayed_refs(trans, root, | 2827 | err = btrfs_run_delayed_refs(trans, root, |
@@ -2709,6 +2831,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
2709 | 2831 | ||
2710 | cache = btrfs_lookup_first_block_group(root->fs_info, last); | 2832 | cache = btrfs_lookup_first_block_group(root->fs_info, last); |
2711 | while (cache) { | 2833 | while (cache) { |
2834 | if (cache->disk_cache_state == BTRFS_DC_CLEAR) { | ||
2835 | btrfs_put_block_group(cache); | ||
2836 | goto again; | ||
2837 | } | ||
2838 | |||
2712 | if (cache->dirty) | 2839 | if (cache->dirty) |
2713 | break; | 2840 | break; |
2714 | cache = next_block_group(root, cache); | 2841 | cache = next_block_group(root, cache); |
@@ -2883,11 +3010,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) | |||
2883 | struct btrfs_space_info *data_sinfo; | 3010 | struct btrfs_space_info *data_sinfo; |
2884 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3011 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2885 | u64 used; | 3012 | u64 used; |
2886 | int ret = 0, committed = 0; | 3013 | int ret = 0, committed = 0, alloc_chunk = 1; |
2887 | 3014 | ||
2888 | /* make sure bytes are sectorsize aligned */ | 3015 | /* make sure bytes are sectorsize aligned */ |
2889 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3016 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
2890 | 3017 | ||
3018 | if (root == root->fs_info->tree_root) { | ||
3019 | alloc_chunk = 0; | ||
3020 | committed = 1; | ||
3021 | } | ||
3022 | |||
2891 | data_sinfo = BTRFS_I(inode)->space_info; | 3023 | data_sinfo = BTRFS_I(inode)->space_info; |
2892 | if (!data_sinfo) | 3024 | if (!data_sinfo) |
2893 | goto alloc; | 3025 | goto alloc; |
@@ -2906,7 +3038,7 @@ again: | |||
2906 | * if we don't have enough free bytes in this space then we need | 3038 | * if we don't have enough free bytes in this space then we need |
2907 | * to alloc a new chunk. | 3039 | * to alloc a new chunk. |
2908 | */ | 3040 | */ |
2909 | if (!data_sinfo->full) { | 3041 | if (!data_sinfo->full && alloc_chunk) { |
2910 | u64 alloc_target; | 3042 | u64 alloc_target; |
2911 | 3043 | ||
2912 | data_sinfo->force_alloc = 1; | 3044 | data_sinfo->force_alloc = 1; |
@@ -3777,12 +3909,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3777 | struct btrfs_root *root, | 3909 | struct btrfs_root *root, |
3778 | u64 bytenr, u64 num_bytes, int alloc) | 3910 | u64 bytenr, u64 num_bytes, int alloc) |
3779 | { | 3911 | { |
3780 | struct btrfs_block_group_cache *cache; | 3912 | struct btrfs_block_group_cache *cache = NULL; |
3781 | struct btrfs_fs_info *info = root->fs_info; | 3913 | struct btrfs_fs_info *info = root->fs_info; |
3782 | int factor; | ||
3783 | u64 total = num_bytes; | 3914 | u64 total = num_bytes; |
3784 | u64 old_val; | 3915 | u64 old_val; |
3785 | u64 byte_in_group; | 3916 | u64 byte_in_group; |
3917 | int factor; | ||
3786 | 3918 | ||
3787 | /* block accounting for super block */ | 3919 | /* block accounting for super block */ |
3788 | spin_lock(&info->delalloc_lock); | 3920 | spin_lock(&info->delalloc_lock); |
@@ -3804,11 +3936,17 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3804 | factor = 2; | 3936 | factor = 2; |
3805 | else | 3937 | else |
3806 | factor = 1; | 3938 | factor = 1; |
3939 | |||
3807 | byte_in_group = bytenr - cache->key.objectid; | 3940 | byte_in_group = bytenr - cache->key.objectid; |
3808 | WARN_ON(byte_in_group > cache->key.offset); | 3941 | WARN_ON(byte_in_group > cache->key.offset); |
3809 | 3942 | ||
3810 | spin_lock(&cache->space_info->lock); | 3943 | spin_lock(&cache->space_info->lock); |
3811 | spin_lock(&cache->lock); | 3944 | spin_lock(&cache->lock); |
3945 | |||
3946 | if (btrfs_super_cache_generation(&info->super_copy) != 0 && | ||
3947 | cache->disk_cache_state < BTRFS_DC_CLEAR) | ||
3948 | cache->disk_cache_state = BTRFS_DC_CLEAR; | ||
3949 | |||
3812 | cache->dirty = 1; | 3950 | cache->dirty = 1; |
3813 | old_val = btrfs_block_group_used(&cache->item); | 3951 | old_val = btrfs_block_group_used(&cache->item); |
3814 | num_bytes = min(total, cache->key.offset - byte_in_group); | 3952 | num_bytes = min(total, cache->key.offset - byte_in_group); |
@@ -7814,6 +7952,40 @@ out: | |||
7814 | return ret; | 7952 | return ret; |
7815 | } | 7953 | } |
7816 | 7954 | ||
7955 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info) | ||
7956 | { | ||
7957 | struct btrfs_block_group_cache *block_group; | ||
7958 | u64 last = 0; | ||
7959 | |||
7960 | while (1) { | ||
7961 | struct inode *inode; | ||
7962 | |||
7963 | block_group = btrfs_lookup_first_block_group(info, last); | ||
7964 | while (block_group) { | ||
7965 | spin_lock(&block_group->lock); | ||
7966 | if (block_group->iref) | ||
7967 | break; | ||
7968 | spin_unlock(&block_group->lock); | ||
7969 | block_group = next_block_group(info->tree_root, | ||
7970 | block_group); | ||
7971 | } | ||
7972 | if (!block_group) { | ||
7973 | if (last == 0) | ||
7974 | break; | ||
7975 | last = 0; | ||
7976 | continue; | ||
7977 | } | ||
7978 | |||
7979 | inode = block_group->inode; | ||
7980 | block_group->iref = 0; | ||
7981 | block_group->inode = NULL; | ||
7982 | spin_unlock(&block_group->lock); | ||
7983 | iput(inode); | ||
7984 | last = block_group->key.objectid + block_group->key.offset; | ||
7985 | btrfs_put_block_group(block_group); | ||
7986 | } | ||
7987 | } | ||
7988 | |||
7817 | int btrfs_free_block_groups(struct btrfs_fs_info *info) | 7989 | int btrfs_free_block_groups(struct btrfs_fs_info *info) |
7818 | { | 7990 | { |
7819 | struct btrfs_block_group_cache *block_group; | 7991 | struct btrfs_block_group_cache *block_group; |
@@ -7897,6 +8069,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7897 | struct btrfs_key key; | 8069 | struct btrfs_key key; |
7898 | struct btrfs_key found_key; | 8070 | struct btrfs_key found_key; |
7899 | struct extent_buffer *leaf; | 8071 | struct extent_buffer *leaf; |
8072 | int need_clear = 0; | ||
8073 | u64 cache_gen; | ||
7900 | 8074 | ||
7901 | root = info->extent_root; | 8075 | root = info->extent_root; |
7902 | key.objectid = 0; | 8076 | key.objectid = 0; |
@@ -7906,6 +8080,11 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7906 | if (!path) | 8080 | if (!path) |
7907 | return -ENOMEM; | 8081 | return -ENOMEM; |
7908 | 8082 | ||
8083 | cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); | ||
8084 | if (cache_gen != 0 && | ||
8085 | btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) | ||
8086 | need_clear = 1; | ||
8087 | |||
7909 | while (1) { | 8088 | while (1) { |
7910 | ret = find_first_block_group(root, path, &key); | 8089 | ret = find_first_block_group(root, path, &key); |
7911 | if (ret > 0) | 8090 | if (ret > 0) |
@@ -7928,6 +8107,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7928 | INIT_LIST_HEAD(&cache->list); | 8107 | INIT_LIST_HEAD(&cache->list); |
7929 | INIT_LIST_HEAD(&cache->cluster_list); | 8108 | INIT_LIST_HEAD(&cache->cluster_list); |
7930 | 8109 | ||
8110 | if (need_clear) | ||
8111 | cache->disk_cache_state = BTRFS_DC_CLEAR; | ||
8112 | |||
7931 | /* | 8113 | /* |
7932 | * we only want to have 32k of ram per block group for keeping | 8114 | * we only want to have 32k of ram per block group for keeping |
7933 | * track of free space, and if we pass 1/2 of that we want to | 8115 | * track of free space, and if we pass 1/2 of that we want to |
@@ -8032,6 +8214,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
8032 | cache->key.offset = size; | 8214 | cache->key.offset = size; |
8033 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | 8215 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
8034 | cache->sectorsize = root->sectorsize; | 8216 | cache->sectorsize = root->sectorsize; |
8217 | cache->fs_info = root->fs_info; | ||
8035 | 8218 | ||
8036 | /* | 8219 | /* |
8037 | * we only want to have 32k of ram per block group for keeping track | 8220 | * we only want to have 32k of ram per block group for keeping track |
@@ -8088,7 +8271,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8088 | struct btrfs_path *path; | 8271 | struct btrfs_path *path; |
8089 | struct btrfs_block_group_cache *block_group; | 8272 | struct btrfs_block_group_cache *block_group; |
8090 | struct btrfs_free_cluster *cluster; | 8273 | struct btrfs_free_cluster *cluster; |
8274 | struct btrfs_root *tree_root = root->fs_info->tree_root; | ||
8091 | struct btrfs_key key; | 8275 | struct btrfs_key key; |
8276 | struct inode *inode; | ||
8092 | int ret; | 8277 | int ret; |
8093 | 8278 | ||
8094 | root = root->fs_info->extent_root; | 8279 | root = root->fs_info->extent_root; |
@@ -8097,8 +8282,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8097 | BUG_ON(!block_group); | 8282 | BUG_ON(!block_group); |
8098 | BUG_ON(!block_group->ro); | 8283 | BUG_ON(!block_group->ro); |
8099 | 8284 | ||
8100 | memcpy(&key, &block_group->key, sizeof(key)); | ||
8101 | |||
8102 | /* make sure this block group isn't part of an allocation cluster */ | 8285 | /* make sure this block group isn't part of an allocation cluster */ |
8103 | cluster = &root->fs_info->data_alloc_cluster; | 8286 | cluster = &root->fs_info->data_alloc_cluster; |
8104 | spin_lock(&cluster->refill_lock); | 8287 | spin_lock(&cluster->refill_lock); |
@@ -8117,6 +8300,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8117 | path = btrfs_alloc_path(); | 8300 | path = btrfs_alloc_path(); |
8118 | BUG_ON(!path); | 8301 | BUG_ON(!path); |
8119 | 8302 | ||
8303 | inode = lookup_free_space_inode(root, block_group, path); | ||
8304 | if (!IS_ERR(inode)) { | ||
8305 | btrfs_orphan_add(trans, inode); | ||
8306 | clear_nlink(inode); | ||
8307 | /* One for the block groups ref */ | ||
8308 | spin_lock(&block_group->lock); | ||
8309 | if (block_group->iref) { | ||
8310 | block_group->iref = 0; | ||
8311 | block_group->inode = NULL; | ||
8312 | spin_unlock(&block_group->lock); | ||
8313 | iput(inode); | ||
8314 | } else { | ||
8315 | spin_unlock(&block_group->lock); | ||
8316 | } | ||
8317 | /* One for our lookup ref */ | ||
8318 | iput(inode); | ||
8319 | } | ||
8320 | |||
8321 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||
8322 | key.offset = block_group->key.objectid; | ||
8323 | key.type = 0; | ||
8324 | |||
8325 | ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); | ||
8326 | if (ret < 0) | ||
8327 | goto out; | ||
8328 | if (ret > 0) | ||
8329 | btrfs_release_path(tree_root, path); | ||
8330 | if (ret == 0) { | ||
8331 | ret = btrfs_del_item(trans, tree_root, path); | ||
8332 | if (ret) | ||
8333 | goto out; | ||
8334 | btrfs_release_path(tree_root, path); | ||
8335 | } | ||
8336 | |||
8120 | spin_lock(&root->fs_info->block_group_cache_lock); | 8337 | spin_lock(&root->fs_info->block_group_cache_lock); |
8121 | rb_erase(&block_group->cache_node, | 8338 | rb_erase(&block_group->cache_node, |
8122 | &root->fs_info->block_group_cache_tree); | 8339 | &root->fs_info->block_group_cache_tree); |
@@ -8140,6 +8357,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8140 | block_group->space_info->bytes_readonly -= block_group->key.offset; | 8357 | block_group->space_info->bytes_readonly -= block_group->key.offset; |
8141 | spin_unlock(&block_group->space_info->lock); | 8358 | spin_unlock(&block_group->space_info->lock); |
8142 | 8359 | ||
8360 | memcpy(&key, &block_group->key, sizeof(key)); | ||
8361 | |||
8143 | btrfs_clear_space_info_full(root->fs_info); | 8362 | btrfs_clear_space_info_full(root->fs_info); |
8144 | 8363 | ||
8145 | btrfs_put_block_group(block_group); | 8364 | btrfs_put_block_group(block_group); |