author		Josef Bacik <jbacik@redhat.com>		2009-02-20 11:00:09 -0500
committer	Chris Mason <chris.mason@oracle.com>	2009-02-20 11:00:09 -0500
commit		6a63209fc02d5483371f07e4913ee8abad608051
tree		7595e0df452928b677b66a64baf0cb3b7ec53dfc /fs/btrfs/extent-tree.c
parent		2cfbd50b536c878e58ab3681c4e944fa3d99b415
Btrfs: add better -ENOSPC handling
This is a step in the direction of better -ENOSPC handling. Instead of
checking the global bytes counter, we check the space_info bytes counters to
make sure we have enough space.
If we don't, we go ahead and try to allocate a new chunk, and if that fails
we return -ENOSPC. This patch adds two counters to btrfs_space_info:
bytes_delalloc and bytes_may_use.
bytes_delalloc accounts for extents we have actually set up for delalloc and
that will be allocated at some point down the line.
bytes_may_use keeps track of how many bytes we may use for delalloc at
some point. When we actually set the extent_bit for the delalloc bytes, we
subtract the reserved bytes from the bytes_may_use counter. This keeps us from
ending up in a state where we cannot actually allocate space for outstanding
delalloc bytes.
Signed-off-by: Josef Bacik <jbacik@redhat.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--	fs/btrfs/extent-tree.c | 215
1 file changed, 200 insertions(+), 15 deletions(-)
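Before the diff itself, a rough sketch of the lifecycle the new counters are
meant to follow. The four helpers are the ones added by this patch; the caller
example_write_path() and do_the_actual_write() are hypothetical, and real
callers sit in the write and io_tree paths rather than in one function like
this. The counter effects noted in the comments come from the patch below.

/*
 * Illustrative only: how the helpers added by this patch are meant to be
 * sequenced.  example_write_path() and do_the_actual_write() are hypothetical.
 */
static int example_write_path(struct btrfs_root *root, struct inode *inode,
			      u64 bytes)
{
	int ret;

	/* reserve up front: bytes_may_use += bytes */
	ret = btrfs_check_data_free_space(root, inode, bytes);
	if (ret)
		return ret;	/* -ENOSPC once chunk allocation has failed */

	ret = do_the_actual_write(inode, bytes);	/* hypothetical */
	if (ret) {
		/* error path: undo the reservation, bytes_may_use -= bytes */
		btrfs_free_reserved_data_space(root, inode, bytes);
		return ret;
	}

	/*
	 * delalloc extent set in the io_tree:
	 * bytes_delalloc += bytes, bytes_may_use -= bytes
	 */
	btrfs_delalloc_reserve_space(root, inode, bytes);

	/*
	 * ... later, when the delalloc extent is cleared:
	 * bytes_delalloc -= bytes
	 */
	btrfs_delalloc_free_space(root, inode, bytes);
	return 0;
}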
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0a5d796c9f7e..e11875e97c2f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -60,6 +60,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 			      u64 bytenr, u64 num_bytes, int alloc,
 			      int mark_free);
 
+static int do_chunk_alloc(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *extent_root, u64 alloc_bytes,
+			  u64 flags, int force);
+
 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
 {
 	return (cache->flags & bits) == bits;
@@ -1909,6 +1913,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	found->bytes_pinned = 0;
 	found->bytes_reserved = 0;
 	found->bytes_readonly = 0;
+	found->bytes_delalloc = 0;
 	found->full = 0;
 	found->force_alloc = 0;
 	*space_info = found;
@@ -1972,6 +1977,196 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 	return flags;
 }
 
+static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	u64 alloc_profile;
+
+	if (data) {
+		alloc_profile = info->avail_data_alloc_bits &
+				info->data_alloc_profile;
+		data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
+	} else if (root == root->fs_info->chunk_root) {
+		alloc_profile = info->avail_system_alloc_bits &
+				info->system_alloc_profile;
+		data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
+	} else {
+		alloc_profile = info->avail_metadata_alloc_bits &
+				info->metadata_alloc_profile;
+		data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
+	}
+
+	return btrfs_reduce_alloc_profile(root, data);
+}
+
+void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
+{
+	u64 alloc_target;
+
+	alloc_target = btrfs_get_alloc_profile(root, 1);
+	BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
+						       alloc_target);
+}
+
+/*
+ * for now this just makes sure we have at least 5% of our metadata space free
+ * for use.
+ */
+int btrfs_check_metadata_free_space(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_space_info *meta_sinfo;
+	u64 alloc_target, thresh;
+
+	/* get the space info for where the metadata will live */
+	alloc_target = btrfs_get_alloc_profile(root, 0);
+	meta_sinfo = __find_space_info(info, alloc_target);
+
+	/*
+	 * if the metadata area isn't maxed out then there is no sense in
+	 * checking how much is used, since we can always allocate a new chunk
+	 */
+	if (!meta_sinfo->full)
+		return 0;
+
+	spin_lock(&meta_sinfo->lock);
+	thresh = meta_sinfo->total_bytes * 95;
+
+	do_div(thresh, 100);
+
+	if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+	    meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
+		spin_unlock(&meta_sinfo->lock);
+		return -ENOSPC;
+	}
+	spin_unlock(&meta_sinfo->lock);
+
+	return 0;
+}
+
+/*
+ * This will check the space that the inode allocates from to make sure we have
+ * enough space for bytes.
+ */
+int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
+				u64 bytes)
+{
+	struct btrfs_space_info *data_sinfo;
+	int ret = 0;
+
+	/* make sure bytes are sectorsize aligned */
+	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+
+	data_sinfo = BTRFS_I(inode)->space_info;
+again:
+	/* make sure we have enough space to handle the data first */
+	spin_lock(&data_sinfo->lock);
+	if (data_sinfo->total_bytes - data_sinfo->bytes_used -
+	    data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
+	    data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
+	    data_sinfo->bytes_may_use < bytes) {
+		/*
+		 * if we don't have enough free bytes in this space then we need
+		 * to alloc a new chunk.
+		 */
+		if (!data_sinfo->full) {
+			u64 alloc_target;
+			struct btrfs_trans_handle *trans;
+
+			data_sinfo->force_alloc = 1;
+			spin_unlock(&data_sinfo->lock);
+
+			alloc_target = btrfs_get_alloc_profile(root, 1);
+			trans = btrfs_start_transaction(root, 1);
+			if (!trans)
+				return -ENOMEM;
+
+			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+					     bytes + 2 * 1024 * 1024,
+					     alloc_target, 0);
+			btrfs_end_transaction(trans, root);
+			if (ret)
+				return ret;
+			goto again;
+		}
+		spin_unlock(&data_sinfo->lock);
+		printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
+		       ", %llu bytes_used, %llu bytes_reserved, "
+		       "%llu bytes_pinned, %llu bytes_readonly, %llu may use, "
+		       "%llu total\n", bytes, data_sinfo->bytes_delalloc,
+		       data_sinfo->bytes_used, data_sinfo->bytes_reserved,
+		       data_sinfo->bytes_pinned, data_sinfo->bytes_readonly,
+		       data_sinfo->bytes_may_use, data_sinfo->total_bytes);
+		return -ENOSPC;
+	}
+	data_sinfo->bytes_may_use += bytes;
+	BTRFS_I(inode)->reserved_bytes += bytes;
+	spin_unlock(&data_sinfo->lock);
+
+	return btrfs_check_metadata_free_space(root);
+}
+
+/*
+ * if there was an error for whatever reason after calling
+ * btrfs_check_data_free_space, call this so we can cleanup the counters.
+ */
+void btrfs_free_reserved_data_space(struct btrfs_root *root,
+				    struct inode *inode, u64 bytes)
+{
+	struct btrfs_space_info *data_sinfo;
+
+	/* make sure bytes are sectorsize aligned */
+	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+
+	data_sinfo = BTRFS_I(inode)->space_info;
+	spin_lock(&data_sinfo->lock);
+	data_sinfo->bytes_may_use -= bytes;
+	BTRFS_I(inode)->reserved_bytes -= bytes;
+	spin_unlock(&data_sinfo->lock);
+}
+
+/* called when we are adding a delalloc extent to the inode's io_tree */
+void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
+				  u64 bytes)
+{
+	struct btrfs_space_info *data_sinfo;
+
+	/* get the space info for where this inode will be storing its data */
+	data_sinfo = BTRFS_I(inode)->space_info;
+
+	/* make sure we have enough space to handle the data first */
+	spin_lock(&data_sinfo->lock);
+	data_sinfo->bytes_delalloc += bytes;
+
+	/*
+	 * we are adding a delalloc extent without calling
+	 * btrfs_check_data_free_space first.  This happens on a weird
+	 * writepage condition, but shouldn't hurt our accounting
+	 */
+	if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
+		data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
+		BTRFS_I(inode)->reserved_bytes = 0;
+	} else {
+		data_sinfo->bytes_may_use -= bytes;
+		BTRFS_I(inode)->reserved_bytes -= bytes;
+	}
+
+	spin_unlock(&data_sinfo->lock);
+}
+
+/* called when we are clearing a delalloc extent from the inode's io_tree */
+void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
+			       u64 bytes)
+{
+	struct btrfs_space_info *info;
+
+	info = BTRFS_I(inode)->space_info;
+
+	spin_lock(&info->lock);
+	info->bytes_delalloc -= bytes;
+	spin_unlock(&info->lock);
+}
+
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
 			  u64 flags, int force)
@@ -3105,6 +3300,10 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
 	       (unsigned long long)(info->total_bytes - info->bytes_used -
 				    info->bytes_pinned - info->bytes_reserved),
 	       (info->full) ? "" : "not ");
+	printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
+	       " may_use=%llu, used=%llu\n", info->total_bytes,
+	       info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use,
+	       info->bytes_used);
 
 	down_read(&info->groups_sem);
 	list_for_each_entry(cache, &info->block_groups, list) {
@@ -3131,24 +3330,10 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 {
 	int ret;
 	u64 search_start = 0;
-	u64 alloc_profile;
 	struct btrfs_fs_info *info = root->fs_info;
 
-	if (data) {
-		alloc_profile = info->avail_data_alloc_bits &
-				info->data_alloc_profile;
-		data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
-	} else if (root == root->fs_info->chunk_root) {
-		alloc_profile = info->avail_system_alloc_bits &
-				info->system_alloc_profile;
-		data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
-	} else {
-		alloc_profile = info->avail_metadata_alloc_bits &
-				info->metadata_alloc_profile;
-		data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
-	}
+	data = btrfs_get_alloc_profile(root, data);
 again:
-	data = btrfs_reduce_alloc_profile(root, data);
 	/*
 	 * the only place that sets empty_size is btrfs_realloc_node, which
 	 * is not called recursively on allocations
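A note on the alignment idiom used twice in the new code: (bytes + sectorsize - 1) & ~(sectorsize - 1) rounds a byte count up to the next sectorsize boundary, and it only works because sectorsize is a power of two. A standalone userspace sketch with made-up values (not part of the patch):

#include <stdio.h>
#include <stdint.h>

/*
 * Round 'bytes' up to the next multiple of 'sectorsize'.  Same bit trick
 * as in btrfs_check_data_free_space(); it relies on sectorsize being a
 * power of two.
 */
static uint64_t align_to_sector(uint64_t bytes, uint64_t sectorsize)
{
	return (bytes + sectorsize - 1) & ~(sectorsize - 1);
}

int main(void)
{
	uint64_t sectorsize = 4096;	/* made-up example: 4 KiB sectors */

	printf("%llu\n", (unsigned long long)align_to_sector(1, sectorsize));    /* 4096 */
	printf("%llu\n", (unsigned long long)align_to_sector(4096, sectorsize)); /* 4096 */
	printf("%llu\n", (unsigned long long)align_to_sector(4097, sectorsize)); /* 8192 */
	return 0;
}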