diff options
author | Amit Arora <aarora@in.ibm.com> | 2007-07-17 21:42:38 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2007-07-17 21:42:38 -0400 |
commit | 56055d3ae4cc7fa6d2b10885f20269de8a989ed7 (patch) | |
tree | ab0008be38ef4d2789aee16d084fdaa2d5acfc32 /fs/ext4/extents.c | |
parent | a2df2a63407803a833f82e1fa6693826c8c9d584 (diff) |
write support for preallocated blocks
This patch adds write support to the uninitialized extents that get
created when a preallocation is done using fallocate(). It takes care of
splitting the extents into multiple (up to three) extents and merging the
new split extents with neighbouring ones, if possible.
Signed-off-by: Amit Arora <aarora@in.ibm.com>
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- | fs/ext4/extents.c | 254 |
1 files changed, 222 insertions, 32 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ba25832a756c..ded3d469f978 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -1141,6 +1141,53 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1141 | } | 1141 | } |
1142 | 1142 | ||
1143 | /* | 1143 | /* |
1144 | * This function tries to merge the "ex" extent with the next extent in the tree. | ||
1145 | * It always tries to merge towards the right. If you want to merge towards the | ||
1146 | * left, pass "ex - 1" as the argument instead of "ex". | ||
1147 | * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns | ||
1148 | * 1 if they got merged. | ||
1149 | */ | ||
1150 | int ext4_ext_try_to_merge(struct inode *inode, | ||
1151 | struct ext4_ext_path *path, | ||
1152 | struct ext4_extent *ex) | ||
1153 | { | ||
1154 | struct ext4_extent_header *eh; | ||
1155 | unsigned int depth, len; | ||
1156 | int merge_done = 0; | ||
1157 | int uninitialized = 0; | ||
1158 | |||
1159 | depth = ext_depth(inode); | ||
1160 | BUG_ON(path[depth].p_hdr == NULL); | ||
1161 | eh = path[depth].p_hdr; | ||
1162 | |||
1163 | while (ex < EXT_LAST_EXTENT(eh)) { | ||
1164 | if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) | ||
1165 | break; | ||
1166 | /* merge with next extent! */ | ||
1167 | if (ext4_ext_is_uninitialized(ex)) | ||
1168 | uninitialized = 1; | ||
1169 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) | ||
1170 | + ext4_ext_get_actual_len(ex + 1)); | ||
1171 | if (uninitialized) | ||
1172 | ext4_ext_mark_uninitialized(ex); | ||
1173 | |||
1174 | if (ex + 1 < EXT_LAST_EXTENT(eh)) { | ||
1175 | len = (EXT_LAST_EXTENT(eh) - ex - 1) | ||
1176 | * sizeof(struct ext4_extent); | ||
1177 | memmove(ex + 1, ex + 2, len); | ||
1178 | } | ||
1179 | eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1); | ||
1180 | merge_done = 1; | ||
1181 | WARN_ON(eh->eh_entries == 0); | ||
1182 | if (!eh->eh_entries) | ||
1183 | ext4_error(inode->i_sb, "ext4_ext_try_to_merge", | ||
1184 | "inode#%lu, eh->eh_entries = 0!", inode->i_ino); | ||
1185 | } | ||
1186 | |||
1187 | return merge_done; | ||
1188 | } | ||
1189 | |||
1190 | /* | ||
1144 | * check if a portion of the "newext" extent overlaps with an | 1191 | * check if a portion of the "newext" extent overlaps with an |
1145 | * existing extent. | 1192 | * existing extent. |
1146 | * | 1193 | * |
@@ -1328,25 +1375,7 @@ has_space: | |||
1328 | 1375 | ||
1329 | merge: | 1376 | merge: |
1330 | /* try to merge extents to the right */ | 1377 | /* try to merge extents to the right */ |
1331 | while (nearex < EXT_LAST_EXTENT(eh)) { | 1378 | ext4_ext_try_to_merge(inode, path, nearex); |
1332 | if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1)) | ||
1333 | break; | ||
1334 | /* merge with next extent! */ | ||
1335 | if (ext4_ext_is_uninitialized(nearex)) | ||
1336 | uninitialized = 1; | ||
1337 | nearex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(nearex) | ||
1338 | + ext4_ext_get_actual_len(nearex + 1)); | ||
1339 | if (uninitialized) | ||
1340 | ext4_ext_mark_uninitialized(nearex); | ||
1341 | |||
1342 | if (nearex + 1 < EXT_LAST_EXTENT(eh)) { | ||
1343 | len = (EXT_LAST_EXTENT(eh) - nearex - 1) | ||
1344 | * sizeof(struct ext4_extent); | ||
1345 | memmove(nearex + 1, nearex + 2, len); | ||
1346 | } | ||
1347 | eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); | ||
1348 | BUG_ON(eh->eh_entries == 0); | ||
1349 | } | ||
1350 | 1379 | ||
1351 | /* try to merge extents to the left */ | 1380 | /* try to merge extents to the left */ |
1352 | 1381 | ||
@@ -2012,15 +2041,158 @@ void ext4_ext_release(struct super_block *sb) | |||
2012 | #endif | 2041 | #endif |
2013 | } | 2042 | } |
2014 | 2043 | ||
2044 | /* | ||
2045 | * This function is called by ext4_ext_get_blocks() if someone tries to write | ||
2046 | * to an uninitialized extent. It may result in splitting the uninitialized | ||
2047 | * extent into multiple extents (up to three - one initialized and two | ||
2048 | * uninitialized). | ||
2049 | * There are three possibilities: | ||
2050 | * a> There is no split required: The entire extent should be initialized | ||
2051 | * b> Splits into two extents: The write is happening at either end of the extent | ||
2052 | * c> Splits into three extents: Someone is writing in the middle of the extent | ||
2053 | */ | ||
2054 | int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, | ||
2055 | struct ext4_ext_path *path, | ||
2056 | ext4_fsblk_t iblock, | ||
2057 | unsigned long max_blocks) | ||
2058 | { | ||
2059 | struct ext4_extent *ex, newex; | ||
2060 | struct ext4_extent *ex1 = NULL; | ||
2061 | struct ext4_extent *ex2 = NULL; | ||
2062 | struct ext4_extent *ex3 = NULL; | ||
2063 | struct ext4_extent_header *eh; | ||
2064 | unsigned int allocated, ee_block, ee_len, depth; | ||
2065 | ext4_fsblk_t newblock; | ||
2066 | int err = 0; | ||
2067 | int ret = 0; | ||
2068 | |||
2069 | depth = ext_depth(inode); | ||
2070 | eh = path[depth].p_hdr; | ||
2071 | ex = path[depth].p_ext; | ||
2072 | ee_block = le32_to_cpu(ex->ee_block); | ||
2073 | ee_len = ext4_ext_get_actual_len(ex); | ||
2074 | allocated = ee_len - (iblock - ee_block); | ||
2075 | newblock = iblock - ee_block + ext_pblock(ex); | ||
2076 | ex2 = ex; | ||
2077 | |||
2078 | /* ex1: ee_block to iblock - 1 : uninitialized */ | ||
2079 | if (iblock > ee_block) { | ||
2080 | ex1 = ex; | ||
2081 | ex1->ee_len = cpu_to_le16(iblock - ee_block); | ||
2082 | ext4_ext_mark_uninitialized(ex1); | ||
2083 | ex2 = &newex; | ||
2084 | } | ||
2085 | /* | ||
2086 | * for sanity, update the length of the ex2 extent before | ||
2087 | * we insert ex3, if ex1 is NULL. This is to avoid temporary | ||
2088 | * overlap of blocks. | ||
2089 | */ | ||
2090 | if (!ex1 && allocated > max_blocks) | ||
2091 | ex2->ee_len = cpu_to_le16(max_blocks); | ||
2092 | /* ex3: iblock + max_blocks to ee_block + ee_len - 1 : uninitialised */ | ||
2093 | if (allocated > max_blocks) { | ||
2094 | unsigned int newdepth; | ||
2095 | ex3 = &newex; | ||
2096 | ex3->ee_block = cpu_to_le32(iblock + max_blocks); | ||
2097 | ext4_ext_store_pblock(ex3, newblock + max_blocks); | ||
2098 | ex3->ee_len = cpu_to_le16(allocated - max_blocks); | ||
2099 | ext4_ext_mark_uninitialized(ex3); | ||
2100 | err = ext4_ext_insert_extent(handle, inode, path, ex3); | ||
2101 | if (err) | ||
2102 | goto out; | ||
2103 | /* | ||
2104 | * The depth, and hence eh & ex might change | ||
2105 | * as part of the insert above. | ||
2106 | */ | ||
2107 | newdepth = ext_depth(inode); | ||
2108 | if (newdepth != depth) { | ||
2109 | depth = newdepth; | ||
2110 | path = ext4_ext_find_extent(inode, iblock, NULL); | ||
2111 | if (IS_ERR(path)) { | ||
2112 | err = PTR_ERR(path); | ||
2113 | path = NULL; | ||
2114 | goto out; | ||
2115 | } | ||
2116 | eh = path[depth].p_hdr; | ||
2117 | ex = path[depth].p_ext; | ||
2118 | if (ex2 != &newex) | ||
2119 | ex2 = ex; | ||
2120 | } | ||
2121 | allocated = max_blocks; | ||
2122 | } | ||
2123 | /* | ||
2124 | * If there was a change of depth as part of the | ||
2125 | * insertion of ex3 above, we need to update the length | ||
2126 | * of the ex1 extent again here | ||
2127 | */ | ||
2128 | if (ex1 && ex1 != ex) { | ||
2129 | ex1 = ex; | ||
2130 | ex1->ee_len = cpu_to_le16(iblock - ee_block); | ||
2131 | ext4_ext_mark_uninitialized(ex1); | ||
2132 | ex2 = &newex; | ||
2133 | } | ||
2134 | /* ex2: iblock to iblock + maxblocks-1 : initialised */ | ||
2135 | ex2->ee_block = cpu_to_le32(iblock); | ||
2136 | ex2->ee_start = cpu_to_le32(newblock); | ||
2137 | ext4_ext_store_pblock(ex2, newblock); | ||
2138 | ex2->ee_len = cpu_to_le16(allocated); | ||
2139 | if (ex2 != ex) | ||
2140 | goto insert; | ||
2141 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
2142 | if (err) | ||
2143 | goto out; | ||
2144 | /* | ||
2145 | * New (initialized) extent starts from the first block | ||
2146 | * in the current extent. i.e., ex2 == ex | ||
2147 | * We have to see if it can be merged with the extent | ||
2148 | * on the left. | ||
2149 | */ | ||
2150 | if (ex2 > EXT_FIRST_EXTENT(eh)) { | ||
2151 | /* | ||
2152 | * To merge left, pass "ex2 - 1" to try_to_merge(), | ||
2153 | * since it merges towards right _only_. | ||
2154 | */ | ||
2155 | ret = ext4_ext_try_to_merge(inode, path, ex2 - 1); | ||
2156 | if (ret) { | ||
2157 | err = ext4_ext_correct_indexes(handle, inode, path); | ||
2158 | if (err) | ||
2159 | goto out; | ||
2160 | depth = ext_depth(inode); | ||
2161 | ex2--; | ||
2162 | } | ||
2163 | } | ||
2164 | /* | ||
2165 | * Try to Merge towards right. This might be required | ||
2166 | * only when the whole extent is being written to. | ||
2167 | * i.e. ex2 == ex and ex3 == NULL. | ||
2168 | */ | ||
2169 | if (!ex3) { | ||
2170 | ret = ext4_ext_try_to_merge(inode, path, ex2); | ||
2171 | if (ret) { | ||
2172 | err = ext4_ext_correct_indexes(handle, inode, path); | ||
2173 | if (err) | ||
2174 | goto out; | ||
2175 | } | ||
2176 | } | ||
2177 | /* Mark modified extent as dirty */ | ||
2178 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
2179 | goto out; | ||
2180 | insert: | ||
2181 | err = ext4_ext_insert_extent(handle, inode, path, &newex); | ||
2182 | out: | ||
2183 | return err ? err : allocated; | ||
2184 | } | ||
2185 | |||
2015 | int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 2186 | int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, |
2016 | ext4_fsblk_t iblock, | 2187 | ext4_fsblk_t iblock, |
2017 | unsigned long max_blocks, struct buffer_head *bh_result, | 2188 | unsigned long max_blocks, struct buffer_head *bh_result, |
2018 | int create, int extend_disksize) | 2189 | int create, int extend_disksize) |
2019 | { | 2190 | { |
2020 | struct ext4_ext_path *path = NULL; | 2191 | struct ext4_ext_path *path = NULL; |
2192 | struct ext4_extent_header *eh; | ||
2021 | struct ext4_extent newex, *ex; | 2193 | struct ext4_extent newex, *ex; |
2022 | ext4_fsblk_t goal, newblock; | 2194 | ext4_fsblk_t goal, newblock; |
2023 | int err = 0, depth; | 2195 | int err = 0, depth, ret; |
2024 | unsigned long allocated = 0; | 2196 | unsigned long allocated = 0; |
2025 | 2197 | ||
2026 | __clear_bit(BH_New, &bh_result->b_state); | 2198 | __clear_bit(BH_New, &bh_result->b_state); |
@@ -2033,8 +2205,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2033 | if (goal) { | 2205 | if (goal) { |
2034 | if (goal == EXT4_EXT_CACHE_GAP) { | 2206 | if (goal == EXT4_EXT_CACHE_GAP) { |
2035 | if (!create) { | 2207 | if (!create) { |
2036 | /* block isn't allocated yet and | 2208 | /* |
2037 | * user doesn't want to allocate it */ | 2209 | * block isn't allocated yet and |
2210 | * user doesn't want to allocate it | ||
2211 | */ | ||
2038 | goto out2; | 2212 | goto out2; |
2039 | } | 2213 | } |
2040 | /* we should allocate requested block */ | 2214 | /* we should allocate requested block */ |
@@ -2068,6 +2242,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2068 | * this is why assert can't be put in ext4_ext_find_extent() | 2242 | * this is why assert can't be put in ext4_ext_find_extent() |
2069 | */ | 2243 | */ |
2070 | BUG_ON(path[depth].p_ext == NULL && depth != 0); | 2244 | BUG_ON(path[depth].p_ext == NULL && depth != 0); |
2245 | eh = path[depth].p_hdr; | ||
2071 | 2246 | ||
2072 | ex = path[depth].p_ext; | 2247 | ex = path[depth].p_ext; |
2073 | if (ex) { | 2248 | if (ex) { |
@@ -2076,13 +2251,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2076 | unsigned short ee_len; | 2251 | unsigned short ee_len; |
2077 | 2252 | ||
2078 | /* | 2253 | /* |
2079 | * Allow future support for preallocated extents to be added | ||
2080 | * as an RO_COMPAT feature: | ||
2081 | * Uninitialized extents are treated as holes, except that | 2254 | * Uninitialized extents are treated as holes, except that |
2082 | * we avoid (fail) allocating new blocks during a write. | 2255 | * we split out initialized portions during a write. |
2083 | */ | 2256 | */ |
2084 | if (le16_to_cpu(ex->ee_len) > EXT_MAX_LEN) | ||
2085 | goto out2; | ||
2086 | ee_len = ext4_ext_get_actual_len(ex); | 2257 | ee_len = ext4_ext_get_actual_len(ex); |
2087 | /* if found extent covers block, simply return it */ | 2258 | /* if found extent covers block, simply return it */ |
2088 | if (iblock >= ee_block && iblock < ee_block + ee_len) { | 2259 | if (iblock >= ee_block && iblock < ee_block + ee_len) { |
@@ -2091,12 +2262,27 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2091 | allocated = ee_len - (iblock - ee_block); | 2262 | allocated = ee_len - (iblock - ee_block); |
2092 | ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, | 2263 | ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, |
2093 | ee_block, ee_len, newblock); | 2264 | ee_block, ee_len, newblock); |
2265 | |||
2094 | /* Do not put uninitialized extent in the cache */ | 2266 | /* Do not put uninitialized extent in the cache */ |
2095 | if (!ext4_ext_is_uninitialized(ex)) | 2267 | if (!ext4_ext_is_uninitialized(ex)) { |
2096 | ext4_ext_put_in_cache(inode, ee_block, | 2268 | ext4_ext_put_in_cache(inode, ee_block, |
2097 | ee_len, ee_start, | 2269 | ee_len, ee_start, |
2098 | EXT4_EXT_CACHE_EXTENT); | 2270 | EXT4_EXT_CACHE_EXTENT); |
2099 | goto out; | 2271 | goto out; |
2272 | } | ||
2273 | if (create == EXT4_CREATE_UNINITIALIZED_EXT) | ||
2274 | goto out; | ||
2275 | if (!create) | ||
2276 | goto out2; | ||
2277 | |||
2278 | ret = ext4_ext_convert_to_initialized(handle, inode, | ||
2279 | path, iblock, | ||
2280 | max_blocks); | ||
2281 | if (ret <= 0) | ||
2282 | goto out2; | ||
2283 | else | ||
2284 | allocated = ret; | ||
2285 | goto outnew; | ||
2100 | } | 2286 | } |
2101 | } | 2287 | } |
2102 | 2288 | ||
@@ -2105,8 +2291,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2105 | * we couldn't try to create block if create flag is zero | 2291 | * we couldn't try to create block if create flag is zero |
2106 | */ | 2292 | */ |
2107 | if (!create) { | 2293 | if (!create) { |
2108 | /* put just found gap into cache to speed up | 2294 | /* |
2109 | * subsequent requests */ | 2295 | * put just found gap into cache to speed up |
2296 | * subsequent requests | ||
2297 | */ | ||
2110 | ext4_ext_put_gap_in_cache(inode, path, iblock); | 2298 | ext4_ext_put_gap_in_cache(inode, path, iblock); |
2111 | goto out2; | 2299 | goto out2; |
2112 | } | 2300 | } |
@@ -2152,6 +2340,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2152 | 2340 | ||
2153 | /* previous routine could use block we allocated */ | 2341 | /* previous routine could use block we allocated */ |
2154 | newblock = ext_pblock(&newex); | 2342 | newblock = ext_pblock(&newex); |
2343 | outnew: | ||
2155 | __set_bit(BH_New, &bh_result->b_state); | 2344 | __set_bit(BH_New, &bh_result->b_state); |
2156 | 2345 | ||
2157 | /* Cache only when it is _not_ an uninitialized extent */ | 2346 | /* Cache only when it is _not_ an uninitialized extent */ |
@@ -2221,7 +2410,8 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) | |||
2221 | err = ext4_ext_remove_space(inode, last_block); | 2410 | err = ext4_ext_remove_space(inode, last_block); |
2222 | 2411 | ||
2223 | /* In a multi-transaction truncate, we only make the final | 2412 | /* In a multi-transaction truncate, we only make the final |
2224 | * transaction synchronous. */ | 2413 | * transaction synchronous. |
2414 | */ | ||
2225 | if (IS_SYNC(inode)) | 2415 | if (IS_SYNC(inode)) |
2226 | handle->h_sync = 1; | 2416 | handle->h_sync = 1; |
2227 | 2417 | ||