diff options
| author | Amit Arora <aarora@in.ibm.com> | 2007-07-17 21:42:38 -0400 |
|---|---|---|
| committer | Theodore Ts'o <tytso@mit.edu> | 2007-07-17 21:42:38 -0400 |
| commit | 56055d3ae4cc7fa6d2b10885f20269de8a989ed7 (patch) | |
| tree | ab0008be38ef4d2789aee16d084fdaa2d5acfc32 | |
| parent | a2df2a63407803a833f82e1fa6693826c8c9d584 (diff) | |
write support for preallocated blocks
This patch adds write support to the uninitialized extents that get
created when a preallocation is done using fallocate(). It takes care of
splitting the extents into multiple (up to three) extents and merging the
new split extents with neighbouring ones, if possible.
Signed-off-by: Amit Arora <aarora@in.ibm.com>
| -rw-r--r-- | fs/ext4/extents.c | 254 | ||||
| -rw-r--r-- | include/linux/ext4_fs_extents.h | 3 |
2 files changed, 225 insertions, 32 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ba25832a756c..ded3d469f978 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
| @@ -1141,6 +1141,53 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
| 1141 | } | 1141 | } |
| 1142 | 1142 | ||
| 1143 | /* | 1143 | /* |
| 1144 | * This function tries to merge the "ex" extent to the next extent in the tree. | ||
| 1145 | * It always tries to merge towards right. If you want to merge towards | ||
| 1146 | * left, pass "ex - 1" as argument instead of "ex". | ||
| 1147 | * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns | ||
| 1148 | * 1 if they got merged. | ||
| 1149 | */ | ||
| 1150 | int ext4_ext_try_to_merge(struct inode *inode, | ||
| 1151 | struct ext4_ext_path *path, | ||
| 1152 | struct ext4_extent *ex) | ||
| 1153 | { | ||
| 1154 | struct ext4_extent_header *eh; | ||
| 1155 | unsigned int depth, len; | ||
| 1156 | int merge_done = 0; | ||
| 1157 | int uninitialized = 0; | ||
| 1158 | |||
| 1159 | depth = ext_depth(inode); | ||
| 1160 | BUG_ON(path[depth].p_hdr == NULL); | ||
| 1161 | eh = path[depth].p_hdr; | ||
| 1162 | |||
| 1163 | while (ex < EXT_LAST_EXTENT(eh)) { | ||
| 1164 | if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) | ||
| 1165 | break; | ||
| 1166 | /* merge with next extent! */ | ||
| 1167 | if (ext4_ext_is_uninitialized(ex)) | ||
| 1168 | uninitialized = 1; | ||
| 1169 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) | ||
| 1170 | + ext4_ext_get_actual_len(ex + 1)); | ||
| 1171 | if (uninitialized) | ||
| 1172 | ext4_ext_mark_uninitialized(ex); | ||
| 1173 | |||
| 1174 | if (ex + 1 < EXT_LAST_EXTENT(eh)) { | ||
| 1175 | len = (EXT_LAST_EXTENT(eh) - ex - 1) | ||
| 1176 | * sizeof(struct ext4_extent); | ||
| 1177 | memmove(ex + 1, ex + 2, len); | ||
| 1178 | } | ||
| 1179 | eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1); | ||
| 1180 | merge_done = 1; | ||
| 1181 | WARN_ON(eh->eh_entries == 0); | ||
| 1182 | if (!eh->eh_entries) | ||
| 1183 | ext4_error(inode->i_sb, "ext4_ext_try_to_merge", | ||
| 1184 | "inode#%lu, eh->eh_entries = 0!", inode->i_ino); | ||
| 1185 | } | ||
| 1186 | |||
| 1187 | return merge_done; | ||
| 1188 | } | ||
| 1189 | |||
| 1190 | /* | ||
| 1144 | * check if a portion of the "newext" extent overlaps with an | 1191 | * check if a portion of the "newext" extent overlaps with an |
| 1145 | * existing extent. | 1192 | * existing extent. |
| 1146 | * | 1193 | * |
| @@ -1328,25 +1375,7 @@ has_space: | |||
| 1328 | 1375 | ||
| 1329 | merge: | 1376 | merge: |
| 1330 | /* try to merge extents to the right */ | 1377 | /* try to merge extents to the right */ |
| 1331 | while (nearex < EXT_LAST_EXTENT(eh)) { | 1378 | ext4_ext_try_to_merge(inode, path, nearex); |
| 1332 | if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1)) | ||
| 1333 | break; | ||
| 1334 | /* merge with next extent! */ | ||
| 1335 | if (ext4_ext_is_uninitialized(nearex)) | ||
| 1336 | uninitialized = 1; | ||
| 1337 | nearex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(nearex) | ||
| 1338 | + ext4_ext_get_actual_len(nearex + 1)); | ||
| 1339 | if (uninitialized) | ||
| 1340 | ext4_ext_mark_uninitialized(nearex); | ||
| 1341 | |||
| 1342 | if (nearex + 1 < EXT_LAST_EXTENT(eh)) { | ||
| 1343 | len = (EXT_LAST_EXTENT(eh) - nearex - 1) | ||
| 1344 | * sizeof(struct ext4_extent); | ||
| 1345 | memmove(nearex + 1, nearex + 2, len); | ||
| 1346 | } | ||
| 1347 | eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); | ||
| 1348 | BUG_ON(eh->eh_entries == 0); | ||
| 1349 | } | ||
| 1350 | 1379 | ||
| 1351 | /* try to merge extents to the left */ | 1380 | /* try to merge extents to the left */ |
| 1352 | 1381 | ||
| @@ -2012,15 +2041,158 @@ void ext4_ext_release(struct super_block *sb) | |||
| 2012 | #endif | 2041 | #endif |
| 2013 | } | 2042 | } |
| 2014 | 2043 | ||
| 2044 | /* | ||
| 2045 | * This function is called by ext4_ext_get_blocks() if someone tries to write | ||
| 2046 | * to an uninitialized extent. It may result in splitting the uninitialized | ||
| 2047 | * extent into multiple extents (up to three - one initialized and two | ||
| 2048 | * uninitialized). | ||
| 2049 | * There are three possibilities: | ||
| 2050 | * a> There is no split required: Entire extent should be initialized | ||
| 2051 | * b> Splits in two extents: Write is happening at either end of the extent | ||
| 2052 | * c> Splits in three extents: Someone is writing in the middle of the extent | ||
| 2053 | */ | ||
| 2054 | int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, | ||
| 2055 | struct ext4_ext_path *path, | ||
| 2056 | ext4_fsblk_t iblock, | ||
| 2057 | unsigned long max_blocks) | ||
| 2058 | { | ||
| 2059 | struct ext4_extent *ex, newex; | ||
| 2060 | struct ext4_extent *ex1 = NULL; | ||
| 2061 | struct ext4_extent *ex2 = NULL; | ||
| 2062 | struct ext4_extent *ex3 = NULL; | ||
| 2063 | struct ext4_extent_header *eh; | ||
| 2064 | unsigned int allocated, ee_block, ee_len, depth; | ||
| 2065 | ext4_fsblk_t newblock; | ||
| 2066 | int err = 0; | ||
| 2067 | int ret = 0; | ||
| 2068 | |||
| 2069 | depth = ext_depth(inode); | ||
| 2070 | eh = path[depth].p_hdr; | ||
| 2071 | ex = path[depth].p_ext; | ||
| 2072 | ee_block = le32_to_cpu(ex->ee_block); | ||
| 2073 | ee_len = ext4_ext_get_actual_len(ex); | ||
| 2074 | allocated = ee_len - (iblock - ee_block); | ||
| 2075 | newblock = iblock - ee_block + ext_pblock(ex); | ||
| 2076 | ex2 = ex; | ||
| 2077 | |||
| 2078 | /* ex1: ee_block to iblock - 1 : uninitialized */ | ||
| 2079 | if (iblock > ee_block) { | ||
| 2080 | ex1 = ex; | ||
| 2081 | ex1->ee_len = cpu_to_le16(iblock - ee_block); | ||
| 2082 | ext4_ext_mark_uninitialized(ex1); | ||
| 2083 | ex2 = &newex; | ||
| 2084 | } | ||
| 2085 | /* | ||
| 2086 | * for sanity, update the length of the ex2 extent before | ||
| 2087 | * we insert ex3, if ex1 is NULL. This is to avoid temporary | ||
| 2088 | * overlap of blocks. | ||
| 2089 | */ | ||
| 2090 | if (!ex1 && allocated > max_blocks) | ||
| 2091 | ex2->ee_len = cpu_to_le16(max_blocks); | ||
| 2092 | /* ex3: to ee_block + ee_len : uninitialised */ | ||
| 2093 | if (allocated > max_blocks) { | ||
| 2094 | unsigned int newdepth; | ||
| 2095 | ex3 = &newex; | ||
| 2096 | ex3->ee_block = cpu_to_le32(iblock + max_blocks); | ||
| 2097 | ext4_ext_store_pblock(ex3, newblock + max_blocks); | ||
| 2098 | ex3->ee_len = cpu_to_le16(allocated - max_blocks); | ||
| 2099 | ext4_ext_mark_uninitialized(ex3); | ||
| 2100 | err = ext4_ext_insert_extent(handle, inode, path, ex3); | ||
| 2101 | if (err) | ||
| 2102 | goto out; | ||
| 2103 | /* | ||
| 2104 | * The depth, and hence eh & ex might change | ||
| 2105 | * as part of the insert above. | ||
| 2106 | */ | ||
| 2107 | newdepth = ext_depth(inode); | ||
| 2108 | if (newdepth != depth) { | ||
| 2109 | depth = newdepth; | ||
| 2110 | path = ext4_ext_find_extent(inode, iblock, NULL); | ||
| 2111 | if (IS_ERR(path)) { | ||
| 2112 | err = PTR_ERR(path); | ||
| 2113 | path = NULL; | ||
| 2114 | goto out; | ||
| 2115 | } | ||
| 2116 | eh = path[depth].p_hdr; | ||
| 2117 | ex = path[depth].p_ext; | ||
| 2118 | if (ex2 != &newex) | ||
| 2119 | ex2 = ex; | ||
| 2120 | } | ||
| 2121 | allocated = max_blocks; | ||
| 2122 | } | ||
| 2123 | /* | ||
| 2124 | * If there was a change of depth as part of the | ||
| 2125 | * insertion of ex3 above, we need to update the length | ||
| 2126 | * of the ex1 extent again here | ||
| 2127 | */ | ||
| 2128 | if (ex1 && ex1 != ex) { | ||
| 2129 | ex1 = ex; | ||
| 2130 | ex1->ee_len = cpu_to_le16(iblock - ee_block); | ||
| 2131 | ext4_ext_mark_uninitialized(ex1); | ||
| 2132 | ex2 = &newex; | ||
| 2133 | } | ||
| 2134 | /* ex2: iblock to iblock + maxblocks-1 : initialised */ | ||
| 2135 | ex2->ee_block = cpu_to_le32(iblock); | ||
| 2136 | ex2->ee_start = cpu_to_le32(newblock); | ||
| 2137 | ext4_ext_store_pblock(ex2, newblock); | ||
| 2138 | ex2->ee_len = cpu_to_le16(allocated); | ||
| 2139 | if (ex2 != ex) | ||
| 2140 | goto insert; | ||
| 2141 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
| 2142 | if (err) | ||
| 2143 | goto out; | ||
| 2144 | /* | ||
| 2145 | * New (initialized) extent starts from the first block | ||
| 2146 | * in the current extent. i.e., ex2 == ex | ||
| 2147 | * We have to see if it can be merged with the extent | ||
| 2148 | * on the left. | ||
| 2149 | */ | ||
| 2150 | if (ex2 > EXT_FIRST_EXTENT(eh)) { | ||
| 2151 | /* | ||
| 2152 | * To merge left, pass "ex2 - 1" to try_to_merge(), | ||
| 2153 | * since it merges towards right _only_. | ||
| 2154 | */ | ||
| 2155 | ret = ext4_ext_try_to_merge(inode, path, ex2 - 1); | ||
| 2156 | if (ret) { | ||
| 2157 | err = ext4_ext_correct_indexes(handle, inode, path); | ||
| 2158 | if (err) | ||
| 2159 | goto out; | ||
| 2160 | depth = ext_depth(inode); | ||
| 2161 | ex2--; | ||
| 2162 | } | ||
| 2163 | } | ||
| 2164 | /* | ||
| 2165 | * Try to Merge towards right. This might be required | ||
| 2166 | * only when the whole extent is being written to. | ||
| 2167 | * i.e. ex2 == ex and ex3 == NULL. | ||
| 2168 | */ | ||
| 2169 | if (!ex3) { | ||
| 2170 | ret = ext4_ext_try_to_merge(inode, path, ex2); | ||
| 2171 | if (ret) { | ||
| 2172 | err = ext4_ext_correct_indexes(handle, inode, path); | ||
| 2173 | if (err) | ||
| 2174 | goto out; | ||
| 2175 | } | ||
| 2176 | } | ||
| 2177 | /* Mark modified extent as dirty */ | ||
| 2178 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
| 2179 | goto out; | ||
| 2180 | insert: | ||
| 2181 | err = ext4_ext_insert_extent(handle, inode, path, &newex); | ||
| 2182 | out: | ||
| 2183 | return err ? err : allocated; | ||
| 2184 | } | ||
| 2185 | |||
| 2015 | int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 2186 | int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, |
| 2016 | ext4_fsblk_t iblock, | 2187 | ext4_fsblk_t iblock, |
| 2017 | unsigned long max_blocks, struct buffer_head *bh_result, | 2188 | unsigned long max_blocks, struct buffer_head *bh_result, |
| 2018 | int create, int extend_disksize) | 2189 | int create, int extend_disksize) |
| 2019 | { | 2190 | { |
| 2020 | struct ext4_ext_path *path = NULL; | 2191 | struct ext4_ext_path *path = NULL; |
| 2192 | struct ext4_extent_header *eh; | ||
| 2021 | struct ext4_extent newex, *ex; | 2193 | struct ext4_extent newex, *ex; |
| 2022 | ext4_fsblk_t goal, newblock; | 2194 | ext4_fsblk_t goal, newblock; |
| 2023 | int err = 0, depth; | 2195 | int err = 0, depth, ret; |
| 2024 | unsigned long allocated = 0; | 2196 | unsigned long allocated = 0; |
| 2025 | 2197 | ||
| 2026 | __clear_bit(BH_New, &bh_result->b_state); | 2198 | __clear_bit(BH_New, &bh_result->b_state); |
| @@ -2033,8 +2205,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2033 | if (goal) { | 2205 | if (goal) { |
| 2034 | if (goal == EXT4_EXT_CACHE_GAP) { | 2206 | if (goal == EXT4_EXT_CACHE_GAP) { |
| 2035 | if (!create) { | 2207 | if (!create) { |
| 2036 | /* block isn't allocated yet and | 2208 | /* |
| 2037 | * user doesn't want to allocate it */ | 2209 | * block isn't allocated yet and |
| 2210 | * user doesn't want to allocate it | ||
| 2211 | */ | ||
| 2038 | goto out2; | 2212 | goto out2; |
| 2039 | } | 2213 | } |
| 2040 | /* we should allocate requested block */ | 2214 | /* we should allocate requested block */ |
| @@ -2068,6 +2242,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2068 | * this is why assert can't be put in ext4_ext_find_extent() | 2242 | * this is why assert can't be put in ext4_ext_find_extent() |
| 2069 | */ | 2243 | */ |
| 2070 | BUG_ON(path[depth].p_ext == NULL && depth != 0); | 2244 | BUG_ON(path[depth].p_ext == NULL && depth != 0); |
| 2245 | eh = path[depth].p_hdr; | ||
| 2071 | 2246 | ||
| 2072 | ex = path[depth].p_ext; | 2247 | ex = path[depth].p_ext; |
| 2073 | if (ex) { | 2248 | if (ex) { |
| @@ -2076,13 +2251,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2076 | unsigned short ee_len; | 2251 | unsigned short ee_len; |
| 2077 | 2252 | ||
| 2078 | /* | 2253 | /* |
| 2079 | * Allow future support for preallocated extents to be added | ||
| 2080 | * as an RO_COMPAT feature: | ||
| 2081 | * Uninitialized extents are treated as holes, except that | 2254 | * Uninitialized extents are treated as holes, except that |
| 2082 | * we avoid (fail) allocating new blocks during a write. | 2255 | * we split out initialized portions during a write. |
| 2083 | */ | 2256 | */ |
| 2084 | if (le16_to_cpu(ex->ee_len) > EXT_MAX_LEN) | ||
| 2085 | goto out2; | ||
| 2086 | ee_len = ext4_ext_get_actual_len(ex); | 2257 | ee_len = ext4_ext_get_actual_len(ex); |
| 2087 | /* if found extent covers block, simply return it */ | 2258 | /* if found extent covers block, simply return it */ |
| 2088 | if (iblock >= ee_block && iblock < ee_block + ee_len) { | 2259 | if (iblock >= ee_block && iblock < ee_block + ee_len) { |
| @@ -2091,12 +2262,27 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2091 | allocated = ee_len - (iblock - ee_block); | 2262 | allocated = ee_len - (iblock - ee_block); |
| 2092 | ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, | 2263 | ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, |
| 2093 | ee_block, ee_len, newblock); | 2264 | ee_block, ee_len, newblock); |
| 2265 | |||
| 2094 | /* Do not put uninitialized extent in the cache */ | 2266 | /* Do not put uninitialized extent in the cache */ |
| 2095 | if (!ext4_ext_is_uninitialized(ex)) | 2267 | if (!ext4_ext_is_uninitialized(ex)) { |
| 2096 | ext4_ext_put_in_cache(inode, ee_block, | 2268 | ext4_ext_put_in_cache(inode, ee_block, |
| 2097 | ee_len, ee_start, | 2269 | ee_len, ee_start, |
| 2098 | EXT4_EXT_CACHE_EXTENT); | 2270 | EXT4_EXT_CACHE_EXTENT); |
| 2099 | goto out; | 2271 | goto out; |
| 2272 | } | ||
| 2273 | if (create == EXT4_CREATE_UNINITIALIZED_EXT) | ||
| 2274 | goto out; | ||
| 2275 | if (!create) | ||
| 2276 | goto out2; | ||
| 2277 | |||
| 2278 | ret = ext4_ext_convert_to_initialized(handle, inode, | ||
| 2279 | path, iblock, | ||
| 2280 | max_blocks); | ||
| 2281 | if (ret <= 0) | ||
| 2282 | goto out2; | ||
| 2283 | else | ||
| 2284 | allocated = ret; | ||
| 2285 | goto outnew; | ||
| 2100 | } | 2286 | } |
| 2101 | } | 2287 | } |
| 2102 | 2288 | ||
| @@ -2105,8 +2291,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2105 | * we couldn't try to create block if create flag is zero | 2291 | * we couldn't try to create block if create flag is zero |
| 2106 | */ | 2292 | */ |
| 2107 | if (!create) { | 2293 | if (!create) { |
| 2108 | /* put just found gap into cache to speed up | 2294 | /* |
| 2109 | * subsequent requests */ | 2295 | * put just found gap into cache to speed up |
| 2296 | * subsequent requests | ||
| 2297 | */ | ||
| 2110 | ext4_ext_put_gap_in_cache(inode, path, iblock); | 2298 | ext4_ext_put_gap_in_cache(inode, path, iblock); |
| 2111 | goto out2; | 2299 | goto out2; |
| 2112 | } | 2300 | } |
| @@ -2152,6 +2340,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2152 | 2340 | ||
| 2153 | /* previous routine could use block we allocated */ | 2341 | /* previous routine could use block we allocated */ |
| 2154 | newblock = ext_pblock(&newex); | 2342 | newblock = ext_pblock(&newex); |
| 2343 | outnew: | ||
| 2155 | __set_bit(BH_New, &bh_result->b_state); | 2344 | __set_bit(BH_New, &bh_result->b_state); |
| 2156 | 2345 | ||
| 2157 | /* Cache only when it is _not_ an uninitialized extent */ | 2346 | /* Cache only when it is _not_ an uninitialized extent */ |
| @@ -2221,7 +2410,8 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) | |||
| 2221 | err = ext4_ext_remove_space(inode, last_block); | 2410 | err = ext4_ext_remove_space(inode, last_block); |
| 2222 | 2411 | ||
| 2223 | /* In a multi-transaction truncate, we only make the final | 2412 | /* In a multi-transaction truncate, we only make the final |
| 2224 | * transaction synchronous. */ | 2413 | * transaction synchronous. |
| 2414 | */ | ||
| 2225 | if (IS_SYNC(inode)) | 2415 | if (IS_SYNC(inode)) |
| 2226 | handle->h_sync = 1; | 2416 | handle->h_sync = 1; |
| 2227 | 2417 | ||
diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index e3d5afc6f23e..edf49ec89eac 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h | |||
| @@ -205,6 +205,9 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) | |||
| 205 | 205 | ||
| 206 | extern int ext4_extent_tree_init(handle_t *, struct inode *); | 206 | extern int ext4_extent_tree_init(handle_t *, struct inode *); |
| 207 | extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); | 207 | extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); |
| 208 | extern int ext4_ext_try_to_merge(struct inode *inode, | ||
| 209 | struct ext4_ext_path *path, | ||
| 210 | struct ext4_extent *); | ||
| 208 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); | 211 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); |
| 209 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); | 212 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); |
| 210 | extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *); | 213 | extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *); |
