diff options
author | Tristan Ye <tristan.ye@oracle.com> | 2011-05-25 02:30:36 -0400 |
---|---|---|
committer | Tristan Ye <tristan.ye@oracle.com> | 2011-05-25 03:17:12 -0400 |
commit | 4dfa66bd595120530506448f3d519f341afd736e (patch) | |
tree | e11b923c29fbd85d10aa343dff93db5bc82003b4 | |
parent | 53069d4e76954e2e63c1b3c501051c6fbcf7298c (diff) |
Ocfs2/move_extents: Let defrag handle partial extent moving.
Support partial extent moving, which may split the movement of an entire extent
into smaller pieces to cope with insufficient allocations. This eases the
'ENOSPC' pain and makes the whole move much less likely to fail. The downside
is that it may leave the fs even more fragmented than it was before the move,
so let userspace make the trade-off here.
Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
-rw-r--r-- | fs/ocfs2/move_extents.c | 46 |
1 files changed, 26 insertions, 20 deletions
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 800552168d8a..efc509b3af1f 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c | |||
@@ -44,6 +44,7 @@ struct ocfs2_move_extents_context { | |||
44 | struct inode *inode; | 44 | struct inode *inode; |
45 | struct file *file; | 45 | struct file *file; |
46 | int auto_defrag; | 46 | int auto_defrag; |
47 | int partial; | ||
47 | int credits; | 48 | int credits; |
48 | u32 new_phys_cpos; | 49 | u32 new_phys_cpos; |
49 | u32 clusters_moved; | 50 | u32 clusters_moved; |
@@ -221,9 +222,9 @@ out: | |||
221 | * crash happens anywhere. | 222 | * crash happens anywhere. |
222 | */ | 223 | */ |
223 | static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, | 224 | static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, |
224 | u32 cpos, u32 phys_cpos, u32 len, int ext_flags) | 225 | u32 cpos, u32 phys_cpos, u32 *len, int ext_flags) |
225 | { | 226 | { |
226 | int ret, credits = 0, extra_blocks = 0; | 227 | int ret, credits = 0, extra_blocks = 0, partial = context->partial; |
227 | handle_t *handle; | 228 | handle_t *handle; |
228 | struct inode *inode = context->inode; | 229 | struct inode *inode = context->inode; |
229 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 230 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
@@ -232,7 +233,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, | |||
232 | u32 new_phys_cpos, new_len; | 233 | u32 new_phys_cpos, new_len; |
233 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | 234 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); |
234 | 235 | ||
235 | if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) { | 236 | if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { |
236 | 237 | ||
237 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & | 238 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & |
238 | OCFS2_HAS_REFCOUNT_FL)); | 239 | OCFS2_HAS_REFCOUNT_FL)); |
@@ -249,7 +250,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, | |||
249 | ret = ocfs2_prepare_refcount_change_for_del(inode, | 250 | ret = ocfs2_prepare_refcount_change_for_del(inode, |
250 | context->refcount_loc, | 251 | context->refcount_loc, |
251 | phys_blkno, | 252 | phys_blkno, |
252 | len, | 253 | *len, |
253 | &credits, | 254 | &credits, |
254 | &extra_blocks); | 255 | &extra_blocks); |
255 | if (ret) { | 256 | if (ret) { |
@@ -258,7 +259,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, | |||
258 | } | 259 | } |
259 | } | 260 | } |
260 | 261 | ||
261 | ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1, | 262 | ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1, |
262 | &context->meta_ac, | 263 | &context->meta_ac, |
263 | &context->data_ac, | 264 | &context->data_ac, |
264 | extra_blocks, &credits); | 265 | extra_blocks, &credits); |
@@ -291,7 +292,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, | |||
291 | goto out_unlock_mutex; | 292 | goto out_unlock_mutex; |
292 | } | 293 | } |
293 | 294 | ||
294 | ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, len, | 295 | ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, *len, |
295 | &new_phys_cpos, &new_len); | 296 | &new_phys_cpos, &new_len); |
296 | if (ret) { | 297 | if (ret) { |
297 | mlog_errno(ret); | 298 | mlog_errno(ret); |
@@ -299,33 +300,36 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, | |||
299 | } | 300 | } |
300 | 301 | ||
301 | /* | 302 | /* |
302 | * we're not quite patient here to make multiple attempts for claiming | 303 | * allowing partial extent moving is kind of 'pros and cons', it makes |
303 | * enough clusters, failure to claim clusters per-requested is not a | 304 | * whole defragmentation less likely to fail, on the contrary, the bad |
304 | * disaster though, it can only mean partial range of defragmentation | 305 | * thing is it may make the fs even more fragmented after moving, let |
305 | * or extent movements gets gone, users anyway is able to have another | 306 | * userspace make a good decision here. |
306 | * try as they wish anytime, since they're going to be returned a | ||
307 | * '-ENOSPC' and completed length of this movement. | ||
308 | */ | 307 | */ |
309 | if (new_len != len) { | 308 | if (new_len != *len) { |
310 | mlog(0, "len_claimed: %u, len: %u\n", new_len, len); | 309 | mlog(0, "len_claimed: %u, len: %u\n", new_len, *len); |
311 | context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; | 310 | if (!partial) { |
312 | ret = -ENOSPC; | 311 | context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; |
313 | goto out_commit; | 312 | ret = -ENOSPC; |
313 | goto out_commit; | ||
314 | } | ||
314 | } | 315 | } |
315 | 316 | ||
316 | mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos, | 317 | mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos, |
317 | phys_cpos, new_phys_cpos); | 318 | phys_cpos, new_phys_cpos); |
318 | 319 | ||
319 | ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos, | 320 | ret = __ocfs2_move_extent(handle, context, cpos, new_len, phys_cpos, |
320 | new_phys_cpos, ext_flags); | 321 | new_phys_cpos, ext_flags); |
321 | if (ret) | 322 | if (ret) |
322 | mlog_errno(ret); | 323 | mlog_errno(ret); |
323 | 324 | ||
325 | if (partial && (new_len != *len)) | ||
326 | *len = new_len; | ||
327 | |||
324 | /* | 328 | /* |
325 | * Here we should write the new page out first if we are | 329 | * Here we should write the new page out first if we are |
326 | * in write-back mode. | 330 | * in write-back mode. |
327 | */ | 331 | */ |
328 | ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len); | 332 | ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, *len); |
329 | if (ret) | 333 | if (ret) |
330 | mlog_errno(ret); | 334 | mlog_errno(ret); |
331 | 335 | ||
@@ -926,7 +930,7 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh, | |||
926 | cpos, phys_cpos, alloc_size, len_defraged); | 930 | cpos, phys_cpos, alloc_size, len_defraged); |
927 | 931 | ||
928 | ret = ocfs2_defrag_extent(context, cpos, phys_cpos, | 932 | ret = ocfs2_defrag_extent(context, cpos, phys_cpos, |
929 | alloc_size, flags); | 933 | &alloc_size, flags); |
930 | } else { | 934 | } else { |
931 | ret = ocfs2_move_extent(context, cpos, phys_cpos, | 935 | ret = ocfs2_move_extent(context, cpos, phys_cpos, |
932 | &new_phys_cpos, alloc_size, | 936 | &new_phys_cpos, alloc_size, |
@@ -1101,6 +1105,8 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) | |||
1101 | * any thought? | 1105 | * any thought? |
1102 | */ | 1106 | */ |
1103 | range.me_threshold = 1024 * 1024; | 1107 | range.me_threshold = 1024 * 1024; |
1108 | if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG) | ||
1109 | context->partial = 1; | ||
1104 | } else { | 1110 | } else { |
1105 | /* | 1111 | /* |
1106 | * first best-effort attempt to validate and adjust the goal | 1112 | * first best-effort attempt to validate and adjust the goal |