diff options
author | Tristan Ye <tristan.ye@oracle.com> | 2011-03-18 02:35:34 -0400 |
---|---|---|
committer | Tristan Ye <tristan.ye@oracle.com> | 2011-05-25 03:17:09 -0400 |
commit | 202ee5facb2c55f36a4324a4f56d8bdf3617a579 (patch) | |
tree | 22875e904e85cf1ef9419119914314fe4dd48550 /fs/ocfs2 | |
parent | 8f603e567aa7a243e68ca48b4f105b990851360f (diff) |
Ocfs2/move_extents: defrag a range of extent.
It's a relatively complete function to accomplish defragmentation of an entire
or partial extent. One journal handle is kept during the whole operation.
Logically it does one more thing than ocfs2_move_extent() actually does: yes,
it claims the new clusters itself ;-)
Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/move_extents.c | 136 |
1 files changed, 136 insertions, 0 deletions
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index d1bd5a347e9c..78db10d4c7f1 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c | |||
@@ -215,3 +215,139 @@ out: | |||
215 | 215 | ||
216 | return ret; | 216 | return ret; |
217 | } | 217 | } |
218 | |||
219 | /* | ||
220 | * Using one journal handle to guarantee the data consistency in case | ||
221 | * crash happens anywhere. | ||
222 | */ | ||
223 | static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, | ||
224 | u32 cpos, u32 phys_cpos, u32 len, int ext_flags) | ||
225 | { | ||
226 | int ret, credits = 0, extra_blocks = 0; | ||
227 | handle_t *handle; | ||
228 | struct inode *inode = context->inode; | ||
229 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
230 | struct inode *tl_inode = osb->osb_tl_inode; | ||
231 | struct ocfs2_refcount_tree *ref_tree = NULL; | ||
232 | u32 new_phys_cpos, new_len; | ||
233 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
234 | |||
235 | if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) { | ||
236 | |||
237 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & | ||
238 | OCFS2_HAS_REFCOUNT_FL)); | ||
239 | |||
240 | BUG_ON(!context->refcount_loc); | ||
241 | |||
242 | ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, | ||
243 | &ref_tree, NULL); | ||
244 | if (ret) { | ||
245 | mlog_errno(ret); | ||
246 | return ret; | ||
247 | } | ||
248 | |||
249 | ret = ocfs2_prepare_refcount_change_for_del(inode, | ||
250 | context->refcount_loc, | ||
251 | phys_blkno, | ||
252 | len, | ||
253 | &credits, | ||
254 | &extra_blocks); | ||
255 | if (ret) { | ||
256 | mlog_errno(ret); | ||
257 | goto out; | ||
258 | } | ||
259 | } | ||
260 | |||
261 | ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1, | ||
262 | &context->meta_ac, | ||
263 | &context->data_ac, | ||
264 | extra_blocks, &credits); | ||
265 | if (ret) { | ||
266 | mlog_errno(ret); | ||
267 | goto out; | ||
268 | } | ||
269 | |||
270 | /* | ||
271 | * should be using allocation reservation strategy there? | ||
272 | * | ||
273 | * if (context->data_ac) | ||
274 | * context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; | ||
275 | */ | ||
276 | |||
277 | mutex_lock(&tl_inode->i_mutex); | ||
278 | |||
279 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
280 | ret = __ocfs2_flush_truncate_log(osb); | ||
281 | if (ret < 0) { | ||
282 | mlog_errno(ret); | ||
283 | goto out_unlock_mutex; | ||
284 | } | ||
285 | } | ||
286 | |||
287 | handle = ocfs2_start_trans(osb, credits); | ||
288 | if (IS_ERR(handle)) { | ||
289 | ret = PTR_ERR(handle); | ||
290 | mlog_errno(ret); | ||
291 | goto out_unlock_mutex; | ||
292 | } | ||
293 | |||
294 | ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, len, | ||
295 | &new_phys_cpos, &new_len); | ||
296 | if (ret) { | ||
297 | mlog_errno(ret); | ||
298 | goto out_commit; | ||
299 | } | ||
300 | |||
301 | /* | ||
302 | * we're not quite patient here to make multiple attempts for claiming | ||
303 | * enough clusters, failure to claim clusters per-requested is not a | ||
304 | * disaster though, it can only mean partial range of defragmentation | ||
305 | * or extent movements gets gone, users anyway is able to have another | ||
306 | * try as they wish anytime, since they're going to be returned a | ||
307 | * '-ENOSPC' and completed length of this movement. | ||
308 | */ | ||
309 | if (new_len != len) { | ||
310 | mlog(0, "len_claimed: %u, len: %u\n", new_len, len); | ||
311 | context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; | ||
312 | ret = -ENOSPC; | ||
313 | goto out_commit; | ||
314 | } | ||
315 | |||
316 | mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos, | ||
317 | phys_cpos, new_phys_cpos); | ||
318 | |||
319 | ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos, | ||
320 | new_phys_cpos, ext_flags); | ||
321 | if (ret) | ||
322 | mlog_errno(ret); | ||
323 | |||
324 | /* | ||
325 | * Here we should write the new page out first if we are | ||
326 | * in write-back mode. | ||
327 | */ | ||
328 | ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len); | ||
329 | if (ret) | ||
330 | mlog_errno(ret); | ||
331 | |||
332 | out_commit: | ||
333 | ocfs2_commit_trans(osb, handle); | ||
334 | |||
335 | out_unlock_mutex: | ||
336 | mutex_unlock(&tl_inode->i_mutex); | ||
337 | |||
338 | if (context->data_ac) { | ||
339 | ocfs2_free_alloc_context(context->data_ac); | ||
340 | context->data_ac = NULL; | ||
341 | } | ||
342 | |||
343 | if (context->meta_ac) { | ||
344 | ocfs2_free_alloc_context(context->meta_ac); | ||
345 | context->meta_ac = NULL; | ||
346 | } | ||
347 | |||
348 | out: | ||
349 | if (ref_tree) | ||
350 | ocfs2_unlock_refcount_tree(osb, ref_tree, 1); | ||
351 | |||
352 | return ret; | ||
353 | } | ||