aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTao Ma <tao.ma@oracle.com>2009-08-17 23:29:12 -0400
committerJoel Becker <joel.becker@oracle.com>2009-09-22 23:09:35 -0400
commitbcbbb24a6a5c5b3e7b8e5284e0bfa23f45c32377 (patch)
treee45a6cfd75cd8005fdf280c2f5f57c09186249dd
parent1aa75fea64bc26bda9be9b1b20ae253d7a481877 (diff)
ocfs2: Decrement refcount when truncating refcounted extents.
Add 'Decrement refcount for delete' into the normal truncate process, so that for a refcounted extent record we decrement its refcount record instead of freeing the clusters directly. Signed-off-by: Tao Ma <tao.ma@oracle.com>
-rw-r--r--fs/ocfs2/alloc.c76
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/refcounttree.c212
-rw-r--r--fs/ocfs2/refcounttree.h6
4 files changed, 290 insertions, 7 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 96f8ca6b3aba..03438a677933 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -49,6 +49,7 @@
49#include "super.h" 49#include "super.h"
50#include "uptodate.h" 50#include "uptodate.h"
51#include "xattr.h" 51#include "xattr.h"
52#include "refcounttree.h"
52 53
53#include "buffer_head_io.h" 54#include "buffer_head_io.h"
54 55
@@ -6673,7 +6674,7 @@ out:
6673 */ 6674 */
6674static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, 6675static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
6675 handle_t *handle, struct ocfs2_truncate_context *tc, 6676 handle_t *handle, struct ocfs2_truncate_context *tc,
6676 u32 clusters_to_del, u64 *delete_start) 6677 u32 clusters_to_del, u64 *delete_start, u8 *flags)
6677{ 6678{
6678 int ret, i, index = path->p_tree_depth; 6679 int ret, i, index = path->p_tree_depth;
6679 u32 new_edge = 0; 6680 u32 new_edge = 0;
@@ -6683,6 +6684,7 @@ static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
6683 struct ocfs2_extent_rec *rec; 6684 struct ocfs2_extent_rec *rec;
6684 6685
6685 *delete_start = 0; 6686 *delete_start = 0;
6687 *flags = 0;
6686 6688
6687 while (index >= 0) { 6689 while (index >= 0) {
6688 bh = path->p_node[index].bh; 6690 bh = path->p_node[index].bh;
@@ -6770,6 +6772,7 @@ find_tail_record:
6770 *delete_start = le64_to_cpu(rec->e_blkno) 6772 *delete_start = le64_to_cpu(rec->e_blkno)
6771 + ocfs2_clusters_to_blocks(inode->i_sb, 6773 + ocfs2_clusters_to_blocks(inode->i_sb,
6772 le16_to_cpu(rec->e_leaf_clusters)); 6774 le16_to_cpu(rec->e_leaf_clusters));
6775 *flags = rec->e_flags;
6773 6776
6774 /* 6777 /*
6775 * If it's now empty, remove this record. 6778 * If it's now empty, remove this record.
@@ -6869,7 +6872,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6869 struct buffer_head *fe_bh, 6872 struct buffer_head *fe_bh,
6870 handle_t *handle, 6873 handle_t *handle,
6871 struct ocfs2_truncate_context *tc, 6874 struct ocfs2_truncate_context *tc,
6872 struct ocfs2_path *path) 6875 struct ocfs2_path *path,
6876 struct ocfs2_alloc_context *meta_ac)
6873{ 6877{
6874 int status; 6878 int status;
6875 struct ocfs2_dinode *fe; 6879 struct ocfs2_dinode *fe;
@@ -6877,6 +6881,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6877 struct ocfs2_extent_list *el; 6881 struct ocfs2_extent_list *el;
6878 struct buffer_head *last_eb_bh = NULL; 6882 struct buffer_head *last_eb_bh = NULL;
6879 u64 delete_blk = 0; 6883 u64 delete_blk = 0;
6884 u8 rec_flags;
6880 6885
6881 fe = (struct ocfs2_dinode *) fe_bh->b_data; 6886 fe = (struct ocfs2_dinode *) fe_bh->b_data;
6882 6887
@@ -6932,7 +6937,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6932 inode->i_blocks = ocfs2_inode_sector_count(inode); 6937 inode->i_blocks = ocfs2_inode_sector_count(inode);
6933 6938
6934 status = ocfs2_trim_tree(inode, path, handle, tc, 6939 status = ocfs2_trim_tree(inode, path, handle, tc,
6935 clusters_to_del, &delete_blk); 6940 clusters_to_del, &delete_blk, &rec_flags);
6936 if (status) { 6941 if (status) {
6937 mlog_errno(status); 6942 mlog_errno(status);
6938 goto bail; 6943 goto bail;
@@ -6964,8 +6969,16 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6964 } 6969 }
6965 6970
6966 if (delete_blk) { 6971 if (delete_blk) {
6967 status = ocfs2_truncate_log_append(osb, handle, delete_blk, 6972 if (rec_flags & OCFS2_EXT_REFCOUNTED)
6968 clusters_to_del); 6973 status = ocfs2_decrease_refcount(inode, handle,
6974 ocfs2_blocks_to_clusters(osb->sb,
6975 delete_blk),
6976 clusters_to_del, meta_ac,
6977 &tc->tc_dealloc);
6978 else
6979 status = ocfs2_truncate_log_append(osb, handle,
6980 delete_blk,
6981 clusters_to_del);
6969 if (status < 0) { 6982 if (status < 0) {
6970 mlog_errno(status); 6983 mlog_errno(status);
6971 goto bail; 6984 goto bail;
@@ -7383,11 +7396,14 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
7383{ 7396{
7384 int status, i, credits, tl_sem = 0; 7397 int status, i, credits, tl_sem = 0;
7385 u32 clusters_to_del, new_highest_cpos, range; 7398 u32 clusters_to_del, new_highest_cpos, range;
7399 u64 blkno = 0;
7386 struct ocfs2_extent_list *el; 7400 struct ocfs2_extent_list *el;
7387 handle_t *handle = NULL; 7401 handle_t *handle = NULL;
7388 struct inode *tl_inode = osb->osb_tl_inode; 7402 struct inode *tl_inode = osb->osb_tl_inode;
7389 struct ocfs2_path *path = NULL; 7403 struct ocfs2_path *path = NULL;
7390 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 7404 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
7405 struct ocfs2_alloc_context *meta_ac = NULL;
7406 struct ocfs2_refcount_tree *ref_tree = NULL;
7391 7407
7392 mlog_entry_void(); 7408 mlog_entry_void();
7393 7409
@@ -7413,6 +7429,8 @@ start:
7413 goto bail; 7429 goto bail;
7414 } 7430 }
7415 7431
7432 credits = 0;
7433
7416 /* 7434 /*
7417 * Truncate always works against the rightmost tree branch. 7435 * Truncate always works against the rightmost tree branch.
7418 */ 7436 */
@@ -7453,10 +7471,15 @@ start:
7453 clusters_to_del = 0; 7471 clusters_to_del = 0;
7454 } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { 7472 } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) {
7455 clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); 7473 clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]);
7474 blkno = le64_to_cpu(el->l_recs[i].e_blkno);
7456 } else if (range > new_highest_cpos) { 7475 } else if (range > new_highest_cpos) {
7457 clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + 7476 clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) +
7458 le32_to_cpu(el->l_recs[i].e_cpos)) - 7477 le32_to_cpu(el->l_recs[i].e_cpos)) -
7459 new_highest_cpos; 7478 new_highest_cpos;
7479 blkno = le64_to_cpu(el->l_recs[i].e_blkno) +
7480 ocfs2_clusters_to_blocks(inode->i_sb,
7481 ocfs2_rec_clusters(el, &el->l_recs[i]) -
7482 clusters_to_del);
7460 } else { 7483 } else {
7461 status = 0; 7484 status = 0;
7462 goto bail; 7485 goto bail;
@@ -7465,6 +7488,29 @@ start:
7465 mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", 7488 mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n",
7466 clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr); 7489 clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr);
7467 7490
7491 if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) {
7492 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
7493 OCFS2_HAS_REFCOUNT_FL));
7494
7495 status = ocfs2_lock_refcount_tree(osb,
7496 le64_to_cpu(di->i_refcount_loc),
7497 1, &ref_tree, NULL);
7498 if (status) {
7499 mlog_errno(status);
7500 goto bail;
7501 }
7502
7503 status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh,
7504 blkno,
7505 clusters_to_del,
7506 &credits,
7507 &meta_ac);
7508 if (status < 0) {
7509 mlog_errno(status);
7510 goto bail;
7511 }
7512 }
7513
7468 mutex_lock(&tl_inode->i_mutex); 7514 mutex_lock(&tl_inode->i_mutex);
7469 tl_sem = 1; 7515 tl_sem = 1;
7470 /* ocfs2_truncate_log_needs_flush guarantees us at least one 7516 /* ocfs2_truncate_log_needs_flush guarantees us at least one
@@ -7478,7 +7524,7 @@ start:
7478 } 7524 }
7479 } 7525 }
7480 7526
7481 credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, 7527 credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
7482 (struct ocfs2_dinode *)fe_bh->b_data, 7528 (struct ocfs2_dinode *)fe_bh->b_data,
7483 el); 7529 el);
7484 handle = ocfs2_start_trans(osb, credits); 7530 handle = ocfs2_start_trans(osb, credits);
@@ -7490,7 +7536,7 @@ start:
7490 } 7536 }
7491 7537
7492 status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle, 7538 status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle,
7493 tc, path); 7539 tc, path, meta_ac);
7494 if (status < 0) { 7540 if (status < 0) {
7495 mlog_errno(status); 7541 mlog_errno(status);
7496 goto bail; 7542 goto bail;
@@ -7504,6 +7550,16 @@ start:
7504 7550
7505 ocfs2_reinit_path(path, 1); 7551 ocfs2_reinit_path(path, 1);
7506 7552
7553 if (meta_ac) {
7554 ocfs2_free_alloc_context(meta_ac);
7555 meta_ac = NULL;
7556 }
7557
7558 if (ref_tree) {
7559 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7560 ref_tree = NULL;
7561 }
7562
7507 /* 7563 /*
7508 * The check above will catch the case where we've truncated 7564 * The check above will catch the case where we've truncated
7509 * away all allocation. 7565 * away all allocation.
@@ -7520,6 +7576,12 @@ bail:
7520 if (handle) 7576 if (handle)
7521 ocfs2_commit_trans(osb, handle); 7577 ocfs2_commit_trans(osb, handle);
7522 7578
7579 if (meta_ac)
7580 ocfs2_free_alloc_context(meta_ac);
7581
7582 if (ref_tree)
7583 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7584
7523 ocfs2_run_deallocs(osb, &tc->tc_dealloc); 7585 ocfs2_run_deallocs(osb, &tc->tc_dealloc);
7524 7586
7525 ocfs2_free_path(path); 7587 ocfs2_free_path(path);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index bd88c8b9f2fb..3f74e09b0d80 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -504,6 +504,9 @@ static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
504 */ 504 */
505#define OCFS2_REFCOUNT_TREE_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 505#define OCFS2_REFCOUNT_TREE_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
506 506
507/* 2 metadata alloc, 2 new blocks and root refcount block */
508#define OCFS2_EXPAND_REFCOUNT_TREE_CREDITS (OCFS2_SUBALLOC_ALLOC * 2 + 3)
509
507/* 510/*
508 * Please note that the caller must make sure that root_el is the root 511 * Please note that the caller must make sure that root_el is the root
509 * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise 512 * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index f7d19f4db897..e72dbdd3b6e8 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2192,3 +2192,215 @@ static int ocfs2_mark_extent_refcounted(struct inode *inode,
2192out: 2192out:
2193 return ret; 2193 return ret;
2194} 2194}
2195
2196/*
2197 * Given some contiguous physical clusters, calculate what we need
2198 * for modifying their refcount.
2199 */
2200static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
2201 struct ocfs2_caching_info *ci,
2202 struct buffer_head *ref_root_bh,
2203 u64 start_cpos,
2204 u32 clusters,
2205 int *meta_add,
2206 int *credits)
2207{
2208 int ret = 0, index, ref_blocks = 0, recs_add = 0;
2209 u64 cpos = start_cpos;
2210 struct ocfs2_refcount_block *rb;
2211 struct ocfs2_refcount_rec rec;
2212 struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL;
2213 u32 len;
2214
2215 mlog(0, "start_cpos %llu, clusters %u\n",
2216 (unsigned long long)start_cpos, clusters);
2217 while (clusters) {
2218 ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
2219 cpos, clusters, &rec,
2220 &index, &ref_leaf_bh);
2221 if (ret) {
2222 mlog_errno(ret);
2223 goto out;
2224 }
2225
2226 if (ref_leaf_bh != prev_bh) {
2227 /*
2228 * Now we encounter a new leaf block, so calculate
2229 * whether we need to extend the old leaf.
2230 */
2231 if (prev_bh) {
2232 rb = (struct ocfs2_refcount_block *)
2233 prev_bh->b_data;
2234
2235 if (le64_to_cpu(rb->rf_records.rl_used) +
2236 recs_add >
2237 le16_to_cpu(rb->rf_records.rl_count))
2238 ref_blocks++;
2239 }
2240
2241 recs_add = 0;
2242 *credits += 1;
2243 brelse(prev_bh);
2244 prev_bh = ref_leaf_bh;
2245 get_bh(prev_bh);
2246 }
2247
2248 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
2249
2250 mlog(0, "recs_add %d,cpos %llu, clusters %u, rec->r_cpos %llu,"
2251 "rec->r_clusters %u, rec->r_refcount %u, index %d\n",
2252 recs_add, (unsigned long long)cpos, clusters,
2253 (unsigned long long)le64_to_cpu(rec.r_cpos),
2254 le32_to_cpu(rec.r_clusters),
2255 le32_to_cpu(rec.r_refcount), index);
2256
2257 len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) +
2258 le32_to_cpu(rec.r_clusters)) - cpos;
2259 /*
2260 * If the refcount rec already exist, cool. We just need
2261 * to check whether there is a split. Otherwise we just need
2262 * to increase the refcount.
2263 * If we will insert one, increases recs_add.
2264 *
2265 * We record all the records which will be inserted to the
2266 * same refcount block, so that we can tell exactly whether
2267 * we need a new refcount block or not.
2268 */
2269 if (rec.r_refcount) {
2270 /* Check whether we need a split at the beginning. */
2271 if (cpos == start_cpos &&
2272 cpos != le64_to_cpu(rec.r_cpos))
2273 recs_add++;
2274
2275 /* Check whether we need a split in the end. */
2276 if (cpos + clusters < le64_to_cpu(rec.r_cpos) +
2277 le32_to_cpu(rec.r_clusters))
2278 recs_add++;
2279 } else
2280 recs_add++;
2281
2282 brelse(ref_leaf_bh);
2283 ref_leaf_bh = NULL;
2284 clusters -= len;
2285 cpos += len;
2286 }
2287
2288 if (prev_bh) {
2289 rb = (struct ocfs2_refcount_block *)prev_bh->b_data;
2290
2291 if (le64_to_cpu(rb->rf_records.rl_used) + recs_add >
2292 le16_to_cpu(rb->rf_records.rl_count))
2293 ref_blocks++;
2294
2295 *credits += 1;
2296 }
2297
2298 if (!ref_blocks)
2299 goto out;
2300
2301 mlog(0, "we need ref_blocks %d\n", ref_blocks);
2302 *meta_add += ref_blocks;
2303 *credits += ref_blocks;
2304
2305 /*
2306 * So we may need ref_blocks to insert into the tree.
2307 * That also means we need to change the b-tree and add that number
2308 * of records since we never merge them.
2309 * We need one more block for expansion since the new created leaf
2310 * block is also full and needs split.
2311 */
2312 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
2313 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) {
2314 struct ocfs2_extent_tree et;
2315
2316 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
2317 *meta_add += ocfs2_extend_meta_needed(et.et_root_el);
2318 *credits += ocfs2_calc_extend_credits(sb,
2319 et.et_root_el,
2320 ref_blocks);
2321 } else {
2322 *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
2323 *meta_add += 1;
2324 }
2325
2326out:
2327 brelse(ref_leaf_bh);
2328 brelse(prev_bh);
2329 return ret;
2330}
2331
2332/*
2333 * For refcount tree, we will decrease some contiguous clusters
2334 * refcount count, so just go through it to see how many blocks
2335 * we gonna touch and whether we need to create new blocks.
2336 *
2337 * Normally the refcount blocks store these refcount should be
2338 * continguous also, so that we can get the number easily.
2339 * As for meta_ac, we will at most add split 2 refcount record and
2340 * 2 more refcount block, so just check it in a rough way.
2341 *
2342 * Caller must hold refcount tree lock.
2343 */
2344int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2345 struct buffer_head *di_bh,
2346 u64 phys_blkno,
2347 u32 clusters,
2348 int *credits,
2349 struct ocfs2_alloc_context **meta_ac)
2350{
2351 int ret, ref_blocks = 0;
2352 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2353 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2354 struct buffer_head *ref_root_bh = NULL;
2355 struct ocfs2_refcount_tree *tree;
2356 u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno);
2357
2358 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
2359 ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
2360 "tree, but the feature bit is not set in the "
2361 "super block.", inode->i_ino);
2362 ret = -EROFS;
2363 goto out;
2364 }
2365
2366 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
2367
2368 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
2369 le64_to_cpu(di->i_refcount_loc), &tree);
2370 if (ret) {
2371 mlog_errno(ret);
2372 goto out;
2373 }
2374
2375 ret = ocfs2_read_refcount_block(&tree->rf_ci,
2376 le64_to_cpu(di->i_refcount_loc),
2377 &ref_root_bh);
2378 if (ret) {
2379 mlog_errno(ret);
2380 goto out;
2381 }
2382
2383 ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
2384 &tree->rf_ci,
2385 ref_root_bh,
2386 start_cpos, clusters,
2387 &ref_blocks, credits);
2388 if (ret) {
2389 mlog_errno(ret);
2390 goto out;
2391 }
2392
2393 mlog(0, "reserve new metadata %d, credits = %d\n",
2394 ref_blocks, *credits);
2395
2396 if (ref_blocks) {
2397 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2398 ref_blocks, meta_ac);
2399 if (ret)
2400 mlog_errno(ret);
2401 }
2402
2403out:
2404 brelse(ref_root_bh);
2405 return ret;
2406}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index ad4b483ec5c7..b8c9ed7dc383 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -45,4 +45,10 @@ int ocfs2_decrease_refcount(struct inode *inode,
45 handle_t *handle, u32 cpos, u32 len, 45 handle_t *handle, u32 cpos, u32 len,
46 struct ocfs2_alloc_context *meta_ac, 46 struct ocfs2_alloc_context *meta_ac,
47 struct ocfs2_cached_dealloc_ctxt *dealloc); 47 struct ocfs2_cached_dealloc_ctxt *dealloc);
48int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
49 struct buffer_head *di_bh,
50 u64 phys_blkno,
51 u32 clusters,
52 int *credits,
53 struct ocfs2_alloc_context **meta_ac);
48#endif /* OCFS2_REFCOUNTTREE_H */ 54#endif /* OCFS2_REFCOUNTTREE_H */