about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Tristan Ye <tristan.ye@oracle.com> 2010-05-11 05:54:42 -0400
committer Joel Becker <joel.becker@oracle.com> 2010-05-18 15:25:10 -0400
commit 78f94673d7faf01677f374f4ebbf324ff1a0aa6e (patch)
tree 1bd394469f12b5e148835365295d1df413c04a0c
parent 547ba7c8efe43c2cabb38782e23572a6179dd1c1 (diff)
Ocfs2: Optimize ocfs2 truncate to use ocfs2_remove_btree_range() instead.
Truncate is just a special case of punching holes (from new i_size to end); we can therefore take advantage of the existing ocfs2_remove_btree_range() to reduce the complexity and redundancy in alloc.c. The goal here is to make truncate more generic and straightforward. Several functions only used by ocfs2_commit_truncate() will simply be removed. ocfs2_remove_btree_range() was originally used by the hole punching code, which didn't take refcount trees into account (definitely a bug). We therefore need to change that func a bit to handle refcount trees. It must take the refcount lock, calculate and reserve blocks for refcount tree changes, and decrease refcounts at the end. We replace ocfs2_lock_allocators() here by adding a new func ocfs2_reserve_blocks_for_rec_trunc() which accepts some extra blocks to reserve. This will not hurt any other code using ocfs2_remove_btree_range() (such as dir truncate and hole punching). I merged the following steps into one patch since they are logically doing one thing, though I know it looks a little bit fat to review. 1). Remove redundant code used by ocfs2_commit_truncate(), since we're moving to ocfs2_remove_btree_range() anyway. 2). Add a new func ocfs2_reserve_blocks_for_rec_trunc() for the purpose of accepting some extra blocks to reserve. 3). Change ocfs2_prepare_refcount_change_for_del() a bit to fit our needs. It's safe to do this since it's only being called by truncate. 4). Change ocfs2_remove_btree_range() a bit to take the refcount case into account. 5). Finally, we change ocfs2_commit_truncate() to call ocfs2_remove_btree_range() in a proper way. The patch has been tested normally for sanity check; stress tests with heavier workload will be expected. Based on this patch, fixing the punching holes bug will be fairly easy. Signed-off-by: Tristan Ye <tristan.ye@oracle.com> Acked-by: Mark Fasheh <mfasheh@suse.com> Signed-off-by: Joel Becker <joel.becker@oracle.com>
-rw-r--r-- fs/ocfs2/alloc.c 685
-rw-r--r-- fs/ocfs2/alloc.h 8
-rw-r--r-- fs/ocfs2/dir.c 4
-rw-r--r-- fs/ocfs2/file.c 11
-rw-r--r-- fs/ocfs2/inode.c 9
-rw-r--r-- fs/ocfs2/refcounttree.c 29
-rw-r--r-- fs/ocfs2/refcounttree.h 4
7 files changed, 178 insertions, 572 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 0cb2945eb817..0cb4248e03cd 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5587,19 +5587,97 @@ out:
5587 return ret; 5587 return ret;
5588} 5588}
5589 5589
5590/*
5591 * ocfs2_reserve_blocks_for_rec_trunc() looks basically the
5592 * same as ocfs2_lock_allocators(), except that it accepts a count
5593 * of extra blocks to reserve, and it only handles metadata
5594 * allocations.
5595 *
5596 * Currently, only ocfs2_remove_btree_range() uses it for truncating
5597 * and punching holes.
5598 */
5599static int ocfs2_reserve_blocks_for_rec_trunc(struct inode *inode,
5600 struct ocfs2_extent_tree *et,
5601 u32 extents_to_split,
5602 struct ocfs2_alloc_context **ac,
5603 int extra_blocks)
5604{
5605 int ret = 0, num_free_extents;
5606 unsigned int max_recs_needed = 2 * extents_to_split;
5607 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5608
5609 *ac = NULL;
5610
5611 num_free_extents = ocfs2_num_free_extents(osb, et);
5612 if (num_free_extents < 0) {
5613 ret = num_free_extents;
5614 mlog_errno(ret);
5615 goto out;
5616 }
5617
5618 if (!num_free_extents ||
5619 (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed))
5620 extra_blocks += ocfs2_extend_meta_needed(et->et_root_el);
5621
5622 if (extra_blocks) {
5623 ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, ac);
5624 if (ret < 0) {
5625 if (ret != -ENOSPC)
5626 mlog_errno(ret);
5627 goto out;
5628 }
5629 }
5630
5631out:
5632 if (ret) {
5633 if (*ac) {
5634 ocfs2_free_alloc_context(*ac);
5635 *ac = NULL;
5636 }
5637 }
5638
5639 return ret;
5640}
5641
5590int ocfs2_remove_btree_range(struct inode *inode, 5642int ocfs2_remove_btree_range(struct inode *inode,
5591 struct ocfs2_extent_tree *et, 5643 struct ocfs2_extent_tree *et,
5592 u32 cpos, u32 phys_cpos, u32 len, 5644 u32 cpos, u32 phys_cpos, u32 len, int flags,
5593 struct ocfs2_cached_dealloc_ctxt *dealloc) 5645 struct ocfs2_cached_dealloc_ctxt *dealloc,
5646 u64 refcount_loc)
5594{ 5647{
5595 int ret; 5648 int ret, credits = 0, extra_blocks = 0;
5596 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 5649 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
5597 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5650 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5598 struct inode *tl_inode = osb->osb_tl_inode; 5651 struct inode *tl_inode = osb->osb_tl_inode;
5599 handle_t *handle; 5652 handle_t *handle;
5600 struct ocfs2_alloc_context *meta_ac = NULL; 5653 struct ocfs2_alloc_context *meta_ac = NULL;
5654 struct ocfs2_refcount_tree *ref_tree = NULL;
5655
5656 if ((flags & OCFS2_EXT_REFCOUNTED) && len) {
5657 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
5658 OCFS2_HAS_REFCOUNT_FL));
5659
5660 ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
5661 &ref_tree, NULL);
5662 if (ret) {
5663 mlog_errno(ret);
5664 goto out;
5665 }
5666
5667 ret = ocfs2_prepare_refcount_change_for_del(inode,
5668 refcount_loc,
5669 phys_blkno,
5670 len,
5671 &credits,
5672 &extra_blocks);
5673 if (ret < 0) {
5674 mlog_errno(ret);
5675 goto out;
5676 }
5677 }
5601 5678
5602 ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac); 5679 ret = ocfs2_reserve_blocks_for_rec_trunc(inode, et, 1, &meta_ac,
5680 extra_blocks);
5603 if (ret) { 5681 if (ret) {
5604 mlog_errno(ret); 5682 mlog_errno(ret);
5605 return ret; 5683 return ret;
@@ -5615,7 +5693,8 @@ int ocfs2_remove_btree_range(struct inode *inode,
5615 } 5693 }
5616 } 5694 }
5617 5695
5618 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5696 handle = ocfs2_start_trans(osb,
5697 ocfs2_remove_extent_credits(osb->sb) + credits);
5619 if (IS_ERR(handle)) { 5698 if (IS_ERR(handle)) {
5620 ret = PTR_ERR(handle); 5699 ret = PTR_ERR(handle);
5621 mlog_errno(ret); 5700 mlog_errno(ret);
@@ -5642,9 +5721,20 @@ int ocfs2_remove_btree_range(struct inode *inode,
5642 5721
5643 ocfs2_journal_dirty(handle, et->et_root_bh); 5722 ocfs2_journal_dirty(handle, et->et_root_bh);
5644 5723
5645 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); 5724 if (phys_blkno) {
5646 if (ret) 5725 if (flags & OCFS2_EXT_REFCOUNTED)
5647 mlog_errno(ret); 5726 ret = ocfs2_decrease_refcount(inode, handle,
5727 ocfs2_blocks_to_clusters(osb->sb,
5728 phys_blkno),
5729 len, meta_ac,
5730 dealloc, 1);
5731 else
5732 ret = ocfs2_truncate_log_append(osb, handle,
5733 phys_blkno, len);
5734 if (ret)
5735 mlog_errno(ret);
5736
5737 }
5648 5738
5649out_commit: 5739out_commit:
5650 ocfs2_commit_trans(osb, handle); 5740 ocfs2_commit_trans(osb, handle);
@@ -5654,6 +5744,9 @@ out:
5654 if (meta_ac) 5744 if (meta_ac)
5655 ocfs2_free_alloc_context(meta_ac); 5745 ocfs2_free_alloc_context(meta_ac);
5656 5746
5747 if (ref_tree)
5748 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
5749
5657 return ret; 5750 return ret;
5658} 5751}
5659 5752
@@ -6481,417 +6574,6 @@ static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
6481 le16_to_cpu(eb->h_suballoc_bit)); 6574 le16_to_cpu(eb->h_suballoc_bit));
6482} 6575}
6483 6576
6484/* This function will figure out whether the currently last extent
6485 * block will be deleted, and if it will, what the new last extent
6486 * block will be so we can update his h_next_leaf_blk field, as well
6487 * as the dinodes i_last_eb_blk */
6488static int ocfs2_find_new_last_ext_blk(struct inode *inode,
6489 unsigned int clusters_to_del,
6490 struct ocfs2_path *path,
6491 struct buffer_head **new_last_eb)
6492{
6493 int next_free, ret = 0;
6494 u32 cpos;
6495 struct ocfs2_extent_rec *rec;
6496 struct ocfs2_extent_block *eb;
6497 struct ocfs2_extent_list *el;
6498 struct buffer_head *bh = NULL;
6499
6500 *new_last_eb = NULL;
6501
6502 /* we have no tree, so of course, no last_eb. */
6503 if (!path->p_tree_depth)
6504 goto out;
6505
6506 /* trunc to zero special case - this makes tree_depth = 0
6507 * regardless of what it is. */
6508 if (OCFS2_I(inode)->ip_clusters == clusters_to_del)
6509 goto out;
6510
6511 el = path_leaf_el(path);
6512 BUG_ON(!el->l_next_free_rec);
6513
6514 /*
6515 * Make sure that this extent list will actually be empty
6516 * after we clear away the data. We can shortcut out if
6517 * there's more than one non-empty extent in the
6518 * list. Otherwise, a check of the remaining extent is
6519 * necessary.
6520 */
6521 next_free = le16_to_cpu(el->l_next_free_rec);
6522 rec = NULL;
6523 if (ocfs2_is_empty_extent(&el->l_recs[0])) {
6524 if (next_free > 2)
6525 goto out;
6526
6527 /* We may have a valid extent in index 1, check it. */
6528 if (next_free == 2)
6529 rec = &el->l_recs[1];
6530
6531 /*
6532 * Fall through - no more nonempty extents, so we want
6533 * to delete this leaf.
6534 */
6535 } else {
6536 if (next_free > 1)
6537 goto out;
6538
6539 rec = &el->l_recs[0];
6540 }
6541
6542 if (rec) {
6543 /*
6544 * Check it we'll only be trimming off the end of this
6545 * cluster.
6546 */
6547 if (le16_to_cpu(rec->e_leaf_clusters) > clusters_to_del)
6548 goto out;
6549 }
6550
6551 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos);
6552 if (ret) {
6553 mlog_errno(ret);
6554 goto out;
6555 }
6556
6557 ret = ocfs2_find_leaf(INODE_CACHE(inode), path_root_el(path), cpos, &bh);
6558 if (ret) {
6559 mlog_errno(ret);
6560 goto out;
6561 }
6562
6563 eb = (struct ocfs2_extent_block *) bh->b_data;
6564 el = &eb->h_list;
6565
6566 /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block().
6567 * Any corruption is a code bug. */
6568 BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
6569
6570 *new_last_eb = bh;
6571 get_bh(*new_last_eb);
6572 mlog(0, "returning block %llu, (cpos: %u)\n",
6573 (unsigned long long)le64_to_cpu(eb->h_blkno), cpos);
6574out:
6575 brelse(bh);
6576
6577 return ret;
6578}
6579
6580/*
6581 * Trim some clusters off the rightmost edge of a tree. Only called
6582 * during truncate.
6583 *
6584 * The caller needs to:
6585 * - start journaling of each path component.
6586 * - compute and fully set up any new last ext block
6587 */
6588static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
6589 handle_t *handle, struct ocfs2_truncate_context *tc,
6590 u32 clusters_to_del, u64 *delete_start, u8 *flags)
6591{
6592 int ret, i, index = path->p_tree_depth;
6593 u32 new_edge = 0;
6594 u64 deleted_eb = 0;
6595 struct buffer_head *bh;
6596 struct ocfs2_extent_list *el;
6597 struct ocfs2_extent_rec *rec;
6598
6599 *delete_start = 0;
6600 *flags = 0;
6601
6602 while (index >= 0) {
6603 bh = path->p_node[index].bh;
6604 el = path->p_node[index].el;
6605
6606 mlog(0, "traveling tree (index = %d, block = %llu)\n",
6607 index, (unsigned long long)bh->b_blocknr);
6608
6609 BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
6610
6611 if (index !=
6612 (path->p_tree_depth - le16_to_cpu(el->l_tree_depth))) {
6613 ocfs2_error(inode->i_sb,
6614 "Inode %lu has invalid ext. block %llu",
6615 inode->i_ino,
6616 (unsigned long long)bh->b_blocknr);
6617 ret = -EROFS;
6618 goto out;
6619 }
6620
6621find_tail_record:
6622 i = le16_to_cpu(el->l_next_free_rec) - 1;
6623 rec = &el->l_recs[i];
6624
6625 mlog(0, "Extent list before: record %d: (%u, %u, %llu), "
6626 "next = %u\n", i, le32_to_cpu(rec->e_cpos),
6627 ocfs2_rec_clusters(el, rec),
6628 (unsigned long long)le64_to_cpu(rec->e_blkno),
6629 le16_to_cpu(el->l_next_free_rec));
6630
6631 BUG_ON(ocfs2_rec_clusters(el, rec) < clusters_to_del);
6632
6633 if (le16_to_cpu(el->l_tree_depth) == 0) {
6634 /*
6635 * If the leaf block contains a single empty
6636 * extent and no records, we can just remove
6637 * the block.
6638 */
6639 if (i == 0 && ocfs2_is_empty_extent(rec)) {
6640 memset(rec, 0,
6641 sizeof(struct ocfs2_extent_rec));
6642 el->l_next_free_rec = cpu_to_le16(0);
6643
6644 goto delete;
6645 }
6646
6647 /*
6648 * Remove any empty extents by shifting things
6649 * left. That should make life much easier on
6650 * the code below. This condition is rare
6651 * enough that we shouldn't see a performance
6652 * hit.
6653 */
6654 if (ocfs2_is_empty_extent(&el->l_recs[0])) {
6655 le16_add_cpu(&el->l_next_free_rec, -1);
6656
6657 for(i = 0;
6658 i < le16_to_cpu(el->l_next_free_rec); i++)
6659 el->l_recs[i] = el->l_recs[i + 1];
6660
6661 memset(&el->l_recs[i], 0,
6662 sizeof(struct ocfs2_extent_rec));
6663
6664 /*
6665 * We've modified our extent list. The
6666 * simplest way to handle this change
6667 * is to being the search from the
6668 * start again.
6669 */
6670 goto find_tail_record;
6671 }
6672
6673 le16_add_cpu(&rec->e_leaf_clusters, -clusters_to_del);
6674
6675 /*
6676 * We'll use "new_edge" on our way back up the
6677 * tree to know what our rightmost cpos is.
6678 */
6679 new_edge = le16_to_cpu(rec->e_leaf_clusters);
6680 new_edge += le32_to_cpu(rec->e_cpos);
6681
6682 /*
6683 * The caller will use this to delete data blocks.
6684 */
6685 *delete_start = le64_to_cpu(rec->e_blkno)
6686 + ocfs2_clusters_to_blocks(inode->i_sb,
6687 le16_to_cpu(rec->e_leaf_clusters));
6688 *flags = rec->e_flags;
6689
6690 /*
6691 * If it's now empty, remove this record.
6692 */
6693 if (le16_to_cpu(rec->e_leaf_clusters) == 0) {
6694 memset(rec, 0,
6695 sizeof(struct ocfs2_extent_rec));
6696 le16_add_cpu(&el->l_next_free_rec, -1);
6697 }
6698 } else {
6699 if (le64_to_cpu(rec->e_blkno) == deleted_eb) {
6700 memset(rec, 0,
6701 sizeof(struct ocfs2_extent_rec));
6702 le16_add_cpu(&el->l_next_free_rec, -1);
6703
6704 goto delete;
6705 }
6706
6707 /* Can this actually happen? */
6708 if (le16_to_cpu(el->l_next_free_rec) == 0)
6709 goto delete;
6710
6711 /*
6712 * We never actually deleted any clusters
6713 * because our leaf was empty. There's no
6714 * reason to adjust the rightmost edge then.
6715 */
6716 if (new_edge == 0)
6717 goto delete;
6718
6719 rec->e_int_clusters = cpu_to_le32(new_edge);
6720 le32_add_cpu(&rec->e_int_clusters,
6721 -le32_to_cpu(rec->e_cpos));
6722
6723 /*
6724 * A deleted child record should have been
6725 * caught above.
6726 */
6727 BUG_ON(le32_to_cpu(rec->e_int_clusters) == 0);
6728 }
6729
6730delete:
6731 ocfs2_journal_dirty(handle, bh);
6732
6733 mlog(0, "extent list container %llu, after: record %d: "
6734 "(%u, %u, %llu), next = %u.\n",
6735 (unsigned long long)bh->b_blocknr, i,
6736 le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec),
6737 (unsigned long long)le64_to_cpu(rec->e_blkno),
6738 le16_to_cpu(el->l_next_free_rec));
6739
6740 /*
6741 * We must be careful to only attempt delete of an
6742 * extent block (and not the root inode block).
6743 */
6744 if (index > 0 && le16_to_cpu(el->l_next_free_rec) == 0) {
6745 struct ocfs2_extent_block *eb =
6746 (struct ocfs2_extent_block *)bh->b_data;
6747
6748 /*
6749 * Save this for use when processing the
6750 * parent block.
6751 */
6752 deleted_eb = le64_to_cpu(eb->h_blkno);
6753
6754 mlog(0, "deleting this extent block.\n");
6755
6756 ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
6757
6758 BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0]));
6759 BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos));
6760 BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno));
6761
6762 ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb);
6763 /* An error here is not fatal. */
6764 if (ret < 0)
6765 mlog_errno(ret);
6766 } else {
6767 deleted_eb = 0;
6768 }
6769
6770 index--;
6771 }
6772
6773 ret = 0;
6774out:
6775 return ret;
6776}
6777
6778static int ocfs2_do_truncate(struct ocfs2_super *osb,
6779 unsigned int clusters_to_del,
6780 struct inode *inode,
6781 struct buffer_head *fe_bh,
6782 handle_t *handle,
6783 struct ocfs2_truncate_context *tc,
6784 struct ocfs2_path *path,
6785 struct ocfs2_alloc_context *meta_ac)
6786{
6787 int status;
6788 struct ocfs2_dinode *fe;
6789 struct ocfs2_extent_block *last_eb = NULL;
6790 struct ocfs2_extent_list *el;
6791 struct buffer_head *last_eb_bh = NULL;
6792 u64 delete_blk = 0;
6793 u8 rec_flags;
6794
6795 fe = (struct ocfs2_dinode *) fe_bh->b_data;
6796
6797 status = ocfs2_find_new_last_ext_blk(inode, clusters_to_del,
6798 path, &last_eb_bh);
6799 if (status < 0) {
6800 mlog_errno(status);
6801 goto bail;
6802 }
6803
6804 /*
6805 * Each component will be touched, so we might as well journal
6806 * here to avoid having to handle errors later.
6807 */
6808 status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
6809 if (status < 0) {
6810 mlog_errno(status);
6811 goto bail;
6812 }
6813
6814 if (last_eb_bh) {
6815 status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), last_eb_bh,
6816 OCFS2_JOURNAL_ACCESS_WRITE);
6817 if (status < 0) {
6818 mlog_errno(status);
6819 goto bail;
6820 }
6821
6822 last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
6823 }
6824
6825 el = &(fe->id2.i_list);
6826
6827 /*
6828 * Lower levels depend on this never happening, but it's best
6829 * to check it up here before changing the tree.
6830 */
6831 if (el->l_tree_depth && el->l_recs[0].e_int_clusters == 0) {
6832 ocfs2_error(inode->i_sb,
6833 "Inode %lu has an empty extent record, depth %u\n",
6834 inode->i_ino, le16_to_cpu(el->l_tree_depth));
6835 status = -EROFS;
6836 goto bail;
6837 }
6838
6839 dquot_free_space_nodirty(inode,
6840 ocfs2_clusters_to_bytes(osb->sb, clusters_to_del));
6841 spin_lock(&OCFS2_I(inode)->ip_lock);
6842 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
6843 clusters_to_del;
6844 spin_unlock(&OCFS2_I(inode)->ip_lock);
6845 le32_add_cpu(&fe->i_clusters, -clusters_to_del);
6846 inode->i_blocks = ocfs2_inode_sector_count(inode);
6847
6848 status = ocfs2_trim_tree(inode, path, handle, tc,
6849 clusters_to_del, &delete_blk, &rec_flags);
6850 if (status) {
6851 mlog_errno(status);
6852 goto bail;
6853 }
6854
6855 if (le32_to_cpu(fe->i_clusters) == 0) {
6856 /* trunc to zero is a special case. */
6857 el->l_tree_depth = 0;
6858 fe->i_last_eb_blk = 0;
6859 } else if (last_eb)
6860 fe->i_last_eb_blk = last_eb->h_blkno;
6861
6862 ocfs2_journal_dirty(handle, fe_bh);
6863
6864 if (last_eb) {
6865 /* If there will be a new last extent block, then by
6866 * definition, there cannot be any leaves to the right of
6867 * him. */
6868 last_eb->h_next_leaf_blk = 0;
6869 ocfs2_journal_dirty(handle, last_eb_bh);
6870 }
6871
6872 if (delete_blk) {
6873 if (rec_flags & OCFS2_EXT_REFCOUNTED)
6874 status = ocfs2_decrease_refcount(inode, handle,
6875 ocfs2_blocks_to_clusters(osb->sb,
6876 delete_blk),
6877 clusters_to_del, meta_ac,
6878 &tc->tc_dealloc, 1);
6879 else
6880 status = ocfs2_truncate_log_append(osb, handle,
6881 delete_blk,
6882 clusters_to_del);
6883 if (status < 0) {
6884 mlog_errno(status);
6885 goto bail;
6886 }
6887 }
6888 status = 0;
6889bail:
6890 brelse(last_eb_bh);
6891 mlog_exit(status);
6892 return status;
6893}
6894
6895static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh) 6577static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
6896{ 6578{
6897 set_buffer_uptodate(bh); 6579 set_buffer_uptodate(bh);
@@ -7300,26 +6982,29 @@ out:
7300 */ 6982 */
7301int ocfs2_commit_truncate(struct ocfs2_super *osb, 6983int ocfs2_commit_truncate(struct ocfs2_super *osb,
7302 struct inode *inode, 6984 struct inode *inode,
7303 struct buffer_head *fe_bh, 6985 struct buffer_head *di_bh)
7304 struct ocfs2_truncate_context *tc)
7305{ 6986{
7306 int status, i, credits, tl_sem = 0; 6987 int status = 0, i, flags = 0;
7307 u32 clusters_to_del, new_highest_cpos, range; 6988 u32 new_highest_cpos, range, trunc_cpos, trunc_len, phys_cpos, coff;
7308 u64 blkno = 0; 6989 u64 blkno = 0;
7309 struct ocfs2_extent_list *el; 6990 struct ocfs2_extent_list *el;
7310 handle_t *handle = NULL; 6991 struct ocfs2_extent_rec *rec;
7311 struct inode *tl_inode = osb->osb_tl_inode;
7312 struct ocfs2_path *path = NULL; 6992 struct ocfs2_path *path = NULL;
7313 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 6993 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
7314 struct ocfs2_alloc_context *meta_ac = NULL; 6994 struct ocfs2_extent_list *root_el = &(di->id2.i_list);
7315 struct ocfs2_refcount_tree *ref_tree = NULL; 6995 u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
6996 struct ocfs2_extent_tree et;
6997 struct ocfs2_cached_dealloc_ctxt dealloc;
7316 6998
7317 mlog_entry_void(); 6999 mlog_entry_void();
7318 7000
7001 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
7002 ocfs2_init_dealloc_ctxt(&dealloc);
7003
7319 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, 7004 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
7320 i_size_read(inode)); 7005 i_size_read(inode));
7321 7006
7322 path = ocfs2_new_path(fe_bh, &di->id2.i_list, 7007 path = ocfs2_new_path(di_bh, &di->id2.i_list,
7323 ocfs2_journal_access_di); 7008 ocfs2_journal_access_di);
7324 if (!path) { 7009 if (!path) {
7325 status = -ENOMEM; 7010 status = -ENOMEM;
@@ -7338,8 +7023,6 @@ start:
7338 goto bail; 7023 goto bail;
7339 } 7024 }
7340 7025
7341 credits = 0;
7342
7343 /* 7026 /*
7344 * Truncate always works against the rightmost tree branch. 7027 * Truncate always works against the rightmost tree branch.
7345 */ 7028 */
@@ -7374,101 +7057,62 @@ start:
7374 } 7057 }
7375 7058
7376 i = le16_to_cpu(el->l_next_free_rec) - 1; 7059 i = le16_to_cpu(el->l_next_free_rec) - 1;
7377 range = le32_to_cpu(el->l_recs[i].e_cpos) + 7060 rec = &el->l_recs[i];
7378 ocfs2_rec_clusters(el, &el->l_recs[i]); 7061 flags = rec->e_flags;
7379 if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) { 7062 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
7380 clusters_to_del = 0; 7063
7381 } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { 7064 if (i == 0 && ocfs2_is_empty_extent(rec)) {
7382 clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); 7065 /*
7383 blkno = le64_to_cpu(el->l_recs[i].e_blkno); 7066 * Lower levels depend on this never happening, but it's best
7067 * to check it up here before changing the tree.
7068 */
7069 if (root_el->l_tree_depth && rec->e_int_clusters == 0) {
7070 ocfs2_error(inode->i_sb, "Inode %lu has an empty "
7071 "extent record, depth %u\n", inode->i_ino,
7072 le16_to_cpu(root_el->l_tree_depth));
7073 status = -EROFS;
7074 goto bail;
7075 }
7076 trunc_cpos = le32_to_cpu(rec->e_cpos);
7077 trunc_len = 0;
7078 blkno = 0;
7079 } else if (le32_to_cpu(rec->e_cpos) >= new_highest_cpos) {
7080 /*
7081 * Truncate entire record.
7082 */
7083 trunc_cpos = le32_to_cpu(rec->e_cpos);
7084 trunc_len = ocfs2_rec_clusters(el, rec);
7085 blkno = le64_to_cpu(rec->e_blkno);
7384 } else if (range > new_highest_cpos) { 7086 } else if (range > new_highest_cpos) {
7385 clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + 7087 /*
7386 le32_to_cpu(el->l_recs[i].e_cpos)) - 7088 * Partial truncate. it also should be
7387 new_highest_cpos; 7089 * the last truncate we're doing.
7388 blkno = le64_to_cpu(el->l_recs[i].e_blkno) + 7090 */
7389 ocfs2_clusters_to_blocks(inode->i_sb, 7091 trunc_cpos = new_highest_cpos;
7390 ocfs2_rec_clusters(el, &el->l_recs[i]) - 7092 trunc_len = range - new_highest_cpos;
7391 clusters_to_del); 7093 coff = new_highest_cpos - le32_to_cpu(rec->e_cpos);
7094 blkno = le64_to_cpu(rec->e_blkno) +
7095 ocfs2_clusters_to_blocks(inode->i_sb, coff);
7392 } else { 7096 } else {
7097 /*
7098 * Truncate completed, leave happily.
7099 */
7393 status = 0; 7100 status = 0;
7394 goto bail; 7101 goto bail;
7395 } 7102 }
7396 7103
7397 mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", 7104 phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
7398 clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr);
7399
7400 if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) {
7401 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
7402 OCFS2_HAS_REFCOUNT_FL));
7403
7404 status = ocfs2_lock_refcount_tree(osb,
7405 le64_to_cpu(di->i_refcount_loc),
7406 1, &ref_tree, NULL);
7407 if (status) {
7408 mlog_errno(status);
7409 goto bail;
7410 }
7411
7412 status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh,
7413 blkno,
7414 clusters_to_del,
7415 &credits,
7416 &meta_ac);
7417 if (status < 0) {
7418 mlog_errno(status);
7419 goto bail;
7420 }
7421 }
7422
7423 mutex_lock(&tl_inode->i_mutex);
7424 tl_sem = 1;
7425 /* ocfs2_truncate_log_needs_flush guarantees us at least one
7426 * record is free for use. If there isn't any, we flush to get
7427 * an empty truncate log. */
7428 if (ocfs2_truncate_log_needs_flush(osb)) {
7429 status = __ocfs2_flush_truncate_log(osb);
7430 if (status < 0) {
7431 mlog_errno(status);
7432 goto bail;
7433 }
7434 }
7435
7436 credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
7437 (struct ocfs2_dinode *)fe_bh->b_data,
7438 el);
7439 handle = ocfs2_start_trans(osb, credits);
7440 if (IS_ERR(handle)) {
7441 status = PTR_ERR(handle);
7442 handle = NULL;
7443 mlog_errno(status);
7444 goto bail;
7445 }
7446 7105
7447 status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle, 7106 status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
7448 tc, path, meta_ac); 7107 phys_cpos, trunc_len, flags, &dealloc,
7108 refcount_loc);
7449 if (status < 0) { 7109 if (status < 0) {
7450 mlog_errno(status); 7110 mlog_errno(status);
7451 goto bail; 7111 goto bail;
7452 } 7112 }
7453 7113
7454 mutex_unlock(&tl_inode->i_mutex);
7455 tl_sem = 0;
7456
7457 ocfs2_commit_trans(osb, handle);
7458 handle = NULL;
7459
7460 ocfs2_reinit_path(path, 1); 7114 ocfs2_reinit_path(path, 1);
7461 7115
7462 if (meta_ac) {
7463 ocfs2_free_alloc_context(meta_ac);
7464 meta_ac = NULL;
7465 }
7466
7467 if (ref_tree) {
7468 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7469 ref_tree = NULL;
7470 }
7471
7472 /* 7116 /*
7473 * The check above will catch the case where we've truncated 7117 * The check above will catch the case where we've truncated
7474 * away all allocation. 7118 * away all allocation.
@@ -7479,25 +7123,10 @@ bail:
7479 7123
7480 ocfs2_schedule_truncate_log_flush(osb, 1); 7124 ocfs2_schedule_truncate_log_flush(osb, 1);
7481 7125
7482 if (tl_sem) 7126 ocfs2_run_deallocs(osb, &dealloc);
7483 mutex_unlock(&tl_inode->i_mutex);
7484
7485 if (handle)
7486 ocfs2_commit_trans(osb, handle);
7487
7488 if (meta_ac)
7489 ocfs2_free_alloc_context(meta_ac);
7490
7491 if (ref_tree)
7492 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7493
7494 ocfs2_run_deallocs(osb, &tc->tc_dealloc);
7495 7127
7496 ocfs2_free_path(path); 7128 ocfs2_free_path(path);
7497 7129
7498 /* This will drop the ext_alloc cluster lock for us */
7499 ocfs2_free_truncate_context(tc);
7500
7501 mlog_exit(status); 7130 mlog_exit(status);
7502 return status; 7131 return status;
7503} 7132}
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 1db4359ccb90..4fb9882ed2d6 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -140,8 +140,9 @@ int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et,
140 struct ocfs2_cached_dealloc_ctxt *dealloc); 140 struct ocfs2_cached_dealloc_ctxt *dealloc);
141int ocfs2_remove_btree_range(struct inode *inode, 141int ocfs2_remove_btree_range(struct inode *inode,
142 struct ocfs2_extent_tree *et, 142 struct ocfs2_extent_tree *et,
143 u32 cpos, u32 phys_cpos, u32 len, 143 u32 cpos, u32 phys_cpos, u32 len, int flags,
144 struct ocfs2_cached_dealloc_ctxt *dealloc); 144 struct ocfs2_cached_dealloc_ctxt *dealloc,
145 u64 refcount_loc);
145 146
146int ocfs2_num_free_extents(struct ocfs2_super *osb, 147int ocfs2_num_free_extents(struct ocfs2_super *osb,
147 struct ocfs2_extent_tree *et); 148 struct ocfs2_extent_tree *et);
@@ -233,8 +234,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
233 struct ocfs2_truncate_context **tc); 234 struct ocfs2_truncate_context **tc);
234int ocfs2_commit_truncate(struct ocfs2_super *osb, 235int ocfs2_commit_truncate(struct ocfs2_super *osb,
235 struct inode *inode, 236 struct inode *inode,
236 struct buffer_head *fe_bh, 237 struct buffer_head *di_bh);
237 struct ocfs2_truncate_context *tc);
238int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, 238int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
239 unsigned int start, unsigned int end, int trunc); 239 unsigned int start, unsigned int end, int trunc);
240 240
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 6c9a28a2d3ae..4a75c2e2f855 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -4526,8 +4526,8 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
4526 4526
4527 p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno); 4527 p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
4528 4528
4529 ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 4529 ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
4530 &dealloc); 4530 &dealloc, 0);
4531 if (ret) { 4531 if (ret) {
4532 mlog_errno(ret); 4532 mlog_errno(ret);
4533 goto out; 4533 goto out;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 19d16f2ef81e..4c7a4d8ed32c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -444,7 +444,6 @@ static int ocfs2_truncate_file(struct inode *inode,
444 int status = 0; 444 int status = 0;
445 struct ocfs2_dinode *fe = NULL; 445 struct ocfs2_dinode *fe = NULL;
446 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 446 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
447 struct ocfs2_truncate_context *tc = NULL;
448 447
449 mlog_entry("(inode = %llu, new_i_size = %llu\n", 448 mlog_entry("(inode = %llu, new_i_size = %llu\n",
450 (unsigned long long)OCFS2_I(inode)->ip_blkno, 449 (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -515,13 +514,7 @@ static int ocfs2_truncate_file(struct inode *inode,
515 goto bail_unlock_sem; 514 goto bail_unlock_sem;
516 } 515 }
517 516
518 status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); 517 status = ocfs2_commit_truncate(osb, inode, di_bh);
519 if (status < 0) {
520 mlog_errno(status);
521 goto bail_unlock_sem;
522 }
523
524 status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
525 if (status < 0) { 518 if (status < 0) {
526 mlog_errno(status); 519 mlog_errno(status);
527 goto bail_unlock_sem; 520 goto bail_unlock_sem;
@@ -1494,7 +1487,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1494 if (phys_cpos != 0) { 1487 if (phys_cpos != 0) {
1495 ret = ocfs2_remove_btree_range(inode, &et, cpos, 1488 ret = ocfs2_remove_btree_range(inode, &et, cpos,
1496 phys_cpos, alloc_size, 1489 phys_cpos, alloc_size,
1497 &dealloc); 1490 0, &dealloc, 0);
1498 if (ret) { 1491 if (ret) {
1499 mlog_errno(ret); 1492 mlog_errno(ret);
1500 goto out; 1493 goto out;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index b7650ccd76d0..9a17251f3d9e 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -544,7 +544,6 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
544 struct buffer_head *fe_bh) 544 struct buffer_head *fe_bh)
545{ 545{
546 int status = 0; 546 int status = 0;
547 struct ocfs2_truncate_context *tc = NULL;
548 struct ocfs2_dinode *fe; 547 struct ocfs2_dinode *fe;
549 handle_t *handle = NULL; 548 handle_t *handle = NULL;
550 549
@@ -586,13 +585,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
586 ocfs2_commit_trans(osb, handle); 585 ocfs2_commit_trans(osb, handle);
587 handle = NULL; 586 handle = NULL;
588 587
589 status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc); 588 status = ocfs2_commit_truncate(osb, inode, fe_bh);
590 if (status < 0) {
591 mlog_errno(status);
592 goto out;
593 }
594
595 status = ocfs2_commit_truncate(osb, inode, fe_bh, tc);
596 if (status < 0) { 589 if (status < 0) {
597 mlog_errno(status); 590 mlog_errno(status);
598 goto out; 591 goto out;
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 33dd2a18cb74..6fab28921f3d 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2509,20 +2509,19 @@ out:
2509 * 2509 *
2510 * Normally the refcount blocks store these refcount should be 2510 * Normally the refcount blocks store these refcount should be
2511 * contiguous also, so that we can get the number easily. 2511 * contiguous also, so that we can get the number easily.
2512 * As for meta_ac, we will at most add split 2 refcount record and 2512 * We will at most add split 2 refcount records and 2 more
2513 * 2 more refcount block, so just check it in a rough way. 2513 * refcount blocks, so just check it in a rough way.
2514 * 2514 *
2515 * Caller must hold refcount tree lock. 2515 * Caller must hold refcount tree lock.
2516 */ 2516 */
2517int ocfs2_prepare_refcount_change_for_del(struct inode *inode, 2517int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2518 struct buffer_head *di_bh, 2518 u64 refcount_loc,
2519 u64 phys_blkno, 2519 u64 phys_blkno,
2520 u32 clusters, 2520 u32 clusters,
2521 int *credits, 2521 int *credits,
2522 struct ocfs2_alloc_context **meta_ac) 2522 int *ref_blocks)
2523{ 2523{
2524 int ret, ref_blocks = 0; 2524 int ret;
2525 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2526 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2525 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2527 struct buffer_head *ref_root_bh = NULL; 2526 struct buffer_head *ref_root_bh = NULL;
2528 struct ocfs2_refcount_tree *tree; 2527 struct ocfs2_refcount_tree *tree;
@@ -2539,14 +2538,13 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2539 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); 2538 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
2540 2539
2541 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), 2540 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
2542 le64_to_cpu(di->i_refcount_loc), &tree); 2541 refcount_loc, &tree);
2543 if (ret) { 2542 if (ret) {
2544 mlog_errno(ret); 2543 mlog_errno(ret);
2545 goto out; 2544 goto out;
2546 } 2545 }
2547 2546
2548 ret = ocfs2_read_refcount_block(&tree->rf_ci, 2547 ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc,
2549 le64_to_cpu(di->i_refcount_loc),
2550 &ref_root_bh); 2548 &ref_root_bh);
2551 if (ret) { 2549 if (ret) {
2552 mlog_errno(ret); 2550 mlog_errno(ret);
@@ -2557,21 +2555,14 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2557 &tree->rf_ci, 2555 &tree->rf_ci,
2558 ref_root_bh, 2556 ref_root_bh,
2559 start_cpos, clusters, 2557 start_cpos, clusters,
2560 &ref_blocks, credits); 2558 ref_blocks, credits);
2561 if (ret) { 2559 if (ret) {
2562 mlog_errno(ret); 2560 mlog_errno(ret);
2563 goto out; 2561 goto out;
2564 } 2562 }
2565 2563
2566 mlog(0, "reserve new metadata %d, credits = %d\n", 2564 mlog(0, "reserve new metadata %d blocks, credits = %d\n",
2567 ref_blocks, *credits); 2565 *ref_blocks, *credits);
2568
2569 if (ref_blocks) {
2570 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2571 ref_blocks, meta_ac);
2572 if (ret)
2573 mlog_errno(ret);
2574 }
2575 2566
2576out: 2567out:
2577 brelse(ref_root_bh); 2568 brelse(ref_root_bh);
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index c1d19b1d3ecc..9983ba1570e2 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -47,11 +47,11 @@ int ocfs2_decrease_refcount(struct inode *inode,
47 struct ocfs2_cached_dealloc_ctxt *dealloc, 47 struct ocfs2_cached_dealloc_ctxt *dealloc,
48 int delete); 48 int delete);
49int ocfs2_prepare_refcount_change_for_del(struct inode *inode, 49int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
50 struct buffer_head *di_bh, 50 u64 refcount_loc,
51 u64 phys_blkno, 51 u64 phys_blkno,
52 u32 clusters, 52 u32 clusters,
53 int *credits, 53 int *credits,
54 struct ocfs2_alloc_context **meta_ac); 54 int *ref_blocks);
55int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, 55int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
56 u32 cpos, u32 write_len, u32 max_cpos); 56 u32 cpos, u32 write_len, u32 max_cpos);
57 57