aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMandy Kirkconnell <alkirkco@sgi.com>2006-03-13 21:30:23 -0500
committerNathan Scott <nathans@sgi.com>2006-03-13 21:30:23 -0500
commit0293ce3a9fd1b34c933a96577a8ba737b681cf75 (patch)
tree19c01a41566aa3c631a4d903ca5f3242b15af169
parent4eea22f01bb4fdba1aab4430c33adbe88d9d4985 (diff)
[XFS] 929045 567344 This mod introduces multi-level in-core file extent
functionality, building upon the new layout introduced in mod xfs-linux:xfs-kern:207390a. The new multi-level extent allocations are only required for heavily fragmented files, so the old-style linear extent list is used on files until the extents reach a pre-determined size of 4k. 4k buffers are used because this is the system page size on Linux i386 and systems with larger page sizes don't seem to gain much, if anything, by using their native page size as the extent buffer size. Also, using 4k extent buffers everywhere provides a consistent interface for CXFS across different platforms. The 4k extent buffers are managed by an indirection array (xfs_ext_irec_t) which is basically just a pointer array with a bit of extra information to keep track of the number of extents in each buffer as well as the extent offset of each buffer. Major changes include: - Add multi-level in-core file extent functionality to the xfs_iext_ subroutines introduced in mod: xfs-linux:xfs-kern:207390a - Introduce 13 new subroutines which add functionality for multi-level in-core file extents: xfs_iext_add_indirect_multi() xfs_iext_remove_indirect() xfs_iext_realloc_indirect() xfs_iext_indirect_to_direct() xfs_iext_bno_to_irec() xfs_iext_idx_to_irec() xfs_iext_irec_init() xfs_iext_irec_new() xfs_iext_irec_remove() xfs_iext_irec_compact() xfs_iext_irec_compact_pages() xfs_iext_irec_compact_full() xfs_iext_irec_update_extoffs() SGI-PV: 928864 SGI-Modid: xfs-linux-melb:xfs-kern:207393a Signed-off-by: Mandy Kirkconnell <alkirkco@sgi.com> Signed-off-by: Nathan Scott <nathans@sgi.com>
-rw-r--r--fs/xfs/xfs_bmap.c59
-rw-r--r--fs/xfs/xfs_bmap.h18
-rw-r--r--fs/xfs/xfs_bmap_btree.h8
-rw-r--r--fs/xfs/xfs_inode.c715
-rw-r--r--fs/xfs/xfs_inode.h52
5 files changed, 826 insertions, 26 deletions
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 53c47a181f87..81a95b684b6b 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3423,6 +3423,7 @@ xfs_bmap_local_to_extents(
3423 xfs_bmap_forkoff_reset(args.mp, ip, whichfork); 3423 xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
3424 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); 3424 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
3425 xfs_iext_add(ifp, 0, 1); 3425 xfs_iext_add(ifp, 0, 1);
3426 ASSERT((ifp->if_flags & (XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFEXTENTS);
3426 ep = xfs_iext_get_ext(ifp, 0); 3427 ep = xfs_iext_get_ext(ifp, 0);
3427 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); 3428 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
3428 xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork); 3429 xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork);
@@ -3552,6 +3553,54 @@ xfs_bmap_do_search_extents(
3552} 3553}
3553 3554
3554/* 3555/*
3556 * Call xfs_bmap_do_search_extents() to search for the extent
3557 * record containing block bno. If in multi-level in-core extent
3558 * allocation mode, find and extract the target extent buffer,
3559 * otherwise just use the direct extent list.
3560 */
3561xfs_bmbt_rec_t * /* pointer to found extent entry */
3562xfs_bmap_search_multi_extents(
3563 xfs_ifork_t *ifp, /* inode fork pointer */
3564 xfs_fileoff_t bno, /* block number searched for */
3565 int *eofp, /* out: end of file found */
3566 xfs_extnum_t *lastxp, /* out: last extent index */
3567 xfs_bmbt_irec_t *gotp, /* out: extent entry found */
3568 xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */
3569{
3570 xfs_bmbt_rec_t *base; /* base of extent records */
3571 xfs_bmbt_rec_t *ep; /* extent record pointer */
3572 xfs_ext_irec_t *erp = NULL; /* indirection array pointer */
3573 xfs_extnum_t lastx; /* last extent index */
3574 xfs_extnum_t nextents; /* number of file extents */
3575
3576 /*
3577 * For multi-level extent allocation mode, find the
3578 * target extent list and pass only the contiguous
3579 * list to xfs_bmap_do_search_extents. Convert lastx
3580 * from a file extent index to an index within the
3581 * target extent list.
3582 */
3583 if (ifp->if_flags & XFS_IFEXTIREC) {
3584 int erp_idx = 0;
3585 erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
3586 base = erp->er_extbuf;
3587 nextents = erp->er_extcount;
3588 lastx = ifp->if_lastex - erp->er_extoff;
3589 } else {
3590 base = &ifp->if_u1.if_extents[0];
3591 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3592 lastx = ifp->if_lastex;
3593 }
3594 ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno,
3595 eofp, lastxp, gotp, prevp);
3596 /* Convert lastx back to file-based index */
3597 if (ifp->if_flags & XFS_IFEXTIREC) {
3598 *lastxp += erp->er_extoff;
3599 }
3600 return ep;
3601}
3602
3603/*
3555 * Search the extents list for the inode, for the extent containing bno. 3604 * Search the extents list for the inode, for the extent containing bno.
3556 * If bno lies in a hole, point to the next entry. If bno lies past eof, 3605 * If bno lies in a hole, point to the next entry. If bno lies past eof,
3557 * *eofp will be set, and *prevp will contain the last entry (null if none). 3606 * *eofp will be set, and *prevp will contain the last entry (null if none).
@@ -3569,20 +3618,14 @@ xfs_bmap_search_extents(
3569 xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ 3618 xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */
3570{ 3619{
3571 xfs_ifork_t *ifp; /* inode fork pointer */ 3620 xfs_ifork_t *ifp; /* inode fork pointer */
3572 xfs_bmbt_rec_t *base; /* base of extent list */
3573 xfs_extnum_t lastx; /* last extent index used */
3574 xfs_extnum_t nextents; /* number of file extents */
3575 xfs_bmbt_rec_t *ep; /* extent record pointer */ 3621 xfs_bmbt_rec_t *ep; /* extent record pointer */
3576 int rt; /* realtime flag */ 3622 int rt; /* realtime flag */
3577 3623
3578 XFS_STATS_INC(xs_look_exlist); 3624 XFS_STATS_INC(xs_look_exlist);
3579 ifp = XFS_IFORK_PTR(ip, whichfork); 3625 ifp = XFS_IFORK_PTR(ip, whichfork);
3580 lastx = ifp->if_lastex;
3581 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3582 base = &ifp->if_u1.if_extents[0];
3583 3626
3584 ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp, 3627 ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
3585 lastxp, gotp, prevp); 3628
3586 rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); 3629 rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
3587 if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) { 3630 if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) {
3588 cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld " 3631 cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld "
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 4c05f95452c2..011ccaa9a1c0 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -352,6 +352,24 @@ xfs_check_nostate_extents(
352 xfs_extnum_t idx, 352 xfs_extnum_t idx,
353 xfs_extnum_t num); 353 xfs_extnum_t num);
354 354
355/*
356 * Call xfs_bmap_do_search_extents() to search for the extent
357 * record containing block bno. If in multi-level in-core extent
358 * allocation mode, find and extract the target extent buffer,
359 * otherwise just use the direct extent list.
360 */
361xfs_bmbt_rec_t *
362xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *,
363 xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
364
365/*
366 * Search an extent list for the extent which includes block
367 * bno.
368 */
369xfs_bmbt_rec_t *xfs_bmap_do_search_extents(xfs_bmbt_rec_t *,
370 xfs_extnum_t, xfs_extnum_t, xfs_fileoff_t, int *,
371 xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
372
355#endif /* __KERNEL__ */ 373#endif /* __KERNEL__ */
356 374
357#endif /* __XFS_BMAP_H__ */ 375#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index e095a2d344ae..6478cfa0e539 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -372,14 +372,6 @@ extern int xfs_bmbt_get_rec(struct xfs_btree_cur *, xfs_fileoff_t *,
372 xfs_exntst_t *, int *); 372 xfs_exntst_t *, int *);
373#endif 373#endif
374 374
375/*
376 * Search an extent list for the extent which includes block
377 * bno.
378 */
379xfs_bmbt_rec_t *xfs_bmap_do_search_extents(xfs_bmbt_rec_t *,
380 xfs_extnum_t, xfs_extnum_t, xfs_fileoff_t, int *,
381 xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
382
383#endif /* __KERNEL__ */ 375#endif /* __KERNEL__ */
384 376
385#endif /* __XFS_BMAP_BTREE_H__ */ 377#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 6459395a0e40..580fa0758039 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2630,8 +2630,9 @@ xfs_idestroy_fork(
2630 ifp->if_real_bytes = 0; 2630 ifp->if_real_bytes = 0;
2631 } 2631 }
2632 } else if ((ifp->if_flags & XFS_IFEXTENTS) && 2632 } else if ((ifp->if_flags & XFS_IFEXTENTS) &&
2633 (ifp->if_u1.if_extents != NULL) && 2633 ((ifp->if_flags & XFS_IFEXTIREC) ||
2634 (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)) { 2634 ((ifp->if_u1.if_extents != NULL) &&
2635 (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
2635 ASSERT(ifp->if_real_bytes != 0); 2636 ASSERT(ifp->if_real_bytes != 0);
2636 xfs_iext_destroy(ifp); 2637 xfs_iext_destroy(ifp);
2637 } 2638 }
@@ -3622,7 +3623,16 @@ xfs_iext_get_ext(
3622 xfs_extnum_t idx) /* index of target extent */ 3623 xfs_extnum_t idx) /* index of target extent */
3623{ 3624{
3624 ASSERT(idx >= 0); 3625 ASSERT(idx >= 0);
3625 if (ifp->if_bytes) { 3626 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
3627 return ifp->if_u1.if_ext_irec->er_extbuf;
3628 } else if (ifp->if_flags & XFS_IFEXTIREC) {
3629 xfs_ext_irec_t *erp; /* irec pointer */
3630 int erp_idx = 0; /* irec index */
3631 xfs_extnum_t page_idx = idx; /* ext index in target list */
3632
3633 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
3634 return &erp->er_extbuf[page_idx];
3635 } else if (ifp->if_bytes) {
3626 return &ifp->if_u1.if_extents[idx]; 3636 return &ifp->if_u1.if_extents[idx];
3627 } else { 3637 } else {
3628 return NULL; 3638 return NULL;
@@ -3691,6 +3701,7 @@ xfs_iext_add(
3691 } 3701 }
3692 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 3702 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
3693 ifp->if_real_bytes = 0; 3703 ifp->if_real_bytes = 0;
3704 ifp->if_lastex = nextents + ext_diff;
3694 } 3705 }
3695 /* 3706 /*
3696 * Otherwise use a linear (direct) extent list. 3707 * Otherwise use a linear (direct) extent list.
@@ -3698,7 +3709,7 @@ xfs_iext_add(
3698 * xfs_iext_realloc_direct will switch us from 3709 * xfs_iext_realloc_direct will switch us from
3699 * inline to direct extent allocation mode. 3710 * inline to direct extent allocation mode.
3700 */ 3711 */
3701 else { 3712 else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
3702 xfs_iext_realloc_direct(ifp, new_size); 3713 xfs_iext_realloc_direct(ifp, new_size);
3703 if (idx < nextents) { 3714 if (idx < nextents) {
3704 memmove(&ifp->if_u1.if_extents[idx + ext_diff], 3715 memmove(&ifp->if_u1.if_extents[idx + ext_diff],
@@ -3707,14 +3718,182 @@ xfs_iext_add(
3707 memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); 3718 memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
3708 } 3719 }
3709 } 3720 }
3721 /* Indirection array */
3722 else {
3723 xfs_ext_irec_t *erp;
3724 int erp_idx = 0;
3725 int page_idx = idx;
3726
3727 ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
3728 if (ifp->if_flags & XFS_IFEXTIREC) {
3729 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
3730 } else {
3731 xfs_iext_irec_init(ifp);
3732 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
3733 erp = ifp->if_u1.if_ext_irec;
3734 }
3735 /* Extents fit in target extent page */
3736 if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
3737 if (page_idx < erp->er_extcount) {
3738 memmove(&erp->er_extbuf[page_idx + ext_diff],
3739 &erp->er_extbuf[page_idx],
3740 (erp->er_extcount - page_idx) *
3741 sizeof(xfs_bmbt_rec_t));
3742 memset(&erp->er_extbuf[page_idx], 0, byte_diff);
3743 }
3744 erp->er_extcount += ext_diff;
3745 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
3746 }
3747 /* Insert a new extent page */
3748 else if (erp) {
3749 xfs_iext_add_indirect_multi(ifp,
3750 erp_idx, page_idx, ext_diff);
3751 }
3752 /*
3753 * If extent(s) are being appended to the last page in
3754 * the indirection array and the new extent(s) don't fit
3755 * in the page, then erp is NULL and erp_idx is set to
3756 * the next index needed in the indirection array.
3757 */
3758 else {
3759 int count = ext_diff;
3760
3761 while (count) {
3762 erp = xfs_iext_irec_new(ifp, erp_idx);
3763 erp->er_extcount = count;
3764 count -= MIN(count, (int)XFS_LINEAR_EXTS);
3765 if (count) {
3766 erp_idx++;
3767 }
3768 }
3769 }
3770 }
3710 ifp->if_bytes = new_size; 3771 ifp->if_bytes = new_size;
3711} 3772}
3712 3773
3713/* 3774/*
3775 * This is called when incore extents are being added to the indirection
3776 * array and the new extents do not fit in the target extent list. The
3777 * erp_idx parameter contains the irec index for the target extent list
3778 * in the indirection array, and the idx parameter contains the extent
3779 * index within the list. The number of extents being added is stored
3780 * in the count parameter.
3781 *
3782 * |-------| |-------|
3783 * | | | | idx - number of extents before idx
3784 * | idx | | count |
3785 * | | | | count - number of extents being inserted at idx
3786 * |-------| |-------|
3787 * | count | | nex2 | nex2 - number of extents after idx + count
3788 * |-------| |-------|
3789 */
3790void
3791xfs_iext_add_indirect_multi(
3792 xfs_ifork_t *ifp, /* inode fork pointer */
3793 int erp_idx, /* target extent irec index */
3794 xfs_extnum_t idx, /* index within target list */
3795 int count) /* new extents being added */
3796{
3797 int byte_diff; /* new bytes being added */
3798 xfs_ext_irec_t *erp; /* pointer to irec entry */
3799 xfs_extnum_t ext_diff; /* number of extents to add */
3800 xfs_extnum_t ext_cnt; /* new extents still needed */
3801 xfs_extnum_t nex2; /* extents after idx + count */
3802 xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */
3803 int nlists; /* number of irec's (lists) */
3804
3805 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
3806 erp = &ifp->if_u1.if_ext_irec[erp_idx];
3807 nex2 = erp->er_extcount - idx;
3808 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
3809
3810 /*
3811 * Save second part of target extent list
3812 * (all extents past */
3813 if (nex2) {
3814 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
3815 nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP);
3816 memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
3817 erp->er_extcount -= nex2;
3818 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
3819 memset(&erp->er_extbuf[idx], 0, byte_diff);
3820 }
3821
3822 /*
3823 * Add the new extents to the end of the target
3824 * list, then allocate new irec record(s) and
3825 * extent buffer(s) as needed to store the rest
3826 * of the new extents.
3827 */
3828 ext_cnt = count;
3829 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
3830 if (ext_diff) {
3831 erp->er_extcount += ext_diff;
3832 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
3833 ext_cnt -= ext_diff;
3834 }
3835 while (ext_cnt) {
3836 erp_idx++;
3837 erp = xfs_iext_irec_new(ifp, erp_idx);
3838 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
3839 erp->er_extcount = ext_diff;
3840 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
3841 ext_cnt -= ext_diff;
3842 }
3843
3844 /* Add nex2 extents back to indirection array */
3845 if (nex2) {
3846 xfs_extnum_t ext_avail;
3847 int i;
3848
3849 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
3850 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
3851 i = 0;
3852 /*
3853 * If nex2 extents fit in the current page, append
3854 * nex2_ep after the new extents.
3855 */
3856 if (nex2 <= ext_avail) {
3857 i = erp->er_extcount;
3858 }
3859 /*
3860 * Otherwise, check if space is available in the
3861 * next page.
3862 */
3863 else if ((erp_idx < nlists - 1) &&
3864 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
3865 ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
3866 erp_idx++;
3867 erp++;
3868 /* Create a hole for nex2 extents */
3869 memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
3870 erp->er_extcount * sizeof(xfs_bmbt_rec_t));
3871 }
3872 /*
3873 * Final choice, create a new extent page for
3874 * nex2 extents.
3875 */
3876 else {
3877 erp_idx++;
3878 erp = xfs_iext_irec_new(ifp, erp_idx);
3879 }
3880 memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
3881 kmem_free(nex2_ep, byte_diff);
3882 erp->er_extcount += nex2;
3883 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
3884 }
3885}
3886
3887/*
3714 * This is called when the amount of space required for incore file 3888 * This is called when the amount of space required for incore file
3715 * extents needs to be decreased. The ext_diff parameter stores the 3889 * extents needs to be decreased. The ext_diff parameter stores the
3716 * number of extents to be removed and the idx parameter contains 3890 * number of extents to be removed and the idx parameter contains
3717 * the extent index where the extents will be removed from. 3891 * the extent index where the extents will be removed from.
3892 *
3893 * If the amount of space needed has decreased below the linear
3894 * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
3895 * extent array. Otherwise, use kmem_realloc() to adjust the
3896 * size to what is needed.
3718 */ 3897 */
3719void 3898void
3720xfs_iext_remove( 3899xfs_iext_remove(
@@ -3731,6 +3910,8 @@ xfs_iext_remove(
3731 3910
3732 if (new_size == 0) { 3911 if (new_size == 0) {
3733 xfs_iext_destroy(ifp); 3912 xfs_iext_destroy(ifp);
3913 } else if (ifp->if_flags & XFS_IFEXTIREC) {
3914 xfs_iext_remove_indirect(ifp, idx, ext_diff);
3734 } else if (ifp->if_real_bytes) { 3915 } else if (ifp->if_real_bytes) {
3735 xfs_iext_remove_direct(ifp, idx, ext_diff); 3916 xfs_iext_remove_direct(ifp, idx, ext_diff);
3736 } else { 3917 } else {
@@ -3751,6 +3932,7 @@ xfs_iext_remove_inline(
3751{ 3932{
3752 int nextents; /* number of extents in file */ 3933 int nextents; /* number of extents in file */
3753 3934
3935 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
3754 ASSERT(idx < XFS_INLINE_EXTS); 3936 ASSERT(idx < XFS_INLINE_EXTS);
3755 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3937 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3756 ASSERT(((nextents - ext_diff) > 0) && 3938 ASSERT(((nextents - ext_diff) > 0) &&
@@ -3788,6 +3970,7 @@ xfs_iext_remove_direct(
3788 xfs_extnum_t nextents; /* number of extents in file */ 3970 xfs_extnum_t nextents; /* number of extents in file */
3789 int new_size; /* size of extents after removal */ 3971 int new_size; /* size of extents after removal */
3790 3972
3973 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
3791 new_size = ifp->if_bytes - 3974 new_size = ifp->if_bytes -
3792 (ext_diff * sizeof(xfs_bmbt_rec_t)); 3975 (ext_diff * sizeof(xfs_bmbt_rec_t));
3793 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3976 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
@@ -3816,6 +3999,84 @@ xfs_iext_remove_direct(
3816} 3999}
3817 4000
3818/* 4001/*
4002 * This is called when incore extents are being removed from the
4003 * indirection array and the extents being removed span multiple extent
4004 * buffers. The idx parameter contains the file extent index where we
4005 * want to begin removing extents, and the count parameter contains
4006 * how many extents need to be removed.
4007 *
4008 * |-------| |-------|
4009 * | nex1 | | | nex1 - number of extents before idx
4010 * |-------| | count |
4011 * | | | | count - number of extents being removed at idx
4012 * | count | |-------|
4013 * | | | nex2 | nex2 - number of extents after idx + count
4014 * |-------| |-------|
4015 */
4016void
4017xfs_iext_remove_indirect(
4018 xfs_ifork_t *ifp, /* inode fork pointer */
4019 xfs_extnum_t idx, /* index to begin removing extents */
4020 int count) /* number of extents to remove */
4021{
4022 xfs_ext_irec_t *erp; /* indirection array pointer */
4023 int erp_idx = 0; /* indirection array index */
4024 xfs_extnum_t ext_cnt; /* extents left to remove */
4025 xfs_extnum_t ext_diff; /* extents to remove in current list */
4026 xfs_extnum_t nex1; /* number of extents before idx */
4027 xfs_extnum_t nex2; /* extents after idx + count */
4028 int nlists; /* entries in indirecton array */
4029 int page_idx = idx; /* index in target extent list */
4030
4031 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4032 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
4033 ASSERT(erp != NULL);
4034 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4035 nex1 = page_idx;
4036 ext_cnt = count;
4037 while (ext_cnt) {
4038 nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
4039 ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
4040 /*
4041 * Check for deletion of entire list;
4042 * xfs_iext_irec_remove() updates extent offsets.
4043 */
4044 if (ext_diff == erp->er_extcount) {
4045 xfs_iext_irec_remove(ifp, erp_idx);
4046 ext_cnt -= ext_diff;
4047 nex1 = 0;
4048 if (ext_cnt) {
4049 ASSERT(erp_idx < ifp->if_real_bytes /
4050 XFS_IEXT_BUFSZ);
4051 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4052 nex1 = 0;
4053 continue;
4054 } else {
4055 break;
4056 }
4057 }
4058 /* Move extents up (if needed) */
4059 if (nex2) {
4060 memmove(&erp->er_extbuf[nex1],
4061 &erp->er_extbuf[nex1 + ext_diff],
4062 nex2 * sizeof(xfs_bmbt_rec_t));
4063 }
4064 /* Zero out rest of page */
4065 memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
4066 ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
4067 /* Update remaining counters */
4068 erp->er_extcount -= ext_diff;
4069 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
4070 ext_cnt -= ext_diff;
4071 nex1 = 0;
4072 erp_idx++;
4073 erp++;
4074 }
4075 ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
4076 xfs_iext_irec_compact(ifp);
4077}
4078
4079/*
3819 * Create, destroy, or resize a linear (direct) block of extents. 4080 * Create, destroy, or resize a linear (direct) block of extents.
3820 */ 4081 */
3821void 4082void
@@ -3827,6 +4088,10 @@ xfs_iext_realloc_direct(
3827 4088
3828 rnew_size = new_size; 4089 rnew_size = new_size;
3829 4090
4091 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
4092 ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
4093 (new_size != ifp->if_real_bytes)));
4094
3830 /* Free extent records */ 4095 /* Free extent records */
3831 if (new_size == 0) { 4096 if (new_size == 0) {
3832 xfs_iext_destroy(ifp); 4097 xfs_iext_destroy(ifp);
@@ -3920,13 +4185,76 @@ xfs_iext_inline_to_direct(
3920} 4185}
3921 4186
3922/* 4187/*
4188 * Resize an extent indirection array to new_size bytes.
4189 */
4190void
4191xfs_iext_realloc_indirect(
4192 xfs_ifork_t *ifp, /* inode fork pointer */
4193 int new_size) /* new indirection array size */
4194{
4195 int nlists; /* number of irec's (ex lists) */
4196 int size; /* current indirection array size */
4197
4198 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4199 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4200 size = nlists * sizeof(xfs_ext_irec_t);
4201 ASSERT(ifp->if_real_bytes);
4202 ASSERT((new_size >= 0) && (new_size != size));
4203 if (new_size == 0) {
4204 xfs_iext_destroy(ifp);
4205 } else {
4206 ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
4207 kmem_realloc(ifp->if_u1.if_ext_irec,
4208 new_size, size, KM_SLEEP);
4209 }
4210}
4211
4212/*
4213 * Switch from indirection array to linear (direct) extent allocations.
4214 */
4215void
4216xfs_iext_indirect_to_direct(
4217 xfs_ifork_t *ifp) /* inode fork pointer */
4218{
4219 xfs_bmbt_rec_t *ep; /* extent record pointer */
4220 xfs_extnum_t nextents; /* number of extents in file */
4221 int size; /* size of file extents */
4222
4223 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4224 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4225 ASSERT(nextents <= XFS_LINEAR_EXTS);
4226 size = nextents * sizeof(xfs_bmbt_rec_t);
4227
4228 xfs_iext_irec_compact_full(ifp);
4229 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
4230
4231 ep = ifp->if_u1.if_ext_irec->er_extbuf;
4232 kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t));
4233 ifp->if_flags &= ~XFS_IFEXTIREC;
4234 ifp->if_u1.if_extents = ep;
4235 ifp->if_bytes = size;
4236 if (nextents < XFS_LINEAR_EXTS) {
4237 xfs_iext_realloc_direct(ifp, size);
4238 }
4239}
4240
4241/*
3923 * Free incore file extents. 4242 * Free incore file extents.
3924 */ 4243 */
3925void 4244void
3926xfs_iext_destroy( 4245xfs_iext_destroy(
3927 xfs_ifork_t *ifp) /* inode fork pointer */ 4246 xfs_ifork_t *ifp) /* inode fork pointer */
3928{ 4247{
3929 if (ifp->if_real_bytes) { 4248 if (ifp->if_flags & XFS_IFEXTIREC) {
4249 int erp_idx;
4250 int nlists;
4251
4252 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4253 for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
4254 xfs_iext_irec_remove(ifp, erp_idx);
4255 }
4256 ifp->if_flags &= ~XFS_IFEXTIREC;
4257 } else if (ifp->if_real_bytes) {
3930 kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); 4258 kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
3931 } else if (ifp->if_bytes) { 4259 } else if (ifp->if_bytes) {
3932 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 4260 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
@@ -3936,3 +4264,380 @@ xfs_iext_destroy(
3936 ifp->if_real_bytes = 0; 4264 ifp->if_real_bytes = 0;
3937 ifp->if_bytes = 0; 4265 ifp->if_bytes = 0;
3938} 4266}
4267
4268/*
4269 * Return a pointer to the indirection array entry containing the
4270 * extent record for filesystem block bno. Store the index of the
4271 * target irec in *erp_idxp.
4272 */
4273xfs_ext_irec_t *
4274xfs_iext_bno_to_irec(
4275 xfs_ifork_t *ifp, /* inode fork pointer */
4276 xfs_fileoff_t bno, /* block number to search for */
4277 int *erp_idxp) /* irec index of target ext list */
4278{
4279 xfs_ext_irec_t *erp = NULL; /* indirection array pointer */
4280 xfs_ext_irec_t *erp_next; /* next indirection array entry */
4281 xfs_extnum_t erp_idx; /* indirection array index */
4282 int nlists; /* number of extent irec's (lists) */
4283 int high; /* binary search upper limit */
4284 int low; /* binary search lower limit */
4285
4286 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4287 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4288 erp_idx = 0;
4289 low = 0;
4290 high = nlists - 1;
4291 while (low <= high) {
4292 erp_idx = (low + high) >> 1;
4293 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4294 erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
4295 if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
4296 high = erp_idx - 1;
4297 } else if (erp_next && bno >=
4298 xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
4299 low = erp_idx + 1;
4300 } else {
4301 break;
4302 }
4303 }
4304 *erp_idxp = erp_idx;
4305 return erp;
4306}
4307
4308/*
4309 * Return a pointer to the indirection array entry containing the
4310 * extent record at file extent index *idxp. Store the index of the
4311 * target irec in *erp_idxp and store the page index of the target
4312 * extent record in *idxp.
4313 */
4314xfs_ext_irec_t *
4315xfs_iext_idx_to_irec(
4316 xfs_ifork_t *ifp, /* inode fork pointer */
4317 xfs_extnum_t *idxp, /* extent index (file -> page) */
4318 int *erp_idxp, /* pointer to target irec */
4319 int realloc) /* new bytes were just added */
4320{
4321 xfs_ext_irec_t *prev; /* pointer to previous irec */
4322 xfs_ext_irec_t *erp = NULL; /* pointer to current irec */
4323 int erp_idx; /* indirection array index */
4324 int nlists; /* number of irec's (ex lists) */
4325 int high; /* binary search upper limit */
4326 int low; /* binary search lower limit */
4327 xfs_extnum_t page_idx = *idxp; /* extent index in target list */
4328
4329 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4330 ASSERT(page_idx >= 0 && page_idx <=
4331 ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
4332 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4333 erp_idx = 0;
4334 low = 0;
4335 high = nlists - 1;
4336
4337 /* Binary search extent irec's */
4338 while (low <= high) {
4339 erp_idx = (low + high) >> 1;
4340 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4341 prev = erp_idx > 0 ? erp - 1 : NULL;
4342 if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
4343 realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
4344 high = erp_idx - 1;
4345 } else if (page_idx > erp->er_extoff + erp->er_extcount ||
4346 (page_idx == erp->er_extoff + erp->er_extcount &&
4347 !realloc)) {
4348 low = erp_idx + 1;
4349 } else if (page_idx == erp->er_extoff + erp->er_extcount &&
4350 erp->er_extcount == XFS_LINEAR_EXTS) {
4351 ASSERT(realloc);
4352 page_idx = 0;
4353 erp_idx++;
4354 erp = erp_idx < nlists ? erp + 1 : NULL;
4355 break;
4356 } else {
4357 page_idx -= erp->er_extoff;
4358 break;
4359 }
4360 }
4361 *idxp = page_idx;
4362 *erp_idxp = erp_idx;
4363 return(erp);
4364}
4365
4366/*
4367 * Allocate and initialize an indirection array once the space needed
4368 * for incore extents increases above XFS_IEXT_BUFSZ.
4369 */
4370void
4371xfs_iext_irec_init(
4372 xfs_ifork_t *ifp) /* inode fork pointer */
4373{
4374 xfs_ext_irec_t *erp; /* indirection array pointer */
4375 xfs_extnum_t nextents; /* number of extents in file */
4376
4377 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
4378 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4379 ASSERT(nextents <= XFS_LINEAR_EXTS);
4380
4381 erp = (xfs_ext_irec_t *)
4382 kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP);
4383
4384 if (nextents == 0) {
4385 ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
4386 kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP);
4387 } else if (!ifp->if_real_bytes) {
4388 xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
4389 } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
4390 xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
4391 }
4392 erp->er_extbuf = ifp->if_u1.if_extents;
4393 erp->er_extcount = nextents;
4394 erp->er_extoff = 0;
4395
4396 ifp->if_flags |= XFS_IFEXTIREC;
4397 ifp->if_real_bytes = XFS_IEXT_BUFSZ;
4398 ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
4399 ifp->if_u1.if_ext_irec = erp;
4400
4401 return;
4402}
4403
4404/*
4405 * Allocate and initialize a new entry in the indirection array.
4406 */
4407xfs_ext_irec_t *
4408xfs_iext_irec_new(
4409 xfs_ifork_t *ifp, /* inode fork pointer */
4410 int erp_idx) /* index for new irec */
4411{
4412 xfs_ext_irec_t *erp; /* indirection array pointer */
4413 int i; /* loop counter */
4414 int nlists; /* number of irec's (ex lists) */
4415
4416 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4417 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4418
4419 /* Resize indirection array */
4420 xfs_iext_realloc_indirect(ifp, ++nlists *
4421 sizeof(xfs_ext_irec_t));
4422 /*
4423 * Move records down in the array so the
4424 * new page can use erp_idx.
4425 */
4426 erp = ifp->if_u1.if_ext_irec;
4427 for (i = nlists - 1; i > erp_idx; i--) {
4428 memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
4429 }
4430 ASSERT(i == erp_idx);
4431
4432 /* Initialize new extent record */
4433 erp = ifp->if_u1.if_ext_irec;
4434 erp[erp_idx].er_extbuf = (xfs_bmbt_rec_t *)
4435 kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP);
4436 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
4437 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
4438 erp[erp_idx].er_extcount = 0;
4439 erp[erp_idx].er_extoff = erp_idx > 0 ?
4440 erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
4441 return (&erp[erp_idx]);
4442}
4443
4444/*
4445 * Remove a record from the indirection array.
4446 */
4447void
4448xfs_iext_irec_remove(
4449 xfs_ifork_t *ifp, /* inode fork pointer */
4450 int erp_idx) /* irec index to remove */
4451{
4452 xfs_ext_irec_t *erp; /* indirection array pointer */
4453 int i; /* loop counter */
4454 int nlists; /* number of irec's (ex lists) */
4455
4456 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4457 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4458 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4459 if (erp->er_extbuf) {
4460 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
4461 -erp->er_extcount);
4462 kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ);
4463 }
4464 /* Compact extent records */
4465 erp = ifp->if_u1.if_ext_irec;
4466 for (i = erp_idx; i < nlists - 1; i++) {
4467 memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
4468 }
4469 /*
4470 * Manually free the last extent record from the indirection
4471 * array. A call to xfs_iext_realloc_indirect() with a size
4472 * of zero would result in a call to xfs_iext_destroy() which
4473 * would in turn call this function again, creating a nasty
4474 * infinite loop.
4475 */
4476 if (--nlists) {
4477 xfs_iext_realloc_indirect(ifp,
4478 nlists * sizeof(xfs_ext_irec_t));
4479 } else {
4480 kmem_free(ifp->if_u1.if_ext_irec,
4481 sizeof(xfs_ext_irec_t));
4482 }
4483 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
4484}
4485
4486/*
4487 * This is called to clean up large amounts of unused memory allocated
4488 * by the indirection array. Before compacting anything though, verify
4489 * that the indirection array is still needed and switch back to the
4490 * linear extent list (or even the inline buffer) if possible. The
4491 * compaction policy is as follows:
4492 *
4493 * Full Compaction: Extents fit into a single page (or inline buffer)
4494 * Full Compaction: Extents occupy less than 10% of allocated space
4495 * Partial Compaction: Extents occupy > 10% and < 50% of allocated space
4496 * No Compaction: Extents occupy at least 50% of allocated space
4497 */
4498void
4499xfs_iext_irec_compact(
4500 xfs_ifork_t *ifp) /* inode fork pointer */
4501{
4502 xfs_extnum_t nextents; /* number of extents in file */
4503 int nlists; /* number of irec's (ex lists) */
4504
4505 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4506 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4507 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4508
4509 if (nextents == 0) {
4510 xfs_iext_destroy(ifp);
4511 } else if (nextents <= XFS_INLINE_EXTS) {
4512 xfs_iext_indirect_to_direct(ifp);
4513 xfs_iext_direct_to_inline(ifp, nextents);
4514 } else if (nextents <= XFS_LINEAR_EXTS) {
4515 xfs_iext_indirect_to_direct(ifp);
4516 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) {
4517 xfs_iext_irec_compact_full(ifp);
4518 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
4519 xfs_iext_irec_compact_pages(ifp);
4520 }
4521}
4522
4523/*
4524 * Combine extents from neighboring extent pages.
4525 */
4526void
4527xfs_iext_irec_compact_pages(
4528 xfs_ifork_t *ifp) /* inode fork pointer */
4529{
4530 xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */
4531 int erp_idx = 0; /* indirection array index */
4532 int nlists; /* number of irec's (ex lists) */
4533
4534 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4535 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4536 while (erp_idx < nlists - 1) {
4537 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4538 erp_next = erp + 1;
4539 if (erp_next->er_extcount <=
4540 (XFS_LINEAR_EXTS - erp->er_extcount)) {
4541 memmove(&erp->er_extbuf[erp->er_extcount],
4542 erp_next->er_extbuf, erp_next->er_extcount *
4543 sizeof(xfs_bmbt_rec_t));
4544 erp->er_extcount += erp_next->er_extcount;
4545 /*
4546 * Free page before removing extent record
4547 * so er_extoffs don't get modified in
4548 * xfs_iext_irec_remove.
4549 */
4550 kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ);
4551 erp_next->er_extbuf = NULL;
4552 xfs_iext_irec_remove(ifp, erp_idx + 1);
4553 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4554 } else {
4555 erp_idx++;
4556 }
4557 }
4558}
4559
4560/*
4561 * Fully compact the extent records managed by the indirection array.
4562 */
4563void
4564xfs_iext_irec_compact_full(
4565 xfs_ifork_t *ifp) /* inode fork pointer */
4566{
4567 xfs_bmbt_rec_t *ep, *ep_next; /* extent record pointers */
4568 xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */
4569 int erp_idx = 0; /* extent irec index */
4570 int ext_avail; /* empty entries in ex list */
4571 int ext_diff; /* number of exts to add */
4572 int nlists; /* number of irec's (ex lists) */
4573
4574 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4575 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4576 erp = ifp->if_u1.if_ext_irec;
4577 ep = &erp->er_extbuf[erp->er_extcount];
4578 erp_next = erp + 1;
4579 ep_next = erp_next->er_extbuf;
4580 while (erp_idx < nlists - 1) {
4581 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
4582 ext_diff = MIN(ext_avail, erp_next->er_extcount);
4583 memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
4584 erp->er_extcount += ext_diff;
4585 erp_next->er_extcount -= ext_diff;
4586 /* Remove next page */
4587 if (erp_next->er_extcount == 0) {
4588 /*
4589 * Free page before removing extent record
4590 * so er_extoffs don't get modified in
4591 * xfs_iext_irec_remove.
4592 */
4593 kmem_free(erp_next->er_extbuf,
4594 erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
4595 erp_next->er_extbuf = NULL;
4596 xfs_iext_irec_remove(ifp, erp_idx + 1);
4597 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4598 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4599 /* Update next page */
4600 } else {
4601 /* Move rest of page up to become next new page */
4602 memmove(erp_next->er_extbuf, ep_next,
4603 erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
4604 ep_next = erp_next->er_extbuf;
4605 memset(&ep_next[erp_next->er_extcount], 0,
4606 (XFS_LINEAR_EXTS - erp_next->er_extcount) *
4607 sizeof(xfs_bmbt_rec_t));
4608 }
4609 if (erp->er_extcount == XFS_LINEAR_EXTS) {
4610 erp_idx++;
4611 if (erp_idx < nlists)
4612 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4613 else
4614 break;
4615 }
4616 ep = &erp->er_extbuf[erp->er_extcount];
4617 erp_next = erp + 1;
4618 ep_next = erp_next->er_extbuf;
4619 }
4620}
4621
4622/*
4623 * This is called to update the er_extoff field in the indirection
4624 * array when extents have been added or removed from one of the
4625 * extent lists. erp_idx contains the irec index to begin updating
4626 * at and ext_diff contains the number of extents that were added
4627 * or removed.
4628 */
4629void
4630xfs_iext_irec_update_extoffs(
4631 xfs_ifork_t *ifp, /* inode fork pointer */
4632 int erp_idx, /* irec index to update */
4633 int ext_diff) /* number of new extents */
4634{
4635 int i; /* loop counter */
4636 int nlists; /* number of irec's (ex lists */
4637
4638 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4639 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4640 for (i = erp_idx; i < nlists; i++) {
4641 ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
4642 }
4643}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 740b73fabd2f..3c1df1d642fa 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -25,10 +25,37 @@
25#define XFS_ATTR_FORK 1 25#define XFS_ATTR_FORK 1
26 26
27/* 27/*
28 * The following xfs_ext_irec_t struct introduces a second (top) level
29 * to the in-core extent allocation scheme. These structs are allocated
30 * in a contiguous block, creating an indirection array where each entry
31 * (irec) contains a pointer to a buffer of in-core extent records which
32 * it manages. Each extent buffer is 4k in size, since 4k is the system
33 * page size on Linux i386 and systems with larger page sizes don't seem
34 * to gain much, if anything, by using their native page size as the
35 * extent buffer size. Also, using 4k extent buffers everywhere provides
36 * a consistent interface for CXFS across different platforms.
37 *
38 * There is currently no limit on the number of irec's (extent lists)
39 * allowed, so heavily fragmented files may require an indirection array
40 * which spans multiple system pages of memory. The number of extents
41 * which would require this amount of contiguous memory is very large
42 * and should not cause problems in the foreseeable future. However,
43 * if the memory needed for the contiguous array ever becomes a problem,
44 * it is possible that a third level of indirection may be required.
45 */
46typedef struct xfs_ext_irec {
47 xfs_bmbt_rec_t *er_extbuf; /* buffer of extent records, XFS_IEXT_BUFSZ bytes */
48 xfs_extnum_t er_extoff; /* number of extents held by all preceding irecs */
49 xfs_extnum_t er_extcount; /* number of extents in use in er_extbuf */
50} xfs_ext_irec_t;
51
52/*
28 * File incore extent information, present for each of data & attr forks. 53 * File incore extent information, present for each of data & attr forks.
29 */ 54 */
30#define XFS_INLINE_EXTS 2 55#define XFS_IEXT_BUFSZ 4096
31#define XFS_INLINE_DATA 32 56#define XFS_LINEAR_EXTS (XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t))
57#define XFS_INLINE_EXTS 2
58#define XFS_INLINE_DATA 32
32typedef struct xfs_ifork { 59typedef struct xfs_ifork {
33 int if_bytes; /* bytes in if_u1 */ 60 int if_bytes; /* bytes in if_u1 */
34 int if_real_bytes; /* bytes allocated in if_u1 */ 61 int if_real_bytes; /* bytes allocated in if_u1 */
@@ -39,6 +66,7 @@ typedef struct xfs_ifork {
39 xfs_extnum_t if_lastex; /* last if_extents used */ 66 xfs_extnum_t if_lastex; /* last if_extents used */
40 union { 67 union {
41 xfs_bmbt_rec_t *if_extents; /* linear map file exts */ 68 xfs_bmbt_rec_t *if_extents; /* linear map file exts */
69 xfs_ext_irec_t *if_ext_irec; /* irec map file exts */
42 char *if_data; /* inline file data */ 70 char *if_data; /* inline file data */
43 } if_u1; 71 } if_u1;
44 union { 72 union {
@@ -61,9 +89,10 @@ typedef struct xfs_ifork {
61/* 89/*
62 * Per-fork incore inode flags. 90 * Per-fork incore inode flags.
63 */ 91 */
64#define XFS_IFINLINE 0x0001 /* Inline data is read in */ 92#define XFS_IFINLINE 0x01 /* Inline data is read in */
65#define XFS_IFEXTENTS 0x0002 /* All extent pointers are read in */ 93#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */
66#define XFS_IFBROOT 0x0004 /* i_broot points to the bmap b-tree root */ 94#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */
95#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */
67 96
68/* 97/*
69 * Flags for xfs_imap() and xfs_dilocate(). 98 * Flags for xfs_imap() and xfs_dilocate().
@@ -438,13 +467,26 @@ xfs_bmbt_rec_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
438void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, 467void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t,
439 xfs_bmbt_irec_t *); 468 xfs_bmbt_irec_t *);
440void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int); 469void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int);
470void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int);
441void xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int); 471void xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int);
442void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int); 472void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int);
443void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); 473void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int);
474void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int);
444void xfs_iext_realloc_direct(xfs_ifork_t *, int); 475void xfs_iext_realloc_direct(xfs_ifork_t *, int);
476void xfs_iext_realloc_indirect(xfs_ifork_t *, int);
477void xfs_iext_indirect_to_direct(xfs_ifork_t *);
445void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); 478void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t);
446void xfs_iext_inline_to_direct(xfs_ifork_t *, int); 479void xfs_iext_inline_to_direct(xfs_ifork_t *, int);
447void xfs_iext_destroy(xfs_ifork_t *); 480void xfs_iext_destroy(xfs_ifork_t *);
481xfs_ext_irec_t *xfs_iext_bno_to_irec(xfs_ifork_t *, xfs_fileoff_t, int *);
482xfs_ext_irec_t *xfs_iext_idx_to_irec(xfs_ifork_t *, xfs_extnum_t *, int *, int);
483void xfs_iext_irec_init(xfs_ifork_t *);
484xfs_ext_irec_t *xfs_iext_irec_new(xfs_ifork_t *, int);
485void xfs_iext_irec_remove(xfs_ifork_t *, int);
486void xfs_iext_irec_compact(xfs_ifork_t *);
487void xfs_iext_irec_compact_pages(xfs_ifork_t *);
488void xfs_iext_irec_compact_full(xfs_ifork_t *);
489void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
448 490
449#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) 491#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
450 492