aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Makefile5
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c149
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h52
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c12
-rw-r--r--fs/xfs/libxfs/xfs_attr.c71
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c4
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c19
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c241
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h54
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c32
-rw-r--r--fs/xfs/libxfs/xfs_btree.c914
-rw-r--r--fs/xfs/libxfs/xfs_btree.h88
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c6
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h4
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h1
-rw-r--r--fs/xfs/libxfs/xfs_defer.c463
-rw-r--r--fs/xfs/libxfs/xfs_defer.h97
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c15
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h8
-rw-r--r--fs/xfs/libxfs/xfs_format.h131
-rw-r--r--fs/xfs/libxfs/xfs_fs.h1
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c23
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h2
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c18
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c1
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h63
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c1399
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h209
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c511
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.h61
-rw-r--r--fs/xfs/libxfs/xfs_sb.c9
-rw-r--r--fs/xfs/libxfs/xfs_shared.h2
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c62
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.h10
-rw-r--r--fs/xfs/libxfs/xfs_types.h4
-rw-r--r--fs/xfs/xfs_bmap_util.c139
-rw-r--r--fs/xfs/xfs_bmap_util.h4
-rw-r--r--fs/xfs/xfs_discard.c2
-rw-r--r--fs/xfs/xfs_dquot.c13
-rw-r--r--fs/xfs/xfs_error.h6
-rw-r--r--fs/xfs/xfs_extfree_item.c69
-rw-r--r--fs/xfs/xfs_extfree_item.h3
-rw-r--r--fs/xfs/xfs_filestream.c3
-rw-r--r--fs/xfs/xfs_fsops.c106
-rw-r--r--fs/xfs/xfs_inode.c99
-rw-r--r--fs/xfs/xfs_inode.h4
-rw-r--r--fs/xfs/xfs_ioctl.c6
-rw-r--r--fs/xfs/xfs_iomap.c31
-rw-r--r--fs/xfs/xfs_log_recover.c336
-rw-r--r--fs/xfs/xfs_mount.c7
-rw-r--r--fs/xfs/xfs_mount.h6
-rw-r--r--fs/xfs/xfs_ondisk.h3
-rw-r--r--fs/xfs/xfs_rmap_item.c536
-rw-r--r--fs/xfs/xfs_rmap_item.h95
-rw-r--r--fs/xfs/xfs_rtalloc.c11
-rw-r--r--fs/xfs/xfs_stats.c1
-rw-r--r--fs/xfs/xfs_stats.h18
-rw-r--r--fs/xfs/xfs_super.c30
-rw-r--r--fs/xfs/xfs_symlink.c25
-rw-r--r--fs/xfs/xfs_trace.c2
-rw-r--r--fs/xfs/xfs_trace.h374
-rw-r--r--fs/xfs/xfs_trans.h26
-rw-r--r--fs/xfs/xfs_trans_extfree.c215
-rw-r--r--fs/xfs/xfs_trans_rmap.c271
64 files changed, 6267 insertions, 915 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 52c288514be1..fc593c869493 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -39,6 +39,7 @@ xfs-y += $(addprefix libxfs/, \
39 xfs_btree.o \ 39 xfs_btree.o \
40 xfs_da_btree.o \ 40 xfs_da_btree.o \
41 xfs_da_format.o \ 41 xfs_da_format.o \
42 xfs_defer.o \
42 xfs_dir2.o \ 43 xfs_dir2.o \
43 xfs_dir2_block.o \ 44 xfs_dir2_block.o \
44 xfs_dir2_data.o \ 45 xfs_dir2_data.o \
@@ -51,6 +52,8 @@ xfs-y += $(addprefix libxfs/, \
51 xfs_inode_fork.o \ 52 xfs_inode_fork.o \
52 xfs_inode_buf.o \ 53 xfs_inode_buf.o \
53 xfs_log_rlimit.o \ 54 xfs_log_rlimit.o \
55 xfs_rmap.o \
56 xfs_rmap_btree.o \
54 xfs_sb.o \ 57 xfs_sb.o \
55 xfs_symlink_remote.o \ 58 xfs_symlink_remote.o \
56 xfs_trans_resv.o \ 59 xfs_trans_resv.o \
@@ -100,11 +103,13 @@ xfs-y += xfs_log.o \
100 xfs_extfree_item.o \ 103 xfs_extfree_item.o \
101 xfs_icreate_item.o \ 104 xfs_icreate_item.o \
102 xfs_inode_item.o \ 105 xfs_inode_item.o \
106 xfs_rmap_item.o \
103 xfs_log_recover.o \ 107 xfs_log_recover.o \
104 xfs_trans_ail.o \ 108 xfs_trans_ail.o \
105 xfs_trans_buf.o \ 109 xfs_trans_buf.o \
106 xfs_trans_extfree.o \ 110 xfs_trans_extfree.o \
107 xfs_trans_inode.o \ 111 xfs_trans_inode.o \
112 xfs_trans_rmap.o \
108 113
109# optional features 114# optional features
110xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ 115xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 88c26b827a2d..776ae2f325d1 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -24,8 +24,10 @@
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_defer.h"
27#include "xfs_inode.h" 28#include "xfs_inode.h"
28#include "xfs_btree.h" 29#include "xfs_btree.h"
30#include "xfs_rmap.h"
29#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
30#include "xfs_alloc.h" 32#include "xfs_alloc.h"
31#include "xfs_extent_busy.h" 33#include "xfs_extent_busy.h"
@@ -49,6 +51,81 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
49STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, 51STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
50 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); 52 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
51 53
54xfs_extlen_t
55xfs_prealloc_blocks(
56 struct xfs_mount *mp)
57{
58 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
59 return XFS_RMAP_BLOCK(mp) + 1;
60 if (xfs_sb_version_hasfinobt(&mp->m_sb))
61 return XFS_FIBT_BLOCK(mp) + 1;
62 return XFS_IBT_BLOCK(mp) + 1;
63}
64
65/*
66 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
67 * AGF buffer (PV 947395), we place constraints on the relationship among
68 * actual allocations for data blocks, freelist blocks, and potential file data
69 * bmap btree blocks. However, these restrictions may result in no actual space
70 * allocated for a delayed extent, for example, a data block in a certain AG is
71 * allocated but there is no additional block for the additional bmap btree
72 * block due to a split of the bmap btree of the file. The result of this may
73 * lead to an infinite loop when the file gets flushed to disk and all delayed
74 * extents need to be actually allocated. To get around this, we explicitly set
75 * aside a few blocks which will not be reserved in delayed allocation.
76 *
77 * When rmap is disabled, we need to reserve 4 fsbs _per AG_ for the freelist
78 * and 4 more to handle a potential split of the file's bmap btree.
79 *
80 * When rmap is enabled, we must also be able to handle two rmap btree inserts
81 * to record both the file data extent and a new bmbt block. The bmbt block
82 * might not be in the same AG as the file data extent. In the worst case
83 * the bmap btree splits multiple levels and all the new blocks come from
84 * different AGs, so set aside enough to handle rmap btree splits in all AGs.
85 */
86unsigned int
87xfs_alloc_set_aside(
88 struct xfs_mount *mp)
89{
90 unsigned int blocks;
91
92 blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
93 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
94 blocks += mp->m_sb.sb_agcount * mp->m_rmap_maxlevels;
95 return blocks;
96}
97
98/*
99 * When deciding how much space to allocate out of an AG, we limit the
100 * allocation maximum size to the size the AG. However, we cannot use all the
101 * blocks in the AG - some are permanently used by metadata. These
102 * blocks are generally:
103 * - the AG superblock, AGF, AGI and AGFL
104 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
105 * the AGI free inode and rmap btree root blocks.
106 * - blocks on the AGFL according to xfs_alloc_set_aside() limits
107 * - the rmapbt root block
108 *
109 * The AG headers are sector sized, so the amount of space they take up is
110 * dependent on filesystem geometry. The others are all single blocks.
111 */
112unsigned int
113xfs_alloc_ag_max_usable(
114 struct xfs_mount *mp)
115{
116 unsigned int blocks;
117
118 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
119 blocks += XFS_ALLOC_AGFL_RESERVE;
120 blocks += 3; /* AGF, AGI btree root blocks */
121 if (xfs_sb_version_hasfinobt(&mp->m_sb))
122 blocks++; /* finobt root block */
123 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
124 blocks++; /* rmap root block */
125
126 return mp->m_sb.sb_agblocks - blocks;
127}
128
52/* 129/*
53 * Lookup the record equal to [bno, len] in the btree given by cur. 130 * Lookup the record equal to [bno, len] in the btree given by cur.
54 */ 131 */
@@ -636,6 +713,14 @@ xfs_alloc_ag_vextent(
636 ASSERT(!args->wasfromfl || !args->isfl); 713 ASSERT(!args->wasfromfl || !args->isfl);
637 ASSERT(args->agbno % args->alignment == 0); 714 ASSERT(args->agbno % args->alignment == 0);
638 715
716 /* if not file data, insert new block into the reverse map btree */
717 if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
718 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
719 args->agbno, args->len, &args->oinfo);
720 if (error)
721 return error;
722 }
723
639 if (!args->wasfromfl) { 724 if (!args->wasfromfl) {
640 error = xfs_alloc_update_counters(args->tp, args->pag, 725 error = xfs_alloc_update_counters(args->tp, args->pag,
641 args->agbp, 726 args->agbp,
@@ -1577,14 +1662,15 @@ error0:
1577/* 1662/*
1578 * Free the extent starting at agno/bno for length. 1663 * Free the extent starting at agno/bno for length.
1579 */ 1664 */
1580STATIC int /* error */ 1665STATIC int
1581xfs_free_ag_extent( 1666xfs_free_ag_extent(
1582 xfs_trans_t *tp, /* transaction pointer */ 1667 xfs_trans_t *tp,
1583 xfs_buf_t *agbp, /* buffer for a.g. freelist header */ 1668 xfs_buf_t *agbp,
1584 xfs_agnumber_t agno, /* allocation group number */ 1669 xfs_agnumber_t agno,
1585 xfs_agblock_t bno, /* starting block number */ 1670 xfs_agblock_t bno,
1586 xfs_extlen_t len, /* length of extent */ 1671 xfs_extlen_t len,
1587 int isfl) /* set if is freelist blocks - no sb acctg */ 1672 struct xfs_owner_info *oinfo,
1673 int isfl)
1588{ 1674{
1589 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ 1675 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1590 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ 1676 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
@@ -1601,12 +1687,19 @@ xfs_free_ag_extent(
1601 xfs_extlen_t nlen; /* new length of freespace */ 1687 xfs_extlen_t nlen; /* new length of freespace */
1602 xfs_perag_t *pag; /* per allocation group data */ 1688 xfs_perag_t *pag; /* per allocation group data */
1603 1689
1690 bno_cur = cnt_cur = NULL;
1604 mp = tp->t_mountp; 1691 mp = tp->t_mountp;
1692
1693 if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
1694 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1695 if (error)
1696 goto error0;
1697 }
1698
1605 /* 1699 /*
1606 * Allocate and initialize a cursor for the by-block btree. 1700 * Allocate and initialize a cursor for the by-block btree.
1607 */ 1701 */
1608 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); 1702 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1609 cnt_cur = NULL;
1610 /* 1703 /*
1611 * Look for a neighboring block on the left (lower block numbers) 1704 * Look for a neighboring block on the left (lower block numbers)
1612 * that is contiguous with this space. 1705 * that is contiguous with this space.
@@ -1875,6 +1968,11 @@ xfs_alloc_min_freelist(
1875 /* space needed by-size freespace btree */ 1968 /* space needed by-size freespace btree */
1876 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1, 1969 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
1877 mp->m_ag_maxlevels); 1970 mp->m_ag_maxlevels);
1971 /* space needed reverse mapping used space btree */
1972 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
1973 min_free += min_t(unsigned int,
1974 pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
1975 mp->m_rmap_maxlevels);
1878 1976
1879 return min_free; 1977 return min_free;
1880} 1978}
@@ -1992,21 +2090,34 @@ xfs_alloc_fix_freelist(
1992 * anything other than extra overhead when we need to put more blocks 2090 * anything other than extra overhead when we need to put more blocks
1993 * back on the free list? Maybe we should only do this when space is 2091 * back on the free list? Maybe we should only do this when space is
1994 * getting low or the AGFL is more than half full? 2092 * getting low or the AGFL is more than half full?
2093 *
2094 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2095 * big; the NORMAP flag prevents AGFL expand/shrink operations from
2096 * updating the rmapbt. Both flags are used in xfs_repair while we're
2097 * rebuilding the rmapbt, and neither are used by the kernel. They're
2098 * both required to ensure that rmaps are correctly recorded for the
2099 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
2100 * repair/rmap.c in xfsprogs for details.
1995 */ 2101 */
1996 while (pag->pagf_flcount > need) { 2102 memset(&targs, 0, sizeof(targs));
2103 if (flags & XFS_ALLOC_FLAG_NORMAP)
2104 xfs_rmap_skip_owner_update(&targs.oinfo);
2105 else
2106 xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
2107 while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
1997 struct xfs_buf *bp; 2108 struct xfs_buf *bp;
1998 2109
1999 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); 2110 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2000 if (error) 2111 if (error)
2001 goto out_agbp_relse; 2112 goto out_agbp_relse;
2002 error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1); 2113 error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
2114 &targs.oinfo, 1);
2003 if (error) 2115 if (error)
2004 goto out_agbp_relse; 2116 goto out_agbp_relse;
2005 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); 2117 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
2006 xfs_trans_binval(tp, bp); 2118 xfs_trans_binval(tp, bp);
2007 } 2119 }
2008 2120
2009 memset(&targs, 0, sizeof(targs));
2010 targs.tp = tp; 2121 targs.tp = tp;
2011 targs.mp = mp; 2122 targs.mp = mp;
2012 targs.agbp = agbp; 2123 targs.agbp = agbp;
@@ -2271,6 +2382,10 @@ xfs_agf_verify(
2271 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) 2382 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2272 return false; 2383 return false;
2273 2384
2385 if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
2386 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)
2387 return false;
2388
2274 /* 2389 /*
2275 * during growfs operations, the perag is not fully initialised, 2390 * during growfs operations, the perag is not fully initialised,
2276 * so we can't use it for any useful checking. growfs ensures we can't 2391 * so we can't use it for any useful checking. growfs ensures we can't
@@ -2402,6 +2517,8 @@ xfs_alloc_read_agf(
2402 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); 2517 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2403 pag->pagf_levels[XFS_BTNUM_CNTi] = 2518 pag->pagf_levels[XFS_BTNUM_CNTi] =
2404 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); 2519 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
2520 pag->pagf_levels[XFS_BTNUM_RMAPi] =
2521 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
2405 spin_lock_init(&pag->pagb_lock); 2522 spin_lock_init(&pag->pagb_lock);
2406 pag->pagb_count = 0; 2523 pag->pagb_count = 0;
2407 pag->pagb_tree = RB_ROOT; 2524 pag->pagb_tree = RB_ROOT;
@@ -2691,7 +2808,8 @@ int /* error */
2691xfs_free_extent( 2808xfs_free_extent(
2692 struct xfs_trans *tp, /* transaction pointer */ 2809 struct xfs_trans *tp, /* transaction pointer */
2693 xfs_fsblock_t bno, /* starting block number of extent */ 2810 xfs_fsblock_t bno, /* starting block number of extent */
2694 xfs_extlen_t len) /* length of extent */ 2811 xfs_extlen_t len, /* length of extent */
2812 struct xfs_owner_info *oinfo) /* extent owner */
2695{ 2813{
2696 struct xfs_mount *mp = tp->t_mountp; 2814 struct xfs_mount *mp = tp->t_mountp;
2697 struct xfs_buf *agbp; 2815 struct xfs_buf *agbp;
@@ -2701,6 +2819,11 @@ xfs_free_extent(
2701 2819
2702 ASSERT(len != 0); 2820 ASSERT(len != 0);
2703 2821
2822 if (XFS_TEST_ERROR(false, mp,
2823 XFS_ERRTAG_FREE_EXTENT,
2824 XFS_RANDOM_FREE_EXTENT))
2825 return -EIO;
2826
2704 error = xfs_free_extent_fix_freelist(tp, agno, &agbp); 2827 error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
2705 if (error) 2828 if (error)
2706 return error; 2829 return error;
@@ -2712,7 +2835,7 @@ xfs_free_extent(
2712 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length), 2835 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
2713 err); 2836 err);
2714 2837
2715 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, 0); 2838 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, 0);
2716 if (error) 2839 if (error)
2717 goto err; 2840 goto err;
2718 2841
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index cf268b2d0b6c..6fe2d6b7cfe9 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -54,41 +54,8 @@ typedef unsigned int xfs_alloctype_t;
54 */ 54 */
55#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ 55#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */
56#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ 56#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
57 57#define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */
58/* 58#define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */
59 * In order to avoid ENOSPC-related deadlock caused by
60 * out-of-order locking of AGF buffer (PV 947395), we place
61 * constraints on the relationship among actual allocations for
62 * data blocks, freelist blocks, and potential file data bmap
63 * btree blocks. However, these restrictions may result in no
64 * actual space allocated for a delayed extent, for example, a data
65 * block in a certain AG is allocated but there is no additional
66 * block for the additional bmap btree block due to a split of the
67 * bmap btree of the file. The result of this may lead to an
68 * infinite loop in xfssyncd when the file gets flushed to disk and
69 * all delayed extents need to be actually allocated. To get around
70 * this, we explicitly set aside a few blocks which will not be
71 * reserved in delayed allocation. Considering the minimum number of
72 * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap
73 * btree requires 1 fsb, so we set the number of set-aside blocks
74 * to 4 + 4*agcount.
75 */
76#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4))
77
78/*
79 * When deciding how much space to allocate out of an AG, we limit the
80 * allocation maximum size to the size the AG. However, we cannot use all the
81 * blocks in the AG - some are permanently used by metadata. These
82 * blocks are generally:
83 * - the AG superblock, AGF, AGI and AGFL
84 * - the AGF (bno and cnt) and AGI btree root blocks
85 * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits
86 *
87 * The AG headers are sector sized, so the amount of space they take up is
88 * dependent on filesystem geometry. The others are all single blocks.
89 */
90#define XFS_ALLOC_AG_MAX_USABLE(mp) \
91 ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
92 59
93 60
94/* 61/*
@@ -123,6 +90,7 @@ typedef struct xfs_alloc_arg {
123 char isfl; /* set if is freelist blocks - !acctg */ 90 char isfl; /* set if is freelist blocks - !acctg */
124 char userdata; /* mask defining userdata treatment */ 91 char userdata; /* mask defining userdata treatment */
125 xfs_fsblock_t firstblock; /* io first block allocated */ 92 xfs_fsblock_t firstblock; /* io first block allocated */
93 struct xfs_owner_info oinfo; /* owner of blocks being allocated */
126} xfs_alloc_arg_t; 94} xfs_alloc_arg_t;
127 95
128/* 96/*
@@ -132,6 +100,11 @@ typedef struct xfs_alloc_arg {
132#define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */ 100#define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */
133#define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */ 101#define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */
134 102
103/* freespace limit calculations */
104#define XFS_ALLOC_AGFL_RESERVE 4
105unsigned int xfs_alloc_set_aside(struct xfs_mount *mp);
106unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp);
107
135xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, 108xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
136 struct xfs_perag *pag, xfs_extlen_t need); 109 struct xfs_perag *pag, xfs_extlen_t need);
137unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp, 110unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
@@ -208,9 +181,10 @@ xfs_alloc_vextent(
208 */ 181 */
209int /* error */ 182int /* error */
210xfs_free_extent( 183xfs_free_extent(
211 struct xfs_trans *tp, /* transaction pointer */ 184 struct xfs_trans *tp, /* transaction pointer */
212 xfs_fsblock_t bno, /* starting block number of extent */ 185 xfs_fsblock_t bno, /* starting block number of extent */
213 xfs_extlen_t len); /* length of extent */ 186 xfs_extlen_t len, /* length of extent */
187 struct xfs_owner_info *oinfo);/* extent owner */
214 188
215int /* error */ 189int /* error */
216xfs_alloc_lookup_ge( 190xfs_alloc_lookup_ge(
@@ -232,4 +206,6 @@ int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
232int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno, 206int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno,
233 struct xfs_buf **agbp); 207 struct xfs_buf **agbp);
234 208
209xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp);
210
235#endif /* __XFS_ALLOC_H__ */ 211#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index d9b42425291e..5ba2dac5e67c 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -212,17 +212,6 @@ xfs_allocbt_init_key_from_rec(
212} 212}
213 213
214STATIC void 214STATIC void
215xfs_allocbt_init_rec_from_key(
216 union xfs_btree_key *key,
217 union xfs_btree_rec *rec)
218{
219 ASSERT(key->alloc.ar_startblock != 0);
220
221 rec->alloc.ar_startblock = key->alloc.ar_startblock;
222 rec->alloc.ar_blockcount = key->alloc.ar_blockcount;
223}
224
225STATIC void
226xfs_allocbt_init_rec_from_cur( 215xfs_allocbt_init_rec_from_cur(
227 struct xfs_btree_cur *cur, 216 struct xfs_btree_cur *cur,
228 union xfs_btree_rec *rec) 217 union xfs_btree_rec *rec)
@@ -406,7 +395,6 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
406 .get_minrecs = xfs_allocbt_get_minrecs, 395 .get_minrecs = xfs_allocbt_get_minrecs,
407 .get_maxrecs = xfs_allocbt_get_maxrecs, 396 .get_maxrecs = xfs_allocbt_get_maxrecs,
408 .init_key_from_rec = xfs_allocbt_init_key_from_rec, 397 .init_key_from_rec = xfs_allocbt_init_key_from_rec,
409 .init_rec_from_key = xfs_allocbt_init_rec_from_key,
410 .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, 398 .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
411 .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, 399 .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
412 .key_diff = xfs_allocbt_key_diff, 400 .key_diff = xfs_allocbt_key_diff,
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 4e126f41a0aa..af1ecb19121e 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -23,6 +23,7 @@
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
26#include "xfs_da_format.h" 27#include "xfs_da_format.h"
27#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
28#include "xfs_attr_sf.h" 29#include "xfs_attr_sf.h"
@@ -203,7 +204,7 @@ xfs_attr_set(
203{ 204{
204 struct xfs_mount *mp = dp->i_mount; 205 struct xfs_mount *mp = dp->i_mount;
205 struct xfs_da_args args; 206 struct xfs_da_args args;
206 struct xfs_bmap_free flist; 207 struct xfs_defer_ops dfops;
207 struct xfs_trans_res tres; 208 struct xfs_trans_res tres;
208 xfs_fsblock_t firstblock; 209 xfs_fsblock_t firstblock;
209 int rsvd = (flags & ATTR_ROOT) != 0; 210 int rsvd = (flags & ATTR_ROOT) != 0;
@@ -221,7 +222,7 @@ xfs_attr_set(
221 args.value = value; 222 args.value = value;
222 args.valuelen = valuelen; 223 args.valuelen = valuelen;
223 args.firstblock = &firstblock; 224 args.firstblock = &firstblock;
224 args.flist = &flist; 225 args.dfops = &dfops;
225 args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; 226 args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
226 args.total = xfs_attr_calc_size(&args, &local); 227 args.total = xfs_attr_calc_size(&args, &local);
227 228
@@ -316,13 +317,13 @@ xfs_attr_set(
316 * It won't fit in the shortform, transform to a leaf block. 317 * It won't fit in the shortform, transform to a leaf block.
317 * GROT: another possible req'mt for a double-split btree op. 318 * GROT: another possible req'mt for a double-split btree op.
318 */ 319 */
319 xfs_bmap_init(args.flist, args.firstblock); 320 xfs_defer_init(args.dfops, args.firstblock);
320 error = xfs_attr_shortform_to_leaf(&args); 321 error = xfs_attr_shortform_to_leaf(&args);
321 if (!error) 322 if (!error)
322 error = xfs_bmap_finish(&args.trans, args.flist, dp); 323 error = xfs_defer_finish(&args.trans, args.dfops, dp);
323 if (error) { 324 if (error) {
324 args.trans = NULL; 325 args.trans = NULL;
325 xfs_bmap_cancel(&flist); 326 xfs_defer_cancel(&dfops);
326 goto out; 327 goto out;
327 } 328 }
328 329
@@ -382,7 +383,7 @@ xfs_attr_remove(
382{ 383{
383 struct xfs_mount *mp = dp->i_mount; 384 struct xfs_mount *mp = dp->i_mount;
384 struct xfs_da_args args; 385 struct xfs_da_args args;
385 struct xfs_bmap_free flist; 386 struct xfs_defer_ops dfops;
386 xfs_fsblock_t firstblock; 387 xfs_fsblock_t firstblock;
387 int error; 388 int error;
388 389
@@ -399,7 +400,7 @@ xfs_attr_remove(
399 return error; 400 return error;
400 401
401 args.firstblock = &firstblock; 402 args.firstblock = &firstblock;
402 args.flist = &flist; 403 args.dfops = &dfops;
403 404
404 /* 405 /*
405 * we have no control over the attribute names that userspace passes us 406 * we have no control over the attribute names that userspace passes us
@@ -584,13 +585,13 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
584 * Commit that transaction so that the node_addname() call 585 * Commit that transaction so that the node_addname() call
585 * can manage its own transactions. 586 * can manage its own transactions.
586 */ 587 */
587 xfs_bmap_init(args->flist, args->firstblock); 588 xfs_defer_init(args->dfops, args->firstblock);
588 error = xfs_attr3_leaf_to_node(args); 589 error = xfs_attr3_leaf_to_node(args);
589 if (!error) 590 if (!error)
590 error = xfs_bmap_finish(&args->trans, args->flist, dp); 591 error = xfs_defer_finish(&args->trans, args->dfops, dp);
591 if (error) { 592 if (error) {
592 args->trans = NULL; 593 args->trans = NULL;
593 xfs_bmap_cancel(args->flist); 594 xfs_defer_cancel(args->dfops);
594 return error; 595 return error;
595 } 596 }
596 597
@@ -674,15 +675,15 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
674 * If the result is small enough, shrink it all into the inode. 675 * If the result is small enough, shrink it all into the inode.
675 */ 676 */
676 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { 677 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
677 xfs_bmap_init(args->flist, args->firstblock); 678 xfs_defer_init(args->dfops, args->firstblock);
678 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); 679 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
679 /* bp is gone due to xfs_da_shrink_inode */ 680 /* bp is gone due to xfs_da_shrink_inode */
680 if (!error) 681 if (!error)
681 error = xfs_bmap_finish(&args->trans, 682 error = xfs_defer_finish(&args->trans,
682 args->flist, dp); 683 args->dfops, dp);
683 if (error) { 684 if (error) {
684 args->trans = NULL; 685 args->trans = NULL;
685 xfs_bmap_cancel(args->flist); 686 xfs_defer_cancel(args->dfops);
686 return error; 687 return error;
687 } 688 }
688 } 689 }
@@ -737,14 +738,14 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
737 * If the result is small enough, shrink it all into the inode. 738 * If the result is small enough, shrink it all into the inode.
738 */ 739 */
739 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { 740 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
740 xfs_bmap_init(args->flist, args->firstblock); 741 xfs_defer_init(args->dfops, args->firstblock);
741 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); 742 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
742 /* bp is gone due to xfs_da_shrink_inode */ 743 /* bp is gone due to xfs_da_shrink_inode */
743 if (!error) 744 if (!error)
744 error = xfs_bmap_finish(&args->trans, args->flist, dp); 745 error = xfs_defer_finish(&args->trans, args->dfops, dp);
745 if (error) { 746 if (error) {
746 args->trans = NULL; 747 args->trans = NULL;
747 xfs_bmap_cancel(args->flist); 748 xfs_defer_cancel(args->dfops);
748 return error; 749 return error;
749 } 750 }
750 } 751 }
@@ -863,14 +864,14 @@ restart:
863 */ 864 */
864 xfs_da_state_free(state); 865 xfs_da_state_free(state);
865 state = NULL; 866 state = NULL;
866 xfs_bmap_init(args->flist, args->firstblock); 867 xfs_defer_init(args->dfops, args->firstblock);
867 error = xfs_attr3_leaf_to_node(args); 868 error = xfs_attr3_leaf_to_node(args);
868 if (!error) 869 if (!error)
869 error = xfs_bmap_finish(&args->trans, 870 error = xfs_defer_finish(&args->trans,
870 args->flist, dp); 871 args->dfops, dp);
871 if (error) { 872 if (error) {
872 args->trans = NULL; 873 args->trans = NULL;
873 xfs_bmap_cancel(args->flist); 874 xfs_defer_cancel(args->dfops);
874 goto out; 875 goto out;
875 } 876 }
876 877
@@ -891,13 +892,13 @@ restart:
891 * in the index/blkno/rmtblkno/rmtblkcnt fields and 892 * in the index/blkno/rmtblkno/rmtblkcnt fields and
892 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields. 893 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
893 */ 894 */
894 xfs_bmap_init(args->flist, args->firstblock); 895 xfs_defer_init(args->dfops, args->firstblock);
895 error = xfs_da3_split(state); 896 error = xfs_da3_split(state);
896 if (!error) 897 if (!error)
897 error = xfs_bmap_finish(&args->trans, args->flist, dp); 898 error = xfs_defer_finish(&args->trans, args->dfops, dp);
898 if (error) { 899 if (error) {
899 args->trans = NULL; 900 args->trans = NULL;
900 xfs_bmap_cancel(args->flist); 901 xfs_defer_cancel(args->dfops);
901 goto out; 902 goto out;
902 } 903 }
903 } else { 904 } else {
@@ -990,14 +991,14 @@ restart:
990 * Check to see if the tree needs to be collapsed. 991 * Check to see if the tree needs to be collapsed.
991 */ 992 */
992 if (retval && (state->path.active > 1)) { 993 if (retval && (state->path.active > 1)) {
993 xfs_bmap_init(args->flist, args->firstblock); 994 xfs_defer_init(args->dfops, args->firstblock);
994 error = xfs_da3_join(state); 995 error = xfs_da3_join(state);
995 if (!error) 996 if (!error)
996 error = xfs_bmap_finish(&args->trans, 997 error = xfs_defer_finish(&args->trans,
997 args->flist, dp); 998 args->dfops, dp);
998 if (error) { 999 if (error) {
999 args->trans = NULL; 1000 args->trans = NULL;
1000 xfs_bmap_cancel(args->flist); 1001 xfs_defer_cancel(args->dfops);
1001 goto out; 1002 goto out;
1002 } 1003 }
1003 } 1004 }
@@ -1113,13 +1114,13 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1113 * Check to see if the tree needs to be collapsed. 1114 * Check to see if the tree needs to be collapsed.
1114 */ 1115 */
1115 if (retval && (state->path.active > 1)) { 1116 if (retval && (state->path.active > 1)) {
1116 xfs_bmap_init(args->flist, args->firstblock); 1117 xfs_defer_init(args->dfops, args->firstblock);
1117 error = xfs_da3_join(state); 1118 error = xfs_da3_join(state);
1118 if (!error) 1119 if (!error)
1119 error = xfs_bmap_finish(&args->trans, args->flist, dp); 1120 error = xfs_defer_finish(&args->trans, args->dfops, dp);
1120 if (error) { 1121 if (error) {
1121 args->trans = NULL; 1122 args->trans = NULL;
1122 xfs_bmap_cancel(args->flist); 1123 xfs_defer_cancel(args->dfops);
1123 goto out; 1124 goto out;
1124 } 1125 }
1125 /* 1126 /*
@@ -1146,15 +1147,15 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1146 goto out; 1147 goto out;
1147 1148
1148 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { 1149 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1149 xfs_bmap_init(args->flist, args->firstblock); 1150 xfs_defer_init(args->dfops, args->firstblock);
1150 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); 1151 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1151 /* bp is gone due to xfs_da_shrink_inode */ 1152 /* bp is gone due to xfs_da_shrink_inode */
1152 if (!error) 1153 if (!error)
1153 error = xfs_bmap_finish(&args->trans, 1154 error = xfs_defer_finish(&args->trans,
1154 args->flist, dp); 1155 args->dfops, dp);
1155 if (error) { 1156 if (error) {
1156 args->trans = NULL; 1157 args->trans = NULL;
1157 xfs_bmap_cancel(args->flist); 1158 xfs_defer_cancel(args->dfops);
1158 goto out; 1159 goto out;
1159 } 1160 }
1160 } else 1161 } else
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 01a5ecfedfcf..8ea91f363093 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -792,7 +792,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
792 nargs.dp = dp; 792 nargs.dp = dp;
793 nargs.geo = args->geo; 793 nargs.geo = args->geo;
794 nargs.firstblock = args->firstblock; 794 nargs.firstblock = args->firstblock;
795 nargs.flist = args->flist; 795 nargs.dfops = args->dfops;
796 nargs.total = args->total; 796 nargs.total = args->total;
797 nargs.whichfork = XFS_ATTR_FORK; 797 nargs.whichfork = XFS_ATTR_FORK;
798 nargs.trans = args->trans; 798 nargs.trans = args->trans;
@@ -922,7 +922,7 @@ xfs_attr3_leaf_to_shortform(
922 nargs.geo = args->geo; 922 nargs.geo = args->geo;
923 nargs.dp = dp; 923 nargs.dp = dp;
924 nargs.firstblock = args->firstblock; 924 nargs.firstblock = args->firstblock;
925 nargs.flist = args->flist; 925 nargs.dfops = args->dfops;
926 nargs.total = args->total; 926 nargs.total = args->total;
927 nargs.whichfork = XFS_ATTR_FORK; 927 nargs.whichfork = XFS_ATTR_FORK;
928 nargs.trans = args->trans; 928 nargs.trans = args->trans;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index a572532a55cd..d52f525f5b2d 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -24,6 +24,7 @@
24#include "xfs_trans_resv.h" 24#include "xfs_trans_resv.h"
25#include "xfs_bit.h" 25#include "xfs_bit.h"
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_defer.h"
27#include "xfs_da_format.h" 28#include "xfs_da_format.h"
28#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
29#include "xfs_inode.h" 30#include "xfs_inode.h"
@@ -460,16 +461,16 @@ xfs_attr_rmtval_set(
460 * extent and then crash then the block may not contain the 461 * extent and then crash then the block may not contain the
461 * correct metadata after log recovery occurs. 462 * correct metadata after log recovery occurs.
462 */ 463 */
463 xfs_bmap_init(args->flist, args->firstblock); 464 xfs_defer_init(args->dfops, args->firstblock);
464 nmap = 1; 465 nmap = 1;
465 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, 466 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
466 blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock, 467 blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
467 args->total, &map, &nmap, args->flist); 468 args->total, &map, &nmap, args->dfops);
468 if (!error) 469 if (!error)
469 error = xfs_bmap_finish(&args->trans, args->flist, dp); 470 error = xfs_defer_finish(&args->trans, args->dfops, dp);
470 if (error) { 471 if (error) {
471 args->trans = NULL; 472 args->trans = NULL;
472 xfs_bmap_cancel(args->flist); 473 xfs_defer_cancel(args->dfops);
473 return error; 474 return error;
474 } 475 }
475 476
@@ -503,7 +504,7 @@ xfs_attr_rmtval_set(
503 504
504 ASSERT(blkcnt > 0); 505 ASSERT(blkcnt > 0);
505 506
506 xfs_bmap_init(args->flist, args->firstblock); 507 xfs_defer_init(args->dfops, args->firstblock);
507 nmap = 1; 508 nmap = 1;
508 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, 509 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
509 blkcnt, &map, &nmap, 510 blkcnt, &map, &nmap,
@@ -603,16 +604,16 @@ xfs_attr_rmtval_remove(
603 blkcnt = args->rmtblkcnt; 604 blkcnt = args->rmtblkcnt;
604 done = 0; 605 done = 0;
605 while (!done) { 606 while (!done) {
606 xfs_bmap_init(args->flist, args->firstblock); 607 xfs_defer_init(args->dfops, args->firstblock);
607 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 608 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
608 XFS_BMAPI_ATTRFORK, 1, args->firstblock, 609 XFS_BMAPI_ATTRFORK, 1, args->firstblock,
609 args->flist, &done); 610 args->dfops, &done);
610 if (!error) 611 if (!error)
611 error = xfs_bmap_finish(&args->trans, args->flist, 612 error = xfs_defer_finish(&args->trans, args->dfops,
612 args->dp); 613 args->dp);
613 if (error) { 614 if (error) {
614 args->trans = NULL; 615 args->trans = NULL;
615 xfs_bmap_cancel(args->flist); 616 xfs_defer_cancel(args->dfops);
616 return error; 617 return error;
617 } 618 }
618 619
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 2f2c85cc8117..b060bca93402 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -24,6 +24,7 @@
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_defer.h"
27#include "xfs_da_format.h" 28#include "xfs_da_format.h"
28#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
29#include "xfs_dir2.h" 30#include "xfs_dir2.h"
@@ -45,6 +46,7 @@
45#include "xfs_symlink.h" 46#include "xfs_symlink.h"
46#include "xfs_attr_leaf.h" 47#include "xfs_attr_leaf.h"
47#include "xfs_filestream.h" 48#include "xfs_filestream.h"
49#include "xfs_rmap.h"
48 50
49 51
50kmem_zone_t *xfs_bmap_free_item_zone; 52kmem_zone_t *xfs_bmap_free_item_zone;
@@ -570,12 +572,13 @@ xfs_bmap_validate_ret(
570 */ 572 */
571void 573void
572xfs_bmap_add_free( 574xfs_bmap_add_free(
573 struct xfs_mount *mp, /* mount point structure */ 575 struct xfs_mount *mp,
574 struct xfs_bmap_free *flist, /* list of extents */ 576 struct xfs_defer_ops *dfops,
575 xfs_fsblock_t bno, /* fs block number of extent */ 577 xfs_fsblock_t bno,
576 xfs_filblks_t len) /* length of extent */ 578 xfs_filblks_t len,
579 struct xfs_owner_info *oinfo)
577{ 580{
578 struct xfs_bmap_free_item *new; /* new element */ 581 struct xfs_extent_free_item *new; /* new element */
579#ifdef DEBUG 582#ifdef DEBUG
580 xfs_agnumber_t agno; 583 xfs_agnumber_t agno;
581 xfs_agblock_t agbno; 584 xfs_agblock_t agbno;
@@ -592,44 +595,17 @@ xfs_bmap_add_free(
592 ASSERT(agbno + len <= mp->m_sb.sb_agblocks); 595 ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
593#endif 596#endif
594 ASSERT(xfs_bmap_free_item_zone != NULL); 597 ASSERT(xfs_bmap_free_item_zone != NULL);
595 new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
596 new->xbfi_startblock = bno;
597 new->xbfi_blockcount = (xfs_extlen_t)len;
598 list_add(&new->xbfi_list, &flist->xbf_flist);
599 flist->xbf_count++;
600}
601
602/*
603 * Remove the entry "free" from the free item list. Prev points to the
604 * previous entry, unless "free" is the head of the list.
605 */
606void
607xfs_bmap_del_free(
608 struct xfs_bmap_free *flist, /* free item list header */
609 struct xfs_bmap_free_item *free) /* list item to be freed */
610{
611 list_del(&free->xbfi_list);
612 flist->xbf_count--;
613 kmem_zone_free(xfs_bmap_free_item_zone, free);
614}
615
616/*
617 * Free up any items left in the list.
618 */
619void
620xfs_bmap_cancel(
621 struct xfs_bmap_free *flist) /* list of bmap_free_items */
622{
623 struct xfs_bmap_free_item *free; /* free list item */
624 598
625 if (flist->xbf_count == 0) 599 new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
626 return; 600 new->xefi_startblock = bno;
627 while (!list_empty(&flist->xbf_flist)) { 601 new->xefi_blockcount = (xfs_extlen_t)len;
628 free = list_first_entry(&flist->xbf_flist, 602 if (oinfo)
629 struct xfs_bmap_free_item, xbfi_list); 603 new->xefi_oinfo = *oinfo;
630 xfs_bmap_del_free(flist, free); 604 else
631 } 605 xfs_rmap_skip_owner_update(&new->xefi_oinfo);
632 ASSERT(flist->xbf_count == 0); 606 trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0,
607 XFS_FSB_TO_AGBNO(mp, bno), len);
608 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
633} 609}
634 610
635/* 611/*
@@ -659,6 +635,7 @@ xfs_bmap_btree_to_extents(
659 xfs_mount_t *mp; /* mount point structure */ 635 xfs_mount_t *mp; /* mount point structure */
660 __be64 *pp; /* ptr to block address */ 636 __be64 *pp; /* ptr to block address */
661 struct xfs_btree_block *rblock;/* root btree block */ 637 struct xfs_btree_block *rblock;/* root btree block */
638 struct xfs_owner_info oinfo;
662 639
663 mp = ip->i_mount; 640 mp = ip->i_mount;
664 ifp = XFS_IFORK_PTR(ip, whichfork); 641 ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -682,7 +659,8 @@ xfs_bmap_btree_to_extents(
682 cblock = XFS_BUF_TO_BLOCK(cbp); 659 cblock = XFS_BUF_TO_BLOCK(cbp);
683 if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) 660 if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
684 return error; 661 return error;
685 xfs_bmap_add_free(mp, cur->bc_private.b.flist, cbno, 1); 662 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
663 xfs_bmap_add_free(mp, cur->bc_private.b.dfops, cbno, 1, &oinfo);
686 ip->i_d.di_nblocks--; 664 ip->i_d.di_nblocks--;
687 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); 665 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
688 xfs_trans_binval(tp, cbp); 666 xfs_trans_binval(tp, cbp);
@@ -705,7 +683,7 @@ xfs_bmap_extents_to_btree(
705 xfs_trans_t *tp, /* transaction pointer */ 683 xfs_trans_t *tp, /* transaction pointer */
706 xfs_inode_t *ip, /* incore inode pointer */ 684 xfs_inode_t *ip, /* incore inode pointer */
707 xfs_fsblock_t *firstblock, /* first-block-allocated */ 685 xfs_fsblock_t *firstblock, /* first-block-allocated */
708 xfs_bmap_free_t *flist, /* blocks freed in xaction */ 686 struct xfs_defer_ops *dfops, /* blocks freed in xaction */
709 xfs_btree_cur_t **curp, /* cursor returned to caller */ 687 xfs_btree_cur_t **curp, /* cursor returned to caller */
710 int wasdel, /* converting a delayed alloc */ 688 int wasdel, /* converting a delayed alloc */
711 int *logflagsp, /* inode logging flags */ 689 int *logflagsp, /* inode logging flags */
@@ -754,7 +732,7 @@ xfs_bmap_extents_to_btree(
754 */ 732 */
755 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 733 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
756 cur->bc_private.b.firstblock = *firstblock; 734 cur->bc_private.b.firstblock = *firstblock;
757 cur->bc_private.b.flist = flist; 735 cur->bc_private.b.dfops = dfops;
758 cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; 736 cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
759 /* 737 /*
760 * Convert to a btree with two levels, one record in root. 738 * Convert to a btree with two levels, one record in root.
@@ -763,11 +741,12 @@ xfs_bmap_extents_to_btree(
763 memset(&args, 0, sizeof(args)); 741 memset(&args, 0, sizeof(args));
764 args.tp = tp; 742 args.tp = tp;
765 args.mp = mp; 743 args.mp = mp;
744 xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
766 args.firstblock = *firstblock; 745 args.firstblock = *firstblock;
767 if (*firstblock == NULLFSBLOCK) { 746 if (*firstblock == NULLFSBLOCK) {
768 args.type = XFS_ALLOCTYPE_START_BNO; 747 args.type = XFS_ALLOCTYPE_START_BNO;
769 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); 748 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
770 } else if (flist->xbf_low) { 749 } else if (dfops->dop_low) {
771 args.type = XFS_ALLOCTYPE_START_BNO; 750 args.type = XFS_ALLOCTYPE_START_BNO;
772 args.fsbno = *firstblock; 751 args.fsbno = *firstblock;
773 } else { 752 } else {
@@ -788,7 +767,7 @@ xfs_bmap_extents_to_btree(
788 ASSERT(args.fsbno != NULLFSBLOCK); 767 ASSERT(args.fsbno != NULLFSBLOCK);
789 ASSERT(*firstblock == NULLFSBLOCK || 768 ASSERT(*firstblock == NULLFSBLOCK ||
790 args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) || 769 args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
791 (flist->xbf_low && 770 (dfops->dop_low &&
792 args.agno > XFS_FSB_TO_AGNO(mp, *firstblock))); 771 args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
793 *firstblock = cur->bc_private.b.firstblock = args.fsbno; 772 *firstblock = cur->bc_private.b.firstblock = args.fsbno;
794 cur->bc_private.b.allocated++; 773 cur->bc_private.b.allocated++;
@@ -909,6 +888,7 @@ xfs_bmap_local_to_extents(
909 memset(&args, 0, sizeof(args)); 888 memset(&args, 0, sizeof(args));
910 args.tp = tp; 889 args.tp = tp;
911 args.mp = ip->i_mount; 890 args.mp = ip->i_mount;
891 xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
912 args.firstblock = *firstblock; 892 args.firstblock = *firstblock;
913 /* 893 /*
914 * Allocate a block. We know we need only one, since the 894 * Allocate a block. We know we need only one, since the
@@ -973,7 +953,7 @@ xfs_bmap_add_attrfork_btree(
973 xfs_trans_t *tp, /* transaction pointer */ 953 xfs_trans_t *tp, /* transaction pointer */
974 xfs_inode_t *ip, /* incore inode pointer */ 954 xfs_inode_t *ip, /* incore inode pointer */
975 xfs_fsblock_t *firstblock, /* first block allocated */ 955 xfs_fsblock_t *firstblock, /* first block allocated */
976 xfs_bmap_free_t *flist, /* blocks to free at commit */ 956 struct xfs_defer_ops *dfops, /* blocks to free at commit */
977 int *flags) /* inode logging flags */ 957 int *flags) /* inode logging flags */
978{ 958{
979 xfs_btree_cur_t *cur; /* btree cursor */ 959 xfs_btree_cur_t *cur; /* btree cursor */
@@ -986,7 +966,7 @@ xfs_bmap_add_attrfork_btree(
986 *flags |= XFS_ILOG_DBROOT; 966 *flags |= XFS_ILOG_DBROOT;
987 else { 967 else {
988 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); 968 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
989 cur->bc_private.b.flist = flist; 969 cur->bc_private.b.dfops = dfops;
990 cur->bc_private.b.firstblock = *firstblock; 970 cur->bc_private.b.firstblock = *firstblock;
991 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) 971 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
992 goto error0; 972 goto error0;
@@ -1016,7 +996,7 @@ xfs_bmap_add_attrfork_extents(
1016 xfs_trans_t *tp, /* transaction pointer */ 996 xfs_trans_t *tp, /* transaction pointer */
1017 xfs_inode_t *ip, /* incore inode pointer */ 997 xfs_inode_t *ip, /* incore inode pointer */
1018 xfs_fsblock_t *firstblock, /* first block allocated */ 998 xfs_fsblock_t *firstblock, /* first block allocated */
1019 xfs_bmap_free_t *flist, /* blocks to free at commit */ 999 struct xfs_defer_ops *dfops, /* blocks to free at commit */
1020 int *flags) /* inode logging flags */ 1000 int *flags) /* inode logging flags */
1021{ 1001{
1022 xfs_btree_cur_t *cur; /* bmap btree cursor */ 1002 xfs_btree_cur_t *cur; /* bmap btree cursor */
@@ -1025,7 +1005,7 @@ xfs_bmap_add_attrfork_extents(
1025 if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip)) 1005 if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
1026 return 0; 1006 return 0;
1027 cur = NULL; 1007 cur = NULL;
1028 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0, 1008 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, &cur, 0,
1029 flags, XFS_DATA_FORK); 1009 flags, XFS_DATA_FORK);
1030 if (cur) { 1010 if (cur) {
1031 cur->bc_private.b.allocated = 0; 1011 cur->bc_private.b.allocated = 0;
@@ -1051,7 +1031,7 @@ xfs_bmap_add_attrfork_local(
1051 xfs_trans_t *tp, /* transaction pointer */ 1031 xfs_trans_t *tp, /* transaction pointer */
1052 xfs_inode_t *ip, /* incore inode pointer */ 1032 xfs_inode_t *ip, /* incore inode pointer */
1053 xfs_fsblock_t *firstblock, /* first block allocated */ 1033 xfs_fsblock_t *firstblock, /* first block allocated */
1054 xfs_bmap_free_t *flist, /* blocks to free at commit */ 1034 struct xfs_defer_ops *dfops, /* blocks to free at commit */
1055 int *flags) /* inode logging flags */ 1035 int *flags) /* inode logging flags */
1056{ 1036{
1057 xfs_da_args_t dargs; /* args for dir/attr code */ 1037 xfs_da_args_t dargs; /* args for dir/attr code */
@@ -1064,7 +1044,7 @@ xfs_bmap_add_attrfork_local(
1064 dargs.geo = ip->i_mount->m_dir_geo; 1044 dargs.geo = ip->i_mount->m_dir_geo;
1065 dargs.dp = ip; 1045 dargs.dp = ip;
1066 dargs.firstblock = firstblock; 1046 dargs.firstblock = firstblock;
1067 dargs.flist = flist; 1047 dargs.dfops = dfops;
1068 dargs.total = dargs.geo->fsbcount; 1048 dargs.total = dargs.geo->fsbcount;
1069 dargs.whichfork = XFS_DATA_FORK; 1049 dargs.whichfork = XFS_DATA_FORK;
1070 dargs.trans = tp; 1050 dargs.trans = tp;
@@ -1092,7 +1072,7 @@ xfs_bmap_add_attrfork(
1092 int rsvd) /* xact may use reserved blks */ 1072 int rsvd) /* xact may use reserved blks */
1093{ 1073{
1094 xfs_fsblock_t firstblock; /* 1st block/ag allocated */ 1074 xfs_fsblock_t firstblock; /* 1st block/ag allocated */
1095 xfs_bmap_free_t flist; /* freed extent records */ 1075 struct xfs_defer_ops dfops; /* freed extent records */
1096 xfs_mount_t *mp; /* mount structure */ 1076 xfs_mount_t *mp; /* mount structure */
1097 xfs_trans_t *tp; /* transaction pointer */ 1077 xfs_trans_t *tp; /* transaction pointer */
1098 int blks; /* space reservation */ 1078 int blks; /* space reservation */
@@ -1158,18 +1138,18 @@ xfs_bmap_add_attrfork(
1158 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); 1138 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
1159 ip->i_afp->if_flags = XFS_IFEXTENTS; 1139 ip->i_afp->if_flags = XFS_IFEXTENTS;
1160 logflags = 0; 1140 logflags = 0;
1161 xfs_bmap_init(&flist, &firstblock); 1141 xfs_defer_init(&dfops, &firstblock);
1162 switch (ip->i_d.di_format) { 1142 switch (ip->i_d.di_format) {
1163 case XFS_DINODE_FMT_LOCAL: 1143 case XFS_DINODE_FMT_LOCAL:
1164 error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist, 1144 error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &dfops,
1165 &logflags); 1145 &logflags);
1166 break; 1146 break;
1167 case XFS_DINODE_FMT_EXTENTS: 1147 case XFS_DINODE_FMT_EXTENTS:
1168 error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock, 1148 error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
1169 &flist, &logflags); 1149 &dfops, &logflags);
1170 break; 1150 break;
1171 case XFS_DINODE_FMT_BTREE: 1151 case XFS_DINODE_FMT_BTREE:
1172 error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist, 1152 error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &dfops,
1173 &logflags); 1153 &logflags);
1174 break; 1154 break;
1175 default: 1155 default:
@@ -1198,7 +1178,7 @@ xfs_bmap_add_attrfork(
1198 xfs_log_sb(tp); 1178 xfs_log_sb(tp);
1199 } 1179 }
1200 1180
1201 error = xfs_bmap_finish(&tp, &flist, NULL); 1181 error = xfs_defer_finish(&tp, &dfops, NULL);
1202 if (error) 1182 if (error)
1203 goto bmap_cancel; 1183 goto bmap_cancel;
1204 error = xfs_trans_commit(tp); 1184 error = xfs_trans_commit(tp);
@@ -1206,7 +1186,7 @@ xfs_bmap_add_attrfork(
1206 return error; 1186 return error;
1207 1187
1208bmap_cancel: 1188bmap_cancel:
1209 xfs_bmap_cancel(&flist); 1189 xfs_defer_cancel(&dfops);
1210trans_cancel: 1190trans_cancel:
1211 xfs_trans_cancel(tp); 1191 xfs_trans_cancel(tp);
1212 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1192 xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -2003,7 +1983,7 @@ xfs_bmap_add_extent_delay_real(
2003 1983
2004 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 1984 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2005 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 1985 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2006 bma->firstblock, bma->flist, 1986 bma->firstblock, bma->dfops,
2007 &bma->cur, 1, &tmp_rval, whichfork); 1987 &bma->cur, 1, &tmp_rval, whichfork);
2008 rval |= tmp_rval; 1988 rval |= tmp_rval;
2009 if (error) 1989 if (error)
@@ -2087,7 +2067,7 @@ xfs_bmap_add_extent_delay_real(
2087 2067
2088 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 2068 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2089 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 2069 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2090 bma->firstblock, bma->flist, &bma->cur, 1, 2070 bma->firstblock, bma->dfops, &bma->cur, 1,
2091 &tmp_rval, whichfork); 2071 &tmp_rval, whichfork);
2092 rval |= tmp_rval; 2072 rval |= tmp_rval;
2093 if (error) 2073 if (error)
@@ -2156,7 +2136,7 @@ xfs_bmap_add_extent_delay_real(
2156 2136
2157 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 2137 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2158 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 2138 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2159 bma->firstblock, bma->flist, &bma->cur, 2139 bma->firstblock, bma->dfops, &bma->cur,
2160 1, &tmp_rval, whichfork); 2140 1, &tmp_rval, whichfork);
2161 rval |= tmp_rval; 2141 rval |= tmp_rval;
2162 if (error) 2142 if (error)
@@ -2199,13 +2179,18 @@ xfs_bmap_add_extent_delay_real(
2199 ASSERT(0); 2179 ASSERT(0);
2200 } 2180 }
2201 2181
2182 /* add reverse mapping */
2183 error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
2184 if (error)
2185 goto done;
2186
2202 /* convert to a btree if necessary */ 2187 /* convert to a btree if necessary */
2203 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 2188 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2204 int tmp_logflags; /* partial log flag return val */ 2189 int tmp_logflags; /* partial log flag return val */
2205 2190
2206 ASSERT(bma->cur == NULL); 2191 ASSERT(bma->cur == NULL);
2207 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 2192 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2208 bma->firstblock, bma->flist, &bma->cur, 2193 bma->firstblock, bma->dfops, &bma->cur,
2209 da_old > 0, &tmp_logflags, whichfork); 2194 da_old > 0, &tmp_logflags, whichfork);
2210 bma->logflags |= tmp_logflags; 2195 bma->logflags |= tmp_logflags;
2211 if (error) 2196 if (error)
@@ -2247,7 +2232,7 @@ xfs_bmap_add_extent_unwritten_real(
2247 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 2232 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
2248 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 2233 xfs_bmbt_irec_t *new, /* new data to add to file extents */
2249 xfs_fsblock_t *first, /* pointer to firstblock variable */ 2234 xfs_fsblock_t *first, /* pointer to firstblock variable */
2250 xfs_bmap_free_t *flist, /* list of extents to be freed */ 2235 struct xfs_defer_ops *dfops, /* list of extents to be freed */
2251 int *logflagsp) /* inode logging flags */ 2236 int *logflagsp) /* inode logging flags */
2252{ 2237{
2253 xfs_btree_cur_t *cur; /* btree cursor */ 2238 xfs_btree_cur_t *cur; /* btree cursor */
@@ -2735,12 +2720,17 @@ xfs_bmap_add_extent_unwritten_real(
2735 ASSERT(0); 2720 ASSERT(0);
2736 } 2721 }
2737 2722
2723 /* update reverse mappings */
2724 error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new);
2725 if (error)
2726 goto done;
2727
2738 /* convert to a btree if necessary */ 2728 /* convert to a btree if necessary */
2739 if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { 2729 if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
2740 int tmp_logflags; /* partial log flag return val */ 2730 int tmp_logflags; /* partial log flag return val */
2741 2731
2742 ASSERT(cur == NULL); 2732 ASSERT(cur == NULL);
2743 error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur, 2733 error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
2744 0, &tmp_logflags, XFS_DATA_FORK); 2734 0, &tmp_logflags, XFS_DATA_FORK);
2745 *logflagsp |= tmp_logflags; 2735 *logflagsp |= tmp_logflags;
2746 if (error) 2736 if (error)
@@ -3127,13 +3117,18 @@ xfs_bmap_add_extent_hole_real(
3127 break; 3117 break;
3128 } 3118 }
3129 3119
3120 /* add reverse mapping */
3121 error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
3122 if (error)
3123 goto done;
3124
3130 /* convert to a btree if necessary */ 3125 /* convert to a btree if necessary */
3131 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 3126 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
3132 int tmp_logflags; /* partial log flag return val */ 3127 int tmp_logflags; /* partial log flag return val */
3133 3128
3134 ASSERT(bma->cur == NULL); 3129 ASSERT(bma->cur == NULL);
3135 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 3130 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
3136 bma->firstblock, bma->flist, &bma->cur, 3131 bma->firstblock, bma->dfops, &bma->cur,
3137 0, &tmp_logflags, whichfork); 3132 0, &tmp_logflags, whichfork);
3138 bma->logflags |= tmp_logflags; 3133 bma->logflags |= tmp_logflags;
3139 if (error) 3134 if (error)
@@ -3691,9 +3686,10 @@ xfs_bmap_btalloc(
3691 args.tp = ap->tp; 3686 args.tp = ap->tp;
3692 args.mp = mp; 3687 args.mp = mp;
3693 args.fsbno = ap->blkno; 3688 args.fsbno = ap->blkno;
3689 xfs_rmap_skip_owner_update(&args.oinfo);
3694 3690
3695 /* Trim the allocation back to the maximum an AG can fit. */ 3691 /* Trim the allocation back to the maximum an AG can fit. */
3696 args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp)); 3692 args.maxlen = MIN(ap->length, mp->m_ag_max_usable);
3697 args.firstblock = *ap->firstblock; 3693 args.firstblock = *ap->firstblock;
3698 blen = 0; 3694 blen = 0;
3699 if (nullfb) { 3695 if (nullfb) {
@@ -3708,7 +3704,7 @@ xfs_bmap_btalloc(
3708 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); 3704 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3709 if (error) 3705 if (error)
3710 return error; 3706 return error;
3711 } else if (ap->flist->xbf_low) { 3707 } else if (ap->dfops->dop_low) {
3712 if (xfs_inode_is_filestream(ap->ip)) 3708 if (xfs_inode_is_filestream(ap->ip))
3713 args.type = XFS_ALLOCTYPE_FIRST_AG; 3709 args.type = XFS_ALLOCTYPE_FIRST_AG;
3714 else 3710 else
@@ -3741,7 +3737,7 @@ xfs_bmap_btalloc(
3741 * is >= the stripe unit and the allocation offset is 3737 * is >= the stripe unit and the allocation offset is
3742 * at the end of file. 3738 * at the end of file.
3743 */ 3739 */
3744 if (!ap->flist->xbf_low && ap->aeof) { 3740 if (!ap->dfops->dop_low && ap->aeof) {
3745 if (!ap->offset) { 3741 if (!ap->offset) {
3746 args.alignment = stripe_align; 3742 args.alignment = stripe_align;
3747 atype = args.type; 3743 atype = args.type;
@@ -3834,7 +3830,7 @@ xfs_bmap_btalloc(
3834 args.minleft = 0; 3830 args.minleft = 0;
3835 if ((error = xfs_alloc_vextent(&args))) 3831 if ((error = xfs_alloc_vextent(&args)))
3836 return error; 3832 return error;
3837 ap->flist->xbf_low = 1; 3833 ap->dfops->dop_low = true;
3838 } 3834 }
3839 if (args.fsbno != NULLFSBLOCK) { 3835 if (args.fsbno != NULLFSBLOCK) {
3840 /* 3836 /*
@@ -3844,7 +3840,7 @@ xfs_bmap_btalloc(
3844 ASSERT(*ap->firstblock == NULLFSBLOCK || 3840 ASSERT(*ap->firstblock == NULLFSBLOCK ||
3845 XFS_FSB_TO_AGNO(mp, *ap->firstblock) == 3841 XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
3846 XFS_FSB_TO_AGNO(mp, args.fsbno) || 3842 XFS_FSB_TO_AGNO(mp, args.fsbno) ||
3847 (ap->flist->xbf_low && 3843 (ap->dfops->dop_low &&
3848 XFS_FSB_TO_AGNO(mp, *ap->firstblock) < 3844 XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
3849 XFS_FSB_TO_AGNO(mp, args.fsbno))); 3845 XFS_FSB_TO_AGNO(mp, args.fsbno)));
3850 3846
@@ -3852,7 +3848,7 @@ xfs_bmap_btalloc(
3852 if (*ap->firstblock == NULLFSBLOCK) 3848 if (*ap->firstblock == NULLFSBLOCK)
3853 *ap->firstblock = args.fsbno; 3849 *ap->firstblock = args.fsbno;
3854 ASSERT(nullfb || fb_agno == args.agno || 3850 ASSERT(nullfb || fb_agno == args.agno ||
3855 (ap->flist->xbf_low && fb_agno < args.agno)); 3851 (ap->dfops->dop_low && fb_agno < args.agno));
3856 ap->length = args.len; 3852 ap->length = args.len;
3857 ap->ip->i_d.di_nblocks += args.len; 3853 ap->ip->i_d.di_nblocks += args.len;
3858 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); 3854 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
@@ -4319,7 +4315,7 @@ xfs_bmapi_allocate(
4319 if (error) 4315 if (error)
4320 return error; 4316 return error;
4321 4317
4322 if (bma->flist->xbf_low) 4318 if (bma->dfops->dop_low)
4323 bma->minleft = 0; 4319 bma->minleft = 0;
4324 if (bma->cur) 4320 if (bma->cur)
4325 bma->cur->bc_private.b.firstblock = *bma->firstblock; 4321 bma->cur->bc_private.b.firstblock = *bma->firstblock;
@@ -4328,7 +4324,7 @@ xfs_bmapi_allocate(
4328 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { 4324 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4329 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork); 4325 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4330 bma->cur->bc_private.b.firstblock = *bma->firstblock; 4326 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4331 bma->cur->bc_private.b.flist = bma->flist; 4327 bma->cur->bc_private.b.dfops = bma->dfops;
4332 } 4328 }
4333 /* 4329 /*
4334 * Bump the number of extents we've allocated 4330 * Bump the number of extents we've allocated
@@ -4409,7 +4405,7 @@ xfs_bmapi_convert_unwritten(
4409 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp, 4405 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4410 bma->ip, whichfork); 4406 bma->ip, whichfork);
4411 bma->cur->bc_private.b.firstblock = *bma->firstblock; 4407 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4412 bma->cur->bc_private.b.flist = bma->flist; 4408 bma->cur->bc_private.b.dfops = bma->dfops;
4413 } 4409 }
4414 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) 4410 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4415 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; 4411 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
@@ -4426,7 +4422,7 @@ xfs_bmapi_convert_unwritten(
4426 } 4422 }
4427 4423
4428 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, 4424 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
4429 &bma->cur, mval, bma->firstblock, bma->flist, 4425 &bma->cur, mval, bma->firstblock, bma->dfops,
4430 &tmp_logflags); 4426 &tmp_logflags);
4431 /* 4427 /*
4432 * Log the inode core unconditionally in the unwritten extent conversion 4428 * Log the inode core unconditionally in the unwritten extent conversion
@@ -4480,7 +4476,7 @@ xfs_bmapi_write(
4480 xfs_extlen_t total, /* total blocks needed */ 4476 xfs_extlen_t total, /* total blocks needed */
4481 struct xfs_bmbt_irec *mval, /* output: map values */ 4477 struct xfs_bmbt_irec *mval, /* output: map values */
4482 int *nmap, /* i/o: mval size/count */ 4478 int *nmap, /* i/o: mval size/count */
4483 struct xfs_bmap_free *flist) /* i/o: list extents to free */ 4479 struct xfs_defer_ops *dfops) /* i/o: list extents to free */
4484{ 4480{
4485 struct xfs_mount *mp = ip->i_mount; 4481 struct xfs_mount *mp = ip->i_mount;
4486 struct xfs_ifork *ifp; 4482 struct xfs_ifork *ifp;
@@ -4570,7 +4566,7 @@ xfs_bmapi_write(
4570 bma.ip = ip; 4566 bma.ip = ip;
4571 bma.total = total; 4567 bma.total = total;
4572 bma.userdata = 0; 4568 bma.userdata = 0;
4573 bma.flist = flist; 4569 bma.dfops = dfops;
4574 bma.firstblock = firstblock; 4570 bma.firstblock = firstblock;
4575 4571
4576 while (bno < end && n < *nmap) { 4572 while (bno < end && n < *nmap) {
@@ -4684,7 +4680,7 @@ error0:
4684 XFS_FSB_TO_AGNO(mp, *firstblock) == 4680 XFS_FSB_TO_AGNO(mp, *firstblock) ==
4685 XFS_FSB_TO_AGNO(mp, 4681 XFS_FSB_TO_AGNO(mp,
4686 bma.cur->bc_private.b.firstblock) || 4682 bma.cur->bc_private.b.firstblock) ||
4687 (flist->xbf_low && 4683 (dfops->dop_low &&
4688 XFS_FSB_TO_AGNO(mp, *firstblock) < 4684 XFS_FSB_TO_AGNO(mp, *firstblock) <
4689 XFS_FSB_TO_AGNO(mp, 4685 XFS_FSB_TO_AGNO(mp,
4690 bma.cur->bc_private.b.firstblock))); 4686 bma.cur->bc_private.b.firstblock)));
@@ -4768,7 +4764,7 @@ xfs_bmap_del_extent(
4768 xfs_inode_t *ip, /* incore inode pointer */ 4764 xfs_inode_t *ip, /* incore inode pointer */
4769 xfs_trans_t *tp, /* current transaction pointer */ 4765 xfs_trans_t *tp, /* current transaction pointer */
4770 xfs_extnum_t *idx, /* extent number to update/delete */ 4766 xfs_extnum_t *idx, /* extent number to update/delete */
4771 xfs_bmap_free_t *flist, /* list of extents to be freed */ 4767 struct xfs_defer_ops *dfops, /* list of extents to be freed */
4772 xfs_btree_cur_t *cur, /* if null, not a btree */ 4768 xfs_btree_cur_t *cur, /* if null, not a btree */
4773 xfs_bmbt_irec_t *del, /* data to remove from extents */ 4769 xfs_bmbt_irec_t *del, /* data to remove from extents */
4774 int *logflagsp, /* inode logging flags */ 4770 int *logflagsp, /* inode logging flags */
@@ -4870,6 +4866,7 @@ xfs_bmap_del_extent(
4870 nblks = 0; 4866 nblks = 0;
4871 do_fx = 0; 4867 do_fx = 0;
4872 } 4868 }
4869
4873 /* 4870 /*
4874 * Set flag value to use in switch statement. 4871 * Set flag value to use in switch statement.
4875 * Left-contig is 2, right-contig is 1. 4872 * Left-contig is 2, right-contig is 1.
@@ -5052,12 +5049,20 @@ xfs_bmap_del_extent(
5052 ++*idx; 5049 ++*idx;
5053 break; 5050 break;
5054 } 5051 }
5052
5053 /* remove reverse mapping */
5054 if (!delay) {
5055 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del);
5056 if (error)
5057 goto done;
5058 }
5059
5055 /* 5060 /*
5056 * If we need to, add to list of extents to delete. 5061 * If we need to, add to list of extents to delete.
5057 */ 5062 */
5058 if (do_fx) 5063 if (do_fx)
5059 xfs_bmap_add_free(mp, flist, del->br_startblock, 5064 xfs_bmap_add_free(mp, dfops, del->br_startblock,
5060 del->br_blockcount); 5065 del->br_blockcount, NULL);
5061 /* 5066 /*
5062 * Adjust inode # blocks in the file. 5067 * Adjust inode # blocks in the file.
5063 */ 5068 */
@@ -5097,7 +5102,7 @@ xfs_bunmapi(
5097 xfs_extnum_t nexts, /* number of extents max */ 5102 xfs_extnum_t nexts, /* number of extents max */
5098 xfs_fsblock_t *firstblock, /* first allocated block 5103 xfs_fsblock_t *firstblock, /* first allocated block
5099 controls a.g. for allocs */ 5104 controls a.g. for allocs */
5100 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 5105 struct xfs_defer_ops *dfops, /* i/o: list extents to free */
5101 int *done) /* set if not done yet */ 5106 int *done) /* set if not done yet */
5102{ 5107{
5103 xfs_btree_cur_t *cur; /* bmap btree cursor */ 5108 xfs_btree_cur_t *cur; /* bmap btree cursor */
@@ -5170,7 +5175,7 @@ xfs_bunmapi(
5170 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); 5175 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
5171 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5176 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5172 cur->bc_private.b.firstblock = *firstblock; 5177 cur->bc_private.b.firstblock = *firstblock;
5173 cur->bc_private.b.flist = flist; 5178 cur->bc_private.b.dfops = dfops;
5174 cur->bc_private.b.flags = 0; 5179 cur->bc_private.b.flags = 0;
5175 } else 5180 } else
5176 cur = NULL; 5181 cur = NULL;
@@ -5179,8 +5184,10 @@ xfs_bunmapi(
5179 /* 5184 /*
5180 * Synchronize by locking the bitmap inode. 5185 * Synchronize by locking the bitmap inode.
5181 */ 5186 */
5182 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); 5187 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5183 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); 5188 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5189 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5190 xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5184 } 5191 }
5185 5192
5186 extno = 0; 5193 extno = 0;
@@ -5262,7 +5269,7 @@ xfs_bunmapi(
5262 } 5269 }
5263 del.br_state = XFS_EXT_UNWRITTEN; 5270 del.br_state = XFS_EXT_UNWRITTEN;
5264 error = xfs_bmap_add_extent_unwritten_real(tp, ip, 5271 error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5265 &lastx, &cur, &del, firstblock, flist, 5272 &lastx, &cur, &del, firstblock, dfops,
5266 &logflags); 5273 &logflags);
5267 if (error) 5274 if (error)
5268 goto error0; 5275 goto error0;
@@ -5321,7 +5328,7 @@ xfs_bunmapi(
5321 lastx--; 5328 lastx--;
5322 error = xfs_bmap_add_extent_unwritten_real(tp, 5329 error = xfs_bmap_add_extent_unwritten_real(tp,
5323 ip, &lastx, &cur, &prev, 5330 ip, &lastx, &cur, &prev,
5324 firstblock, flist, &logflags); 5331 firstblock, dfops, &logflags);
5325 if (error) 5332 if (error)
5326 goto error0; 5333 goto error0;
5327 goto nodelete; 5334 goto nodelete;
@@ -5330,7 +5337,7 @@ xfs_bunmapi(
5330 del.br_state = XFS_EXT_UNWRITTEN; 5337 del.br_state = XFS_EXT_UNWRITTEN;
5331 error = xfs_bmap_add_extent_unwritten_real(tp, 5338 error = xfs_bmap_add_extent_unwritten_real(tp,
5332 ip, &lastx, &cur, &del, 5339 ip, &lastx, &cur, &del,
5333 firstblock, flist, &logflags); 5340 firstblock, dfops, &logflags);
5334 if (error) 5341 if (error)
5335 goto error0; 5342 goto error0;
5336 goto nodelete; 5343 goto nodelete;
@@ -5388,7 +5395,7 @@ xfs_bunmapi(
5388 } else if (cur) 5395 } else if (cur)
5389 cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL; 5396 cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
5390 5397
5391 error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, 5398 error = xfs_bmap_del_extent(ip, tp, &lastx, dfops, cur, &del,
5392 &tmp_logflags, whichfork); 5399 &tmp_logflags, whichfork);
5393 logflags |= tmp_logflags; 5400 logflags |= tmp_logflags;
5394 if (error) 5401 if (error)
@@ -5422,7 +5429,7 @@ nodelete:
5422 */ 5429 */
5423 if (xfs_bmap_needs_btree(ip, whichfork)) { 5430 if (xfs_bmap_needs_btree(ip, whichfork)) {
5424 ASSERT(cur == NULL); 5431 ASSERT(cur == NULL);
5425 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, 5432 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops,
5426 &cur, 0, &tmp_logflags, whichfork); 5433 &cur, 0, &tmp_logflags, whichfork);
5427 logflags |= tmp_logflags; 5434 logflags |= tmp_logflags;
5428 if (error) 5435 if (error)
@@ -5589,7 +5596,8 @@ xfs_bmse_shift_one(
5589 struct xfs_bmbt_rec_host *gotp, 5596 struct xfs_bmbt_rec_host *gotp,
5590 struct xfs_btree_cur *cur, 5597 struct xfs_btree_cur *cur,
5591 int *logflags, 5598 int *logflags,
5592 enum shift_direction direction) 5599 enum shift_direction direction,
5600 struct xfs_defer_ops *dfops)
5593{ 5601{
5594 struct xfs_ifork *ifp; 5602 struct xfs_ifork *ifp;
5595 struct xfs_mount *mp; 5603 struct xfs_mount *mp;
@@ -5637,9 +5645,13 @@ xfs_bmse_shift_one(
5637 /* check whether to merge the extent or shift it down */ 5645 /* check whether to merge the extent or shift it down */
5638 if (xfs_bmse_can_merge(&adj_irec, &got, 5646 if (xfs_bmse_can_merge(&adj_irec, &got,
5639 offset_shift_fsb)) { 5647 offset_shift_fsb)) {
5640 return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, 5648 error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
5641 *current_ext, gotp, adj_irecp, 5649 *current_ext, gotp, adj_irecp,
5642 cur, logflags); 5650 cur, logflags);
5651 if (error)
5652 return error;
5653 adj_irec = got;
5654 goto update_rmap;
5643 } 5655 }
5644 } else { 5656 } else {
5645 startoff = got.br_startoff + offset_shift_fsb; 5657 startoff = got.br_startoff + offset_shift_fsb;
@@ -5676,9 +5688,10 @@ update_current_ext:
5676 (*current_ext)--; 5688 (*current_ext)--;
5677 xfs_bmbt_set_startoff(gotp, startoff); 5689 xfs_bmbt_set_startoff(gotp, startoff);
5678 *logflags |= XFS_ILOG_CORE; 5690 *logflags |= XFS_ILOG_CORE;
5691 adj_irec = got;
5679 if (!cur) { 5692 if (!cur) {
5680 *logflags |= XFS_ILOG_DEXT; 5693 *logflags |= XFS_ILOG_DEXT;
5681 return 0; 5694 goto update_rmap;
5682 } 5695 }
5683 5696
5684 error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, 5697 error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
@@ -5688,8 +5701,18 @@ update_current_ext:
5688 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 5701 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5689 5702
5690 got.br_startoff = startoff; 5703 got.br_startoff = startoff;
5691 return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, 5704 error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
5692 got.br_blockcount, got.br_state); 5705 got.br_blockcount, got.br_state);
5706 if (error)
5707 return error;
5708
5709update_rmap:
5710 /* update reverse mapping */
5711 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &adj_irec);
5712 if (error)
5713 return error;
5714 adj_irec.br_startoff = startoff;
5715 return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &adj_irec);
5693} 5716}
5694 5717
5695/* 5718/*
@@ -5711,7 +5734,7 @@ xfs_bmap_shift_extents(
5711 int *done, 5734 int *done,
5712 xfs_fileoff_t stop_fsb, 5735 xfs_fileoff_t stop_fsb,
5713 xfs_fsblock_t *firstblock, 5736 xfs_fsblock_t *firstblock,
5714 struct xfs_bmap_free *flist, 5737 struct xfs_defer_ops *dfops,
5715 enum shift_direction direction, 5738 enum shift_direction direction,
5716 int num_exts) 5739 int num_exts)
5717{ 5740{
@@ -5756,7 +5779,7 @@ xfs_bmap_shift_extents(
5756 if (ifp->if_flags & XFS_IFBROOT) { 5779 if (ifp->if_flags & XFS_IFBROOT) {
5757 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5780 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5758 cur->bc_private.b.firstblock = *firstblock; 5781 cur->bc_private.b.firstblock = *firstblock;
5759 cur->bc_private.b.flist = flist; 5782 cur->bc_private.b.dfops = dfops;
5760 cur->bc_private.b.flags = 0; 5783 cur->bc_private.b.flags = 0;
5761 } 5784 }
5762 5785
@@ -5817,7 +5840,7 @@ xfs_bmap_shift_extents(
5817 while (nexts++ < num_exts) { 5840 while (nexts++ < num_exts) {
5818 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, 5841 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
5819 &current_ext, gotp, cur, &logflags, 5842 &current_ext, gotp, cur, &logflags,
5820 direction); 5843 direction, dfops);
5821 if (error) 5844 if (error)
5822 goto del_cursor; 5845 goto del_cursor;
5823 /* 5846 /*
@@ -5865,7 +5888,7 @@ xfs_bmap_split_extent_at(
5865 struct xfs_inode *ip, 5888 struct xfs_inode *ip,
5866 xfs_fileoff_t split_fsb, 5889 xfs_fileoff_t split_fsb,
5867 xfs_fsblock_t *firstfsb, 5890 xfs_fsblock_t *firstfsb,
5868 struct xfs_bmap_free *free_list) 5891 struct xfs_defer_ops *dfops)
5869{ 5892{
5870 int whichfork = XFS_DATA_FORK; 5893 int whichfork = XFS_DATA_FORK;
5871 struct xfs_btree_cur *cur = NULL; 5894 struct xfs_btree_cur *cur = NULL;
@@ -5927,7 +5950,7 @@ xfs_bmap_split_extent_at(
5927 if (ifp->if_flags & XFS_IFBROOT) { 5950 if (ifp->if_flags & XFS_IFBROOT) {
5928 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5951 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5929 cur->bc_private.b.firstblock = *firstfsb; 5952 cur->bc_private.b.firstblock = *firstfsb;
5930 cur->bc_private.b.flist = free_list; 5953 cur->bc_private.b.dfops = dfops;
5931 cur->bc_private.b.flags = 0; 5954 cur->bc_private.b.flags = 0;
5932 error = xfs_bmbt_lookup_eq(cur, got.br_startoff, 5955 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5933 got.br_startblock, 5956 got.br_startblock,
@@ -5980,7 +6003,7 @@ xfs_bmap_split_extent_at(
5980 int tmp_logflags; /* partial log flag return val */ 6003 int tmp_logflags; /* partial log flag return val */
5981 6004
5982 ASSERT(cur == NULL); 6005 ASSERT(cur == NULL);
5983 error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list, 6006 error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, dfops,
5984 &cur, 0, &tmp_logflags, whichfork); 6007 &cur, 0, &tmp_logflags, whichfork);
5985 logflags |= tmp_logflags; 6008 logflags |= tmp_logflags;
5986 } 6009 }
@@ -6004,7 +6027,7 @@ xfs_bmap_split_extent(
6004{ 6027{
6005 struct xfs_mount *mp = ip->i_mount; 6028 struct xfs_mount *mp = ip->i_mount;
6006 struct xfs_trans *tp; 6029 struct xfs_trans *tp;
6007 struct xfs_bmap_free free_list; 6030 struct xfs_defer_ops dfops;
6008 xfs_fsblock_t firstfsb; 6031 xfs_fsblock_t firstfsb;
6009 int error; 6032 int error;
6010 6033
@@ -6016,21 +6039,21 @@ xfs_bmap_split_extent(
6016 xfs_ilock(ip, XFS_ILOCK_EXCL); 6039 xfs_ilock(ip, XFS_ILOCK_EXCL);
6017 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 6040 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
6018 6041
6019 xfs_bmap_init(&free_list, &firstfsb); 6042 xfs_defer_init(&dfops, &firstfsb);
6020 6043
6021 error = xfs_bmap_split_extent_at(tp, ip, split_fsb, 6044 error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
6022 &firstfsb, &free_list); 6045 &firstfsb, &dfops);
6023 if (error) 6046 if (error)
6024 goto out; 6047 goto out;
6025 6048
6026 error = xfs_bmap_finish(&tp, &free_list, NULL); 6049 error = xfs_defer_finish(&tp, &dfops, NULL);
6027 if (error) 6050 if (error)
6028 goto out; 6051 goto out;
6029 6052
6030 return xfs_trans_commit(tp); 6053 return xfs_trans_commit(tp);
6031 6054
6032out: 6055out:
6033 xfs_bmap_cancel(&free_list); 6056 xfs_defer_cancel(&dfops);
6034 xfs_trans_cancel(tp); 6057 xfs_trans_cancel(tp);
6035 return error; 6058 return error;
6036} 6059}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index f1f3ae6c0a3f..254034f96941 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -32,7 +32,7 @@ extern kmem_zone_t *xfs_bmap_free_item_zone;
32 */ 32 */
33struct xfs_bmalloca { 33struct xfs_bmalloca {
34 xfs_fsblock_t *firstblock; /* i/o first block allocated */ 34 xfs_fsblock_t *firstblock; /* i/o first block allocated */
35 struct xfs_bmap_free *flist; /* bmap freelist */ 35 struct xfs_defer_ops *dfops; /* bmap freelist */
36 struct xfs_trans *tp; /* transaction pointer */ 36 struct xfs_trans *tp; /* transaction pointer */
37 struct xfs_inode *ip; /* incore inode pointer */ 37 struct xfs_inode *ip; /* incore inode pointer */
38 struct xfs_bmbt_irec prev; /* extent before the new one */ 38 struct xfs_bmbt_irec prev; /* extent before the new one */
@@ -62,34 +62,14 @@ struct xfs_bmalloca {
62 * List of extents to be free "later". 62 * List of extents to be free "later".
63 * The list is kept sorted on xbf_startblock. 63 * The list is kept sorted on xbf_startblock.
64 */ 64 */
65struct xfs_bmap_free_item 65struct xfs_extent_free_item
66{ 66{
67 xfs_fsblock_t xbfi_startblock;/* starting fs block number */ 67 xfs_fsblock_t xefi_startblock;/* starting fs block number */
68 xfs_extlen_t xbfi_blockcount;/* number of blocks in extent */ 68 xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
69 struct list_head xbfi_list; 69 struct list_head xefi_list;
70 struct xfs_owner_info xefi_oinfo; /* extent owner */
70}; 71};
71 72
72/*
73 * Header for free extent list.
74 *
75 * xbf_low is used by the allocator to activate the lowspace algorithm -
76 * when free space is running low the extent allocator may choose to
77 * allocate an extent from an AG without leaving sufficient space for
78 * a btree split when inserting the new extent. In this case the allocator
79 * will enable the lowspace algorithm which is supposed to allow further
80 * allocations (such as btree splits and newroots) to allocate from
81 * sequential AGs. In order to avoid locking AGs out of order the lowspace
82 * algorithm will start searching for free space from AG 0. If the correct
83 * transaction reservations have been made then this algorithm will eventually
84 * find all the space it needs.
85 */
86typedef struct xfs_bmap_free
87{
88 struct list_head xbf_flist; /* list of to-be-free extents */
89 int xbf_count; /* count of items on list */
90 int xbf_low; /* alloc in low mode */
91} xfs_bmap_free_t;
92
93#define XFS_BMAP_MAX_NMAP 4 73#define XFS_BMAP_MAX_NMAP 4
94 74
95/* 75/*
@@ -139,14 +119,6 @@ static inline int xfs_bmapi_aflag(int w)
139#define DELAYSTARTBLOCK ((xfs_fsblock_t)-1LL) 119#define DELAYSTARTBLOCK ((xfs_fsblock_t)-1LL)
140#define HOLESTARTBLOCK ((xfs_fsblock_t)-2LL) 120#define HOLESTARTBLOCK ((xfs_fsblock_t)-2LL)
141 121
142static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
143{
144 INIT_LIST_HEAD(&flp->xbf_flist);
145 flp->xbf_count = 0;
146 flp->xbf_low = 0;
147 *fbp = NULLFSBLOCK;
148}
149
150/* 122/*
151 * Flags for xfs_bmap_add_extent*. 123 * Flags for xfs_bmap_add_extent*.
152 */ 124 */
@@ -193,11 +165,9 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
193 165
194int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); 166int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
195void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); 167void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
196void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_bmap_free *flist, 168void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
197 xfs_fsblock_t bno, xfs_filblks_t len); 169 xfs_fsblock_t bno, xfs_filblks_t len,
198void xfs_bmap_cancel(struct xfs_bmap_free *flist); 170 struct xfs_owner_info *oinfo);
199int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
200 struct xfs_inode *ip);
201void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); 171void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
202int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, 172int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
203 xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); 173 xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
@@ -218,18 +188,18 @@ int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
218 xfs_fileoff_t bno, xfs_filblks_t len, int flags, 188 xfs_fileoff_t bno, xfs_filblks_t len, int flags,
219 xfs_fsblock_t *firstblock, xfs_extlen_t total, 189 xfs_fsblock_t *firstblock, xfs_extlen_t total,
220 struct xfs_bmbt_irec *mval, int *nmap, 190 struct xfs_bmbt_irec *mval, int *nmap,
221 struct xfs_bmap_free *flist); 191 struct xfs_defer_ops *dfops);
222int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, 192int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
223 xfs_fileoff_t bno, xfs_filblks_t len, int flags, 193 xfs_fileoff_t bno, xfs_filblks_t len, int flags,
224 xfs_extnum_t nexts, xfs_fsblock_t *firstblock, 194 xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
225 struct xfs_bmap_free *flist, int *done); 195 struct xfs_defer_ops *dfops, int *done);
226int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, 196int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
227 xfs_extnum_t num); 197 xfs_extnum_t num);
228uint xfs_default_attroffset(struct xfs_inode *ip); 198uint xfs_default_attroffset(struct xfs_inode *ip);
229int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, 199int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
230 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, 200 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
231 int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, 201 int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
232 struct xfs_bmap_free *flist, enum shift_direction direction, 202 struct xfs_defer_ops *dfops, enum shift_direction direction,
233 int num_exts); 203 int num_exts);
234int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); 204int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
235 205
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index db0c71e470c9..cd85274e810c 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -23,6 +23,7 @@
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
26#include "xfs_inode.h" 27#include "xfs_inode.h"
27#include "xfs_trans.h" 28#include "xfs_trans.h"
28#include "xfs_inode_item.h" 29#include "xfs_inode_item.h"
@@ -34,6 +35,7 @@
34#include "xfs_quota.h" 35#include "xfs_quota.h"
35#include "xfs_trace.h" 36#include "xfs_trace.h"
36#include "xfs_cksum.h" 37#include "xfs_cksum.h"
38#include "xfs_rmap.h"
37 39
38/* 40/*
39 * Determine the extent state. 41 * Determine the extent state.
@@ -406,11 +408,11 @@ xfs_bmbt_dup_cursor(
406 cur->bc_private.b.ip, cur->bc_private.b.whichfork); 408 cur->bc_private.b.ip, cur->bc_private.b.whichfork);
407 409
408 /* 410 /*
409 * Copy the firstblock, flist, and flags values, 411 * Copy the firstblock, dfops, and flags values,
410 * since init cursor doesn't get them. 412 * since init cursor doesn't get them.
411 */ 413 */
412 new->bc_private.b.firstblock = cur->bc_private.b.firstblock; 414 new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
413 new->bc_private.b.flist = cur->bc_private.b.flist; 415 new->bc_private.b.dfops = cur->bc_private.b.dfops;
414 new->bc_private.b.flags = cur->bc_private.b.flags; 416 new->bc_private.b.flags = cur->bc_private.b.flags;
415 417
416 return new; 418 return new;
@@ -423,7 +425,7 @@ xfs_bmbt_update_cursor(
423{ 425{
424 ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) || 426 ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
425 (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME)); 427 (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
426 ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist); 428 ASSERT(dst->bc_private.b.dfops == src->bc_private.b.dfops);
427 429
428 dst->bc_private.b.allocated += src->bc_private.b.allocated; 430 dst->bc_private.b.allocated += src->bc_private.b.allocated;
429 dst->bc_private.b.firstblock = src->bc_private.b.firstblock; 431 dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
@@ -446,6 +448,8 @@ xfs_bmbt_alloc_block(
446 args.mp = cur->bc_mp; 448 args.mp = cur->bc_mp;
447 args.fsbno = cur->bc_private.b.firstblock; 449 args.fsbno = cur->bc_private.b.firstblock;
448 args.firstblock = args.fsbno; 450 args.firstblock = args.fsbno;
451 xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_private.b.ip->i_ino,
452 cur->bc_private.b.whichfork);
449 453
450 if (args.fsbno == NULLFSBLOCK) { 454 if (args.fsbno == NULLFSBLOCK) {
451 args.fsbno = be64_to_cpu(start->l); 455 args.fsbno = be64_to_cpu(start->l);
@@ -462,7 +466,7 @@ xfs_bmbt_alloc_block(
462 * block allocation here and corrupt the filesystem. 466 * block allocation here and corrupt the filesystem.
463 */ 467 */
464 args.minleft = args.tp->t_blk_res; 468 args.minleft = args.tp->t_blk_res;
465 } else if (cur->bc_private.b.flist->xbf_low) { 469 } else if (cur->bc_private.b.dfops->dop_low) {
466 args.type = XFS_ALLOCTYPE_START_BNO; 470 args.type = XFS_ALLOCTYPE_START_BNO;
467 } else { 471 } else {
468 args.type = XFS_ALLOCTYPE_NEAR_BNO; 472 args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -490,7 +494,7 @@ xfs_bmbt_alloc_block(
490 error = xfs_alloc_vextent(&args); 494 error = xfs_alloc_vextent(&args);
491 if (error) 495 if (error)
492 goto error0; 496 goto error0;
493 cur->bc_private.b.flist->xbf_low = 1; 497 cur->bc_private.b.dfops->dop_low = true;
494 } 498 }
495 if (args.fsbno == NULLFSBLOCK) { 499 if (args.fsbno == NULLFSBLOCK) {
496 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 500 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
@@ -525,8 +529,10 @@ xfs_bmbt_free_block(
525 struct xfs_inode *ip = cur->bc_private.b.ip; 529 struct xfs_inode *ip = cur->bc_private.b.ip;
526 struct xfs_trans *tp = cur->bc_tp; 530 struct xfs_trans *tp = cur->bc_tp;
527 xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); 531 xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
532 struct xfs_owner_info oinfo;
528 533
529 xfs_bmap_add_free(mp, cur->bc_private.b.flist, fsbno, 1); 534 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_private.b.whichfork);
535 xfs_bmap_add_free(mp, cur->bc_private.b.dfops, fsbno, 1, &oinfo);
530 ip->i_d.di_nblocks--; 536 ip->i_d.di_nblocks--;
531 537
532 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 538 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -600,17 +606,6 @@ xfs_bmbt_init_key_from_rec(
600} 606}
601 607
602STATIC void 608STATIC void
603xfs_bmbt_init_rec_from_key(
604 union xfs_btree_key *key,
605 union xfs_btree_rec *rec)
606{
607 ASSERT(key->bmbt.br_startoff != 0);
608
609 xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff),
610 0, 0, XFS_EXT_NORM);
611}
612
613STATIC void
614xfs_bmbt_init_rec_from_cur( 609xfs_bmbt_init_rec_from_cur(
615 struct xfs_btree_cur *cur, 610 struct xfs_btree_cur *cur,
616 union xfs_btree_rec *rec) 611 union xfs_btree_rec *rec)
@@ -760,7 +755,6 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
760 .get_minrecs = xfs_bmbt_get_minrecs, 755 .get_minrecs = xfs_bmbt_get_minrecs,
761 .get_dmaxrecs = xfs_bmbt_get_dmaxrecs, 756 .get_dmaxrecs = xfs_bmbt_get_dmaxrecs,
762 .init_key_from_rec = xfs_bmbt_init_key_from_rec, 757 .init_key_from_rec = xfs_bmbt_init_key_from_rec,
763 .init_rec_from_key = xfs_bmbt_init_rec_from_key,
764 .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, 758 .init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
765 .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, 759 .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
766 .key_diff = xfs_bmbt_key_diff, 760 .key_diff = xfs_bmbt_key_diff,
@@ -800,7 +794,7 @@ xfs_bmbt_init_cursor(
800 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); 794 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
801 cur->bc_private.b.ip = ip; 795 cur->bc_private.b.ip = ip;
802 cur->bc_private.b.firstblock = NULLFSBLOCK; 796 cur->bc_private.b.firstblock = NULLFSBLOCK;
803 cur->bc_private.b.flist = NULL; 797 cur->bc_private.b.dfops = NULL;
804 cur->bc_private.b.allocated = 0; 798 cur->bc_private.b.allocated = 0;
805 cur->bc_private.b.flags = 0; 799 cur->bc_private.b.flags = 0;
806 cur->bc_private.b.whichfork = whichfork; 800 cur->bc_private.b.whichfork = whichfork;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 07eeb0b4ca74..b5c213a051cd 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -23,6 +23,7 @@
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
26#include "xfs_inode.h" 27#include "xfs_inode.h"
27#include "xfs_trans.h" 28#include "xfs_trans.h"
28#include "xfs_inode_item.h" 29#include "xfs_inode_item.h"
@@ -43,15 +44,14 @@ kmem_zone_t *xfs_btree_cur_zone;
43 * Btree magic numbers. 44 * Btree magic numbers.
44 */ 45 */
45static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { 46static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
46 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, 47 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
47 XFS_FIBT_MAGIC }, 48 XFS_FIBT_MAGIC },
48 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, 49 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
49 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC } 50 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
50}; 51};
51#define xfs_btree_magic(cur) \ 52#define xfs_btree_magic(cur) \
52 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] 53 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
53 54
54
55STATIC int /* error (0 or EFSCORRUPTED) */ 55STATIC int /* error (0 or EFSCORRUPTED) */
56xfs_btree_check_lblock( 56xfs_btree_check_lblock(
57 struct xfs_btree_cur *cur, /* btree cursor */ 57 struct xfs_btree_cur *cur, /* btree cursor */
@@ -428,6 +428,50 @@ xfs_btree_dup_cursor(
428 * into a btree block (xfs_btree_*_offset) or return a pointer to the given 428 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
429 * record, key or pointer (xfs_btree_*_addr). Note that all addressing 429 * record, key or pointer (xfs_btree_*_addr). Note that all addressing
430 * inside the btree block is done using indices starting at one, not zero! 430 * inside the btree block is done using indices starting at one, not zero!
431 *
432 * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
433 * overlapping intervals. In such a tree, records are still sorted lowest to
434 * highest and indexed by the smallest key value that refers to the record.
435 * However, nodes are different: each pointer has two associated keys -- one
436 * indexing the lowest key available in the block(s) below (the same behavior
437 * as the key in a regular btree) and another indexing the highest key
438 * available in the block(s) below. Because records are /not/ sorted by the
439 * highest key, all leaf block updates require us to compute the highest key
440 * that matches any record in the leaf and to recursively update the high keys
441 * in the nodes going further up in the tree, if necessary. Nodes look like
442 * this:
443 *
444 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
445 * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
446 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
447 *
448 * To perform an interval query on an overlapped tree, perform the usual
449 * depth-first search and use the low and high keys to decide if we can skip
450 * that particular node. If a leaf node is reached, return the records that
451 * intersect the interval. Note that an interval query may return numerous
452 * entries. For a non-overlapped tree, simply search for the record associated
453 * with the lowest key and iterate forward until a non-matching record is
454 * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
455 * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in
456 * more detail.
457 *
458 * Why do we care about overlapping intervals? Let's say you have a bunch of
459 * reverse mapping records on a reflink filesystem:
460 *
461 * 1: +- file A startblock B offset C length D -----------+
462 * 2: +- file E startblock F offset G length H --------------+
463 * 3: +- file I startblock F offset J length K --+
464 * 4: +- file L... --+
465 *
466 * Now say we want to map block (B+D) into file A at offset (C+D). Ideally,
467 * we'd simply increment the length of record 1. But how do we find the record
468 * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return
469 * record 3 because the keys are ordered first by startblock. An interval
470 * query would return records 1 and 2 because they both overlap (B+D-1), and
471 * from that we can pick out record 1 as the appropriate left neighbor.
472 *
473 * In the non-overlapped case you can do a LE lookup and decrement the cursor
474 * because a record's interval must end before the next record.
431 */ 475 */
432 476
433/* 477/*
@@ -479,6 +523,18 @@ xfs_btree_key_offset(
479} 523}
480 524
481/* 525/*
526 * Calculate offset of the n-th high key in a btree block.
527 */
528STATIC size_t
529xfs_btree_high_key_offset(
530 struct xfs_btree_cur *cur,
531 int n)
532{
533 return xfs_btree_block_len(cur) +
534 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
535}
536
537/*
482 * Calculate offset of the n-th block pointer in a btree block. 538 * Calculate offset of the n-th block pointer in a btree block.
483 */ 539 */
484STATIC size_t 540STATIC size_t
@@ -519,6 +575,19 @@ xfs_btree_key_addr(
519} 575}
520 576
521/* 577/*
578 * Return a pointer to the n-th high key in the btree block.
579 */
580STATIC union xfs_btree_key *
581xfs_btree_high_key_addr(
582 struct xfs_btree_cur *cur,
583 int n,
584 struct xfs_btree_block *block)
585{
586 return (union xfs_btree_key *)
587 ((char *)block + xfs_btree_high_key_offset(cur, n));
588}
589
590/*
522 * Return a pointer to the n-th block pointer in the btree block. 591 * Return a pointer to the n-th block pointer in the btree block.
523 */ 592 */
524STATIC union xfs_btree_ptr * 593STATIC union xfs_btree_ptr *
@@ -1144,6 +1213,9 @@ xfs_btree_set_refs(
1144 case XFS_BTNUM_BMAP: 1213 case XFS_BTNUM_BMAP:
1145 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); 1214 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
1146 break; 1215 break;
1216 case XFS_BTNUM_RMAP:
1217 xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
1218 break;
1147 default: 1219 default:
1148 ASSERT(0); 1220 ASSERT(0);
1149 } 1221 }
@@ -1879,32 +1951,214 @@ error0:
1879 return error; 1951 return error;
1880} 1952}
1881 1953
1954/* Find the high key storage area from a regular key. */
1955STATIC union xfs_btree_key *
1956xfs_btree_high_key_from_key(
1957 struct xfs_btree_cur *cur,
1958 union xfs_btree_key *key)
1959{
1960 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
1961 return (union xfs_btree_key *)((char *)key +
1962 (cur->bc_ops->key_len / 2));
1963}
1964
1965/* Determine the low (and high if overlapped) keys of a leaf block */
1966STATIC void
1967xfs_btree_get_leaf_keys(
1968 struct xfs_btree_cur *cur,
1969 struct xfs_btree_block *block,
1970 union xfs_btree_key *key)
1971{
1972 union xfs_btree_key max_hkey;
1973 union xfs_btree_key hkey;
1974 union xfs_btree_rec *rec;
1975 union xfs_btree_key *high;
1976 int n;
1977
1978 rec = xfs_btree_rec_addr(cur, 1, block);
1979 cur->bc_ops->init_key_from_rec(key, rec);
1980
1981 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
1982
1983 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
1984 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
1985 rec = xfs_btree_rec_addr(cur, n, block);
1986 cur->bc_ops->init_high_key_from_rec(&hkey, rec);
1987 if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey)
1988 > 0)
1989 max_hkey = hkey;
1990 }
1991
1992 high = xfs_btree_high_key_from_key(cur, key);
1993 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
1994 }
1995}
1996
1997/* Determine the low (and high if overlapped) keys of a node block */
1998STATIC void
1999xfs_btree_get_node_keys(
2000 struct xfs_btree_cur *cur,
2001 struct xfs_btree_block *block,
2002 union xfs_btree_key *key)
2003{
2004 union xfs_btree_key *hkey;
2005 union xfs_btree_key *max_hkey;
2006 union xfs_btree_key *high;
2007 int n;
2008
2009 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2010 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2011 cur->bc_ops->key_len / 2);
2012
2013 max_hkey = xfs_btree_high_key_addr(cur, 1, block);
2014 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2015 hkey = xfs_btree_high_key_addr(cur, n, block);
2016 if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0)
2017 max_hkey = hkey;
2018 }
2019
2020 high = xfs_btree_high_key_from_key(cur, key);
2021 memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
2022 } else {
2023 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2024 cur->bc_ops->key_len);
2025 }
2026}
2027
2028/* Derive the keys for any btree block. */
2029STATIC void
2030xfs_btree_get_keys(
2031 struct xfs_btree_cur *cur,
2032 struct xfs_btree_block *block,
2033 union xfs_btree_key *key)
2034{
2035 if (be16_to_cpu(block->bb_level) == 0)
2036 xfs_btree_get_leaf_keys(cur, block, key);
2037 else
2038 xfs_btree_get_node_keys(cur, block, key);
2039}
2040
1882/* 2041/*
1883 * Update keys at all levels from here to the root along the cursor's path. 2042 * Decide if we need to update the parent keys of a btree block. For
2043 * a standard btree this is only necessary if we're updating the first
2044 * record/key. For an overlapping btree, we must always update the
2045 * keys because the highest key can be in any of the records or keys
2046 * in the block.
2047 */
2048static inline bool
2049xfs_btree_needs_key_update(
2050 struct xfs_btree_cur *cur,
2051 int ptr)
2052{
2053 return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
2054}
2055
2056/*
2057 * Update the low and high parent keys of the given level, progressing
2058 * towards the root. If force_all is false, stop if the keys for a given
2059 * level do not need updating.
1884 */ 2060 */
1885STATIC int 2061STATIC int
1886xfs_btree_updkey( 2062__xfs_btree_updkeys(
2063 struct xfs_btree_cur *cur,
2064 int level,
2065 struct xfs_btree_block *block,
2066 struct xfs_buf *bp0,
2067 bool force_all)
2068{
2069 union xfs_btree_bigkey key; /* keys from current level */
2070 union xfs_btree_key *lkey; /* keys from the next level up */
2071 union xfs_btree_key *hkey;
2072 union xfs_btree_key *nlkey; /* keys from the next level up */
2073 union xfs_btree_key *nhkey;
2074 struct xfs_buf *bp;
2075 int ptr;
2076
2077 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2078
2079 /* Exit if there aren't any parent levels to update. */
2080 if (level + 1 >= cur->bc_nlevels)
2081 return 0;
2082
2083 trace_xfs_btree_updkeys(cur, level, bp0);
2084
2085 lkey = (union xfs_btree_key *)&key;
2086 hkey = xfs_btree_high_key_from_key(cur, lkey);
2087 xfs_btree_get_keys(cur, block, lkey);
2088 for (level++; level < cur->bc_nlevels; level++) {
2089#ifdef DEBUG
2090 int error;
2091#endif
2092 block = xfs_btree_get_block(cur, level, &bp);
2093 trace_xfs_btree_updkeys(cur, level, bp);
2094#ifdef DEBUG
2095 error = xfs_btree_check_block(cur, block, level, bp);
2096 if (error) {
2097 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2098 return error;
2099 }
2100#endif
2101 ptr = cur->bc_ptrs[level];
2102 nlkey = xfs_btree_key_addr(cur, ptr, block);
2103 nhkey = xfs_btree_high_key_addr(cur, ptr, block);
2104 if (!force_all &&
2105 !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 ||
2106 cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0))
2107 break;
2108 xfs_btree_copy_keys(cur, nlkey, lkey, 1);
2109 xfs_btree_log_keys(cur, bp, ptr, ptr);
2110 if (level + 1 >= cur->bc_nlevels)
2111 break;
2112 xfs_btree_get_node_keys(cur, block, lkey);
2113 }
2114
2115 return 0;
2116}
2117
2118/* Update all the keys from some level in cursor back to the root. */
2119STATIC int
2120xfs_btree_updkeys_force(
2121 struct xfs_btree_cur *cur,
2122 int level)
2123{
2124 struct xfs_buf *bp;
2125 struct xfs_btree_block *block;
2126
2127 block = xfs_btree_get_block(cur, level, &bp);
2128 return __xfs_btree_updkeys(cur, level, block, bp, true);
2129}
2130
2131/*
2132 * Update the parent keys of the given level, progressing towards the root.
2133 */
2134STATIC int
2135xfs_btree_update_keys(
1887 struct xfs_btree_cur *cur, 2136 struct xfs_btree_cur *cur,
1888 union xfs_btree_key *keyp,
1889 int level) 2137 int level)
1890{ 2138{
1891 struct xfs_btree_block *block; 2139 struct xfs_btree_block *block;
1892 struct xfs_buf *bp; 2140 struct xfs_buf *bp;
1893 union xfs_btree_key *kp; 2141 union xfs_btree_key *kp;
2142 union xfs_btree_key key;
1894 int ptr; 2143 int ptr;
1895 2144
2145 ASSERT(level >= 0);
2146
2147 block = xfs_btree_get_block(cur, level, &bp);
2148 if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
2149 return __xfs_btree_updkeys(cur, level, block, bp, false);
2150
1896 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2151 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1897 XFS_BTREE_TRACE_ARGIK(cur, level, keyp); 2152 XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1898 2153
1899 ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1900
1901 /* 2154 /*
1902 * Go up the tree from this level toward the root. 2155 * Go up the tree from this level toward the root.
1903 * At each level, update the key value to the value input. 2156 * At each level, update the key value to the value input.
1904 * Stop when we reach a level where the cursor isn't pointing 2157 * Stop when we reach a level where the cursor isn't pointing
1905 * at the first entry in the block. 2158 * at the first entry in the block.
1906 */ 2159 */
1907 for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { 2160 xfs_btree_get_keys(cur, block, &key);
2161 for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1908#ifdef DEBUG 2162#ifdef DEBUG
1909 int error; 2163 int error;
1910#endif 2164#endif
@@ -1918,7 +2172,7 @@ xfs_btree_updkey(
1918#endif 2172#endif
1919 ptr = cur->bc_ptrs[level]; 2173 ptr = cur->bc_ptrs[level];
1920 kp = xfs_btree_key_addr(cur, ptr, block); 2174 kp = xfs_btree_key_addr(cur, ptr, block);
1921 xfs_btree_copy_keys(cur, kp, keyp, 1); 2175 xfs_btree_copy_keys(cur, kp, &key, 1);
1922 xfs_btree_log_keys(cur, bp, ptr, ptr); 2176 xfs_btree_log_keys(cur, bp, ptr, ptr);
1923 } 2177 }
1924 2178
@@ -1970,12 +2224,9 @@ xfs_btree_update(
1970 ptr, LASTREC_UPDATE); 2224 ptr, LASTREC_UPDATE);
1971 } 2225 }
1972 2226
1973 /* Updating first rec in leaf. Pass new key value up to our parent. */ 2227 /* Pass new key value up to our parent. */
1974 if (ptr == 1) { 2228 if (xfs_btree_needs_key_update(cur, ptr)) {
1975 union xfs_btree_key key; 2229 error = xfs_btree_update_keys(cur, 0);
1976
1977 cur->bc_ops->init_key_from_rec(&key, rec);
1978 error = xfs_btree_updkey(cur, &key, 1);
1979 if (error) 2230 if (error)
1980 goto error0; 2231 goto error0;
1981 } 2232 }
@@ -1998,18 +2249,19 @@ xfs_btree_lshift(
1998 int level, 2249 int level,
1999 int *stat) /* success/failure */ 2250 int *stat) /* success/failure */
2000{ 2251{
2001 union xfs_btree_key key; /* btree key */
2002 struct xfs_buf *lbp; /* left buffer pointer */ 2252 struct xfs_buf *lbp; /* left buffer pointer */
2003 struct xfs_btree_block *left; /* left btree block */ 2253 struct xfs_btree_block *left; /* left btree block */
2004 int lrecs; /* left record count */ 2254 int lrecs; /* left record count */
2005 struct xfs_buf *rbp; /* right buffer pointer */ 2255 struct xfs_buf *rbp; /* right buffer pointer */
2006 struct xfs_btree_block *right; /* right btree block */ 2256 struct xfs_btree_block *right; /* right btree block */
2257 struct xfs_btree_cur *tcur; /* temporary btree cursor */
2007 int rrecs; /* right record count */ 2258 int rrecs; /* right record count */
2008 union xfs_btree_ptr lptr; /* left btree pointer */ 2259 union xfs_btree_ptr lptr; /* left btree pointer */
2009 union xfs_btree_key *rkp = NULL; /* right btree key */ 2260 union xfs_btree_key *rkp = NULL; /* right btree key */
2010 union xfs_btree_ptr *rpp = NULL; /* right address pointer */ 2261 union xfs_btree_ptr *rpp = NULL; /* right address pointer */
2011 union xfs_btree_rec *rrp = NULL; /* right record pointer */ 2262 union xfs_btree_rec *rrp = NULL; /* right record pointer */
2012 int error; /* error return value */ 2263 int error; /* error return value */
2264 int i;
2013 2265
2014 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2266 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2015 XFS_BTREE_TRACE_ARGI(cur, level); 2267 XFS_BTREE_TRACE_ARGI(cur, level);
@@ -2139,18 +2391,33 @@ xfs_btree_lshift(
2139 xfs_btree_rec_addr(cur, 2, right), 2391 xfs_btree_rec_addr(cur, 2, right),
2140 -1, rrecs); 2392 -1, rrecs);
2141 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2393 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2394 }
2142 2395
2143 /* 2396 /*
2144 * If it's the first record in the block, we'll need a key 2397 * Using a temporary cursor, update the parent key values of the
2145 * structure to pass up to the next level (updkey). 2398 * block on the left.
2146 */ 2399 */
2147 cur->bc_ops->init_key_from_rec(&key, 2400 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2148 xfs_btree_rec_addr(cur, 1, right)); 2401 error = xfs_btree_dup_cursor(cur, &tcur);
2149 rkp = &key; 2402 if (error)
2403 goto error0;
2404 i = xfs_btree_firstrec(tcur, level);
2405 XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
2406
2407 error = xfs_btree_decrement(tcur, level, &i);
2408 if (error)
2409 goto error1;
2410
2411 /* Update the parent high keys of the left block, if needed. */
2412 error = xfs_btree_update_keys(tcur, level);
2413 if (error)
2414 goto error1;
2415
2416 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2150 } 2417 }
2151 2418
2152 /* Update the parent key values of right. */ 2419 /* Update the parent keys of the right block. */
2153 error = xfs_btree_updkey(cur, rkp, level + 1); 2420 error = xfs_btree_update_keys(cur, level);
2154 if (error) 2421 if (error)
2155 goto error0; 2422 goto error0;
2156 2423
@@ -2169,6 +2436,11 @@ out0:
2169error0: 2436error0:
2170 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2437 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2171 return error; 2438 return error;
2439
2440error1:
2441 XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2442 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2443 return error;
2172} 2444}
2173 2445
2174/* 2446/*
@@ -2181,7 +2453,6 @@ xfs_btree_rshift(
2181 int level, 2453 int level,
2182 int *stat) /* success/failure */ 2454 int *stat) /* success/failure */
2183{ 2455{
2184 union xfs_btree_key key; /* btree key */
2185 struct xfs_buf *lbp; /* left buffer pointer */ 2456 struct xfs_buf *lbp; /* left buffer pointer */
2186 struct xfs_btree_block *left; /* left btree block */ 2457 struct xfs_btree_block *left; /* left btree block */
2187 struct xfs_buf *rbp; /* right buffer pointer */ 2458 struct xfs_buf *rbp; /* right buffer pointer */
@@ -2290,12 +2561,6 @@ xfs_btree_rshift(
2290 /* Now put the new data in, and log it. */ 2561 /* Now put the new data in, and log it. */
2291 xfs_btree_copy_recs(cur, rrp, lrp, 1); 2562 xfs_btree_copy_recs(cur, rrp, lrp, 1);
2292 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); 2563 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2293
2294 cur->bc_ops->init_key_from_rec(&key, rrp);
2295 rkp = &key;
2296
2297 ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2298 xfs_btree_rec_addr(cur, 2, right)));
2299 } 2564 }
2300 2565
2301 /* 2566 /*
@@ -2315,13 +2580,21 @@ xfs_btree_rshift(
2315 if (error) 2580 if (error)
2316 goto error0; 2581 goto error0;
2317 i = xfs_btree_lastrec(tcur, level); 2582 i = xfs_btree_lastrec(tcur, level);
2318 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 2583 XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
2319 2584
2320 error = xfs_btree_increment(tcur, level, &i); 2585 error = xfs_btree_increment(tcur, level, &i);
2321 if (error) 2586 if (error)
2322 goto error1; 2587 goto error1;
2323 2588
2324 error = xfs_btree_updkey(tcur, rkp, level + 1); 2589 /* Update the parent high keys of the left block, if needed. */
2590 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2591 error = xfs_btree_update_keys(cur, level);
2592 if (error)
2593 goto error1;
2594 }
2595
2596 /* Update the parent keys of the right block. */
2597 error = xfs_btree_update_keys(tcur, level);
2325 if (error) 2598 if (error)
2326 goto error1; 2599 goto error1;
2327 2600
@@ -2422,6 +2695,11 @@ __xfs_btree_split(
2422 2695
2423 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2696 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2424 2697
2698 /* Adjust numrecs for the later get_*_keys() calls. */
2699 lrecs -= rrecs;
2700 xfs_btree_set_numrecs(left, lrecs);
2701 xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2702
2425 /* 2703 /*
2426 * Copy btree block entries from the left block over to the 2704 * Copy btree block entries from the left block over to the
2427 * new block, the right. Update the right block and log the 2705 * new block, the right. Update the right block and log the
@@ -2447,14 +2725,15 @@ __xfs_btree_split(
2447 } 2725 }
2448#endif 2726#endif
2449 2727
2728 /* Copy the keys & pointers to the new block. */
2450 xfs_btree_copy_keys(cur, rkp, lkp, rrecs); 2729 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2451 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs); 2730 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2452 2731
2453 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2732 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2454 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2733 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2455 2734
2456 /* Grab the keys to the entries moved to the right block */ 2735 /* Stash the keys of the new block for later insertion. */
2457 xfs_btree_copy_keys(cur, key, rkp, 1); 2736 xfs_btree_get_node_keys(cur, right, key);
2458 } else { 2737 } else {
2459 /* It's a leaf. Move records. */ 2738 /* It's a leaf. Move records. */
2460 union xfs_btree_rec *lrp; /* left record pointer */ 2739 union xfs_btree_rec *lrp; /* left record pointer */
@@ -2463,27 +2742,23 @@ __xfs_btree_split(
2463 lrp = xfs_btree_rec_addr(cur, src_index, left); 2742 lrp = xfs_btree_rec_addr(cur, src_index, left);
2464 rrp = xfs_btree_rec_addr(cur, 1, right); 2743 rrp = xfs_btree_rec_addr(cur, 1, right);
2465 2744
2745 /* Copy records to the new block. */
2466 xfs_btree_copy_recs(cur, rrp, lrp, rrecs); 2746 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2467 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2747 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2468 2748
2469 cur->bc_ops->init_key_from_rec(key, 2749 /* Stash the keys of the new block for later insertion. */
2470 xfs_btree_rec_addr(cur, 1, right)); 2750 xfs_btree_get_leaf_keys(cur, right, key);
2471 } 2751 }
2472 2752
2473
2474 /* 2753 /*
2475 * Find the left block number by looking in the buffer. 2754 * Find the left block number by looking in the buffer.
2476 * Adjust numrecs, sibling pointers. 2755 * Adjust sibling pointers.
2477 */ 2756 */
2478 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB); 2757 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2479 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB); 2758 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2480 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2759 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2481 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2760 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2482 2761
2483 lrecs -= rrecs;
2484 xfs_btree_set_numrecs(left, lrecs);
2485 xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2486
2487 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); 2762 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2488 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 2763 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2489 2764
@@ -2499,6 +2774,14 @@ __xfs_btree_split(
2499 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB); 2774 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2500 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 2775 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2501 } 2776 }
2777
2778 /* Update the parent high keys of the left block, if needed. */
2779 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2780 error = xfs_btree_update_keys(cur, level);
2781 if (error)
2782 goto error0;
2783 }
2784
2502 /* 2785 /*
2503 * If the cursor is really in the right block, move it there. 2786 * If the cursor is really in the right block, move it there.
2504 * If it's just pointing past the last entry in left, then we'll 2787 * If it's just pointing past the last entry in left, then we'll
@@ -2802,6 +3085,7 @@ xfs_btree_new_root(
2802 bp = lbp; 3085 bp = lbp;
2803 nptr = 2; 3086 nptr = 2;
2804 } 3087 }
3088
2805 /* Fill in the new block's btree header and log it. */ 3089 /* Fill in the new block's btree header and log it. */
2806 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2); 3090 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
2807 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); 3091 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
@@ -2810,19 +3094,24 @@ xfs_btree_new_root(
2810 3094
2811 /* Fill in the key data in the new root. */ 3095 /* Fill in the key data in the new root. */
2812 if (xfs_btree_get_level(left) > 0) { 3096 if (xfs_btree_get_level(left) > 0) {
2813 xfs_btree_copy_keys(cur, 3097 /*
2814 xfs_btree_key_addr(cur, 1, new), 3098 * Get the keys for the left block's keys and put them directly
2815 xfs_btree_key_addr(cur, 1, left), 1); 3099 * in the parent block. Do the same for the right block.
2816 xfs_btree_copy_keys(cur, 3100 */
2817 xfs_btree_key_addr(cur, 2, new), 3101 xfs_btree_get_node_keys(cur, left,
2818 xfs_btree_key_addr(cur, 1, right), 1); 3102 xfs_btree_key_addr(cur, 1, new));
3103 xfs_btree_get_node_keys(cur, right,
3104 xfs_btree_key_addr(cur, 2, new));
2819 } else { 3105 } else {
2820 cur->bc_ops->init_key_from_rec( 3106 /*
2821 xfs_btree_key_addr(cur, 1, new), 3107 * Get the keys for the left block's records and put them
2822 xfs_btree_rec_addr(cur, 1, left)); 3108 * directly in the parent block. Do the same for the right
2823 cur->bc_ops->init_key_from_rec( 3109 * block.
2824 xfs_btree_key_addr(cur, 2, new), 3110 */
2825 xfs_btree_rec_addr(cur, 1, right)); 3111 xfs_btree_get_leaf_keys(cur, left,
3112 xfs_btree_key_addr(cur, 1, new));
3113 xfs_btree_get_leaf_keys(cur, right,
3114 xfs_btree_key_addr(cur, 2, new));
2826 } 3115 }
2827 xfs_btree_log_keys(cur, nbp, 1, 2); 3116 xfs_btree_log_keys(cur, nbp, 1, 2);
2828 3117
@@ -2858,10 +3147,9 @@ xfs_btree_make_block_unfull(
2858 int *index, /* new tree index */ 3147 int *index, /* new tree index */
2859 union xfs_btree_ptr *nptr, /* new btree ptr */ 3148 union xfs_btree_ptr *nptr, /* new btree ptr */
2860 struct xfs_btree_cur **ncur, /* new btree cursor */ 3149 struct xfs_btree_cur **ncur, /* new btree cursor */
2861 union xfs_btree_rec *nrec, /* new record */ 3150 union xfs_btree_key *key, /* key of new block */
2862 int *stat) 3151 int *stat)
2863{ 3152{
2864 union xfs_btree_key key; /* new btree key value */
2865 int error = 0; 3153 int error = 0;
2866 3154
2867 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 3155 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
@@ -2871,6 +3159,7 @@ xfs_btree_make_block_unfull(
2871 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { 3159 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2872 /* A root block that can be made bigger. */ 3160 /* A root block that can be made bigger. */
2873 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); 3161 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
3162 *stat = 1;
2874 } else { 3163 } else {
2875 /* A root block that needs replacing */ 3164 /* A root block that needs replacing */
2876 int logflags = 0; 3165 int logflags = 0;
@@ -2906,13 +3195,12 @@ xfs_btree_make_block_unfull(
2906 * If this works we have to re-set our variables because we 3195 * If this works we have to re-set our variables because we
2907 * could be in a different block now. 3196 * could be in a different block now.
2908 */ 3197 */
2909 error = xfs_btree_split(cur, level, nptr, &key, ncur, stat); 3198 error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
2910 if (error || *stat == 0) 3199 if (error || *stat == 0)
2911 return error; 3200 return error;
2912 3201
2913 3202
2914 *index = cur->bc_ptrs[level]; 3203 *index = cur->bc_ptrs[level];
2915 cur->bc_ops->init_rec_from_key(&key, nrec);
2916 return 0; 3204 return 0;
2917} 3205}
2918 3206
@@ -2925,16 +3213,17 @@ xfs_btree_insrec(
2925 struct xfs_btree_cur *cur, /* btree cursor */ 3213 struct xfs_btree_cur *cur, /* btree cursor */
2926 int level, /* level to insert record at */ 3214 int level, /* level to insert record at */
2927 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ 3215 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */
2928 union xfs_btree_rec *recp, /* i/o: record data inserted */ 3216 union xfs_btree_rec *rec, /* record to insert */
3217 union xfs_btree_key *key, /* i/o: block key for ptrp */
2929 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ 3218 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */
2930 int *stat) /* success/failure */ 3219 int *stat) /* success/failure */
2931{ 3220{
2932 struct xfs_btree_block *block; /* btree block */ 3221 struct xfs_btree_block *block; /* btree block */
2933 struct xfs_buf *bp; /* buffer for block */ 3222 struct xfs_buf *bp; /* buffer for block */
2934 union xfs_btree_key key; /* btree key */
2935 union xfs_btree_ptr nptr; /* new block ptr */ 3223 union xfs_btree_ptr nptr; /* new block ptr */
2936 struct xfs_btree_cur *ncur; /* new btree cursor */ 3224 struct xfs_btree_cur *ncur; /* new btree cursor */
2937 union xfs_btree_rec nrec; /* new record count */ 3225 union xfs_btree_bigkey nkey; /* new block key */
3226 union xfs_btree_key *lkey;
2938 int optr; /* old key/record index */ 3227 int optr; /* old key/record index */
2939 int ptr; /* key/record index */ 3228 int ptr; /* key/record index */
2940 int numrecs;/* number of records */ 3229 int numrecs;/* number of records */
@@ -2942,11 +3231,13 @@ xfs_btree_insrec(
2942#ifdef DEBUG 3231#ifdef DEBUG
2943 int i; 3232 int i;
2944#endif 3233#endif
3234 xfs_daddr_t old_bn;
2945 3235
2946 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3236 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2947 XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp); 3237 XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec);
2948 3238
2949 ncur = NULL; 3239 ncur = NULL;
3240 lkey = (union xfs_btree_key *)&nkey;
2950 3241
2951 /* 3242 /*
2952 * If we have an external root pointer, and we've made it to the 3243 * If we have an external root pointer, and we've made it to the
@@ -2969,15 +3260,13 @@ xfs_btree_insrec(
2969 return 0; 3260 return 0;
2970 } 3261 }
2971 3262
2972 /* Make a key out of the record data to be inserted, and save it. */
2973 cur->bc_ops->init_key_from_rec(&key, recp);
2974
2975 optr = ptr; 3263 optr = ptr;
2976 3264
2977 XFS_BTREE_STATS_INC(cur, insrec); 3265 XFS_BTREE_STATS_INC(cur, insrec);
2978 3266
2979 /* Get pointers to the btree buffer and block. */ 3267 /* Get pointers to the btree buffer and block. */
2980 block = xfs_btree_get_block(cur, level, &bp); 3268 block = xfs_btree_get_block(cur, level, &bp);
3269 old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL;
2981 numrecs = xfs_btree_get_numrecs(block); 3270 numrecs = xfs_btree_get_numrecs(block);
2982 3271
2983#ifdef DEBUG 3272#ifdef DEBUG
@@ -2988,10 +3277,10 @@ xfs_btree_insrec(
2988 /* Check that the new entry is being inserted in the right place. */ 3277 /* Check that the new entry is being inserted in the right place. */
2989 if (ptr <= numrecs) { 3278 if (ptr <= numrecs) {
2990 if (level == 0) { 3279 if (level == 0) {
2991 ASSERT(cur->bc_ops->recs_inorder(cur, recp, 3280 ASSERT(cur->bc_ops->recs_inorder(cur, rec,
2992 xfs_btree_rec_addr(cur, ptr, block))); 3281 xfs_btree_rec_addr(cur, ptr, block)));
2993 } else { 3282 } else {
2994 ASSERT(cur->bc_ops->keys_inorder(cur, &key, 3283 ASSERT(cur->bc_ops->keys_inorder(cur, key,
2995 xfs_btree_key_addr(cur, ptr, block))); 3284 xfs_btree_key_addr(cur, ptr, block)));
2996 } 3285 }
2997 } 3286 }
@@ -3004,7 +3293,7 @@ xfs_btree_insrec(
3004 xfs_btree_set_ptr_null(cur, &nptr); 3293 xfs_btree_set_ptr_null(cur, &nptr);
3005 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { 3294 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
3006 error = xfs_btree_make_block_unfull(cur, level, numrecs, 3295 error = xfs_btree_make_block_unfull(cur, level, numrecs,
3007 &optr, &ptr, &nptr, &ncur, &nrec, stat); 3296 &optr, &ptr, &nptr, &ncur, lkey, stat);
3008 if (error || *stat == 0) 3297 if (error || *stat == 0)
3009 goto error0; 3298 goto error0;
3010 } 3299 }
@@ -3054,7 +3343,7 @@ xfs_btree_insrec(
3054#endif 3343#endif
3055 3344
3056 /* Now put the new data in, bump numrecs and log it. */ 3345 /* Now put the new data in, bump numrecs and log it. */
3057 xfs_btree_copy_keys(cur, kp, &key, 1); 3346 xfs_btree_copy_keys(cur, kp, key, 1);
3058 xfs_btree_copy_ptrs(cur, pp, ptrp, 1); 3347 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
3059 numrecs++; 3348 numrecs++;
3060 xfs_btree_set_numrecs(block, numrecs); 3349 xfs_btree_set_numrecs(block, numrecs);
@@ -3075,7 +3364,7 @@ xfs_btree_insrec(
3075 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); 3364 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
3076 3365
3077 /* Now put the new data in, bump numrecs and log it. */ 3366 /* Now put the new data in, bump numrecs and log it. */
3078 xfs_btree_copy_recs(cur, rp, recp, 1); 3367 xfs_btree_copy_recs(cur, rp, rec, 1);
3079 xfs_btree_set_numrecs(block, ++numrecs); 3368 xfs_btree_set_numrecs(block, ++numrecs);
3080 xfs_btree_log_recs(cur, bp, ptr, numrecs); 3369 xfs_btree_log_recs(cur, bp, ptr, numrecs);
3081#ifdef DEBUG 3370#ifdef DEBUG
@@ -3089,9 +3378,18 @@ xfs_btree_insrec(
3089 /* Log the new number of records in the btree header. */ 3378 /* Log the new number of records in the btree header. */
3090 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3379 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3091 3380
3092 /* If we inserted at the start of a block, update the parents' keys. */ 3381 /*
3093 if (optr == 1) { 3382 * If we just inserted into a new tree block, we have to
3094 error = xfs_btree_updkey(cur, &key, level + 1); 3383 * recalculate nkey here because nkey is out of date.
3384 *
3385 * Otherwise we're just updating an existing block (having shoved
3386 * some records into the new tree block), so use the regular key
3387 * update mechanism.
3388 */
3389 if (bp && bp->b_bn != old_bn) {
3390 xfs_btree_get_keys(cur, block, lkey);
3391 } else if (xfs_btree_needs_key_update(cur, optr)) {
3392 error = xfs_btree_update_keys(cur, level);
3095 if (error) 3393 if (error)
3096 goto error0; 3394 goto error0;
3097 } 3395 }
@@ -3101,7 +3399,7 @@ xfs_btree_insrec(
3101 * we are at the far right edge of the tree, update it. 3399 * we are at the far right edge of the tree, update it.
3102 */ 3400 */
3103 if (xfs_btree_is_lastrec(cur, block, level)) { 3401 if (xfs_btree_is_lastrec(cur, block, level)) {
3104 cur->bc_ops->update_lastrec(cur, block, recp, 3402 cur->bc_ops->update_lastrec(cur, block, rec,
3105 ptr, LASTREC_INSREC); 3403 ptr, LASTREC_INSREC);
3106 } 3404 }
3107 3405
@@ -3111,7 +3409,7 @@ xfs_btree_insrec(
3111 */ 3409 */
3112 *ptrp = nptr; 3410 *ptrp = nptr;
3113 if (!xfs_btree_ptr_is_null(cur, &nptr)) { 3411 if (!xfs_btree_ptr_is_null(cur, &nptr)) {
3114 *recp = nrec; 3412 xfs_btree_copy_keys(cur, key, lkey, 1);
3115 *curp = ncur; 3413 *curp = ncur;
3116 } 3414 }
3117 3415
@@ -3142,14 +3440,20 @@ xfs_btree_insert(
3142 union xfs_btree_ptr nptr; /* new block number (split result) */ 3440 union xfs_btree_ptr nptr; /* new block number (split result) */
3143 struct xfs_btree_cur *ncur; /* new cursor (split result) */ 3441 struct xfs_btree_cur *ncur; /* new cursor (split result) */
3144 struct xfs_btree_cur *pcur; /* previous level's cursor */ 3442 struct xfs_btree_cur *pcur; /* previous level's cursor */
3443 union xfs_btree_bigkey bkey; /* key of block to insert */
3444 union xfs_btree_key *key;
3145 union xfs_btree_rec rec; /* record to insert */ 3445 union xfs_btree_rec rec; /* record to insert */
3146 3446
3147 level = 0; 3447 level = 0;
3148 ncur = NULL; 3448 ncur = NULL;
3149 pcur = cur; 3449 pcur = cur;
3450 key = (union xfs_btree_key *)&bkey;
3150 3451
3151 xfs_btree_set_ptr_null(cur, &nptr); 3452 xfs_btree_set_ptr_null(cur, &nptr);
3453
3454 /* Make a key out of the record data to be inserted, and save it. */
3152 cur->bc_ops->init_rec_from_cur(cur, &rec); 3455 cur->bc_ops->init_rec_from_cur(cur, &rec);
3456 cur->bc_ops->init_key_from_rec(key, &rec);
3153 3457
3154 /* 3458 /*
3155 * Loop going up the tree, starting at the leaf level. 3459 * Loop going up the tree, starting at the leaf level.
@@ -3161,7 +3465,8 @@ xfs_btree_insert(
3161 * Insert nrec/nptr into this level of the tree. 3465 * Insert nrec/nptr into this level of the tree.
3162 * Note if we fail, nptr will be null. 3466 * Note if we fail, nptr will be null.
3163 */ 3467 */
3164 error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i); 3468 error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
3469 &ncur, &i);
3165 if (error) { 3470 if (error) {
3166 if (pcur != cur) 3471 if (pcur != cur)
3167 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 3472 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
@@ -3385,8 +3690,6 @@ xfs_btree_delrec(
3385 struct xfs_buf *bp; /* buffer for block */ 3690 struct xfs_buf *bp; /* buffer for block */
3386 int error; /* error return value */ 3691 int error; /* error return value */
3387 int i; /* loop counter */ 3692 int i; /* loop counter */
3388 union xfs_btree_key key; /* storage for keyp */
3389 union xfs_btree_key *keyp = &key; /* passed to the next level */
3390 union xfs_btree_ptr lptr; /* left sibling block ptr */ 3693 union xfs_btree_ptr lptr; /* left sibling block ptr */
3391 struct xfs_buf *lbp; /* left buffer pointer */ 3694 struct xfs_buf *lbp; /* left buffer pointer */
3392 struct xfs_btree_block *left; /* left btree block */ 3695 struct xfs_btree_block *left; /* left btree block */
@@ -3457,13 +3760,6 @@ xfs_btree_delrec(
3457 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); 3760 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3458 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); 3761 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3459 } 3762 }
3460
3461 /*
3462 * If it's the first record in the block, we'll need to pass a
3463 * key up to the next level (updkey).
3464 */
3465 if (ptr == 1)
3466 keyp = xfs_btree_key_addr(cur, 1, block);
3467 } else { 3763 } else {
3468 /* It's a leaf. operate on records */ 3764 /* It's a leaf. operate on records */
3469 if (ptr < numrecs) { 3765 if (ptr < numrecs) {
@@ -3472,16 +3768,6 @@ xfs_btree_delrec(
3472 -1, numrecs - ptr); 3768 -1, numrecs - ptr);
3473 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); 3769 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3474 } 3770 }
3475
3476 /*
3477 * If it's the first record in the block, we'll need a key
3478 * structure to pass up to the next level (updkey).
3479 */
3480 if (ptr == 1) {
3481 cur->bc_ops->init_key_from_rec(&key,
3482 xfs_btree_rec_addr(cur, 1, block));
3483 keyp = &key;
3484 }
3485 } 3771 }
3486 3772
3487 /* 3773 /*
@@ -3548,8 +3834,8 @@ xfs_btree_delrec(
3548 * If we deleted the leftmost entry in the block, update the 3834 * If we deleted the leftmost entry in the block, update the
3549 * key values above us in the tree. 3835 * key values above us in the tree.
3550 */ 3836 */
3551 if (ptr == 1) { 3837 if (xfs_btree_needs_key_update(cur, ptr)) {
3552 error = xfs_btree_updkey(cur, keyp, level + 1); 3838 error = xfs_btree_update_keys(cur, level);
3553 if (error) 3839 if (error)
3554 goto error0; 3840 goto error0;
3555 } 3841 }
@@ -3878,6 +4164,16 @@ xfs_btree_delrec(
3878 if (level > 0) 4164 if (level > 0)
3879 cur->bc_ptrs[level]--; 4165 cur->bc_ptrs[level]--;
3880 4166
4167 /*
4168 * We combined blocks, so we have to update the parent keys if the
4169 * btree supports overlapped intervals. However, bc_ptrs[level + 1]
4170 * points to the old block so that the caller knows which record to
4171 * delete. Therefore, the caller must be savvy enough to call updkeys
4172 * for us if we return stat == 2. The other exit points from this
4173 * function don't require deletions further up the tree, so they can
4174 * call updkeys directly.
4175 */
4176
3881 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 4177 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3882 /* Return value means the next level up has something to do. */ 4178 /* Return value means the next level up has something to do. */
3883 *stat = 2; 4179 *stat = 2;
@@ -3903,6 +4199,7 @@ xfs_btree_delete(
3903 int error; /* error return value */ 4199 int error; /* error return value */
3904 int level; 4200 int level;
3905 int i; 4201 int i;
4202 bool joined = false;
3906 4203
3907 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 4204 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3908 4205
@@ -3916,6 +4213,18 @@ xfs_btree_delete(
3916 error = xfs_btree_delrec(cur, level, &i); 4213 error = xfs_btree_delrec(cur, level, &i);
3917 if (error) 4214 if (error)
3918 goto error0; 4215 goto error0;
4216 if (i == 2)
4217 joined = true;
4218 }
4219
4220 /*
4221 * If we combined blocks as part of deleting the record, delrec won't
4222 * have updated the parent high keys so we have to do that here.
4223 */
4224 if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
4225 error = xfs_btree_updkeys_force(cur, 0);
4226 if (error)
4227 goto error0;
3919 } 4228 }
3920 4229
3921 if (i == 0) { 4230 if (i == 0) {
@@ -3978,6 +4287,81 @@ xfs_btree_get_rec(
3978 return 0; 4287 return 0;
3979} 4288}
3980 4289
4290/* Visit a block in a btree. */
4291STATIC int
4292xfs_btree_visit_block(
4293 struct xfs_btree_cur *cur,
4294 int level,
4295 xfs_btree_visit_blocks_fn fn,
4296 void *data)
4297{
4298 struct xfs_btree_block *block;
4299 struct xfs_buf *bp;
4300 union xfs_btree_ptr rptr;
4301 int error;
4302
4303 /* do right sibling readahead */
4304 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4305 block = xfs_btree_get_block(cur, level, &bp);
4306
4307 /* process the block */
4308 error = fn(cur, level, data);
4309 if (error)
4310 return error;
4311
4312 /* now read rh sibling block for next iteration */
4313 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4314 if (xfs_btree_ptr_is_null(cur, &rptr))
4315 return -ENOENT;
4316
4317 return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4318}
4319
4320
4321/* Visit every block in a btree. */
4322int
4323xfs_btree_visit_blocks(
4324 struct xfs_btree_cur *cur,
4325 xfs_btree_visit_blocks_fn fn,
4326 void *data)
4327{
4328 union xfs_btree_ptr lptr;
4329 int level;
4330 struct xfs_btree_block *block = NULL;
4331 int error = 0;
4332
4333 cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4334
4335 /* for each level */
4336 for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4337 /* grab the left hand block */
4338 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4339 if (error)
4340 return error;
4341
4342 /* readahead the left most block for the next level down */
4343 if (level > 0) {
4344 union xfs_btree_ptr *ptr;
4345
4346 ptr = xfs_btree_ptr_addr(cur, 1, block);
4347 xfs_btree_readahead_ptr(cur, ptr, 1);
4348
4349 /* save for the next iteration of the loop */
4350 lptr = *ptr;
4351 }
4352
4353 /* for each buffer in the level */
4354 do {
4355 error = xfs_btree_visit_block(cur, level, fn, data);
4356 } while (!error);
4357
4358 if (error != -ENOENT)
4359 return error;
4360 }
4361
4362 return 0;
4363}
4364
3981/* 4365/*
3982 * Change the owner of a btree. 4366 * Change the owner of a btree.
3983 * 4367 *
@@ -4002,26 +4386,27 @@ xfs_btree_get_rec(
4002 * just queue the modified buffer as delayed write buffer so the transaction 4386 * just queue the modified buffer as delayed write buffer so the transaction
4003 * recovery completion writes the changes to disk. 4387 * recovery completion writes the changes to disk.
4004 */ 4388 */
4389struct xfs_btree_block_change_owner_info {
4390 __uint64_t new_owner;
4391 struct list_head *buffer_list;
4392};
4393
4005static int 4394static int
4006xfs_btree_block_change_owner( 4395xfs_btree_block_change_owner(
4007 struct xfs_btree_cur *cur, 4396 struct xfs_btree_cur *cur,
4008 int level, 4397 int level,
4009 __uint64_t new_owner, 4398 void *data)
4010 struct list_head *buffer_list)
4011{ 4399{
4400 struct xfs_btree_block_change_owner_info *bbcoi = data;
4012 struct xfs_btree_block *block; 4401 struct xfs_btree_block *block;
4013 struct xfs_buf *bp; 4402 struct xfs_buf *bp;
4014 union xfs_btree_ptr rptr;
4015
4016 /* do right sibling readahead */
4017 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4018 4403
4019 /* modify the owner */ 4404 /* modify the owner */
4020 block = xfs_btree_get_block(cur, level, &bp); 4405 block = xfs_btree_get_block(cur, level, &bp);
4021 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 4406 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
4022 block->bb_u.l.bb_owner = cpu_to_be64(new_owner); 4407 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
4023 else 4408 else
4024 block->bb_u.s.bb_owner = cpu_to_be32(new_owner); 4409 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
4025 4410
4026 /* 4411 /*
4027 * If the block is a root block hosted in an inode, we might not have a 4412 * If the block is a root block hosted in an inode, we might not have a
@@ -4035,19 +4420,14 @@ xfs_btree_block_change_owner(
4035 xfs_trans_ordered_buf(cur->bc_tp, bp); 4420 xfs_trans_ordered_buf(cur->bc_tp, bp);
4036 xfs_btree_log_block(cur, bp, XFS_BB_OWNER); 4421 xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
4037 } else { 4422 } else {
4038 xfs_buf_delwri_queue(bp, buffer_list); 4423 xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
4039 } 4424 }
4040 } else { 4425 } else {
4041 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 4426 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
4042 ASSERT(level == cur->bc_nlevels - 1); 4427 ASSERT(level == cur->bc_nlevels - 1);
4043 } 4428 }
4044 4429
4045 /* now read rh sibling block for next iteration */ 4430 return 0;
4046 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4047 if (xfs_btree_ptr_is_null(cur, &rptr))
4048 return -ENOENT;
4049
4050 return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4051} 4431}
4052 4432
4053int 4433int
@@ -4056,43 +4436,13 @@ xfs_btree_change_owner(
4056 __uint64_t new_owner, 4436 __uint64_t new_owner,
4057 struct list_head *buffer_list) 4437 struct list_head *buffer_list)
4058{ 4438{
4059 union xfs_btree_ptr lptr; 4439 struct xfs_btree_block_change_owner_info bbcoi;
4060 int level;
4061 struct xfs_btree_block *block = NULL;
4062 int error = 0;
4063
4064 cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4065
4066 /* for each level */
4067 for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4068 /* grab the left hand block */
4069 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4070 if (error)
4071 return error;
4072
4073 /* readahead the left most block for the next level down */
4074 if (level > 0) {
4075 union xfs_btree_ptr *ptr;
4076
4077 ptr = xfs_btree_ptr_addr(cur, 1, block);
4078 xfs_btree_readahead_ptr(cur, ptr, 1);
4079
4080 /* save for the next iteration of the loop */
4081 lptr = *ptr;
4082 }
4083
4084 /* for each buffer in the level */
4085 do {
4086 error = xfs_btree_block_change_owner(cur, level,
4087 new_owner,
4088 buffer_list);
4089 } while (!error);
4090 4440
4091 if (error != -ENOENT) 4441 bbcoi.new_owner = new_owner;
4092 return error; 4442 bbcoi.buffer_list = buffer_list;
4093 }
4094 4443
4095 return 0; 4444 return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
4445 &bbcoi);
4096} 4446}
4097 4447
4098/** 4448/**
@@ -4171,3 +4521,267 @@ xfs_btree_compute_maxlevels(
4171 maxblocks = (maxblocks + limits[1] - 1) / limits[1]; 4521 maxblocks = (maxblocks + limits[1] - 1) / limits[1];
4172 return level; 4522 return level;
4173} 4523}
4524
4525/*
4526 * Query a regular btree for all records overlapping a given interval.
4527 * Start with a LE lookup of the key of low_rec and return all records
4528 * until we find a record with a key greater than the key of high_rec.
4529 */
4530STATIC int
4531xfs_btree_simple_query_range(
4532 struct xfs_btree_cur *cur,
4533 union xfs_btree_key *low_key,
4534 union xfs_btree_key *high_key,
4535 xfs_btree_query_range_fn fn,
4536 void *priv)
4537{
4538 union xfs_btree_rec *recp;
4539 union xfs_btree_key rec_key;
4540 __int64_t diff;
4541 int stat;
4542 bool firstrec = true;
4543 int error;
4544
4545 ASSERT(cur->bc_ops->init_high_key_from_rec);
4546 ASSERT(cur->bc_ops->diff_two_keys);
4547
4548 /*
4549 * Find the leftmost record. The btree cursor must be set
4550 * to the low record used to generate low_key.
4551 */
4552 stat = 0;
4553 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
4554 if (error)
4555 goto out;
4556
4557 while (stat) {
4558 /* Find the record. */
4559 error = xfs_btree_get_rec(cur, &recp, &stat);
4560 if (error || !stat)
4561 break;
4562 cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
4563
4564 /* Skip if high_key(rec) < low_key. */
4565 if (firstrec) {
4566 firstrec = false;
4567 diff = cur->bc_ops->diff_two_keys(cur, low_key,
4568 &rec_key);
4569 if (diff > 0)
4570 goto advloop;
4571 }
4572
4573 /* Stop if high_key < low_key(rec). */
4574 diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
4575 if (diff > 0)
4576 break;
4577
4578 /* Callback */
4579 error = fn(cur, recp, priv);
4580 if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT)
4581 break;
4582
4583advloop:
4584 /* Move on to the next record. */
4585 error = xfs_btree_increment(cur, 0, &stat);
4586 if (error)
4587 break;
4588 }
4589
4590out:
4591 return error;
4592}
4593
4594/*
4595 * Query an overlapped interval btree for all records overlapping a given
4596 * interval. This function roughly follows the algorithm given in
4597 * "Interval Trees" of _Introduction to Algorithms_, which is section
4598 * 14.3 in the 2nd and 3rd editions.
4599 *
4600 * First, generate keys for the low and high records passed in.
4601 *
4602 * For any leaf node, generate the high and low keys for the record.
4603 * If the record keys overlap with the query low/high keys, pass the
4604 * record to the function iterator.
4605 *
4606 * For any internal node, compare the low and high keys of each
4607 * pointer against the query low/high keys. If there's an overlap,
4608 * follow the pointer.
4609 *
4610 * As an optimization, we stop scanning a block when we find a low key
4611 * that is greater than the query's high key.
4612 */
4613STATIC int
4614xfs_btree_overlapped_query_range(
4615 struct xfs_btree_cur *cur,
4616 union xfs_btree_key *low_key,
4617 union xfs_btree_key *high_key,
4618 xfs_btree_query_range_fn fn,
4619 void *priv)
4620{
4621 union xfs_btree_ptr ptr;
4622 union xfs_btree_ptr *pp;
4623 union xfs_btree_key rec_key;
4624 union xfs_btree_key rec_hkey;
4625 union xfs_btree_key *lkp;
4626 union xfs_btree_key *hkp;
4627 union xfs_btree_rec *recp;
4628 struct xfs_btree_block *block;
4629 __int64_t ldiff;
4630 __int64_t hdiff;
4631 int level;
4632 struct xfs_buf *bp;
4633 int i;
4634 int error;
4635
4636 /* Load the root of the btree. */
4637 level = cur->bc_nlevels - 1;
4638 cur->bc_ops->init_ptr_from_cur(cur, &ptr);
4639 error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
4640 if (error)
4641 return error;
4642 xfs_btree_get_block(cur, level, &bp);
4643 trace_xfs_btree_overlapped_query_range(cur, level, bp);
4644#ifdef DEBUG
4645 error = xfs_btree_check_block(cur, block, level, bp);
4646 if (error)
4647 goto out;
4648#endif
4649 cur->bc_ptrs[level] = 1;
4650
4651 while (level < cur->bc_nlevels) {
4652 block = xfs_btree_get_block(cur, level, &bp);
4653
4654 /* End of node, pop back towards the root. */
4655 if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
4656pop_up:
4657 if (level < cur->bc_nlevels - 1)
4658 cur->bc_ptrs[level + 1]++;
4659 level++;
4660 continue;
4661 }
4662
4663 if (level == 0) {
4664 /* Handle a leaf node. */
4665 recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
4666
4667 cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
4668 ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey,
4669 low_key);
4670
4671 cur->bc_ops->init_key_from_rec(&rec_key, recp);
4672 hdiff = cur->bc_ops->diff_two_keys(cur, high_key,
4673 &rec_key);
4674
4675 /*
4676 * If (record's high key >= query's low key) and
4677 * (query's high key >= record's low key), then
4678 * this record overlaps the query range; callback.
4679 */
4680 if (ldiff >= 0 && hdiff >= 0) {
4681 error = fn(cur, recp, priv);
4682 if (error < 0 ||
4683 error == XFS_BTREE_QUERY_RANGE_ABORT)
4684 break;
4685 } else if (hdiff < 0) {
4686 /* Record is larger than high key; pop. */
4687 goto pop_up;
4688 }
4689 cur->bc_ptrs[level]++;
4690 continue;
4691 }
4692
4693 /* Handle an internal node. */
4694 lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
4695 hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
4696 pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
4697
4698 ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key);
4699 hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp);
4700
4701 /*
4702 * If (pointer's high key >= query's low key) and
4703 * (query's high key >= pointer's low key), then
4704 * this record overlaps the query range; follow pointer.
4705 */
4706 if (ldiff >= 0 && hdiff >= 0) {
4707 level--;
4708 error = xfs_btree_lookup_get_block(cur, level, pp,
4709 &block);
4710 if (error)
4711 goto out;
4712 xfs_btree_get_block(cur, level, &bp);
4713 trace_xfs_btree_overlapped_query_range(cur, level, bp);
4714#ifdef DEBUG
4715 error = xfs_btree_check_block(cur, block, level, bp);
4716 if (error)
4717 goto out;
4718#endif
4719 cur->bc_ptrs[level] = 1;
4720 continue;
4721 } else if (hdiff < 0) {
4722 /* The low key is larger than the upper range; pop. */
4723 goto pop_up;
4724 }
4725 cur->bc_ptrs[level]++;
4726 }
4727
4728out:
4729 /*
4730 * If we don't end this function with the cursor pointing at a record
4731 * block, a subsequent non-error cursor deletion will not release
4732 * node-level buffers, causing a buffer leak. This is quite possible
4733 * with a zero-results range query, so release the buffers if we
4734 * failed to return any results.
4735 */
4736 if (cur->bc_bufs[0] == NULL) {
4737 for (i = 0; i < cur->bc_nlevels; i++) {
4738 if (cur->bc_bufs[i]) {
4739 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
4740 cur->bc_bufs[i] = NULL;
4741 cur->bc_ptrs[i] = 0;
4742 cur->bc_ra[i] = 0;
4743 }
4744 }
4745 }
4746
4747 return error;
4748}
4749
4750/*
4751 * Query a btree for all records overlapping a given interval of keys. The
4752 * supplied function will be called with each record found; return one of the
4753 * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
4754 * code. This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a
4755 * negative error code.
4756 */
4757int
4758xfs_btree_query_range(
4759 struct xfs_btree_cur *cur,
4760 union xfs_btree_irec *low_rec,
4761 union xfs_btree_irec *high_rec,
4762 xfs_btree_query_range_fn fn,
4763 void *priv)
4764{
4765 union xfs_btree_rec rec;
4766 union xfs_btree_key low_key;
4767 union xfs_btree_key high_key;
4768
4769 /* Find the keys of both ends of the interval. */
4770 cur->bc_rec = *high_rec;
4771 cur->bc_ops->init_rec_from_cur(cur, &rec);
4772 cur->bc_ops->init_key_from_rec(&high_key, &rec);
4773
4774 cur->bc_rec = *low_rec;
4775 cur->bc_ops->init_rec_from_cur(cur, &rec);
4776 cur->bc_ops->init_key_from_rec(&low_key, &rec);
4777
4778 /* Enforce low key < high key. */
4779 if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0)
4780 return -EINVAL;
4781
4782 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
4783 return xfs_btree_simple_query_range(cur, &low_key,
4784 &high_key, fn, priv);
4785 return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
4786 fn, priv);
4787}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 785a99682159..04d0865e5e6d 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -19,7 +19,7 @@
19#define __XFS_BTREE_H__ 19#define __XFS_BTREE_H__
20 20
21struct xfs_buf; 21struct xfs_buf;
22struct xfs_bmap_free; 22struct xfs_defer_ops;
23struct xfs_inode; 23struct xfs_inode;
24struct xfs_mount; 24struct xfs_mount;
25struct xfs_trans; 25struct xfs_trans;
@@ -38,17 +38,37 @@ union xfs_btree_ptr {
38}; 38};
39 39
40union xfs_btree_key { 40union xfs_btree_key {
41 xfs_bmbt_key_t bmbt; 41 struct xfs_bmbt_key bmbt;
42 xfs_bmdr_key_t bmbr; /* bmbt root block */ 42 xfs_bmdr_key_t bmbr; /* bmbt root block */
43 xfs_alloc_key_t alloc; 43 xfs_alloc_key_t alloc;
44 xfs_inobt_key_t inobt; 44 struct xfs_inobt_key inobt;
45 struct xfs_rmap_key rmap;
46};
47
48/*
49 * In-core key that holds both low and high keys for overlapped btrees.
50 * The two keys are packed next to each other on disk, so do the same
51 * in memory. Preserve the existing xfs_btree_key as a single key to
52 * avoid the mental model breakage that would happen if we passed a
53 * bigkey into a function that operates on a single key.
54 */
55union xfs_btree_bigkey {
56 struct xfs_bmbt_key bmbt;
57 xfs_bmdr_key_t bmbr; /* bmbt root block */
58 xfs_alloc_key_t alloc;
59 struct xfs_inobt_key inobt;
60 struct {
61 struct xfs_rmap_key rmap;
62 struct xfs_rmap_key rmap_hi;
63 };
45}; 64};
46 65
47union xfs_btree_rec { 66union xfs_btree_rec {
48 xfs_bmbt_rec_t bmbt; 67 struct xfs_bmbt_rec bmbt;
49 xfs_bmdr_rec_t bmbr; /* bmbt root block */ 68 xfs_bmdr_rec_t bmbr; /* bmbt root block */
50 xfs_alloc_rec_t alloc; 69 struct xfs_alloc_rec alloc;
51 xfs_inobt_rec_t inobt; 70 struct xfs_inobt_rec inobt;
71 struct xfs_rmap_rec rmap;
52}; 72};
53 73
54/* 74/*
@@ -63,6 +83,7 @@ union xfs_btree_rec {
63#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) 83#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi)
64#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) 84#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
65#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi) 85#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi)
86#define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi)
66 87
67/* 88/*
68 * For logging record fields. 89 * For logging record fields.
@@ -95,6 +116,7 @@ do { \
95 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \ 116 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \
96 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \ 117 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \
97 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \ 118 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \
119 case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(__mp, rmap, stat); break; \
98 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 120 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
99 } \ 121 } \
100} while (0) 122} while (0)
@@ -115,11 +137,13 @@ do { \
115 __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \ 137 __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \
116 case XFS_BTNUM_FINO: \ 138 case XFS_BTNUM_FINO: \
117 __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \ 139 __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \
140 case XFS_BTNUM_RMAP: \
141 __XFS_BTREE_STATS_ADD(__mp, rmap, stat, val); break; \
118 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 142 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
119 } \ 143 } \
120} while (0) 144} while (0)
121 145
122#define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */ 146#define XFS_BTREE_MAXLEVELS 9 /* max of all btrees */
123 147
124struct xfs_btree_ops { 148struct xfs_btree_ops {
125 /* size of the key and record structures */ 149 /* size of the key and record structures */
@@ -158,17 +182,25 @@ struct xfs_btree_ops {
158 /* init values of btree structures */ 182 /* init values of btree structures */
159 void (*init_key_from_rec)(union xfs_btree_key *key, 183 void (*init_key_from_rec)(union xfs_btree_key *key,
160 union xfs_btree_rec *rec); 184 union xfs_btree_rec *rec);
161 void (*init_rec_from_key)(union xfs_btree_key *key,
162 union xfs_btree_rec *rec);
163 void (*init_rec_from_cur)(struct xfs_btree_cur *cur, 185 void (*init_rec_from_cur)(struct xfs_btree_cur *cur,
164 union xfs_btree_rec *rec); 186 union xfs_btree_rec *rec);
165 void (*init_ptr_from_cur)(struct xfs_btree_cur *cur, 187 void (*init_ptr_from_cur)(struct xfs_btree_cur *cur,
166 union xfs_btree_ptr *ptr); 188 union xfs_btree_ptr *ptr);
189 void (*init_high_key_from_rec)(union xfs_btree_key *key,
190 union xfs_btree_rec *rec);
167 191
168 /* difference between key value and cursor value */ 192 /* difference between key value and cursor value */
169 __int64_t (*key_diff)(struct xfs_btree_cur *cur, 193 __int64_t (*key_diff)(struct xfs_btree_cur *cur,
170 union xfs_btree_key *key); 194 union xfs_btree_key *key);
171 195
196 /*
197 * Difference between key2 and key1 -- positive if key1 > key2,
198 * negative if key1 < key2, and zero if equal.
199 */
200 __int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
201 union xfs_btree_key *key1,
202 union xfs_btree_key *key2);
203
172 const struct xfs_buf_ops *buf_ops; 204 const struct xfs_buf_ops *buf_ops;
173 205
174#if defined(DEBUG) || defined(XFS_WARN) 206#if defined(DEBUG) || defined(XFS_WARN)
@@ -192,6 +224,13 @@ struct xfs_btree_ops {
192#define LASTREC_DELREC 2 224#define LASTREC_DELREC 2
193 225
194 226
227union xfs_btree_irec {
228 struct xfs_alloc_rec_incore a;
229 struct xfs_bmbt_irec b;
230 struct xfs_inobt_rec_incore i;
231 struct xfs_rmap_irec r;
232};
233
195/* 234/*
196 * Btree cursor structure. 235 * Btree cursor structure.
197 * This collects all information needed by the btree code in one place. 236 * This collects all information needed by the btree code in one place.
@@ -202,11 +241,7 @@ typedef struct xfs_btree_cur
202 struct xfs_mount *bc_mp; /* file system mount struct */ 241 struct xfs_mount *bc_mp; /* file system mount struct */
203 const struct xfs_btree_ops *bc_ops; 242 const struct xfs_btree_ops *bc_ops;
204 uint bc_flags; /* btree features - below */ 243 uint bc_flags; /* btree features - below */
205 union { 244 union xfs_btree_irec bc_rec; /* current insert/search record value */
206 xfs_alloc_rec_incore_t a;
207 xfs_bmbt_irec_t b;
208 xfs_inobt_rec_incore_t i;
209 } bc_rec; /* current insert/search record value */
210 struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */ 245 struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */
211 int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */ 246 int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */
212 __uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */ 247 __uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */
@@ -218,11 +253,12 @@ typedef struct xfs_btree_cur
218 union { 253 union {
219 struct { /* needed for BNO, CNT, INO */ 254 struct { /* needed for BNO, CNT, INO */
220 struct xfs_buf *agbp; /* agf/agi buffer pointer */ 255 struct xfs_buf *agbp; /* agf/agi buffer pointer */
256 struct xfs_defer_ops *dfops; /* deferred updates */
221 xfs_agnumber_t agno; /* ag number */ 257 xfs_agnumber_t agno; /* ag number */
222 } a; 258 } a;
223 struct { /* needed for BMAP */ 259 struct { /* needed for BMAP */
224 struct xfs_inode *ip; /* pointer to our inode */ 260 struct xfs_inode *ip; /* pointer to our inode */
225 struct xfs_bmap_free *flist; /* list to free after */ 261 struct xfs_defer_ops *dfops; /* deferred updates */
226 xfs_fsblock_t firstblock; /* 1st blk allocated */ 262 xfs_fsblock_t firstblock; /* 1st blk allocated */
227 int allocated; /* count of alloced */ 263 int allocated; /* count of alloced */
228 short forksize; /* fork's inode space */ 264 short forksize; /* fork's inode space */
@@ -238,6 +274,7 @@ typedef struct xfs_btree_cur
238#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */ 274#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */
239#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */ 275#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */
240#define XFS_BTREE_CRC_BLOCKS (1<<3) /* uses extended btree blocks */ 276#define XFS_BTREE_CRC_BLOCKS (1<<3) /* uses extended btree blocks */
277#define XFS_BTREE_OVERLAPPING (1<<4) /* overlapping intervals */
241 278
242 279
243#define XFS_BTREE_NOERROR 0 280#define XFS_BTREE_NOERROR 0
@@ -477,4 +514,19 @@ bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
477uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, 514uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits,
478 unsigned long len); 515 unsigned long len);
479 516
517/* return codes */
518#define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */
519#define XFS_BTREE_QUERY_RANGE_ABORT 1 /* stop iterating */
520typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
521 union xfs_btree_rec *rec, void *priv);
522
523int xfs_btree_query_range(struct xfs_btree_cur *cur,
524 union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec,
525 xfs_btree_query_range_fn fn, void *priv);
526
527typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level,
528 void *data);
529int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
530 xfs_btree_visit_blocks_fn fn, void *data);
531
480#endif /* __XFS_BTREE_H__ */ 532#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 0f1f165f4048..f2dc1a950c85 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2029,7 +2029,7 @@ xfs_da_grow_inode_int(
2029 error = xfs_bmapi_write(tp, dp, *bno, count, 2029 error = xfs_bmapi_write(tp, dp, *bno, count,
2030 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, 2030 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
2031 args->firstblock, args->total, &map, &nmap, 2031 args->firstblock, args->total, &map, &nmap,
2032 args->flist); 2032 args->dfops);
2033 if (error) 2033 if (error)
2034 return error; 2034 return error;
2035 2035
@@ -2052,7 +2052,7 @@ xfs_da_grow_inode_int(
2052 error = xfs_bmapi_write(tp, dp, b, c, 2052 error = xfs_bmapi_write(tp, dp, b, c,
2053 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, 2053 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
2054 args->firstblock, args->total, 2054 args->firstblock, args->total,
2055 &mapp[mapi], &nmap, args->flist); 2055 &mapp[mapi], &nmap, args->dfops);
2056 if (error) 2056 if (error)
2057 goto out_free_map; 2057 goto out_free_map;
2058 if (nmap < 1) 2058 if (nmap < 1)
@@ -2362,7 +2362,7 @@ xfs_da_shrink_inode(
2362 */ 2362 */
2363 error = xfs_bunmapi(tp, dp, dead_blkno, count, 2363 error = xfs_bunmapi(tp, dp, dead_blkno, count,
2364 xfs_bmapi_aflag(w), 0, args->firstblock, 2364 xfs_bmapi_aflag(w), 0, args->firstblock,
2365 args->flist, &done); 2365 args->dfops, &done);
2366 if (error == -ENOSPC) { 2366 if (error == -ENOSPC) {
2367 if (w != XFS_DATA_FORK) 2367 if (w != XFS_DATA_FORK)
2368 break; 2368 break;
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 6e153e399a77..98c75cbe6ac2 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -19,7 +19,7 @@
19#ifndef __XFS_DA_BTREE_H__ 19#ifndef __XFS_DA_BTREE_H__
20#define __XFS_DA_BTREE_H__ 20#define __XFS_DA_BTREE_H__
21 21
22struct xfs_bmap_free; 22struct xfs_defer_ops;
23struct xfs_inode; 23struct xfs_inode;
24struct xfs_trans; 24struct xfs_trans;
25struct zone; 25struct zone;
@@ -70,7 +70,7 @@ typedef struct xfs_da_args {
70 xfs_ino_t inumber; /* input/output inode number */ 70 xfs_ino_t inumber; /* input/output inode number */
71 struct xfs_inode *dp; /* directory inode to manipulate */ 71 struct xfs_inode *dp; /* directory inode to manipulate */
72 xfs_fsblock_t *firstblock; /* ptr to firstblock for bmap calls */ 72 xfs_fsblock_t *firstblock; /* ptr to firstblock for bmap calls */
73 struct xfs_bmap_free *flist; /* ptr to freelist for bmap_finish */ 73 struct xfs_defer_ops *dfops; /* ptr to freelist for bmap_finish */
74 struct xfs_trans *trans; /* current trans (changes over time) */ 74 struct xfs_trans *trans; /* current trans (changes over time) */
75 xfs_extlen_t total; /* total blocks needed, for 1st bmap */ 75 xfs_extlen_t total; /* total blocks needed, for 1st bmap */
76 int whichfork; /* data or attribute fork */ 76 int whichfork; /* data or attribute fork */
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 685f23b67056..9a492a9e19bd 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -629,6 +629,7 @@ typedef struct xfs_attr_shortform {
629 struct xfs_attr_sf_hdr { /* constant-structure header block */ 629 struct xfs_attr_sf_hdr { /* constant-structure header block */
630 __be16 totsize; /* total bytes in shortform list */ 630 __be16 totsize; /* total bytes in shortform list */
631 __u8 count; /* count of active entries */ 631 __u8 count; /* count of active entries */
632 __u8 padding;
632 } hdr; 633 } hdr;
633 struct xfs_attr_sf_entry { 634 struct xfs_attr_sf_entry {
634 __uint8_t namelen; /* actual length of name (no NULL) */ 635 __uint8_t namelen; /* actual length of name (no NULL) */
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
new file mode 100644
index 000000000000..054a2032fdb3
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -0,0 +1,463 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_log_format.h"
25#include "xfs_trans_resv.h"
26#include "xfs_bit.h"
27#include "xfs_sb.h"
28#include "xfs_mount.h"
29#include "xfs_defer.h"
30#include "xfs_trans.h"
31#include "xfs_trace.h"
32
33/*
34 * Deferred Operations in XFS
35 *
36 * Due to the way locking rules work in XFS, certain transactions (block
37 * mapping and unmapping, typically) have permanent reservations so that
38 * we can roll the transaction to adhere to AG locking order rules and
39 * to unlock buffers between metadata updates. Prior to rmap/reflink,
40 * the mapping code had a mechanism to perform these deferrals for
41 * extents that were going to be freed; this code makes that facility
42 * more generic.
43 *
44 * When adding the reverse mapping and reflink features, it became
45 * necessary to perform complex remapping multi-transactions to comply
46 * with AG locking order rules, and to be able to spread a single
47 * refcount update operation (an operation on an n-block extent can
48 * update as many as n records!) among multiple transactions. XFS can
49 * roll a transaction to facilitate this, but using this facility
50 * requires us to log "intent" items in case log recovery needs to
51 * redo the operation, and to log "done" items to indicate that redo
52 * is not necessary.
53 *
54 * Deferred work is tracked in xfs_defer_pending items. Each pending
55 * item tracks one type of deferred work. Incoming work items (which
56 * have not yet had an intent logged) are attached to a pending item
57 * on the dop_intake list, where they wait for the caller to finish
58 * the deferred operations.
59 *
60 * Finishing a set of deferred operations is an involved process. To
61 * start, we define "rolling a deferred-op transaction" as follows:
62 *
63 * > For each xfs_defer_pending item on the dop_intake list,
64 * - Sort the work items in AG order. XFS locking
65 * order rules require us to lock buffers in AG order.
66 * - Create a log intent item for that type.
67 * - Attach it to the pending item.
68 * - Move the pending item from the dop_intake list to the
69 * dop_pending list.
70 * > Roll the transaction.
71 *
72 * NOTE: To avoid exceeding the transaction reservation, we limit the
73 * number of items that we attach to a given xfs_defer_pending.
74 *
75 * The actual finishing process looks like this:
76 *
77 * > For each xfs_defer_pending in the dop_pending list,
78 * - Roll the deferred-op transaction as above.
79 * - Create a log done item for that type, and attach it to the
80 * log intent item.
81 * - For each work item attached to the log intent item,
82 * * Perform the described action.
83 * * Attach the work item to the log done item.
84 *
85 * The key here is that we must log an intent item for all pending
86 * work items every time we roll the transaction, and that we must log
87 * a done item as soon as the work is completed. With this mechanism
88 * we can perform complex remapping operations, chaining intent items
89 * as needed.
90 *
91 * This is an example of remapping the extent (E, E+B) into file X at
92 * offset A and dealing with the extent (C, C+B) already being mapped
93 * there:
94 * +-------------------------------------------------+
95 * | Unmap file X startblock C offset A length B | t0
96 * | Intent to reduce refcount for extent (C, B) |
97 * | Intent to remove rmap (X, C, A, B) |
98 * | Intent to free extent (D, 1) (bmbt block) |
99 * | Intent to map (X, A, B) at startblock E |
100 * +-------------------------------------------------+
101 * | Map file X startblock E offset A length B | t1
102 * | Done mapping (X, E, A, B) |
103 * | Intent to increase refcount for extent (E, B) |
104 * | Intent to add rmap (X, E, A, B) |
105 * +-------------------------------------------------+
106 * | Reduce refcount for extent (C, B) | t2
107 * | Done reducing refcount for extent (C, B) |
108 * | Increase refcount for extent (E, B) |
109 * | Done increasing refcount for extent (E, B) |
110 * | Intent to free extent (C, B) |
111 * | Intent to free extent (F, 1) (refcountbt block) |
112 * | Intent to remove rmap (F, 1, REFC) |
113 * +-------------------------------------------------+
114 * | Remove rmap (X, C, A, B) | t3
115 * | Done removing rmap (X, C, A, B) |
116 * | Add rmap (X, E, A, B) |
117 * | Done adding rmap (X, E, A, B) |
118 * | Remove rmap (F, 1, REFC) |
119 * | Done removing rmap (F, 1, REFC) |
120 * +-------------------------------------------------+
121 * | Free extent (C, B) | t4
122 * | Done freeing extent (C, B) |
123 * | Free extent (D, 1) |
124 * | Done freeing extent (D, 1) |
125 * | Free extent (F, 1) |
126 * | Done freeing extent (F, 1) |
127 * +-------------------------------------------------+
128 *
129 * If we should crash before t2 commits, log recovery replays
130 * the following intent items:
131 *
132 * - Intent to reduce refcount for extent (C, B)
133 * - Intent to remove rmap (X, C, A, B)
134 * - Intent to free extent (D, 1) (bmbt block)
135 * - Intent to increase refcount for extent (E, B)
136 * - Intent to add rmap (X, E, A, B)
137 *
138 * In the process of recovering, it should also generate and take care
139 * of these intent items:
140 *
141 * - Intent to free extent (C, B)
142 * - Intent to free extent (F, 1) (refcountbt block)
143 * - Intent to remove rmap (F, 1, REFC)
144 */
145
146static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX];
147
148/*
149 * For each pending item in the intake list, log its intent item and the
150 * associated extents, then add the entire intake list to the end of
151 * the pending list.
152 */
153STATIC void
154xfs_defer_intake_work(
155 struct xfs_trans *tp,
156 struct xfs_defer_ops *dop)
157{
158 struct list_head *li;
159 struct xfs_defer_pending *dfp;
160
161 list_for_each_entry(dfp, &dop->dop_intake, dfp_list) {
162 trace_xfs_defer_intake_work(tp->t_mountp, dfp);
163 dfp->dfp_intent = dfp->dfp_type->create_intent(tp,
164 dfp->dfp_count);
165 list_sort(tp->t_mountp, &dfp->dfp_work,
166 dfp->dfp_type->diff_items);
167 list_for_each(li, &dfp->dfp_work)
168 dfp->dfp_type->log_item(tp, dfp->dfp_intent, li);
169 }
170
171 list_splice_tail_init(&dop->dop_intake, &dop->dop_pending);
172}
173
/*
 * Abort all the intents that were committed.
 *
 * @tp:    current transaction
 * @dop:   deferred-op state being torn down
 * @error: error code that triggered the abort; selects the shutdown flavor
 *
 * Called from the failure paths of xfs_defer_trans_roll() and
 * xfs_defer_finish(); always ends with a filesystem shutdown.
 */
STATIC void
xfs_defer_trans_abort(
	struct xfs_trans		*tp,
	struct xfs_defer_ops		*dop,
	int				error)
{
	struct xfs_defer_pending	*dfp;

	trace_xfs_defer_trans_abort(tp->t_mountp, dop);
	/*
	 * If the transaction was committed, drop the intent reference
	 * since we're bailing out of here. The other reference is
	 * dropped when the intent hits the AIL. If the transaction
	 * was not committed, the intent is freed by the intent item
	 * unlock handler on abort.
	 */
	if (!dop->dop_committed)
		return;

	/* Abort intent items. */
	list_for_each_entry(dfp, &dop->dop_pending, dfp_list) {
		trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
		/* Only intents that made it into a committed trans. */
		if (dfp->dfp_committed)
			dfp->dfp_type->abort_intent(dfp->dfp_intent);
	}

	/* Shut down FS. */
	xfs_force_shutdown(tp->t_mountp, (error == -EFSCORRUPTED) ?
			SHUTDOWN_CORRUPT_INCORE : SHUTDOWN_META_IO_ERROR);
}
205
206/* Roll a transaction so we can do some deferred op processing. */
207STATIC int
208xfs_defer_trans_roll(
209 struct xfs_trans **tp,
210 struct xfs_defer_ops *dop,
211 struct xfs_inode *ip)
212{
213 int i;
214 int error;
215
216 /* Log all the joined inodes except the one we passed in. */
217 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) {
218 if (dop->dop_inodes[i] == ip)
219 continue;
220 xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
221 }
222
223 trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
224
225 /* Roll the transaction. */
226 error = xfs_trans_roll(tp, ip);
227 if (error) {
228 trace_xfs_defer_trans_roll_error((*tp)->t_mountp, dop, error);
229 xfs_defer_trans_abort(*tp, dop, error);
230 return error;
231 }
232 dop->dop_committed = true;
233
234 /* Rejoin the joined inodes except the one we passed in. */
235 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) {
236 if (dop->dop_inodes[i] == ip)
237 continue;
238 xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
239 }
240
241 return error;
242}
243
244/* Do we have any work items to finish? */
245bool
246xfs_defer_has_unfinished_work(
247 struct xfs_defer_ops *dop)
248{
249 return !list_empty(&dop->dop_pending) || !list_empty(&dop->dop_intake);
250}
251
252/*
253 * Add this inode to the deferred op. Each joined inode is relogged
254 * each time we roll the transaction, in addition to any inode passed
255 * to xfs_defer_finish().
256 */
257int
258xfs_defer_join(
259 struct xfs_defer_ops *dop,
260 struct xfs_inode *ip)
261{
262 int i;
263
264 for (i = 0; i < XFS_DEFER_OPS_NR_INODES; i++) {
265 if (dop->dop_inodes[i] == ip)
266 return 0;
267 else if (dop->dop_inodes[i] == NULL) {
268 dop->dop_inodes[i] = ip;
269 return 0;
270 }
271 }
272
273 return -EFSCORRUPTED;
274}
275
/*
 * Finish all the pending work.  This involves logging intent items for
 * any work items that wandered in since the last transaction roll (if
 * one has even happened), rolling the transaction, and finishing the
 * work items in the first item on the logged-and-pending list.
 *
 * If an inode is provided, relog it to the new transaction.
 *
 * @tp:  in/out transaction pointer; rolled (replaced) on each iteration
 * @dop: the deferred-op state to drain
 * @ip:  optional inode to relog across each roll (may be NULL)
 *
 * Returns 0, or a negative errno; on error the chain has already been
 * aborted (and the fs shut down) — the caller still must call
 * xfs_defer_cancel() to free the remaining lists.
 */
int
xfs_defer_finish(
	struct xfs_trans		**tp,
	struct xfs_defer_ops		*dop,
	struct xfs_inode		*ip)
{
	struct xfs_defer_pending	*dfp;
	struct list_head		*li;
	struct list_head		*n;
	void				*done_item = NULL;
	void				*state;	/* opaque per-dfp state threaded through finish_item */
	int				error = 0;
	void				(*cleanup_fn)(struct xfs_trans *, void *, int);

	/* Rolling requires a permanent log reservation. */
	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);

	trace_xfs_defer_finish((*tp)->t_mountp, dop);

	/* Until we run out of pending work to finish... */
	while (xfs_defer_has_unfinished_work(dop)) {
		/* Log intents for work items sitting in the intake. */
		xfs_defer_intake_work(*tp, dop);

		/* Roll the transaction. */
		error = xfs_defer_trans_roll(tp, dop, ip);
		if (error)
			goto out;

		/*
		 * Mark all pending intents as committed.  Walk backwards and
		 * stop at the first item already marked — everything before
		 * it was committed by an earlier roll.
		 */
		list_for_each_entry_reverse(dfp, &dop->dop_pending, dfp_list) {
			if (dfp->dfp_committed)
				break;
			trace_xfs_defer_pending_commit((*tp)->t_mountp, dfp);
			dfp->dfp_committed = true;
		}

		/* Log an intent-done item for the first pending item. */
		dfp = list_first_entry(&dop->dop_pending,
				struct xfs_defer_pending, dfp_list);
		trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp);
		done_item = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
				dfp->dfp_count);
		cleanup_fn = dfp->dfp_type->finish_cleanup;

		/* Finish the work items. */
		state = NULL;
		list_for_each_safe(li, n, &dfp->dfp_work) {
			list_del(li);
			dfp->dfp_count--;
			error = dfp->dfp_type->finish_item(*tp, dop, li,
					done_item, &state);
			if (error) {
				/*
				 * Clean up after ourselves and jump out.
				 * xfs_defer_cancel will take care of freeing
				 * all these lists and stuff.
				 */
				if (cleanup_fn)
					cleanup_fn(*tp, state, error);
				xfs_defer_trans_abort(*tp, dop, error);
				goto out;
			}
		}
		/* Done with the dfp, free it. */
		list_del(&dfp->dfp_list);
		kmem_free(dfp);

		/* Per-dfp cleanup (e.g. release btree cursors) after success. */
		if (cleanup_fn)
			cleanup_fn(*tp, state, error);
	}

out:
	if (error)
		trace_xfs_defer_finish_error((*tp)->t_mountp, dop, error);
	else
		trace_xfs_defer_finish_done((*tp)->t_mountp, dop);
	return error;
}
362
363/*
364 * Free up any items left in the list.
365 */
366void
367xfs_defer_cancel(
368 struct xfs_defer_ops *dop)
369{
370 struct xfs_defer_pending *dfp;
371 struct xfs_defer_pending *pli;
372 struct list_head *pwi;
373 struct list_head *n;
374
375 trace_xfs_defer_cancel(NULL, dop);
376
377 /*
378 * Free the pending items. Caller should already have arranged
379 * for the intent items to be released.
380 */
381 list_for_each_entry_safe(dfp, pli, &dop->dop_intake, dfp_list) {
382 trace_xfs_defer_intake_cancel(NULL, dfp);
383 list_del(&dfp->dfp_list);
384 list_for_each_safe(pwi, n, &dfp->dfp_work) {
385 list_del(pwi);
386 dfp->dfp_count--;
387 dfp->dfp_type->cancel_item(pwi);
388 }
389 ASSERT(dfp->dfp_count == 0);
390 kmem_free(dfp);
391 }
392 list_for_each_entry_safe(dfp, pli, &dop->dop_pending, dfp_list) {
393 trace_xfs_defer_pending_cancel(NULL, dfp);
394 list_del(&dfp->dfp_list);
395 list_for_each_safe(pwi, n, &dfp->dfp_work) {
396 list_del(pwi);
397 dfp->dfp_count--;
398 dfp->dfp_type->cancel_item(pwi);
399 }
400 ASSERT(dfp->dfp_count == 0);
401 kmem_free(dfp);
402 }
403}
404
405/* Add an item for later deferred processing. */
406void
407xfs_defer_add(
408 struct xfs_defer_ops *dop,
409 enum xfs_defer_ops_type type,
410 struct list_head *li)
411{
412 struct xfs_defer_pending *dfp = NULL;
413
414 /*
415 * Add the item to a pending item at the end of the intake list.
416 * If the last pending item has the same type, reuse it. Else,
417 * create a new pending item at the end of the intake list.
418 */
419 if (!list_empty(&dop->dop_intake)) {
420 dfp = list_last_entry(&dop->dop_intake,
421 struct xfs_defer_pending, dfp_list);
422 if (dfp->dfp_type->type != type ||
423 (dfp->dfp_type->max_items &&
424 dfp->dfp_count >= dfp->dfp_type->max_items))
425 dfp = NULL;
426 }
427 if (!dfp) {
428 dfp = kmem_alloc(sizeof(struct xfs_defer_pending),
429 KM_SLEEP | KM_NOFS);
430 dfp->dfp_type = defer_op_types[type];
431 dfp->dfp_committed = false;
432 dfp->dfp_intent = NULL;
433 dfp->dfp_count = 0;
434 INIT_LIST_HEAD(&dfp->dfp_work);
435 list_add_tail(&dfp->dfp_list, &dop->dop_intake);
436 }
437
438 list_add_tail(li, &dfp->dfp_work);
439 dfp->dfp_count++;
440}
441
/*
 * Register a deferred-operation type's function table so that
 * xfs_defer_add() can look it up by its enum xfs_defer_ops_type value.
 * (The previous comment said "Initialize a deferred operation list",
 * which described xfs_defer_init, not this function.)
 */
void
xfs_defer_init_op_type(
	const struct xfs_defer_op_type	*type)
{
	defer_op_types[type->type] = type;
}
449
450/* Initialize a deferred operation. */
451void
452xfs_defer_init(
453 struct xfs_defer_ops *dop,
454 xfs_fsblock_t *fbp)
455{
456 dop->dop_committed = false;
457 dop->dop_low = false;
458 memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
459 *fbp = NULLFSBLOCK;
460 INIT_LIST_HEAD(&dop->dop_intake);
461 INIT_LIST_HEAD(&dop->dop_pending);
462 trace_xfs_defer_init(NULL, dop);
463}
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
new file mode 100644
index 000000000000..cc3981c48296
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -0,0 +1,97 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#ifndef __XFS_DEFER_H__
21#define __XFS_DEFER_H__
22
23struct xfs_defer_op_type;
24
25/*
26 * Save a log intent item and a list of extents, so that we can replay
27 * whatever action had to happen to the extent list and file the log done
28 * item.
29 */
30struct xfs_defer_pending {
31 const struct xfs_defer_op_type *dfp_type; /* function pointers */
32 struct list_head dfp_list; /* pending items */
33 bool dfp_committed; /* committed trans? */
34 void *dfp_intent; /* log intent item */
35 struct list_head dfp_work; /* work items */
36 unsigned int dfp_count; /* # extent items */
37};
38
/*
 * Types of deferred work; also indexes the defer_op_types registration
 * table filled in by xfs_defer_init_op_type().
 */
enum xfs_defer_ops_type {
	XFS_DEFER_OPS_TYPE_RMAP,
	XFS_DEFER_OPS_TYPE_FREE,
	XFS_DEFER_OPS_TYPE_MAX,
};

#define XFS_DEFER_OPS_NR_INODES	2	/* join up to two inodes */

/*
 * Header for deferred operation list.
 *
 * dop_low is used by the allocator to activate the lowspace algorithm -
 * when free space is running low the extent allocator may choose to
 * allocate an extent from an AG without leaving sufficient space for
 * a btree split when inserting the new extent. In this case the allocator
 * will enable the lowspace algorithm which is supposed to allow further
 * allocations (such as btree splits and newroots) to allocate from
 * sequential AGs. In order to avoid locking AGs out of order the lowspace
 * algorithm will start searching for free space from AG 0. If the correct
 * transaction reservations have been made then this algorithm will eventually
 * find all the space it needs.
 */
struct xfs_defer_ops {
	bool			dop_committed;	/* did any trans commit? */
	bool			dop_low;	/* alloc in low mode */
	struct list_head	dop_intake;	/* unlogged pending work */
	struct list_head	dop_pending;	/* logged pending work */

	/* relog these inodes with each roll */
	struct xfs_inode	*dop_inodes[XFS_DEFER_OPS_NR_INODES];
};
70
71void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
72 struct list_head *h);
73int xfs_defer_finish(struct xfs_trans **tp, struct xfs_defer_ops *dop,
74 struct xfs_inode *ip);
75void xfs_defer_cancel(struct xfs_defer_ops *dop);
76void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
77bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
78int xfs_defer_join(struct xfs_defer_ops *dop, struct xfs_inode *ip);
79
/*
 * Description of a deferred type.  One of these is registered per
 * enum xfs_defer_ops_type via xfs_defer_init_op_type(); the callbacks
 * are invoked from xfs_defer_intake_work/xfs_defer_finish/xfs_defer_cancel.
 */
struct xfs_defer_op_type {
	enum xfs_defer_ops_type	type;		/* slot in defer_op_types[] */
	unsigned int		max_items;	/* cap on work items per intent; 0 = unlimited */
	void (*abort_intent)(void *);		/* release an intent after a failed roll */
	void *(*create_done)(struct xfs_trans *, void *, unsigned int);
	int (*finish_item)(struct xfs_trans *, struct xfs_defer_ops *,
			struct list_head *, void *, void **);
			/* do one work item; last arg is opaque state carried
			 * across items of the same pending group */
	void (*finish_cleanup)(struct xfs_trans *, void *, int);
			/* optional: tear down that opaque state (may be NULL) */
	void (*cancel_item)(struct list_head *);	/* free an unprocessed work item */
	int (*diff_items)(void *, struct list_head *, struct list_head *);
			/* list_sort comparator; sorts work into AG order */
	void *(*create_intent)(struct xfs_trans *, uint);
	void (*log_item)(struct xfs_trans *, void *, struct list_head *);
};
94
95void xfs_defer_init_op_type(const struct xfs_defer_op_type *type);
96
97#endif /* __XFS_DEFER_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index af0f9d171f8a..20a96dd5af7e 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -21,6 +21,7 @@
21#include "xfs_log_format.h" 21#include "xfs_log_format.h"
22#include "xfs_trans_resv.h" 22#include "xfs_trans_resv.h"
23#include "xfs_mount.h" 23#include "xfs_mount.h"
24#include "xfs_defer.h"
24#include "xfs_da_format.h" 25#include "xfs_da_format.h"
25#include "xfs_da_btree.h" 26#include "xfs_da_btree.h"
26#include "xfs_inode.h" 27#include "xfs_inode.h"
@@ -259,7 +260,7 @@ xfs_dir_createname(
259 struct xfs_name *name, 260 struct xfs_name *name,
260 xfs_ino_t inum, /* new entry inode number */ 261 xfs_ino_t inum, /* new entry inode number */
261 xfs_fsblock_t *first, /* bmap's firstblock */ 262 xfs_fsblock_t *first, /* bmap's firstblock */
262 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 263 struct xfs_defer_ops *dfops, /* bmap's freeblock list */
263 xfs_extlen_t total) /* bmap's total block count */ 264 xfs_extlen_t total) /* bmap's total block count */
264{ 265{
265 struct xfs_da_args *args; 266 struct xfs_da_args *args;
@@ -286,7 +287,7 @@ xfs_dir_createname(
286 args->inumber = inum; 287 args->inumber = inum;
287 args->dp = dp; 288 args->dp = dp;
288 args->firstblock = first; 289 args->firstblock = first;
289 args->flist = flist; 290 args->dfops = dfops;
290 args->total = total; 291 args->total = total;
291 args->whichfork = XFS_DATA_FORK; 292 args->whichfork = XFS_DATA_FORK;
292 args->trans = tp; 293 args->trans = tp;
@@ -436,7 +437,7 @@ xfs_dir_removename(
436 struct xfs_name *name, 437 struct xfs_name *name,
437 xfs_ino_t ino, 438 xfs_ino_t ino,
438 xfs_fsblock_t *first, /* bmap's firstblock */ 439 xfs_fsblock_t *first, /* bmap's firstblock */
439 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 440 struct xfs_defer_ops *dfops, /* bmap's freeblock list */
440 xfs_extlen_t total) /* bmap's total block count */ 441 xfs_extlen_t total) /* bmap's total block count */
441{ 442{
442 struct xfs_da_args *args; 443 struct xfs_da_args *args;
@@ -458,7 +459,7 @@ xfs_dir_removename(
458 args->inumber = ino; 459 args->inumber = ino;
459 args->dp = dp; 460 args->dp = dp;
460 args->firstblock = first; 461 args->firstblock = first;
461 args->flist = flist; 462 args->dfops = dfops;
462 args->total = total; 463 args->total = total;
463 args->whichfork = XFS_DATA_FORK; 464 args->whichfork = XFS_DATA_FORK;
464 args->trans = tp; 465 args->trans = tp;
@@ -498,7 +499,7 @@ xfs_dir_replace(
498 struct xfs_name *name, /* name of entry to replace */ 499 struct xfs_name *name, /* name of entry to replace */
499 xfs_ino_t inum, /* new inode number */ 500 xfs_ino_t inum, /* new inode number */
500 xfs_fsblock_t *first, /* bmap's firstblock */ 501 xfs_fsblock_t *first, /* bmap's firstblock */
501 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 502 struct xfs_defer_ops *dfops, /* bmap's freeblock list */
502 xfs_extlen_t total) /* bmap's total block count */ 503 xfs_extlen_t total) /* bmap's total block count */
503{ 504{
504 struct xfs_da_args *args; 505 struct xfs_da_args *args;
@@ -523,7 +524,7 @@ xfs_dir_replace(
523 args->inumber = inum; 524 args->inumber = inum;
524 args->dp = dp; 525 args->dp = dp;
525 args->firstblock = first; 526 args->firstblock = first;
526 args->flist = flist; 527 args->dfops = dfops;
527 args->total = total; 528 args->total = total;
528 args->whichfork = XFS_DATA_FORK; 529 args->whichfork = XFS_DATA_FORK;
529 args->trans = tp; 530 args->trans = tp;
@@ -680,7 +681,7 @@ xfs_dir2_shrink_inode(
680 681
681 /* Unmap the fsblock(s). */ 682 /* Unmap the fsblock(s). */
682 error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0, 683 error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0,
683 args->firstblock, args->flist, &done); 684 args->firstblock, args->dfops, &done);
684 if (error) { 685 if (error) {
685 /* 686 /*
686 * ENOSPC actually can happen if we're in a removename with no 687 * ENOSPC actually can happen if we're in a removename with no
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index e55353651f5b..becc926c3e3d 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -18,7 +18,7 @@
18#ifndef __XFS_DIR2_H__ 18#ifndef __XFS_DIR2_H__
19#define __XFS_DIR2_H__ 19#define __XFS_DIR2_H__
20 20
21struct xfs_bmap_free; 21struct xfs_defer_ops;
22struct xfs_da_args; 22struct xfs_da_args;
23struct xfs_inode; 23struct xfs_inode;
24struct xfs_mount; 24struct xfs_mount;
@@ -129,18 +129,18 @@ extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp,
129extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp, 129extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
130 struct xfs_name *name, xfs_ino_t inum, 130 struct xfs_name *name, xfs_ino_t inum,
131 xfs_fsblock_t *first, 131 xfs_fsblock_t *first,
132 struct xfs_bmap_free *flist, xfs_extlen_t tot); 132 struct xfs_defer_ops *dfops, xfs_extlen_t tot);
133extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, 133extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
134 struct xfs_name *name, xfs_ino_t *inum, 134 struct xfs_name *name, xfs_ino_t *inum,
135 struct xfs_name *ci_name); 135 struct xfs_name *ci_name);
136extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, 136extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
137 struct xfs_name *name, xfs_ino_t ino, 137 struct xfs_name *name, xfs_ino_t ino,
138 xfs_fsblock_t *first, 138 xfs_fsblock_t *first,
139 struct xfs_bmap_free *flist, xfs_extlen_t tot); 139 struct xfs_defer_ops *dfops, xfs_extlen_t tot);
140extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, 140extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
141 struct xfs_name *name, xfs_ino_t inum, 141 struct xfs_name *name, xfs_ino_t inum,
142 xfs_fsblock_t *first, 142 xfs_fsblock_t *first,
143 struct xfs_bmap_free *flist, xfs_extlen_t tot); 143 struct xfs_defer_ops *dfops, xfs_extlen_t tot);
144extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, 144extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
145 struct xfs_name *name); 145 struct xfs_name *name);
146 146
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index adb204d40f22..f814d42c73b2 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -455,8 +455,10 @@ xfs_sb_has_compat_feature(
455} 455}
456 456
457#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ 457#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */
458#define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */
458#define XFS_SB_FEAT_RO_COMPAT_ALL \ 459#define XFS_SB_FEAT_RO_COMPAT_ALL \
459 (XFS_SB_FEAT_RO_COMPAT_FINOBT) 460 (XFS_SB_FEAT_RO_COMPAT_FINOBT | \
461 XFS_SB_FEAT_RO_COMPAT_RMAPBT)
460#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL 462#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
461static inline bool 463static inline bool
462xfs_sb_has_ro_compat_feature( 464xfs_sb_has_ro_compat_feature(
@@ -538,6 +540,12 @@ static inline bool xfs_sb_version_hasmetauuid(struct xfs_sb *sbp)
538 (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID); 540 (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID);
539} 541}
540 542
543static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp)
544{
545 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
546 (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT);
547}
548
541/* 549/*
542 * end of superblock version macros 550 * end of superblock version macros
543 */ 551 */
@@ -598,10 +606,10 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
598#define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION) 606#define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION)
599 607
600/* 608/*
601 * Btree number 0 is bno, 1 is cnt. This value gives the size of the 609 * Btree number 0 is bno, 1 is cnt, 2 is rmap. This value gives the size of the
602 * arrays below. 610 * arrays below.
603 */ 611 */
604#define XFS_BTNUM_AGF ((int)XFS_BTNUM_CNTi + 1) 612#define XFS_BTNUM_AGF ((int)XFS_BTNUM_RMAPi + 1)
605 613
606/* 614/*
607 * The second word of agf_levels in the first a.g. overlaps the EFS 615 * The second word of agf_levels in the first a.g. overlaps the EFS
@@ -618,12 +626,10 @@ typedef struct xfs_agf {
618 __be32 agf_seqno; /* sequence # starting from 0 */ 626 __be32 agf_seqno; /* sequence # starting from 0 */
619 __be32 agf_length; /* size in blocks of a.g. */ 627 __be32 agf_length; /* size in blocks of a.g. */
620 /* 628 /*
621 * Freespace information 629 * Freespace and rmap information
622 */ 630 */
623 __be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */ 631 __be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */
624 __be32 agf_spare0; /* spare field */
625 __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */ 632 __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */
626 __be32 agf_spare1; /* spare field */
627 633
628 __be32 agf_flfirst; /* first freelist block's index */ 634 __be32 agf_flfirst; /* first freelist block's index */
629 __be32 agf_fllast; /* last freelist block's index */ 635 __be32 agf_fllast; /* last freelist block's index */
@@ -1308,17 +1314,118 @@ typedef __be32 xfs_inobt_ptr_t;
1308#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) 1314#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
1309 1315
1310/* 1316/*
1311 * The first data block of an AG depends on whether the filesystem was formatted 1317 * Reverse mapping btree format definitions
1312 * with the finobt feature. If so, account for the finobt reserved root btree 1318 *
1313 * block. 1319 * There is a btree for the reverse map per allocation group
1320 */
1321#define XFS_RMAP_CRC_MAGIC 0x524d4233 /* 'RMB3' */
1322
1323/*
1324 * Ownership info for an extent. This is used to create reverse-mapping
1325 * entries.
1314 */ 1326 */
1315#define XFS_PREALLOC_BLOCKS(mp) \ 1327#define XFS_OWNER_INFO_ATTR_FORK (1 << 0)
1328#define XFS_OWNER_INFO_BMBT_BLOCK (1 << 1)
1329struct xfs_owner_info {
1330 uint64_t oi_owner;
1331 xfs_fileoff_t oi_offset;
1332 unsigned int oi_flags;
1333};
1334
1335/*
1336 * Special owner types.
1337 *
1338 * Seeing as we only support up to 8EB, we have the upper bit of the owner field
1339 * to tell us we have a special owner value. We use these for static metadata
1340 * allocated at mkfs/growfs time, as well as for freespace management metadata.
1341 */
1342#define XFS_RMAP_OWN_NULL (-1ULL) /* No owner, for growfs */
1343#define XFS_RMAP_OWN_UNKNOWN (-2ULL) /* Unknown owner, for EFI recovery */
1344#define XFS_RMAP_OWN_FS (-3ULL) /* static fs metadata */
1345#define XFS_RMAP_OWN_LOG (-4ULL) /* static fs metadata */
1346#define XFS_RMAP_OWN_AG (-5ULL) /* AG freespace btree blocks */
1347#define XFS_RMAP_OWN_INOBT (-6ULL) /* Inode btree blocks */
1348#define XFS_RMAP_OWN_INODES (-7ULL) /* Inode chunk */
1349#define XFS_RMAP_OWN_MIN (-8ULL) /* guard */
1350
1351#define XFS_RMAP_NON_INODE_OWNER(owner) (!!((owner) & (1ULL << 63)))
1352
1353/*
1354 * Data record structure
1355 */
1356struct xfs_rmap_rec {
1357 __be32 rm_startblock; /* extent start block */
1358 __be32 rm_blockcount; /* extent length */
1359 __be64 rm_owner; /* extent owner */
1360 __be64 rm_offset; /* offset within the owner */
1361};
1362
1363/*
1364 * rmap btree record
1365 * rm_offset:63 is the attribute fork flag
1366 * rm_offset:62 is the bmbt block flag
1367 * rm_offset:61 is the unwritten extent flag (same as l0:63 in bmbt)
1368 * rm_offset:54-60 aren't used and should be zero
1369 * rm_offset:0-53 is the block offset within the inode
1370 */
1371#define XFS_RMAP_OFF_ATTR_FORK ((__uint64_t)1ULL << 63)
1372#define XFS_RMAP_OFF_BMBT_BLOCK ((__uint64_t)1ULL << 62)
1373#define XFS_RMAP_OFF_UNWRITTEN ((__uint64_t)1ULL << 61)
1374
1375#define XFS_RMAP_LEN_MAX ((__uint32_t)~0U)
1376#define XFS_RMAP_OFF_FLAGS (XFS_RMAP_OFF_ATTR_FORK | \
1377 XFS_RMAP_OFF_BMBT_BLOCK | \
1378 XFS_RMAP_OFF_UNWRITTEN)
1379#define XFS_RMAP_OFF_MASK ((__uint64_t)0x3FFFFFFFFFFFFFULL)
1380
1381#define XFS_RMAP_OFF(off) ((off) & XFS_RMAP_OFF_MASK)
1382
1383#define XFS_RMAP_IS_BMBT_BLOCK(off) (!!((off) & XFS_RMAP_OFF_BMBT_BLOCK))
1384#define XFS_RMAP_IS_ATTR_FORK(off) (!!((off) & XFS_RMAP_OFF_ATTR_FORK))
/*
 * Fix macro hygiene: the parameter was named "len" while the body
 * referenced "off", so the macro only compiled when the caller's
 * argument expression happened to be a variable named "off".  Rename
 * the parameter to match the body (and the sibling IS_BMBT_BLOCK /
 * IS_ATTR_FORK macros, which also take the rm_offset value).
 */
#define XFS_RMAP_IS_UNWRITTEN(off)	(!!((off) & XFS_RMAP_OFF_UNWRITTEN))
1386
1387#define RMAPBT_STARTBLOCK_BITLEN 32
1388#define RMAPBT_BLOCKCOUNT_BITLEN 32
1389#define RMAPBT_OWNER_BITLEN 64
1390#define RMAPBT_ATTRFLAG_BITLEN 1
1391#define RMAPBT_BMBTFLAG_BITLEN 1
1392#define RMAPBT_EXNTFLAG_BITLEN 1
1393#define RMAPBT_UNUSED_OFFSET_BITLEN 7
1394#define RMAPBT_OFFSET_BITLEN 54
1395
1396#define XFS_RMAP_ATTR_FORK (1 << 0)
1397#define XFS_RMAP_BMBT_BLOCK (1 << 1)
1398#define XFS_RMAP_UNWRITTEN (1 << 2)
1399#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \
1400 XFS_RMAP_BMBT_BLOCK)
1401#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN)
1402struct xfs_rmap_irec {
1403 xfs_agblock_t rm_startblock; /* extent start block */
1404 xfs_extlen_t rm_blockcount; /* extent length */
1405 __uint64_t rm_owner; /* extent owner */
1406 __uint64_t rm_offset; /* offset within the owner */
1407 unsigned int rm_flags; /* state flags */
1408};
1409
1410/*
1411 * Key structure
1412 *
1413 * We don't use the length for lookups
1414 */
1415struct xfs_rmap_key {
1416 __be32 rm_startblock; /* extent start block */
1417 __be64 rm_owner; /* extent owner */
1418 __be64 rm_offset; /* offset within the owner */
1419} __attribute__((packed));
1420
1421/* btree pointer type */
1422typedef __be32 xfs_rmap_ptr_t;
1423
1424#define XFS_RMAP_BLOCK(mp) \
1316 (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \ 1425 (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
1317 XFS_FIBT_BLOCK(mp) + 1 : \ 1426 XFS_FIBT_BLOCK(mp) + 1 : \
1318 XFS_IBT_BLOCK(mp) + 1) 1427 XFS_IBT_BLOCK(mp) + 1)
1319 1428
1320
1321
1322/* 1429/*
1323 * BMAP Btree format definitions 1430 * BMAP Btree format definitions
1324 * 1431 *
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index f5ec9c5ccae6..79455058b752 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -206,6 +206,7 @@ typedef struct xfs_fsop_resblks {
206#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ 206#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
207#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */ 207#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */
208#define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */ 208#define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */
209#define XFS_FSOP_GEOM_FLAGS_RMAPBT 0x80000 /* Reverse mapping btree */
209 210
210/* 211/*
211 * Minimum and maximum sizes need for growth checks. 212 * Minimum and maximum sizes need for growth checks.
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 4b1e408169a8..51b4e0de1fdc 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -24,6 +24,7 @@
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_defer.h"
27#include "xfs_inode.h" 28#include "xfs_inode.h"
28#include "xfs_btree.h" 29#include "xfs_btree.h"
29#include "xfs_ialloc.h" 30#include "xfs_ialloc.h"
@@ -39,6 +40,7 @@
39#include "xfs_icache.h" 40#include "xfs_icache.h"
40#include "xfs_trace.h" 41#include "xfs_trace.h"
41#include "xfs_log.h" 42#include "xfs_log.h"
43#include "xfs_rmap.h"
42 44
43 45
44/* 46/*
@@ -614,6 +616,7 @@ xfs_ialloc_ag_alloc(
614 args.tp = tp; 616 args.tp = tp;
615 args.mp = tp->t_mountp; 617 args.mp = tp->t_mountp;
616 args.fsbno = NULLFSBLOCK; 618 args.fsbno = NULLFSBLOCK;
619 xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_INODES);
617 620
618#ifdef DEBUG 621#ifdef DEBUG
619 /* randomly do sparse inode allocations */ 622 /* randomly do sparse inode allocations */
@@ -1817,19 +1820,21 @@ xfs_difree_inode_chunk(
1817 struct xfs_mount *mp, 1820 struct xfs_mount *mp,
1818 xfs_agnumber_t agno, 1821 xfs_agnumber_t agno,
1819 struct xfs_inobt_rec_incore *rec, 1822 struct xfs_inobt_rec_incore *rec,
1820 struct xfs_bmap_free *flist) 1823 struct xfs_defer_ops *dfops)
1821{ 1824{
1822 xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino); 1825 xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino);
1823 int startidx, endidx; 1826 int startidx, endidx;
1824 int nextbit; 1827 int nextbit;
1825 xfs_agblock_t agbno; 1828 xfs_agblock_t agbno;
1826 int contigblk; 1829 int contigblk;
1830 struct xfs_owner_info oinfo;
1827 DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS); 1831 DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
1832 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
1828 1833
1829 if (!xfs_inobt_issparse(rec->ir_holemask)) { 1834 if (!xfs_inobt_issparse(rec->ir_holemask)) {
1830 /* not sparse, calculate extent info directly */ 1835 /* not sparse, calculate extent info directly */
1831 xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno, sagbno), 1836 xfs_bmap_add_free(mp, dfops, XFS_AGB_TO_FSB(mp, agno, sagbno),
1832 mp->m_ialloc_blks); 1837 mp->m_ialloc_blks, &oinfo);
1833 return; 1838 return;
1834 } 1839 }
1835 1840
@@ -1872,8 +1877,8 @@ xfs_difree_inode_chunk(
1872 1877
1873 ASSERT(agbno % mp->m_sb.sb_spino_align == 0); 1878 ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
1874 ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); 1879 ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
1875 xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno, agbno), 1880 xfs_bmap_add_free(mp, dfops, XFS_AGB_TO_FSB(mp, agno, agbno),
1876 contigblk); 1881 contigblk, &oinfo);
1877 1882
1878 /* reset range to current bit and carry on... */ 1883 /* reset range to current bit and carry on... */
1879 startidx = endidx = nextbit; 1884 startidx = endidx = nextbit;
@@ -1889,7 +1894,7 @@ xfs_difree_inobt(
1889 struct xfs_trans *tp, 1894 struct xfs_trans *tp,
1890 struct xfs_buf *agbp, 1895 struct xfs_buf *agbp,
1891 xfs_agino_t agino, 1896 xfs_agino_t agino,
1892 struct xfs_bmap_free *flist, 1897 struct xfs_defer_ops *dfops,
1893 struct xfs_icluster *xic, 1898 struct xfs_icluster *xic,
1894 struct xfs_inobt_rec_incore *orec) 1899 struct xfs_inobt_rec_incore *orec)
1895{ 1900{
@@ -1976,7 +1981,7 @@ xfs_difree_inobt(
1976 goto error0; 1981 goto error0;
1977 } 1982 }
1978 1983
1979 xfs_difree_inode_chunk(mp, agno, &rec, flist); 1984 xfs_difree_inode_chunk(mp, agno, &rec, dfops);
1980 } else { 1985 } else {
1981 xic->deleted = 0; 1986 xic->deleted = 0;
1982 1987
@@ -2121,7 +2126,7 @@ int
2121xfs_difree( 2126xfs_difree(
2122 struct xfs_trans *tp, /* transaction pointer */ 2127 struct xfs_trans *tp, /* transaction pointer */
2123 xfs_ino_t inode, /* inode to be freed */ 2128 xfs_ino_t inode, /* inode to be freed */
2124 struct xfs_bmap_free *flist, /* extents to free */ 2129 struct xfs_defer_ops *dfops, /* extents to free */
2125 struct xfs_icluster *xic) /* cluster info if deleted */ 2130 struct xfs_icluster *xic) /* cluster info if deleted */
2126{ 2131{
2127 /* REFERENCED */ 2132 /* REFERENCED */
@@ -2173,7 +2178,7 @@ xfs_difree(
2173 /* 2178 /*
2174 * Fix up the inode allocation btree. 2179 * Fix up the inode allocation btree.
2175 */ 2180 */
2176 error = xfs_difree_inobt(mp, tp, agbp, agino, flist, xic, &rec); 2181 error = xfs_difree_inobt(mp, tp, agbp, agino, dfops, xic, &rec);
2177 if (error) 2182 if (error)
2178 goto error0; 2183 goto error0;
2179 2184
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 6e450df2979b..0bb89669fc07 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -95,7 +95,7 @@ int /* error */
95xfs_difree( 95xfs_difree(
96 struct xfs_trans *tp, /* transaction pointer */ 96 struct xfs_trans *tp, /* transaction pointer */
97 xfs_ino_t inode, /* inode to be freed */ 97 xfs_ino_t inode, /* inode to be freed */
98 struct xfs_bmap_free *flist, /* extents to free */ 98 struct xfs_defer_ops *dfops, /* extents to free */
99 struct xfs_icluster *ifree); /* cluster info if deleted */ 99 struct xfs_icluster *ifree); /* cluster info if deleted */
100 100
101/* 101/*
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 89c21d771e35..31ca2208c03d 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -32,6 +32,7 @@
32#include "xfs_trace.h" 32#include "xfs_trace.h"
33#include "xfs_cksum.h" 33#include "xfs_cksum.h"
34#include "xfs_trans.h" 34#include "xfs_trans.h"
35#include "xfs_rmap.h"
35 36
36 37
37STATIC int 38STATIC int
@@ -96,6 +97,7 @@ xfs_inobt_alloc_block(
96 memset(&args, 0, sizeof(args)); 97 memset(&args, 0, sizeof(args));
97 args.tp = cur->bc_tp; 98 args.tp = cur->bc_tp;
98 args.mp = cur->bc_mp; 99 args.mp = cur->bc_mp;
100 xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_INOBT);
99 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno); 101 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
100 args.minlen = 1; 102 args.minlen = 1;
101 args.maxlen = 1; 103 args.maxlen = 1;
@@ -125,8 +127,12 @@ xfs_inobt_free_block(
125 struct xfs_btree_cur *cur, 127 struct xfs_btree_cur *cur,
126 struct xfs_buf *bp) 128 struct xfs_buf *bp)
127{ 129{
130 struct xfs_owner_info oinfo;
131
132 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
128 return xfs_free_extent(cur->bc_tp, 133 return xfs_free_extent(cur->bc_tp,
129 XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1); 134 XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
135 &oinfo);
130} 136}
131 137
132STATIC int 138STATIC int
@@ -146,14 +152,6 @@ xfs_inobt_init_key_from_rec(
146} 152}
147 153
148STATIC void 154STATIC void
149xfs_inobt_init_rec_from_key(
150 union xfs_btree_key *key,
151 union xfs_btree_rec *rec)
152{
153 rec->inobt.ir_startino = key->inobt.ir_startino;
154}
155
156STATIC void
157xfs_inobt_init_rec_from_cur( 155xfs_inobt_init_rec_from_cur(
158 struct xfs_btree_cur *cur, 156 struct xfs_btree_cur *cur,
159 union xfs_btree_rec *rec) 157 union xfs_btree_rec *rec)
@@ -314,7 +312,6 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
314 .get_minrecs = xfs_inobt_get_minrecs, 312 .get_minrecs = xfs_inobt_get_minrecs,
315 .get_maxrecs = xfs_inobt_get_maxrecs, 313 .get_maxrecs = xfs_inobt_get_maxrecs,
316 .init_key_from_rec = xfs_inobt_init_key_from_rec, 314 .init_key_from_rec = xfs_inobt_init_key_from_rec,
317 .init_rec_from_key = xfs_inobt_init_rec_from_key,
318 .init_rec_from_cur = xfs_inobt_init_rec_from_cur, 315 .init_rec_from_cur = xfs_inobt_init_rec_from_cur,
319 .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, 316 .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
320 .key_diff = xfs_inobt_key_diff, 317 .key_diff = xfs_inobt_key_diff,
@@ -336,7 +333,6 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
336 .get_minrecs = xfs_inobt_get_minrecs, 333 .get_minrecs = xfs_inobt_get_minrecs,
337 .get_maxrecs = xfs_inobt_get_maxrecs, 334 .get_maxrecs = xfs_inobt_get_maxrecs,
338 .init_key_from_rec = xfs_inobt_init_key_from_rec, 335 .init_key_from_rec = xfs_inobt_init_key_from_rec,
339 .init_rec_from_key = xfs_inobt_init_rec_from_key,
340 .init_rec_from_cur = xfs_inobt_init_rec_from_cur, 336 .init_rec_from_cur = xfs_inobt_init_rec_from_cur,
341 .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur, 337 .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur,
342 .key_diff = xfs_inobt_key_diff, 338 .key_diff = xfs_inobt_key_diff,
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 9d9559eb2835..4b9769e23c83 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -22,6 +22,7 @@
22#include "xfs_log_format.h" 22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_mount.h" 24#include "xfs_mount.h"
25#include "xfs_defer.h"
25#include "xfs_inode.h" 26#include "xfs_inode.h"
26#include "xfs_error.h" 27#include "xfs_error.h"
27#include "xfs_cksum.h" 28#include "xfs_cksum.h"
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index e8f49c029ff0..a6eed43fa7cd 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -110,7 +110,9 @@ static inline uint xlog_get_cycle(char *ptr)
110#define XLOG_REG_TYPE_COMMIT 18 110#define XLOG_REG_TYPE_COMMIT 18
111#define XLOG_REG_TYPE_TRANSHDR 19 111#define XLOG_REG_TYPE_TRANSHDR 19
112#define XLOG_REG_TYPE_ICREATE 20 112#define XLOG_REG_TYPE_ICREATE 20
113#define XLOG_REG_TYPE_MAX 20 113#define XLOG_REG_TYPE_RUI_FORMAT 21
114#define XLOG_REG_TYPE_RUD_FORMAT 22
115#define XLOG_REG_TYPE_MAX 22
114 116
115/* 117/*
116 * Flags to log operation header 118 * Flags to log operation header
@@ -227,6 +229,8 @@ typedef struct xfs_trans_header {
227#define XFS_LI_DQUOT 0x123d 229#define XFS_LI_DQUOT 0x123d
228#define XFS_LI_QUOTAOFF 0x123e 230#define XFS_LI_QUOTAOFF 0x123e
229#define XFS_LI_ICREATE 0x123f 231#define XFS_LI_ICREATE 0x123f
232#define XFS_LI_RUI 0x1240 /* rmap update intent */
233#define XFS_LI_RUD 0x1241
230 234
231#define XFS_LI_TYPE_DESC \ 235#define XFS_LI_TYPE_DESC \
232 { XFS_LI_EFI, "XFS_LI_EFI" }, \ 236 { XFS_LI_EFI, "XFS_LI_EFI" }, \
@@ -236,7 +240,9 @@ typedef struct xfs_trans_header {
236 { XFS_LI_BUF, "XFS_LI_BUF" }, \ 240 { XFS_LI_BUF, "XFS_LI_BUF" }, \
237 { XFS_LI_DQUOT, "XFS_LI_DQUOT" }, \ 241 { XFS_LI_DQUOT, "XFS_LI_DQUOT" }, \
238 { XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" }, \ 242 { XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" }, \
239 { XFS_LI_ICREATE, "XFS_LI_ICREATE" } 243 { XFS_LI_ICREATE, "XFS_LI_ICREATE" }, \
244 { XFS_LI_RUI, "XFS_LI_RUI" }, \
245 { XFS_LI_RUD, "XFS_LI_RUD" }
240 246
241/* 247/*
242 * Inode Log Item Format definitions. 248 * Inode Log Item Format definitions.
@@ -604,6 +610,59 @@ typedef struct xfs_efd_log_format_64 {
604} xfs_efd_log_format_64_t; 610} xfs_efd_log_format_64_t;
605 611
606/* 612/*
613 * RUI/RUD (reverse mapping) log format definitions
614 */
615struct xfs_map_extent {
616 __uint64_t me_owner;
617 __uint64_t me_startblock;
618 __uint64_t me_startoff;
619 __uint32_t me_len;
620 __uint32_t me_flags;
621};
622
623/* rmap me_flags: upper bits are flags, lower byte is type code */
624#define XFS_RMAP_EXTENT_MAP 1
625#define XFS_RMAP_EXTENT_UNMAP 3
626#define XFS_RMAP_EXTENT_CONVERT 5
627#define XFS_RMAP_EXTENT_ALLOC 7
628#define XFS_RMAP_EXTENT_FREE 8
629#define XFS_RMAP_EXTENT_TYPE_MASK 0xFF
630
631#define XFS_RMAP_EXTENT_ATTR_FORK (1U << 31)
632#define XFS_RMAP_EXTENT_BMBT_BLOCK (1U << 30)
633#define XFS_RMAP_EXTENT_UNWRITTEN (1U << 29)
634
635#define XFS_RMAP_EXTENT_FLAGS (XFS_RMAP_EXTENT_TYPE_MASK | \
636 XFS_RMAP_EXTENT_ATTR_FORK | \
637 XFS_RMAP_EXTENT_BMBT_BLOCK | \
638 XFS_RMAP_EXTENT_UNWRITTEN)
639
640/*
641 * This is the structure used to lay out an rui log item in the
642 * log. The rui_extents field is a variable size array whose
643 * size is given by rui_nextents.
644 */
645struct xfs_rui_log_format {
646 __uint16_t rui_type; /* rui log item type */
647 __uint16_t rui_size; /* size of this item */
648 __uint32_t rui_nextents; /* # extents to rmap */
649 __uint64_t rui_id; /* rui identifier */
650 struct xfs_map_extent rui_extents[1]; /* array of extents to rmap */
651};
652
653/*
654 * This is the structure used to lay out an rud log item in the
655 * log. The rud_extents array is a variable size array whose
656 * size is given by rud_nextents;
657 */
658struct xfs_rud_log_format {
659 __uint16_t rud_type; /* rud log item type */
660 __uint16_t rud_size; /* size of this item */
661 __uint32_t __pad;
662 __uint64_t rud_rui_id; /* id of corresponding rui */
663};
664
665/*
607 * Dquot Log format definitions. 666 * Dquot Log format definitions.
608 * 667 *
609 * The first two fields must be the type and size fitting into 668 * The first two fields must be the type and size fitting into
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
new file mode 100644
index 000000000000..73d05407d663
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -0,0 +1,1399 @@
1/*
2 * Copyright (c) 2014 Red Hat, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_shared.h"
21#include "xfs_format.h"
22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h"
24#include "xfs_bit.h"
25#include "xfs_sb.h"
26#include "xfs_mount.h"
27#include "xfs_defer.h"
28#include "xfs_da_format.h"
29#include "xfs_da_btree.h"
30#include "xfs_btree.h"
31#include "xfs_trans.h"
32#include "xfs_alloc.h"
33#include "xfs_rmap.h"
34#include "xfs_rmap_btree.h"
35#include "xfs_trans_space.h"
36#include "xfs_trace.h"
37#include "xfs_error.h"
38#include "xfs_extent_busy.h"
39#include "xfs_bmap.h"
40#include "xfs_inode.h"
41
42/*
43 * Lookup the first record less than or equal to [bno, len, owner, offset]
44 * in the btree given by cur.
 *
 * flags carries the XFS_RMAP_* state bits that form part of the lookup
 * key; *stat is set by xfs_btree_lookup to report whether a record was
 * found and the cursor positioned.
45 */
46int
47xfs_rmap_lookup_le(
48 struct xfs_btree_cur *cur,
49 xfs_agblock_t bno,
50 xfs_extlen_t len,
51 uint64_t owner,
52 uint64_t offset,
53 unsigned int flags,
54 int *stat)
55{
56 cur->bc_rec.r.rm_startblock = bno;
57 cur->bc_rec.r.rm_blockcount = len;
58 cur->bc_rec.r.rm_owner = owner;
59 cur->bc_rec.r.rm_offset = offset;
60 cur->bc_rec.r.rm_flags = flags;
61 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
62}
63
64/*
65 * Lookup the record exactly matching [bno, len, owner, offset]
66 * in the btree given by cur.
 *
 * *stat is set by xfs_btree_lookup: nonzero iff an exact match exists.
67 */
68int
69xfs_rmap_lookup_eq(
70 struct xfs_btree_cur *cur,
71 xfs_agblock_t bno,
72 xfs_extlen_t len,
73 uint64_t owner,
74 uint64_t offset,
75 unsigned int flags,
76 int *stat)
77{
78 cur->bc_rec.r.rm_startblock = bno;
79 cur->bc_rec.r.rm_blockcount = len;
80 cur->bc_rec.r.rm_owner = owner;
81 cur->bc_rec.r.rm_offset = offset;
82 cur->bc_rec.r.rm_flags = flags;
83 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
84}
85
86/*
87 * Update the record referred to by cur to the value given
88 * by [bno, len, owner, offset].
89 * This either works (return 0) or gets an EFSCORRUPTED error.
90 */
91STATIC int
92xfs_rmap_update(
93 struct xfs_btree_cur *cur,
94 struct xfs_rmap_irec *irec)
95{
96 union xfs_btree_rec rec;
97 int error;
98
99 trace_xfs_rmap_update(cur->bc_mp, cur->bc_private.a.agno,
100 irec->rm_startblock, irec->rm_blockcount,
101 irec->rm_owner, irec->rm_offset, irec->rm_flags);
102
 /* The rm_flags state bits travel inside the packed on-disk offset. */
103 rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
104 rec.rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
105 rec.rmap.rm_owner = cpu_to_be64(irec->rm_owner);
106 rec.rmap.rm_offset = cpu_to_be64(
107 xfs_rmap_irec_offset_pack(irec));
108 error = xfs_btree_update(cur, &rec);
109 if (error)
110 trace_xfs_rmap_update_error(cur->bc_mp,
111 cur->bc_private.a.agno, error, _RET_IP_);
112 return error;
113}
114
/*
 * Insert the record [agbno, len, owner, offset, flags] into the rmap
 * btree given by rcur.  An exactly matching record must not already be
 * present; finding one is treated as corruption.
 */
115int
116xfs_rmap_insert(
117 struct xfs_btree_cur *rcur,
118 xfs_agblock_t agbno,
119 xfs_extlen_t len,
120 uint64_t owner,
121 uint64_t offset,
122 unsigned int flags)
123{
124 int i;
125 int error;
126
127 trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_private.a.agno, agbno,
128 len, owner, offset, flags);
129
 /* i == 0: verify no identical record already exists. */
130 error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i);
131 if (error)
132 goto done;
133 XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 0, done);
134
135 rcur->bc_rec.r.rm_startblock = agbno;
136 rcur->bc_rec.r.rm_blockcount = len;
137 rcur->bc_rec.r.rm_owner = owner;
138 rcur->bc_rec.r.rm_offset = offset;
139 rcur->bc_rec.r.rm_flags = flags;
140 error = xfs_btree_insert(rcur, &i);
141 if (error)
142 goto done;
143 XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done);
144done:
145 if (error)
146 trace_xfs_rmap_insert_error(rcur->bc_mp,
147 rcur->bc_private.a.agno, error, _RET_IP_);
148 return error;
149}
150
/*
 * Convert an on-disk (big-endian) rmap record to in-core form.  The
 * on-disk offset field also encodes the state flag bits, which
 * xfs_rmap_irec_offset_unpack splits back out into irec->rm_flags.
 */
151static int
152xfs_rmap_btrec_to_irec(
153 union xfs_btree_rec *rec,
154 struct xfs_rmap_irec *irec)
155{
156 irec->rm_flags = 0;
157 irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock);
158 irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount);
159 irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner);
160 return xfs_rmap_irec_offset_unpack(be64_to_cpu(rec->rmap.rm_offset),
161 irec);
162}
163
164/*
165 * Get the data from the pointed-to record.
166 */
167int
168xfs_rmap_get_rec(
169 struct xfs_btree_cur *cur,
170 struct xfs_rmap_irec *irec,
171 int *stat)
172{
173 union xfs_btree_rec *rec;
174 int error;
175
176 error = xfs_btree_get_rec(cur, &rec, stat);
177 if (error || !*stat)
178 return error;
179
180 return xfs_rmap_btrec_to_irec(rec, irec);
181}
182
183/*
184 * Find the extent in the rmap btree and remove it.
185 *
186 * The record we find should always be an exact match for the extent that we're
187 * looking for, since we insert them into the btree without modification.
188 *
189 * Special Case #1: when growing the filesystem, we "free" an extent when
190 * growing the last AG. This extent is new space and so it is not tracked as
191 * used space in the btree. The growfs code will pass in an owner of
192 * XFS_RMAP_OWN_NULL to indicate that it expected that there is no owner of this
193 * extent. We verify that - the extent lookup results in a record that does not
194 * overlap.
195 *
196 * Special Case #2: EFIs do not record the owner of the extent, so when
197 * recovering EFIs from the log we pass in XFS_RMAP_OWN_UNKNOWN to tell the rmap
198 * btree to ignore the owner (i.e. wildcard match) so we don't trigger
199 * corruption checks during log recovery.
200 */
201STATIC int
202xfs_rmap_unmap(
203 struct xfs_btree_cur *cur,
204 xfs_agblock_t bno,
205 xfs_extlen_t len,
206 bool unwritten,
207 struct xfs_owner_info *oinfo)
208{
209 struct xfs_mount *mp = cur->bc_mp;
210 struct xfs_rmap_irec ltrec;
211 uint64_t ltoff;
212 int error = 0;
213 int i;
214 uint64_t owner;
215 uint64_t offset;
216 unsigned int flags;
217 bool ignore_off;
218
 /* Non-inode owners and bmbt blocks carry no meaningful file offset. */
219 xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
220 ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) ||
221 (flags & XFS_RMAP_BMBT_BLOCK);
222 if (unwritten)
223 flags |= XFS_RMAP_UNWRITTEN;
224 trace_xfs_rmap_unmap(mp, cur->bc_private.a.agno, bno, len,
225 unwritten, oinfo);
226
227 /*
228 * We should always have a left record because there's a static record
229 * for the AG headers at rm_startblock == 0 created by mkfs/growfs that
230 * will not ever be removed from the tree.
231 */
232 error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, &i);
233 if (error)
234 goto out_error;
235 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
236
237 error = xfs_rmap_get_rec(cur, &ltrec, &i);
238 if (error)
239 goto out_error;
240 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
241 trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
242 cur->bc_private.a.agno, ltrec.rm_startblock,
243 ltrec.rm_blockcount, ltrec.rm_owner,
244 ltrec.rm_offset, ltrec.rm_flags);
245 ltoff = ltrec.rm_offset;
246
247 /*
248 * For growfs, the incoming extent must be beyond the left record we
249 * just found as it is new space and won't be used by anyone. This is
250 * just a corruption check as we don't actually do anything with this
251 * extent. Note that we need to use >= instead of > because it might
252 * be the case that the "left" extent goes all the way to EOFS.
253 */
254 if (owner == XFS_RMAP_OWN_NULL) {
255 XFS_WANT_CORRUPTED_GOTO(mp, bno >= ltrec.rm_startblock +
256 ltrec.rm_blockcount, out_error);
257 goto out_done;
258 }
259
260 /* Make sure the unwritten flag matches. */
261 XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
262 (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error);
263
264 /* Make sure the extent we found covers the entire freeing range. */
265 XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
266 ltrec.rm_startblock + ltrec.rm_blockcount >=
267 bno + len, out_error);
268
269 /* Make sure the owner matches what we expect to find in the tree. */
270 XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
271 XFS_RMAP_NON_INODE_OWNER(owner), out_error);
272
273 /* Check the offset, if necessary. */
274 if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
275 if (flags & XFS_RMAP_BMBT_BLOCK) {
276 XFS_WANT_CORRUPTED_GOTO(mp,
277 ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
278 out_error);
279 } else {
280 XFS_WANT_CORRUPTED_GOTO(mp,
281 ltrec.rm_offset <= offset, out_error);
282 XFS_WANT_CORRUPTED_GOTO(mp,
283 ltoff + ltrec.rm_blockcount >= offset + len,
284 out_error);
285 }
286 }
287
288 if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
289 /* exact match, simply remove the record from rmap tree */
290 trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
291 ltrec.rm_startblock, ltrec.rm_blockcount,
292 ltrec.rm_owner, ltrec.rm_offset,
293 ltrec.rm_flags);
294 error = xfs_btree_delete(cur, &i);
295 if (error)
296 goto out_error;
297 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
298 } else if (ltrec.rm_startblock == bno) {
299 /*
300 * overlap left hand side of extent: move the start, trim the
301 * length and update the current record.
302 *
303 * ltbno ltlen
304 * Orig: |oooooooooooooooooooo|
305 * Freeing: |fffffffff|
306 * Result: |rrrrrrrrrr|
307 * bno len
308 */
309 ltrec.rm_startblock += len;
310 ltrec.rm_blockcount -= len;
311 if (!ignore_off)
312 ltrec.rm_offset += len;
313 error = xfs_rmap_update(cur, &ltrec);
314 if (error)
315 goto out_error;
316 } else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) {
317 /*
318 * overlap right hand side of extent: trim the length and update
319 * the current record.
320 *
321 * ltbno ltlen
322 * Orig: |oooooooooooooooooooo|
323 * Freeing: |fffffffff|
324 * Result: |rrrrrrrrrr|
325 * bno len
326 */
327 ltrec.rm_blockcount -= len;
328 error = xfs_rmap_update(cur, &ltrec);
329 if (error)
330 goto out_error;
331 } else {
332
333 /*
334 * overlap middle of extent: trim the length of the existing
335 * record to the length of the new left-extent size, increment
336 * the insertion position so we can insert a new record
337 * containing the remaining right-extent space.
338 *
339 * ltbno ltlen
340 * Orig: |oooooooooooooooooooo|
341 * Freeing: |fffffffff|
342 * Result: |rrrrr| |rrrr|
343 * bno len
344 */
345 xfs_extlen_t orig_len = ltrec.rm_blockcount;
346
347 ltrec.rm_blockcount = bno - ltrec.rm_startblock;
348 error = xfs_rmap_update(cur, &ltrec);
349 if (error)
350 goto out_error;
351
352 error = xfs_btree_increment(cur, 0, &i);
353 if (error)
354 goto out_error;
355
356 cur->bc_rec.r.rm_startblock = bno + len;
357 cur->bc_rec.r.rm_blockcount = orig_len - len -
358 ltrec.rm_blockcount;
359 cur->bc_rec.r.rm_owner = ltrec.rm_owner;
360 if (ignore_off)
361 cur->bc_rec.r.rm_offset = 0;
362 else
363 cur->bc_rec.r.rm_offset = offset + len;
364 cur->bc_rec.r.rm_flags = flags;
365 trace_xfs_rmap_insert(mp, cur->bc_private.a.agno,
366 cur->bc_rec.r.rm_startblock,
367 cur->bc_rec.r.rm_blockcount,
368 cur->bc_rec.r.rm_owner,
369 cur->bc_rec.r.rm_offset,
370 cur->bc_rec.r.rm_flags);
371 error = xfs_btree_insert(cur, &i);
372 if (error)
373 goto out_error;
374 }
375
376out_done:
377 trace_xfs_rmap_unmap_done(mp, cur->bc_private.a.agno, bno, len,
378 unwritten, oinfo);
379out_error:
380 if (error)
381 trace_xfs_rmap_unmap_error(mp, cur->bc_private.a.agno,
382 error, _RET_IP_);
383 return error;
384}
385
386/*
387 * Remove a reference to an extent in the rmap btree.
 * No-op (returns 0) if the filesystem does not have the rmapbt feature.
388 */
389int
390xfs_rmap_free(
391 struct xfs_trans *tp,
392 struct xfs_buf *agbp,
393 xfs_agnumber_t agno,
394 xfs_agblock_t bno,
395 xfs_extlen_t len,
396 struct xfs_owner_info *oinfo)
397{
398 struct xfs_mount *mp = tp->t_mountp;
399 struct xfs_btree_cur *cur;
400 int error;
401
402 if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
403 return 0;
404
405 cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
406
407 error = xfs_rmap_unmap(cur, bno, len, false, oinfo);
408 if (error)
409 goto out_error;
410
411 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
412 return 0;
413
414out_error:
415 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
416 return error;
417}
418
419/*
420 * A mergeable rmap must have the same owner and the same values for
421 * the unwritten, attr_fork, and bmbt flags. The startblock and
422 * offset are checked separately.
 *
 * Records owned by XFS_RMAP_OWN_NULL are never considered mergeable.
423 */
424static bool
425xfs_rmap_is_mergeable(
426 struct xfs_rmap_irec *irec,
427 uint64_t owner,
428 unsigned int flags)
429{
430 if (irec->rm_owner == XFS_RMAP_OWN_NULL)
431 return false;
432 if (irec->rm_owner != owner)
433 return false;
434 if ((flags & XFS_RMAP_UNWRITTEN) ^
435 (irec->rm_flags & XFS_RMAP_UNWRITTEN))
436 return false;
437 if ((flags & XFS_RMAP_ATTR_FORK) ^
438 (irec->rm_flags & XFS_RMAP_ATTR_FORK))
439 return false;
440 if ((flags & XFS_RMAP_BMBT_BLOCK) ^
441 (irec->rm_flags & XFS_RMAP_BMBT_BLOCK))
442 return false;
443 return true;
444}
445
446/*
447 * When we allocate a new block, the first thing we do is add a reference to
448 * the extent in the rmap btree. This takes the form of a [agbno, length,
449 * owner, offset] record. Flags are encoded in the high bits of the offset
450 * field.
 *
 * Adjacent records with the same owner/flags are merged on either or
 * both sides where the block ranges (and offsets, for inode owners)
 * line up; otherwise a new record is inserted.
451 */
452STATIC int
453xfs_rmap_map(
454 struct xfs_btree_cur *cur,
455 xfs_agblock_t bno,
456 xfs_extlen_t len,
457 bool unwritten,
458 struct xfs_owner_info *oinfo)
459{
460 struct xfs_mount *mp = cur->bc_mp;
461 struct xfs_rmap_irec ltrec;
462 struct xfs_rmap_irec gtrec;
463 int have_gt;
464 int have_lt;
465 int error = 0;
466 int i;
467 uint64_t owner;
468 uint64_t offset;
469 unsigned int flags = 0;
470 bool ignore_off;
471
472 xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
473 ASSERT(owner != 0);
474 ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) ||
475 (flags & XFS_RMAP_BMBT_BLOCK);
476 if (unwritten)
477 flags |= XFS_RMAP_UNWRITTEN;
478 trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
479 unwritten, oinfo);
480
481 /*
482 * For the initial lookup, look for an exact match or the left-adjacent
483 * record for our insertion point. This will also give us the record for
484 * start block contiguity tests.
485 */
486 error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags,
487 &have_lt);
488 if (error)
489 goto out_error;
490 XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error);
491
492 error = xfs_rmap_get_rec(cur, &ltrec, &have_lt);
493 if (error)
494 goto out_error;
495 XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error);
496 trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
497 cur->bc_private.a.agno, ltrec.rm_startblock,
498 ltrec.rm_blockcount, ltrec.rm_owner,
499 ltrec.rm_offset, ltrec.rm_flags);
500
501 if (!xfs_rmap_is_mergeable(&ltrec, owner, flags))
502 have_lt = 0;
503
 /* The new extent must not overlap the record to its left. */
504 XFS_WANT_CORRUPTED_GOTO(mp,
505 have_lt == 0 ||
506 ltrec.rm_startblock + ltrec.rm_blockcount <= bno, out_error);
507
508 /*
509 * Increment the cursor to see if we have a right-adjacent record to our
510 * insertion point. This will give us the record for end block
511 * contiguity tests.
512 */
513 error = xfs_btree_increment(cur, 0, &have_gt);
514 if (error)
515 goto out_error;
516 if (have_gt) {
517 error = xfs_rmap_get_rec(cur, &gtrec, &have_gt);
518 if (error)
519 goto out_error;
520 XFS_WANT_CORRUPTED_GOTO(mp, have_gt == 1, out_error);
521 XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= gtrec.rm_startblock,
522 out_error);
523 trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
524 cur->bc_private.a.agno, gtrec.rm_startblock,
525 gtrec.rm_blockcount, gtrec.rm_owner,
526 gtrec.rm_offset, gtrec.rm_flags);
527 if (!xfs_rmap_is_mergeable(&gtrec, owner, flags))
528 have_gt = 0;
529 }
530
531 /*
532 * Note: cursor currently points one record to the right of ltrec, even
533 * if there is no record in the tree to the right.
534 */
535 if (have_lt &&
536 ltrec.rm_startblock + ltrec.rm_blockcount == bno &&
537 (ignore_off || ltrec.rm_offset + ltrec.rm_blockcount == offset)) {
538 /*
539 * left edge contiguous, merge into left record.
540 *
541 * ltbno ltlen
542 * orig: |ooooooooo|
543 * adding: |aaaaaaaaa|
544 * result: |rrrrrrrrrrrrrrrrrrr|
545 * bno len
546 */
547 ltrec.rm_blockcount += len;
548 if (have_gt &&
549 bno + len == gtrec.rm_startblock &&
550 (ignore_off || offset + len == gtrec.rm_offset) &&
551 (unsigned long)ltrec.rm_blockcount + len +
552 gtrec.rm_blockcount <= XFS_RMAP_LEN_MAX) {
553 /*
554 * right edge also contiguous, delete right record
555 * and merge into left record.
556 *
557 * ltbno ltlen gtbno gtlen
558 * orig: |ooooooooo| |ooooooooo|
559 * adding: |aaaaaaaaa|
560 * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
561 */
562 ltrec.rm_blockcount += gtrec.rm_blockcount;
563 trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
564 gtrec.rm_startblock,
565 gtrec.rm_blockcount,
566 gtrec.rm_owner,
567 gtrec.rm_offset,
568 gtrec.rm_flags);
569 error = xfs_btree_delete(cur, &i);
570 if (error)
571 goto out_error;
572 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
573 }
574
575 /* point the cursor back to the left record and update */
576 error = xfs_btree_decrement(cur, 0, &have_gt);
577 if (error)
578 goto out_error;
579 error = xfs_rmap_update(cur, &ltrec);
580 if (error)
581 goto out_error;
582 } else if (have_gt &&
583 bno + len == gtrec.rm_startblock &&
584 (ignore_off || offset + len == gtrec.rm_offset)) {
585 /*
586 * right edge contiguous, merge into right record.
587 *
588 * gtbno gtlen
589 * Orig: |ooooooooo|
590 * adding: |aaaaaaaaa|
591 * Result: |rrrrrrrrrrrrrrrrrrr|
592 * bno len
593 */
594 gtrec.rm_startblock = bno;
595 gtrec.rm_blockcount += len;
596 if (!ignore_off)
597 gtrec.rm_offset = offset;
598 error = xfs_rmap_update(cur, &gtrec);
599 if (error)
600 goto out_error;
601 } else {
602 /*
603 * no contiguous edge with identical owner, insert
604 * new record at current cursor position.
605 */
606 cur->bc_rec.r.rm_startblock = bno;
607 cur->bc_rec.r.rm_blockcount = len;
608 cur->bc_rec.r.rm_owner = owner;
609 cur->bc_rec.r.rm_offset = offset;
610 cur->bc_rec.r.rm_flags = flags;
611 trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, len,
612 owner, offset, flags);
613 error = xfs_btree_insert(cur, &i);
614 if (error)
615 goto out_error;
616 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
617 }
618
619 trace_xfs_rmap_map_done(mp, cur->bc_private.a.agno, bno, len,
620 unwritten, oinfo);
621out_error:
622 if (error)
623 trace_xfs_rmap_map_error(mp, cur->bc_private.a.agno,
624 error, _RET_IP_);
625 return error;
626}
627
628/*
629 * Add a reference to an extent in the rmap btree.
630 */
631int
632xfs_rmap_alloc(
633 struct xfs_trans *tp,
634 struct xfs_buf *agbp,
635 xfs_agnumber_t agno,
636 xfs_agblock_t bno,
637 xfs_extlen_t len,
638 struct xfs_owner_info *oinfo)
639{
640 struct xfs_mount *mp = tp->t_mountp;
641 struct xfs_btree_cur *cur;
642 int error;
643
644 if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
645 return 0;
646
647 cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
648 error = xfs_rmap_map(cur, bno, len, false, oinfo);
649 if (error)
650 goto out_error;
651
652 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
653 return 0;
654
655out_error:
656 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
657 return error;
658}
659
660#define RMAP_LEFT_CONTIG (1 << 0)
661#define RMAP_RIGHT_CONTIG (1 << 1)
662#define RMAP_LEFT_FILLING (1 << 2)
663#define RMAP_RIGHT_FILLING (1 << 3)
664#define RMAP_LEFT_VALID (1 << 6)
665#define RMAP_RIGHT_VALID (1 << 7)
666
667#define LEFT r[0]
668#define RIGHT r[1]
669#define PREV r[2]
670#define NEW r[3]
671
672/*
673 * Convert an unwritten extent to a real extent or vice versa.
674 * Does not handle overlapping extents.
675 */
676STATIC int
677xfs_rmap_convert(
678 struct xfs_btree_cur *cur,
679 xfs_agblock_t bno,
680 xfs_extlen_t len,
681 bool unwritten,
682 struct xfs_owner_info *oinfo)
683{
684 struct xfs_mount *mp = cur->bc_mp;
685 struct xfs_rmap_irec r[4]; /* neighbor extent entries */
686 /* left is 0, right is 1, prev is 2 */
687 /* new is 3 */
688 uint64_t owner;
689 uint64_t offset;
690 uint64_t new_endoff;
691 unsigned int oldext;
692 unsigned int newext;
693 unsigned int flags = 0;
694 int i;
695 int state = 0;
696 int error;
697
698 xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
699 ASSERT(!(XFS_RMAP_NON_INODE_OWNER(owner) ||
700 (flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))));
701 oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0;
702 new_endoff = offset + len;
703 trace_xfs_rmap_convert(mp, cur->bc_private.a.agno, bno, len,
704 unwritten, oinfo);
705
706 /*
707 * For the initial lookup, look for an exact match or the left-adjacent
708 * record for our insertion point. This will also give us the record for
709 * start block contiguity tests.
710 */
711 error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
712 if (error)
713 goto done;
714 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
715
716 error = xfs_rmap_get_rec(cur, &PREV, &i);
717 if (error)
718 goto done;
719 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
720 trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
721 cur->bc_private.a.agno, PREV.rm_startblock,
722 PREV.rm_blockcount, PREV.rm_owner,
723 PREV.rm_offset, PREV.rm_flags);
724
725 ASSERT(PREV.rm_offset <= offset);
726 ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff);
727 ASSERT((PREV.rm_flags & XFS_RMAP_UNWRITTEN) == oldext);
728 newext = ~oldext & XFS_RMAP_UNWRITTEN;
729
730 /*
731 * Set flags determining what part of the previous oldext allocation
732 * extent is being replaced by a newext allocation.
733 */
734 if (PREV.rm_offset == offset)
735 state |= RMAP_LEFT_FILLING;
736 if (PREV.rm_offset + PREV.rm_blockcount == new_endoff)
737 state |= RMAP_RIGHT_FILLING;
738
739 /*
740 * Decrement the cursor to see if we have a left-adjacent record to our
741 * insertion point. This will give us the record for end block
742 * contiguity tests.
743 */
744 error = xfs_btree_decrement(cur, 0, &i);
745 if (error)
746 goto done;
747 if (i) {
748 state |= RMAP_LEFT_VALID;
749 error = xfs_rmap_get_rec(cur, &LEFT, &i);
750 if (error)
751 goto done;
752 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
753 XFS_WANT_CORRUPTED_GOTO(mp,
754 LEFT.rm_startblock + LEFT.rm_blockcount <= bno,
755 done);
756 trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
757 cur->bc_private.a.agno, LEFT.rm_startblock,
758 LEFT.rm_blockcount, LEFT.rm_owner,
759 LEFT.rm_offset, LEFT.rm_flags);
760 if (LEFT.rm_startblock + LEFT.rm_blockcount == bno &&
761 LEFT.rm_offset + LEFT.rm_blockcount == offset &&
762 xfs_rmap_is_mergeable(&LEFT, owner, newext))
763 state |= RMAP_LEFT_CONTIG;
764 }
765
766 /*
767 * Increment the cursor to see if we have a right-adjacent record to our
768 * insertion point. This will give us the record for end block
769 * contiguity tests.
770 */
771 error = xfs_btree_increment(cur, 0, &i);
772 if (error)
773 goto done;
774 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
775 error = xfs_btree_increment(cur, 0, &i);
776 if (error)
777 goto done;
778 if (i) {
779 state |= RMAP_RIGHT_VALID;
780 error = xfs_rmap_get_rec(cur, &RIGHT, &i);
781 if (error)
782 goto done;
783 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
784 XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= RIGHT.rm_startblock,
785 done);
786 trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
787 cur->bc_private.a.agno, RIGHT.rm_startblock,
788 RIGHT.rm_blockcount, RIGHT.rm_owner,
789 RIGHT.rm_offset, RIGHT.rm_flags);
790 if (bno + len == RIGHT.rm_startblock &&
791 offset + len == RIGHT.rm_offset &&
792 xfs_rmap_is_mergeable(&RIGHT, owner, newext))
793 state |= RMAP_RIGHT_CONTIG;
794 }
795
796 /* check that left + prev + right is not too long */
797 if ((state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
798 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) ==
799 (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
800 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG) &&
801 (unsigned long)LEFT.rm_blockcount + len +
802 RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX)
803 state &= ~RMAP_RIGHT_CONTIG;
804
805 trace_xfs_rmap_convert_state(mp, cur->bc_private.a.agno, state,
806 _RET_IP_);
807
808 /* reset the cursor back to PREV */
809 error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
810 if (error)
811 goto done;
812 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
813
814 /*
815 * Switch out based on the FILLING and CONTIG state bits.
816 */
817 switch (state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
818 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) {
819 case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
820 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
821 /*
822 * Setting all of a previous oldext extent to newext.
823 * The left and right neighbors are both contiguous with new.
824 */
825 error = xfs_btree_increment(cur, 0, &i);
826 if (error)
827 goto done;
828 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
829 trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
830 RIGHT.rm_startblock, RIGHT.rm_blockcount,
831 RIGHT.rm_owner, RIGHT.rm_offset,
832 RIGHT.rm_flags);
833 error = xfs_btree_delete(cur, &i);
834 if (error)
835 goto done;
836 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
837 error = xfs_btree_decrement(cur, 0, &i);
838 if (error)
839 goto done;
840 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
841 trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
842 PREV.rm_startblock, PREV.rm_blockcount,
843 PREV.rm_owner, PREV.rm_offset,
844 PREV.rm_flags);
845 error = xfs_btree_delete(cur, &i);
846 if (error)
847 goto done;
848 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
849 error = xfs_btree_decrement(cur, 0, &i);
850 if (error)
851 goto done;
852 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
853 NEW = LEFT;
854 NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount;
855 error = xfs_rmap_update(cur, &NEW);
856 if (error)
857 goto done;
858 break;
859
860 case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
861 /*
862 * Setting all of a previous oldext extent to newext.
863 * The left neighbor is contiguous, the right is not.
864 */
865 trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
866 PREV.rm_startblock, PREV.rm_blockcount,
867 PREV.rm_owner, PREV.rm_offset,
868 PREV.rm_flags);
869 error = xfs_btree_delete(cur, &i);
870 if (error)
871 goto done;
872 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
873 error = xfs_btree_decrement(cur, 0, &i);
874 if (error)
875 goto done;
876 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
877 NEW = LEFT;
878 NEW.rm_blockcount += PREV.rm_blockcount;
879 error = xfs_rmap_update(cur, &NEW);
880 if (error)
881 goto done;
882 break;
883
884 case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
885 /*
886 * Setting all of a previous oldext extent to newext.
887 * The right neighbor is contiguous, the left is not.
888 */
889 error = xfs_btree_increment(cur, 0, &i);
890 if (error)
891 goto done;
892 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
893 trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
894 RIGHT.rm_startblock, RIGHT.rm_blockcount,
895 RIGHT.rm_owner, RIGHT.rm_offset,
896 RIGHT.rm_flags);
897 error = xfs_btree_delete(cur, &i);
898 if (error)
899 goto done;
900 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
901 error = xfs_btree_decrement(cur, 0, &i);
902 if (error)
903 goto done;
904 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
905 NEW = PREV;
906 NEW.rm_blockcount = len + RIGHT.rm_blockcount;
907 NEW.rm_flags = newext;
908 error = xfs_rmap_update(cur, &NEW);
909 if (error)
910 goto done;
911 break;
912
913 case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING:
914 /*
915 * Setting all of a previous oldext extent to newext.
916 * Neither the left nor right neighbors are contiguous with
917 * the new one.
918 */
919 NEW = PREV;
920 NEW.rm_flags = newext;
921 error = xfs_rmap_update(cur, &NEW);
922 if (error)
923 goto done;
924 break;
925
926 case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG:
927 /*
928 * Setting the first part of a previous oldext extent to newext.
929 * The left neighbor is contiguous.
930 */
931 NEW = PREV;
932 NEW.rm_offset += len;
933 NEW.rm_startblock += len;
934 NEW.rm_blockcount -= len;
935 error = xfs_rmap_update(cur, &NEW);
936 if (error)
937 goto done;
938 error = xfs_btree_decrement(cur, 0, &i);
939 if (error)
940 goto done;
941 NEW = LEFT;
942 NEW.rm_blockcount += len;
943 error = xfs_rmap_update(cur, &NEW);
944 if (error)
945 goto done;
946 break;
947
948 case RMAP_LEFT_FILLING:
949 /*
950 * Setting the first part of a previous oldext extent to newext.
951 * The left neighbor is not contiguous.
952 */
953 NEW = PREV;
954 NEW.rm_startblock += len;
955 NEW.rm_offset += len;
956 NEW.rm_blockcount -= len;
957 error = xfs_rmap_update(cur, &NEW);
958 if (error)
959 goto done;
960 NEW.rm_startblock = bno;
961 NEW.rm_owner = owner;
962 NEW.rm_offset = offset;
963 NEW.rm_blockcount = len;
964 NEW.rm_flags = newext;
965 cur->bc_rec.r = NEW;
966 trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno,
967 len, owner, offset, newext);
968 error = xfs_btree_insert(cur, &i);
969 if (error)
970 goto done;
971 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
972 break;
973
974 case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
975 /*
976 * Setting the last part of a previous oldext extent to newext.
977 * The right neighbor is contiguous with the new allocation.
978 */
979 NEW = PREV;
980 NEW.rm_blockcount -= len;
981 error = xfs_rmap_update(cur, &NEW);
982 if (error)
983 goto done;
984 error = xfs_btree_increment(cur, 0, &i);
985 if (error)
986 goto done;
987 NEW = RIGHT;
988 NEW.rm_offset = offset;
989 NEW.rm_startblock = bno;
990 NEW.rm_blockcount += len;
991 error = xfs_rmap_update(cur, &NEW);
992 if (error)
993 goto done;
994 break;
995
996 case RMAP_RIGHT_FILLING:
997 /*
998 * Setting the last part of a previous oldext extent to newext.
999 * The right neighbor is not contiguous.
1000 */
1001 NEW = PREV;
1002 NEW.rm_blockcount -= len;
1003 error = xfs_rmap_update(cur, &NEW);
1004 if (error)
1005 goto done;
1006 error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset,
1007 oldext, &i);
1008 if (error)
1009 goto done;
1010 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1011 NEW.rm_startblock = bno;
1012 NEW.rm_owner = owner;
1013 NEW.rm_offset = offset;
1014 NEW.rm_blockcount = len;
1015 NEW.rm_flags = newext;
1016 cur->bc_rec.r = NEW;
1017 trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno,
1018 len, owner, offset, newext);
1019 error = xfs_btree_insert(cur, &i);
1020 if (error)
1021 goto done;
1022 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1023 break;
1024
1025 case 0:
1026 /*
1027 * Setting the middle part of a previous oldext extent to
1028 * newext. Contiguity is impossible here.
1029 * One extent becomes three extents.
1030 */
1031 /* new right extent - oldext */
1032 NEW.rm_startblock = bno + len;
1033 NEW.rm_owner = owner;
1034 NEW.rm_offset = new_endoff;
1035 NEW.rm_blockcount = PREV.rm_offset + PREV.rm_blockcount -
1036 new_endoff;
1037 NEW.rm_flags = PREV.rm_flags;
1038 error = xfs_rmap_update(cur, &NEW);
1039 if (error)
1040 goto done;
1041 /* new left extent - oldext */
1042 NEW = PREV;
1043 NEW.rm_blockcount = offset - PREV.rm_offset;
1044 cur->bc_rec.r = NEW;
1045 trace_xfs_rmap_insert(mp, cur->bc_private.a.agno,
1046 NEW.rm_startblock, NEW.rm_blockcount,
1047 NEW.rm_owner, NEW.rm_offset,
1048 NEW.rm_flags);
1049 error = xfs_btree_insert(cur, &i);
1050 if (error)
1051 goto done;
1052 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1053 /*
1054 * Reset the cursor to the position of the new extent
1055 * we are about to insert as we can't trust it after
1056 * the previous insert.
1057 */
1058 error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset,
1059 oldext, &i);
1060 if (error)
1061 goto done;
1062 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1063 /* new middle extent - newext */
1064 cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN;
1065 cur->bc_rec.r.rm_flags |= newext;
1066 trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, len,
1067 owner, offset, newext);
1068 error = xfs_btree_insert(cur, &i);
1069 if (error)
1070 goto done;
1071 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1072 break;
1073
1074 case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1075 case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1076 case RMAP_LEFT_FILLING | RMAP_RIGHT_CONTIG:
1077 case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
1078 case RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1079 case RMAP_LEFT_CONTIG:
1080 case RMAP_RIGHT_CONTIG:
1081 /*
1082 * These cases are all impossible.
1083 */
1084 ASSERT(0);
1085 }
1086
1087 trace_xfs_rmap_convert_done(mp, cur->bc_private.a.agno, bno, len,
1088 unwritten, oinfo);
1089done:
1090 if (error)
1091 trace_xfs_rmap_convert_error(cur->bc_mp,
1092 cur->bc_private.a.agno, error, _RET_IP_);
1093 return error;
1094}
1095
1096#undef NEW
1097#undef LEFT
1098#undef RIGHT
1099#undef PREV
1100
/* Context for xfs_rmap_query_range: the caller's callback plus its data. */
struct xfs_rmap_query_range_info {
	xfs_rmap_query_range_fn	fn;	/* invoked once per record in range */
	void			*priv;	/* opaque cookie passed through to fn */
};
1105
1106/* Format btree record and pass to our callback. */
1107STATIC int
1108xfs_rmap_query_range_helper(
1109 struct xfs_btree_cur *cur,
1110 union xfs_btree_rec *rec,
1111 void *priv)
1112{
1113 struct xfs_rmap_query_range_info *query = priv;
1114 struct xfs_rmap_irec irec;
1115 int error;
1116
1117 error = xfs_rmap_btrec_to_irec(rec, &irec);
1118 if (error)
1119 return error;
1120 return query->fn(cur, &irec, query->priv);
1121}
1122
1123/* Find all rmaps between two keys. */
1124int
1125xfs_rmap_query_range(
1126 struct xfs_btree_cur *cur,
1127 struct xfs_rmap_irec *low_rec,
1128 struct xfs_rmap_irec *high_rec,
1129 xfs_rmap_query_range_fn fn,
1130 void *priv)
1131{
1132 union xfs_btree_irec low_brec;
1133 union xfs_btree_irec high_brec;
1134 struct xfs_rmap_query_range_info query;
1135
1136 low_brec.r = *low_rec;
1137 high_brec.r = *high_rec;
1138 query.priv = priv;
1139 query.fn = fn;
1140 return xfs_btree_query_range(cur, &low_brec, &high_brec,
1141 xfs_rmap_query_range_helper, &query);
1142}
1143
1144/* Clean up after calling xfs_rmap_finish_one. */
1145void
1146xfs_rmap_finish_one_cleanup(
1147 struct xfs_trans *tp,
1148 struct xfs_btree_cur *rcur,
1149 int error)
1150{
1151 struct xfs_buf *agbp;
1152
1153 if (rcur == NULL)
1154 return;
1155 agbp = rcur->bc_private.a.agbp;
1156 xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
1157 if (error)
1158 xfs_trans_brelse(tp, agbp);
1159}
1160
1161/*
1162 * Process one of the deferred rmap operations. We pass back the
1163 * btree cursor to maintain our lock on the rmapbt between calls.
1164 * This saves time and eliminates a buffer deadlock between the
1165 * superblock and the AGF because we'll always grab them in the same
1166 * order.
1167 */
1168int
1169xfs_rmap_finish_one(
1170 struct xfs_trans *tp,
1171 enum xfs_rmap_intent_type type,
1172 __uint64_t owner,
1173 int whichfork,
1174 xfs_fileoff_t startoff,
1175 xfs_fsblock_t startblock,
1176 xfs_filblks_t blockcount,
1177 xfs_exntst_t state,
1178 struct xfs_btree_cur **pcur)
1179{
1180 struct xfs_mount *mp = tp->t_mountp;
1181 struct xfs_btree_cur *rcur;
1182 struct xfs_buf *agbp = NULL;
1183 int error = 0;
1184 xfs_agnumber_t agno;
1185 struct xfs_owner_info oinfo;
1186 xfs_agblock_t bno;
1187 bool unwritten;
1188
1189 agno = XFS_FSB_TO_AGNO(mp, startblock);
1190 ASSERT(agno != NULLAGNUMBER);
1191 bno = XFS_FSB_TO_AGBNO(mp, startblock);
1192
1193 trace_xfs_rmap_deferred(mp, agno, type, bno, owner, whichfork,
1194 startoff, blockcount, state);
1195
1196 if (XFS_TEST_ERROR(false, mp,
1197 XFS_ERRTAG_RMAP_FINISH_ONE,
1198 XFS_RANDOM_RMAP_FINISH_ONE))
1199 return -EIO;
1200
1201 /*
1202 * If we haven't gotten a cursor or the cursor AG doesn't match
1203 * the startblock, get one now.
1204 */
1205 rcur = *pcur;
1206 if (rcur != NULL && rcur->bc_private.a.agno != agno) {
1207 xfs_rmap_finish_one_cleanup(tp, rcur, 0);
1208 rcur = NULL;
1209 *pcur = NULL;
1210 }
1211 if (rcur == NULL) {
1212 /*
1213 * Refresh the freelist before we start changing the
1214 * rmapbt, because a shape change could cause us to
1215 * allocate blocks.
1216 */
1217 error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
1218 if (error)
1219 return error;
1220 if (!agbp)
1221 return -EFSCORRUPTED;
1222
1223 rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
1224 if (!rcur) {
1225 error = -ENOMEM;
1226 goto out_cur;
1227 }
1228 }
1229 *pcur = rcur;
1230
1231 xfs_rmap_ino_owner(&oinfo, owner, whichfork, startoff);
1232 unwritten = state == XFS_EXT_UNWRITTEN;
1233 bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, startblock);
1234
1235 switch (type) {
1236 case XFS_RMAP_ALLOC:
1237 case XFS_RMAP_MAP:
1238 error = xfs_rmap_map(rcur, bno, blockcount, unwritten, &oinfo);
1239 break;
1240 case XFS_RMAP_FREE:
1241 case XFS_RMAP_UNMAP:
1242 error = xfs_rmap_unmap(rcur, bno, blockcount, unwritten,
1243 &oinfo);
1244 break;
1245 case XFS_RMAP_CONVERT:
1246 error = xfs_rmap_convert(rcur, bno, blockcount, !unwritten,
1247 &oinfo);
1248 break;
1249 default:
1250 ASSERT(0);
1251 error = -EFSCORRUPTED;
1252 }
1253 return error;
1254
1255out_cur:
1256 xfs_trans_brelse(tp, agbp);
1257
1258 return error;
1259}
1260
1261/*
1262 * Don't defer an rmap if we aren't an rmap filesystem.
1263 */
1264static bool
1265xfs_rmap_update_is_needed(
1266 struct xfs_mount *mp)
1267{
1268 return xfs_sb_version_hasrmapbt(&mp->m_sb);
1269}
1270
1271/*
1272 * Record a rmap intent; the list is kept sorted first by AG and then by
1273 * increasing age.
1274 */
1275static int
1276__xfs_rmap_add(
1277 struct xfs_mount *mp,
1278 struct xfs_defer_ops *dfops,
1279 enum xfs_rmap_intent_type type,
1280 __uint64_t owner,
1281 int whichfork,
1282 struct xfs_bmbt_irec *bmap)
1283{
1284 struct xfs_rmap_intent *ri;
1285
1286 trace_xfs_rmap_defer(mp, XFS_FSB_TO_AGNO(mp, bmap->br_startblock),
1287 type,
1288 XFS_FSB_TO_AGBNO(mp, bmap->br_startblock),
1289 owner, whichfork,
1290 bmap->br_startoff,
1291 bmap->br_blockcount,
1292 bmap->br_state);
1293
1294 ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_SLEEP | KM_NOFS);
1295 INIT_LIST_HEAD(&ri->ri_list);
1296 ri->ri_type = type;
1297 ri->ri_owner = owner;
1298 ri->ri_whichfork = whichfork;
1299 ri->ri_bmap = *bmap;
1300
1301 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list);
1302 return 0;
1303}
1304
1305/* Map an extent into a file. */
1306int
1307xfs_rmap_map_extent(
1308 struct xfs_mount *mp,
1309 struct xfs_defer_ops *dfops,
1310 struct xfs_inode *ip,
1311 int whichfork,
1312 struct xfs_bmbt_irec *PREV)
1313{
1314 if (!xfs_rmap_update_is_needed(mp))
1315 return 0;
1316
1317 return __xfs_rmap_add(mp, dfops, XFS_RMAP_MAP, ip->i_ino,
1318 whichfork, PREV);
1319}
1320
1321/* Unmap an extent out of a file. */
1322int
1323xfs_rmap_unmap_extent(
1324 struct xfs_mount *mp,
1325 struct xfs_defer_ops *dfops,
1326 struct xfs_inode *ip,
1327 int whichfork,
1328 struct xfs_bmbt_irec *PREV)
1329{
1330 if (!xfs_rmap_update_is_needed(mp))
1331 return 0;
1332
1333 return __xfs_rmap_add(mp, dfops, XFS_RMAP_UNMAP, ip->i_ino,
1334 whichfork, PREV);
1335}
1336
1337/* Convert a data fork extent from unwritten to real or vice versa. */
1338int
1339xfs_rmap_convert_extent(
1340 struct xfs_mount *mp,
1341 struct xfs_defer_ops *dfops,
1342 struct xfs_inode *ip,
1343 int whichfork,
1344 struct xfs_bmbt_irec *PREV)
1345{
1346 if (!xfs_rmap_update_is_needed(mp))
1347 return 0;
1348
1349 return __xfs_rmap_add(mp, dfops, XFS_RMAP_CONVERT, ip->i_ino,
1350 whichfork, PREV);
1351}
1352
1353/* Schedule the creation of an rmap for non-file data. */
1354int
1355xfs_rmap_alloc_extent(
1356 struct xfs_mount *mp,
1357 struct xfs_defer_ops *dfops,
1358 xfs_agnumber_t agno,
1359 xfs_agblock_t bno,
1360 xfs_extlen_t len,
1361 __uint64_t owner)
1362{
1363 struct xfs_bmbt_irec bmap;
1364
1365 if (!xfs_rmap_update_is_needed(mp))
1366 return 0;
1367
1368 bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
1369 bmap.br_blockcount = len;
1370 bmap.br_startoff = 0;
1371 bmap.br_state = XFS_EXT_NORM;
1372
1373 return __xfs_rmap_add(mp, dfops, XFS_RMAP_ALLOC, owner,
1374 XFS_DATA_FORK, &bmap);
1375}
1376
1377/* Schedule the deletion of an rmap for non-file data. */
1378int
1379xfs_rmap_free_extent(
1380 struct xfs_mount *mp,
1381 struct xfs_defer_ops *dfops,
1382 xfs_agnumber_t agno,
1383 xfs_agblock_t bno,
1384 xfs_extlen_t len,
1385 __uint64_t owner)
1386{
1387 struct xfs_bmbt_irec bmap;
1388
1389 if (!xfs_rmap_update_is_needed(mp))
1390 return 0;
1391
1392 bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
1393 bmap.br_blockcount = len;
1394 bmap.br_startoff = 0;
1395 bmap.br_state = XFS_EXT_NORM;
1396
1397 return __xfs_rmap_add(mp, dfops, XFS_RMAP_FREE, owner,
1398 XFS_DATA_FORK, &bmap);
1399}
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
new file mode 100644
index 000000000000..71cf99a4acba
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -0,0 +1,209 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#ifndef __XFS_RMAP_H__
21#define __XFS_RMAP_H__
22
23static inline void
24xfs_rmap_ag_owner(
25 struct xfs_owner_info *oi,
26 uint64_t owner)
27{
28 oi->oi_owner = owner;
29 oi->oi_offset = 0;
30 oi->oi_flags = 0;
31}
32
33static inline void
34xfs_rmap_ino_bmbt_owner(
35 struct xfs_owner_info *oi,
36 xfs_ino_t ino,
37 int whichfork)
38{
39 oi->oi_owner = ino;
40 oi->oi_offset = 0;
41 oi->oi_flags = XFS_OWNER_INFO_BMBT_BLOCK;
42 if (whichfork == XFS_ATTR_FORK)
43 oi->oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
44}
45
46static inline void
47xfs_rmap_ino_owner(
48 struct xfs_owner_info *oi,
49 xfs_ino_t ino,
50 int whichfork,
51 xfs_fileoff_t offset)
52{
53 oi->oi_owner = ino;
54 oi->oi_offset = offset;
55 oi->oi_flags = 0;
56 if (whichfork == XFS_ATTR_FORK)
57 oi->oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
58}
59
/*
 * Mark the owner as unknown so the rmap update machinery leaves the
 * extent's owner record alone -- presumably for moves where ownership
 * does not change (inferred from the OWN_UNKNOWN name; TODO confirm
 * against the consumers of oi_owner).
 * NOTE(review): oi_offset and oi_flags are deliberately left
 * uninitialized here; callers must not rely on them.
 */
static inline void
xfs_rmap_skip_owner_update(
	struct xfs_owner_info	*oi)
{
	oi->oi_owner = XFS_RMAP_OWN_UNKNOWN;
}
66
67/* Reverse mapping functions. */
68
69struct xfs_buf;
70
71static inline __u64
72xfs_rmap_irec_offset_pack(
73 const struct xfs_rmap_irec *irec)
74{
75 __u64 x;
76
77 x = XFS_RMAP_OFF(irec->rm_offset);
78 if (irec->rm_flags & XFS_RMAP_ATTR_FORK)
79 x |= XFS_RMAP_OFF_ATTR_FORK;
80 if (irec->rm_flags & XFS_RMAP_BMBT_BLOCK)
81 x |= XFS_RMAP_OFF_BMBT_BLOCK;
82 if (irec->rm_flags & XFS_RMAP_UNWRITTEN)
83 x |= XFS_RMAP_OFF_UNWRITTEN;
84 return x;
85}
86
87static inline int
88xfs_rmap_irec_offset_unpack(
89 __u64 offset,
90 struct xfs_rmap_irec *irec)
91{
92 if (offset & ~(XFS_RMAP_OFF_MASK | XFS_RMAP_OFF_FLAGS))
93 return -EFSCORRUPTED;
94 irec->rm_offset = XFS_RMAP_OFF(offset);
95 if (offset & XFS_RMAP_OFF_ATTR_FORK)
96 irec->rm_flags |= XFS_RMAP_ATTR_FORK;
97 if (offset & XFS_RMAP_OFF_BMBT_BLOCK)
98 irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
99 if (offset & XFS_RMAP_OFF_UNWRITTEN)
100 irec->rm_flags |= XFS_RMAP_UNWRITTEN;
101 return 0;
102}
103
104static inline void
105xfs_owner_info_unpack(
106 struct xfs_owner_info *oinfo,
107 uint64_t *owner,
108 uint64_t *offset,
109 unsigned int *flags)
110{
111 unsigned int r = 0;
112
113 *owner = oinfo->oi_owner;
114 *offset = oinfo->oi_offset;
115 if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
116 r |= XFS_RMAP_ATTR_FORK;
117 if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
118 r |= XFS_RMAP_BMBT_BLOCK;
119 *flags = r;
120}
121
122static inline void
123xfs_owner_info_pack(
124 struct xfs_owner_info *oinfo,
125 uint64_t owner,
126 uint64_t offset,
127 unsigned int flags)
128{
129 oinfo->oi_owner = owner;
130 oinfo->oi_offset = XFS_RMAP_OFF(offset);
131 oinfo->oi_flags = 0;
132 if (flags & XFS_RMAP_ATTR_FORK)
133 oinfo->oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
134 if (flags & XFS_RMAP_BMBT_BLOCK)
135 oinfo->oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
136}
137
138int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp,
139 xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
140 struct xfs_owner_info *oinfo);
141int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
142 xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
143 struct xfs_owner_info *oinfo);
144
145int xfs_rmap_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
146 xfs_extlen_t len, uint64_t owner, uint64_t offset,
147 unsigned int flags, int *stat);
148int xfs_rmap_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
149 xfs_extlen_t len, uint64_t owner, uint64_t offset,
150 unsigned int flags, int *stat);
151int xfs_rmap_insert(struct xfs_btree_cur *rcur, xfs_agblock_t agbno,
152 xfs_extlen_t len, uint64_t owner, uint64_t offset,
153 unsigned int flags);
154int xfs_rmap_get_rec(struct xfs_btree_cur *cur, struct xfs_rmap_irec *irec,
155 int *stat);
156
157typedef int (*xfs_rmap_query_range_fn)(
158 struct xfs_btree_cur *cur,
159 struct xfs_rmap_irec *rec,
160 void *priv);
161
162int xfs_rmap_query_range(struct xfs_btree_cur *cur,
163 struct xfs_rmap_irec *low_rec, struct xfs_rmap_irec *high_rec,
164 xfs_rmap_query_range_fn fn, void *priv);
165
/* Types of deferred rmap updates. */
enum xfs_rmap_intent_type {
	XFS_RMAP_MAP,		/* add rmap for a file extent */
	XFS_RMAP_MAP_SHARED,	/* shared-extent variant -- handled elsewhere; TODO confirm */
	XFS_RMAP_UNMAP,		/* remove rmap for a file extent */
	XFS_RMAP_UNMAP_SHARED,	/* shared-extent variant -- handled elsewhere; TODO confirm */
	XFS_RMAP_CONVERT,	/* flip the unwritten flag on an rmap */
	XFS_RMAP_CONVERT_SHARED, /* shared-extent variant -- handled elsewhere; TODO confirm */
	XFS_RMAP_ALLOC,		/* add rmap for non-file (metadata) blocks */
	XFS_RMAP_FREE,		/* remove rmap for non-file (metadata) blocks */
};
176
/* A single deferred rmap update, queued on the defer-ops list. */
struct xfs_rmap_intent {
	struct list_head			ri_list;	/* link in the defer-ops list */
	enum xfs_rmap_intent_type		ri_type;	/* which operation to perform */
	__uint64_t				ri_owner;	/* extent owner (inode number or special) */
	int					ri_whichfork;	/* data or attr fork */
	struct xfs_bmbt_irec			ri_bmap;	/* the mapping being recorded */
};
184
185/* functions for updating the rmapbt based on bmbt map/unmap operations */
186int xfs_rmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
187 struct xfs_inode *ip, int whichfork,
188 struct xfs_bmbt_irec *imap);
189int xfs_rmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
190 struct xfs_inode *ip, int whichfork,
191 struct xfs_bmbt_irec *imap);
192int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
193 struct xfs_inode *ip, int whichfork,
194 struct xfs_bmbt_irec *imap);
195int xfs_rmap_alloc_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
196 xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
197 __uint64_t owner);
198int xfs_rmap_free_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
199 xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
200 __uint64_t owner);
201
202void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
203 struct xfs_btree_cur *rcur, int error);
204int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type,
205 __uint64_t owner, int whichfork, xfs_fileoff_t startoff,
206 xfs_fsblock_t startblock, xfs_filblks_t blockcount,
207 xfs_exntst_t state, struct xfs_btree_cur **pcur);
208
209#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
new file mode 100644
index 000000000000..bc1faebc84ec
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -0,0 +1,511 @@
1/*
2 * Copyright (c) 2014 Red Hat, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_shared.h"
21#include "xfs_format.h"
22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h"
24#include "xfs_bit.h"
25#include "xfs_sb.h"
26#include "xfs_mount.h"
27#include "xfs_defer.h"
28#include "xfs_inode.h"
29#include "xfs_trans.h"
30#include "xfs_alloc.h"
31#include "xfs_btree.h"
32#include "xfs_rmap.h"
33#include "xfs_rmap_btree.h"
34#include "xfs_trace.h"
35#include "xfs_cksum.h"
36#include "xfs_error.h"
37#include "xfs_extent_busy.h"
38
39/*
40 * Reverse map btree.
41 *
42 * This is a per-ag tree used to track the owner(s) of a given extent. With
43 * reflink it is possible for there to be multiple owners, which is a departure
44 * from classic XFS. Owner records for data extents are inserted when the
45 * extent is mapped and removed when an extent is unmapped. Owner records for
46 * all other block types (i.e. metadata) are inserted when an extent is
47 * allocated and removed when an extent is freed. There can only be one owner
48 * of a metadata extent, usually an inode or some other metadata structure like
49 * an AG btree.
50 *
51 * The rmap btree is part of the free space management, so blocks for the tree
52 * are sourced from the agfl. Hence we need transaction reservation support for
53 * this tree so that the freelist is always large enough. This also impacts on
54 * the minimum space we need to leave free in the AG.
55 *
56 * The tree is ordered by [ag block, owner, offset]. This is a large key size,
57 * but it is the only way to enforce unique keys when a block can be owned by
58 * multiple files at any offset. There's no need to order/search by extent
59 * size for online updating/management of the tree. It is intended that most
60 * reverse lookups will be to find the owner(s) of a particular block, or to
61 * try to recover tree and file data from corrupt primary metadata.
62 */
63
64static struct xfs_btree_cur *
65xfs_rmapbt_dup_cursor(
66 struct xfs_btree_cur *cur)
67{
68 return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
69 cur->bc_private.a.agbp, cur->bc_private.a.agno);
70}
71
72STATIC void
73xfs_rmapbt_set_root(
74 struct xfs_btree_cur *cur,
75 union xfs_btree_ptr *ptr,
76 int inc)
77{
78 struct xfs_buf *agbp = cur->bc_private.a.agbp;
79 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
80 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
81 int btnum = cur->bc_btnum;
82 struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
83
84 ASSERT(ptr->s != 0);
85
86 agf->agf_roots[btnum] = ptr->s;
87 be32_add_cpu(&agf->agf_levels[btnum], inc);
88 pag->pagf_levels[btnum] += inc;
89 xfs_perag_put(pag);
90
91 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
92}
93
94STATIC int
95xfs_rmapbt_alloc_block(
96 struct xfs_btree_cur *cur,
97 union xfs_btree_ptr *start,
98 union xfs_btree_ptr *new,
99 int *stat)
100{
101 int error;
102 xfs_agblock_t bno;
103
104 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
105
106 /* Allocate the new block from the freelist. If we can't, give up. */
107 error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
108 &bno, 1);
109 if (error) {
110 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
111 return error;
112 }
113
114 trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno,
115 bno, 1);
116 if (bno == NULLAGBLOCK) {
117 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
118 *stat = 0;
119 return 0;
120 }
121
122 xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1,
123 false);
124
125 xfs_trans_agbtree_delta(cur->bc_tp, 1);
126 new->s = cpu_to_be32(bno);
127
128 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
129 *stat = 1;
130 return 0;
131}
132
133STATIC int
134xfs_rmapbt_free_block(
135 struct xfs_btree_cur *cur,
136 struct xfs_buf *bp)
137{
138 struct xfs_buf *agbp = cur->bc_private.a.agbp;
139 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
140 xfs_agblock_t bno;
141 int error;
142
143 bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
144 trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno,
145 bno, 1);
146 error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
147 if (error)
148 return error;
149
150 xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
151 XFS_EXTENT_BUSY_SKIP_DISCARD);
152 xfs_trans_agbtree_delta(cur->bc_tp, -1);
153
154 return 0;
155}
156
157STATIC int
158xfs_rmapbt_get_minrecs(
159 struct xfs_btree_cur *cur,
160 int level)
161{
162 return cur->bc_mp->m_rmap_mnr[level != 0];
163}
164
165STATIC int
166xfs_rmapbt_get_maxrecs(
167 struct xfs_btree_cur *cur,
168 int level)
169{
170 return cur->bc_mp->m_rmap_mxr[level != 0];
171}
172
173STATIC void
174xfs_rmapbt_init_key_from_rec(
175 union xfs_btree_key *key,
176 union xfs_btree_rec *rec)
177{
178 key->rmap.rm_startblock = rec->rmap.rm_startblock;
179 key->rmap.rm_owner = rec->rmap.rm_owner;
180 key->rmap.rm_offset = rec->rmap.rm_offset;
181}
182
183/*
184 * The high key for a reverse mapping record can be computed by shifting
185 * the startblock and offset to the highest value that would still map
186 * to that record. In practice this means that we add blockcount-1 to
187 * the startblock for all records, and if the record is for a data/attr
188 * fork mapping, we add blockcount-1 to the offset too.
189 */
190STATIC void
191xfs_rmapbt_init_high_key_from_rec(
192 union xfs_btree_key *key,
193 union xfs_btree_rec *rec)
194{
195 __uint64_t off;
196 int adj;
197
198 adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;
199
200 key->rmap.rm_startblock = rec->rmap.rm_startblock;
201 be32_add_cpu(&key->rmap.rm_startblock, adj);
202 key->rmap.rm_owner = rec->rmap.rm_owner;
203 key->rmap.rm_offset = rec->rmap.rm_offset;
204 if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
205 XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
206 return;
207 off = be64_to_cpu(key->rmap.rm_offset);
208 off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
209 key->rmap.rm_offset = cpu_to_be64(off);
210}
211
212STATIC void
213xfs_rmapbt_init_rec_from_cur(
214 struct xfs_btree_cur *cur,
215 union xfs_btree_rec *rec)
216{
217 rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
218 rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
219 rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
220 rec->rmap.rm_offset = cpu_to_be64(
221 xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
222}
223
224STATIC void
225xfs_rmapbt_init_ptr_from_cur(
226 struct xfs_btree_cur *cur,
227 union xfs_btree_ptr *ptr)
228{
229 struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
230
231 ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
232 ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
233
234 ptr->s = agf->agf_roots[cur->bc_btnum];
235}
236
237STATIC __int64_t
238xfs_rmapbt_key_diff(
239 struct xfs_btree_cur *cur,
240 union xfs_btree_key *key)
241{
242 struct xfs_rmap_irec *rec = &cur->bc_rec.r;
243 struct xfs_rmap_key *kp = &key->rmap;
244 __u64 x, y;
245 __int64_t d;
246
247 d = (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
248 if (d)
249 return d;
250
251 x = be64_to_cpu(kp->rm_owner);
252 y = rec->rm_owner;
253 if (x > y)
254 return 1;
255 else if (y > x)
256 return -1;
257
258 x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
259 y = rec->rm_offset;
260 if (x > y)
261 return 1;
262 else if (y > x)
263 return -1;
264 return 0;
265}
266
267STATIC __int64_t
268xfs_rmapbt_diff_two_keys(
269 struct xfs_btree_cur *cur,
270 union xfs_btree_key *k1,
271 union xfs_btree_key *k2)
272{
273 struct xfs_rmap_key *kp1 = &k1->rmap;
274 struct xfs_rmap_key *kp2 = &k2->rmap;
275 __int64_t d;
276 __u64 x, y;
277
278 d = (__int64_t)be32_to_cpu(kp1->rm_startblock) -
279 be32_to_cpu(kp2->rm_startblock);
280 if (d)
281 return d;
282
283 x = be64_to_cpu(kp1->rm_owner);
284 y = be64_to_cpu(kp2->rm_owner);
285 if (x > y)
286 return 1;
287 else if (y > x)
288 return -1;
289
290 x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
291 y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
292 if (x > y)
293 return 1;
294 else if (y > x)
295 return -1;
296 return 0;
297}
298
299static bool
300xfs_rmapbt_verify(
301 struct xfs_buf *bp)
302{
303 struct xfs_mount *mp = bp->b_target->bt_mount;
304 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
305 struct xfs_perag *pag = bp->b_pag;
306 unsigned int level;
307
308 /*
309 * magic number and level verification
310 *
311 * During growfs operations, we can't verify the exact level or owner as
312 * the perag is not fully initialised and hence not attached to the
313 * buffer. In this case, check against the maximum tree depth.
314 *
315 * Similarly, during log recovery we will have a perag structure
316 * attached, but the agf information will not yet have been initialised
317 * from the on disk AGF. Again, we can only check against maximum limits
318 * in this case.
319 */
320 if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC))
321 return false;
322
323 if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
324 return false;
325 if (!xfs_btree_sblock_v5hdr_verify(bp))
326 return false;
327
328 level = be16_to_cpu(block->bb_level);
329 if (pag && pag->pagf_init) {
330 if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
331 return false;
332 } else if (level >= mp->m_rmap_maxlevels)
333 return false;
334
335 return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
336}
337
338static void
339xfs_rmapbt_read_verify(
340 struct xfs_buf *bp)
341{
342 if (!xfs_btree_sblock_verify_crc(bp))
343 xfs_buf_ioerror(bp, -EFSBADCRC);
344 else if (!xfs_rmapbt_verify(bp))
345 xfs_buf_ioerror(bp, -EFSCORRUPTED);
346
347 if (bp->b_error) {
348 trace_xfs_btree_corrupt(bp, _RET_IP_);
349 xfs_verifier_error(bp);
350 }
351}
352
353static void
354xfs_rmapbt_write_verify(
355 struct xfs_buf *bp)
356{
357 if (!xfs_rmapbt_verify(bp)) {
358 trace_xfs_btree_corrupt(bp, _RET_IP_);
359 xfs_buf_ioerror(bp, -EFSCORRUPTED);
360 xfs_verifier_error(bp);
361 return;
362 }
363 xfs_btree_sblock_calc_crc(bp);
364
365}
366
367const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
368 .name = "xfs_rmapbt",
369 .verify_read = xfs_rmapbt_read_verify,
370 .verify_write = xfs_rmapbt_write_verify,
371};
372
373#if defined(DEBUG) || defined(XFS_WARN)
374STATIC int
375xfs_rmapbt_keys_inorder(
376 struct xfs_btree_cur *cur,
377 union xfs_btree_key *k1,
378 union xfs_btree_key *k2)
379{
380 __uint32_t x;
381 __uint32_t y;
382 __uint64_t a;
383 __uint64_t b;
384
385 x = be32_to_cpu(k1->rmap.rm_startblock);
386 y = be32_to_cpu(k2->rmap.rm_startblock);
387 if (x < y)
388 return 1;
389 else if (x > y)
390 return 0;
391 a = be64_to_cpu(k1->rmap.rm_owner);
392 b = be64_to_cpu(k2->rmap.rm_owner);
393 if (a < b)
394 return 1;
395 else if (a > b)
396 return 0;
397 a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
398 b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
399 if (a <= b)
400 return 1;
401 return 0;
402}
403
404STATIC int
405xfs_rmapbt_recs_inorder(
406 struct xfs_btree_cur *cur,
407 union xfs_btree_rec *r1,
408 union xfs_btree_rec *r2)
409{
410 __uint32_t x;
411 __uint32_t y;
412 __uint64_t a;
413 __uint64_t b;
414
415 x = be32_to_cpu(r1->rmap.rm_startblock);
416 y = be32_to_cpu(r2->rmap.rm_startblock);
417 if (x < y)
418 return 1;
419 else if (x > y)
420 return 0;
421 a = be64_to_cpu(r1->rmap.rm_owner);
422 b = be64_to_cpu(r2->rmap.rm_owner);
423 if (a < b)
424 return 1;
425 else if (a > b)
426 return 0;
427 a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
428 b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
429 if (a <= b)
430 return 1;
431 return 0;
432}
433#endif /* DEBUG || XFS_WARN */
434
435static const struct xfs_btree_ops xfs_rmapbt_ops = {
436 .rec_len = sizeof(struct xfs_rmap_rec),
437 .key_len = 2 * sizeof(struct xfs_rmap_key),
438
439 .dup_cursor = xfs_rmapbt_dup_cursor,
440 .set_root = xfs_rmapbt_set_root,
441 .alloc_block = xfs_rmapbt_alloc_block,
442 .free_block = xfs_rmapbt_free_block,
443 .get_minrecs = xfs_rmapbt_get_minrecs,
444 .get_maxrecs = xfs_rmapbt_get_maxrecs,
445 .init_key_from_rec = xfs_rmapbt_init_key_from_rec,
446 .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec,
447 .init_rec_from_cur = xfs_rmapbt_init_rec_from_cur,
448 .init_ptr_from_cur = xfs_rmapbt_init_ptr_from_cur,
449 .key_diff = xfs_rmapbt_key_diff,
450 .buf_ops = &xfs_rmapbt_buf_ops,
451 .diff_two_keys = xfs_rmapbt_diff_two_keys,
452#if defined(DEBUG) || defined(XFS_WARN)
453 .keys_inorder = xfs_rmapbt_keys_inorder,
454 .recs_inorder = xfs_rmapbt_recs_inorder,
455#endif
456};
457
458/*
459 * Allocate a new allocation btree cursor.
460 */
461struct xfs_btree_cur *
462xfs_rmapbt_init_cursor(
463 struct xfs_mount *mp,
464 struct xfs_trans *tp,
465 struct xfs_buf *agbp,
466 xfs_agnumber_t agno)
467{
468 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
469 struct xfs_btree_cur *cur;
470
471 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
472 cur->bc_tp = tp;
473 cur->bc_mp = mp;
474 /* Overlapping btree; 2 keys per pointer. */
475 cur->bc_btnum = XFS_BTNUM_RMAP;
476 cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING;
477 cur->bc_blocklog = mp->m_sb.sb_blocklog;
478 cur->bc_ops = &xfs_rmapbt_ops;
479 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
480
481 cur->bc_private.a.agbp = agbp;
482 cur->bc_private.a.agno = agno;
483
484 return cur;
485}
486
487/*
488 * Calculate number of records in an rmap btree block.
489 */
490int
491xfs_rmapbt_maxrecs(
492 struct xfs_mount *mp,
493 int blocklen,
494 int leaf)
495{
496 blocklen -= XFS_RMAP_BLOCK_LEN;
497
498 if (leaf)
499 return blocklen / sizeof(struct xfs_rmap_rec);
500 return blocklen /
501 (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
502}
503
504/* Compute the maximum height of an rmap btree. */
505void
506xfs_rmapbt_compute_maxlevels(
507 struct xfs_mount *mp)
508{
509 mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(mp,
510 mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
511}
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
new file mode 100644
index 000000000000..e73a55357dab
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -0,0 +1,61 @@
1/*
2 * Copyright (c) 2014 Red Hat, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_RMAP_BTREE_H__
19#define __XFS_RMAP_BTREE_H__
20
21struct xfs_buf;
22struct xfs_btree_cur;
23struct xfs_mount;
24
25/* rmaps only exist on crc enabled filesystems */
26#define XFS_RMAP_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN
27
28/*
29 * Record, key, and pointer address macros for btree blocks.
30 *
31 * (note that some of these may appear unused, but they are used in userspace)
32 */
33#define XFS_RMAP_REC_ADDR(block, index) \
34 ((struct xfs_rmap_rec *) \
35 ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
36 (((index) - 1) * sizeof(struct xfs_rmap_rec))))
37
38#define XFS_RMAP_KEY_ADDR(block, index) \
39 ((struct xfs_rmap_key *) \
40 ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
41 ((index) - 1) * 2 * sizeof(struct xfs_rmap_key)))
42
43#define XFS_RMAP_HIGH_KEY_ADDR(block, index) \
44 ((struct xfs_rmap_key *) \
45 ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
46 sizeof(struct xfs_rmap_key) + \
47 ((index) - 1) * 2 * sizeof(struct xfs_rmap_key)))
48
49#define XFS_RMAP_PTR_ADDR(block, index, maxrecs) \
50 ((xfs_rmap_ptr_t *) \
51 ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
52 (maxrecs) * 2 * sizeof(struct xfs_rmap_key) + \
53 ((index) - 1) * sizeof(xfs_rmap_ptr_t)))
54
55struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
56 struct xfs_trans *tp, struct xfs_buf *bp,
57 xfs_agnumber_t agno);
58int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf);
59extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp);
60
61#endif /* __XFS_RMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 12ca86778e02..0e3d4f5ec33c 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -24,6 +24,7 @@
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_defer.h"
27#include "xfs_inode.h" 28#include "xfs_inode.h"
28#include "xfs_ialloc.h" 29#include "xfs_ialloc.h"
29#include "xfs_alloc.h" 30#include "xfs_alloc.h"
@@ -36,6 +37,7 @@
36#include "xfs_alloc_btree.h" 37#include "xfs_alloc_btree.h"
37#include "xfs_ialloc_btree.h" 38#include "xfs_ialloc_btree.h"
38#include "xfs_log.h" 39#include "xfs_log.h"
40#include "xfs_rmap_btree.h"
39 41
40/* 42/*
41 * Physical superblock buffer manipulations. Shared with libxfs in userspace. 43 * Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -729,6 +731,11 @@ xfs_sb_mount_common(
729 mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; 731 mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
730 mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2; 732 mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
731 733
734 mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 1);
735 mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 0);
736 mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2;
737 mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2;
738
732 mp->m_bsize = XFS_FSB_TO_BB(mp, 1); 739 mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
733 mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, 740 mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
734 sbp->sb_inopblock); 741 sbp->sb_inopblock);
@@ -738,6 +745,8 @@ xfs_sb_mount_common(
738 mp->m_ialloc_min_blks = sbp->sb_spino_align; 745 mp->m_ialloc_min_blks = sbp->sb_spino_align;
739 else 746 else
740 mp->m_ialloc_min_blks = mp->m_ialloc_blks; 747 mp->m_ialloc_min_blks = mp->m_ialloc_blks;
748 mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
749 mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp);
741} 750}
742 751
743/* 752/*
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 16002b5ec4eb..0c5b30bd884c 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -38,6 +38,7 @@ extern const struct xfs_buf_ops xfs_agi_buf_ops;
38extern const struct xfs_buf_ops xfs_agf_buf_ops; 38extern const struct xfs_buf_ops xfs_agf_buf_ops;
39extern const struct xfs_buf_ops xfs_agfl_buf_ops; 39extern const struct xfs_buf_ops xfs_agfl_buf_ops;
40extern const struct xfs_buf_ops xfs_allocbt_buf_ops; 40extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
41extern const struct xfs_buf_ops xfs_rmapbt_buf_ops;
41extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops; 42extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
42extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops; 43extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
43extern const struct xfs_buf_ops xfs_bmbt_buf_ops; 44extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
@@ -116,6 +117,7 @@ int xfs_log_calc_minimum_size(struct xfs_mount *);
116#define XFS_INO_BTREE_REF 3 117#define XFS_INO_BTREE_REF 3
117#define XFS_ALLOC_BTREE_REF 2 118#define XFS_ALLOC_BTREE_REF 2
118#define XFS_BMAP_BTREE_REF 2 119#define XFS_BMAP_BTREE_REF 2
120#define XFS_RMAP_BTREE_REF 2
119#define XFS_DIR_BTREE_REF 2 121#define XFS_DIR_BTREE_REF 2
120#define XFS_INO_REF 2 122#define XFS_INO_REF 2
121#define XFS_ATTR_BTREE_REF 1 123#define XFS_ATTR_BTREE_REF 1
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 68cb1e7bf2bb..301ef2f4dbd6 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -64,6 +64,30 @@ xfs_calc_buf_res(
64} 64}
65 65
66/* 66/*
67 * Per-extent log reservation for the btree changes involved in freeing or
68 * allocating an extent. In classic XFS there were two trees that will be
69 * modified (bnobt + cntbt). With rmap enabled, there are three trees
 70 * (bnobt + cntbt + rmapbt). The number of blocks reserved is based on the formula:
71 *
72 * num trees * ((2 blocks/level * max depth) - 1)
73 *
74 * Keep in mind that max depth is calculated separately for each type of tree.
75 */
76static uint
77xfs_allocfree_log_count(
78 struct xfs_mount *mp,
79 uint num_ops)
80{
81 uint blocks;
82
83 blocks = num_ops * 2 * (2 * mp->m_ag_maxlevels - 1);
84 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
85 blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);
86
87 return blocks;
88}
89
90/*
67 * Logging inodes is really tricksy. They are logged in memory format, 91 * Logging inodes is really tricksy. They are logged in memory format,
68 * which means that what we write into the log doesn't directly translate into 92 * which means that what we write into the log doesn't directly translate into
69 * the amount of space they use on disk. 93 * the amount of space they use on disk.
@@ -126,7 +150,7 @@ xfs_calc_inode_res(
126 */ 150 */
127STATIC uint 151STATIC uint
128xfs_calc_finobt_res( 152xfs_calc_finobt_res(
129 struct xfs_mount *mp, 153 struct xfs_mount *mp,
130 int alloc, 154 int alloc,
131 int modify) 155 int modify)
132{ 156{
@@ -137,7 +161,7 @@ xfs_calc_finobt_res(
137 161
138 res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); 162 res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
139 if (alloc) 163 if (alloc)
140 res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 164 res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
141 XFS_FSB_TO_B(mp, 1)); 165 XFS_FSB_TO_B(mp, 1));
142 if (modify) 166 if (modify)
143 res += (uint)XFS_FSB_TO_B(mp, 1); 167 res += (uint)XFS_FSB_TO_B(mp, 1);
@@ -153,9 +177,9 @@ xfs_calc_finobt_res(
153 * item logged to try to account for the overhead of the transaction mechanism. 177 * item logged to try to account for the overhead of the transaction mechanism.
154 * 178 *
155 * Note: Most of the reservations underestimate the number of allocation 179 * Note: Most of the reservations underestimate the number of allocation
156 * groups into which they could free extents in the xfs_bmap_finish() call. 180 * groups into which they could free extents in the xfs_defer_finish() call.
157 * This is because the number in the worst case is quite high and quite 181 * This is because the number in the worst case is quite high and quite
158 * unusual. In order to fix this we need to change xfs_bmap_finish() to free 182 * unusual. In order to fix this we need to change xfs_defer_finish() to free
159 * extents in only a single AG at a time. This will require changes to the 183 * extents in only a single AG at a time. This will require changes to the
160 * EFI code as well, however, so that the EFI for the extents not freed is 184 * EFI code as well, however, so that the EFI for the extents not freed is
161 * logged again in each transaction. See SGI PV #261917. 185 * logged again in each transaction. See SGI PV #261917.
@@ -188,10 +212,10 @@ xfs_calc_write_reservation(
188 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 212 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
189 XFS_FSB_TO_B(mp, 1)) + 213 XFS_FSB_TO_B(mp, 1)) +
190 xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + 214 xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
191 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), 215 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
192 XFS_FSB_TO_B(mp, 1))), 216 XFS_FSB_TO_B(mp, 1))),
193 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + 217 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
194 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), 218 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
195 XFS_FSB_TO_B(mp, 1)))); 219 XFS_FSB_TO_B(mp, 1))));
196} 220}
197 221
@@ -217,10 +241,10 @@ xfs_calc_itruncate_reservation(
217 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, 241 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
218 XFS_FSB_TO_B(mp, 1))), 242 XFS_FSB_TO_B(mp, 1))),
219 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + 243 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
220 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), 244 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
221 XFS_FSB_TO_B(mp, 1)) + 245 XFS_FSB_TO_B(mp, 1)) +
222 xfs_calc_buf_res(5, 0) + 246 xfs_calc_buf_res(5, 0) +
223 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 247 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
224 XFS_FSB_TO_B(mp, 1)) + 248 XFS_FSB_TO_B(mp, 1)) +
225 xfs_calc_buf_res(2 + mp->m_ialloc_blks + 249 xfs_calc_buf_res(2 + mp->m_ialloc_blks +
226 mp->m_in_maxlevels, 0))); 250 mp->m_in_maxlevels, 0)));
@@ -247,7 +271,7 @@ xfs_calc_rename_reservation(
247 xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), 271 xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
248 XFS_FSB_TO_B(mp, 1))), 272 XFS_FSB_TO_B(mp, 1))),
249 (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + 273 (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
250 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3), 274 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 3),
251 XFS_FSB_TO_B(mp, 1)))); 275 XFS_FSB_TO_B(mp, 1))));
252} 276}
253 277
@@ -286,7 +310,7 @@ xfs_calc_link_reservation(
286 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), 310 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
287 XFS_FSB_TO_B(mp, 1))), 311 XFS_FSB_TO_B(mp, 1))),
288 (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + 312 (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
289 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 313 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
290 XFS_FSB_TO_B(mp, 1)))); 314 XFS_FSB_TO_B(mp, 1))));
291} 315}
292 316
@@ -324,7 +348,7 @@ xfs_calc_remove_reservation(
324 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), 348 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
325 XFS_FSB_TO_B(mp, 1))), 349 XFS_FSB_TO_B(mp, 1))),
326 (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) + 350 (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
327 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), 351 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
328 XFS_FSB_TO_B(mp, 1)))); 352 XFS_FSB_TO_B(mp, 1))));
329} 353}
330 354
@@ -371,7 +395,7 @@ xfs_calc_create_resv_alloc(
371 mp->m_sb.sb_sectsize + 395 mp->m_sb.sb_sectsize +
372 xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + 396 xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
373 xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 397 xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
374 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 398 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
375 XFS_FSB_TO_B(mp, 1)); 399 XFS_FSB_TO_B(mp, 1));
376} 400}
377 401
@@ -399,7 +423,7 @@ xfs_calc_icreate_resv_alloc(
399 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + 423 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
400 mp->m_sb.sb_sectsize + 424 mp->m_sb.sb_sectsize +
401 xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 425 xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
402 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 426 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
403 XFS_FSB_TO_B(mp, 1)) + 427 XFS_FSB_TO_B(mp, 1)) +
404 xfs_calc_finobt_res(mp, 0, 0); 428 xfs_calc_finobt_res(mp, 0, 0);
405} 429}
@@ -483,7 +507,7 @@ xfs_calc_ifree_reservation(
483 xfs_calc_buf_res(1, 0) + 507 xfs_calc_buf_res(1, 0) +
484 xfs_calc_buf_res(2 + mp->m_ialloc_blks + 508 xfs_calc_buf_res(2 + mp->m_ialloc_blks +
485 mp->m_in_maxlevels, 0) + 509 mp->m_in_maxlevels, 0) +
486 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 510 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
487 XFS_FSB_TO_B(mp, 1)) + 511 XFS_FSB_TO_B(mp, 1)) +
488 xfs_calc_finobt_res(mp, 0, 1); 512 xfs_calc_finobt_res(mp, 0, 1);
489} 513}
@@ -513,7 +537,7 @@ xfs_calc_growdata_reservation(
513 struct xfs_mount *mp) 537 struct xfs_mount *mp)
514{ 538{
515 return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + 539 return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
516 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 540 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
517 XFS_FSB_TO_B(mp, 1)); 541 XFS_FSB_TO_B(mp, 1));
518} 542}
519 543
@@ -535,7 +559,7 @@ xfs_calc_growrtalloc_reservation(
535 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 559 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
536 XFS_FSB_TO_B(mp, 1)) + 560 XFS_FSB_TO_B(mp, 1)) +
537 xfs_calc_inode_res(mp, 1) + 561 xfs_calc_inode_res(mp, 1) +
538 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 562 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
539 XFS_FSB_TO_B(mp, 1)); 563 XFS_FSB_TO_B(mp, 1));
540} 564}
541 565
@@ -611,7 +635,7 @@ xfs_calc_addafork_reservation(
611 xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + 635 xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
612 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1, 636 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
613 XFS_FSB_TO_B(mp, 1)) + 637 XFS_FSB_TO_B(mp, 1)) +
614 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 638 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
615 XFS_FSB_TO_B(mp, 1)); 639 XFS_FSB_TO_B(mp, 1));
616} 640}
617 641
@@ -634,7 +658,7 @@ xfs_calc_attrinval_reservation(
634 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), 658 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
635 XFS_FSB_TO_B(mp, 1))), 659 XFS_FSB_TO_B(mp, 1))),
636 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + 660 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
637 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), 661 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
638 XFS_FSB_TO_B(mp, 1)))); 662 XFS_FSB_TO_B(mp, 1))));
639} 663}
640 664
@@ -701,7 +725,7 @@ xfs_calc_attrrm_reservation(
701 XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 725 XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
702 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)), 726 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
703 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + 727 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
704 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), 728 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
705 XFS_FSB_TO_B(mp, 1)))); 729 XFS_FSB_TO_B(mp, 1))));
706} 730}
707 731
diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h
index 797815012c0e..0eb46ed6d404 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.h
+++ b/fs/xfs/libxfs/xfs_trans_resv.h
@@ -68,16 +68,6 @@ struct xfs_trans_resv {
68#define M_RES(mp) (&(mp)->m_resv) 68#define M_RES(mp) (&(mp)->m_resv)
69 69
70/* 70/*
71 * Per-extent log reservation for the allocation btree changes
72 * involved in freeing or allocating an extent.
73 * 2 trees * (2 blocks/level * max depth - 1) * block size
74 */
75#define XFS_ALLOCFREE_LOG_RES(mp,nx) \
76 ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * (mp)->m_ag_maxlevels - 1)))
77#define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
78 ((nx) * (2 * (2 * (mp)->m_ag_maxlevels - 1)))
79
80/*
81 * Per-directory log reservation for any directory change. 71 * Per-directory log reservation for any directory change.
82 * dir blocks: (1 btree block per level + data block + free block) * dblock size 72 * dir blocks: (1 btree block per level + data block + free block) * dblock size
83 * bmap btree: (levels + 2) * max depth * block size 73 * bmap btree: (levels + 2) * max depth * block size
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index b79dc66b2ecd..3d503647f26b 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -108,8 +108,8 @@ typedef enum {
108} xfs_lookup_t; 108} xfs_lookup_t;
109 109
110typedef enum { 110typedef enum {
111 XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi, 111 XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi,
112 XFS_BTNUM_FINOi, XFS_BTNUM_MAX 112 XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_MAX
113} xfs_btnum_t; 113} xfs_btnum_t;
114 114
115struct xfs_name { 115struct xfs_name {
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index cd4a850564f2..4ece4f2ffc72 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -25,6 +25,7 @@
25#include "xfs_bit.h" 25#include "xfs_bit.h"
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_da_format.h" 27#include "xfs_da_format.h"
28#include "xfs_defer.h"
28#include "xfs_inode.h" 29#include "xfs_inode.h"
29#include "xfs_btree.h" 30#include "xfs_btree.h"
30#include "xfs_trans.h" 31#include "xfs_trans.h"
@@ -40,6 +41,7 @@
40#include "xfs_trace.h" 41#include "xfs_trace.h"
41#include "xfs_icache.h" 42#include "xfs_icache.h"
42#include "xfs_log.h" 43#include "xfs_log.h"
44#include "xfs_rmap_btree.h"
43 45
44/* Kernel only BMAP related definitions and functions */ 46/* Kernel only BMAP related definitions and functions */
45 47
@@ -79,95 +81,6 @@ xfs_zero_extent(
79 GFP_NOFS, true); 81 GFP_NOFS, true);
80} 82}
81 83
82/* Sort bmap items by AG. */
83static int
84xfs_bmap_free_list_cmp(
85 void *priv,
86 struct list_head *a,
87 struct list_head *b)
88{
89 struct xfs_mount *mp = priv;
90 struct xfs_bmap_free_item *ra;
91 struct xfs_bmap_free_item *rb;
92
93 ra = container_of(a, struct xfs_bmap_free_item, xbfi_list);
94 rb = container_of(b, struct xfs_bmap_free_item, xbfi_list);
95 return XFS_FSB_TO_AGNO(mp, ra->xbfi_startblock) -
96 XFS_FSB_TO_AGNO(mp, rb->xbfi_startblock);
97}
98
99/*
100 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
101 * caller. Frees all the extents that need freeing, which must be done
102 * last due to locking considerations. We never free any extents in
103 * the first transaction.
104 *
105 * If an inode *ip is provided, rejoin it to the transaction if
106 * the transaction was committed.
107 */
108int /* error */
109xfs_bmap_finish(
110 struct xfs_trans **tp, /* transaction pointer addr */
111 struct xfs_bmap_free *flist, /* i/o: list extents to free */
112 struct xfs_inode *ip)
113{
114 struct xfs_efd_log_item *efd; /* extent free data */
115 struct xfs_efi_log_item *efi; /* extent free intention */
116 int error; /* error return value */
117 int committed;/* xact committed or not */
118 struct xfs_bmap_free_item *free; /* free extent item */
119
120 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
121 if (flist->xbf_count == 0)
122 return 0;
123
124 list_sort((*tp)->t_mountp, &flist->xbf_flist, xfs_bmap_free_list_cmp);
125
126 efi = xfs_trans_get_efi(*tp, flist->xbf_count);
127 list_for_each_entry(free, &flist->xbf_flist, xbfi_list)
128 xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock,
129 free->xbfi_blockcount);
130
131 error = __xfs_trans_roll(tp, ip, &committed);
132 if (error) {
133 /*
134 * If the transaction was committed, drop the EFD reference
135 * since we're bailing out of here. The other reference is
136 * dropped when the EFI hits the AIL.
137 *
138 * If the transaction was not committed, the EFI is freed by the
139 * EFI item unlock handler on abort. Also, we have a new
140 * transaction so we should return committed=1 even though we're
141 * returning an error.
142 */
143 if (committed) {
144 xfs_efi_release(efi);
145 xfs_force_shutdown((*tp)->t_mountp,
146 SHUTDOWN_META_IO_ERROR);
147 }
148 return error;
149 }
150
151 /*
152 * Get an EFD and free each extent in the list, logging to the EFD in
153 * the process. The remaining bmap free list is cleaned up by the caller
154 * on error.
155 */
156 efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count);
157 while (!list_empty(&flist->xbf_flist)) {
158 free = list_first_entry(&flist->xbf_flist,
159 struct xfs_bmap_free_item, xbfi_list);
160 error = xfs_trans_free_extent(*tp, efd, free->xbfi_startblock,
161 free->xbfi_blockcount);
162 if (error)
163 return error;
164
165 xfs_bmap_del_free(flist, free);
166 }
167
168 return 0;
169}
170
171int 84int
172xfs_bmap_rtalloc( 85xfs_bmap_rtalloc(
173 struct xfs_bmalloca *ap) /* bmap alloc argument struct */ 86 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
@@ -214,9 +127,9 @@ xfs_bmap_rtalloc(
214 /* 127 /*
215 * Lock out modifications to both the RT bitmap and summary inodes 128 * Lock out modifications to both the RT bitmap and summary inodes
216 */ 129 */
217 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); 130 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
218 xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); 131 xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
219 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); 132 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
220 xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL); 133 xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
221 134
222 /* 135 /*
@@ -773,7 +686,7 @@ xfs_bmap_punch_delalloc_range(
773 xfs_bmbt_irec_t imap; 686 xfs_bmbt_irec_t imap;
774 int nimaps = 1; 687 int nimaps = 1;
775 xfs_fsblock_t firstblock; 688 xfs_fsblock_t firstblock;
776 xfs_bmap_free_t flist; 689 struct xfs_defer_ops dfops;
777 690
778 /* 691 /*
779 * Map the range first and check that it is a delalloc extent 692 * Map the range first and check that it is a delalloc extent
@@ -804,18 +717,18 @@ xfs_bmap_punch_delalloc_range(
804 WARN_ON(imap.br_blockcount == 0); 717 WARN_ON(imap.br_blockcount == 0);
805 718
806 /* 719 /*
807 * Note: while we initialise the firstblock/flist pair, they 720 * Note: while we initialise the firstblock/dfops pair, they
808 * should never be used because blocks should never be 721 * should never be used because blocks should never be
809 * allocated or freed for a delalloc extent and hence we need 722 * allocated or freed for a delalloc extent and hence we need
810 * don't cancel or finish them after the xfs_bunmapi() call. 723 * don't cancel or finish them after the xfs_bunmapi() call.
811 */ 724 */
812 xfs_bmap_init(&flist, &firstblock); 725 xfs_defer_init(&dfops, &firstblock);
813 error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock, 726 error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
814 &flist, &done); 727 &dfops, &done);
815 if (error) 728 if (error)
816 break; 729 break;
817 730
818 ASSERT(!flist.xbf_count && list_empty(&flist.xbf_flist)); 731 ASSERT(!xfs_defer_has_unfinished_work(&dfops));
819next_block: 732next_block:
820 start_fsb++; 733 start_fsb++;
821 remaining--; 734 remaining--;
@@ -972,7 +885,7 @@ xfs_alloc_file_space(
972 int rt; 885 int rt;
973 xfs_trans_t *tp; 886 xfs_trans_t *tp;
974 xfs_bmbt_irec_t imaps[1], *imapp; 887 xfs_bmbt_irec_t imaps[1], *imapp;
975 xfs_bmap_free_t free_list; 888 struct xfs_defer_ops dfops;
976 uint qblocks, resblks, resrtextents; 889 uint qblocks, resblks, resrtextents;
977 int error; 890 int error;
978 891
@@ -1063,17 +976,17 @@ xfs_alloc_file_space(
1063 976
1064 xfs_trans_ijoin(tp, ip, 0); 977 xfs_trans_ijoin(tp, ip, 0);
1065 978
1066 xfs_bmap_init(&free_list, &firstfsb); 979 xfs_defer_init(&dfops, &firstfsb);
1067 error = xfs_bmapi_write(tp, ip, startoffset_fsb, 980 error = xfs_bmapi_write(tp, ip, startoffset_fsb,
1068 allocatesize_fsb, alloc_type, &firstfsb, 981 allocatesize_fsb, alloc_type, &firstfsb,
1069 resblks, imapp, &nimaps, &free_list); 982 resblks, imapp, &nimaps, &dfops);
1070 if (error) 983 if (error)
1071 goto error0; 984 goto error0;
1072 985
1073 /* 986 /*
1074 * Complete the transaction 987 * Complete the transaction
1075 */ 988 */
1076 error = xfs_bmap_finish(&tp, &free_list, NULL); 989 error = xfs_defer_finish(&tp, &dfops, NULL);
1077 if (error) 990 if (error)
1078 goto error0; 991 goto error0;
1079 992
@@ -1096,7 +1009,7 @@ xfs_alloc_file_space(
1096 return error; 1009 return error;
1097 1010
1098error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 1011error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
1099 xfs_bmap_cancel(&free_list); 1012 xfs_defer_cancel(&dfops);
1100 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); 1013 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
1101 1014
1102error1: /* Just cancel transaction */ 1015error1: /* Just cancel transaction */
@@ -1114,7 +1027,7 @@ xfs_unmap_extent(
1114{ 1027{
1115 struct xfs_mount *mp = ip->i_mount; 1028 struct xfs_mount *mp = ip->i_mount;
1116 struct xfs_trans *tp; 1029 struct xfs_trans *tp;
1117 struct xfs_bmap_free free_list; 1030 struct xfs_defer_ops dfops;
1118 xfs_fsblock_t firstfsb; 1031 xfs_fsblock_t firstfsb;
1119 uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 1032 uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
1120 int error; 1033 int error;
@@ -1133,13 +1046,13 @@ xfs_unmap_extent(
1133 1046
1134 xfs_trans_ijoin(tp, ip, 0); 1047 xfs_trans_ijoin(tp, ip, 0);
1135 1048
1136 xfs_bmap_init(&free_list, &firstfsb); 1049 xfs_defer_init(&dfops, &firstfsb);
1137 error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb, 1050 error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb,
1138 &free_list, done); 1051 &dfops, done);
1139 if (error) 1052 if (error)
1140 goto out_bmap_cancel; 1053 goto out_bmap_cancel;
1141 1054
1142 error = xfs_bmap_finish(&tp, &free_list, NULL); 1055 error = xfs_defer_finish(&tp, &dfops, ip);
1143 if (error) 1056 if (error)
1144 goto out_bmap_cancel; 1057 goto out_bmap_cancel;
1145 1058
@@ -1149,7 +1062,7 @@ out_unlock:
1149 return error; 1062 return error;
1150 1063
1151out_bmap_cancel: 1064out_bmap_cancel:
1152 xfs_bmap_cancel(&free_list); 1065 xfs_defer_cancel(&dfops);
1153out_trans_cancel: 1066out_trans_cancel:
1154 xfs_trans_cancel(tp); 1067 xfs_trans_cancel(tp);
1155 goto out_unlock; 1068 goto out_unlock;
@@ -1338,7 +1251,7 @@ xfs_shift_file_space(
1338 struct xfs_mount *mp = ip->i_mount; 1251 struct xfs_mount *mp = ip->i_mount;
1339 struct xfs_trans *tp; 1252 struct xfs_trans *tp;
1340 int error; 1253 int error;
1341 struct xfs_bmap_free free_list; 1254 struct xfs_defer_ops dfops;
1342 xfs_fsblock_t first_block; 1255 xfs_fsblock_t first_block;
1343 xfs_fileoff_t stop_fsb; 1256 xfs_fileoff_t stop_fsb;
1344 xfs_fileoff_t next_fsb; 1257 xfs_fileoff_t next_fsb;
@@ -1416,19 +1329,19 @@ xfs_shift_file_space(
1416 1329
1417 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1330 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1418 1331
1419 xfs_bmap_init(&free_list, &first_block); 1332 xfs_defer_init(&dfops, &first_block);
1420 1333
1421 /* 1334 /*
1422 * We are using the write transaction in which max 2 bmbt 1335 * We are using the write transaction in which max 2 bmbt
1423 * updates are allowed 1336 * updates are allowed
1424 */ 1337 */
1425 error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb, 1338 error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
1426 &done, stop_fsb, &first_block, &free_list, 1339 &done, stop_fsb, &first_block, &dfops,
1427 direction, XFS_BMAP_MAX_SHIFT_EXTENTS); 1340 direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
1428 if (error) 1341 if (error)
1429 goto out_bmap_cancel; 1342 goto out_bmap_cancel;
1430 1343
1431 error = xfs_bmap_finish(&tp, &free_list, NULL); 1344 error = xfs_defer_finish(&tp, &dfops, NULL);
1432 if (error) 1345 if (error)
1433 goto out_bmap_cancel; 1346 goto out_bmap_cancel;
1434 1347
@@ -1438,7 +1351,7 @@ xfs_shift_file_space(
1438 return error; 1351 return error;
1439 1352
1440out_bmap_cancel: 1353out_bmap_cancel:
1441 xfs_bmap_cancel(&free_list); 1354 xfs_defer_cancel(&dfops);
1442out_trans_cancel: 1355out_trans_cancel:
1443 xfs_trans_cancel(tp); 1356 xfs_trans_cancel(tp);
1444 return error; 1357 return error;
@@ -1622,6 +1535,10 @@ xfs_swap_extents(
1622 __uint64_t tmp; 1535 __uint64_t tmp;
1623 int lock_flags; 1536 int lock_flags;
1624 1537
1538 /* XXX: we can't do this with rmap, will fix later */
1539 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
1540 return -EOPNOTSUPP;
1541
1625 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 1542 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
1626 if (!tempifp) { 1543 if (!tempifp) {
1627 error = -ENOMEM; 1544 error = -ENOMEM;
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index f20071432ca6..68a621a8e0c0 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -21,7 +21,7 @@
21/* Kernel only BMAP related definitions and functions */ 21/* Kernel only BMAP related definitions and functions */
22 22
23struct xfs_bmbt_irec; 23struct xfs_bmbt_irec;
24struct xfs_bmap_free_item; 24struct xfs_extent_free_item;
25struct xfs_ifork; 25struct xfs_ifork;
26struct xfs_inode; 26struct xfs_inode;
27struct xfs_mount; 27struct xfs_mount;
@@ -40,8 +40,6 @@ int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
40 xfs_bmap_format_t formatter, void *arg); 40 xfs_bmap_format_t formatter, void *arg);
41 41
42/* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */ 42/* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */
43void xfs_bmap_del_free(struct xfs_bmap_free *flist,
44 struct xfs_bmap_free_item *free);
45int xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp, 43int xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp,
46 struct xfs_bmbt_irec *prevp, xfs_extlen_t extsz, 44 struct xfs_bmbt_irec *prevp, xfs_extlen_t extsz,
47 int rt, int eof, int delay, int convert, 45 int rt, int eof, int delay, int convert,
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 272c3f8b6f7d..4ff499aa7338 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -179,7 +179,7 @@ xfs_ioc_trim(
179 * matter as trimming blocks is an advisory interface. 179 * matter as trimming blocks is an advisory interface.
180 */ 180 */
181 if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || 181 if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
182 range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) || 182 range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) ||
183 range.len < mp->m_sb.sb_blocksize) 183 range.len < mp->m_sb.sb_blocksize)
184 return -EINVAL; 184 return -EINVAL;
185 185
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index ccb0811963b2..7a30b8f11db7 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -23,6 +23,7 @@
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
26#include "xfs_inode.h" 27#include "xfs_inode.h"
27#include "xfs_bmap.h" 28#include "xfs_bmap.h"
28#include "xfs_bmap_util.h" 29#include "xfs_bmap_util.h"
@@ -307,7 +308,7 @@ xfs_qm_dqalloc(
307 xfs_buf_t **O_bpp) 308 xfs_buf_t **O_bpp)
308{ 309{
309 xfs_fsblock_t firstblock; 310 xfs_fsblock_t firstblock;
310 xfs_bmap_free_t flist; 311 struct xfs_defer_ops dfops;
311 xfs_bmbt_irec_t map; 312 xfs_bmbt_irec_t map;
312 int nmaps, error; 313 int nmaps, error;
313 xfs_buf_t *bp; 314 xfs_buf_t *bp;
@@ -320,7 +321,7 @@ xfs_qm_dqalloc(
320 /* 321 /*
321 * Initialize the bmap freelist prior to calling bmapi code. 322 * Initialize the bmap freelist prior to calling bmapi code.
322 */ 323 */
323 xfs_bmap_init(&flist, &firstblock); 324 xfs_defer_init(&dfops, &firstblock);
324 xfs_ilock(quotip, XFS_ILOCK_EXCL); 325 xfs_ilock(quotip, XFS_ILOCK_EXCL);
325 /* 326 /*
326 * Return if this type of quotas is turned off while we didn't 327 * Return if this type of quotas is turned off while we didn't
@@ -336,7 +337,7 @@ xfs_qm_dqalloc(
336 error = xfs_bmapi_write(tp, quotip, offset_fsb, 337 error = xfs_bmapi_write(tp, quotip, offset_fsb,
337 XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 338 XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
338 &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp), 339 &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
339 &map, &nmaps, &flist); 340 &map, &nmaps, &dfops);
340 if (error) 341 if (error)
341 goto error0; 342 goto error0;
342 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); 343 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -368,7 +369,7 @@ xfs_qm_dqalloc(
368 dqp->dq_flags & XFS_DQ_ALLTYPES, bp); 369 dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
369 370
370 /* 371 /*
371 * xfs_bmap_finish() may commit the current transaction and 372 * xfs_defer_finish() may commit the current transaction and
372 * start a second transaction if the freelist is not empty. 373 * start a second transaction if the freelist is not empty.
373 * 374 *
374 * Since we still want to modify this buffer, we need to 375 * Since we still want to modify this buffer, we need to
@@ -382,7 +383,7 @@ xfs_qm_dqalloc(
382 383
383 xfs_trans_bhold(tp, bp); 384 xfs_trans_bhold(tp, bp);
384 385
385 error = xfs_bmap_finish(tpp, &flist, NULL); 386 error = xfs_defer_finish(tpp, &dfops, NULL);
386 if (error) 387 if (error)
387 goto error1; 388 goto error1;
388 389
@@ -398,7 +399,7 @@ xfs_qm_dqalloc(
398 return 0; 399 return 0;
399 400
400error1: 401error1:
401 xfs_bmap_cancel(&flist); 402 xfs_defer_cancel(&dfops);
402error0: 403error0:
403 xfs_iunlock(quotip, XFS_ILOCK_EXCL); 404 xfs_iunlock(quotip, XFS_ILOCK_EXCL);
404 405
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 2e4f67f68856..3d224702fbc0 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -90,7 +90,9 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
90#define XFS_ERRTAG_STRATCMPL_IOERR 19 90#define XFS_ERRTAG_STRATCMPL_IOERR 19
91#define XFS_ERRTAG_DIOWRITE_IOERR 20 91#define XFS_ERRTAG_DIOWRITE_IOERR 20
92#define XFS_ERRTAG_BMAPIFORMAT 21 92#define XFS_ERRTAG_BMAPIFORMAT 21
93#define XFS_ERRTAG_MAX 22 93#define XFS_ERRTAG_FREE_EXTENT 22
94#define XFS_ERRTAG_RMAP_FINISH_ONE 23
95#define XFS_ERRTAG_MAX 24
94 96
95/* 97/*
96 * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. 98 * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -117,6 +119,8 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
117#define XFS_RANDOM_STRATCMPL_IOERR (XFS_RANDOM_DEFAULT/10) 119#define XFS_RANDOM_STRATCMPL_IOERR (XFS_RANDOM_DEFAULT/10)
118#define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10) 120#define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10)
119#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT 121#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT
122#define XFS_RANDOM_FREE_EXTENT 1
123#define XFS_RANDOM_RMAP_FINISH_ONE 1
120 124
121#ifdef DEBUG 125#ifdef DEBUG
122extern int xfs_error_test_active; 126extern int xfs_error_test_active;
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index ab779460ecbf..d7bc14906af8 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -20,12 +20,15 @@
20#include "xfs_format.h" 20#include "xfs_format.h"
21#include "xfs_log_format.h" 21#include "xfs_log_format.h"
22#include "xfs_trans_resv.h" 22#include "xfs_trans_resv.h"
23#include "xfs_bit.h"
23#include "xfs_mount.h" 24#include "xfs_mount.h"
24#include "xfs_trans.h" 25#include "xfs_trans.h"
25#include "xfs_trans_priv.h" 26#include "xfs_trans_priv.h"
26#include "xfs_buf_item.h" 27#include "xfs_buf_item.h"
27#include "xfs_extfree_item.h" 28#include "xfs_extfree_item.h"
28#include "xfs_log.h" 29#include "xfs_log.h"
30#include "xfs_btree.h"
31#include "xfs_rmap.h"
29 32
30 33
31kmem_zone_t *xfs_efi_zone; 34kmem_zone_t *xfs_efi_zone;
@@ -486,3 +489,69 @@ xfs_efd_init(
486 489
487 return efdp; 490 return efdp;
488} 491}
492
493/*
494 * Process an extent free intent item that was recovered from
495 * the log. We need to free the extents that it describes.
496 */
497int
498xfs_efi_recover(
499 struct xfs_mount *mp,
500 struct xfs_efi_log_item *efip)
501{
502 struct xfs_efd_log_item *efdp;
503 struct xfs_trans *tp;
504 int i;
505 int error = 0;
506 xfs_extent_t *extp;
507 xfs_fsblock_t startblock_fsb;
508 struct xfs_owner_info oinfo;
509
510 ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
511
512 /*
513 * First check the validity of the extents described by the
514 * EFI. If any are bad, then assume that all are bad and
515 * just toss the EFI.
516 */
517 for (i = 0; i < efip->efi_format.efi_nextents; i++) {
518 extp = &efip->efi_format.efi_extents[i];
519 startblock_fsb = XFS_BB_TO_FSB(mp,
520 XFS_FSB_TO_DADDR(mp, extp->ext_start));
521 if (startblock_fsb == 0 ||
522 extp->ext_len == 0 ||
523 startblock_fsb >= mp->m_sb.sb_dblocks ||
524 extp->ext_len >= mp->m_sb.sb_agblocks) {
525 /*
526 * This will pull the EFI from the AIL and
527 * free the memory associated with it.
528 */
529 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
530 xfs_efi_release(efip);
531 return -EIO;
532 }
533 }
534
535 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
536 if (error)
537 return error;
538 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
539
540 xfs_rmap_skip_owner_update(&oinfo);
541 for (i = 0; i < efip->efi_format.efi_nextents; i++) {
542 extp = &efip->efi_format.efi_extents[i];
543 error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
544 extp->ext_len, &oinfo);
545 if (error)
546 goto abort_error;
547
548 }
549
550 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
551 error = xfs_trans_commit(tp);
552 return error;
553
554abort_error:
555 xfs_trans_cancel(tp);
556 return error;
557}
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 8fa8651705e1..a32c794a86b7 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -98,4 +98,7 @@ int xfs_efi_copy_format(xfs_log_iovec_t *buf,
98void xfs_efi_item_free(xfs_efi_log_item_t *); 98void xfs_efi_item_free(xfs_efi_log_item_t *);
99void xfs_efi_release(struct xfs_efi_log_item *); 99void xfs_efi_release(struct xfs_efi_log_item *);
100 100
101int xfs_efi_recover(struct xfs_mount *mp,
102 struct xfs_efi_log_item *efip);
103
101#endif /* __XFS_EXTFREE_ITEM_H__ */ 104#endif /* __XFS_EXTFREE_ITEM_H__ */
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index a51353a1f87f..4a33a3304369 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -22,6 +22,7 @@
22#include "xfs_trans_resv.h" 22#include "xfs_trans_resv.h"
23#include "xfs_sb.h" 23#include "xfs_sb.h"
24#include "xfs_mount.h" 24#include "xfs_mount.h"
25#include "xfs_defer.h"
25#include "xfs_inode.h" 26#include "xfs_inode.h"
26#include "xfs_bmap.h" 27#include "xfs_bmap.h"
27#include "xfs_bmap_util.h" 28#include "xfs_bmap_util.h"
@@ -385,7 +386,7 @@ xfs_filestream_new_ag(
385 } 386 }
386 387
387 flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | 388 flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
388 (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0); 389 (ap->dfops->dop_low ? XFS_PICK_LOWSPACE : 0);
389 390
390 err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen); 391 err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen);
391 392
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 7191c3878b4a..0f96847b90e1 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -23,6 +23,7 @@
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
26#include "xfs_da_format.h" 27#include "xfs_da_format.h"
27#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
28#include "xfs_inode.h" 29#include "xfs_inode.h"
@@ -32,6 +33,7 @@
32#include "xfs_btree.h" 33#include "xfs_btree.h"
33#include "xfs_alloc_btree.h" 34#include "xfs_alloc_btree.h"
34#include "xfs_alloc.h" 35#include "xfs_alloc.h"
36#include "xfs_rmap_btree.h"
35#include "xfs_ialloc.h" 37#include "xfs_ialloc.h"
36#include "xfs_fsops.h" 38#include "xfs_fsops.h"
37#include "xfs_itable.h" 39#include "xfs_itable.h"
@@ -40,6 +42,7 @@
40#include "xfs_trace.h" 42#include "xfs_trace.h"
41#include "xfs_log.h" 43#include "xfs_log.h"
42#include "xfs_filestream.h" 44#include "xfs_filestream.h"
45#include "xfs_rmap.h"
43 46
44/* 47/*
45 * File system operations 48 * File system operations
@@ -103,7 +106,9 @@ xfs_fs_geometry(
103 (xfs_sb_version_hasfinobt(&mp->m_sb) ? 106 (xfs_sb_version_hasfinobt(&mp->m_sb) ?
104 XFS_FSOP_GEOM_FLAGS_FINOBT : 0) | 107 XFS_FSOP_GEOM_FLAGS_FINOBT : 0) |
105 (xfs_sb_version_hassparseinodes(&mp->m_sb) ? 108 (xfs_sb_version_hassparseinodes(&mp->m_sb) ?
106 XFS_FSOP_GEOM_FLAGS_SPINODES : 0); 109 XFS_FSOP_GEOM_FLAGS_SPINODES : 0) |
110 (xfs_sb_version_hasrmapbt(&mp->m_sb) ?
111 XFS_FSOP_GEOM_FLAGS_RMAPBT : 0);
107 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? 112 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
108 mp->m_sb.sb_logsectsize : BBSIZE; 113 mp->m_sb.sb_logsectsize : BBSIZE;
109 geo->rtsectsize = mp->m_sb.sb_blocksize; 114 geo->rtsectsize = mp->m_sb.sb_blocksize;
@@ -239,10 +244,16 @@ xfs_growfs_data_private(
239 agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp)); 244 agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
240 agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1); 245 agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
241 agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1); 246 agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
247 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
248 agf->agf_roots[XFS_BTNUM_RMAPi] =
249 cpu_to_be32(XFS_RMAP_BLOCK(mp));
250 agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
251 }
252
242 agf->agf_flfirst = cpu_to_be32(1); 253 agf->agf_flfirst = cpu_to_be32(1);
243 agf->agf_fllast = 0; 254 agf->agf_fllast = 0;
244 agf->agf_flcount = 0; 255 agf->agf_flcount = 0;
245 tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp); 256 tmpsize = agsize - mp->m_ag_prealloc_blocks;
246 agf->agf_freeblks = cpu_to_be32(tmpsize); 257 agf->agf_freeblks = cpu_to_be32(tmpsize);
247 agf->agf_longest = cpu_to_be32(tmpsize); 258 agf->agf_longest = cpu_to_be32(tmpsize);
248 if (xfs_sb_version_hascrc(&mp->m_sb)) 259 if (xfs_sb_version_hascrc(&mp->m_sb))
@@ -339,7 +350,7 @@ xfs_growfs_data_private(
339 agno, 0); 350 agno, 0);
340 351
341 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 352 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
342 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 353 arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
343 arec->ar_blockcount = cpu_to_be32( 354 arec->ar_blockcount = cpu_to_be32(
344 agsize - be32_to_cpu(arec->ar_startblock)); 355 agsize - be32_to_cpu(arec->ar_startblock));
345 356
@@ -368,7 +379,7 @@ xfs_growfs_data_private(
368 agno, 0); 379 agno, 0);
369 380
370 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 381 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
371 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 382 arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
372 arec->ar_blockcount = cpu_to_be32( 383 arec->ar_blockcount = cpu_to_be32(
373 agsize - be32_to_cpu(arec->ar_startblock)); 384 agsize - be32_to_cpu(arec->ar_startblock));
374 nfree += be32_to_cpu(arec->ar_blockcount); 385 nfree += be32_to_cpu(arec->ar_blockcount);
@@ -378,6 +389,72 @@ xfs_growfs_data_private(
378 if (error) 389 if (error)
379 goto error0; 390 goto error0;
380 391
392 /* RMAP btree root block */
393 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
394 struct xfs_rmap_rec *rrec;
395 struct xfs_btree_block *block;
396
397 bp = xfs_growfs_get_hdr_buf(mp,
398 XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)),
399 BTOBB(mp->m_sb.sb_blocksize), 0,
400 &xfs_rmapbt_buf_ops);
401 if (!bp) {
402 error = -ENOMEM;
403 goto error0;
404 }
405
406 xfs_btree_init_block(mp, bp, XFS_RMAP_CRC_MAGIC, 0, 0,
407 agno, XFS_BTREE_CRC_BLOCKS);
408 block = XFS_BUF_TO_BLOCK(bp);
409
410
411 /*
412 * mark the AG header regions as static metadata The BNO
413 * btree block is the first block after the headers, so
414 * it's location defines the size of region the static
415 * metadata consumes.
416 *
417 * Note: unlike mkfs, we never have to account for log
418 * space when growing the data regions
419 */
420 rrec = XFS_RMAP_REC_ADDR(block, 1);
421 rrec->rm_startblock = 0;
422 rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
423 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
424 rrec->rm_offset = 0;
425 be16_add_cpu(&block->bb_numrecs, 1);
426
427 /* account freespace btree root blocks */
428 rrec = XFS_RMAP_REC_ADDR(block, 2);
429 rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
430 rrec->rm_blockcount = cpu_to_be32(2);
431 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
432 rrec->rm_offset = 0;
433 be16_add_cpu(&block->bb_numrecs, 1);
434
435 /* account inode btree root blocks */
436 rrec = XFS_RMAP_REC_ADDR(block, 3);
437 rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
438 rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
439 XFS_IBT_BLOCK(mp));
440 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
441 rrec->rm_offset = 0;
442 be16_add_cpu(&block->bb_numrecs, 1);
443
444 /* account for rmap btree root */
445 rrec = XFS_RMAP_REC_ADDR(block, 4);
446 rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
447 rrec->rm_blockcount = cpu_to_be32(1);
448 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
449 rrec->rm_offset = 0;
450 be16_add_cpu(&block->bb_numrecs, 1);
451
452 error = xfs_bwrite(bp);
453 xfs_buf_relse(bp);
454 if (error)
455 goto error0;
456 }
457
381 /* 458 /*
382 * INO btree root block 459 * INO btree root block
383 */ 460 */
@@ -435,6 +512,8 @@ xfs_growfs_data_private(
435 * There are new blocks in the old last a.g. 512 * There are new blocks in the old last a.g.
436 */ 513 */
437 if (new) { 514 if (new) {
515 struct xfs_owner_info oinfo;
516
438 /* 517 /*
439 * Change the agi length. 518 * Change the agi length.
440 */ 519 */
@@ -462,14 +541,20 @@ xfs_growfs_data_private(
462 be32_to_cpu(agi->agi_length)); 541 be32_to_cpu(agi->agi_length));
463 542
464 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); 543 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
544
465 /* 545 /*
466 * Free the new space. 546 * Free the new space.
547 *
548 * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that
549 * this doesn't actually exist in the rmap btree.
467 */ 550 */
468 error = xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, agno, 551 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
469 be32_to_cpu(agf->agf_length) - new), new); 552 error = xfs_free_extent(tp,
470 if (error) { 553 XFS_AGB_TO_FSB(mp, agno,
554 be32_to_cpu(agf->agf_length) - new),
555 new, &oinfo);
556 if (error)
471 goto error0; 557 goto error0;
472 }
473 } 558 }
474 559
475 /* 560 /*
@@ -501,6 +586,7 @@ xfs_growfs_data_private(
501 } else 586 } else
502 mp->m_maxicount = 0; 587 mp->m_maxicount = 0;
503 xfs_set_low_space_thresholds(mp); 588 xfs_set_low_space_thresholds(mp);
589 mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
504 590
505 /* update secondary superblocks. */ 591 /* update secondary superblocks. */
506 for (agno = 1; agno < nagcount; agno++) { 592 for (agno = 1; agno < nagcount; agno++) {
@@ -638,7 +724,7 @@ xfs_fs_counts(
638 cnt->allocino = percpu_counter_read_positive(&mp->m_icount); 724 cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
639 cnt->freeino = percpu_counter_read_positive(&mp->m_ifree); 725 cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
640 cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) - 726 cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
641 XFS_ALLOC_SET_ASIDE(mp); 727 mp->m_alloc_set_aside;
642 728
643 spin_lock(&mp->m_sb_lock); 729 spin_lock(&mp->m_sb_lock);
644 cnt->freertx = mp->m_sb.sb_frextents; 730 cnt->freertx = mp->m_sb.sb_frextents;
@@ -726,7 +812,7 @@ xfs_reserve_blocks(
726 error = -ENOSPC; 812 error = -ENOSPC;
727 do { 813 do {
728 free = percpu_counter_sum(&mp->m_fdblocks) - 814 free = percpu_counter_sum(&mp->m_fdblocks) -
729 XFS_ALLOC_SET_ASIDE(mp); 815 mp->m_alloc_set_aside;
730 if (!free) 816 if (!free)
731 break; 817 break;
732 818
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8825bcfd314c..e08eaea6327b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -25,6 +25,7 @@
25#include "xfs_trans_resv.h" 25#include "xfs_trans_resv.h"
26#include "xfs_sb.h" 26#include "xfs_sb.h"
27#include "xfs_mount.h" 27#include "xfs_mount.h"
28#include "xfs_defer.h"
28#include "xfs_inode.h" 29#include "xfs_inode.h"
29#include "xfs_da_format.h" 30#include "xfs_da_format.h"
30#include "xfs_da_btree.h" 31#include "xfs_da_btree.h"
@@ -1122,7 +1123,7 @@ xfs_create(
1122 struct xfs_inode *ip = NULL; 1123 struct xfs_inode *ip = NULL;
1123 struct xfs_trans *tp = NULL; 1124 struct xfs_trans *tp = NULL;
1124 int error; 1125 int error;
1125 xfs_bmap_free_t free_list; 1126 struct xfs_defer_ops dfops;
1126 xfs_fsblock_t first_block; 1127 xfs_fsblock_t first_block;
1127 bool unlock_dp_on_error = false; 1128 bool unlock_dp_on_error = false;
1128 prid_t prid; 1129 prid_t prid;
@@ -1182,7 +1183,7 @@ xfs_create(
1182 XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); 1183 XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
1183 unlock_dp_on_error = true; 1184 unlock_dp_on_error = true;
1184 1185
1185 xfs_bmap_init(&free_list, &first_block); 1186 xfs_defer_init(&dfops, &first_block);
1186 1187
1187 /* 1188 /*
1188 * Reserve disk quota and the inode. 1189 * Reserve disk quota and the inode.
@@ -1219,7 +1220,7 @@ xfs_create(
1219 unlock_dp_on_error = false; 1220 unlock_dp_on_error = false;
1220 1221
1221 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 1222 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
1222 &first_block, &free_list, resblks ? 1223 &first_block, &dfops, resblks ?
1223 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 1224 resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
1224 if (error) { 1225 if (error) {
1225 ASSERT(error != -ENOSPC); 1226 ASSERT(error != -ENOSPC);
@@ -1253,7 +1254,7 @@ xfs_create(
1253 */ 1254 */
1254 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); 1255 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
1255 1256
1256 error = xfs_bmap_finish(&tp, &free_list, NULL); 1257 error = xfs_defer_finish(&tp, &dfops, NULL);
1257 if (error) 1258 if (error)
1258 goto out_bmap_cancel; 1259 goto out_bmap_cancel;
1259 1260
@@ -1269,7 +1270,7 @@ xfs_create(
1269 return 0; 1270 return 0;
1270 1271
1271 out_bmap_cancel: 1272 out_bmap_cancel:
1272 xfs_bmap_cancel(&free_list); 1273 xfs_defer_cancel(&dfops);
1273 out_trans_cancel: 1274 out_trans_cancel:
1274 xfs_trans_cancel(tp); 1275 xfs_trans_cancel(tp);
1275 out_release_inode: 1276 out_release_inode:
@@ -1401,7 +1402,7 @@ xfs_link(
1401 xfs_mount_t *mp = tdp->i_mount; 1402 xfs_mount_t *mp = tdp->i_mount;
1402 xfs_trans_t *tp; 1403 xfs_trans_t *tp;
1403 int error; 1404 int error;
1404 xfs_bmap_free_t free_list; 1405 struct xfs_defer_ops dfops;
1405 xfs_fsblock_t first_block; 1406 xfs_fsblock_t first_block;
1406 int resblks; 1407 int resblks;
1407 1408
@@ -1452,7 +1453,7 @@ xfs_link(
1452 goto error_return; 1453 goto error_return;
1453 } 1454 }
1454 1455
1455 xfs_bmap_init(&free_list, &first_block); 1456 xfs_defer_init(&dfops, &first_block);
1456 1457
1457 /* 1458 /*
1458 * Handle initial link state of O_TMPFILE inode 1459 * Handle initial link state of O_TMPFILE inode
@@ -1464,7 +1465,7 @@ xfs_link(
1464 } 1465 }
1465 1466
1466 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, 1467 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
1467 &first_block, &free_list, resblks); 1468 &first_block, &dfops, resblks);
1468 if (error) 1469 if (error)
1469 goto error_return; 1470 goto error_return;
1470 xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1471 xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -1482,9 +1483,9 @@ xfs_link(
1482 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 1483 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
1483 xfs_trans_set_sync(tp); 1484 xfs_trans_set_sync(tp);
1484 1485
1485 error = xfs_bmap_finish(&tp, &free_list, NULL); 1486 error = xfs_defer_finish(&tp, &dfops, NULL);
1486 if (error) { 1487 if (error) {
1487 xfs_bmap_cancel(&free_list); 1488 xfs_defer_cancel(&dfops);
1488 goto error_return; 1489 goto error_return;
1489 } 1490 }
1490 1491
@@ -1526,7 +1527,7 @@ xfs_itruncate_extents(
1526{ 1527{
1527 struct xfs_mount *mp = ip->i_mount; 1528 struct xfs_mount *mp = ip->i_mount;
1528 struct xfs_trans *tp = *tpp; 1529 struct xfs_trans *tp = *tpp;
1529 xfs_bmap_free_t free_list; 1530 struct xfs_defer_ops dfops;
1530 xfs_fsblock_t first_block; 1531 xfs_fsblock_t first_block;
1531 xfs_fileoff_t first_unmap_block; 1532 xfs_fileoff_t first_unmap_block;
1532 xfs_fileoff_t last_block; 1533 xfs_fileoff_t last_block;
@@ -1562,12 +1563,12 @@ xfs_itruncate_extents(
1562 ASSERT(first_unmap_block < last_block); 1563 ASSERT(first_unmap_block < last_block);
1563 unmap_len = last_block - first_unmap_block + 1; 1564 unmap_len = last_block - first_unmap_block + 1;
1564 while (!done) { 1565 while (!done) {
1565 xfs_bmap_init(&free_list, &first_block); 1566 xfs_defer_init(&dfops, &first_block);
1566 error = xfs_bunmapi(tp, ip, 1567 error = xfs_bunmapi(tp, ip,
1567 first_unmap_block, unmap_len, 1568 first_unmap_block, unmap_len,
1568 xfs_bmapi_aflag(whichfork), 1569 xfs_bmapi_aflag(whichfork),
1569 XFS_ITRUNC_MAX_EXTENTS, 1570 XFS_ITRUNC_MAX_EXTENTS,
1570 &first_block, &free_list, 1571 &first_block, &dfops,
1571 &done); 1572 &done);
1572 if (error) 1573 if (error)
1573 goto out_bmap_cancel; 1574 goto out_bmap_cancel;
@@ -1576,7 +1577,7 @@ xfs_itruncate_extents(
1576 * Duplicate the transaction that has the permanent 1577 * Duplicate the transaction that has the permanent
1577 * reservation and commit the old transaction. 1578 * reservation and commit the old transaction.
1578 */ 1579 */
1579 error = xfs_bmap_finish(&tp, &free_list, ip); 1580 error = xfs_defer_finish(&tp, &dfops, ip);
1580 if (error) 1581 if (error)
1581 goto out_bmap_cancel; 1582 goto out_bmap_cancel;
1582 1583
@@ -1602,7 +1603,7 @@ out_bmap_cancel:
1602 * the transaction can be properly aborted. We just need to make sure 1603 * the transaction can be properly aborted. We just need to make sure
1603 * we're not holding any resources that we were not when we came in. 1604 * we're not holding any resources that we were not when we came in.
1604 */ 1605 */
1605 xfs_bmap_cancel(&free_list); 1606 xfs_defer_cancel(&dfops);
1606 goto out; 1607 goto out;
1607} 1608}
1608 1609
@@ -1743,7 +1744,7 @@ STATIC int
1743xfs_inactive_ifree( 1744xfs_inactive_ifree(
1744 struct xfs_inode *ip) 1745 struct xfs_inode *ip)
1745{ 1746{
1746 xfs_bmap_free_t free_list; 1747 struct xfs_defer_ops dfops;
1747 xfs_fsblock_t first_block; 1748 xfs_fsblock_t first_block;
1748 struct xfs_mount *mp = ip->i_mount; 1749 struct xfs_mount *mp = ip->i_mount;
1749 struct xfs_trans *tp; 1750 struct xfs_trans *tp;
@@ -1780,8 +1781,8 @@ xfs_inactive_ifree(
1780 xfs_ilock(ip, XFS_ILOCK_EXCL); 1781 xfs_ilock(ip, XFS_ILOCK_EXCL);
1781 xfs_trans_ijoin(tp, ip, 0); 1782 xfs_trans_ijoin(tp, ip, 0);
1782 1783
1783 xfs_bmap_init(&free_list, &first_block); 1784 xfs_defer_init(&dfops, &first_block);
1784 error = xfs_ifree(tp, ip, &free_list); 1785 error = xfs_ifree(tp, ip, &dfops);
1785 if (error) { 1786 if (error) {
1786 /* 1787 /*
1787 * If we fail to free the inode, shut down. The cancel 1788 * If we fail to free the inode, shut down. The cancel
@@ -1807,11 +1808,11 @@ xfs_inactive_ifree(
1807 * Just ignore errors at this point. There is nothing we can do except 1808 * Just ignore errors at this point. There is nothing we can do except
1808 * to try to keep going. Make sure it's not a silent error. 1809 * to try to keep going. Make sure it's not a silent error.
1809 */ 1810 */
1810 error = xfs_bmap_finish(&tp, &free_list, NULL); 1811 error = xfs_defer_finish(&tp, &dfops, NULL);
1811 if (error) { 1812 if (error) {
1812 xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", 1813 xfs_notice(mp, "%s: xfs_defer_finish returned error %d",
1813 __func__, error); 1814 __func__, error);
1814 xfs_bmap_cancel(&free_list); 1815 xfs_defer_cancel(&dfops);
1815 } 1816 }
1816 error = xfs_trans_commit(tp); 1817 error = xfs_trans_commit(tp);
1817 if (error) 1818 if (error)
@@ -2367,7 +2368,7 @@ int
2367xfs_ifree( 2368xfs_ifree(
2368 xfs_trans_t *tp, 2369 xfs_trans_t *tp,
2369 xfs_inode_t *ip, 2370 xfs_inode_t *ip,
2370 xfs_bmap_free_t *flist) 2371 struct xfs_defer_ops *dfops)
2371{ 2372{
2372 int error; 2373 int error;
2373 struct xfs_icluster xic = { 0 }; 2374 struct xfs_icluster xic = { 0 };
@@ -2386,7 +2387,7 @@ xfs_ifree(
2386 if (error) 2387 if (error)
2387 return error; 2388 return error;
2388 2389
2389 error = xfs_difree(tp, ip->i_ino, flist, &xic); 2390 error = xfs_difree(tp, ip->i_ino, dfops, &xic);
2390 if (error) 2391 if (error)
2391 return error; 2392 return error;
2392 2393
@@ -2474,7 +2475,7 @@ xfs_iunpin_wait(
2474 * directory entry. 2475 * directory entry.
2475 * 2476 *
2476 * This is still safe from a transactional point of view - it is not until we 2477 * This is still safe from a transactional point of view - it is not until we
2477 * get to xfs_bmap_finish() that we have the possibility of multiple 2478 * get to xfs_defer_finish() that we have the possibility of multiple
2478 * transactions in this operation. Hence as long as we remove the directory 2479 * transactions in this operation. Hence as long as we remove the directory
2479 * entry and drop the link count in the first transaction of the remove 2480 * entry and drop the link count in the first transaction of the remove
2480 * operation, there are no transactional constraints on the ordering here. 2481 * operation, there are no transactional constraints on the ordering here.
@@ -2489,7 +2490,7 @@ xfs_remove(
2489 xfs_trans_t *tp = NULL; 2490 xfs_trans_t *tp = NULL;
2490 int is_dir = S_ISDIR(VFS_I(ip)->i_mode); 2491 int is_dir = S_ISDIR(VFS_I(ip)->i_mode);
2491 int error = 0; 2492 int error = 0;
2492 xfs_bmap_free_t free_list; 2493 struct xfs_defer_ops dfops;
2493 xfs_fsblock_t first_block; 2494 xfs_fsblock_t first_block;
2494 uint resblks; 2495 uint resblks;
2495 2496
@@ -2571,9 +2572,9 @@ xfs_remove(
2571 if (error) 2572 if (error)
2572 goto out_trans_cancel; 2573 goto out_trans_cancel;
2573 2574
2574 xfs_bmap_init(&free_list, &first_block); 2575 xfs_defer_init(&dfops, &first_block);
2575 error = xfs_dir_removename(tp, dp, name, ip->i_ino, 2576 error = xfs_dir_removename(tp, dp, name, ip->i_ino,
2576 &first_block, &free_list, resblks); 2577 &first_block, &dfops, resblks);
2577 if (error) { 2578 if (error) {
2578 ASSERT(error != -ENOENT); 2579 ASSERT(error != -ENOENT);
2579 goto out_bmap_cancel; 2580 goto out_bmap_cancel;
@@ -2587,7 +2588,7 @@ xfs_remove(
2587 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 2588 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2588 xfs_trans_set_sync(tp); 2589 xfs_trans_set_sync(tp);
2589 2590
2590 error = xfs_bmap_finish(&tp, &free_list, NULL); 2591 error = xfs_defer_finish(&tp, &dfops, NULL);
2591 if (error) 2592 if (error)
2592 goto out_bmap_cancel; 2593 goto out_bmap_cancel;
2593 2594
@@ -2601,7 +2602,7 @@ xfs_remove(
2601 return 0; 2602 return 0;
2602 2603
2603 out_bmap_cancel: 2604 out_bmap_cancel:
2604 xfs_bmap_cancel(&free_list); 2605 xfs_defer_cancel(&dfops);
2605 out_trans_cancel: 2606 out_trans_cancel:
2606 xfs_trans_cancel(tp); 2607 xfs_trans_cancel(tp);
2607 std_return: 2608 std_return:
@@ -2662,7 +2663,7 @@ xfs_sort_for_rename(
2662static int 2663static int
2663xfs_finish_rename( 2664xfs_finish_rename(
2664 struct xfs_trans *tp, 2665 struct xfs_trans *tp,
2665 struct xfs_bmap_free *free_list) 2666 struct xfs_defer_ops *dfops)
2666{ 2667{
2667 int error; 2668 int error;
2668 2669
@@ -2673,9 +2674,9 @@ xfs_finish_rename(
2673 if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 2674 if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2674 xfs_trans_set_sync(tp); 2675 xfs_trans_set_sync(tp);
2675 2676
2676 error = xfs_bmap_finish(&tp, free_list, NULL); 2677 error = xfs_defer_finish(&tp, dfops, NULL);
2677 if (error) { 2678 if (error) {
2678 xfs_bmap_cancel(free_list); 2679 xfs_defer_cancel(dfops);
2679 xfs_trans_cancel(tp); 2680 xfs_trans_cancel(tp);
2680 return error; 2681 return error;
2681 } 2682 }
@@ -2697,7 +2698,7 @@ xfs_cross_rename(
2697 struct xfs_inode *dp2, 2698 struct xfs_inode *dp2,
2698 struct xfs_name *name2, 2699 struct xfs_name *name2,
2699 struct xfs_inode *ip2, 2700 struct xfs_inode *ip2,
2700 struct xfs_bmap_free *free_list, 2701 struct xfs_defer_ops *dfops,
2701 xfs_fsblock_t *first_block, 2702 xfs_fsblock_t *first_block,
2702 int spaceres) 2703 int spaceres)
2703{ 2704{
@@ -2709,14 +2710,14 @@ xfs_cross_rename(
2709 /* Swap inode number for dirent in first parent */ 2710 /* Swap inode number for dirent in first parent */
2710 error = xfs_dir_replace(tp, dp1, name1, 2711 error = xfs_dir_replace(tp, dp1, name1,
2711 ip2->i_ino, 2712 ip2->i_ino,
2712 first_block, free_list, spaceres); 2713 first_block, dfops, spaceres);
2713 if (error) 2714 if (error)
2714 goto out_trans_abort; 2715 goto out_trans_abort;
2715 2716
2716 /* Swap inode number for dirent in second parent */ 2717 /* Swap inode number for dirent in second parent */
2717 error = xfs_dir_replace(tp, dp2, name2, 2718 error = xfs_dir_replace(tp, dp2, name2,
2718 ip1->i_ino, 2719 ip1->i_ino,
2719 first_block, free_list, spaceres); 2720 first_block, dfops, spaceres);
2720 if (error) 2721 if (error)
2721 goto out_trans_abort; 2722 goto out_trans_abort;
2722 2723
@@ -2731,7 +2732,7 @@ xfs_cross_rename(
2731 if (S_ISDIR(VFS_I(ip2)->i_mode)) { 2732 if (S_ISDIR(VFS_I(ip2)->i_mode)) {
2732 error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot, 2733 error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
2733 dp1->i_ino, first_block, 2734 dp1->i_ino, first_block,
2734 free_list, spaceres); 2735 dfops, spaceres);
2735 if (error) 2736 if (error)
2736 goto out_trans_abort; 2737 goto out_trans_abort;
2737 2738
@@ -2758,7 +2759,7 @@ xfs_cross_rename(
2758 if (S_ISDIR(VFS_I(ip1)->i_mode)) { 2759 if (S_ISDIR(VFS_I(ip1)->i_mode)) {
2759 error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot, 2760 error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
2760 dp2->i_ino, first_block, 2761 dp2->i_ino, first_block,
2761 free_list, spaceres); 2762 dfops, spaceres);
2762 if (error) 2763 if (error)
2763 goto out_trans_abort; 2764 goto out_trans_abort;
2764 2765
@@ -2797,10 +2798,10 @@ xfs_cross_rename(
2797 } 2798 }
2798 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2799 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2799 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); 2800 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
2800 return xfs_finish_rename(tp, free_list); 2801 return xfs_finish_rename(tp, dfops);
2801 2802
2802out_trans_abort: 2803out_trans_abort:
2803 xfs_bmap_cancel(free_list); 2804 xfs_defer_cancel(dfops);
2804 xfs_trans_cancel(tp); 2805 xfs_trans_cancel(tp);
2805 return error; 2806 return error;
2806} 2807}
@@ -2855,7 +2856,7 @@ xfs_rename(
2855{ 2856{
2856 struct xfs_mount *mp = src_dp->i_mount; 2857 struct xfs_mount *mp = src_dp->i_mount;
2857 struct xfs_trans *tp; 2858 struct xfs_trans *tp;
2858 struct xfs_bmap_free free_list; 2859 struct xfs_defer_ops dfops;
2859 xfs_fsblock_t first_block; 2860 xfs_fsblock_t first_block;
2860 struct xfs_inode *wip = NULL; /* whiteout inode */ 2861 struct xfs_inode *wip = NULL; /* whiteout inode */
2861 struct xfs_inode *inodes[__XFS_SORT_INODES]; 2862 struct xfs_inode *inodes[__XFS_SORT_INODES];
@@ -2944,13 +2945,13 @@ xfs_rename(
2944 goto out_trans_cancel; 2945 goto out_trans_cancel;
2945 } 2946 }
2946 2947
2947 xfs_bmap_init(&free_list, &first_block); 2948 xfs_defer_init(&dfops, &first_block);
2948 2949
2949 /* RENAME_EXCHANGE is unique from here on. */ 2950 /* RENAME_EXCHANGE is unique from here on. */
2950 if (flags & RENAME_EXCHANGE) 2951 if (flags & RENAME_EXCHANGE)
2951 return xfs_cross_rename(tp, src_dp, src_name, src_ip, 2952 return xfs_cross_rename(tp, src_dp, src_name, src_ip,
2952 target_dp, target_name, target_ip, 2953 target_dp, target_name, target_ip,
2953 &free_list, &first_block, spaceres); 2954 &dfops, &first_block, spaceres);
2954 2955
2955 /* 2956 /*
2956 * Set up the target. 2957 * Set up the target.
@@ -2972,7 +2973,7 @@ xfs_rename(
2972 */ 2973 */
2973 error = xfs_dir_createname(tp, target_dp, target_name, 2974 error = xfs_dir_createname(tp, target_dp, target_name,
2974 src_ip->i_ino, &first_block, 2975 src_ip->i_ino, &first_block,
2975 &free_list, spaceres); 2976 &dfops, spaceres);
2976 if (error) 2977 if (error)
2977 goto out_bmap_cancel; 2978 goto out_bmap_cancel;
2978 2979
@@ -3012,7 +3013,7 @@ xfs_rename(
3012 */ 3013 */
3013 error = xfs_dir_replace(tp, target_dp, target_name, 3014 error = xfs_dir_replace(tp, target_dp, target_name,
3014 src_ip->i_ino, 3015 src_ip->i_ino,
3015 &first_block, &free_list, spaceres); 3016 &first_block, &dfops, spaceres);
3016 if (error) 3017 if (error)
3017 goto out_bmap_cancel; 3018 goto out_bmap_cancel;
3018 3019
@@ -3047,7 +3048,7 @@ xfs_rename(
3047 */ 3048 */
3048 error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, 3049 error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
3049 target_dp->i_ino, 3050 target_dp->i_ino,
3050 &first_block, &free_list, spaceres); 3051 &first_block, &dfops, spaceres);
3051 ASSERT(error != -EEXIST); 3052 ASSERT(error != -EEXIST);
3052 if (error) 3053 if (error)
3053 goto out_bmap_cancel; 3054 goto out_bmap_cancel;
@@ -3086,10 +3087,10 @@ xfs_rename(
3086 */ 3087 */
3087 if (wip) { 3088 if (wip) {
3088 error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino, 3089 error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
3089 &first_block, &free_list, spaceres); 3090 &first_block, &dfops, spaceres);
3090 } else 3091 } else
3091 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, 3092 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
3092 &first_block, &free_list, spaceres); 3093 &first_block, &dfops, spaceres);
3093 if (error) 3094 if (error)
3094 goto out_bmap_cancel; 3095 goto out_bmap_cancel;
3095 3096
@@ -3124,13 +3125,13 @@ xfs_rename(
3124 if (new_parent) 3125 if (new_parent)
3125 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); 3126 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
3126 3127
3127 error = xfs_finish_rename(tp, &free_list); 3128 error = xfs_finish_rename(tp, &dfops);
3128 if (wip) 3129 if (wip)
3129 IRELE(wip); 3130 IRELE(wip);
3130 return error; 3131 return error;
3131 3132
3132out_bmap_cancel: 3133out_bmap_cancel:
3133 xfs_bmap_cancel(&free_list); 3134 xfs_defer_cancel(&dfops);
3134out_trans_cancel: 3135out_trans_cancel:
3135 xfs_trans_cancel(tp); 3136 xfs_trans_cancel(tp);
3136out_release_wip: 3137out_release_wip:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 8eb78ec4a6e2..e1a411e08f00 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -27,7 +27,7 @@
27struct xfs_dinode; 27struct xfs_dinode;
28struct xfs_inode; 28struct xfs_inode;
29struct xfs_buf; 29struct xfs_buf;
30struct xfs_bmap_free; 30struct xfs_defer_ops;
31struct xfs_bmbt_irec; 31struct xfs_bmbt_irec;
32struct xfs_inode_log_item; 32struct xfs_inode_log_item;
33struct xfs_mount; 33struct xfs_mount;
@@ -398,7 +398,7 @@ uint xfs_ilock_attr_map_shared(struct xfs_inode *);
398 398
399uint xfs_ip2xflags(struct xfs_inode *); 399uint xfs_ip2xflags(struct xfs_inode *);
400int xfs_ifree(struct xfs_trans *, xfs_inode_t *, 400int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
401 struct xfs_bmap_free *); 401 struct xfs_defer_ops *);
402int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *, 402int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
403 int, xfs_fsize_t); 403 int, xfs_fsize_t);
404void xfs_iext_realloc(xfs_inode_t *, int, int); 404void xfs_iext_realloc(xfs_inode_t *, int, int);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 9a7c87809d3b..cf46658392ce 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -387,6 +387,7 @@ xfs_attrlist_by_handle(
387{ 387{
388 int error = -ENOMEM; 388 int error = -ENOMEM;
389 attrlist_cursor_kern_t *cursor; 389 attrlist_cursor_kern_t *cursor;
390 struct xfs_fsop_attrlist_handlereq __user *p = arg;
390 xfs_fsop_attrlist_handlereq_t al_hreq; 391 xfs_fsop_attrlist_handlereq_t al_hreq;
391 struct dentry *dentry; 392 struct dentry *dentry;
392 char *kbuf; 393 char *kbuf;
@@ -419,6 +420,11 @@ xfs_attrlist_by_handle(
419 if (error) 420 if (error)
420 goto out_kfree; 421 goto out_kfree;
421 422
423 if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) {
424 error = -EFAULT;
425 goto out_kfree;
426 }
427
422 if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) 428 if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
423 error = -EFAULT; 429 error = -EFAULT;
424 430
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 620fc9120444..2114d53df433 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -23,6 +23,7 @@
23#include "xfs_log_format.h" 23#include "xfs_log_format.h"
24#include "xfs_trans_resv.h" 24#include "xfs_trans_resv.h"
25#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
26#include "xfs_inode.h" 27#include "xfs_inode.h"
27#include "xfs_btree.h" 28#include "xfs_btree.h"
28#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
@@ -128,7 +129,7 @@ xfs_iomap_write_direct(
128 int quota_flag; 129 int quota_flag;
129 int rt; 130 int rt;
130 xfs_trans_t *tp; 131 xfs_trans_t *tp;
131 xfs_bmap_free_t free_list; 132 struct xfs_defer_ops dfops;
132 uint qblocks, resblks, resrtextents; 133 uint qblocks, resblks, resrtextents;
133 int error; 134 int error;
134 int lockmode; 135 int lockmode;
@@ -231,18 +232,18 @@ xfs_iomap_write_direct(
231 * From this point onwards we overwrite the imap pointer that the 232 * From this point onwards we overwrite the imap pointer that the
232 * caller gave to us. 233 * caller gave to us.
233 */ 234 */
234 xfs_bmap_init(&free_list, &firstfsb); 235 xfs_defer_init(&dfops, &firstfsb);
235 nimaps = 1; 236 nimaps = 1;
236 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, 237 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
237 bmapi_flags, &firstfsb, resblks, imap, 238 bmapi_flags, &firstfsb, resblks, imap,
238 &nimaps, &free_list); 239 &nimaps, &dfops);
239 if (error) 240 if (error)
240 goto out_bmap_cancel; 241 goto out_bmap_cancel;
241 242
242 /* 243 /*
243 * Complete the transaction 244 * Complete the transaction
244 */ 245 */
245 error = xfs_bmap_finish(&tp, &free_list, NULL); 246 error = xfs_defer_finish(&tp, &dfops, NULL);
246 if (error) 247 if (error)
247 goto out_bmap_cancel; 248 goto out_bmap_cancel;
248 249
@@ -266,7 +267,7 @@ out_unlock:
266 return error; 267 return error;
267 268
268out_bmap_cancel: 269out_bmap_cancel:
269 xfs_bmap_cancel(&free_list); 270 xfs_defer_cancel(&dfops);
270 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); 271 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
271out_trans_cancel: 272out_trans_cancel:
272 xfs_trans_cancel(tp); 273 xfs_trans_cancel(tp);
@@ -685,7 +686,7 @@ xfs_iomap_write_allocate(
685 xfs_fileoff_t offset_fsb, last_block; 686 xfs_fileoff_t offset_fsb, last_block;
686 xfs_fileoff_t end_fsb, map_start_fsb; 687 xfs_fileoff_t end_fsb, map_start_fsb;
687 xfs_fsblock_t first_block; 688 xfs_fsblock_t first_block;
688 xfs_bmap_free_t free_list; 689 struct xfs_defer_ops dfops;
689 xfs_filblks_t count_fsb; 690 xfs_filblks_t count_fsb;
690 xfs_trans_t *tp; 691 xfs_trans_t *tp;
691 int nimaps; 692 int nimaps;
@@ -727,7 +728,7 @@ xfs_iomap_write_allocate(
727 xfs_ilock(ip, XFS_ILOCK_EXCL); 728 xfs_ilock(ip, XFS_ILOCK_EXCL);
728 xfs_trans_ijoin(tp, ip, 0); 729 xfs_trans_ijoin(tp, ip, 0);
729 730
730 xfs_bmap_init(&free_list, &first_block); 731 xfs_defer_init(&dfops, &first_block);
731 732
732 /* 733 /*
733 * it is possible that the extents have changed since 734 * it is possible that the extents have changed since
@@ -783,11 +784,11 @@ xfs_iomap_write_allocate(
783 error = xfs_bmapi_write(tp, ip, map_start_fsb, 784 error = xfs_bmapi_write(tp, ip, map_start_fsb,
784 count_fsb, 0, &first_block, 785 count_fsb, 0, &first_block,
785 nres, imap, &nimaps, 786 nres, imap, &nimaps,
786 &free_list); 787 &dfops);
787 if (error) 788 if (error)
788 goto trans_cancel; 789 goto trans_cancel;
789 790
790 error = xfs_bmap_finish(&tp, &free_list, NULL); 791 error = xfs_defer_finish(&tp, &dfops, NULL);
791 if (error) 792 if (error)
792 goto trans_cancel; 793 goto trans_cancel;
793 794
@@ -821,7 +822,7 @@ xfs_iomap_write_allocate(
821 } 822 }
822 823
823trans_cancel: 824trans_cancel:
824 xfs_bmap_cancel(&free_list); 825 xfs_defer_cancel(&dfops);
825 xfs_trans_cancel(tp); 826 xfs_trans_cancel(tp);
826error0: 827error0:
827 xfs_iunlock(ip, XFS_ILOCK_EXCL); 828 xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -842,7 +843,7 @@ xfs_iomap_write_unwritten(
842 int nimaps; 843 int nimaps;
843 xfs_trans_t *tp; 844 xfs_trans_t *tp;
844 xfs_bmbt_irec_t imap; 845 xfs_bmbt_irec_t imap;
845 xfs_bmap_free_t free_list; 846 struct xfs_defer_ops dfops;
846 xfs_fsize_t i_size; 847 xfs_fsize_t i_size;
847 uint resblks; 848 uint resblks;
848 int error; 849 int error;
@@ -886,11 +887,11 @@ xfs_iomap_write_unwritten(
886 /* 887 /*
887 * Modify the unwritten extent state of the buffer. 888 * Modify the unwritten extent state of the buffer.
888 */ 889 */
889 xfs_bmap_init(&free_list, &firstfsb); 890 xfs_defer_init(&dfops, &firstfsb);
890 nimaps = 1; 891 nimaps = 1;
891 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, 892 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
892 XFS_BMAPI_CONVERT, &firstfsb, resblks, 893 XFS_BMAPI_CONVERT, &firstfsb, resblks,
893 &imap, &nimaps, &free_list); 894 &imap, &nimaps, &dfops);
894 if (error) 895 if (error)
895 goto error_on_bmapi_transaction; 896 goto error_on_bmapi_transaction;
896 897
@@ -909,7 +910,7 @@ xfs_iomap_write_unwritten(
909 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 910 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
910 } 911 }
911 912
912 error = xfs_bmap_finish(&tp, &free_list, NULL); 913 error = xfs_defer_finish(&tp, &dfops, NULL);
913 if (error) 914 if (error)
914 goto error_on_bmapi_transaction; 915 goto error_on_bmapi_transaction;
915 916
@@ -936,7 +937,7 @@ xfs_iomap_write_unwritten(
936 return 0; 937 return 0;
937 938
938error_on_bmapi_transaction: 939error_on_bmapi_transaction:
939 xfs_bmap_cancel(&free_list); 940 xfs_defer_cancel(&dfops);
940 xfs_trans_cancel(tp); 941 xfs_trans_cancel(tp);
941 xfs_iunlock(ip, XFS_ILOCK_EXCL); 942 xfs_iunlock(ip, XFS_ILOCK_EXCL);
942 return error; 943 return error;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 835997843846..e8638fd2c0c3 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -43,6 +43,7 @@
43#include "xfs_bmap_btree.h" 43#include "xfs_bmap_btree.h"
44#include "xfs_error.h" 44#include "xfs_error.h"
45#include "xfs_dir2.h" 45#include "xfs_dir2.h"
46#include "xfs_rmap_item.h"
46 47
47#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) 48#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1)
48 49
@@ -1911,6 +1912,8 @@ xlog_recover_reorder_trans(
1911 case XFS_LI_QUOTAOFF: 1912 case XFS_LI_QUOTAOFF:
1912 case XFS_LI_EFD: 1913 case XFS_LI_EFD:
1913 case XFS_LI_EFI: 1914 case XFS_LI_EFI:
1915 case XFS_LI_RUI:
1916 case XFS_LI_RUD:
1914 trace_xfs_log_recover_item_reorder_tail(log, 1917 trace_xfs_log_recover_item_reorder_tail(log,
1915 trans, item, pass); 1918 trans, item, pass);
1916 list_move_tail(&item->ri_list, &inode_list); 1919 list_move_tail(&item->ri_list, &inode_list);
@@ -2228,6 +2231,7 @@ xlog_recover_get_buf_lsn(
2228 case XFS_ABTC_CRC_MAGIC: 2231 case XFS_ABTC_CRC_MAGIC:
2229 case XFS_ABTB_MAGIC: 2232 case XFS_ABTB_MAGIC:
2230 case XFS_ABTC_MAGIC: 2233 case XFS_ABTC_MAGIC:
2234 case XFS_RMAP_CRC_MAGIC:
2231 case XFS_IBT_CRC_MAGIC: 2235 case XFS_IBT_CRC_MAGIC:
2232 case XFS_IBT_MAGIC: { 2236 case XFS_IBT_MAGIC: {
2233 struct xfs_btree_block *btb = blk; 2237 struct xfs_btree_block *btb = blk;
@@ -2396,6 +2400,9 @@ xlog_recover_validate_buf_type(
2396 case XFS_BMAP_MAGIC: 2400 case XFS_BMAP_MAGIC:
2397 bp->b_ops = &xfs_bmbt_buf_ops; 2401 bp->b_ops = &xfs_bmbt_buf_ops;
2398 break; 2402 break;
2403 case XFS_RMAP_CRC_MAGIC:
2404 bp->b_ops = &xfs_rmapbt_buf_ops;
2405 break;
2399 default: 2406 default:
2400 xfs_warn(mp, "Bad btree block magic!"); 2407 xfs_warn(mp, "Bad btree block magic!");
2401 ASSERT(0); 2408 ASSERT(0);
@@ -3415,6 +3422,99 @@ xlog_recover_efd_pass2(
3415} 3422}
3416 3423
3417/* 3424/*
3425 * This routine is called to create an in-core extent rmap update
3426 * item from the rui format structure which was logged on disk.
3427 * It allocates an in-core rui, copies the extents from the format
3428 * structure into it, and adds the rui to the AIL with the given
3429 * LSN.
3430 */
3431STATIC int
3432xlog_recover_rui_pass2(
3433 struct xlog *log,
3434 struct xlog_recover_item *item,
3435 xfs_lsn_t lsn)
3436{
3437 int error;
3438 struct xfs_mount *mp = log->l_mp;
3439 struct xfs_rui_log_item *ruip;
3440 struct xfs_rui_log_format *rui_formatp;
3441
3442 rui_formatp = item->ri_buf[0].i_addr;
3443
3444 ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
3445 error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
3446 if (error) {
3447 xfs_rui_item_free(ruip);
3448 return error;
3449 }
3450 atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
3451
3452 spin_lock(&log->l_ailp->xa_lock);
3453 /*
3454 * The RUI has two references. One for the RUD and one for RUI to ensure
3455 * it makes it into the AIL. Insert the RUI into the AIL directly and
3456 * drop the RUI reference. Note that xfs_trans_ail_update() drops the
3457 * AIL lock.
3458 */
3459 xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn);
3460 xfs_rui_release(ruip);
3461 return 0;
3462}
3463
3464
3465/*
3466 * This routine is called when an RUD format structure is found in a committed
3467 * transaction in the log. Its purpose is to cancel the corresponding RUI if it
3468 * was still in the log. To do this it searches the AIL for the RUI with an id
3469 * equal to that in the RUD format structure. If we find it we drop the RUD
3470 * reference, which removes the RUI from the AIL and frees it.
3471 */
3472STATIC int
3473xlog_recover_rud_pass2(
3474 struct xlog *log,
3475 struct xlog_recover_item *item)
3476{
3477 struct xfs_rud_log_format *rud_formatp;
3478 struct xfs_rui_log_item *ruip = NULL;
3479 struct xfs_log_item *lip;
3480 __uint64_t rui_id;
3481 struct xfs_ail_cursor cur;
3482 struct xfs_ail *ailp = log->l_ailp;
3483
3484 rud_formatp = item->ri_buf[0].i_addr;
3485 ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
3486 rui_id = rud_formatp->rud_rui_id;
3487
3488 /*
3489 * Search for the RUI with the id in the RUD format structure in the
3490 * AIL.
3491 */
3492 spin_lock(&ailp->xa_lock);
3493 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
3494 while (lip != NULL) {
3495 if (lip->li_type == XFS_LI_RUI) {
3496 ruip = (struct xfs_rui_log_item *)lip;
3497 if (ruip->rui_format.rui_id == rui_id) {
3498 /*
3499 * Drop the RUD reference to the RUI. This
3500 * removes the RUI from the AIL and frees it.
3501 */
3502 spin_unlock(&ailp->xa_lock);
3503 xfs_rui_release(ruip);
3504 spin_lock(&ailp->xa_lock);
3505 break;
3506 }
3507 }
3508 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3509 }
3510
3511 xfs_trans_ail_cursor_done(&cur);
3512 spin_unlock(&ailp->xa_lock);
3513
3514 return 0;
3515}
3516
3517/*
3418 * This routine is called when an inode create format structure is found in a 3518 * This routine is called when an inode create format structure is found in a
3419 * committed transaction in the log. It's purpose is to initialise the inodes 3519 * committed transaction in the log. It's purpose is to initialise the inodes
3420 * being allocated on disk. This requires us to get inode cluster buffers that 3520 * being allocated on disk. This requires us to get inode cluster buffers that
@@ -3639,6 +3739,8 @@ xlog_recover_ra_pass2(
3639 case XFS_LI_EFI: 3739 case XFS_LI_EFI:
3640 case XFS_LI_EFD: 3740 case XFS_LI_EFD:
3641 case XFS_LI_QUOTAOFF: 3741 case XFS_LI_QUOTAOFF:
3742 case XFS_LI_RUI:
3743 case XFS_LI_RUD:
3642 default: 3744 default:
3643 break; 3745 break;
3644 } 3746 }
@@ -3662,6 +3764,8 @@ xlog_recover_commit_pass1(
3662 case XFS_LI_EFD: 3764 case XFS_LI_EFD:
3663 case XFS_LI_DQUOT: 3765 case XFS_LI_DQUOT:
3664 case XFS_LI_ICREATE: 3766 case XFS_LI_ICREATE:
3767 case XFS_LI_RUI:
3768 case XFS_LI_RUD:
3665 /* nothing to do in pass 1 */ 3769 /* nothing to do in pass 1 */
3666 return 0; 3770 return 0;
3667 default: 3771 default:
@@ -3692,6 +3796,10 @@ xlog_recover_commit_pass2(
3692 return xlog_recover_efi_pass2(log, item, trans->r_lsn); 3796 return xlog_recover_efi_pass2(log, item, trans->r_lsn);
3693 case XFS_LI_EFD: 3797 case XFS_LI_EFD:
3694 return xlog_recover_efd_pass2(log, item); 3798 return xlog_recover_efd_pass2(log, item);
3799 case XFS_LI_RUI:
3800 return xlog_recover_rui_pass2(log, item, trans->r_lsn);
3801 case XFS_LI_RUD:
3802 return xlog_recover_rud_pass2(log, item);
3695 case XFS_LI_DQUOT: 3803 case XFS_LI_DQUOT:
3696 return xlog_recover_dquot_pass2(log, buffer_list, item, 3804 return xlog_recover_dquot_pass2(log, buffer_list, item,
3697 trans->r_lsn); 3805 trans->r_lsn);
@@ -4164,126 +4272,156 @@ xlog_recover_process_data(
4164 return 0; 4272 return 0;
4165} 4273}
4166 4274
4167/* 4275/* Recover the EFI if necessary. */
4168 * Process an extent free intent item that was recovered from
4169 * the log. We need to free the extents that it describes.
4170 */
4171STATIC int 4276STATIC int
4172xlog_recover_process_efi( 4277xlog_recover_process_efi(
4173 xfs_mount_t *mp, 4278 struct xfs_mount *mp,
4174 xfs_efi_log_item_t *efip) 4279 struct xfs_ail *ailp,
4280 struct xfs_log_item *lip)
4175{ 4281{
4176 xfs_efd_log_item_t *efdp; 4282 struct xfs_efi_log_item *efip;
4177 xfs_trans_t *tp; 4283 int error;
4178 int i;
4179 int error = 0;
4180 xfs_extent_t *extp;
4181 xfs_fsblock_t startblock_fsb;
4182
4183 ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
4184 4284
4185 /* 4285 /*
4186 * First check the validity of the extents described by the 4286 * Skip EFIs that we've already processed.
4187 * EFI. If any are bad, then assume that all are bad and
4188 * just toss the EFI.
4189 */ 4287 */
4190 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 4288 efip = container_of(lip, struct xfs_efi_log_item, efi_item);
4191 extp = &(efip->efi_format.efi_extents[i]); 4289 if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
4192 startblock_fsb = XFS_BB_TO_FSB(mp, 4290 return 0;
4193 XFS_FSB_TO_DADDR(mp, extp->ext_start));
4194 if ((startblock_fsb == 0) ||
4195 (extp->ext_len == 0) ||
4196 (startblock_fsb >= mp->m_sb.sb_dblocks) ||
4197 (extp->ext_len >= mp->m_sb.sb_agblocks)) {
4198 /*
4199 * This will pull the EFI from the AIL and
4200 * free the memory associated with it.
4201 */
4202 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
4203 xfs_efi_release(efip);
4204 return -EIO;
4205 }
4206 }
4207 4291
4208 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); 4292 spin_unlock(&ailp->xa_lock);
4209 if (error) 4293 error = xfs_efi_recover(mp, efip);
4210 return error; 4294 spin_lock(&ailp->xa_lock);
4211 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
4212 4295
4213 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 4296 return error;
4214 extp = &(efip->efi_format.efi_extents[i]); 4297}
4215 error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
4216 extp->ext_len);
4217 if (error)
4218 goto abort_error;
4219 4298
4220 } 4299/* Release the EFI since we're cancelling everything. */
4300STATIC void
4301xlog_recover_cancel_efi(
4302 struct xfs_mount *mp,
4303 struct xfs_ail *ailp,
4304 struct xfs_log_item *lip)
4305{
4306 struct xfs_efi_log_item *efip;
4221 4307
4222 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); 4308 efip = container_of(lip, struct xfs_efi_log_item, efi_item);
4223 error = xfs_trans_commit(tp); 4309
4224 return error; 4310 spin_unlock(&ailp->xa_lock);
4311 xfs_efi_release(efip);
4312 spin_lock(&ailp->xa_lock);
4313}
4314
4315/* Recover the RUI if necessary. */
4316STATIC int
4317xlog_recover_process_rui(
4318 struct xfs_mount *mp,
4319 struct xfs_ail *ailp,
4320 struct xfs_log_item *lip)
4321{
4322 struct xfs_rui_log_item *ruip;
4323 int error;
4324
4325 /*
4326 * Skip RUIs that we've already processed.
4327 */
4328 ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
4329 if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
4330 return 0;
4331
4332 spin_unlock(&ailp->xa_lock);
4333 error = xfs_rui_recover(mp, ruip);
4334 spin_lock(&ailp->xa_lock);
4225 4335
4226abort_error:
4227 xfs_trans_cancel(tp);
4228 return error; 4336 return error;
4229} 4337}
4230 4338
4339/* Release the RUI since we're cancelling everything. */
4340STATIC void
4341xlog_recover_cancel_rui(
4342 struct xfs_mount *mp,
4343 struct xfs_ail *ailp,
4344 struct xfs_log_item *lip)
4345{
4346 struct xfs_rui_log_item *ruip;
4347
4348 ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
4349
4350 spin_unlock(&ailp->xa_lock);
4351 xfs_rui_release(ruip);
4352 spin_lock(&ailp->xa_lock);
4353}
4354
4355/* Is this log item a deferred action intent? */
4356static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
4357{
4358 switch (lip->li_type) {
4359 case XFS_LI_EFI:
4360 case XFS_LI_RUI:
4361 return true;
4362 default:
4363 return false;
4364 }
4365}
4366
4231/* 4367/*
4232 * When this is called, all of the EFIs which did not have 4368 * When this is called, all of the log intent items which did not have
4233 * corresponding EFDs should be in the AIL. What we do now 4369 * corresponding log done items should be in the AIL. What we do now
4234 * is free the extents associated with each one. 4370 * is update the data structures associated with each one.
4235 * 4371 *
4236 * Since we process the EFIs in normal transactions, they 4372 * Since we process the log intent items in normal transactions, they
4237 * will be removed at some point after the commit. This prevents 4373 * will be removed at some point after the commit. This prevents us
4238 * us from just walking down the list processing each one. 4374 * from just walking down the list processing each one. We'll use a
4239 * We'll use a flag in the EFI to skip those that we've already 4375 * flag in the intent item to skip those that we've already processed
4240 * processed and use the AIL iteration mechanism's generation 4376 * and use the AIL iteration mechanism's generation count to try to
4241 * count to try to speed this up at least a bit. 4377 * speed this up at least a bit.
4242 * 4378 *
4243 * When we start, we know that the EFIs are the only things in 4379 * When we start, we know that the intents are the only things in the
4244 * the AIL. As we process them, however, other items are added 4380 * AIL. As we process them, however, other items are added to the
4245 * to the AIL. Since everything added to the AIL must come after 4381 * AIL.
4246 * everything already in the AIL, we stop processing as soon as
4247 * we see something other than an EFI in the AIL.
4248 */ 4382 */
4249STATIC int 4383STATIC int
4250xlog_recover_process_efis( 4384xlog_recover_process_intents(
4251 struct xlog *log) 4385 struct xlog *log)
4252{ 4386{
4253 struct xfs_log_item *lip; 4387 struct xfs_log_item *lip;
4254 struct xfs_efi_log_item *efip;
4255 int error = 0; 4388 int error = 0;
4256 struct xfs_ail_cursor cur; 4389 struct xfs_ail_cursor cur;
4257 struct xfs_ail *ailp; 4390 struct xfs_ail *ailp;
4391 xfs_lsn_t last_lsn;
4258 4392
4259 ailp = log->l_ailp; 4393 ailp = log->l_ailp;
4260 spin_lock(&ailp->xa_lock); 4394 spin_lock(&ailp->xa_lock);
4261 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); 4395 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
4396 last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
4262 while (lip != NULL) { 4397 while (lip != NULL) {
4263 /* 4398 /*
4264 * We're done when we see something other than an EFI. 4399 * We're done when we see something other than an intent.
4265 * There should be no EFIs left in the AIL now. 4400 * There should be no intents left in the AIL now.
4266 */ 4401 */
4267 if (lip->li_type != XFS_LI_EFI) { 4402 if (!xlog_item_is_intent(lip)) {
4268#ifdef DEBUG 4403#ifdef DEBUG
4269 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) 4404 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
4270 ASSERT(lip->li_type != XFS_LI_EFI); 4405 ASSERT(!xlog_item_is_intent(lip));
4271#endif 4406#endif
4272 break; 4407 break;
4273 } 4408 }
4274 4409
4275 /* 4410 /*
4276 * Skip EFIs that we've already processed. 4411 * We should never see a redo item with a LSN higher than
4412 * the last transaction we found in the log at the start
4413 * of recovery.
4277 */ 4414 */
4278 efip = container_of(lip, struct xfs_efi_log_item, efi_item); 4415 ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0);
4279 if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) {
4280 lip = xfs_trans_ail_cursor_next(ailp, &cur);
4281 continue;
4282 }
4283 4416
4284 spin_unlock(&ailp->xa_lock); 4417 switch (lip->li_type) {
4285 error = xlog_recover_process_efi(log->l_mp, efip); 4418 case XFS_LI_EFI:
4286 spin_lock(&ailp->xa_lock); 4419 error = xlog_recover_process_efi(log->l_mp, ailp, lip);
4420 break;
4421 case XFS_LI_RUI:
4422 error = xlog_recover_process_rui(log->l_mp, ailp, lip);
4423 break;
4424 }
4287 if (error) 4425 if (error)
4288 goto out; 4426 goto out;
4289 lip = xfs_trans_ail_cursor_next(ailp, &cur); 4427 lip = xfs_trans_ail_cursor_next(ailp, &cur);
@@ -4295,15 +4433,14 @@ out:
4295} 4433}
4296 4434
4297/* 4435/*
4298 * A cancel occurs when the mount has failed and we're bailing out. Release all 4436 * A cancel occurs when the mount has failed and we're bailing out.
4299 * pending EFIs so they don't pin the AIL. 4437 * Release all pending log intent items so they don't pin the AIL.
4300 */ 4438 */
4301STATIC int 4439STATIC int
4302xlog_recover_cancel_efis( 4440xlog_recover_cancel_intents(
4303 struct xlog *log) 4441 struct xlog *log)
4304{ 4442{
4305 struct xfs_log_item *lip; 4443 struct xfs_log_item *lip;
4306 struct xfs_efi_log_item *efip;
4307 int error = 0; 4444 int error = 0;
4308 struct xfs_ail_cursor cur; 4445 struct xfs_ail_cursor cur;
4309 struct xfs_ail *ailp; 4446 struct xfs_ail *ailp;
@@ -4313,22 +4450,25 @@ xlog_recover_cancel_efis(
4313 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); 4450 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
4314 while (lip != NULL) { 4451 while (lip != NULL) {
4315 /* 4452 /*
4316 * We're done when we see something other than an EFI. 4453 * We're done when we see something other than an intent.
4317 * There should be no EFIs left in the AIL now. 4454 * There should be no intents left in the AIL now.
4318 */ 4455 */
4319 if (lip->li_type != XFS_LI_EFI) { 4456 if (!xlog_item_is_intent(lip)) {
4320#ifdef DEBUG 4457#ifdef DEBUG
4321 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) 4458 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
4322 ASSERT(lip->li_type != XFS_LI_EFI); 4459 ASSERT(!xlog_item_is_intent(lip));
4323#endif 4460#endif
4324 break; 4461 break;
4325 } 4462 }
4326 4463
4327 efip = container_of(lip, struct xfs_efi_log_item, efi_item); 4464 switch (lip->li_type) {
4328 4465 case XFS_LI_EFI:
4329 spin_unlock(&ailp->xa_lock); 4466 xlog_recover_cancel_efi(log->l_mp, ailp, lip);
4330 xfs_efi_release(efip); 4467 break;
4331 spin_lock(&ailp->xa_lock); 4468 case XFS_LI_RUI:
4469 xlog_recover_cancel_rui(log->l_mp, ailp, lip);
4470 break;
4471 }
4332 4472
4333 lip = xfs_trans_ail_cursor_next(ailp, &cur); 4473 lip = xfs_trans_ail_cursor_next(ailp, &cur);
4334 } 4474 }
@@ -5023,6 +5163,7 @@ xlog_do_recover(
5023 xfs_warn(mp, "Failed post-recovery per-ag init: %d", error); 5163 xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
5024 return error; 5164 return error;
5025 } 5165 }
5166 mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
5026 5167
5027 xlog_recover_check_summary(log); 5168 xlog_recover_check_summary(log);
5028 5169
@@ -5139,16 +5280,17 @@ xlog_recover_finish(
5139 */ 5280 */
5140 if (log->l_flags & XLOG_RECOVERY_NEEDED) { 5281 if (log->l_flags & XLOG_RECOVERY_NEEDED) {
5141 int error; 5282 int error;
5142 error = xlog_recover_process_efis(log); 5283 error = xlog_recover_process_intents(log);
5143 if (error) { 5284 if (error) {
5144 xfs_alert(log->l_mp, "Failed to recover EFIs"); 5285 xfs_alert(log->l_mp, "Failed to recover intents");
5145 return error; 5286 return error;
5146 } 5287 }
5288
5147 /* 5289 /*
5148 * Sync the log to get all the EFIs out of the AIL. 5290 * Sync the log to get all the intents out of the AIL.
5149 * This isn't absolutely necessary, but it helps in 5291 * This isn't absolutely necessary, but it helps in
5150 * case the unlink transactions would have problems 5292 * case the unlink transactions would have problems
5151 * pushing the EFIs out of the way. 5293 * pushing the intents out of the way.
5152 */ 5294 */
5153 xfs_log_force(log->l_mp, XFS_LOG_SYNC); 5295 xfs_log_force(log->l_mp, XFS_LOG_SYNC);
5154 5296
@@ -5173,7 +5315,7 @@ xlog_recover_cancel(
5173 int error = 0; 5315 int error = 0;
5174 5316
5175 if (log->l_flags & XLOG_RECOVERY_NEEDED) 5317 if (log->l_flags & XLOG_RECOVERY_NEEDED)
5176 error = xlog_recover_cancel_efis(log); 5318 error = xlog_recover_cancel_intents(log);
5177 5319
5178 return error; 5320 return error;
5179} 5321}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 970c19ba2f56..faeead671f9f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -24,6 +24,7 @@
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_defer.h"
27#include "xfs_da_format.h" 28#include "xfs_da_format.h"
28#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
29#include "xfs_inode.h" 30#include "xfs_inode.h"
@@ -41,6 +42,7 @@
41#include "xfs_trace.h" 42#include "xfs_trace.h"
42#include "xfs_icache.h" 43#include "xfs_icache.h"
43#include "xfs_sysfs.h" 44#include "xfs_sysfs.h"
45#include "xfs_rmap_btree.h"
44 46
45 47
46static DEFINE_MUTEX(xfs_uuid_table_mutex); 48static DEFINE_MUTEX(xfs_uuid_table_mutex);
@@ -230,6 +232,8 @@ xfs_initialize_perag(
230 232
231 if (maxagi) 233 if (maxagi)
232 *maxagi = index; 234 *maxagi = index;
235
236 mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
233 return 0; 237 return 0;
234 238
235out_unwind: 239out_unwind:
@@ -679,6 +683,7 @@ xfs_mountfs(
679 xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); 683 xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
680 xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); 684 xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
681 xfs_ialloc_compute_maxlevels(mp); 685 xfs_ialloc_compute_maxlevels(mp);
686 xfs_rmapbt_compute_maxlevels(mp);
682 687
683 xfs_set_maxicount(mp); 688 xfs_set_maxicount(mp);
684 689
@@ -1216,7 +1221,7 @@ xfs_mod_fdblocks(
1216 batch = XFS_FDBLOCKS_BATCH; 1221 batch = XFS_FDBLOCKS_BATCH;
1217 1222
1218 __percpu_counter_add(&mp->m_fdblocks, delta, batch); 1223 __percpu_counter_add(&mp->m_fdblocks, delta, batch);
1219 if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp), 1224 if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
1220 XFS_FDBLOCKS_BATCH) >= 0) { 1225 XFS_FDBLOCKS_BATCH) >= 0) {
1221 /* we had space! */ 1226 /* we had space! */
1222 return 0; 1227 return 0;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index c1b798c72126..b36676cde103 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -116,9 +116,15 @@ typedef struct xfs_mount {
116 uint m_bmap_dmnr[2]; /* min bmap btree records */ 116 uint m_bmap_dmnr[2]; /* min bmap btree records */
117 uint m_inobt_mxr[2]; /* max inobt btree records */ 117 uint m_inobt_mxr[2]; /* max inobt btree records */
118 uint m_inobt_mnr[2]; /* min inobt btree records */ 118 uint m_inobt_mnr[2]; /* min inobt btree records */
119 uint m_rmap_mxr[2]; /* max rmap btree records */
120 uint m_rmap_mnr[2]; /* min rmap btree records */
119 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ 121 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
120 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ 122 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
121 uint m_in_maxlevels; /* max inobt btree levels. */ 123 uint m_in_maxlevels; /* max inobt btree levels. */
124 uint m_rmap_maxlevels; /* max rmap btree levels */
125 xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */
126 uint m_alloc_set_aside; /* space we can't use */
127 uint m_ag_max_usable; /* max space per AG */
122 struct radix_tree_root m_perag_tree; /* per-ag accounting info */ 128 struct radix_tree_root m_perag_tree; /* per-ag accounting info */
123 spinlock_t m_perag_lock; /* lock for m_perag_tree */ 129 spinlock_t m_perag_lock; /* lock for m_perag_tree */
124 struct mutex m_growlock; /* growfs mutex */ 130 struct mutex m_growlock; /* growfs mutex */
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 0cc8d8f74356..69e2986a3776 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -49,11 +49,14 @@ xfs_check_ondisk_structs(void)
49 XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56); 49 XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56);
50 XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4); 50 XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4);
51 XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16); 51 XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16);
52 XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key, 20);
53 XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec, 24);
52 XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp, 8); 54 XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp, 8);
53 XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t, 8); 55 XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t, 8);
54 XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t, 4); 56 XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t, 4);
55 XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t, 8); 57 XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t, 8);
56 XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t, 4); 58 XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t, 4);
59 XFS_CHECK_STRUCT_SIZE(xfs_rmap_ptr_t, 4);
57 60
58 /* dir/attr trees */ 61 /* dir/attr trees */
59 XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr, 80); 62 XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr, 80);
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
new file mode 100644
index 000000000000..2500f28689d5
--- /dev/null
+++ b/fs/xfs/xfs_rmap_item.c
@@ -0,0 +1,536 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_format.h"
23#include "xfs_log_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_bit.h"
26#include "xfs_mount.h"
27#include "xfs_defer.h"
28#include "xfs_trans.h"
29#include "xfs_trans_priv.h"
30#include "xfs_buf_item.h"
31#include "xfs_rmap_item.h"
32#include "xfs_log.h"
33#include "xfs_rmap.h"
34
35
36kmem_zone_t *xfs_rui_zone;
37kmem_zone_t *xfs_rud_zone;
38
39static inline struct xfs_rui_log_item *RUI_ITEM(struct xfs_log_item *lip)
40{
41 return container_of(lip, struct xfs_rui_log_item, rui_item);
42}
43
44void
45xfs_rui_item_free(
46 struct xfs_rui_log_item *ruip)
47{
48 if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS)
49 kmem_free(ruip);
50 else
51 kmem_zone_free(xfs_rui_zone, ruip);
52}
53
54/*
55 * This returns the number of iovecs needed to log the given rui item.
56 * We only need 1 iovec for an rui item. It just logs the rui_log_format
57 * structure.
58 */
59static inline int
60xfs_rui_item_sizeof(
61 struct xfs_rui_log_item *ruip)
62{
63 return sizeof(struct xfs_rui_log_format) +
64 (ruip->rui_format.rui_nextents - 1) *
65 sizeof(struct xfs_map_extent);
66}
67
68STATIC void
69xfs_rui_item_size(
70 struct xfs_log_item *lip,
71 int *nvecs,
72 int *nbytes)
73{
74 *nvecs += 1;
75 *nbytes += xfs_rui_item_sizeof(RUI_ITEM(lip));
76}
77
78/*
79 * This is called to fill in the vector of log iovecs for the
80 * given rui log item. We use only 1 iovec, and we point that
81 * at the rui_log_format structure embedded in the rui item.
82 * It is at this point that we assert that all of the extent
83 * slots in the rui item have been filled.
84 */
85STATIC void
86xfs_rui_item_format(
87 struct xfs_log_item *lip,
88 struct xfs_log_vec *lv)
89{
90 struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
91 struct xfs_log_iovec *vecp = NULL;
92
93 ASSERT(atomic_read(&ruip->rui_next_extent) ==
94 ruip->rui_format.rui_nextents);
95
96 ruip->rui_format.rui_type = XFS_LI_RUI;
97 ruip->rui_format.rui_size = 1;
98
99 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format,
100 xfs_rui_item_sizeof(ruip));
101}
102
103/*
104 * Pinning has no meaning for an rui item, so just return.
105 */
106STATIC void
107xfs_rui_item_pin(
108 struct xfs_log_item *lip)
109{
110}
111
112/*
113 * The unpin operation is the last place an RUI is manipulated in the log. It is
114 * either inserted in the AIL or aborted in the event of a log I/O error. In
115 * either case, the RUI transaction has been successfully committed to make it
116 * this far. Therefore, we expect whoever committed the RUI to either construct
117 * and commit the RUD or drop the RUD's reference in the event of error. Simply
118 * drop the log's RUI reference now that the log is done with it.
119 */
120STATIC void
121xfs_rui_item_unpin(
122 struct xfs_log_item *lip,
123 int remove)
124{
125 struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
126
127 xfs_rui_release(ruip);
128}
129
130/*
131 * RUI items have no locking or pushing. However, since RUIs are pulled from
132 * the AIL when their corresponding RUDs are committed to disk, their situation
133 * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
134 * will eventually flush the log. This should help in getting the RUI out of
135 * the AIL.
136 */
137STATIC uint
138xfs_rui_item_push(
139 struct xfs_log_item *lip,
140 struct list_head *buffer_list)
141{
142 return XFS_ITEM_PINNED;
143}
144
145/*
146 * The RUI has been either committed or aborted if the transaction has been
147 * cancelled. If the transaction was cancelled, an RUD isn't going to be
148 * constructed and thus we free the RUI here directly.
149 */
150STATIC void
151xfs_rui_item_unlock(
152 struct xfs_log_item *lip)
153{
154 if (lip->li_flags & XFS_LI_ABORTED)
155 xfs_rui_item_free(RUI_ITEM(lip));
156}
157
158/*
159 * The RUI is logged only once and cannot be moved in the log, so simply return
160 * the lsn at which it's been logged.
161 */
162STATIC xfs_lsn_t
163xfs_rui_item_committed(
164 struct xfs_log_item *lip,
165 xfs_lsn_t lsn)
166{
167 return lsn;
168}
169
170/*
171 * The RUI dependency tracking op doesn't do squat. It can't because
172 * it doesn't know where the free extent is coming from. The dependency
173 * tracking has to be handled by the "enclosing" metadata object. For
174 * example, for inodes, the inode is locked throughout the extent freeing
175 * so the dependency should be recorded there.
176 */
177STATIC void
178xfs_rui_item_committing(
179 struct xfs_log_item *lip,
180 xfs_lsn_t lsn)
181{
182}
183
184/*
185 * This is the ops vector shared by all rui log items.
186 */
187static const struct xfs_item_ops xfs_rui_item_ops = {
188 .iop_size = xfs_rui_item_size,
189 .iop_format = xfs_rui_item_format,
190 .iop_pin = xfs_rui_item_pin,
191 .iop_unpin = xfs_rui_item_unpin,
192 .iop_unlock = xfs_rui_item_unlock,
193 .iop_committed = xfs_rui_item_committed,
194 .iop_push = xfs_rui_item_push,
195 .iop_committing = xfs_rui_item_committing,
196};
197
198/*
199 * Allocate and initialize an rui item with the given number of extents.
200 */
201struct xfs_rui_log_item *
202xfs_rui_init(
203 struct xfs_mount *mp,
204 uint nextents)
205
206{
207 struct xfs_rui_log_item *ruip;
208 uint size;
209
210 ASSERT(nextents > 0);
211 if (nextents > XFS_RUI_MAX_FAST_EXTENTS) {
212 size = (uint)(sizeof(struct xfs_rui_log_item) +
213 ((nextents - 1) * sizeof(struct xfs_map_extent)));
214 ruip = kmem_zalloc(size, KM_SLEEP);
215 } else {
216 ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP);
217 }
218
219 xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops);
220 ruip->rui_format.rui_nextents = nextents;
221 ruip->rui_format.rui_id = (uintptr_t)(void *)ruip;
222 atomic_set(&ruip->rui_next_extent, 0);
223 atomic_set(&ruip->rui_refcount, 2);
224
225 return ruip;
226}
227
228/*
229 * Copy an RUI format buffer from the given buf, and into the destination
230 * RUI format structure. The RUI/RUD items were designed not to need any
231 * special alignment handling.
232 */
233int
234xfs_rui_copy_format(
235 struct xfs_log_iovec *buf,
236 struct xfs_rui_log_format *dst_rui_fmt)
237{
238 struct xfs_rui_log_format *src_rui_fmt;
239 uint len;
240
241 src_rui_fmt = buf->i_addr;
242 len = sizeof(struct xfs_rui_log_format) +
243 (src_rui_fmt->rui_nextents - 1) *
244 sizeof(struct xfs_map_extent);
245
246 if (buf->i_len != len)
247 return -EFSCORRUPTED;
248
249 memcpy((char *)dst_rui_fmt, (char *)src_rui_fmt, len);
250 return 0;
251}
252
253/*
254 * Freeing the RUI requires that we remove it from the AIL if it has already
255 * been placed there. However, the RUI may not yet have been placed in the AIL
256 * when called by xfs_rui_release() from RUD processing due to the ordering of
257 * committed vs unpin operations in bulk insert operations. Hence the reference
258 * count to ensure only the last caller frees the RUI.
259 */
260void
261xfs_rui_release(
262 struct xfs_rui_log_item *ruip)
263{
264 if (atomic_dec_and_test(&ruip->rui_refcount)) {
265 xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
266 xfs_rui_item_free(ruip);
267 }
268}
269
270static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip)
271{
272 return container_of(lip, struct xfs_rud_log_item, rud_item);
273}
274
275STATIC void
276xfs_rud_item_size(
277 struct xfs_log_item *lip,
278 int *nvecs,
279 int *nbytes)
280{
281 *nvecs += 1;
282 *nbytes += sizeof(struct xfs_rud_log_format);
283}
284
285/*
286 * This is called to fill in the vector of log iovecs for the
287 * given rud log item. We use only 1 iovec, and we point that
288 * at the rud_log_format structure embedded in the rud item.
289 * It is at this point that we assert that all of the extent
290 * slots in the rud item have been filled.
291 */
292STATIC void
293xfs_rud_item_format(
294 struct xfs_log_item *lip,
295 struct xfs_log_vec *lv)
296{
297 struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
298 struct xfs_log_iovec *vecp = NULL;
299
300 rudp->rud_format.rud_type = XFS_LI_RUD;
301 rudp->rud_format.rud_size = 1;
302
303 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format,
304 sizeof(struct xfs_rud_log_format));
305}
306
307/*
308 * Pinning has no meaning for an rud item, so just return.
309 */
310STATIC void
311xfs_rud_item_pin(
312 struct xfs_log_item *lip)
313{
314}
315
316/*
317 * Since pinning has no meaning for an rud item, unpinning does
318 * not either.
319 */
320STATIC void
321xfs_rud_item_unpin(
322 struct xfs_log_item *lip,
323 int remove)
324{
325}
326
327/*
328 * There isn't much you can do to push on an rud item. It is simply stuck
329 * waiting for the log to be flushed to disk.
330 */
331STATIC uint
332xfs_rud_item_push(
333 struct xfs_log_item *lip,
334 struct list_head *buffer_list)
335{
336 return XFS_ITEM_PINNED;
337}
338
339/*
340 * The RUD is either committed or aborted if the transaction is cancelled. If
341 * the transaction is cancelled, drop our reference to the RUI and free the
342 * RUD.
343 */
344STATIC void
345xfs_rud_item_unlock(
346 struct xfs_log_item *lip)
347{
348 struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
349
350 if (lip->li_flags & XFS_LI_ABORTED) {
351 xfs_rui_release(rudp->rud_ruip);
352 kmem_zone_free(xfs_rud_zone, rudp);
353 }
354}
355
356/*
357 * When the rud item is committed to disk, all we need to do is delete our
358 * reference to our partner rui item and then free ourselves. Since we're
359 * freeing ourselves we must return -1 to keep the transaction code from
360 * further referencing this item.
361 */
362STATIC xfs_lsn_t
363xfs_rud_item_committed(
364 struct xfs_log_item *lip,
365 xfs_lsn_t lsn)
366{
367 struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
368
369 /*
370 * Drop the RUI reference regardless of whether the RUD has been
371 * aborted. Once the RUD transaction is constructed, it is the sole
372 * responsibility of the RUD to release the RUI (even if the RUI is
373 * aborted due to log I/O error).
374 */
375 xfs_rui_release(rudp->rud_ruip);
376 kmem_zone_free(xfs_rud_zone, rudp);
377
378 return (xfs_lsn_t)-1;
379}
380
381/*
382 * The RUD dependency tracking op doesn't do squat. It can't because
383 * it doesn't know where the free extent is coming from. The dependency
384 * tracking has to be handled by the "enclosing" metadata object. For
385 * example, for inodes, the inode is locked throughout the extent freeing
386 * so the dependency should be recorded there.
387 */
388STATIC void
389xfs_rud_item_committing(
390 struct xfs_log_item *lip,
391 xfs_lsn_t lsn)
392{
393}
394
395/*
396 * This is the ops vector shared by all rud log items.
397 */
398static const struct xfs_item_ops xfs_rud_item_ops = {
399 .iop_size = xfs_rud_item_size,
400 .iop_format = xfs_rud_item_format,
401 .iop_pin = xfs_rud_item_pin,
402 .iop_unpin = xfs_rud_item_unpin,
403 .iop_unlock = xfs_rud_item_unlock,
404 .iop_committed = xfs_rud_item_committed,
405 .iop_push = xfs_rud_item_push,
406 .iop_committing = xfs_rud_item_committing,
407};
408
409/*
410 * Allocate and initialize an rud item with the given number of extents.
411 */
412struct xfs_rud_log_item *
413xfs_rud_init(
414 struct xfs_mount *mp,
415 struct xfs_rui_log_item *ruip)
416
417{
418 struct xfs_rud_log_item *rudp;
419
420 rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
421 xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops);
422 rudp->rud_ruip = ruip;
423 rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
424
425 return rudp;
426}
427
428/*
429 * Process an rmap update intent item that was recovered from the log.
430 * We need to update the rmapbt.
431 */
432int
433xfs_rui_recover(
434 struct xfs_mount *mp,
435 struct xfs_rui_log_item *ruip)
436{
437 int i;
438 int error = 0;
439 struct xfs_map_extent *rmap;
440 xfs_fsblock_t startblock_fsb;
441 bool op_ok;
442 struct xfs_rud_log_item *rudp;
443 enum xfs_rmap_intent_type type;
444 int whichfork;
445 xfs_exntst_t state;
446 struct xfs_trans *tp;
447 struct xfs_btree_cur *rcur = NULL;
448
449 ASSERT(!test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags));
450
451 /*
452 * First check the validity of the extents described by the
453 * RUI. If any are bad, then assume that all are bad and
454 * just toss the RUI.
455 */
456 for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
457 rmap = &ruip->rui_format.rui_extents[i];
458 startblock_fsb = XFS_BB_TO_FSB(mp,
459 XFS_FSB_TO_DADDR(mp, rmap->me_startblock));
460 switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
461 case XFS_RMAP_EXTENT_MAP:
462 case XFS_RMAP_EXTENT_UNMAP:
463 case XFS_RMAP_EXTENT_CONVERT:
464 case XFS_RMAP_EXTENT_ALLOC:
465 case XFS_RMAP_EXTENT_FREE:
466 op_ok = true;
467 break;
468 default:
469 op_ok = false;
470 break;
471 }
472 if (!op_ok || startblock_fsb == 0 ||
473 rmap->me_len == 0 ||
474 startblock_fsb >= mp->m_sb.sb_dblocks ||
475 rmap->me_len >= mp->m_sb.sb_agblocks ||
476 (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)) {
477 /*
478 * This will pull the RUI from the AIL and
479 * free the memory associated with it.
480 */
481 set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
482 xfs_rui_release(ruip);
483 return -EIO;
484 }
485 }
486
487 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
488 if (error)
489 return error;
490 rudp = xfs_trans_get_rud(tp, ruip);
491
492 for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
493 rmap = &ruip->rui_format.rui_extents[i];
494 state = (rmap->me_flags & XFS_RMAP_EXTENT_UNWRITTEN) ?
495 XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
496 whichfork = (rmap->me_flags & XFS_RMAP_EXTENT_ATTR_FORK) ?
497 XFS_ATTR_FORK : XFS_DATA_FORK;
498 switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
499 case XFS_RMAP_EXTENT_MAP:
500 type = XFS_RMAP_MAP;
501 break;
502 case XFS_RMAP_EXTENT_UNMAP:
503 type = XFS_RMAP_UNMAP;
504 break;
505 case XFS_RMAP_EXTENT_CONVERT:
506 type = XFS_RMAP_CONVERT;
507 break;
508 case XFS_RMAP_EXTENT_ALLOC:
509 type = XFS_RMAP_ALLOC;
510 break;
511 case XFS_RMAP_EXTENT_FREE:
512 type = XFS_RMAP_FREE;
513 break;
514 default:
515 error = -EFSCORRUPTED;
516 goto abort_error;
517 }
518 error = xfs_trans_log_finish_rmap_update(tp, rudp, type,
519 rmap->me_owner, whichfork,
520 rmap->me_startoff, rmap->me_startblock,
521 rmap->me_len, state, &rcur);
522 if (error)
523 goto abort_error;
524
525 }
526
527 xfs_rmap_finish_one_cleanup(tp, rcur, error);
528 set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
529 error = xfs_trans_commit(tp);
530 return error;
531
532abort_error:
533 xfs_rmap_finish_one_cleanup(tp, rcur, error);
534 xfs_trans_cancel(tp);
535 return error;
536}
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
new file mode 100644
index 000000000000..aefcc3a318a5
--- /dev/null
+++ b/fs/xfs/xfs_rmap_item.h
@@ -0,0 +1,95 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#ifndef __XFS_RMAP_ITEM_H__
21#define __XFS_RMAP_ITEM_H__
22
23/*
24 * There are (currently) three pairs of rmap btree redo item types: map, unmap,
25 * and convert. The common abbreviations for these are RUI (rmap update
26 * intent) and RUD (rmap update done). The redo item type is encoded in the
27 * flags field of each xfs_map_extent.
28 *
29 * *I items should be recorded in the *first* of a series of rolled
30 * transactions, and the *D items should be recorded in the same transaction
31 * that records the associated rmapbt updates. Typically, the first
32 * transaction will record a bmbt update, followed by some number of
33 * transactions containing rmapbt updates, and finally transactions with any
34 * bnobt/cntbt updates.
35 *
36 * Should the system crash after the commit of the first transaction but
37 * before the commit of the final transaction in a series, log recovery will
38 * use the redo information recorded by the intent items to replay the
39 * (rmapbt/bnobt/cntbt) metadata updates in the non-first transaction.
40 */
41
42/* kernel only RUI/RUD definitions */
43
44struct xfs_mount;
45struct kmem_zone;
46
47/*
48 * Max number of extents in fast allocation path.
49 */
50#define XFS_RUI_MAX_FAST_EXTENTS 16
51
52/*
53 * Define RUI flag bits. Manipulated by set/clear/test_bit operators.
54 */
55#define XFS_RUI_RECOVERED 1
56
57/*
58 * This is the "rmap update intent" log item. It is used to log the fact that
59 * some reverse mappings need to change. It is used in conjunction with the
60 * "rmap update done" log item described below.
61 *
62 * These log items follow the same rules as struct xfs_efi_log_item; see the
63 * comments about that structure (in xfs_extfree_item.h) for more details.
64 */
65struct xfs_rui_log_item {
66 struct xfs_log_item rui_item;
67 atomic_t rui_refcount;
68 atomic_t rui_next_extent;
69 unsigned long rui_flags; /* misc flags */
70 struct xfs_rui_log_format rui_format;
71};
72
73/*
74 * This is the "rmap update done" log item. It is used to log the fact that
75 * some rmapbt updates mentioned in an earlier rui item have been performed.
76 */
77struct xfs_rud_log_item {
78 struct xfs_log_item rud_item;
79 struct xfs_rui_log_item *rud_ruip;
80 struct xfs_rud_log_format rud_format;
81};
82
83extern struct kmem_zone *xfs_rui_zone;
84extern struct kmem_zone *xfs_rud_zone;
85
86struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint);
87struct xfs_rud_log_item *xfs_rud_init(struct xfs_mount *,
88 struct xfs_rui_log_item *);
89int xfs_rui_copy_format(struct xfs_log_iovec *buf,
90 struct xfs_rui_log_format *dst_rui_fmt);
91void xfs_rui_item_free(struct xfs_rui_log_item *);
92void xfs_rui_release(struct xfs_rui_log_item *);
93int xfs_rui_recover(struct xfs_mount *mp, struct xfs_rui_log_item *ruip);
94
95#endif /* __XFS_RMAP_ITEM_H__ */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 3938b37d1043..802bcc326d9f 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -23,6 +23,7 @@
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
26#include "xfs_inode.h" 27#include "xfs_inode.h"
27#include "xfs_bmap.h" 28#include "xfs_bmap.h"
28#include "xfs_bmap_util.h" 29#include "xfs_bmap_util.h"
@@ -769,7 +770,7 @@ xfs_growfs_rt_alloc(
769 xfs_daddr_t d; /* disk block address */ 770 xfs_daddr_t d; /* disk block address */
770 int error; /* error return value */ 771 int error; /* error return value */
771 xfs_fsblock_t firstblock;/* first block allocated in xaction */ 772 xfs_fsblock_t firstblock;/* first block allocated in xaction */
772 struct xfs_bmap_free flist; /* list of freed blocks */ 773 struct xfs_defer_ops dfops; /* list of freed blocks */
773 xfs_fsblock_t fsbno; /* filesystem block for bno */ 774 xfs_fsblock_t fsbno; /* filesystem block for bno */
774 struct xfs_bmbt_irec map; /* block map output */ 775 struct xfs_bmbt_irec map; /* block map output */
775 int nmap; /* number of block maps */ 776 int nmap; /* number of block maps */
@@ -794,14 +795,14 @@ xfs_growfs_rt_alloc(
794 xfs_ilock(ip, XFS_ILOCK_EXCL); 795 xfs_ilock(ip, XFS_ILOCK_EXCL);
795 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 796 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
796 797
797 xfs_bmap_init(&flist, &firstblock); 798 xfs_defer_init(&dfops, &firstblock);
798 /* 799 /*
799 * Allocate blocks to the bitmap file. 800 * Allocate blocks to the bitmap file.
800 */ 801 */
801 nmap = 1; 802 nmap = 1;
802 error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, 803 error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
803 XFS_BMAPI_METADATA, &firstblock, 804 XFS_BMAPI_METADATA, &firstblock,
804 resblks, &map, &nmap, &flist); 805 resblks, &map, &nmap, &dfops);
805 if (!error && nmap < 1) 806 if (!error && nmap < 1)
806 error = -ENOSPC; 807 error = -ENOSPC;
807 if (error) 808 if (error)
@@ -809,7 +810,7 @@ xfs_growfs_rt_alloc(
809 /* 810 /*
810 * Free any blocks freed up in the transaction, then commit. 811 * Free any blocks freed up in the transaction, then commit.
811 */ 812 */
812 error = xfs_bmap_finish(&tp, &flist, NULL); 813 error = xfs_defer_finish(&tp, &dfops, NULL);
813 if (error) 814 if (error)
814 goto out_bmap_cancel; 815 goto out_bmap_cancel;
815 error = xfs_trans_commit(tp); 816 error = xfs_trans_commit(tp);
@@ -862,7 +863,7 @@ xfs_growfs_rt_alloc(
862 return 0; 863 return 0;
863 864
864out_bmap_cancel: 865out_bmap_cancel:
865 xfs_bmap_cancel(&flist); 866 xfs_defer_cancel(&dfops);
866out_trans_cancel: 867out_trans_cancel:
867 xfs_trans_cancel(tp); 868 xfs_trans_cancel(tp);
868 return error; 869 return error;
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index d266e835ecc3..6e812fe0fd43 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -61,6 +61,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
61 { "bmbt2", XFSSTAT_END_BMBT_V2 }, 61 { "bmbt2", XFSSTAT_END_BMBT_V2 },
62 { "ibt2", XFSSTAT_END_IBT_V2 }, 62 { "ibt2", XFSSTAT_END_IBT_V2 },
63 { "fibt2", XFSSTAT_END_FIBT_V2 }, 63 { "fibt2", XFSSTAT_END_FIBT_V2 },
64 { "rmapbt", XFSSTAT_END_RMAP_V2 },
64 /* we print both series of quota information together */ 65 /* we print both series of quota information together */
65 { "qm", XFSSTAT_END_QM }, 66 { "qm", XFSSTAT_END_QM },
66 }; 67 };
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 483b0eff1988..657865f51e78 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -197,7 +197,23 @@ struct xfsstats {
197 __uint32_t xs_fibt_2_alloc; 197 __uint32_t xs_fibt_2_alloc;
198 __uint32_t xs_fibt_2_free; 198 __uint32_t xs_fibt_2_free;
199 __uint32_t xs_fibt_2_moves; 199 __uint32_t xs_fibt_2_moves;
200#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_FIBT_V2+6) 200#define XFSSTAT_END_RMAP_V2 (XFSSTAT_END_FIBT_V2+15)
201 __uint32_t xs_rmap_2_lookup;
202 __uint32_t xs_rmap_2_compare;
203 __uint32_t xs_rmap_2_insrec;
204 __uint32_t xs_rmap_2_delrec;
205 __uint32_t xs_rmap_2_newroot;
206 __uint32_t xs_rmap_2_killroot;
207 __uint32_t xs_rmap_2_increment;
208 __uint32_t xs_rmap_2_decrement;
209 __uint32_t xs_rmap_2_lshift;
210 __uint32_t xs_rmap_2_rshift;
211 __uint32_t xs_rmap_2_split;
212 __uint32_t xs_rmap_2_join;
213 __uint32_t xs_rmap_2_alloc;
214 __uint32_t xs_rmap_2_free;
215 __uint32_t xs_rmap_2_moves;
216#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_RMAP_V2+6)
201 __uint32_t xs_qm_dqreclaims; 217 __uint32_t xs_qm_dqreclaims;
202 __uint32_t xs_qm_dqreclaim_misses; 218 __uint32_t xs_qm_dqreclaim_misses;
203 __uint32_t xs_qm_dquot_dups; 219 __uint32_t xs_qm_dquot_dups;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 0303f1005f88..24ef83ef04de 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -46,6 +46,7 @@
46#include "xfs_quota.h" 46#include "xfs_quota.h"
47#include "xfs_sysfs.h" 47#include "xfs_sysfs.h"
48#include "xfs_ondisk.h" 48#include "xfs_ondisk.h"
49#include "xfs_rmap_item.h"
49 50
50#include <linux/namei.h> 51#include <linux/namei.h>
51#include <linux/init.h> 52#include <linux/init.h>
@@ -1075,7 +1076,7 @@ xfs_fs_statfs(
1075 statp->f_blocks = sbp->sb_dblocks - lsize; 1076 statp->f_blocks = sbp->sb_dblocks - lsize;
1076 spin_unlock(&mp->m_sb_lock); 1077 spin_unlock(&mp->m_sb_lock);
1077 1078
1078 statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp); 1079 statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
1079 statp->f_bavail = statp->f_bfree; 1080 statp->f_bavail = statp->f_bfree;
1080 1081
1081 fakeinos = statp->f_bfree << sbp->sb_inopblog; 1082 fakeinos = statp->f_bfree << sbp->sb_inopblog;
@@ -1573,6 +1574,10 @@ xfs_fs_fill_super(
1573 } 1574 }
1574 } 1575 }
1575 1576
1577 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
1578 xfs_alert(mp,
1579 "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!");
1580
1576 error = xfs_mountfs(mp); 1581 error = xfs_mountfs(mp);
1577 if (error) 1582 if (error)
1578 goto out_filestream_unmount; 1583 goto out_filestream_unmount;
@@ -1697,7 +1702,7 @@ xfs_init_zones(void)
1697 goto out_free_ioend_bioset; 1702 goto out_free_ioend_bioset;
1698 1703
1699 xfs_bmap_free_item_zone = kmem_zone_init( 1704 xfs_bmap_free_item_zone = kmem_zone_init(
1700 sizeof(struct xfs_bmap_free_item), 1705 sizeof(struct xfs_extent_free_item),
1701 "xfs_bmap_free_item"); 1706 "xfs_bmap_free_item");
1702 if (!xfs_bmap_free_item_zone) 1707 if (!xfs_bmap_free_item_zone)
1703 goto out_destroy_log_ticket_zone; 1708 goto out_destroy_log_ticket_zone;
@@ -1765,8 +1770,24 @@ xfs_init_zones(void)
1765 if (!xfs_icreate_zone) 1770 if (!xfs_icreate_zone)
1766 goto out_destroy_ili_zone; 1771 goto out_destroy_ili_zone;
1767 1772
1773 xfs_rud_zone = kmem_zone_init(sizeof(struct xfs_rud_log_item),
1774 "xfs_rud_item");
1775 if (!xfs_rud_zone)
1776 goto out_destroy_icreate_zone;
1777
1778 xfs_rui_zone = kmem_zone_init((sizeof(struct xfs_rui_log_item) +
1779 ((XFS_RUI_MAX_FAST_EXTENTS - 1) *
1780 sizeof(struct xfs_map_extent))),
1781 "xfs_rui_item");
1782 if (!xfs_rui_zone)
1783 goto out_destroy_rud_zone;
1784
1768 return 0; 1785 return 0;
1769 1786
1787 out_destroy_rud_zone:
1788 kmem_zone_destroy(xfs_rud_zone);
1789 out_destroy_icreate_zone:
1790 kmem_zone_destroy(xfs_icreate_zone);
1770 out_destroy_ili_zone: 1791 out_destroy_ili_zone:
1771 kmem_zone_destroy(xfs_ili_zone); 1792 kmem_zone_destroy(xfs_ili_zone);
1772 out_destroy_inode_zone: 1793 out_destroy_inode_zone:
@@ -1805,6 +1826,8 @@ xfs_destroy_zones(void)
1805 * destroy caches. 1826 * destroy caches.
1806 */ 1827 */
1807 rcu_barrier(); 1828 rcu_barrier();
1829 kmem_zone_destroy(xfs_rui_zone);
1830 kmem_zone_destroy(xfs_rud_zone);
1808 kmem_zone_destroy(xfs_icreate_zone); 1831 kmem_zone_destroy(xfs_icreate_zone);
1809 kmem_zone_destroy(xfs_ili_zone); 1832 kmem_zone_destroy(xfs_ili_zone);
1810 kmem_zone_destroy(xfs_inode_zone); 1833 kmem_zone_destroy(xfs_inode_zone);
@@ -1854,6 +1877,9 @@ init_xfs_fs(void)
1854 printk(KERN_INFO XFS_VERSION_STRING " with " 1877 printk(KERN_INFO XFS_VERSION_STRING " with "
1855 XFS_BUILD_OPTIONS " enabled\n"); 1878 XFS_BUILD_OPTIONS " enabled\n");
1856 1879
1880 xfs_extent_free_init_defer_op();
1881 xfs_rmap_update_init_defer_op();
1882
1857 xfs_dir_startup(); 1883 xfs_dir_startup();
1858 1884
1859 error = xfs_init_zones(); 1885 error = xfs_init_zones();
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 08a46c6181fd..58142aeeeea6 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -26,6 +26,7 @@
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_da_format.h" 27#include "xfs_da_format.h"
28#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
29#include "xfs_defer.h"
29#include "xfs_dir2.h" 30#include "xfs_dir2.h"
30#include "xfs_inode.h" 31#include "xfs_inode.h"
31#include "xfs_ialloc.h" 32#include "xfs_ialloc.h"
@@ -172,7 +173,7 @@ xfs_symlink(
172 struct xfs_inode *ip = NULL; 173 struct xfs_inode *ip = NULL;
173 int error = 0; 174 int error = 0;
174 int pathlen; 175 int pathlen;
175 struct xfs_bmap_free free_list; 176 struct xfs_defer_ops dfops;
176 xfs_fsblock_t first_block; 177 xfs_fsblock_t first_block;
177 bool unlock_dp_on_error = false; 178 bool unlock_dp_on_error = false;
178 xfs_fileoff_t first_fsb; 179 xfs_fileoff_t first_fsb;
@@ -269,7 +270,7 @@ xfs_symlink(
269 * Initialize the bmap freelist prior to calling either 270 * Initialize the bmap freelist prior to calling either
270 * bmapi or the directory create code. 271 * bmapi or the directory create code.
271 */ 272 */
272 xfs_bmap_init(&free_list, &first_block); 273 xfs_defer_init(&dfops, &first_block);
273 274
274 /* 275 /*
275 * Allocate an inode for the symlink. 276 * Allocate an inode for the symlink.
@@ -313,7 +314,7 @@ xfs_symlink(
313 314
314 error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks, 315 error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
315 XFS_BMAPI_METADATA, &first_block, resblks, 316 XFS_BMAPI_METADATA, &first_block, resblks,
316 mval, &nmaps, &free_list); 317 mval, &nmaps, &dfops);
317 if (error) 318 if (error)
318 goto out_bmap_cancel; 319 goto out_bmap_cancel;
319 320
@@ -361,7 +362,7 @@ xfs_symlink(
361 * Create the directory entry for the symlink. 362 * Create the directory entry for the symlink.
362 */ 363 */
363 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, 364 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
364 &first_block, &free_list, resblks); 365 &first_block, &dfops, resblks);
365 if (error) 366 if (error)
366 goto out_bmap_cancel; 367 goto out_bmap_cancel;
367 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 368 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -376,7 +377,7 @@ xfs_symlink(
376 xfs_trans_set_sync(tp); 377 xfs_trans_set_sync(tp);
377 } 378 }
378 379
379 error = xfs_bmap_finish(&tp, &free_list, NULL); 380 error = xfs_defer_finish(&tp, &dfops, NULL);
380 if (error) 381 if (error)
381 goto out_bmap_cancel; 382 goto out_bmap_cancel;
382 383
@@ -392,7 +393,7 @@ xfs_symlink(
392 return 0; 393 return 0;
393 394
394out_bmap_cancel: 395out_bmap_cancel:
395 xfs_bmap_cancel(&free_list); 396 xfs_defer_cancel(&dfops);
396out_trans_cancel: 397out_trans_cancel:
397 xfs_trans_cancel(tp); 398 xfs_trans_cancel(tp);
398out_release_inode: 399out_release_inode:
@@ -426,7 +427,7 @@ xfs_inactive_symlink_rmt(
426 int done; 427 int done;
427 int error; 428 int error;
428 xfs_fsblock_t first_block; 429 xfs_fsblock_t first_block;
429 xfs_bmap_free_t free_list; 430 struct xfs_defer_ops dfops;
430 int i; 431 int i;
431 xfs_mount_t *mp; 432 xfs_mount_t *mp;
432 xfs_bmbt_irec_t mval[XFS_SYMLINK_MAPS]; 433 xfs_bmbt_irec_t mval[XFS_SYMLINK_MAPS];
@@ -465,7 +466,7 @@ xfs_inactive_symlink_rmt(
465 * Find the block(s) so we can inval and unmap them. 466 * Find the block(s) so we can inval and unmap them.
466 */ 467 */
467 done = 0; 468 done = 0;
468 xfs_bmap_init(&free_list, &first_block); 469 xfs_defer_init(&dfops, &first_block);
469 nmaps = ARRAY_SIZE(mval); 470 nmaps = ARRAY_SIZE(mval);
470 error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size), 471 error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size),
471 mval, &nmaps, 0); 472 mval, &nmaps, 0);
@@ -485,17 +486,17 @@ xfs_inactive_symlink_rmt(
485 xfs_trans_binval(tp, bp); 486 xfs_trans_binval(tp, bp);
486 } 487 }
487 /* 488 /*
488 * Unmap the dead block(s) to the free_list. 489 * Unmap the dead block(s) to the dfops.
489 */ 490 */
490 error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps, 491 error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps,
491 &first_block, &free_list, &done); 492 &first_block, &dfops, &done);
492 if (error) 493 if (error)
493 goto error_bmap_cancel; 494 goto error_bmap_cancel;
494 ASSERT(done); 495 ASSERT(done);
495 /* 496 /*
496 * Commit the first transaction. This logs the EFI and the inode. 497 * Commit the first transaction. This logs the EFI and the inode.
497 */ 498 */
498 error = xfs_bmap_finish(&tp, &free_list, ip); 499 error = xfs_defer_finish(&tp, &dfops, ip);
499 if (error) 500 if (error)
500 goto error_bmap_cancel; 501 goto error_bmap_cancel;
501 /* 502 /*
@@ -525,7 +526,7 @@ xfs_inactive_symlink_rmt(
525 return 0; 526 return 0;
526 527
527error_bmap_cancel: 528error_bmap_cancel:
528 xfs_bmap_cancel(&free_list); 529 xfs_defer_cancel(&dfops);
529error_trans_cancel: 530error_trans_cancel:
530 xfs_trans_cancel(tp); 531 xfs_trans_cancel(tp);
531error_unlock: 532error_unlock:
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 13a029806805..7f17ae6d709a 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -22,7 +22,9 @@
22#include "xfs_log_format.h" 22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_mount.h" 24#include "xfs_mount.h"
25#include "xfs_defer.h"
25#include "xfs_da_format.h" 26#include "xfs_da_format.h"
27#include "xfs_defer.h"
26#include "xfs_inode.h" 28#include "xfs_inode.h"
27#include "xfs_btree.h" 29#include "xfs_btree.h"
28#include "xfs_da_btree.h" 30#include "xfs_da_btree.h"
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 145169093fe0..551b7e26980c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -38,6 +38,7 @@ struct xlog_recover_item;
38struct xfs_buf_log_format; 38struct xfs_buf_log_format;
39struct xfs_inode_log_format; 39struct xfs_inode_log_format;
40struct xfs_bmbt_irec; 40struct xfs_bmbt_irec;
41struct xfs_btree_cur;
41 42
42DECLARE_EVENT_CLASS(xfs_attr_list_class, 43DECLARE_EVENT_CLASS(xfs_attr_list_class,
43 TP_PROTO(struct xfs_attr_list_context *ctx), 44 TP_PROTO(struct xfs_attr_list_context *ctx),
@@ -2185,6 +2186,379 @@ DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
2185DEFINE_DISCARD_EVENT(xfs_discard_exclude); 2186DEFINE_DISCARD_EVENT(xfs_discard_exclude);
2186DEFINE_DISCARD_EVENT(xfs_discard_busy); 2187DEFINE_DISCARD_EVENT(xfs_discard_busy);
2187 2188
2189/* btree cursor events */
2190DECLARE_EVENT_CLASS(xfs_btree_cur_class,
2191 TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp),
2192 TP_ARGS(cur, level, bp),
2193 TP_STRUCT__entry(
2194 __field(dev_t, dev)
2195 __field(xfs_btnum_t, btnum)
2196 __field(int, level)
2197 __field(int, nlevels)
2198 __field(int, ptr)
2199 __field(xfs_daddr_t, daddr)
2200 ),
2201 TP_fast_assign(
2202 __entry->dev = cur->bc_mp->m_super->s_dev;
2203 __entry->btnum = cur->bc_btnum;
2204 __entry->level = level;
2205 __entry->nlevels = cur->bc_nlevels;
2206 __entry->ptr = cur->bc_ptrs[level];
2207 __entry->daddr = bp ? bp->b_bn : -1;
2208 ),
2209 TP_printk("dev %d:%d btnum %d level %d/%d ptr %d daddr 0x%llx",
2210 MAJOR(__entry->dev), MINOR(__entry->dev),
2211 __entry->btnum,
2212 __entry->level,
2213 __entry->nlevels,
2214 __entry->ptr,
2215 (unsigned long long)__entry->daddr)
2216)
2217
2218#define DEFINE_BTREE_CUR_EVENT(name) \
2219DEFINE_EVENT(xfs_btree_cur_class, name, \
2220 TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp), \
2221 TP_ARGS(cur, level, bp))
2222DEFINE_BTREE_CUR_EVENT(xfs_btree_updkeys);
2223DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range);
2224
2225/* deferred ops */
2226struct xfs_defer_pending;
2227struct xfs_defer_intake;
2228struct xfs_defer_ops;
2229
2230DECLARE_EVENT_CLASS(xfs_defer_class,
2231 TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop),
2232 TP_ARGS(mp, dop),
2233 TP_STRUCT__entry(
2234 __field(dev_t, dev)
2235 __field(void *, dop)
2236 __field(bool, committed)
2237 __field(bool, low)
2238 ),
2239 TP_fast_assign(
2240 __entry->dev = mp ? mp->m_super->s_dev : 0;
2241 __entry->dop = dop;
2242 __entry->committed = dop->dop_committed;
2243 __entry->low = dop->dop_low;
2244 ),
2245 TP_printk("dev %d:%d ops %p committed %d low %d\n",
2246 MAJOR(__entry->dev), MINOR(__entry->dev),
2247 __entry->dop,
2248 __entry->committed,
2249 __entry->low)
2250)
2251#define DEFINE_DEFER_EVENT(name) \
2252DEFINE_EVENT(xfs_defer_class, name, \
2253 TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop), \
2254 TP_ARGS(mp, dop))
2255
2256DECLARE_EVENT_CLASS(xfs_defer_error_class,
2257 TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, int error),
2258 TP_ARGS(mp, dop, error),
2259 TP_STRUCT__entry(
2260 __field(dev_t, dev)
2261 __field(void *, dop)
2262 __field(bool, committed)
2263 __field(bool, low)
2264 __field(int, error)
2265 ),
2266 TP_fast_assign(
2267 __entry->dev = mp ? mp->m_super->s_dev : 0;
2268 __entry->dop = dop;
2269 __entry->committed = dop->dop_committed;
2270 __entry->low = dop->dop_low;
2271 __entry->error = error;
2272 ),
2273 TP_printk("dev %d:%d ops %p committed %d low %d err %d\n",
2274 MAJOR(__entry->dev), MINOR(__entry->dev),
2275 __entry->dop,
2276 __entry->committed,
2277 __entry->low,
2278 __entry->error)
2279)
2280#define DEFINE_DEFER_ERROR_EVENT(name) \
2281DEFINE_EVENT(xfs_defer_error_class, name, \
2282 TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, int error), \
2283 TP_ARGS(mp, dop, error))
2284
2285DECLARE_EVENT_CLASS(xfs_defer_pending_class,
2286 TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp),
2287 TP_ARGS(mp, dfp),
2288 TP_STRUCT__entry(
2289 __field(dev_t, dev)
2290 __field(int, type)
2291 __field(void *, intent)
2292 __field(bool, committed)
2293 __field(int, nr)
2294 ),
2295 TP_fast_assign(
2296 __entry->dev = mp ? mp->m_super->s_dev : 0;
2297 __entry->type = dfp->dfp_type->type;
2298 __entry->intent = dfp->dfp_intent;
2299 __entry->committed = dfp->dfp_committed;
2300 __entry->nr = dfp->dfp_count;
2301 ),
2302 TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n",
2303 MAJOR(__entry->dev), MINOR(__entry->dev),
2304 __entry->type,
2305 __entry->intent,
2306 __entry->committed,
2307 __entry->nr)
2308)
2309#define DEFINE_DEFER_PENDING_EVENT(name) \
2310DEFINE_EVENT(xfs_defer_pending_class, name, \
2311 TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp), \
2312 TP_ARGS(mp, dfp))
2313
2314DECLARE_EVENT_CLASS(xfs_phys_extent_deferred_class,
2315 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
2316 int type, xfs_agblock_t agbno, xfs_extlen_t len),
2317 TP_ARGS(mp, agno, type, agbno, len),
2318 TP_STRUCT__entry(
2319 __field(dev_t, dev)
2320 __field(xfs_agnumber_t, agno)
2321 __field(int, type)
2322 __field(xfs_agblock_t, agbno)
2323 __field(xfs_extlen_t, len)
2324 ),
2325 TP_fast_assign(
2326 __entry->dev = mp->m_super->s_dev;
2327 __entry->agno = agno;
2328 __entry->type = type;
2329 __entry->agbno = agbno;
2330 __entry->len = len;
2331 ),
2332 TP_printk("dev %d:%d op %d agno %u agbno %u len %u",
2333 MAJOR(__entry->dev), MINOR(__entry->dev),
2334 __entry->type,
2335 __entry->agno,
2336 __entry->agbno,
2337 __entry->len)
2338);
2339#define DEFINE_PHYS_EXTENT_DEFERRED_EVENT(name) \
2340DEFINE_EVENT(xfs_phys_extent_deferred_class, name, \
2341 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
2342 int type, \
2343 xfs_agblock_t bno, \
2344 xfs_extlen_t len), \
2345 TP_ARGS(mp, agno, type, bno, len))
2346
2347DECLARE_EVENT_CLASS(xfs_map_extent_deferred_class,
2348 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
2349 int op,
2350 xfs_agblock_t agbno,
2351 xfs_ino_t ino,
2352 int whichfork,
2353 xfs_fileoff_t offset,
2354 xfs_filblks_t len,
2355 xfs_exntst_t state),
2356 TP_ARGS(mp, agno, op, agbno, ino, whichfork, offset, len, state),
2357 TP_STRUCT__entry(
2358 __field(dev_t, dev)
2359 __field(xfs_agnumber_t, agno)
2360 __field(xfs_ino_t, ino)
2361 __field(xfs_agblock_t, agbno)
2362 __field(int, whichfork)
2363 __field(xfs_fileoff_t, l_loff)
2364 __field(xfs_filblks_t, l_len)
2365 __field(xfs_exntst_t, l_state)
2366 __field(int, op)
2367 ),
2368 TP_fast_assign(
2369 __entry->dev = mp->m_super->s_dev;
2370 __entry->agno = agno;
2371 __entry->ino = ino;
2372 __entry->agbno = agbno;
2373 __entry->whichfork = whichfork;
2374 __entry->l_loff = offset;
2375 __entry->l_len = len;
2376 __entry->l_state = state;
2377 __entry->op = op;
2378 ),
2379 TP_printk("dev %d:%d op %d agno %u agbno %u owner %lld %s offset %llu len %llu state %d",
2380 MAJOR(__entry->dev), MINOR(__entry->dev),
2381 __entry->op,
2382 __entry->agno,
2383 __entry->agbno,
2384 __entry->ino,
2385 __entry->whichfork == XFS_ATTR_FORK ? "attr" : "data",
2386 __entry->l_loff,
2387 __entry->l_len,
2388 __entry->l_state)
2389);
2390#define DEFINE_MAP_EXTENT_DEFERRED_EVENT(name) \
2391DEFINE_EVENT(xfs_map_extent_deferred_class, name, \
2392 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
2393 int op, \
2394 xfs_agblock_t agbno, \
2395 xfs_ino_t ino, \
2396 int whichfork, \
2397 xfs_fileoff_t offset, \
2398 xfs_filblks_t len, \
2399 xfs_exntst_t state), \
2400 TP_ARGS(mp, agno, op, agbno, ino, whichfork, offset, len, state))
2401
2402DEFINE_DEFER_EVENT(xfs_defer_init);
2403DEFINE_DEFER_EVENT(xfs_defer_cancel);
2404DEFINE_DEFER_EVENT(xfs_defer_trans_roll);
2405DEFINE_DEFER_EVENT(xfs_defer_trans_abort);
2406DEFINE_DEFER_EVENT(xfs_defer_finish);
2407DEFINE_DEFER_EVENT(xfs_defer_finish_done);
2408
2409DEFINE_DEFER_ERROR_EVENT(xfs_defer_trans_roll_error);
2410DEFINE_DEFER_ERROR_EVENT(xfs_defer_finish_error);
2411DEFINE_DEFER_ERROR_EVENT(xfs_defer_op_finish_error);
2412
2413DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_work);
2414DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_cancel);
2415DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_commit);
2416DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_cancel);
2417DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish);
2418DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort);
2419
2420#define DEFINE_BMAP_FREE_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT
2421DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_defer);
2422DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_deferred);
2423
2424/* rmap tracepoints */
2425DECLARE_EVENT_CLASS(xfs_rmap_class,
2426 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
2427 xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten,
2428 struct xfs_owner_info *oinfo),
2429 TP_ARGS(mp, agno, agbno, len, unwritten, oinfo),
2430 TP_STRUCT__entry(
2431 __field(dev_t, dev)
2432 __field(xfs_agnumber_t, agno)
2433 __field(xfs_agblock_t, agbno)
2434 __field(xfs_extlen_t, len)
2435 __field(uint64_t, owner)
2436 __field(uint64_t, offset)
2437 __field(unsigned long, flags)
2438 ),
2439 TP_fast_assign(
2440 __entry->dev = mp->m_super->s_dev;
2441 __entry->agno = agno;
2442 __entry->agbno = agbno;
2443 __entry->len = len;
2444 __entry->owner = oinfo->oi_owner;
2445 __entry->offset = oinfo->oi_offset;
2446 __entry->flags = oinfo->oi_flags;
2447 if (unwritten)
2448 __entry->flags |= XFS_RMAP_UNWRITTEN;
2449 ),
2450 TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%lx",
2451 MAJOR(__entry->dev), MINOR(__entry->dev),
2452 __entry->agno,
2453 __entry->agbno,
2454 __entry->len,
2455 __entry->owner,
2456 __entry->offset,
2457 __entry->flags)
2458);
2459#define DEFINE_RMAP_EVENT(name) \
2460DEFINE_EVENT(xfs_rmap_class, name, \
2461 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
2462 xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten, \
2463 struct xfs_owner_info *oinfo), \
2464 TP_ARGS(mp, agno, agbno, len, unwritten, oinfo))
2465
2466/* simple AG-based error/%ip tracepoint class */
2467DECLARE_EVENT_CLASS(xfs_ag_error_class,
2468 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error,
2469 unsigned long caller_ip),
2470 TP_ARGS(mp, agno, error, caller_ip),
2471 TP_STRUCT__entry(
2472 __field(dev_t, dev)
2473 __field(xfs_agnumber_t, agno)
2474 __field(int, error)
2475 __field(unsigned long, caller_ip)
2476 ),
2477 TP_fast_assign(
2478 __entry->dev = mp->m_super->s_dev;
2479 __entry->agno = agno;
2480 __entry->error = error;
2481 __entry->caller_ip = caller_ip;
2482 ),
2483 TP_printk("dev %d:%d agno %u error %d caller %ps",
2484 MAJOR(__entry->dev), MINOR(__entry->dev),
2485 __entry->agno,
2486 __entry->error,
2487 (char *)__entry->caller_ip)
2488);
2489
2490#define DEFINE_AG_ERROR_EVENT(name) \
2491DEFINE_EVENT(xfs_ag_error_class, name, \
2492 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, \
2493 unsigned long caller_ip), \
2494 TP_ARGS(mp, agno, error, caller_ip))
2495
2496DEFINE_RMAP_EVENT(xfs_rmap_unmap);
2497DEFINE_RMAP_EVENT(xfs_rmap_unmap_done);
2498DEFINE_AG_ERROR_EVENT(xfs_rmap_unmap_error);
2499DEFINE_RMAP_EVENT(xfs_rmap_map);
2500DEFINE_RMAP_EVENT(xfs_rmap_map_done);
2501DEFINE_AG_ERROR_EVENT(xfs_rmap_map_error);
2502DEFINE_RMAP_EVENT(xfs_rmap_convert);
2503DEFINE_RMAP_EVENT(xfs_rmap_convert_done);
2504DEFINE_AG_ERROR_EVENT(xfs_rmap_convert_error);
2505DEFINE_AG_ERROR_EVENT(xfs_rmap_convert_state);
2506
2507DECLARE_EVENT_CLASS(xfs_rmapbt_class,
2508 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
2509 xfs_agblock_t agbno, xfs_extlen_t len,
2510 uint64_t owner, uint64_t offset, unsigned int flags),
2511 TP_ARGS(mp, agno, agbno, len, owner, offset, flags),
2512 TP_STRUCT__entry(
2513 __field(dev_t, dev)
2514 __field(xfs_agnumber_t, agno)
2515 __field(xfs_agblock_t, agbno)
2516 __field(xfs_extlen_t, len)
2517 __field(uint64_t, owner)
2518 __field(uint64_t, offset)
2519 __field(unsigned int, flags)
2520 ),
2521 TP_fast_assign(
2522 __entry->dev = mp->m_super->s_dev;
2523 __entry->agno = agno;
2524 __entry->agbno = agbno;
2525 __entry->len = len;
2526 __entry->owner = owner;
2527 __entry->offset = offset;
2528 __entry->flags = flags;
2529 ),
2530 TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%x",
2531 MAJOR(__entry->dev), MINOR(__entry->dev),
2532 __entry->agno,
2533 __entry->agbno,
2534 __entry->len,
2535 __entry->owner,
2536 __entry->offset,
2537 __entry->flags)
2538);
2539#define DEFINE_RMAPBT_EVENT(name) \
2540DEFINE_EVENT(xfs_rmapbt_class, name, \
2541 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
2542 xfs_agblock_t agbno, xfs_extlen_t len, \
2543 uint64_t owner, uint64_t offset, unsigned int flags), \
2544 TP_ARGS(mp, agno, agbno, len, owner, offset, flags))
2545
2546#define DEFINE_RMAP_DEFERRED_EVENT DEFINE_MAP_EXTENT_DEFERRED_EVENT
2547DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_defer);
2548DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_deferred);
2549
2550DEFINE_BUSY_EVENT(xfs_rmapbt_alloc_block);
2551DEFINE_BUSY_EVENT(xfs_rmapbt_free_block);
2552DEFINE_RMAPBT_EVENT(xfs_rmap_update);
2553DEFINE_RMAPBT_EVENT(xfs_rmap_insert);
2554DEFINE_RMAPBT_EVENT(xfs_rmap_delete);
2555DEFINE_AG_ERROR_EVENT(xfs_rmap_insert_error);
2556DEFINE_AG_ERROR_EVENT(xfs_rmap_delete_error);
2557DEFINE_AG_ERROR_EVENT(xfs_rmap_update_error);
2558DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_result);
2559DEFINE_RMAPBT_EVENT(xfs_rmap_find_right_neighbor_result);
2560DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_result);
2561
2188#endif /* _TRACE_XFS_H */ 2562#endif /* _TRACE_XFS_H */
2189 2563
2190#undef TRACE_INCLUDE_PATH 2564#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 9b2b9fa89331..e2bf86aad33d 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -33,6 +33,9 @@ struct xfs_trans;
33struct xfs_trans_res; 33struct xfs_trans_res;
34struct xfs_dquot_acct; 34struct xfs_dquot_acct;
35struct xfs_busy_extent; 35struct xfs_busy_extent;
36struct xfs_rud_log_item;
37struct xfs_rui_log_item;
38struct xfs_btree_cur;
36 39
37typedef struct xfs_log_item { 40typedef struct xfs_log_item {
38 struct list_head li_ail; /* AIL pointers */ 41 struct list_head li_ail; /* AIL pointers */
@@ -210,17 +213,14 @@ void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
210void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint); 213void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
211void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); 214void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
212void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); 215void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
213struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint); 216
214void xfs_trans_log_efi_extent(xfs_trans_t *, 217void xfs_extent_free_init_defer_op(void);
215 struct xfs_efi_log_item *, 218struct xfs_efd_log_item *xfs_trans_get_efd(struct xfs_trans *,
216 xfs_fsblock_t,
217 xfs_extlen_t);
218struct xfs_efd_log_item *xfs_trans_get_efd(xfs_trans_t *,
219 struct xfs_efi_log_item *, 219 struct xfs_efi_log_item *,
220 uint); 220 uint);
221int xfs_trans_free_extent(struct xfs_trans *, 221int xfs_trans_free_extent(struct xfs_trans *,
222 struct xfs_efd_log_item *, xfs_fsblock_t, 222 struct xfs_efd_log_item *, xfs_fsblock_t,
223 xfs_extlen_t); 223 xfs_extlen_t, struct xfs_owner_info *);
224int xfs_trans_commit(struct xfs_trans *); 224int xfs_trans_commit(struct xfs_trans *);
225int __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *); 225int __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *);
226int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); 226int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
@@ -236,4 +236,16 @@ void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
236extern kmem_zone_t *xfs_trans_zone; 236extern kmem_zone_t *xfs_trans_zone;
237extern kmem_zone_t *xfs_log_item_desc_zone; 237extern kmem_zone_t *xfs_log_item_desc_zone;
238 238
239/* rmap updates */
240enum xfs_rmap_intent_type;
241
242void xfs_rmap_update_init_defer_op(void);
243struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp,
244 struct xfs_rui_log_item *ruip);
245int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
246 struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type,
247 __uint64_t owner, int whichfork, xfs_fileoff_t startoff,
248 xfs_fsblock_t startblock, xfs_filblks_t blockcount,
249 xfs_exntst_t state, struct xfs_btree_cur **pcur);
250
239#endif /* __XFS_TRANS_H__ */ 251#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index a96ae540eb62..459ddec137a4 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -21,66 +21,15 @@
21#include "xfs_format.h" 21#include "xfs_format.h"
22#include "xfs_log_format.h" 22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_bit.h"
24#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
25#include "xfs_trans.h" 27#include "xfs_trans.h"
26#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
27#include "xfs_extfree_item.h" 29#include "xfs_extfree_item.h"
28#include "xfs_alloc.h" 30#include "xfs_alloc.h"
29 31#include "xfs_bmap.h"
30/* 32#include "xfs_trace.h"
31 * This routine is called to allocate an "extent free intention"
32 * log item that will hold nextents worth of extents. The
33 * caller must use all nextents extents, because we are not
34 * flexible about this at all.
35 */
36xfs_efi_log_item_t *
37xfs_trans_get_efi(xfs_trans_t *tp,
38 uint nextents)
39{
40 xfs_efi_log_item_t *efip;
41
42 ASSERT(tp != NULL);
43 ASSERT(nextents > 0);
44
45 efip = xfs_efi_init(tp->t_mountp, nextents);
46 ASSERT(efip != NULL);
47
48 /*
49 * Get a log_item_desc to point at the new item.
50 */
51 xfs_trans_add_item(tp, &efip->efi_item);
52 return efip;
53}
54
55/*
56 * This routine is called to indicate that the described
57 * extent is to be logged as needing to be freed. It should
58 * be called once for each extent to be freed.
59 */
60void
61xfs_trans_log_efi_extent(xfs_trans_t *tp,
62 xfs_efi_log_item_t *efip,
63 xfs_fsblock_t start_block,
64 xfs_extlen_t ext_len)
65{
66 uint next_extent;
67 xfs_extent_t *extp;
68
69 tp->t_flags |= XFS_TRANS_DIRTY;
70 efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
71
72 /*
73 * atomic_inc_return gives us the value after the increment;
74 * we want to use it as an array index so we need to subtract 1 from
75 * it.
76 */
77 next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
78 ASSERT(next_extent < efip->efi_format.efi_nextents);
79 extp = &(efip->efi_format.efi_extents[next_extent]);
80 extp->ext_start = start_block;
81 extp->ext_len = ext_len;
82}
83
84 33
85/* 34/*
86 * This routine is called to allocate an "extent free done" 35 * This routine is called to allocate an "extent free done"
@@ -88,12 +37,12 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp,
88 * caller must use all nextents extents, because we are not 37 * caller must use all nextents extents, because we are not
89 * flexible about this at all. 38 * flexible about this at all.
90 */ 39 */
91xfs_efd_log_item_t * 40struct xfs_efd_log_item *
92xfs_trans_get_efd(xfs_trans_t *tp, 41xfs_trans_get_efd(struct xfs_trans *tp,
93 xfs_efi_log_item_t *efip, 42 struct xfs_efi_log_item *efip,
94 uint nextents) 43 uint nextents)
95{ 44{
96 xfs_efd_log_item_t *efdp; 45 struct xfs_efd_log_item *efdp;
97 46
98 ASSERT(tp != NULL); 47 ASSERT(tp != NULL);
99 ASSERT(nextents > 0); 48 ASSERT(nextents > 0);
@@ -118,13 +67,19 @@ xfs_trans_free_extent(
118 struct xfs_trans *tp, 67 struct xfs_trans *tp,
119 struct xfs_efd_log_item *efdp, 68 struct xfs_efd_log_item *efdp,
120 xfs_fsblock_t start_block, 69 xfs_fsblock_t start_block,
121 xfs_extlen_t ext_len) 70 xfs_extlen_t ext_len,
71 struct xfs_owner_info *oinfo)
122{ 72{
73 struct xfs_mount *mp = tp->t_mountp;
123 uint next_extent; 74 uint next_extent;
75 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block);
76 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, start_block);
124 struct xfs_extent *extp; 77 struct xfs_extent *extp;
125 int error; 78 int error;
126 79
127 error = xfs_free_extent(tp, start_block, ext_len); 80 trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
81
82 error = xfs_free_extent(tp, start_block, ext_len, oinfo);
128 83
129 /* 84 /*
130 * Mark the transaction dirty, even on error. This ensures the 85 * Mark the transaction dirty, even on error. This ensures the
@@ -145,3 +100,139 @@ xfs_trans_free_extent(
145 100
146 return error; 101 return error;
147} 102}
103
104/* Sort bmap items by AG. */
105static int
106xfs_extent_free_diff_items(
107 void *priv,
108 struct list_head *a,
109 struct list_head *b)
110{
111 struct xfs_mount *mp = priv;
112 struct xfs_extent_free_item *ra;
113 struct xfs_extent_free_item *rb;
114
115 ra = container_of(a, struct xfs_extent_free_item, xefi_list);
116 rb = container_of(b, struct xfs_extent_free_item, xefi_list);
117 return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
118 XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
119}
120
121/* Get an EFI. */
122STATIC void *
123xfs_extent_free_create_intent(
124 struct xfs_trans *tp,
125 unsigned int count)
126{
127 struct xfs_efi_log_item *efip;
128
129 ASSERT(tp != NULL);
130 ASSERT(count > 0);
131
132 efip = xfs_efi_init(tp->t_mountp, count);
133 ASSERT(efip != NULL);
134
135 /*
136 * Get a log_item_desc to point at the new item.
137 */
138 xfs_trans_add_item(tp, &efip->efi_item);
139 return efip;
140}
141
142/* Log a free extent to the intent item. */
143STATIC void
144xfs_extent_free_log_item(
145 struct xfs_trans *tp,
146 void *intent,
147 struct list_head *item)
148{
149 struct xfs_efi_log_item *efip = intent;
150 struct xfs_extent_free_item *free;
151 uint next_extent;
152 struct xfs_extent *extp;
153
154 free = container_of(item, struct xfs_extent_free_item, xefi_list);
155
156 tp->t_flags |= XFS_TRANS_DIRTY;
157 efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
158
159 /*
160 * atomic_inc_return gives us the value after the increment;
161 * we want to use it as an array index so we need to subtract 1 from
162 * it.
163 */
164 next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
165 ASSERT(next_extent < efip->efi_format.efi_nextents);
166 extp = &efip->efi_format.efi_extents[next_extent];
167 extp->ext_start = free->xefi_startblock;
168 extp->ext_len = free->xefi_blockcount;
169}
170
171/* Get an EFD so we can process all the free extents. */
172STATIC void *
173xfs_extent_free_create_done(
174 struct xfs_trans *tp,
175 void *intent,
176 unsigned int count)
177{
178 return xfs_trans_get_efd(tp, intent, count);
179}
180
181/* Process a free extent. */
182STATIC int
183xfs_extent_free_finish_item(
184 struct xfs_trans *tp,
185 struct xfs_defer_ops *dop,
186 struct list_head *item,
187 void *done_item,
188 void **state)
189{
190 struct xfs_extent_free_item *free;
191 int error;
192
193 free = container_of(item, struct xfs_extent_free_item, xefi_list);
194 error = xfs_trans_free_extent(tp, done_item,
195 free->xefi_startblock,
196 free->xefi_blockcount,
197 &free->xefi_oinfo);
198 kmem_free(free);
199 return error;
200}
201
202/* Abort all pending EFIs. */
203STATIC void
204xfs_extent_free_abort_intent(
205 void *intent)
206{
207 xfs_efi_release(intent);
208}
209
210/* Cancel a free extent. */
211STATIC void
212xfs_extent_free_cancel_item(
213 struct list_head *item)
214{
215 struct xfs_extent_free_item *free;
216
217 free = container_of(item, struct xfs_extent_free_item, xefi_list);
218 kmem_free(free);
219}
220
221static const struct xfs_defer_op_type xfs_extent_free_defer_type = {
222 .type = XFS_DEFER_OPS_TYPE_FREE,
223 .max_items = XFS_EFI_MAX_FAST_EXTENTS,
224 .diff_items = xfs_extent_free_diff_items,
225 .create_intent = xfs_extent_free_create_intent,
226 .abort_intent = xfs_extent_free_abort_intent,
227 .log_item = xfs_extent_free_log_item,
228 .create_done = xfs_extent_free_create_done,
229 .finish_item = xfs_extent_free_finish_item,
230 .cancel_item = xfs_extent_free_cancel_item,
231};
232
233/* Register the deferred op type. */
234void
235xfs_extent_free_init_defer_op(void)
236{
237 xfs_defer_init_op_type(&xfs_extent_free_defer_type);
238}
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c
new file mode 100644
index 000000000000..5a50ef881568
--- /dev/null
+++ b/fs/xfs/xfs_trans_rmap.c
@@ -0,0 +1,271 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_log_format.h"
25#include "xfs_trans_resv.h"
26#include "xfs_mount.h"
27#include "xfs_defer.h"
28#include "xfs_trans.h"
29#include "xfs_trans_priv.h"
30#include "xfs_rmap_item.h"
31#include "xfs_alloc.h"
32#include "xfs_rmap.h"
33
34/* Set the map extent flags for this reverse mapping. */
35static void
36xfs_trans_set_rmap_flags(
37 struct xfs_map_extent *rmap,
38 enum xfs_rmap_intent_type type,
39 int whichfork,
40 xfs_exntst_t state)
41{
42 rmap->me_flags = 0;
43 if (state == XFS_EXT_UNWRITTEN)
44 rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
45 if (whichfork == XFS_ATTR_FORK)
46 rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
47 switch (type) {
48 case XFS_RMAP_MAP:
49 rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
50 break;
51 case XFS_RMAP_UNMAP:
52 rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
53 break;
54 case XFS_RMAP_CONVERT:
55 rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
56 break;
57 case XFS_RMAP_ALLOC:
58 rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
59 break;
60 case XFS_RMAP_FREE:
61 rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
62 break;
63 default:
64 ASSERT(0);
65 }
66}
67
68struct xfs_rud_log_item *
69xfs_trans_get_rud(
70 struct xfs_trans *tp,
71 struct xfs_rui_log_item *ruip)
72{
73 struct xfs_rud_log_item *rudp;
74
75 rudp = xfs_rud_init(tp->t_mountp, ruip);
76 xfs_trans_add_item(tp, &rudp->rud_item);
77 return rudp;
78}
79
80/*
81 * Finish an rmap update and log it to the RUD. Note that the transaction is
82 * marked dirty regardless of whether the rmap update succeeds or fails to
83 * support the RUI/RUD lifecycle rules.
84 */
85int
86xfs_trans_log_finish_rmap_update(
87 struct xfs_trans *tp,
88 struct xfs_rud_log_item *rudp,
89 enum xfs_rmap_intent_type type,
90 __uint64_t owner,
91 int whichfork,
92 xfs_fileoff_t startoff,
93 xfs_fsblock_t startblock,
94 xfs_filblks_t blockcount,
95 xfs_exntst_t state,
96 struct xfs_btree_cur **pcur)
97{
98 int error;
99
100 error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
101 startblock, blockcount, state, pcur);
102
103 /*
104 * Mark the transaction dirty, even on error. This ensures the
105 * transaction is aborted, which:
106 *
107 * 1.) releases the RUI and frees the RUD
108 * 2.) shuts down the filesystem
109 */
110 tp->t_flags |= XFS_TRANS_DIRTY;
111 rudp->rud_item.li_desc->lid_flags |= XFS_LID_DIRTY;
112
113 return error;
114}
115
116/* Sort rmap intents by AG. */
117static int
118xfs_rmap_update_diff_items(
119 void *priv,
120 struct list_head *a,
121 struct list_head *b)
122{
123 struct xfs_mount *mp = priv;
124 struct xfs_rmap_intent *ra;
125 struct xfs_rmap_intent *rb;
126
127 ra = container_of(a, struct xfs_rmap_intent, ri_list);
128 rb = container_of(b, struct xfs_rmap_intent, ri_list);
129 return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
130 XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
131}
132
133/* Get an RUI. */
134STATIC void *
135xfs_rmap_update_create_intent(
136 struct xfs_trans *tp,
137 unsigned int count)
138{
139 struct xfs_rui_log_item *ruip;
140
141 ASSERT(tp != NULL);
142 ASSERT(count > 0);
143
144 ruip = xfs_rui_init(tp->t_mountp, count);
145 ASSERT(ruip != NULL);
146
147 /*
148 * Get a log_item_desc to point at the new item.
149 */
150 xfs_trans_add_item(tp, &ruip->rui_item);
151 return ruip;
152}
153
154/* Log rmap updates in the intent item. */
155STATIC void
156xfs_rmap_update_log_item(
157 struct xfs_trans *tp,
158 void *intent,
159 struct list_head *item)
160{
161 struct xfs_rui_log_item *ruip = intent;
162 struct xfs_rmap_intent *rmap;
163 uint next_extent;
164 struct xfs_map_extent *map;
165
166 rmap = container_of(item, struct xfs_rmap_intent, ri_list);
167
168 tp->t_flags |= XFS_TRANS_DIRTY;
169 ruip->rui_item.li_desc->lid_flags |= XFS_LID_DIRTY;
170
171 /*
172 * atomic_inc_return gives us the value after the increment;
173 * we want to use it as an array index so we need to subtract 1 from
174 * it.
175 */
176 next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
177 ASSERT(next_extent < ruip->rui_format.rui_nextents);
178 map = &ruip->rui_format.rui_extents[next_extent];
179 map->me_owner = rmap->ri_owner;
180 map->me_startblock = rmap->ri_bmap.br_startblock;
181 map->me_startoff = rmap->ri_bmap.br_startoff;
182 map->me_len = rmap->ri_bmap.br_blockcount;
183 xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
184 rmap->ri_bmap.br_state);
185}
186
187/* Get an RUD so we can process all the deferred rmap updates. */
188STATIC void *
189xfs_rmap_update_create_done(
190 struct xfs_trans *tp,
191 void *intent,
192 unsigned int count)
193{
194 return xfs_trans_get_rud(tp, intent);
195}
196
197/* Process a deferred rmap update. */
198STATIC int
199xfs_rmap_update_finish_item(
200 struct xfs_trans *tp,
201 struct xfs_defer_ops *dop,
202 struct list_head *item,
203 void *done_item,
204 void **state)
205{
206 struct xfs_rmap_intent *rmap;
207 int error;
208
209 rmap = container_of(item, struct xfs_rmap_intent, ri_list);
210 error = xfs_trans_log_finish_rmap_update(tp, done_item,
211 rmap->ri_type,
212 rmap->ri_owner, rmap->ri_whichfork,
213 rmap->ri_bmap.br_startoff,
214 rmap->ri_bmap.br_startblock,
215 rmap->ri_bmap.br_blockcount,
216 rmap->ri_bmap.br_state,
217 (struct xfs_btree_cur **)state);
218 kmem_free(rmap);
219 return error;
220}
221
222/* Clean up after processing deferred rmaps. */
223STATIC void
224xfs_rmap_update_finish_cleanup(
225 struct xfs_trans *tp,
226 void *state,
227 int error)
228{
229 struct xfs_btree_cur *rcur = state;
230
231 xfs_rmap_finish_one_cleanup(tp, rcur, error);
232}
233
234/* Abort all pending RUIs. */
235STATIC void
236xfs_rmap_update_abort_intent(
237 void *intent)
238{
239 xfs_rui_release(intent);
240}
241
242/* Cancel a deferred rmap update. */
243STATIC void
244xfs_rmap_update_cancel_item(
245 struct list_head *item)
246{
247 struct xfs_rmap_intent *rmap;
248
249 rmap = container_of(item, struct xfs_rmap_intent, ri_list);
250 kmem_free(rmap);
251}
252
253static const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
254 .type = XFS_DEFER_OPS_TYPE_RMAP,
255 .max_items = XFS_RUI_MAX_FAST_EXTENTS,
256 .diff_items = xfs_rmap_update_diff_items,
257 .create_intent = xfs_rmap_update_create_intent,
258 .abort_intent = xfs_rmap_update_abort_intent,
259 .log_item = xfs_rmap_update_log_item,
260 .create_done = xfs_rmap_update_create_done,
261 .finish_item = xfs_rmap_update_finish_item,
262 .finish_cleanup = xfs_rmap_update_finish_cleanup,
263 .cancel_item = xfs_rmap_update_cancel_item,
264};
265
266/* Register the deferred op type. */
267void
268xfs_rmap_update_init_defer_op(void)
269{
270 xfs_defer_init_op_type(&xfs_rmap_update_defer_type);
271}