author	Linus Torvalds <torvalds@linux-foundation.org>	2016-08-06 09:50:36 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-08-06 09:50:36 -0400
commit	0cbbc422d56668528f6efd1234fe908010284082 (patch)
tree	d4bebf90c29044b4a6180053fc18f9e927361012 /fs/xfs
parent	835c92d43b29eb354abdbd5475308a474d7efdfa (diff)
parent	3481b68285238054be519ad0c8cad5cc2425e26c (diff)
Merge tag 'xfs-rmap-for-linus-4.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull more xfs updates from Dave Chinner:
 "This is the second part of the XFS updates for this merge cycle, and
  contains the new reverse block mapping feature for XFS.

  Reverse mapping allows us to track the owner of a specific block on
  disk precisely. It is implemented as a set of btrees (one per
  allocation group) that track the owners of allocated extents.
  Effectively it is a "used space tree" that is updated when we
  allocate or free extents. i.e. it is coherent with the free space
  btrees we already maintain and never overlaps with them.

  This reverse mapping infrastructure is the building block of several
  upcoming features - reflink, copy-on-write data, dedupe, online
  metadata and data scrubbing, highly accurate bad sector/data loss
  reporting to users, and significantly improved reconstruction of
  damaged and corrupted filesystems. There's a lot of new stuff coming
  along in the next couple of cycles, and it all builds on the rmap
  infrastructure.

  As such, it's a huge chunk of new code with new on-disk format
  features and internal infrastructure. It warns at mount time as an
  experimental feature and that it may eat data (as we do with all new
  on-disk features until they stabilise). We have not released
  userspace support for it yet - userspace support currently requires
  a download from Darrick's xfsprogs repo and a build from source, so
  access to this feature is really developer/tester only at this
  point. Initial userspace support will be released at the same time
  as the kernel with this code in it is released.

  The new rmap enabled code regresses 3 xfstests - all are ENOSPC
  related corner cases, one of which Darrick posted a fix for a few
  hours ago. The other two are fixed by infrastructure that is part of
  the upcoming reflink patchset. This new ENOSPC infrastructure
  requires an on-disk format tweak to keep mount times in check - we
  need to keep an on-disk count of allocated rmapbt blocks so we don't
  have to scan the entire btrees at mount time to count them. This is
  currently being tested and will be part of the fixes sent in the
  next week or two, so users will not be exposed to this change"

* tag 'xfs-rmap-for-linus-4.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (52 commits)
  xfs: move (and rename) the deferred bmap-free tracepoints
  xfs: collapse single use static functions
  xfs: remove unnecessary parentheses from log redo item recovery functions
  xfs: remove the extents array from the rmap update done log item
  xfs: in btree_lshift, only allocate temporary cursor when needed
  xfs: remove unnecesary lshift/rshift key initialization
  xfs: remove the get*keys and update_keys btree ops pointers
  xfs: enable the rmap btree functionality
  xfs: don't update rmapbt when fixing agfl
  xfs: disable XFS_IOC_SWAPEXT when rmap btree is enabled
  xfs: add rmap btree block detection to log recovery
  xfs: add rmap btree geometry feature flag
  xfs: propagate bmap updates to rmapbt
  xfs: enable the xfs_defer mechanism to process rmaps to update
  xfs: log rmap intent items
  xfs: create rmap update intent log items
  xfs: add rmap btree insert and delete helpers
  xfs: convert unwritten status of reverse mappings
  xfs: remove an extent from the rmap btree
  xfs: add an extent to the rmap btree
  ...
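As a rough illustration of the idea described in the pull request text above (a conceptual sketch only; the real in-memory and on-disk record definitions are added by this series in fs/xfs/libxfs/xfs_rmap.h and fs/xfs/libxfs/xfs_format.h, and the field names below are illustrative, not authoritative), each reverse-mapping record answers "which owner holds this extent?" for a block range within one allocation group:

	/* Conceptual sketch of a reverse-mapping record -- illustrative only. */
	struct rmap_record_sketch {
		xfs_agblock_t	start_block;	/* first block of the extent in this AG */
		xfs_extlen_t	block_count;	/* number of blocks in the extent */
		uint64_t	owner;		/* inode number, or a metadata owner code */
		uint64_t	offset;		/* logical offset within the owner's fork */
		unsigned int	flags;		/* e.g. attr fork, bmbt block, unwritten */
	};

A record of this shape is inserted into the per-AG rmap btree when an extent is allocated and removed (or adjusted) when the extent is freed, which is what keeps the "used space tree" coherent with the existing free space btrees.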
Diffstat (limited to 'fs/xfs')
-rw-r--r--  fs/xfs/Makefile                     |    5
-rw-r--r--  fs/xfs/libxfs/xfs_alloc.c           |  149
-rw-r--r--  fs/xfs/libxfs/xfs_alloc.h           |   52
-rw-r--r--  fs/xfs/libxfs/xfs_alloc_btree.c     |   12
-rw-r--r--  fs/xfs/libxfs/xfs_attr.c            |   71
-rw-r--r--  fs/xfs/libxfs/xfs_attr_leaf.c       |    4
-rw-r--r--  fs/xfs/libxfs/xfs_attr_remote.c     |   19
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c            |  241
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.h            |   54
-rw-r--r--  fs/xfs/libxfs/xfs_bmap_btree.c      |   32
-rw-r--r--  fs/xfs/libxfs/xfs_btree.c           |  914
-rw-r--r--  fs/xfs/libxfs/xfs_btree.h           |   88
-rw-r--r--  fs/xfs/libxfs/xfs_da_btree.c        |    6
-rw-r--r--  fs/xfs/libxfs/xfs_da_btree.h        |    4
-rw-r--r--  fs/xfs/libxfs/xfs_da_format.h       |    1
-rw-r--r--  fs/xfs/libxfs/xfs_defer.c           |  463
-rw-r--r--  fs/xfs/libxfs/xfs_defer.h           |   97
-rw-r--r--  fs/xfs/libxfs/xfs_dir2.c            |   15
-rw-r--r--  fs/xfs/libxfs/xfs_dir2.h            |    8
-rw-r--r--  fs/xfs/libxfs/xfs_format.h          |  131
-rw-r--r--  fs/xfs/libxfs/xfs_fs.h              |    1
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.c          |   23
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.h          |    2
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc_btree.c    |   18
-rw-r--r--  fs/xfs/libxfs/xfs_inode_buf.c       |    1
-rw-r--r--  fs/xfs/libxfs/xfs_log_format.h      |   63
-rw-r--r--  fs/xfs/libxfs/xfs_rmap.c            | 1399
-rw-r--r--  fs/xfs/libxfs/xfs_rmap.h            |  209
-rw-r--r--  fs/xfs/libxfs/xfs_rmap_btree.c      |  511
-rw-r--r--  fs/xfs/libxfs/xfs_rmap_btree.h      |   61
-rw-r--r--  fs/xfs/libxfs/xfs_sb.c              |    9
-rw-r--r--  fs/xfs/libxfs/xfs_shared.h          |    2
-rw-r--r--  fs/xfs/libxfs/xfs_trans_resv.c      |   62
-rw-r--r--  fs/xfs/libxfs/xfs_trans_resv.h      |   10
-rw-r--r--  fs/xfs/libxfs/xfs_types.h           |    4
-rw-r--r--  fs/xfs/xfs_bmap_util.c              |  139
-rw-r--r--  fs/xfs/xfs_bmap_util.h              |    4
-rw-r--r--  fs/xfs/xfs_discard.c                |    2
-rw-r--r--  fs/xfs/xfs_dquot.c                  |   13
-rw-r--r--  fs/xfs/xfs_error.h                  |    6
-rw-r--r--  fs/xfs/xfs_extfree_item.c           |   69
-rw-r--r--  fs/xfs/xfs_extfree_item.h           |    3
-rw-r--r--  fs/xfs/xfs_filestream.c             |    3
-rw-r--r--  fs/xfs/xfs_fsops.c                  |  106
-rw-r--r--  fs/xfs/xfs_inode.c                  |   99
-rw-r--r--  fs/xfs/xfs_inode.h                  |    4
-rw-r--r--  fs/xfs/xfs_ioctl.c                  |    6
-rw-r--r--  fs/xfs/xfs_iomap.c                  |   31
-rw-r--r--  fs/xfs/xfs_log_recover.c            |  336
-rw-r--r--  fs/xfs/xfs_mount.c                  |    7
-rw-r--r--  fs/xfs/xfs_mount.h                  |    6
-rw-r--r--  fs/xfs/xfs_ondisk.h                 |    3
-rw-r--r--  fs/xfs/xfs_rmap_item.c              |  536
-rw-r--r--  fs/xfs/xfs_rmap_item.h              |   95
-rw-r--r--  fs/xfs/xfs_rtalloc.c                |   11
-rw-r--r--  fs/xfs/xfs_stats.c                  |    1
-rw-r--r--  fs/xfs/xfs_stats.h                  |   18
-rw-r--r--  fs/xfs/xfs_super.c                  |   30
-rw-r--r--  fs/xfs/xfs_symlink.c                |   25
-rw-r--r--  fs/xfs/xfs_trace.c                  |    2
-rw-r--r--  fs/xfs/xfs_trace.h                  |  374
-rw-r--r--  fs/xfs/xfs_trans.h                  |   26
-rw-r--r--  fs/xfs/xfs_trans_extfree.c          |  215
-rw-r--r--  fs/xfs/xfs_trans_rmap.c             |  271
64 files changed, 6267 insertions, 915 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 52c288514be1..fc593c869493 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -39,6 +39,7 @@ xfs-y += $(addprefix libxfs/, \
 	xfs_btree.o \
 	xfs_da_btree.o \
 	xfs_da_format.o \
+	xfs_defer.o \
 	xfs_dir2.o \
 	xfs_dir2_block.o \
 	xfs_dir2_data.o \
@@ -51,6 +52,8 @@ xfs-y += $(addprefix libxfs/, \
 	xfs_inode_fork.o \
 	xfs_inode_buf.o \
 	xfs_log_rlimit.o \
+	xfs_rmap.o \
+	xfs_rmap_btree.o \
 	xfs_sb.o \
 	xfs_symlink_remote.o \
 	xfs_trans_resv.o \
@@ -100,11 +103,13 @@ xfs-y += xfs_log.o \
 	xfs_extfree_item.o \
 	xfs_icreate_item.o \
 	xfs_inode_item.o \
+	xfs_rmap_item.o \
 	xfs_log_recover.o \
 	xfs_trans_ail.o \
 	xfs_trans_buf.o \
 	xfs_trans_extfree.o \
 	xfs_trans_inode.o \
+	xfs_trans_rmap.o \
 
 # optional features
 xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 88c26b827a2d..776ae2f325d1 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -24,8 +24,10 @@
 #include "xfs_bit.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
+#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
+#include "xfs_rmap.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_extent_busy.h"
@@ -49,6 +51,81 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
 		xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
 
+xfs_extlen_t
+xfs_prealloc_blocks(
+	struct xfs_mount	*mp)
+{
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return XFS_RMAP_BLOCK(mp) + 1;
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		return XFS_FIBT_BLOCK(mp) + 1;
+	return XFS_IBT_BLOCK(mp) + 1;
+}
+
+/*
+ * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
+ * AGF buffer (PV 947395), we place constraints on the relationship among
+ * actual allocations for data blocks, freelist blocks, and potential file data
+ * bmap btree blocks. However, these restrictions may result in no actual space
+ * allocated for a delayed extent, for example, a data block in a certain AG is
+ * allocated but there is no additional block for the additional bmap btree
+ * block due to a split of the bmap btree of the file. The result of this may
+ * lead to an infinite loop when the file gets flushed to disk and all delayed
+ * extents need to be actually allocated. To get around this, we explicitly set
+ * aside a few blocks which will not be reserved in delayed allocation.
+ *
+ * When rmap is disabled, we need to reserve 4 fsbs _per AG_ for the freelist
+ * and 4 more to handle a potential split of the file's bmap btree.
+ *
+ * When rmap is enabled, we must also be able to handle two rmap btree inserts
+ * to record both the file data extent and a new bmbt block. The bmbt block
+ * might not be in the same AG as the file data extent. In the worst case
+ * the bmap btree splits multiple levels and all the new blocks come from
+ * different AGs, so set aside enough to handle rmap btree splits in all AGs.
+ */
+unsigned int
+xfs_alloc_set_aside(
+	struct xfs_mount	*mp)
+{
+	unsigned int		blocks;
+
+	blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		blocks += mp->m_sb.sb_agcount * mp->m_rmap_maxlevels;
+	return blocks;
+}
+
+/*
+ * When deciding how much space to allocate out of an AG, we limit the
+ * allocation maximum size to the size the AG. However, we cannot use all the
+ * blocks in the AG - some are permanently used by metadata. These
+ * blocks are generally:
+ *	- the AG superblock, AGF, AGI and AGFL
+ *	- the AGF (bno and cnt) and AGI btree root blocks, and optionally
+ *	  the AGI free inode and rmap btree root blocks.
+ *	- blocks on the AGFL according to xfs_alloc_set_aside() limits
+ *	- the rmapbt root block
+ *
+ * The AG headers are sector sized, so the amount of space they take up is
+ * dependent on filesystem geometry. The others are all single blocks.
+ */
+unsigned int
+xfs_alloc_ag_max_usable(
+	struct xfs_mount	*mp)
+{
+	unsigned int		blocks;
+
+	blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
+	blocks += XFS_ALLOC_AGFL_RESERVE;
+	blocks += 3;			/* AGF, AGI btree root blocks */
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		blocks++;		/* finobt root block */
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		blocks++;		/* rmap root block */
+
+	return mp->m_sb.sb_agblocks - blocks;
+}
+
 /*
  * Lookup the record equal to [bno, len] in the btree given by cur.
  */
@@ -636,6 +713,14 @@ xfs_alloc_ag_vextent(
 	ASSERT(!args->wasfromfl || !args->isfl);
 	ASSERT(args->agbno % args->alignment == 0);
 
+	/* if not file data, insert new block into the reverse map btree */
+	if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+		error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
+				       args->agbno, args->len, &args->oinfo);
+		if (error)
+			return error;
+	}
+
 	if (!args->wasfromfl) {
 		error = xfs_alloc_update_counters(args->tp, args->pag,
 						  args->agbp,
@@ -1577,14 +1662,15 @@ error0:
 /*
  * Free the extent starting at agno/bno for length.
  */
-STATIC int			/* error */
+STATIC int
 xfs_free_ag_extent(
-	xfs_trans_t	*tp,	/* transaction pointer */
-	xfs_buf_t	*agbp,	/* buffer for a.g. freelist header */
-	xfs_agnumber_t	agno,	/* allocation group number */
-	xfs_agblock_t	bno,	/* starting block number */
-	xfs_extlen_t	len,	/* length of extent */
-	int		isfl)	/* set if is freelist blocks - no sb acctg */
+	xfs_trans_t		*tp,
+	xfs_buf_t		*agbp,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	struct xfs_owner_info	*oinfo,
+	int			isfl)
 {
 	xfs_btree_cur_t	*bno_cur;	/* cursor for by-block btree */
 	xfs_btree_cur_t	*cnt_cur;	/* cursor for by-size btree */
@@ -1601,12 +1687,19 @@ xfs_free_ag_extent(
 	xfs_extlen_t	nlen;		/* new length of freespace */
 	xfs_perag_t	*pag;		/* per allocation group data */
 
+	bno_cur = cnt_cur = NULL;
 	mp = tp->t_mountp;
+
+	if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+		error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
+		if (error)
+			goto error0;
+	}
+
 	/*
 	 * Allocate and initialize a cursor for the by-block btree.
 	 */
 	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
-	cnt_cur = NULL;
 	/*
 	 * Look for a neighboring block on the left (lower block numbers)
 	 * that is contiguous with this space.
@@ -1875,6 +1968,11 @@ xfs_alloc_min_freelist(
 	/* space needed by-size freespace btree */
 	min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
 				       mp->m_ag_maxlevels);
+	/* space needed reverse mapping used space btree */
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		min_free += min_t(unsigned int,
+				  pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
+				  mp->m_rmap_maxlevels);
 
 	return min_free;
 }
@@ -1992,21 +2090,34 @@ xfs_alloc_fix_freelist(
 	 * anything other than extra overhead when we need to put more blocks
 	 * back on the free list? Maybe we should only do this when space is
 	 * getting low or the AGFL is more than half full?
+	 *
+	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
+	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
+	 * updating the rmapbt. Both flags are used in xfs_repair while we're
+	 * rebuilding the rmapbt, and neither are used by the kernel. They're
+	 * both required to ensure that rmaps are correctly recorded for the
+	 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
+	 * repair/rmap.c in xfsprogs for details.
 	 */
-	while (pag->pagf_flcount > need) {
+	memset(&targs, 0, sizeof(targs));
+	if (flags & XFS_ALLOC_FLAG_NORMAP)
+		xfs_rmap_skip_owner_update(&targs.oinfo);
+	else
+		xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
+	while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
 		struct xfs_buf	*bp;
 
 		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
 		if (error)
 			goto out_agbp_relse;
-		error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1);
+		error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
+					   &targs.oinfo, 1);
 		if (error)
 			goto out_agbp_relse;
 		bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
 		xfs_trans_binval(tp, bp);
 	}
 
-	memset(&targs, 0, sizeof(targs));
 	targs.tp = tp;
 	targs.mp = mp;
 	targs.agbp = agbp;
@@ -2271,6 +2382,10 @@ xfs_agf_verify(
 	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
 		return false;
 
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)
+		return false;
+
 	/*
 	 * during growfs operations, the perag is not fully initialised,
 	 * so we can't use it for any useful checking. growfs ensures we can't
@@ -2402,6 +2517,8 @@ xfs_alloc_read_agf(
 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
 		pag->pagf_levels[XFS_BTNUM_CNTi] =
 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
+		pag->pagf_levels[XFS_BTNUM_RMAPi] =
+			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
 		spin_lock_init(&pag->pagb_lock);
 		pag->pagb_count = 0;
 		pag->pagb_tree = RB_ROOT;
@@ -2691,7 +2808,8 @@ int /* error */
 xfs_free_extent(
 	struct xfs_trans	*tp,	/* transaction pointer */
 	xfs_fsblock_t		bno,	/* starting block number of extent */
-	xfs_extlen_t		len)	/* length of extent */
+	xfs_extlen_t		len,	/* length of extent */
+	struct xfs_owner_info	*oinfo)	/* extent owner */
 {
 	struct xfs_mount	*mp = tp->t_mountp;
 	struct xfs_buf		*agbp;
@@ -2701,6 +2819,11 @@ xfs_free_extent(
 
 	ASSERT(len != 0);
 
+	if (XFS_TEST_ERROR(false, mp,
+			XFS_ERRTAG_FREE_EXTENT,
+			XFS_RANDOM_FREE_EXTENT))
+		return -EIO;
+
 	error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
 	if (error)
 		return error;
@@ -2712,7 +2835,7 @@ xfs_free_extent(
 		agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
 		err);
 
-	error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, 0);
+	error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, 0);
 	if (error)
 		goto err;
 
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index cf268b2d0b6c..6fe2d6b7cfe9 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -54,41 +54,8 @@ typedef unsigned int xfs_alloctype_t;
  */
 #define XFS_ALLOC_FLAG_TRYLOCK	0x00000001  /* use trylock for buffer locking */
 #define XFS_ALLOC_FLAG_FREEING	0x00000002  /* indicate caller is freeing extents*/
-
-/*
- * In order to avoid ENOSPC-related deadlock caused by
- * out-of-order locking of AGF buffer (PV 947395), we place
- * constraints on the relationship among actual allocations for
- * data blocks, freelist blocks, and potential file data bmap
- * btree blocks. However, these restrictions may result in no
- * actual space allocated for a delayed extent, for example, a data
- * block in a certain AG is allocated but there is no additional
- * block for the additional bmap btree block due to a split of the
- * bmap btree of the file. The result of this may lead to an
- * infinite loop in xfssyncd when the file gets flushed to disk and
- * all delayed extents need to be actually allocated. To get around
- * this, we explicitly set aside a few blocks which will not be
- * reserved in delayed allocation. Considering the minimum number of
- * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap
- * btree requires 1 fsb, so we set the number of set-aside blocks
- * to 4 + 4*agcount.
- */
-#define XFS_ALLOC_SET_ASIDE(mp)  (4 + ((mp)->m_sb.sb_agcount * 4))
-
-/*
- * When deciding how much space to allocate out of an AG, we limit the
- * allocation maximum size to the size the AG. However, we cannot use all the
- * blocks in the AG - some are permanently used by metadata. These
- * blocks are generally:
- *	- the AG superblock, AGF, AGI and AGFL
- *	- the AGF (bno and cnt) and AGI btree root blocks
- *	- 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits
- *
- * The AG headers are sector sized, so the amount of space they take up is
- * dependent on filesystem geometry. The others are all single blocks.
- */
-#define XFS_ALLOC_AG_MAX_USABLE(mp)	\
-	((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
+#define XFS_ALLOC_FLAG_NORMAP	0x00000004  /* don't modify the rmapbt */
+#define XFS_ALLOC_FLAG_NOSHRINK	0x00000008  /* don't shrink the freelist */
 
 
 /*
@@ -123,6 +90,7 @@ typedef struct xfs_alloc_arg {
 	char		isfl;		/* set if is freelist blocks - !acctg */
 	char		userdata;	/* mask defining userdata treatment */
 	xfs_fsblock_t	firstblock;	/* io first block allocated */
+	struct xfs_owner_info	oinfo;	/* owner of blocks being allocated */
 } xfs_alloc_arg_t;
 
 /*
@@ -132,6 +100,11 @@ typedef struct xfs_alloc_arg {
 #define XFS_ALLOC_INITIAL_USER_DATA	(1 << 1)/* special case start of file */
 #define XFS_ALLOC_USERDATA_ZERO		(1 << 2)/* zero extent on allocation */
 
+/* freespace limit calculations */
+#define XFS_ALLOC_AGFL_RESERVE	4
+unsigned int xfs_alloc_set_aside(struct xfs_mount *mp);
+unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp);
+
 xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
 		struct xfs_perag *pag, xfs_extlen_t need);
 unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
@@ -208,9 +181,10 @@ xfs_alloc_vextent(
  */
 int				/* error */
 xfs_free_extent(
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_fsblock_t		bno,	/* starting block number of extent */
-	xfs_extlen_t		len);	/* length of extent */
+	xfs_extlen_t		len,	/* length of extent */
+	struct xfs_owner_info	*oinfo);/* extent owner */
 
 int				/* error */
 xfs_alloc_lookup_ge(
@@ -232,4 +206,6 @@ int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
 int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno,
 		struct xfs_buf **agbp);
 
+xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp);
+
 #endif	/* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index d9b42425291e..5ba2dac5e67c 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -212,17 +212,6 @@ xfs_allocbt_init_key_from_rec(
 }
 
 STATIC void
-xfs_allocbt_init_rec_from_key(
-	union xfs_btree_key	*key,
-	union xfs_btree_rec	*rec)
-{
-	ASSERT(key->alloc.ar_startblock != 0);
-
-	rec->alloc.ar_startblock = key->alloc.ar_startblock;
-	rec->alloc.ar_blockcount = key->alloc.ar_blockcount;
-}
-
-STATIC void
 xfs_allocbt_init_rec_from_cur(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_rec	*rec)
@@ -406,7 +395,6 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
 	.get_minrecs		= xfs_allocbt_get_minrecs,
 	.get_maxrecs		= xfs_allocbt_get_maxrecs,
 	.init_key_from_rec	= xfs_allocbt_init_key_from_rec,
-	.init_rec_from_key	= xfs_allocbt_init_rec_from_key,
 	.init_rec_from_cur	= xfs_allocbt_init_rec_from_cur,
 	.init_ptr_from_cur	= xfs_allocbt_init_ptr_from_cur,
 	.key_diff		= xfs_allocbt_key_diff,
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 4e126f41a0aa..af1ecb19121e 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -23,6 +23,7 @@
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
 #include "xfs_mount.h"
+#include "xfs_defer.h"
 #include "xfs_da_format.h"
 #include "xfs_da_btree.h"
 #include "xfs_attr_sf.h"
@@ -203,7 +204,7 @@ xfs_attr_set(
 {
 	struct xfs_mount	*mp = dp->i_mount;
 	struct xfs_da_args	args;
-	struct xfs_bmap_free	flist;
+	struct xfs_defer_ops	dfops;
 	struct xfs_trans_res	tres;
 	xfs_fsblock_t		firstblock;
 	int			rsvd = (flags & ATTR_ROOT) != 0;
@@ -221,7 +222,7 @@ xfs_attr_set(
 	args.value = value;
 	args.valuelen = valuelen;
 	args.firstblock = &firstblock;
-	args.flist = &flist;
+	args.dfops = &dfops;
 	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
 	args.total = xfs_attr_calc_size(&args, &local);
 
@@ -316,13 +317,13 @@ xfs_attr_set(
 		 * It won't fit in the shortform, transform to a leaf block.
 		 * GROT: another possible req'mt for a double-split btree op.
 		 */
-		xfs_bmap_init(args.flist, args.firstblock);
+		xfs_defer_init(args.dfops, args.firstblock);
 		error = xfs_attr_shortform_to_leaf(&args);
 		if (!error)
-			error = xfs_bmap_finish(&args.trans, args.flist, dp);
+			error = xfs_defer_finish(&args.trans, args.dfops, dp);
 		if (error) {
 			args.trans = NULL;
-			xfs_bmap_cancel(&flist);
+			xfs_defer_cancel(&dfops);
 			goto out;
 		}
 
@@ -382,7 +383,7 @@ xfs_attr_remove(
 {
 	struct xfs_mount	*mp = dp->i_mount;
 	struct xfs_da_args	args;
-	struct xfs_bmap_free	flist;
+	struct xfs_defer_ops	dfops;
 	xfs_fsblock_t		firstblock;
 	int			error;
 
@@ -399,7 +400,7 @@ xfs_attr_remove(
 		return error;
 
 	args.firstblock = &firstblock;
-	args.flist = &flist;
+	args.dfops = &dfops;
 
 	/*
	 * we have no control over the attribute names that userspace passes us
@@ -584,13 +585,13 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 		 * Commit that transaction so that the node_addname() call
 		 * can manage its own transactions.
 		 */
-		xfs_bmap_init(args->flist, args->firstblock);
+		xfs_defer_init(args->dfops, args->firstblock);
 		error = xfs_attr3_leaf_to_node(args);
 		if (!error)
-			error = xfs_bmap_finish(&args->trans, args->flist, dp);
+			error = xfs_defer_finish(&args->trans, args->dfops, dp);
 		if (error) {
 			args->trans = NULL;
-			xfs_bmap_cancel(args->flist);
+			xfs_defer_cancel(args->dfops);
 			return error;
 		}
 
@@ -674,15 +675,15 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 		 * If the result is small enough, shrink it all into the inode.
 		 */
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-			xfs_bmap_init(args->flist, args->firstblock);
+			xfs_defer_init(args->dfops, args->firstblock);
 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (!error)
-				error = xfs_bmap_finish(&args->trans,
-							args->flist, dp);
+				error = xfs_defer_finish(&args->trans,
+							 args->dfops, dp);
 			if (error) {
 				args->trans = NULL;
-				xfs_bmap_cancel(args->flist);
+				xfs_defer_cancel(args->dfops);
 				return error;
 			}
 		}
@@ -737,14 +738,14 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
 	 * If the result is small enough, shrink it all into the inode.
 	 */
 	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-		xfs_bmap_init(args->flist, args->firstblock);
+		xfs_defer_init(args->dfops, args->firstblock);
 		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 		/* bp is gone due to xfs_da_shrink_inode */
 		if (!error)
-			error = xfs_bmap_finish(&args->trans, args->flist, dp);
+			error = xfs_defer_finish(&args->trans, args->dfops, dp);
 		if (error) {
 			args->trans = NULL;
-			xfs_bmap_cancel(args->flist);
+			xfs_defer_cancel(args->dfops);
 			return error;
 		}
 	}
@@ -863,14 +864,14 @@ restart:
 			 */
 			xfs_da_state_free(state);
 			state = NULL;
-			xfs_bmap_init(args->flist, args->firstblock);
+			xfs_defer_init(args->dfops, args->firstblock);
 			error = xfs_attr3_leaf_to_node(args);
 			if (!error)
-				error = xfs_bmap_finish(&args->trans,
-							args->flist, dp);
+				error = xfs_defer_finish(&args->trans,
+							 args->dfops, dp);
 			if (error) {
 				args->trans = NULL;
-				xfs_bmap_cancel(args->flist);
+				xfs_defer_cancel(args->dfops);
 				goto out;
 			}
 
@@ -891,13 +892,13 @@ restart:
 		 * in the index/blkno/rmtblkno/rmtblkcnt fields and
 		 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
 		 */
-		xfs_bmap_init(args->flist, args->firstblock);
+		xfs_defer_init(args->dfops, args->firstblock);
 		error = xfs_da3_split(state);
 		if (!error)
-			error = xfs_bmap_finish(&args->trans, args->flist, dp);
+			error = xfs_defer_finish(&args->trans, args->dfops, dp);
 		if (error) {
 			args->trans = NULL;
-			xfs_bmap_cancel(args->flist);
+			xfs_defer_cancel(args->dfops);
 			goto out;
 		}
 	} else {
@@ -990,14 +991,14 @@ restart:
 		 * Check to see if the tree needs to be collapsed.
 		 */
 		if (retval && (state->path.active > 1)) {
-			xfs_bmap_init(args->flist, args->firstblock);
+			xfs_defer_init(args->dfops, args->firstblock);
 			error = xfs_da3_join(state);
 			if (!error)
-				error = xfs_bmap_finish(&args->trans,
-							args->flist, dp);
+				error = xfs_defer_finish(&args->trans,
+							 args->dfops, dp);
 			if (error) {
 				args->trans = NULL;
-				xfs_bmap_cancel(args->flist);
+				xfs_defer_cancel(args->dfops);
 				goto out;
 			}
 		}
@@ -1113,13 +1114,13 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 	 * Check to see if the tree needs to be collapsed.
 	 */
 	if (retval && (state->path.active > 1)) {
-		xfs_bmap_init(args->flist, args->firstblock);
+		xfs_defer_init(args->dfops, args->firstblock);
 		error = xfs_da3_join(state);
 		if (!error)
-			error = xfs_bmap_finish(&args->trans, args->flist, dp);
+			error = xfs_defer_finish(&args->trans, args->dfops, dp);
 		if (error) {
 			args->trans = NULL;
-			xfs_bmap_cancel(args->flist);
+			xfs_defer_cancel(args->dfops);
 			goto out;
 		}
 		/*
@@ -1146,15 +1147,15 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 			goto out;
 
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-			xfs_bmap_init(args->flist, args->firstblock);
+			xfs_defer_init(args->dfops, args->firstblock);
 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (!error)
-				error = xfs_bmap_finish(&args->trans,
-							args->flist, dp);
+				error = xfs_defer_finish(&args->trans,
+							 args->dfops, dp);
 			if (error) {
 				args->trans = NULL;
-				xfs_bmap_cancel(args->flist);
+				xfs_defer_cancel(args->dfops);
 				goto out;
 			}
 		} else
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 01a5ecfedfcf..8ea91f363093 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -792,7 +792,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 	nargs.dp = dp;
 	nargs.geo = args->geo;
 	nargs.firstblock = args->firstblock;
-	nargs.flist = args->flist;
+	nargs.dfops = args->dfops;
 	nargs.total = args->total;
 	nargs.whichfork = XFS_ATTR_FORK;
 	nargs.trans = args->trans;
@@ -922,7 +922,7 @@ xfs_attr3_leaf_to_shortform(
 	nargs.geo = args->geo;
 	nargs.dp = dp;
 	nargs.firstblock = args->firstblock;
-	nargs.flist = args->flist;
+	nargs.dfops = args->dfops;
 	nargs.total = args->total;
 	nargs.whichfork = XFS_ATTR_FORK;
 	nargs.trans = args->trans;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index a572532a55cd..d52f525f5b2d 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -24,6 +24,7 @@
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
 #include "xfs_mount.h"
+#include "xfs_defer.h"
 #include "xfs_da_format.h"
 #include "xfs_da_btree.h"
 #include "xfs_inode.h"
@@ -460,16 +461,16 @@ xfs_attr_rmtval_set(
 		 * extent and then crash then the block may not contain the
 		 * correct metadata after log recovery occurs.
 		 */
-		xfs_bmap_init(args->flist, args->firstblock);
+		xfs_defer_init(args->dfops, args->firstblock);
 		nmap = 1;
 		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
 				  blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
-				  args->total, &map, &nmap, args->flist);
+				  args->total, &map, &nmap, args->dfops);
 		if (!error)
-			error = xfs_bmap_finish(&args->trans, args->flist, dp);
+			error = xfs_defer_finish(&args->trans, args->dfops, dp);
 		if (error) {
 			args->trans = NULL;
-			xfs_bmap_cancel(args->flist);
+			xfs_defer_cancel(args->dfops);
 			return error;
 		}
 
@@ -503,7 +504,7 @@ xfs_attr_rmtval_set(
 
 		ASSERT(blkcnt > 0);
 
-		xfs_bmap_init(args->flist, args->firstblock);
+		xfs_defer_init(args->dfops, args->firstblock);
 		nmap = 1;
 		error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
 				       blkcnt, &map, &nmap,
@@ -603,16 +604,16 @@ xfs_attr_rmtval_remove(
 	blkcnt = args->rmtblkcnt;
 	done = 0;
 	while (!done) {
-		xfs_bmap_init(args->flist, args->firstblock);
+		xfs_defer_init(args->dfops, args->firstblock);
 		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
 				    XFS_BMAPI_ATTRFORK, 1, args->firstblock,
-				    args->flist, &done);
+				    args->dfops, &done);
 		if (!error)
-			error = xfs_bmap_finish(&args->trans, args->flist,
+			error = xfs_defer_finish(&args->trans, args->dfops,
						args->dp);
 		if (error) {
 			args->trans = NULL;
-			xfs_bmap_cancel(args->flist);
+			xfs_defer_cancel(args->dfops);
 			return error;
 		}
 
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 2f2c85cc8117..b060bca93402 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -24,6 +24,7 @@
 #include "xfs_bit.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
+#include "xfs_defer.h"
 #include "xfs_da_format.h"
 #include "xfs_da_btree.h"
 #include "xfs_dir2.h"
@@ -45,6 +46,7 @@
 #include "xfs_symlink.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_filestream.h"
+#include "xfs_rmap.h"
 
 
 kmem_zone_t		*xfs_bmap_free_item_zone;
@@ -570,12 +572,13 @@ xfs_bmap_validate_ret(
  */
 void
 xfs_bmap_add_free(
-	struct xfs_mount	*mp,		/* mount point structure */
-	struct xfs_bmap_free	*flist,		/* list of extents */
-	xfs_fsblock_t		bno,		/* fs block number of extent */
-	xfs_filblks_t		len)		/* length of extent */
+	struct xfs_mount	*mp,
+	struct xfs_defer_ops	*dfops,
+	xfs_fsblock_t		bno,
+	xfs_filblks_t		len,
+	struct xfs_owner_info	*oinfo)
 {
-	struct xfs_bmap_free_item	*new;		/* new element */
+	struct xfs_extent_free_item	*new;		/* new element */
 #ifdef DEBUG
 	xfs_agnumber_t		agno;
 	xfs_agblock_t		agbno;
@@ -592,44 +595,17 @@ xfs_bmap_add_free(
 	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
 #endif
 	ASSERT(xfs_bmap_free_item_zone != NULL);
-	new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
-	new->xbfi_startblock = bno;
-	new->xbfi_blockcount = (xfs_extlen_t)len;
-	list_add(&new->xbfi_list, &flist->xbf_flist);
-	flist->xbf_count++;
-}
-
-/*
- * Remove the entry "free" from the free item list. Prev points to the
- * previous entry, unless "free" is the head of the list.
- */
-void
-xfs_bmap_del_free(
-	struct xfs_bmap_free		*flist,	/* free item list header */
-	struct xfs_bmap_free_item	*free)	/* list item to be freed */
-{
-	list_del(&free->xbfi_list);
-	flist->xbf_count--;
-	kmem_zone_free(xfs_bmap_free_item_zone, free);
-}
-
-/*
- * Free up any items left in the list.
- */
-void
-xfs_bmap_cancel(
-	struct xfs_bmap_free	*flist)	/* list of bmap_free_items */
-{
-	struct xfs_bmap_free_item	*free;	/* free list item */
 
-	if (flist->xbf_count == 0)
-		return;
-	while (!list_empty(&flist->xbf_flist)) {
-		free = list_first_entry(&flist->xbf_flist,
-				struct xfs_bmap_free_item, xbfi_list);
-		xfs_bmap_del_free(flist, free);
-	}
-	ASSERT(flist->xbf_count == 0);
+	new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
+	new->xefi_startblock = bno;
+	new->xefi_blockcount = (xfs_extlen_t)len;
+	if (oinfo)
+		new->xefi_oinfo = *oinfo;
+	else
+		xfs_rmap_skip_owner_update(&new->xefi_oinfo);
+	trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0,
+			XFS_FSB_TO_AGBNO(mp, bno), len);
+	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
 }
 
 /*
@@ -659,6 +635,7 @@ xfs_bmap_btree_to_extents(
 	xfs_mount_t		*mp;	/* mount point structure */
 	__be64			*pp;	/* ptr to block address */
 	struct xfs_btree_block	*rblock;/* root btree block */
+	struct xfs_owner_info	oinfo;
 
 	mp = ip->i_mount;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -682,7 +659,8 @@ xfs_bmap_btree_to_extents(
 	cblock = XFS_BUF_TO_BLOCK(cbp);
 	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 		return error;
-	xfs_bmap_add_free(mp, cur->bc_private.b.flist, cbno, 1);
+	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
+	xfs_bmap_add_free(mp, cur->bc_private.b.dfops, cbno, 1, &oinfo);
 	ip->i_d.di_nblocks--;
 	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 	xfs_trans_binval(tp, cbp);
@@ -705,7 +683,7 @@ xfs_bmap_extents_to_btree(
 	xfs_trans_t		*tp,		/* transaction pointer */
 	xfs_inode_t		*ip,		/* incore inode pointer */
 	xfs_fsblock_t		*firstblock,	/* first-block-allocated */
-	xfs_bmap_free_t		*flist,		/* blocks freed in xaction */
+	struct xfs_defer_ops	*dfops,		/* blocks freed in xaction */
 	xfs_btree_cur_t		**curp,		/* cursor returned to caller */
 	int			wasdel,		/* converting a delayed alloc */
 	int			*logflagsp,	/* inode logging flags */
@@ -754,7 +732,7 @@ xfs_bmap_extents_to_btree(
 	 */
 	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 	cur->bc_private.b.firstblock = *firstblock;
-	cur->bc_private.b.flist = flist;
+	cur->bc_private.b.dfops = dfops;
 	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
 	/*
	 * Convert to a btree with two levels, one record in root.
@@ -763,11 +741,12 @@ xfs_bmap_extents_to_btree(
 	memset(&args, 0, sizeof(args));
 	args.tp = tp;
 	args.mp = mp;
+	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
 	args.firstblock = *firstblock;
 	if (*firstblock == NULLFSBLOCK) {
 		args.type = XFS_ALLOCTYPE_START_BNO;
 		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
-	} else if (flist->xbf_low) {
+	} else if (dfops->dop_low) {
 		args.type = XFS_ALLOCTYPE_START_BNO;
 		args.fsbno = *firstblock;
 	} else {
@@ -788,7 +767,7 @@ xfs_bmap_extents_to_btree(
 	ASSERT(args.fsbno != NULLFSBLOCK);
 	ASSERT(*firstblock == NULLFSBLOCK ||
 	       args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
-	       (flist->xbf_low &&
+	       (dfops->dop_low &&
 		args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
 	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
 	cur->bc_private.b.allocated++;
@@ -909,6 +888,7 @@ xfs_bmap_local_to_extents(
 	memset(&args, 0, sizeof(args));
 	args.tp = tp;
 	args.mp = ip->i_mount;
+	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
 	args.firstblock = *firstblock;
 	/*
	 * Allocate a block. We know we need only one, since the
@@ -973,7 +953,7 @@ xfs_bmap_add_attrfork_btree(
 	xfs_trans_t		*tp,		/* transaction pointer */
 	xfs_inode_t		*ip,		/* incore inode pointer */
 	xfs_fsblock_t		*firstblock,	/* first block allocated */
-	xfs_bmap_free_t		*flist,		/* blocks to free at commit */
+	struct xfs_defer_ops	*dfops,		/* blocks to free at commit */
 	int			*flags)		/* inode logging flags */
 {
 	xfs_btree_cur_t		*cur;		/* btree cursor */
@@ -986,7 +966,7 @@ xfs_bmap_add_attrfork_btree(
 		*flags |= XFS_ILOG_DBROOT;
 	else {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
-		cur->bc_private.b.flist = flist;
+		cur->bc_private.b.dfops = dfops;
 		cur->bc_private.b.firstblock = *firstblock;
 		if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
 			goto error0;
@@ -1016,7 +996,7 @@ xfs_bmap_add_attrfork_extents(
 	xfs_trans_t		*tp,		/* transaction pointer */
 	xfs_inode_t		*ip,		/* incore inode pointer */
 	xfs_fsblock_t		*firstblock,	/* first block allocated */
-	xfs_bmap_free_t		*flist,		/* blocks to free at commit */
+	struct xfs_defer_ops	*dfops,		/* blocks to free at commit */
 	int			*flags)		/* inode logging flags */
 {
 	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
@@ -1025,7 +1005,7 @@ xfs_bmap_add_attrfork_extents(
 	if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
 		return 0;
 	cur = NULL;
-	error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0,
+	error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, &cur, 0,
 		flags, XFS_DATA_FORK);
 	if (cur) {
 		cur->bc_private.b.allocated = 0;
@@ -1051,7 +1031,7 @@ xfs_bmap_add_attrfork_local(
 	xfs_trans_t		*tp,		/* transaction pointer */
 	xfs_inode_t		*ip,		/* incore inode pointer */
 	xfs_fsblock_t		*firstblock,	/* first block allocated */
-	xfs_bmap_free_t		*flist,		/* blocks to free at commit */
+	struct xfs_defer_ops	*dfops,		/* blocks to free at commit */
 	int			*flags)		/* inode logging flags */
 {
 	xfs_da_args_t		dargs;		/* args for dir/attr code */
@@ -1064,7 +1044,7 @@ xfs_bmap_add_attrfork_local(
 		dargs.geo = ip->i_mount->m_dir_geo;
 		dargs.dp = ip;
 		dargs.firstblock = firstblock;
-		dargs.flist = flist;
+		dargs.dfops = dfops;
 		dargs.total = dargs.geo->fsbcount;
 		dargs.whichfork = XFS_DATA_FORK;
 		dargs.trans = tp;
@@ -1092,7 +1072,7 @@ xfs_bmap_add_attrfork(
 	int			rsvd)		/* xact may use reserved blks */
 {
 	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
-	xfs_bmap_free_t		flist;		/* freed extent records */
+	struct xfs_defer_ops	dfops;		/* freed extent records */
 	xfs_mount_t		*mp;		/* mount structure */
 	xfs_trans_t		*tp;		/* transaction pointer */
 	int			blks;		/* space reservation */
@@ -1158,18 +1138,18 @@ xfs_bmap_add_attrfork(
 	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
 	ip->i_afp->if_flags = XFS_IFEXTENTS;
 	logflags = 0;
-	xfs_bmap_init(&flist, &firstblock);
+	xfs_defer_init(&dfops, &firstblock);
 	switch (ip->i_d.di_format) {
 	case XFS_DINODE_FMT_LOCAL:
-		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
+		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &dfops,
 			&logflags);
 		break;
 	case XFS_DINODE_FMT_EXTENTS:
 		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
-			&flist, &logflags);
+			&dfops, &logflags);
 		break;
 	case XFS_DINODE_FMT_BTREE:
-		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
+		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &dfops,
 			&logflags);
 		break;
 	default:
@@ -1198,7 +1178,7 @@ xfs_bmap_add_attrfork(
 		xfs_log_sb(tp);
 	}
 
-	error = xfs_bmap_finish(&tp, &flist, NULL);
+	error = xfs_defer_finish(&tp, &dfops, NULL);
 	if (error)
 		goto bmap_cancel;
 	error = xfs_trans_commit(tp);
@@ -1206,7 +1186,7 @@ xfs_bmap_add_attrfork(
 	return error;
 
 bmap_cancel:
-	xfs_bmap_cancel(&flist);
+	xfs_defer_cancel(&dfops);
 trans_cancel:
 	xfs_trans_cancel(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -2003,7 +1983,7 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-					bma->firstblock, bma->flist,
+					bma->firstblock, bma->dfops,
 					&bma->cur, 1, &tmp_rval, whichfork);
 			rval |= tmp_rval;
 			if (error)
@@ -2087,7 +2067,7 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-				bma->firstblock, bma->flist, &bma->cur, 1,
+				bma->firstblock, bma->dfops, &bma->cur, 1,
 				&tmp_rval, whichfork);
 			rval |= tmp_rval;
 			if (error)
@@ -2156,7 +2136,7 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-					bma->firstblock, bma->flist, &bma->cur,
+					bma->firstblock, bma->dfops, &bma->cur,
 					1, &tmp_rval, whichfork);
 			rval |= tmp_rval;
 			if (error)
@@ -2199,13 +2179,18 @@ xfs_bmap_add_extent_delay_real(
 		ASSERT(0);
 	}
 
+	/* add reverse mapping */
+	error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
+	if (error)
+		goto done;
+
 	/* convert to a btree if necessary */
 	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 		int	tmp_logflags;	/* partial log flag return val */
 
 		ASSERT(bma->cur == NULL);
 		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-				bma->firstblock, bma->flist, &bma->cur,
+				bma->firstblock, bma->dfops, &bma->cur,
 				da_old > 0, &tmp_logflags, whichfork);
 		bma->logflags |= tmp_logflags;
 		if (error)
@@ -2247,7 +2232,7 @@ xfs_bmap_add_extent_unwritten_real(
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
-	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
+	struct xfs_defer_ops	*dfops,	/* list of extents to be freed */
 	int			*logflagsp) /* inode logging flags */
 {
 	xfs_btree_cur_t		*cur;	/* btree cursor */
@@ -2735,12 +2720,17 @@ xfs_bmap_add_extent_unwritten_real(
 		ASSERT(0);
 	}
 
+	/* update reverse mappings */
+	error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new);
+	if (error)
+		goto done;
+
 	/* convert to a btree if necessary */
 	if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
 		int	tmp_logflags;	/* partial log flag return val */
 
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur,
+		error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
 				0, &tmp_logflags, XFS_DATA_FORK);
 		*logflagsp |= tmp_logflags;
 		if (error)
@@ -3127,13 +3117,18 @@ xfs_bmap_add_extent_hole_real(
 		break;
 	}
 
+	/* add reverse mapping */
+	error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
+	if (error)
+		goto done;
+
 	/* convert to a btree if necessary */
 	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 		int	tmp_logflags;	/* partial log flag return val */
 
 		ASSERT(bma->cur == NULL);
 		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-				bma->firstblock, bma->flist, &bma->cur,
+				bma->firstblock, bma->dfops, &bma->cur,
 				0, &tmp_logflags, whichfork);
 		bma->logflags |= tmp_logflags;
 		if (error)
@@ -3691,9 +3686,10 @@ xfs_bmap_btalloc(
 	args.tp = ap->tp;
 	args.mp = mp;
 	args.fsbno = ap->blkno;
+	xfs_rmap_skip_owner_update(&args.oinfo);
 
 	/* Trim the allocation back to the maximum an AG can fit. */
3696 args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp)); 3692 args.maxlen = MIN(ap->length, mp->m_ag_max_usable);
3697 args.firstblock = *ap->firstblock; 3693 args.firstblock = *ap->firstblock;
3698 blen = 0; 3694 blen = 0;
3699 if (nullfb) { 3695 if (nullfb) {
@@ -3708,7 +3704,7 @@ xfs_bmap_btalloc(
3708 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); 3704 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3709 if (error) 3705 if (error)
3710 return error; 3706 return error;
3711 } else if (ap->flist->xbf_low) { 3707 } else if (ap->dfops->dop_low) {
3712 if (xfs_inode_is_filestream(ap->ip)) 3708 if (xfs_inode_is_filestream(ap->ip))
3713 args.type = XFS_ALLOCTYPE_FIRST_AG; 3709 args.type = XFS_ALLOCTYPE_FIRST_AG;
3714 else 3710 else
@@ -3741,7 +3737,7 @@ xfs_bmap_btalloc(
3741 * is >= the stripe unit and the allocation offset is 3737 * is >= the stripe unit and the allocation offset is
3742 * at the end of file. 3738 * at the end of file.
3743 */ 3739 */
3744 if (!ap->flist->xbf_low && ap->aeof) { 3740 if (!ap->dfops->dop_low && ap->aeof) {
3745 if (!ap->offset) { 3741 if (!ap->offset) {
3746 args.alignment = stripe_align; 3742 args.alignment = stripe_align;
3747 atype = args.type; 3743 atype = args.type;
@@ -3834,7 +3830,7 @@ xfs_bmap_btalloc(
3834 args.minleft = 0; 3830 args.minleft = 0;
3835 if ((error = xfs_alloc_vextent(&args))) 3831 if ((error = xfs_alloc_vextent(&args)))
3836 return error; 3832 return error;
3837 ap->flist->xbf_low = 1; 3833 ap->dfops->dop_low = true;
3838 } 3834 }
3839 if (args.fsbno != NULLFSBLOCK) { 3835 if (args.fsbno != NULLFSBLOCK) {
3840 /* 3836 /*
@@ -3844,7 +3840,7 @@ xfs_bmap_btalloc(
3844 ASSERT(*ap->firstblock == NULLFSBLOCK || 3840 ASSERT(*ap->firstblock == NULLFSBLOCK ||
3845 XFS_FSB_TO_AGNO(mp, *ap->firstblock) == 3841 XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
3846 XFS_FSB_TO_AGNO(mp, args.fsbno) || 3842 XFS_FSB_TO_AGNO(mp, args.fsbno) ||
3847 (ap->flist->xbf_low && 3843 (ap->dfops->dop_low &&
3848 XFS_FSB_TO_AGNO(mp, *ap->firstblock) < 3844 XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
3849 XFS_FSB_TO_AGNO(mp, args.fsbno))); 3845 XFS_FSB_TO_AGNO(mp, args.fsbno)));
3850 3846
@@ -3852,7 +3848,7 @@ xfs_bmap_btalloc(
3852 if (*ap->firstblock == NULLFSBLOCK) 3848 if (*ap->firstblock == NULLFSBLOCK)
3853 *ap->firstblock = args.fsbno; 3849 *ap->firstblock = args.fsbno;
3854 ASSERT(nullfb || fb_agno == args.agno || 3850 ASSERT(nullfb || fb_agno == args.agno ||
3855 (ap->flist->xbf_low && fb_agno < args.agno)); 3851 (ap->dfops->dop_low && fb_agno < args.agno));
3856 ap->length = args.len; 3852 ap->length = args.len;
3857 ap->ip->i_d.di_nblocks += args.len; 3853 ap->ip->i_d.di_nblocks += args.len;
3858 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); 3854 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
@@ -4319,7 +4315,7 @@ xfs_bmapi_allocate(
4319 if (error) 4315 if (error)
4320 return error; 4316 return error;
4321 4317
4322 if (bma->flist->xbf_low) 4318 if (bma->dfops->dop_low)
4323 bma->minleft = 0; 4319 bma->minleft = 0;
4324 if (bma->cur) 4320 if (bma->cur)
4325 bma->cur->bc_private.b.firstblock = *bma->firstblock; 4321 bma->cur->bc_private.b.firstblock = *bma->firstblock;
@@ -4328,7 +4324,7 @@ xfs_bmapi_allocate(
4328 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { 4324 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4329 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork); 4325 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4330 bma->cur->bc_private.b.firstblock = *bma->firstblock; 4326 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4331 bma->cur->bc_private.b.flist = bma->flist; 4327 bma->cur->bc_private.b.dfops = bma->dfops;
4332 } 4328 }
4333 /* 4329 /*
4334 * Bump the number of extents we've allocated 4330 * Bump the number of extents we've allocated
@@ -4409,7 +4405,7 @@ xfs_bmapi_convert_unwritten(
4409 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp, 4405 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4410 bma->ip, whichfork); 4406 bma->ip, whichfork);
4411 bma->cur->bc_private.b.firstblock = *bma->firstblock; 4407 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4412 bma->cur->bc_private.b.flist = bma->flist; 4408 bma->cur->bc_private.b.dfops = bma->dfops;
4413 } 4409 }
4414 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) 4410 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4415 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; 4411 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
@@ -4426,7 +4422,7 @@ xfs_bmapi_convert_unwritten(
4426 } 4422 }
4427 4423
4428 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, 4424 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
4429 &bma->cur, mval, bma->firstblock, bma->flist, 4425 &bma->cur, mval, bma->firstblock, bma->dfops,
4430 &tmp_logflags); 4426 &tmp_logflags);
4431 /* 4427 /*
4432 * Log the inode core unconditionally in the unwritten extent conversion 4428 * Log the inode core unconditionally in the unwritten extent conversion
@@ -4480,7 +4476,7 @@ xfs_bmapi_write(
4480 xfs_extlen_t total, /* total blocks needed */ 4476 xfs_extlen_t total, /* total blocks needed */
4481 struct xfs_bmbt_irec *mval, /* output: map values */ 4477 struct xfs_bmbt_irec *mval, /* output: map values */
4482 int *nmap, /* i/o: mval size/count */ 4478 int *nmap, /* i/o: mval size/count */
4483 struct xfs_bmap_free *flist) /* i/o: list extents to free */ 4479 struct xfs_defer_ops *dfops) /* i/o: list extents to free */
4484{ 4480{
4485 struct xfs_mount *mp = ip->i_mount; 4481 struct xfs_mount *mp = ip->i_mount;
4486 struct xfs_ifork *ifp; 4482 struct xfs_ifork *ifp;
@@ -4570,7 +4566,7 @@ xfs_bmapi_write(
4570 bma.ip = ip; 4566 bma.ip = ip;
4571 bma.total = total; 4567 bma.total = total;
4572 bma.userdata = 0; 4568 bma.userdata = 0;
4573 bma.flist = flist; 4569 bma.dfops = dfops;
4574 bma.firstblock = firstblock; 4570 bma.firstblock = firstblock;
4575 4571
4576 while (bno < end && n < *nmap) { 4572 while (bno < end && n < *nmap) {
@@ -4684,7 +4680,7 @@ error0:
4684 XFS_FSB_TO_AGNO(mp, *firstblock) == 4680 XFS_FSB_TO_AGNO(mp, *firstblock) ==
4685 XFS_FSB_TO_AGNO(mp, 4681 XFS_FSB_TO_AGNO(mp,
4686 bma.cur->bc_private.b.firstblock) || 4682 bma.cur->bc_private.b.firstblock) ||
4687 (flist->xbf_low && 4683 (dfops->dop_low &&
4688 XFS_FSB_TO_AGNO(mp, *firstblock) < 4684 XFS_FSB_TO_AGNO(mp, *firstblock) <
4689 XFS_FSB_TO_AGNO(mp, 4685 XFS_FSB_TO_AGNO(mp,
4690 bma.cur->bc_private.b.firstblock))); 4686 bma.cur->bc_private.b.firstblock)));
@@ -4768,7 +4764,7 @@ xfs_bmap_del_extent(
4768 xfs_inode_t *ip, /* incore inode pointer */ 4764 xfs_inode_t *ip, /* incore inode pointer */
4769 xfs_trans_t *tp, /* current transaction pointer */ 4765 xfs_trans_t *tp, /* current transaction pointer */
4770 xfs_extnum_t *idx, /* extent number to update/delete */ 4766 xfs_extnum_t *idx, /* extent number to update/delete */
4771 xfs_bmap_free_t *flist, /* list of extents to be freed */ 4767 struct xfs_defer_ops *dfops, /* list of extents to be freed */
4772 xfs_btree_cur_t *cur, /* if null, not a btree */ 4768 xfs_btree_cur_t *cur, /* if null, not a btree */
4773 xfs_bmbt_irec_t *del, /* data to remove from extents */ 4769 xfs_bmbt_irec_t *del, /* data to remove from extents */
4774 int *logflagsp, /* inode logging flags */ 4770 int *logflagsp, /* inode logging flags */
@@ -4870,6 +4866,7 @@ xfs_bmap_del_extent(
4870 nblks = 0; 4866 nblks = 0;
4871 do_fx = 0; 4867 do_fx = 0;
4872 } 4868 }
4869
4873 /* 4870 /*
4874 * Set flag value to use in switch statement. 4871 * Set flag value to use in switch statement.
4875 * Left-contig is 2, right-contig is 1. 4872 * Left-contig is 2, right-contig is 1.
@@ -5052,12 +5049,20 @@ xfs_bmap_del_extent(
5052 ++*idx; 5049 ++*idx;
5053 break; 5050 break;
5054 } 5051 }
5052
5053 /* remove reverse mapping */
5054 if (!delay) {
5055 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del);
5056 if (error)
5057 goto done;
5058 }
5059
5055 /* 5060 /*
5056 * If we need to, add to list of extents to delete. 5061 * If we need to, add to list of extents to delete.
5057 */ 5062 */
5058 if (do_fx) 5063 if (do_fx)
5059 xfs_bmap_add_free(mp, flist, del->br_startblock, 5064 xfs_bmap_add_free(mp, dfops, del->br_startblock,
5060 del->br_blockcount); 5065 del->br_blockcount, NULL);
5061 /* 5066 /*
5062 * Adjust inode # blocks in the file. 5067 * Adjust inode # blocks in the file.
5063 */ 5068 */
@@ -5097,7 +5102,7 @@ xfs_bunmapi(
5097 xfs_extnum_t nexts, /* number of extents max */ 5102 xfs_extnum_t nexts, /* number of extents max */
5098 xfs_fsblock_t *firstblock, /* first allocated block 5103 xfs_fsblock_t *firstblock, /* first allocated block
5099 controls a.g. for allocs */ 5104 controls a.g. for allocs */
5100 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 5105 struct xfs_defer_ops *dfops, /* i/o: list extents to free */
5101 int *done) /* set if not done yet */ 5106 int *done) /* set if not done yet */
5102{ 5107{
5103 xfs_btree_cur_t *cur; /* bmap btree cursor */ 5108 xfs_btree_cur_t *cur; /* bmap btree cursor */
@@ -5170,7 +5175,7 @@ xfs_bunmapi(
5170 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); 5175 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
5171 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5176 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5172 cur->bc_private.b.firstblock = *firstblock; 5177 cur->bc_private.b.firstblock = *firstblock;
5173 cur->bc_private.b.flist = flist; 5178 cur->bc_private.b.dfops = dfops;
5174 cur->bc_private.b.flags = 0; 5179 cur->bc_private.b.flags = 0;
5175 } else 5180 } else
5176 cur = NULL; 5181 cur = NULL;
@@ -5179,8 +5184,10 @@ xfs_bunmapi(
5179 /* 5184 /*
5180 * Synchronize by locking the bitmap inode. 5185 * Synchronize by locking the bitmap inode.
5181 */ 5186 */
5182 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); 5187 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5183 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); 5188 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5189 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5190 xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5184 } 5191 }
5185 5192
5186 extno = 0; 5193 extno = 0;
@@ -5262,7 +5269,7 @@ xfs_bunmapi(
5262 } 5269 }
5263 del.br_state = XFS_EXT_UNWRITTEN; 5270 del.br_state = XFS_EXT_UNWRITTEN;
5264 error = xfs_bmap_add_extent_unwritten_real(tp, ip, 5271 error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5265 &lastx, &cur, &del, firstblock, flist, 5272 &lastx, &cur, &del, firstblock, dfops,
5266 &logflags); 5273 &logflags);
5267 if (error) 5274 if (error)
5268 goto error0; 5275 goto error0;
@@ -5321,7 +5328,7 @@ xfs_bunmapi(
5321 lastx--; 5328 lastx--;
5322 error = xfs_bmap_add_extent_unwritten_real(tp, 5329 error = xfs_bmap_add_extent_unwritten_real(tp,
5323 ip, &lastx, &cur, &prev, 5330 ip, &lastx, &cur, &prev,
5324 firstblock, flist, &logflags); 5331 firstblock, dfops, &logflags);
5325 if (error) 5332 if (error)
5326 goto error0; 5333 goto error0;
5327 goto nodelete; 5334 goto nodelete;
@@ -5330,7 +5337,7 @@ xfs_bunmapi(
5330 del.br_state = XFS_EXT_UNWRITTEN; 5337 del.br_state = XFS_EXT_UNWRITTEN;
5331 error = xfs_bmap_add_extent_unwritten_real(tp, 5338 error = xfs_bmap_add_extent_unwritten_real(tp,
5332 ip, &lastx, &cur, &del, 5339 ip, &lastx, &cur, &del,
5333 firstblock, flist, &logflags); 5340 firstblock, dfops, &logflags);
5334 if (error) 5341 if (error)
5335 goto error0; 5342 goto error0;
5336 goto nodelete; 5343 goto nodelete;
@@ -5388,7 +5395,7 @@ xfs_bunmapi(
5388 } else if (cur) 5395 } else if (cur)
5389 cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL; 5396 cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
5390 5397
5391 error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, 5398 error = xfs_bmap_del_extent(ip, tp, &lastx, dfops, cur, &del,
5392 &tmp_logflags, whichfork); 5399 &tmp_logflags, whichfork);
5393 logflags |= tmp_logflags; 5400 logflags |= tmp_logflags;
5394 if (error) 5401 if (error)
@@ -5422,7 +5429,7 @@ nodelete:
5422 */ 5429 */
5423 if (xfs_bmap_needs_btree(ip, whichfork)) { 5430 if (xfs_bmap_needs_btree(ip, whichfork)) {
5424 ASSERT(cur == NULL); 5431 ASSERT(cur == NULL);
5425 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, 5432 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops,
5426 &cur, 0, &tmp_logflags, whichfork); 5433 &cur, 0, &tmp_logflags, whichfork);
5427 logflags |= tmp_logflags; 5434 logflags |= tmp_logflags;
5428 if (error) 5435 if (error)
@@ -5589,7 +5596,8 @@ xfs_bmse_shift_one(
5589 struct xfs_bmbt_rec_host *gotp, 5596 struct xfs_bmbt_rec_host *gotp,
5590 struct xfs_btree_cur *cur, 5597 struct xfs_btree_cur *cur,
5591 int *logflags, 5598 int *logflags,
5592 enum shift_direction direction) 5599 enum shift_direction direction,
5600 struct xfs_defer_ops *dfops)
5593{ 5601{
5594 struct xfs_ifork *ifp; 5602 struct xfs_ifork *ifp;
5595 struct xfs_mount *mp; 5603 struct xfs_mount *mp;
@@ -5637,9 +5645,13 @@ xfs_bmse_shift_one(
5637 /* check whether to merge the extent or shift it down */ 5645 /* check whether to merge the extent or shift it down */
5638 if (xfs_bmse_can_merge(&adj_irec, &got, 5646 if (xfs_bmse_can_merge(&adj_irec, &got,
5639 offset_shift_fsb)) { 5647 offset_shift_fsb)) {
5640 return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, 5648 error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
5641 *current_ext, gotp, adj_irecp, 5649 *current_ext, gotp, adj_irecp,
5642 cur, logflags); 5650 cur, logflags);
5651 if (error)
5652 return error;
5653 adj_irec = got;
5654 goto update_rmap;
5643 } 5655 }
5644 } else { 5656 } else {
5645 startoff = got.br_startoff + offset_shift_fsb; 5657 startoff = got.br_startoff + offset_shift_fsb;
@@ -5676,9 +5688,10 @@ update_current_ext:
5676 (*current_ext)--; 5688 (*current_ext)--;
5677 xfs_bmbt_set_startoff(gotp, startoff); 5689 xfs_bmbt_set_startoff(gotp, startoff);
5678 *logflags |= XFS_ILOG_CORE; 5690 *logflags |= XFS_ILOG_CORE;
5691 adj_irec = got;
5679 if (!cur) { 5692 if (!cur) {
5680 *logflags |= XFS_ILOG_DEXT; 5693 *logflags |= XFS_ILOG_DEXT;
5681 return 0; 5694 goto update_rmap;
5682 } 5695 }
5683 5696
5684 error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, 5697 error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
@@ -5688,8 +5701,18 @@ update_current_ext:
5688 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 5701 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5689 5702
5690 got.br_startoff = startoff; 5703 got.br_startoff = startoff;
5691 return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, 5704 error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
5692 got.br_blockcount, got.br_state); 5705 got.br_blockcount, got.br_state);
5706 if (error)
5707 return error;
5708
5709update_rmap:
5710 /* update reverse mapping */
5711 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &adj_irec);
5712 if (error)
5713 return error;
5714 adj_irec.br_startoff = startoff;
5715 return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &adj_irec);
5693} 5716}
5694 5717
5695/* 5718/*
@@ -5711,7 +5734,7 @@ xfs_bmap_shift_extents(
5711 int *done, 5734 int *done,
5712 xfs_fileoff_t stop_fsb, 5735 xfs_fileoff_t stop_fsb,
5713 xfs_fsblock_t *firstblock, 5736 xfs_fsblock_t *firstblock,
5714 struct xfs_bmap_free *flist, 5737 struct xfs_defer_ops *dfops,
5715 enum shift_direction direction, 5738 enum shift_direction direction,
5716 int num_exts) 5739 int num_exts)
5717{ 5740{
@@ -5756,7 +5779,7 @@ xfs_bmap_shift_extents(
5756 if (ifp->if_flags & XFS_IFBROOT) { 5779 if (ifp->if_flags & XFS_IFBROOT) {
5757 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5780 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5758 cur->bc_private.b.firstblock = *firstblock; 5781 cur->bc_private.b.firstblock = *firstblock;
5759 cur->bc_private.b.flist = flist; 5782 cur->bc_private.b.dfops = dfops;
5760 cur->bc_private.b.flags = 0; 5783 cur->bc_private.b.flags = 0;
5761 } 5784 }
5762 5785
@@ -5817,7 +5840,7 @@ xfs_bmap_shift_extents(
5817 while (nexts++ < num_exts) { 5840 while (nexts++ < num_exts) {
5818 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, 5841 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
5819 &current_ext, gotp, cur, &logflags, 5842 &current_ext, gotp, cur, &logflags,
5820 direction); 5843 direction, dfops);
5821 if (error) 5844 if (error)
5822 goto del_cursor; 5845 goto del_cursor;
5823 /* 5846 /*
@@ -5865,7 +5888,7 @@ xfs_bmap_split_extent_at(
5865 struct xfs_inode *ip, 5888 struct xfs_inode *ip,
5866 xfs_fileoff_t split_fsb, 5889 xfs_fileoff_t split_fsb,
5867 xfs_fsblock_t *firstfsb, 5890 xfs_fsblock_t *firstfsb,
5868 struct xfs_bmap_free *free_list) 5891 struct xfs_defer_ops *dfops)
5869{ 5892{
5870 int whichfork = XFS_DATA_FORK; 5893 int whichfork = XFS_DATA_FORK;
5871 struct xfs_btree_cur *cur = NULL; 5894 struct xfs_btree_cur *cur = NULL;
@@ -5927,7 +5950,7 @@ xfs_bmap_split_extent_at(
5927 if (ifp->if_flags & XFS_IFBROOT) { 5950 if (ifp->if_flags & XFS_IFBROOT) {
5928 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5951 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5929 cur->bc_private.b.firstblock = *firstfsb; 5952 cur->bc_private.b.firstblock = *firstfsb;
5930 cur->bc_private.b.flist = free_list; 5953 cur->bc_private.b.dfops = dfops;
5931 cur->bc_private.b.flags = 0; 5954 cur->bc_private.b.flags = 0;
5932 error = xfs_bmbt_lookup_eq(cur, got.br_startoff, 5955 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5933 got.br_startblock, 5956 got.br_startblock,
@@ -5980,7 +6003,7 @@ xfs_bmap_split_extent_at(
5980 int tmp_logflags; /* partial log flag return val */ 6003 int tmp_logflags; /* partial log flag return val */
5981 6004
5982 ASSERT(cur == NULL); 6005 ASSERT(cur == NULL);
5983 error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list, 6006 error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, dfops,
5984 &cur, 0, &tmp_logflags, whichfork); 6007 &cur, 0, &tmp_logflags, whichfork);
5985 logflags |= tmp_logflags; 6008 logflags |= tmp_logflags;
5986 } 6009 }
@@ -6004,7 +6027,7 @@ xfs_bmap_split_extent(
6004{ 6027{
6005 struct xfs_mount *mp = ip->i_mount; 6028 struct xfs_mount *mp = ip->i_mount;
6006 struct xfs_trans *tp; 6029 struct xfs_trans *tp;
6007 struct xfs_bmap_free free_list; 6030 struct xfs_defer_ops dfops;
6008 xfs_fsblock_t firstfsb; 6031 xfs_fsblock_t firstfsb;
6009 int error; 6032 int error;
6010 6033
@@ -6016,21 +6039,21 @@ xfs_bmap_split_extent(
6016 xfs_ilock(ip, XFS_ILOCK_EXCL); 6039 xfs_ilock(ip, XFS_ILOCK_EXCL);
6017 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 6040 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
6018 6041
6019 xfs_bmap_init(&free_list, &firstfsb); 6042 xfs_defer_init(&dfops, &firstfsb);
6020 6043
6021 error = xfs_bmap_split_extent_at(tp, ip, split_fsb, 6044 error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
6022 &firstfsb, &free_list); 6045 &firstfsb, &dfops);
6023 if (error) 6046 if (error)
6024 goto out; 6047 goto out;
6025 6048
6026 error = xfs_bmap_finish(&tp, &free_list, NULL); 6049 error = xfs_defer_finish(&tp, &dfops, NULL);
6027 if (error) 6050 if (error)
6028 goto out; 6051 goto out;
6029 6052
6030 return xfs_trans_commit(tp); 6053 return xfs_trans_commit(tp);
6031 6054
6032out: 6055out:
6033 xfs_bmap_cancel(&free_list); 6056 xfs_defer_cancel(&dfops);
6034 xfs_trans_cancel(tp); 6057 xfs_trans_cancel(tp);
6035 return error; 6058 return error;
6036} 6059}
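[Editor's note] The xfs_bmap_split_extent() conversion at the end of this file shows the caller pattern that replaces the old xfs_bmap_init/xfs_bmap_finish/xfs_bmap_cancel calls throughout the series: initialise the deferred-ops list, queue work against it, then either finish it before the transaction commits or cancel it on the error path. A minimal standalone mock of that control flow follows; the types and helper names here are illustrative stand-ins, not the kernel API.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for struct xfs_defer_ops and its helpers. */
struct defer_ops {
	int	pending;	/* queued work items */
	bool	low;		/* low-space allocation mode (dop_low) */
};

static void defer_init(struct defer_ops *dfops)
{
	dfops->pending = 0;
	dfops->low = false;
}

static int defer_finish(struct defer_ops *dfops)
{
	/* Process everything queued so far; in the real code the
	 * processing itself may queue further items. */
	printf("finishing %d deferred item(s)\n", dfops->pending);
	dfops->pending = 0;
	return 0;
}

static void defer_cancel(struct defer_ops *dfops)
{
	dfops->pending = 0;
}

/* Mirrors the xfs_bmap_split_extent() control flow shown above. */
static int split_extent(bool fail)
{
	struct defer_ops dfops;
	int error;

	defer_init(&dfops);
	dfops.pending++;		/* queue the split work */
	error = fail ? -1 : 0;		/* stand-in for the real work */
	if (error)
		goto out;

	error = defer_finish(&dfops);
	if (error)
		goto out;
	return 0;			/* commit the transaction here */
out:
	defer_cancel(&dfops);
	return error;			/* cancel the transaction here */
}

int main(void)
{
	return split_extent(false);
}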
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index f1f3ae6c0a3f..254034f96941 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -32,7 +32,7 @@ extern kmem_zone_t *xfs_bmap_free_item_zone;
32 */ 32 */
33struct xfs_bmalloca { 33struct xfs_bmalloca {
34 xfs_fsblock_t *firstblock; /* i/o first block allocated */ 34 xfs_fsblock_t *firstblock; /* i/o first block allocated */
35 struct xfs_bmap_free *flist; /* bmap freelist */ 35 struct xfs_defer_ops *dfops; /* bmap freelist */
36 struct xfs_trans *tp; /* transaction pointer */ 36 struct xfs_trans *tp; /* transaction pointer */
37 struct xfs_inode *ip; /* incore inode pointer */ 37 struct xfs_inode *ip; /* incore inode pointer */
38 struct xfs_bmbt_irec prev; /* extent before the new one */ 38 struct xfs_bmbt_irec prev; /* extent before the new one */
@@ -62,34 +62,14 @@ struct xfs_bmalloca {
62 * List of extents to be free "later". 62 * List of extents to be free "later".
63 * The list is kept sorted on xbf_startblock. 63 * The list is kept sorted on xbf_startblock.
64 */ 64 */
65struct xfs_bmap_free_item 65struct xfs_extent_free_item
66{ 66{
67 xfs_fsblock_t xbfi_startblock;/* starting fs block number */ 67 xfs_fsblock_t xefi_startblock;/* starting fs block number */
68 xfs_extlen_t xbfi_blockcount;/* number of blocks in extent */ 68 xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
69 struct list_head xbfi_list; 69 struct list_head xefi_list;
70 struct xfs_owner_info xefi_oinfo; /* extent owner */
70}; 71};
71 72
72/*
73 * Header for free extent list.
74 *
75 * xbf_low is used by the allocator to activate the lowspace algorithm -
76 * when free space is running low the extent allocator may choose to
77 * allocate an extent from an AG without leaving sufficient space for
78 * a btree split when inserting the new extent. In this case the allocator
79 * will enable the lowspace algorithm which is supposed to allow further
80 * allocations (such as btree splits and newroots) to allocate from
81 * sequential AGs. In order to avoid locking AGs out of order the lowspace
82 * algorithm will start searching for free space from AG 0. If the correct
83 * transaction reservations have been made then this algorithm will eventually
84 * find all the space it needs.
85 */
86typedef struct xfs_bmap_free
87{
88 struct list_head xbf_flist; /* list of to-be-free extents */
89 int xbf_count; /* count of items on list */
90 int xbf_low; /* alloc in low mode */
91} xfs_bmap_free_t;
92
93#define XFS_BMAP_MAX_NMAP 4 73#define XFS_BMAP_MAX_NMAP 4
94 74
95/* 75/*
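[Editor's note] The xfs_bmap_free header comment removed above documented the low-space allocation mode that survives in the new code as dfops->dop_low: when a targeted allocation fails, the allocator flips into low mode and, as the removed comment describes, restarts its search from AG 0 so that allocation groups are taken in order. A standalone sketch of that fallback, with purely illustrative names and a toy free-space table:

#include <stdbool.h>
#include <stdio.h>

#define NUM_AGS 4

/* Toy free-space table, one entry per allocation group. */
static unsigned int ag_free[NUM_AGS] = { 0, 0, 3, 10 };

static bool alloc_from_ag(unsigned int agno, unsigned int len)
{
	if (ag_free[agno] < len)
		return false;
	ag_free[agno] -= len;
	return true;
}

/*
 * Try the preferred AG first; if that fails, enter low-space mode and
 * search from AG 0 upwards, mirroring the dop_low behaviour described
 * in the removed comment.
 */
static int alloc_extent(unsigned int preferred_ag, unsigned int len, bool *low)
{
	unsigned int agno;

	if (alloc_from_ag(preferred_ag, len))
		return (int)preferred_ag;

	*low = true;
	for (agno = 0; agno < NUM_AGS; agno++)
		if (alloc_from_ag(agno, len))
			return (int)agno;
	return -1;	/* out of space */
}

int main(void)
{
	bool low = false;
	int agno = alloc_extent(1, 2, &low);

	printf("allocated from AG %d, low-space mode: %d\n", agno, low);
	return 0;
}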
@@ -139,14 +119,6 @@ static inline int xfs_bmapi_aflag(int w)
139#define DELAYSTARTBLOCK ((xfs_fsblock_t)-1LL) 119#define DELAYSTARTBLOCK ((xfs_fsblock_t)-1LL)
140#define HOLESTARTBLOCK ((xfs_fsblock_t)-2LL) 120#define HOLESTARTBLOCK ((xfs_fsblock_t)-2LL)
141 121
142static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
143{
144 INIT_LIST_HEAD(&flp->xbf_flist);
145 flp->xbf_count = 0;
146 flp->xbf_low = 0;
147 *fbp = NULLFSBLOCK;
148}
149
150/* 122/*
151 * Flags for xfs_bmap_add_extent*. 123 * Flags for xfs_bmap_add_extent*.
152 */ 124 */
@@ -193,11 +165,9 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
193 165
194int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); 166int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
195void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); 167void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
196void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_bmap_free *flist, 168void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
197 xfs_fsblock_t bno, xfs_filblks_t len); 169 xfs_fsblock_t bno, xfs_filblks_t len,
198void xfs_bmap_cancel(struct xfs_bmap_free *flist); 170 struct xfs_owner_info *oinfo);
199int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
200 struct xfs_inode *ip);
201void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); 171void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
202int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, 172int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
203 xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); 173 xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
@@ -218,18 +188,18 @@ int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
218 xfs_fileoff_t bno, xfs_filblks_t len, int flags, 188 xfs_fileoff_t bno, xfs_filblks_t len, int flags,
219 xfs_fsblock_t *firstblock, xfs_extlen_t total, 189 xfs_fsblock_t *firstblock, xfs_extlen_t total,
220 struct xfs_bmbt_irec *mval, int *nmap, 190 struct xfs_bmbt_irec *mval, int *nmap,
221 struct xfs_bmap_free *flist); 191 struct xfs_defer_ops *dfops);
222int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, 192int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
223 xfs_fileoff_t bno, xfs_filblks_t len, int flags, 193 xfs_fileoff_t bno, xfs_filblks_t len, int flags,
224 xfs_extnum_t nexts, xfs_fsblock_t *firstblock, 194 xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
225 struct xfs_bmap_free *flist, int *done); 195 struct xfs_defer_ops *dfops, int *done);
226int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, 196int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
227 xfs_extnum_t num); 197 xfs_extnum_t num);
228uint xfs_default_attroffset(struct xfs_inode *ip); 198uint xfs_default_attroffset(struct xfs_inode *ip);
229int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, 199int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
230 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, 200 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
231 int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, 201 int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
232 struct xfs_bmap_free *flist, enum shift_direction direction, 202 struct xfs_defer_ops *dfops, enum shift_direction direction,
233 int num_exts); 203 int num_exts);
234int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); 204int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
235 205
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index db0c71e470c9..cd85274e810c 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -23,6 +23,7 @@
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
26#include "xfs_inode.h" 27#include "xfs_inode.h"
27#include "xfs_trans.h" 28#include "xfs_trans.h"
28#include "xfs_inode_item.h" 29#include "xfs_inode_item.h"
@@ -34,6 +35,7 @@
34#include "xfs_quota.h" 35#include "xfs_quota.h"
35#include "xfs_trace.h" 36#include "xfs_trace.h"
36#include "xfs_cksum.h" 37#include "xfs_cksum.h"
38#include "xfs_rmap.h"
37 39
38/* 40/*
39 * Determine the extent state. 41 * Determine the extent state.
@@ -406,11 +408,11 @@ xfs_bmbt_dup_cursor(
406 cur->bc_private.b.ip, cur->bc_private.b.whichfork); 408 cur->bc_private.b.ip, cur->bc_private.b.whichfork);
407 409
408 /* 410 /*
409 * Copy the firstblock, flist, and flags values, 411 * Copy the firstblock, dfops, and flags values,
410 * since init cursor doesn't get them. 412 * since init cursor doesn't get them.
411 */ 413 */
412 new->bc_private.b.firstblock = cur->bc_private.b.firstblock; 414 new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
413 new->bc_private.b.flist = cur->bc_private.b.flist; 415 new->bc_private.b.dfops = cur->bc_private.b.dfops;
414 new->bc_private.b.flags = cur->bc_private.b.flags; 416 new->bc_private.b.flags = cur->bc_private.b.flags;
415 417
416 return new; 418 return new;
@@ -423,7 +425,7 @@ xfs_bmbt_update_cursor(
423{ 425{
424 ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) || 426 ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
425 (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME)); 427 (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
426 ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist); 428 ASSERT(dst->bc_private.b.dfops == src->bc_private.b.dfops);
427 429
428 dst->bc_private.b.allocated += src->bc_private.b.allocated; 430 dst->bc_private.b.allocated += src->bc_private.b.allocated;
429 dst->bc_private.b.firstblock = src->bc_private.b.firstblock; 431 dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
@@ -446,6 +448,8 @@ xfs_bmbt_alloc_block(
446 args.mp = cur->bc_mp; 448 args.mp = cur->bc_mp;
447 args.fsbno = cur->bc_private.b.firstblock; 449 args.fsbno = cur->bc_private.b.firstblock;
448 args.firstblock = args.fsbno; 450 args.firstblock = args.fsbno;
451 xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_private.b.ip->i_ino,
452 cur->bc_private.b.whichfork);
449 453
450 if (args.fsbno == NULLFSBLOCK) { 454 if (args.fsbno == NULLFSBLOCK) {
451 args.fsbno = be64_to_cpu(start->l); 455 args.fsbno = be64_to_cpu(start->l);
@@ -462,7 +466,7 @@ xfs_bmbt_alloc_block(
462 * block allocation here and corrupt the filesystem. 466 * block allocation here and corrupt the filesystem.
463 */ 467 */
464 args.minleft = args.tp->t_blk_res; 468 args.minleft = args.tp->t_blk_res;
465 } else if (cur->bc_private.b.flist->xbf_low) { 469 } else if (cur->bc_private.b.dfops->dop_low) {
466 args.type = XFS_ALLOCTYPE_START_BNO; 470 args.type = XFS_ALLOCTYPE_START_BNO;
467 } else { 471 } else {
468 args.type = XFS_ALLOCTYPE_NEAR_BNO; 472 args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -490,7 +494,7 @@ xfs_bmbt_alloc_block(
490 error = xfs_alloc_vextent(&args); 494 error = xfs_alloc_vextent(&args);
491 if (error) 495 if (error)
492 goto error0; 496 goto error0;
493 cur->bc_private.b.flist->xbf_low = 1; 497 cur->bc_private.b.dfops->dop_low = true;
494 } 498 }
495 if (args.fsbno == NULLFSBLOCK) { 499 if (args.fsbno == NULLFSBLOCK) {
496 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 500 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
@@ -525,8 +529,10 @@ xfs_bmbt_free_block(
525 struct xfs_inode *ip = cur->bc_private.b.ip; 529 struct xfs_inode *ip = cur->bc_private.b.ip;
526 struct xfs_trans *tp = cur->bc_tp; 530 struct xfs_trans *tp = cur->bc_tp;
527 xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); 531 xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
532 struct xfs_owner_info oinfo;
528 533
529 xfs_bmap_add_free(mp, cur->bc_private.b.flist, fsbno, 1); 534 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_private.b.whichfork);
535 xfs_bmap_add_free(mp, cur->bc_private.b.dfops, fsbno, 1, &oinfo);
530 ip->i_d.di_nblocks--; 536 ip->i_d.di_nblocks--;
531 537
532 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 538 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -600,17 +606,6 @@ xfs_bmbt_init_key_from_rec(
600} 606}
601 607
602STATIC void 608STATIC void
603xfs_bmbt_init_rec_from_key(
604 union xfs_btree_key *key,
605 union xfs_btree_rec *rec)
606{
607 ASSERT(key->bmbt.br_startoff != 0);
608
609 xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff),
610 0, 0, XFS_EXT_NORM);
611}
612
613STATIC void
614xfs_bmbt_init_rec_from_cur( 609xfs_bmbt_init_rec_from_cur(
615 struct xfs_btree_cur *cur, 610 struct xfs_btree_cur *cur,
616 union xfs_btree_rec *rec) 611 union xfs_btree_rec *rec)
@@ -760,7 +755,6 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
760 .get_minrecs = xfs_bmbt_get_minrecs, 755 .get_minrecs = xfs_bmbt_get_minrecs,
761 .get_dmaxrecs = xfs_bmbt_get_dmaxrecs, 756 .get_dmaxrecs = xfs_bmbt_get_dmaxrecs,
762 .init_key_from_rec = xfs_bmbt_init_key_from_rec, 757 .init_key_from_rec = xfs_bmbt_init_key_from_rec,
763 .init_rec_from_key = xfs_bmbt_init_rec_from_key,
764 .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, 758 .init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
765 .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, 759 .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
766 .key_diff = xfs_bmbt_key_diff, 760 .key_diff = xfs_bmbt_key_diff,
@@ -800,7 +794,7 @@ xfs_bmbt_init_cursor(
800 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); 794 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
801 cur->bc_private.b.ip = ip; 795 cur->bc_private.b.ip = ip;
802 cur->bc_private.b.firstblock = NULLFSBLOCK; 796 cur->bc_private.b.firstblock = NULLFSBLOCK;
803 cur->bc_private.b.flist = NULL; 797 cur->bc_private.b.dfops = NULL;
804 cur->bc_private.b.allocated = 0; 798 cur->bc_private.b.allocated = 0;
805 cur->bc_private.b.flags = 0; 799 cur->bc_private.b.flags = 0;
806 cur->bc_private.b.whichfork = whichfork; 800 cur->bc_private.b.whichfork = whichfork;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 07eeb0b4ca74..b5c213a051cd 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -23,6 +23,7 @@
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
26#include "xfs_inode.h" 27#include "xfs_inode.h"
27#include "xfs_trans.h" 28#include "xfs_trans.h"
28#include "xfs_inode_item.h" 29#include "xfs_inode_item.h"
@@ -43,15 +44,14 @@ kmem_zone_t *xfs_btree_cur_zone;
43 * Btree magic numbers. 44 * Btree magic numbers.
44 */ 45 */
45static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { 46static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
46 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, 47 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
47 XFS_FIBT_MAGIC }, 48 XFS_FIBT_MAGIC },
48 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, 49 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
49 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC } 50 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
50}; 51};
51#define xfs_btree_magic(cur) \ 52#define xfs_btree_magic(cur) \
52 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] 53 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
53 54
54
55STATIC int /* error (0 or EFSCORRUPTED) */ 55STATIC int /* error (0 or EFSCORRUPTED) */
56xfs_btree_check_lblock( 56xfs_btree_check_lblock(
57 struct xfs_btree_cur *cur, /* btree cursor */ 57 struct xfs_btree_cur *cur, /* btree cursor */
@@ -428,6 +428,50 @@ xfs_btree_dup_cursor(
428 * into a btree block (xfs_btree_*_offset) or return a pointer to the given 428 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
429 * record, key or pointer (xfs_btree_*_addr). Note that all addressing 429 * record, key or pointer (xfs_btree_*_addr). Note that all addressing
430 * inside the btree block is done using indices starting at one, not zero! 430 * inside the btree block is done using indices starting at one, not zero!
431 *
432 * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
433 * overlapping intervals. In such a tree, records are still sorted lowest to
434 * highest and indexed by the smallest key value that refers to the record.
435 * However, nodes are different: each pointer has two associated keys -- one
436 * indexing the lowest key available in the block(s) below (the same behavior
437 * as the key in a regular btree) and another indexing the highest key
438 * available in the block(s) below. Because records are /not/ sorted by the
439 * highest key, all leaf block updates require us to compute the highest key
440 * that matches any record in the leaf and to recursively update the high keys
441 * in the nodes going further up in the tree, if necessary. Nodes look like
442 * this:
443 *
444 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
445 * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
446 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
447 *
448 * To perform an interval query on an overlapped tree, perform the usual
449 * depth-first search and use the low and high keys to decide if we can skip
450 * that particular node. If a leaf node is reached, return the records that
451 * intersect the interval. Note that an interval query may return numerous
452 * entries. For a non-overlapped tree, simply search for the record associated
453 * with the lowest key and iterate forward until a non-matching record is
454 * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
455 * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in
456 * more detail.
457 *
458 * Why do we care about overlapping intervals? Let's say you have a bunch of
459 * reverse mapping records on a reflink filesystem:
460 *
461 * 1: +- file A startblock B offset C length D -----------+
462 * 2: +- file E startblock F offset G length H --------------+
463 * 3: +- file I startblock F offset J length K --+
464 * 4: +- file L... --+
465 *
466 * Now say we want to map block (B+D) into file A at offset (C+D). Ideally,
467 * we'd simply increment the length of record 1. But how do we find the record
468 * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return
469 * record 3 because the keys are ordered first by startblock. An interval
470 * query would return records 1 and 2 because they both overlap (B+D-1), and
471 * from that we can pick out record 1 as the appropriate left neighbor.
472 *
473 * In the non-overlapped case you can do a LE lookup and decrement the cursor
474 * because a record's interval must end before the next record.
431 */ 475 */
432 476
433/* 477/*
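[Editor's note] The block comment added in the hunk above describes how a node in an XFS_BTREE_OVERLAPPING tree carries a (low, high) key pair per pointer, so an interval query can skip any subtree whose range cannot intersect the query. A standalone sketch of that pruning test follows, loosely modelled on the rmap example in the comment (records 2 and 3 share a start block, but only records 1 and 2 overlap the queried block); all names and numbers are illustrative, not kernel code.

#include <stdbool.h>
#include <stdio.h>

/* One (low key, high key) pair, as stored per pointer in a node. */
struct key_range {
	unsigned long long lo;
	unsigned long long hi;	/* highest key anywhere in the subtree */
};

/* A subtree may be skipped iff its range cannot intersect the query. */
static bool subtree_may_intersect(const struct key_range *node,
				  unsigned long long qlo,
				  unsigned long long qhi)
{
	return node->lo <= qhi && node->hi >= qlo;
}

int main(void)
{
	struct key_range recs[] = {
		{ 100, 149 },	/* record 1: ends at the queried block */
		{ 120, 200 },	/* record 2: spans the queried block   */
		{ 120, 140 },	/* record 3: same start, too short     */
	};
	unsigned long long q = 149;	/* query a single block */
	int i;

	for (i = 0; i < 3; i++)
		printf("record %d %s overlap the query\n", i + 1,
		       subtree_may_intersect(&recs[i], q, q) ? "may" : "cannot");
	return 0;
}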
@@ -479,6 +523,18 @@ xfs_btree_key_offset(
479} 523}
480 524
481/* 525/*
526 * Calculate offset of the n-th high key in a btree block.
527 */
528STATIC size_t
529xfs_btree_high_key_offset(
530 struct xfs_btree_cur *cur,
531 int n)
532{
533 return xfs_btree_block_len(cur) +
534 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
535}
536
537/*
482 * Calculate offset of the n-th block pointer in a btree block. 538 * Calculate offset of the n-th block pointer in a btree block.
483 */ 539 */
484STATIC size_t 540STATIC size_t
@@ -519,6 +575,19 @@ xfs_btree_key_addr(
519} 575}
520 576
521/* 577/*
578 * Return a pointer to the n-th high key in the btree block.
579 */
580STATIC union xfs_btree_key *
581xfs_btree_high_key_addr(
582 struct xfs_btree_cur *cur,
583 int n,
584 struct xfs_btree_block *block)
585{
586 return (union xfs_btree_key *)
587 ((char *)block + xfs_btree_high_key_offset(cur, n));
588}
589
590/*
522 * Return a pointer to the n-th block pointer in the btree block. 591 * Return a pointer to the n-th block pointer in the btree block.
523 */ 592 */
524STATIC union xfs_btree_ptr * 593STATIC union xfs_btree_ptr *
@@ -1144,6 +1213,9 @@ xfs_btree_set_refs(
1144 case XFS_BTNUM_BMAP: 1213 case XFS_BTNUM_BMAP:
1145 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); 1214 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
1146 break; 1215 break;
1216 case XFS_BTNUM_RMAP:
1217 xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
1218 break;
1147 default: 1219 default:
1148 ASSERT(0); 1220 ASSERT(0);
1149 } 1221 }
@@ -1879,32 +1951,214 @@ error0:
1879 return error; 1951 return error;
1880} 1952}
1881 1953
1954/* Find the high key storage area from a regular key. */
1955STATIC union xfs_btree_key *
1956xfs_btree_high_key_from_key(
1957 struct xfs_btree_cur *cur,
1958 union xfs_btree_key *key)
1959{
1960 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
1961 return (union xfs_btree_key *)((char *)key +
1962 (cur->bc_ops->key_len / 2));
1963}
1964
1965/* Determine the low (and high if overlapped) keys of a leaf block */
1966STATIC void
1967xfs_btree_get_leaf_keys(
1968 struct xfs_btree_cur *cur,
1969 struct xfs_btree_block *block,
1970 union xfs_btree_key *key)
1971{
1972 union xfs_btree_key max_hkey;
1973 union xfs_btree_key hkey;
1974 union xfs_btree_rec *rec;
1975 union xfs_btree_key *high;
1976 int n;
1977
1978 rec = xfs_btree_rec_addr(cur, 1, block);
1979 cur->bc_ops->init_key_from_rec(key, rec);
1980
1981 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
1982
1983 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
1984 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
1985 rec = xfs_btree_rec_addr(cur, n, block);
1986 cur->bc_ops->init_high_key_from_rec(&hkey, rec);
1987 if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey)
1988 > 0)
1989 max_hkey = hkey;
1990 }
1991
1992 high = xfs_btree_high_key_from_key(cur, key);
1993 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
1994 }
1995}
1996
1997/* Determine the low (and high if overlapped) keys of a node block */
1998STATIC void
1999xfs_btree_get_node_keys(
2000 struct xfs_btree_cur *cur,
2001 struct xfs_btree_block *block,
2002 union xfs_btree_key *key)
2003{
2004 union xfs_btree_key *hkey;
2005 union xfs_btree_key *max_hkey;
2006 union xfs_btree_key *high;
2007 int n;
2008
2009 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2010 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2011 cur->bc_ops->key_len / 2);
2012
2013 max_hkey = xfs_btree_high_key_addr(cur, 1, block);
2014 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2015 hkey = xfs_btree_high_key_addr(cur, n, block);
2016 if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0)
2017 max_hkey = hkey;
2018 }
2019
2020 high = xfs_btree_high_key_from_key(cur, key);
2021 memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
2022 } else {
2023 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2024 cur->bc_ops->key_len);
2025 }
2026}
2027
2028/* Derive the keys for any btree block. */
2029STATIC void
2030xfs_btree_get_keys(
2031 struct xfs_btree_cur *cur,
2032 struct xfs_btree_block *block,
2033 union xfs_btree_key *key)
2034{
2035 if (be16_to_cpu(block->bb_level) == 0)
2036 xfs_btree_get_leaf_keys(cur, block, key);
2037 else
2038 xfs_btree_get_node_keys(cur, block, key);
2039}
2040
1882/* 2041/*
1883 * Update keys at all levels from here to the root along the cursor's path. 2042 * Decide if we need to update the parent keys of a btree block. For
2043 * a standard btree this is only necessary if we're updating the first
2044 * record/key. For an overlapping btree, we must always update the
2045 * keys because the highest key can be in any of the records or keys
2046 * in the block.
2047 */
2048static inline bool
2049xfs_btree_needs_key_update(
2050 struct xfs_btree_cur *cur,
2051 int ptr)
2052{
2053 return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
2054}
2055
2056/*
2057 * Update the low and high parent keys of the given level, progressing
2058 * towards the root. If force_all is false, stop if the keys for a given
2059 * level do not need updating.
1884 */ 2060 */
1885STATIC int 2061STATIC int
1886xfs_btree_updkey( 2062__xfs_btree_updkeys(
2063 struct xfs_btree_cur *cur,
2064 int level,
2065 struct xfs_btree_block *block,
2066 struct xfs_buf *bp0,
2067 bool force_all)
2068{
2069 union xfs_btree_bigkey key; /* keys from current level */
2070 union xfs_btree_key *lkey; /* keys from the next level up */
2071 union xfs_btree_key *hkey;
2072 union xfs_btree_key *nlkey; /* keys from the next level up */
2073 union xfs_btree_key *nhkey;
2074 struct xfs_buf *bp;
2075 int ptr;
2076
2077 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2078
2079 /* Exit if there aren't any parent levels to update. */
2080 if (level + 1 >= cur->bc_nlevels)
2081 return 0;
2082
2083 trace_xfs_btree_updkeys(cur, level, bp0);
2084
2085 lkey = (union xfs_btree_key *)&key;
2086 hkey = xfs_btree_high_key_from_key(cur, lkey);
2087 xfs_btree_get_keys(cur, block, lkey);
2088 for (level++; level < cur->bc_nlevels; level++) {
2089#ifdef DEBUG
2090 int error;
2091#endif
2092 block = xfs_btree_get_block(cur, level, &bp);
2093 trace_xfs_btree_updkeys(cur, level, bp);
2094#ifdef DEBUG
2095 error = xfs_btree_check_block(cur, block, level, bp);
2096 if (error) {
2097 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2098 return error;
2099 }
2100#endif
2101 ptr = cur->bc_ptrs[level];
2102 nlkey = xfs_btree_key_addr(cur, ptr, block);
2103 nhkey = xfs_btree_high_key_addr(cur, ptr, block);
2104 if (!force_all &&
2105 !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 ||
2106 cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0))
2107 break;
2108 xfs_btree_copy_keys(cur, nlkey, lkey, 1);
2109 xfs_btree_log_keys(cur, bp, ptr, ptr);
2110 if (level + 1 >= cur->bc_nlevels)
2111 break;
2112 xfs_btree_get_node_keys(cur, block, lkey);
2113 }
2114
2115 return 0;
2116}
2117
2118/* Update all the keys from some level in cursor back to the root. */
2119STATIC int
2120xfs_btree_updkeys_force(
2121 struct xfs_btree_cur *cur,
2122 int level)
2123{
2124 struct xfs_buf *bp;
2125 struct xfs_btree_block *block;
2126
2127 block = xfs_btree_get_block(cur, level, &bp);
2128 return __xfs_btree_updkeys(cur, level, block, bp, true);
2129}
2130
2131/*
2132 * Update the parent keys of the given level, progressing towards the root.
2133 */
2134STATIC int
2135xfs_btree_update_keys(
1887 struct xfs_btree_cur *cur, 2136 struct xfs_btree_cur *cur,
1888 union xfs_btree_key *keyp,
1889 int level) 2137 int level)
1890{ 2138{
1891 struct xfs_btree_block *block; 2139 struct xfs_btree_block *block;
1892 struct xfs_buf *bp; 2140 struct xfs_buf *bp;
1893 union xfs_btree_key *kp; 2141 union xfs_btree_key *kp;
2142 union xfs_btree_key key;
1894 int ptr; 2143 int ptr;
1895 2144
2145 ASSERT(level >= 0);
2146
2147 block = xfs_btree_get_block(cur, level, &bp);
2148 if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
2149 return __xfs_btree_updkeys(cur, level, block, bp, false);
2150
1896 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2151 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1897 XFS_BTREE_TRACE_ARGIK(cur, level, keyp); 2152 XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1898 2153
1899 ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1900
1901 /* 2154 /*
1902 * Go up the tree from this level toward the root. 2155 * Go up the tree from this level toward the root.
1903 * At each level, update the key value to the value input. 2156 * At each level, update the key value to the value input.
1904 * Stop when we reach a level where the cursor isn't pointing 2157 * Stop when we reach a level where the cursor isn't pointing
1905 * at the first entry in the block. 2158 * at the first entry in the block.
1906 */ 2159 */
1907 for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { 2160 xfs_btree_get_keys(cur, block, &key);
2161 for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1908#ifdef DEBUG 2162#ifdef DEBUG
1909 int error; 2163 int error;
1910#endif 2164#endif
@@ -1918,7 +2172,7 @@ xfs_btree_updkey(
1918#endif 2172#endif
1919 ptr = cur->bc_ptrs[level]; 2173 ptr = cur->bc_ptrs[level];
1920 kp = xfs_btree_key_addr(cur, ptr, block); 2174 kp = xfs_btree_key_addr(cur, ptr, block);
1921 xfs_btree_copy_keys(cur, kp, keyp, 1); 2175 xfs_btree_copy_keys(cur, kp, &key, 1);
1922 xfs_btree_log_keys(cur, bp, ptr, ptr); 2176 xfs_btree_log_keys(cur, bp, ptr, ptr);
1923 } 2177 }
1924 2178
@@ -1970,12 +2224,9 @@ xfs_btree_update(
1970 ptr, LASTREC_UPDATE); 2224 ptr, LASTREC_UPDATE);
1971 } 2225 }
1972 2226
1973 /* Updating first rec in leaf. Pass new key value up to our parent. */ 2227 /* Pass new key value up to our parent. */
1974 if (ptr == 1) { 2228 if (xfs_btree_needs_key_update(cur, ptr)) {
1975 union xfs_btree_key key; 2229 error = xfs_btree_update_keys(cur, 0);
1976
1977 cur->bc_ops->init_key_from_rec(&key, rec);
1978 error = xfs_btree_updkey(cur, &key, 1);
1979 if (error) 2230 if (error)
1980 goto error0; 2231 goto error0;
1981 } 2232 }
@@ -1998,18 +2249,19 @@ xfs_btree_lshift(
1998 int level, 2249 int level,
1999 int *stat) /* success/failure */ 2250 int *stat) /* success/failure */
2000{ 2251{
2001 union xfs_btree_key key; /* btree key */
2002 struct xfs_buf *lbp; /* left buffer pointer */ 2252 struct xfs_buf *lbp; /* left buffer pointer */
2003 struct xfs_btree_block *left; /* left btree block */ 2253 struct xfs_btree_block *left; /* left btree block */
2004 int lrecs; /* left record count */ 2254 int lrecs; /* left record count */
2005 struct xfs_buf *rbp; /* right buffer pointer */ 2255 struct xfs_buf *rbp; /* right buffer pointer */
2006 struct xfs_btree_block *right; /* right btree block */ 2256 struct xfs_btree_block *right; /* right btree block */
2257 struct xfs_btree_cur *tcur; /* temporary btree cursor */
2007 int rrecs; /* right record count */ 2258 int rrecs; /* right record count */
2008 union xfs_btree_ptr lptr; /* left btree pointer */ 2259 union xfs_btree_ptr lptr; /* left btree pointer */
2009 union xfs_btree_key *rkp = NULL; /* right btree key */ 2260 union xfs_btree_key *rkp = NULL; /* right btree key */
2010 union xfs_btree_ptr *rpp = NULL; /* right address pointer */ 2261 union xfs_btree_ptr *rpp = NULL; /* right address pointer */
2011 union xfs_btree_rec *rrp = NULL; /* right record pointer */ 2262 union xfs_btree_rec *rrp = NULL; /* right record pointer */
2012 int error; /* error return value */ 2263 int error; /* error return value */
2264 int i;
2013 2265
2014 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2266 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2015 XFS_BTREE_TRACE_ARGI(cur, level); 2267 XFS_BTREE_TRACE_ARGI(cur, level);
@@ -2139,18 +2391,33 @@ xfs_btree_lshift(
2139 xfs_btree_rec_addr(cur, 2, right), 2391 xfs_btree_rec_addr(cur, 2, right),
2140 -1, rrecs); 2392 -1, rrecs);
2141 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2393 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2394 }
2142 2395
2143 /* 2396 /*
2144 * If it's the first record in the block, we'll need a key 2397 * Using a temporary cursor, update the parent key values of the
2145 * structure to pass up to the next level (updkey). 2398 * block on the left.
2146 */ 2399 */
2147 cur->bc_ops->init_key_from_rec(&key, 2400 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2148 xfs_btree_rec_addr(cur, 1, right)); 2401 error = xfs_btree_dup_cursor(cur, &tcur);
2149 rkp = &key; 2402 if (error)
2403 goto error0;
2404 i = xfs_btree_firstrec(tcur, level);
2405 XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
2406
2407 error = xfs_btree_decrement(tcur, level, &i);
2408 if (error)
2409 goto error1;
2410
2411 /* Update the parent high keys of the left block, if needed. */
2412 error = xfs_btree_update_keys(tcur, level);
2413 if (error)
2414 goto error1;
2415
2416 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2150 } 2417 }
2151 2418
2152 /* Update the parent key values of right. */ 2419 /* Update the parent keys of the right block. */
2153 error = xfs_btree_updkey(cur, rkp, level + 1); 2420 error = xfs_btree_update_keys(cur, level);
2154 if (error) 2421 if (error)
2155 goto error0; 2422 goto error0;
2156 2423
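[Editor's note] xfs_btree_get_node_keys() in the hunk above derives a node's own key pair from its children: the low key is simply the first child's low key, while the high key must be the maximum over all children because, as the xfs_btree_needs_key_update() comment notes, the highest key can live under any pointer. Reduced to a standalone sketch with illustrative types only:

#include <stdio.h>

struct key_range {
	unsigned long long lo;
	unsigned long long hi;
};

/*
 * Derive a node's (lo, hi) pair from its child key pairs: lo comes from
 * the first child, hi is the maximum of all child high keys.
 */
static struct key_range get_node_keys(const struct key_range *child, int nrecs)
{
	struct key_range key = child[0];
	int n;

	for (n = 1; n < nrecs; n++)
		if (child[n].hi > key.hi)
			key.hi = child[n].hi;
	return key;
}

int main(void)
{
	struct key_range children[] = {
		{ 10, 90 },	/* the largest high key is not in the */
		{ 20, 40 },	/* first or the last child            */
		{ 50, 60 },
	};
	struct key_range key = get_node_keys(children, 3);

	printf("node key: lo=%llu hi=%llu\n", key.lo, key.hi);
	return 0;
}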
@@ -2169,6 +2436,11 @@ out0:
2169error0: 2436error0:
2170 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2437 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2171 return error; 2438 return error;
2439
2440error1:
2441 XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2442 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2443 return error;
2172} 2444}
2173 2445
2174/* 2446/*
@@ -2181,7 +2453,6 @@ xfs_btree_rshift(
2181 int level, 2453 int level,
2182 int *stat) /* success/failure */ 2454 int *stat) /* success/failure */
2183{ 2455{
2184 union xfs_btree_key key; /* btree key */
2185 struct xfs_buf *lbp; /* left buffer pointer */ 2456 struct xfs_buf *lbp; /* left buffer pointer */
2186 struct xfs_btree_block *left; /* left btree block */ 2457 struct xfs_btree_block *left; /* left btree block */
2187 struct xfs_buf *rbp; /* right buffer pointer */ 2458 struct xfs_buf *rbp; /* right buffer pointer */
@@ -2290,12 +2561,6 @@ xfs_btree_rshift(
2290 /* Now put the new data in, and log it. */ 2561 /* Now put the new data in, and log it. */
2291 xfs_btree_copy_recs(cur, rrp, lrp, 1); 2562 xfs_btree_copy_recs(cur, rrp, lrp, 1);
2292 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); 2563 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2293
2294 cur->bc_ops->init_key_from_rec(&key, rrp);
2295 rkp = &key;
2296
2297 ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2298 xfs_btree_rec_addr(cur, 2, right)));
2299 } 2564 }
2300 2565
2301 /* 2566 /*
@@ -2315,13 +2580,21 @@ xfs_btree_rshift(
2315 if (error) 2580 if (error)
2316 goto error0; 2581 goto error0;
2317 i = xfs_btree_lastrec(tcur, level); 2582 i = xfs_btree_lastrec(tcur, level);
2318 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 2583 XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
2319 2584
2320 error = xfs_btree_increment(tcur, level, &i); 2585 error = xfs_btree_increment(tcur, level, &i);
2321 if (error) 2586 if (error)
2322 goto error1; 2587 goto error1;
2323 2588
2324 error = xfs_btree_updkey(tcur, rkp, level + 1); 2589 /* Update the parent high keys of the left block, if needed. */
2590 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2591 error = xfs_btree_update_keys(cur, level);
2592 if (error)
2593 goto error1;
2594 }
2595
2596 /* Update the parent keys of the right block. */
2597 error = xfs_btree_update_keys(tcur, level);
2325 if (error) 2598 if (error)
2326 goto error1; 2599 goto error1;
2327 2600
@@ -2422,6 +2695,11 @@ __xfs_btree_split(
2422 2695
2423 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2696 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2424 2697
2698 /* Adjust numrecs for the later get_*_keys() calls. */
2699 lrecs -= rrecs;
2700 xfs_btree_set_numrecs(left, lrecs);
2701 xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2702
2425 /* 2703 /*
2426 * Copy btree block entries from the left block over to the 2704 * Copy btree block entries from the left block over to the
2427 * new block, the right. Update the right block and log the 2705 * new block, the right. Update the right block and log the
@@ -2447,14 +2725,15 @@ __xfs_btree_split(
2447 } 2725 }
2448#endif 2726#endif
2449 2727
2728 /* Copy the keys & pointers to the new block. */
2450 xfs_btree_copy_keys(cur, rkp, lkp, rrecs); 2729 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2451 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs); 2730 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2452 2731
2453 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2732 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2454 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2733 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2455 2734
2456 /* Grab the keys to the entries moved to the right block */ 2735 /* Stash the keys of the new block for later insertion. */
2457 xfs_btree_copy_keys(cur, key, rkp, 1); 2736 xfs_btree_get_node_keys(cur, right, key);
2458 } else { 2737 } else {
2459 /* It's a leaf. Move records. */ 2738 /* It's a leaf. Move records. */
2460 union xfs_btree_rec *lrp; /* left record pointer */ 2739 union xfs_btree_rec *lrp; /* left record pointer */
@@ -2463,27 +2742,23 @@ __xfs_btree_split(
2463 lrp = xfs_btree_rec_addr(cur, src_index, left); 2742 lrp = xfs_btree_rec_addr(cur, src_index, left);
2464 rrp = xfs_btree_rec_addr(cur, 1, right); 2743 rrp = xfs_btree_rec_addr(cur, 1, right);
2465 2744
2745 /* Copy records to the new block. */
2466 xfs_btree_copy_recs(cur, rrp, lrp, rrecs); 2746 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2467 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2747 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2468 2748
2469 cur->bc_ops->init_key_from_rec(key, 2749 /* Stash the keys of the new block for later insertion. */
2470 xfs_btree_rec_addr(cur, 1, right)); 2750 xfs_btree_get_leaf_keys(cur, right, key);
2471 } 2751 }
2472 2752
2473
2474 /* 2753 /*
2475 * Find the left block number by looking in the buffer. 2754 * Find the left block number by looking in the buffer.
2476 * Adjust numrecs, sibling pointers. 2755 * Adjust sibling pointers.
2477 */ 2756 */
2478 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB); 2757 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2479 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB); 2758 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2480 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2759 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2481 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2760 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2482 2761
2483 lrecs -= rrecs;
2484 xfs_btree_set_numrecs(left, lrecs);
2485 xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2486
2487 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); 2762 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2488 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 2763 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2489 2764
@@ -2499,6 +2774,14 @@ __xfs_btree_split(
2499 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB); 2774 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2500 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 2775 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2501 } 2776 }
2777
2778 /* Update the parent high keys of the left block, if needed. */
2779 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2780 error = xfs_btree_update_keys(cur, level);
2781 if (error)
2782 goto error0;
2783 }
2784
2502 /* 2785 /*
2503 * If the cursor is really in the right block, move it there. 2786 * If the cursor is really in the right block, move it there.
2504 * If it's just pointing past the last entry in left, then we'll 2787 * If it's just pointing past the last entry in left, then we'll
@@ -2802,6 +3085,7 @@ xfs_btree_new_root(
2802 bp = lbp; 3085 bp = lbp;
2803 nptr = 2; 3086 nptr = 2;
2804 } 3087 }
3088
2805 /* Fill in the new block's btree header and log it. */ 3089 /* Fill in the new block's btree header and log it. */
2806 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2); 3090 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
2807 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); 3091 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
@@ -2810,19 +3094,24 @@ xfs_btree_new_root(
2810 3094
2811 /* Fill in the key data in the new root. */ 3095 /* Fill in the key data in the new root. */
2812 if (xfs_btree_get_level(left) > 0) { 3096 if (xfs_btree_get_level(left) > 0) {
2813 xfs_btree_copy_keys(cur, 3097 /*
2814 xfs_btree_key_addr(cur, 1, new), 3098 * Get the keys for the left block's keys and put them directly
2815 xfs_btree_key_addr(cur, 1, left), 1); 3099 * in the parent block. Do the same for the right block.
2816 xfs_btree_copy_keys(cur, 3100 */
2817 xfs_btree_key_addr(cur, 2, new), 3101 xfs_btree_get_node_keys(cur, left,
2818 xfs_btree_key_addr(cur, 1, right), 1); 3102 xfs_btree_key_addr(cur, 1, new));
3103 xfs_btree_get_node_keys(cur, right,
3104 xfs_btree_key_addr(cur, 2, new));
2819 } else { 3105 } else {
2820 cur->bc_ops->init_key_from_rec( 3106 /*
2821 xfs_btree_key_addr(cur, 1, new), 3107 * Get the keys for the left block's records and put them
2822 xfs_btree_rec_addr(cur, 1, left)); 3108 * directly in the parent block. Do the same for the right
2823 cur->bc_ops->init_key_from_rec( 3109 * block.
2824 xfs_btree_key_addr(cur, 2, new), 3110 */
2825 xfs_btree_rec_addr(cur, 1, right)); 3111 xfs_btree_get_leaf_keys(cur, left,
3112 xfs_btree_key_addr(cur, 1, new));
3113 xfs_btree_get_leaf_keys(cur, right,
3114 xfs_btree_key_addr(cur, 2, new));
2826 } 3115 }
2827 xfs_btree_log_keys(cur, nbp, 1, 2); 3116 xfs_btree_log_keys(cur, nbp, 1, 2);
2828 3117
@@ -2858,10 +3147,9 @@ xfs_btree_make_block_unfull(
2858 int *index, /* new tree index */ 3147 int *index, /* new tree index */
2859 union xfs_btree_ptr *nptr, /* new btree ptr */ 3148 union xfs_btree_ptr *nptr, /* new btree ptr */
2860 struct xfs_btree_cur **ncur, /* new btree cursor */ 3149 struct xfs_btree_cur **ncur, /* new btree cursor */
2861 union xfs_btree_rec *nrec, /* new record */ 3150 union xfs_btree_key *key, /* key of new block */
2862 int *stat) 3151 int *stat)
2863{ 3152{
2864 union xfs_btree_key key; /* new btree key value */
2865 int error = 0; 3153 int error = 0;
2866 3154
2867 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 3155 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
@@ -2871,6 +3159,7 @@ xfs_btree_make_block_unfull(
2871 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { 3159 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2872 /* A root block that can be made bigger. */ 3160 /* A root block that can be made bigger. */
2873 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); 3161 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
3162 *stat = 1;
2874 } else { 3163 } else {
2875 /* A root block that needs replacing */ 3164 /* A root block that needs replacing */
2876 int logflags = 0; 3165 int logflags = 0;
@@ -2906,13 +3195,12 @@ xfs_btree_make_block_unfull(
2906 * If this works we have to re-set our variables because we 3195 * If this works we have to re-set our variables because we
2907 * could be in a different block now. 3196 * could be in a different block now.
2908 */ 3197 */
2909 error = xfs_btree_split(cur, level, nptr, &key, ncur, stat); 3198 error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
2910 if (error || *stat == 0) 3199 if (error || *stat == 0)
2911 return error; 3200 return error;
2912 3201
2913 3202
2914 *index = cur->bc_ptrs[level]; 3203 *index = cur->bc_ptrs[level];
2915 cur->bc_ops->init_rec_from_key(&key, nrec);
2916 return 0; 3204 return 0;
2917} 3205}
2918 3206
@@ -2925,16 +3213,17 @@ xfs_btree_insrec(
2925 struct xfs_btree_cur *cur, /* btree cursor */ 3213 struct xfs_btree_cur *cur, /* btree cursor */
2926 int level, /* level to insert record at */ 3214 int level, /* level to insert record at */
2927 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ 3215 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */
2928 union xfs_btree_rec *recp, /* i/o: record data inserted */ 3216 union xfs_btree_rec *rec, /* record to insert */
3217 union xfs_btree_key *key, /* i/o: block key for ptrp */
2929 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ 3218 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */
2930 int *stat) /* success/failure */ 3219 int *stat) /* success/failure */
2931{ 3220{
2932 struct xfs_btree_block *block; /* btree block */ 3221 struct xfs_btree_block *block; /* btree block */
2933 struct xfs_buf *bp; /* buffer for block */ 3222 struct xfs_buf *bp; /* buffer for block */
2934 union xfs_btree_key key; /* btree key */
2935 union xfs_btree_ptr nptr; /* new block ptr */ 3223 union xfs_btree_ptr nptr; /* new block ptr */
2936 struct xfs_btree_cur *ncur; /* new btree cursor */ 3224 struct xfs_btree_cur *ncur; /* new btree cursor */
2937 union xfs_btree_rec nrec; /* new record count */ 3225 union xfs_btree_bigkey nkey; /* new block key */
3226 union xfs_btree_key *lkey;
2938 int optr; /* old key/record index */ 3227 int optr; /* old key/record index */
2939 int ptr; /* key/record index */ 3228 int ptr; /* key/record index */
2940 int numrecs;/* number of records */ 3229 int numrecs;/* number of records */
@@ -2942,11 +3231,13 @@ xfs_btree_insrec(
2942#ifdef DEBUG 3231#ifdef DEBUG
2943 int i; 3232 int i;
2944#endif 3233#endif
3234 xfs_daddr_t old_bn;
2945 3235
2946 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3236 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2947 XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp); 3237 XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec);
2948 3238
2949 ncur = NULL; 3239 ncur = NULL;
3240 lkey = (union xfs_btree_key *)&nkey;
2950 3241
2951 /* 3242 /*
2952 * If we have an external root pointer, and we've made it to the 3243 * If we have an external root pointer, and we've made it to the
@@ -2969,15 +3260,13 @@ xfs_btree_insrec(
2969 return 0; 3260 return 0;
2970 } 3261 }
2971 3262
2972 /* Make a key out of the record data to be inserted, and save it. */
2973 cur->bc_ops->init_key_from_rec(&key, recp);
2974
2975 optr = ptr; 3263 optr = ptr;
2976 3264
2977 XFS_BTREE_STATS_INC(cur, insrec); 3265 XFS_BTREE_STATS_INC(cur, insrec);
2978 3266
2979 /* Get pointers to the btree buffer and block. */ 3267 /* Get pointers to the btree buffer and block. */
2980 block = xfs_btree_get_block(cur, level, &bp); 3268 block = xfs_btree_get_block(cur, level, &bp);
3269 old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL;
2981 numrecs = xfs_btree_get_numrecs(block); 3270 numrecs = xfs_btree_get_numrecs(block);
2982 3271
2983#ifdef DEBUG 3272#ifdef DEBUG
@@ -2988,10 +3277,10 @@ xfs_btree_insrec(
2988 /* Check that the new entry is being inserted in the right place. */ 3277 /* Check that the new entry is being inserted in the right place. */
2989 if (ptr <= numrecs) { 3278 if (ptr <= numrecs) {
2990 if (level == 0) { 3279 if (level == 0) {
2991 ASSERT(cur->bc_ops->recs_inorder(cur, recp, 3280 ASSERT(cur->bc_ops->recs_inorder(cur, rec,
2992 xfs_btree_rec_addr(cur, ptr, block))); 3281 xfs_btree_rec_addr(cur, ptr, block)));
2993 } else { 3282 } else {
2994 ASSERT(cur->bc_ops->keys_inorder(cur, &key, 3283 ASSERT(cur->bc_ops->keys_inorder(cur, key,
2995 xfs_btree_key_addr(cur, ptr, block))); 3284 xfs_btree_key_addr(cur, ptr, block)));
2996 } 3285 }
2997 } 3286 }
@@ -3004,7 +3293,7 @@ xfs_btree_insrec(
3004 xfs_btree_set_ptr_null(cur, &nptr); 3293 xfs_btree_set_ptr_null(cur, &nptr);
3005 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { 3294 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
3006 error = xfs_btree_make_block_unfull(cur, level, numrecs, 3295 error = xfs_btree_make_block_unfull(cur, level, numrecs,
3007 &optr, &ptr, &nptr, &ncur, &nrec, stat); 3296 &optr, &ptr, &nptr, &ncur, lkey, stat);
3008 if (error || *stat == 0) 3297 if (error || *stat == 0)
3009 goto error0; 3298 goto error0;
3010 } 3299 }
@@ -3054,7 +3343,7 @@ xfs_btree_insrec(
3054#endif 3343#endif
3055 3344
3056 /* Now put the new data in, bump numrecs and log it. */ 3345 /* Now put the new data in, bump numrecs and log it. */
3057 xfs_btree_copy_keys(cur, kp, &key, 1); 3346 xfs_btree_copy_keys(cur, kp, key, 1);
3058 xfs_btree_copy_ptrs(cur, pp, ptrp, 1); 3347 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
3059 numrecs++; 3348 numrecs++;
3060 xfs_btree_set_numrecs(block, numrecs); 3349 xfs_btree_set_numrecs(block, numrecs);
@@ -3075,7 +3364,7 @@ xfs_btree_insrec(
3075 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); 3364 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
3076 3365
3077 /* Now put the new data in, bump numrecs and log it. */ 3366 /* Now put the new data in, bump numrecs and log it. */
3078 xfs_btree_copy_recs(cur, rp, recp, 1); 3367 xfs_btree_copy_recs(cur, rp, rec, 1);
3079 xfs_btree_set_numrecs(block, ++numrecs); 3368 xfs_btree_set_numrecs(block, ++numrecs);
3080 xfs_btree_log_recs(cur, bp, ptr, numrecs); 3369 xfs_btree_log_recs(cur, bp, ptr, numrecs);
3081#ifdef DEBUG 3370#ifdef DEBUG
@@ -3089,9 +3378,18 @@ xfs_btree_insrec(
3089 /* Log the new number of records in the btree header. */ 3378 /* Log the new number of records in the btree header. */
3090 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3379 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3091 3380
3092 /* If we inserted at the start of a block, update the parents' keys. */ 3381 /*
3093 if (optr == 1) { 3382 * If we just inserted into a new tree block, we have to
3094 error = xfs_btree_updkey(cur, &key, level + 1); 3383 * recalculate nkey here because nkey is out of date.
3384 *
3385 * Otherwise we're just updating an existing block (having shoved
3386 * some records into the new tree block), so use the regular key
3387 * update mechanism.
3388 */
3389 if (bp && bp->b_bn != old_bn) {
3390 xfs_btree_get_keys(cur, block, lkey);
3391 } else if (xfs_btree_needs_key_update(cur, optr)) {
3392 error = xfs_btree_update_keys(cur, level);
3095 if (error) 3393 if (error)
3096 goto error0; 3394 goto error0;
3097 } 3395 }
@@ -3101,7 +3399,7 @@ xfs_btree_insrec(
3101 * we are at the far right edge of the tree, update it. 3399 * we are at the far right edge of the tree, update it.
3102 */ 3400 */
3103 if (xfs_btree_is_lastrec(cur, block, level)) { 3401 if (xfs_btree_is_lastrec(cur, block, level)) {
3104 cur->bc_ops->update_lastrec(cur, block, recp, 3402 cur->bc_ops->update_lastrec(cur, block, rec,
3105 ptr, LASTREC_INSREC); 3403 ptr, LASTREC_INSREC);
3106 } 3404 }
3107 3405
@@ -3111,7 +3409,7 @@ xfs_btree_insrec(
3111 */ 3409 */
3112 *ptrp = nptr; 3410 *ptrp = nptr;
3113 if (!xfs_btree_ptr_is_null(cur, &nptr)) { 3411 if (!xfs_btree_ptr_is_null(cur, &nptr)) {
3114 *recp = nrec; 3412 xfs_btree_copy_keys(cur, key, lkey, 1);
3115 *curp = ncur; 3413 *curp = ncur;
3116 } 3414 }
3117 3415
@@ -3142,14 +3440,20 @@ xfs_btree_insert(
3142 union xfs_btree_ptr nptr; /* new block number (split result) */ 3440 union xfs_btree_ptr nptr; /* new block number (split result) */
3143 struct xfs_btree_cur *ncur; /* new cursor (split result) */ 3441 struct xfs_btree_cur *ncur; /* new cursor (split result) */
3144 struct xfs_btree_cur *pcur; /* previous level's cursor */ 3442 struct xfs_btree_cur *pcur; /* previous level's cursor */
3443 union xfs_btree_bigkey bkey; /* key of block to insert */
3444 union xfs_btree_key *key;
3145 union xfs_btree_rec rec; /* record to insert */ 3445 union xfs_btree_rec rec; /* record to insert */
3146 3446
3147 level = 0; 3447 level = 0;
3148 ncur = NULL; 3448 ncur = NULL;
3149 pcur = cur; 3449 pcur = cur;
3450 key = (union xfs_btree_key *)&bkey;
3150 3451
3151 xfs_btree_set_ptr_null(cur, &nptr); 3452 xfs_btree_set_ptr_null(cur, &nptr);
3453
3454 /* Make a key out of the record data to be inserted, and save it. */
3152 cur->bc_ops->init_rec_from_cur(cur, &rec); 3455 cur->bc_ops->init_rec_from_cur(cur, &rec);
3456 cur->bc_ops->init_key_from_rec(key, &rec);
3153 3457
3154 /* 3458 /*
3155 * Loop going up the tree, starting at the leaf level. 3459 * Loop going up the tree, starting at the leaf level.
@@ -3161,7 +3465,8 @@ xfs_btree_insert(
3161 * Insert nrec/nptr into this level of the tree. 3465 * Insert nrec/nptr into this level of the tree.
3162 * Note if we fail, nptr will be null. 3466 * Note if we fail, nptr will be null.
3163 */ 3467 */
3164 error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i); 3468 error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
3469 &ncur, &i);
3165 if (error) { 3470 if (error) {
3166 if (pcur != cur) 3471 if (pcur != cur)
3167 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 3472 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
@@ -3385,8 +3690,6 @@ xfs_btree_delrec(
3385 struct xfs_buf *bp; /* buffer for block */ 3690 struct xfs_buf *bp; /* buffer for block */
3386 int error; /* error return value */ 3691 int error; /* error return value */
3387 int i; /* loop counter */ 3692 int i; /* loop counter */
3388 union xfs_btree_key key; /* storage for keyp */
3389 union xfs_btree_key *keyp = &key; /* passed to the next level */
3390 union xfs_btree_ptr lptr; /* left sibling block ptr */ 3693 union xfs_btree_ptr lptr; /* left sibling block ptr */
3391 struct xfs_buf *lbp; /* left buffer pointer */ 3694 struct xfs_buf *lbp; /* left buffer pointer */
3392 struct xfs_btree_block *left; /* left btree block */ 3695 struct xfs_btree_block *left; /* left btree block */
@@ -3457,13 +3760,6 @@ xfs_btree_delrec(
3457 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); 3760 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3458 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); 3761 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3459 } 3762 }
3460
3461 /*
3462 * If it's the first record in the block, we'll need to pass a
3463 * key up to the next level (updkey).
3464 */
3465 if (ptr == 1)
3466 keyp = xfs_btree_key_addr(cur, 1, block);
3467 } else { 3763 } else {
3468 /* It's a leaf. operate on records */ 3764 /* It's a leaf. operate on records */
3469 if (ptr < numrecs) { 3765 if (ptr < numrecs) {
@@ -3472,16 +3768,6 @@ xfs_btree_delrec(
3472 -1, numrecs - ptr); 3768 -1, numrecs - ptr);
3473 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); 3769 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3474 } 3770 }
3475
3476 /*
3477 * If it's the first record in the block, we'll need a key
3478 * structure to pass up to the next level (updkey).
3479 */
3480 if (ptr == 1) {
3481 cur->bc_ops->init_key_from_rec(&key,
3482 xfs_btree_rec_addr(cur, 1, block));
3483 keyp = &key;
3484 }
3485 } 3771 }
3486 3772
3487 /* 3773 /*
@@ -3548,8 +3834,8 @@ xfs_btree_delrec(
3548 * If we deleted the leftmost entry in the block, update the 3834 * If we deleted the leftmost entry in the block, update the
3549 * key values above us in the tree. 3835 * key values above us in the tree.
3550 */ 3836 */
3551 if (ptr == 1) { 3837 if (xfs_btree_needs_key_update(cur, ptr)) {
3552 error = xfs_btree_updkey(cur, keyp, level + 1); 3838 error = xfs_btree_update_keys(cur, level);
3553 if (error) 3839 if (error)
3554 goto error0; 3840 goto error0;
3555 } 3841 }
@@ -3878,6 +4164,16 @@ xfs_btree_delrec(
3878 if (level > 0) 4164 if (level > 0)
3879 cur->bc_ptrs[level]--; 4165 cur->bc_ptrs[level]--;
3880 4166
4167 /*
4168 * We combined blocks, so we have to update the parent keys if the
4169 * btree supports overlapped intervals. However, bc_ptrs[level + 1]
4170 * points to the old block so that the caller knows which record to
4171 * delete. Therefore, the caller must be savvy enough to call updkeys
4172 * for us if we return stat == 2. The other exit points from this
4173 * function don't require deletions further up the tree, so they can
4174 * call updkeys directly.
4175 */
4176
3881 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 4177 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3882 /* Return value means the next level up has something to do. */ 4178 /* Return value means the next level up has something to do. */
3883 *stat = 2; 4179 *stat = 2;
@@ -3903,6 +4199,7 @@ xfs_btree_delete(
3903 int error; /* error return value */ 4199 int error; /* error return value */
3904 int level; 4200 int level;
3905 int i; 4201 int i;
4202 bool joined = false;
3906 4203
3907 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 4204 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3908 4205
@@ -3916,6 +4213,18 @@ xfs_btree_delete(
3916 error = xfs_btree_delrec(cur, level, &i); 4213 error = xfs_btree_delrec(cur, level, &i);
3917 if (error) 4214 if (error)
3918 goto error0; 4215 goto error0;
4216 if (i == 2)
4217 joined = true;
4218 }
4219
4220 /*
4221 * If we combined blocks as part of deleting the record, delrec won't
4222 * have updated the parent high keys so we have to do that here.
4223 */
4224 if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
4225 error = xfs_btree_updkeys_force(cur, 0);
4226 if (error)
4227 goto error0;
3919 } 4228 }
3920 4229
3921 if (i == 0) { 4230 if (i == 0) {
@@ -3978,6 +4287,81 @@ xfs_btree_get_rec(
3978 return 0; 4287 return 0;
3979} 4288}
3980 4289
4290/* Visit a block in a btree. */
4291STATIC int
4292xfs_btree_visit_block(
4293 struct xfs_btree_cur *cur,
4294 int level,
4295 xfs_btree_visit_blocks_fn fn,
4296 void *data)
4297{
4298 struct xfs_btree_block *block;
4299 struct xfs_buf *bp;
4300 union xfs_btree_ptr rptr;
4301 int error;
4302
4303 /* do right sibling readahead */
4304 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4305 block = xfs_btree_get_block(cur, level, &bp);
4306
4307 /* process the block */
4308 error = fn(cur, level, data);
4309 if (error)
4310 return error;
4311
4312 /* now read rh sibling block for next iteration */
4313 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4314 if (xfs_btree_ptr_is_null(cur, &rptr))
4315 return -ENOENT;
4316
4317 return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4318}
4319
4320
4321/* Visit every block in a btree. */
4322int
4323xfs_btree_visit_blocks(
4324 struct xfs_btree_cur *cur,
4325 xfs_btree_visit_blocks_fn fn,
4326 void *data)
4327{
4328 union xfs_btree_ptr lptr;
4329 int level;
4330 struct xfs_btree_block *block = NULL;
4331 int error = 0;
4332
4333 cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4334
4335 /* for each level */
4336 for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4337 /* grab the left hand block */
4338 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4339 if (error)
4340 return error;
4341
4342 /* readahead the left most block for the next level down */
4343 if (level > 0) {
4344 union xfs_btree_ptr *ptr;
4345
4346 ptr = xfs_btree_ptr_addr(cur, 1, block);
4347 xfs_btree_readahead_ptr(cur, ptr, 1);
4348
4349 /* save for the next iteration of the loop */
4350 lptr = *ptr;
4351 }
4352
4353 /* for each buffer in the level */
4354 do {
4355 error = xfs_btree_visit_block(cur, level, fn, data);
4356 } while (!error);
4357
4358 if (error != -ENOENT)
4359 return error;
4360 }
4361
4362 return 0;
4363}
4364
3981/* 4365/*
3982 * Change the owner of a btree. 4366 * Change the owner of a btree.
3983 * 4367 *
@@ -4002,26 +4386,27 @@ xfs_btree_get_rec(
4002 * just queue the modified buffer as delayed write buffer so the transaction 4386 * just queue the modified buffer as delayed write buffer so the transaction
4003 * recovery completion writes the changes to disk. 4387 * recovery completion writes the changes to disk.
4004 */ 4388 */
4389struct xfs_btree_block_change_owner_info {
4390 __uint64_t new_owner;
4391 struct list_head *buffer_list;
4392};
4393
4005static int 4394static int
4006xfs_btree_block_change_owner( 4395xfs_btree_block_change_owner(
4007 struct xfs_btree_cur *cur, 4396 struct xfs_btree_cur *cur,
4008 int level, 4397 int level,
4009 __uint64_t new_owner, 4398 void *data)
4010 struct list_head *buffer_list)
4011{ 4399{
4400 struct xfs_btree_block_change_owner_info *bbcoi = data;
4012 struct xfs_btree_block *block; 4401 struct xfs_btree_block *block;
4013 struct xfs_buf *bp; 4402 struct xfs_buf *bp;
4014 union xfs_btree_ptr rptr;
4015
4016 /* do right sibling readahead */
4017 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4018 4403
4019 /* modify the owner */ 4404 /* modify the owner */
4020 block = xfs_btree_get_block(cur, level, &bp); 4405 block = xfs_btree_get_block(cur, level, &bp);
4021 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 4406 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
4022 block->bb_u.l.bb_owner = cpu_to_be64(new_owner); 4407 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
4023 else 4408 else
4024 block->bb_u.s.bb_owner = cpu_to_be32(new_owner); 4409 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
4025 4410
4026 /* 4411 /*
4027 * If the block is a root block hosted in an inode, we might not have a 4412 * If the block is a root block hosted in an inode, we might not have a
@@ -4035,19 +4420,14 @@ xfs_btree_block_change_owner(
4035 xfs_trans_ordered_buf(cur->bc_tp, bp); 4420 xfs_trans_ordered_buf(cur->bc_tp, bp);
4036 xfs_btree_log_block(cur, bp, XFS_BB_OWNER); 4421 xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
4037 } else { 4422 } else {
4038 xfs_buf_delwri_queue(bp, buffer_list); 4423 xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
4039 } 4424 }
4040 } else { 4425 } else {
4041 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 4426 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
4042 ASSERT(level == cur->bc_nlevels - 1); 4427 ASSERT(level == cur->bc_nlevels - 1);
4043 } 4428 }
4044 4429
4045 /* now read rh sibling block for next iteration */ 4430 return 0;
4046 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4047 if (xfs_btree_ptr_is_null(cur, &rptr))
4048 return -ENOENT;
4049
4050 return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4051} 4431}
4052 4432
4053int 4433int
@@ -4056,43 +4436,13 @@ xfs_btree_change_owner(
4056 __uint64_t new_owner, 4436 __uint64_t new_owner,
4057 struct list_head *buffer_list) 4437 struct list_head *buffer_list)
4058{ 4438{
4059 union xfs_btree_ptr lptr; 4439 struct xfs_btree_block_change_owner_info bbcoi;
4060 int level;
4061 struct xfs_btree_block *block = NULL;
4062 int error = 0;
4063
4064 cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4065
4066 /* for each level */
4067 for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4068 /* grab the left hand block */
4069 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4070 if (error)
4071 return error;
4072
4073 /* readahead the left most block for the next level down */
4074 if (level > 0) {
4075 union xfs_btree_ptr *ptr;
4076
4077 ptr = xfs_btree_ptr_addr(cur, 1, block);
4078 xfs_btree_readahead_ptr(cur, ptr, 1);
4079
4080 /* save for the next iteration of the loop */
4081 lptr = *ptr;
4082 }
4083
4084 /* for each buffer in the level */
4085 do {
4086 error = xfs_btree_block_change_owner(cur, level,
4087 new_owner,
4088 buffer_list);
4089 } while (!error);
4090 4440
4091 if (error != -ENOENT) 4441 bbcoi.new_owner = new_owner;
4092 return error; 4442 bbcoi.buffer_list = buffer_list;
4093 }
4094 4443
4095 return 0; 4444 return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
4445 &bbcoi);
4096} 4446}
4097 4447
4098/** 4448/**
@@ -4171,3 +4521,267 @@ xfs_btree_compute_maxlevels(
4171 maxblocks = (maxblocks + limits[1] - 1) / limits[1]; 4521 maxblocks = (maxblocks + limits[1] - 1) / limits[1];
4172 return level; 4522 return level;
4173} 4523}
4524
4525/*
4526 * Query a regular btree for all records overlapping a given interval.
4527 * Start with a LE lookup of the key of low_rec and return all records
4528 * until we find a record with a key greater than the key of high_rec.
4529 */
4530STATIC int
4531xfs_btree_simple_query_range(
4532 struct xfs_btree_cur *cur,
4533 union xfs_btree_key *low_key,
4534 union xfs_btree_key *high_key,
4535 xfs_btree_query_range_fn fn,
4536 void *priv)
4537{
4538 union xfs_btree_rec *recp;
4539 union xfs_btree_key rec_key;
4540 __int64_t diff;
4541 int stat;
4542 bool firstrec = true;
4543 int error;
4544
4545 ASSERT(cur->bc_ops->init_high_key_from_rec);
4546 ASSERT(cur->bc_ops->diff_two_keys);
4547
4548 /*
4549 * Find the leftmost record. The btree cursor must be set
4550 * to the low record used to generate low_key.
4551 */
4552 stat = 0;
4553 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
4554 if (error)
4555 goto out;
4556
4557 while (stat) {
4558 /* Find the record. */
4559 error = xfs_btree_get_rec(cur, &recp, &stat);
4560 if (error || !stat)
4561 break;
4562 cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
4563
4564 /* Skip if high_key(rec) < low_key. */
4565 if (firstrec) {
4566 firstrec = false;
4567 diff = cur->bc_ops->diff_two_keys(cur, low_key,
4568 &rec_key);
4569 if (diff > 0)
4570 goto advloop;
4571 }
4572
4573 /* Stop if high_key < low_key(rec). */
4574 diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
4575 if (diff > 0)
4576 break;
4577
4578 /* Callback */
4579 error = fn(cur, recp, priv);
4580 if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT)
4581 break;
4582
4583advloop:
4584 /* Move on to the next record. */
4585 error = xfs_btree_increment(cur, 0, &stat);
4586 if (error)
4587 break;
4588 }
4589
4590out:
4591 return error;
4592}
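/*
 * An illustrative walk-through of the simple range query, with made-up
 * key ranges.  Assume non-overlapping records spanning keys [5-7],
 * [10-12] and [20-25], and a query interval of [8-15]:
 *
 *  1. The LE lookup of low key 8 positions the cursor at [5-7].
 *  2. First iteration: the record's high key 7 is below the query low
 *     key 8, so the record is skipped via advloop.
 *  3. [10-12]: the record lies inside the query interval, so fn() is
 *     invoked for it.
 *  4. [20-25]: its high key 25 exceeds the query high key 15, so the
 *     scan terminates.
 */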
4593
4594/*
4595 * Query an overlapped interval btree for all records overlapping a given
4596 * interval. This function roughly follows the algorithm given in
4597 * "Interval Trees" of _Introduction to Algorithms_, which is section
4598 * 14.3 in the 2nd and 3rd editions.
4599 *
4600 * First, generate keys for the low and high records passed in.
4601 *
4602 * For any leaf node, generate the high and low keys for the record.
4603 * If the record keys overlap with the query low/high keys, pass the
4604 * record to the function iterator.
4605 *
4606 * For any internal node, compare the low and high keys of each
4607 * pointer against the query low/high keys. If there's an overlap,
4608 * follow the pointer.
4609 *
4610 * As an optimization, we stop scanning a block when we find a low key
4611 * that is greater than the query's high key.
4612 */
4613STATIC int
4614xfs_btree_overlapped_query_range(
4615 struct xfs_btree_cur *cur,
4616 union xfs_btree_key *low_key,
4617 union xfs_btree_key *high_key,
4618 xfs_btree_query_range_fn fn,
4619 void *priv)
4620{
4621 union xfs_btree_ptr ptr;
4622 union xfs_btree_ptr *pp;
4623 union xfs_btree_key rec_key;
4624 union xfs_btree_key rec_hkey;
4625 union xfs_btree_key *lkp;
4626 union xfs_btree_key *hkp;
4627 union xfs_btree_rec *recp;
4628 struct xfs_btree_block *block;
4629 __int64_t ldiff;
4630 __int64_t hdiff;
4631 int level;
4632 struct xfs_buf *bp;
4633 int i;
4634 int error;
4635
4636 /* Load the root of the btree. */
4637 level = cur->bc_nlevels - 1;
4638 cur->bc_ops->init_ptr_from_cur(cur, &ptr);
4639 error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
4640 if (error)
4641 return error;
4642 xfs_btree_get_block(cur, level, &bp);
4643 trace_xfs_btree_overlapped_query_range(cur, level, bp);
4644#ifdef DEBUG
4645 error = xfs_btree_check_block(cur, block, level, bp);
4646 if (error)
4647 goto out;
4648#endif
4649 cur->bc_ptrs[level] = 1;
4650
4651 while (level < cur->bc_nlevels) {
4652 block = xfs_btree_get_block(cur, level, &bp);
4653
4654 /* End of node, pop back towards the root. */
4655 if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
4656pop_up:
4657 if (level < cur->bc_nlevels - 1)
4658 cur->bc_ptrs[level + 1]++;
4659 level++;
4660 continue;
4661 }
4662
4663 if (level == 0) {
4664 /* Handle a leaf node. */
4665 recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
4666
4667 cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
4668 ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey,
4669 low_key);
4670
4671 cur->bc_ops->init_key_from_rec(&rec_key, recp);
4672 hdiff = cur->bc_ops->diff_two_keys(cur, high_key,
4673 &rec_key);
4674
4675 /*
4676 * If (record's high key >= query's low key) and
4677 * (query's high key >= record's low key), then
4678 * this record overlaps the query range; callback.
4679 */
4680 if (ldiff >= 0 && hdiff >= 0) {
4681 error = fn(cur, recp, priv);
4682 if (error < 0 ||
4683 error == XFS_BTREE_QUERY_RANGE_ABORT)
4684 break;
4685 } else if (hdiff < 0) {
4686 /* Record is larger than high key; pop. */
4687 goto pop_up;
4688 }
4689 cur->bc_ptrs[level]++;
4690 continue;
4691 }
4692
4693 /* Handle an internal node. */
4694 lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
4695 hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
4696 pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
4697
4698 ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key);
4699 hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp);
4700
4701 /*
4702 * If (pointer's high key >= query's low key) and
4703 * (query's high key >= pointer's low key), then
4704 * this record overlaps the query range; follow pointer.
4705 */
4706 if (ldiff >= 0 && hdiff >= 0) {
4707 level--;
4708 error = xfs_btree_lookup_get_block(cur, level, pp,
4709 &block);
4710 if (error)
4711 goto out;
4712 xfs_btree_get_block(cur, level, &bp);
4713 trace_xfs_btree_overlapped_query_range(cur, level, bp);
4714#ifdef DEBUG
4715 error = xfs_btree_check_block(cur, block, level, bp);
4716 if (error)
4717 goto out;
4718#endif
4719 cur->bc_ptrs[level] = 1;
4720 continue;
4721 } else if (hdiff < 0) {
4722 /* The low key is larger than the upper range; pop. */
4723 goto pop_up;
4724 }
4725 cur->bc_ptrs[level]++;
4726 }
4727
4728out:
4729 /*
4730 * If we don't end this function with the cursor pointing at a record
4731 * block, a subsequent non-error cursor deletion will not release
4732 * node-level buffers, causing a buffer leak. This is quite possible
4733 * with a zero-results range query, so release the buffers if we
4734 * failed to return any results.
4735 */
4736 if (cur->bc_bufs[0] == NULL) {
4737 for (i = 0; i < cur->bc_nlevels; i++) {
4738 if (cur->bc_bufs[i]) {
4739 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
4740 cur->bc_bufs[i] = NULL;
4741 cur->bc_ptrs[i] = 0;
4742 cur->bc_ra[i] = 0;
4743 }
4744 }
4745 }
4746
4747 return error;
4748}
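/*
 * An illustrative walk-through of the overlapped query, with made-up
 * intervals.  Assume a two-level tree whose root holds two pointers with
 * (low, high) key ranges [1-10] and [8-20]; the first leaf contains the
 * records [1-3] and [5-10], the second contains [8-9] and [15-20].  For
 * a query interval of [9-12]:
 *
 *  - Root, pointer [1-10]: high 10 >= 9 and 12 >= low 1, so descend.
 *  - Leaf record [1-3]: high 3 < 9, no overlap; advance to the next record.
 *  - Leaf record [5-10]: overlaps, so fn() is invoked; the leaf is
 *    exhausted, so pop back up to the root.
 *  - Root, pointer [8-20]: high 20 >= 9 and 12 >= low 8, so descend.
 *  - Leaf record [8-9]: overlaps, so fn() is invoked.
 *  - Leaf record [15-20]: low 15 > query high 12, so pop up; the root is
 *    also exhausted and the walk ends.
 */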
4749
4750/*
4751 * Query a btree for all records overlapping a given interval of keys. The
4752 * supplied function will be called with each record found; return one of the
4753 * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
4754 * code. This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a
4755 * negative error code.
4756 */
4757int
4758xfs_btree_query_range(
4759 struct xfs_btree_cur *cur,
4760 union xfs_btree_irec *low_rec,
4761 union xfs_btree_irec *high_rec,
4762 xfs_btree_query_range_fn fn,
4763 void *priv)
4764{
4765 union xfs_btree_rec rec;
4766 union xfs_btree_key low_key;
4767 union xfs_btree_key high_key;
4768
4769 /* Find the keys of both ends of the interval. */
4770 cur->bc_rec = *high_rec;
4771 cur->bc_ops->init_rec_from_cur(cur, &rec);
4772 cur->bc_ops->init_key_from_rec(&high_key, &rec);
4773
4774 cur->bc_rec = *low_rec;
4775 cur->bc_ops->init_rec_from_cur(cur, &rec);
4776 cur->bc_ops->init_key_from_rec(&low_key, &rec);
4777
4778 /* Enforce low key < high key. */
4779 if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0)
4780 return -EINVAL;
4781
4782 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
4783 return xfs_btree_simple_query_range(cur, &low_key,
4784 &high_key, fn, priv);
4785 return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
4786 fn, priv);
4787}
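To make the new query interface concrete, here is a minimal, hypothetical sketch of a caller that counts the reverse mappings overlapping a block range in one AG. The helper names are invented for illustration, the rmapbt cursor is assumed to be supplied by the caller, and only the startblock fields of the in-core records are filled in to describe the query window; a real caller would populate the full record.

/* Hypothetical callback: count each record that overlaps the window. */
STATIC int
xfs_example_count_helper(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec,
	void			*priv)
{
	int			*count = priv;

	(*count)++;
	return XFS_BTREE_QUERY_RANGE_CONTINUE;
}

/* Count rmap records overlapping [bno, bno + len) under an rmapbt cursor. */
STATIC int
xfs_example_count_rmaps(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	int			*count)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	memset(&low, 0, sizeof(low));
	memset(&high, 0, sizeof(high));
	low.r.rm_startblock = bno;
	high.r.rm_startblock = bno + len - 1;

	*count = 0;
	return xfs_btree_query_range(cur, &low, &high,
			xfs_example_count_helper, count);
}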
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 785a99682159..04d0865e5e6d 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -19,7 +19,7 @@
19#define __XFS_BTREE_H__ 19#define __XFS_BTREE_H__
20 20
21struct xfs_buf; 21struct xfs_buf;
22struct xfs_bmap_free; 22struct xfs_defer_ops;
23struct xfs_inode; 23struct xfs_inode;
24struct xfs_mount; 24struct xfs_mount;
25struct xfs_trans; 25struct xfs_trans;
@@ -38,17 +38,37 @@ union xfs_btree_ptr {
38}; 38};
39 39
40union xfs_btree_key { 40union xfs_btree_key {
41 xfs_bmbt_key_t bmbt; 41 struct xfs_bmbt_key bmbt;
42 xfs_bmdr_key_t bmbr; /* bmbt root block */ 42 xfs_bmdr_key_t bmbr; /* bmbt root block */
43 xfs_alloc_key_t alloc; 43 xfs_alloc_key_t alloc;
44 xfs_inobt_key_t inobt; 44 struct xfs_inobt_key inobt;
45 struct xfs_rmap_key rmap;
46};
47
48/*
49 * In-core key that holds both low and high keys for overlapped btrees.
50 * The two keys are packed next to each other on disk, so do the same
51 * in memory. Preserve the existing xfs_btree_key as a single key to
52 * avoid the mental model breakage that would happen if we passed a
53 * bigkey into a function that operates on a single key.
54 */
55union xfs_btree_bigkey {
56 struct xfs_bmbt_key bmbt;
57 xfs_bmdr_key_t bmbr; /* bmbt root block */
58 xfs_alloc_key_t alloc;
59 struct xfs_inobt_key inobt;
60 struct {
61 struct xfs_rmap_key rmap;
62 struct xfs_rmap_key rmap_hi;
63 };
45}; 64};
46 65
47union xfs_btree_rec { 66union xfs_btree_rec {
48 xfs_bmbt_rec_t bmbt; 67 struct xfs_bmbt_rec bmbt;
49 xfs_bmdr_rec_t bmbr; /* bmbt root block */ 68 xfs_bmdr_rec_t bmbr; /* bmbt root block */
50 xfs_alloc_rec_t alloc; 69 struct xfs_alloc_rec alloc;
51 xfs_inobt_rec_t inobt; 70 struct xfs_inobt_rec inobt;
71 struct xfs_rmap_rec rmap;
52}; 72};
53 73
54/* 74/*
@@ -63,6 +83,7 @@ union xfs_btree_rec {
63#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) 83#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi)
64#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) 84#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
65#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi) 85#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi)
86#define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi)
66 87
67/* 88/*
68 * For logging record fields. 89 * For logging record fields.
@@ -95,6 +116,7 @@ do { \
95 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \ 116 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \
96 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \ 117 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \
97 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \ 118 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \
119 case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(__mp, rmap, stat); break; \
98 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 120 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
99 } \ 121 } \
100} while (0) 122} while (0)
@@ -115,11 +137,13 @@ do { \
115 __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \ 137 __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \
116 case XFS_BTNUM_FINO: \ 138 case XFS_BTNUM_FINO: \
117 __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \ 139 __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \
140 case XFS_BTNUM_RMAP: \
141 __XFS_BTREE_STATS_ADD(__mp, rmap, stat, val); break; \
118 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 142 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
119 } \ 143 } \
120} while (0) 144} while (0)
121 145
122#define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */ 146#define XFS_BTREE_MAXLEVELS 9 /* max of all btrees */
123 147
124struct xfs_btree_ops { 148struct xfs_btree_ops {
125 /* size of the key and record structures */ 149 /* size of the key and record structures */
@@ -158,17 +182,25 @@ struct xfs_btree_ops {
158 /* init values of btree structures */ 182 /* init values of btree structures */
159 void (*init_key_from_rec)(union xfs_btree_key *key, 183 void (*init_key_from_rec)(union xfs_btree_key *key,
160 union xfs_btree_rec *rec); 184 union xfs_btree_rec *rec);
161 void (*init_rec_from_key)(union xfs_btree_key *key,
162 union xfs_btree_rec *rec);
163 void (*init_rec_from_cur)(struct xfs_btree_cur *cur, 185 void (*init_rec_from_cur)(struct xfs_btree_cur *cur,
164 union xfs_btree_rec *rec); 186 union xfs_btree_rec *rec);
165 void (*init_ptr_from_cur)(struct xfs_btree_cur *cur, 187 void (*init_ptr_from_cur)(struct xfs_btree_cur *cur,
166 union xfs_btree_ptr *ptr); 188 union xfs_btree_ptr *ptr);
189 void (*init_high_key_from_rec)(union xfs_btree_key *key,
190 union xfs_btree_rec *rec);
167 191
168 /* difference between key value and cursor value */ 192 /* difference between key value and cursor value */
169 __int64_t (*key_diff)(struct xfs_btree_cur *cur, 193 __int64_t (*key_diff)(struct xfs_btree_cur *cur,
170 union xfs_btree_key *key); 194 union xfs_btree_key *key);
171 195
196 /*
197 * Compute key1 - key2 -- positive if key1 > key2,
198 * negative if key1 < key2, and zero if equal.
199 */
200 __int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
201 union xfs_btree_key *key1,
202 union xfs_btree_key *key2);
203
172 const struct xfs_buf_ops *buf_ops; 204 const struct xfs_buf_ops *buf_ops;
173 205
174#if defined(DEBUG) || defined(XFS_WARN) 206#if defined(DEBUG) || defined(XFS_WARN)
@@ -192,6 +224,13 @@ struct xfs_btree_ops {
192#define LASTREC_DELREC 2 224#define LASTREC_DELREC 2
193 225
194 226
227union xfs_btree_irec {
228 struct xfs_alloc_rec_incore a;
229 struct xfs_bmbt_irec b;
230 struct xfs_inobt_rec_incore i;
231 struct xfs_rmap_irec r;
232};
233
195/* 234/*
196 * Btree cursor structure. 235 * Btree cursor structure.
197 * This collects all information needed by the btree code in one place. 236 * This collects all information needed by the btree code in one place.
@@ -202,11 +241,7 @@ typedef struct xfs_btree_cur
202 struct xfs_mount *bc_mp; /* file system mount struct */ 241 struct xfs_mount *bc_mp; /* file system mount struct */
203 const struct xfs_btree_ops *bc_ops; 242 const struct xfs_btree_ops *bc_ops;
204 uint bc_flags; /* btree features - below */ 243 uint bc_flags; /* btree features - below */
205 union { 244 union xfs_btree_irec bc_rec; /* current insert/search record value */
206 xfs_alloc_rec_incore_t a;
207 xfs_bmbt_irec_t b;
208 xfs_inobt_rec_incore_t i;
209 } bc_rec; /* current insert/search record value */
210 struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */ 245 struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */
211 int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */ 246 int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */
212 __uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */ 247 __uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */
@@ -218,11 +253,12 @@ typedef struct xfs_btree_cur
218 union { 253 union {
219 struct { /* needed for BNO, CNT, INO */ 254 struct { /* needed for BNO, CNT, INO */
220 struct xfs_buf *agbp; /* agf/agi buffer pointer */ 255 struct xfs_buf *agbp; /* agf/agi buffer pointer */
256 struct xfs_defer_ops *dfops; /* deferred updates */
221 xfs_agnumber_t agno; /* ag number */ 257 xfs_agnumber_t agno; /* ag number */
222 } a; 258 } a;
223 struct { /* needed for BMAP */ 259 struct { /* needed for BMAP */
224 struct xfs_inode *ip; /* pointer to our inode */ 260 struct xfs_inode *ip; /* pointer to our inode */
225 struct xfs_bmap_free *flist; /* list to free after */ 261 struct xfs_defer_ops *dfops; /* deferred updates */
226 xfs_fsblock_t firstblock; /* 1st blk allocated */ 262 xfs_fsblock_t firstblock; /* 1st blk allocated */
227 int allocated; /* count of alloced */ 263 int allocated; /* count of alloced */
228 short forksize; /* fork's inode space */ 264 short forksize; /* fork's inode space */
@@ -238,6 +274,7 @@ typedef struct xfs_btree_cur
238#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */ 274#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */
239#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */ 275#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */
240#define XFS_BTREE_CRC_BLOCKS (1<<3) /* uses extended btree blocks */ 276#define XFS_BTREE_CRC_BLOCKS (1<<3) /* uses extended btree blocks */
277#define XFS_BTREE_OVERLAPPING (1<<4) /* overlapping intervals */
241 278
242 279
243#define XFS_BTREE_NOERROR 0 280#define XFS_BTREE_NOERROR 0
@@ -477,4 +514,19 @@ bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
477uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, 514uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits,
478 unsigned long len); 515 unsigned long len);
479 516
517/* return codes */
518#define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */
519#define XFS_BTREE_QUERY_RANGE_ABORT 1 /* stop iterating */
520typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
521 union xfs_btree_rec *rec, void *priv);
522
523int xfs_btree_query_range(struct xfs_btree_cur *cur,
524 union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec,
525 xfs_btree_query_range_fn fn, void *priv);
526
527typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level,
528 void *data);
529int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
530 xfs_btree_visit_blocks_fn fn, void *data);
531
480#endif /* __XFS_BTREE_H__ */ 532#endif /* __XFS_BTREE_H__ */
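The xfs_btree_visit_blocks() declaration above pairs a per-block callback with an opaque data pointer, mirroring the owner-change conversion earlier in this patch. A minimal, hypothetical user that simply counts the blocks in a btree might look like the following sketch (names invented for illustration):

/* Hypothetical callback: bump a counter for every block visited. */
static int
xfs_example_count_blocks_fn(
	struct xfs_btree_cur	*cur,
	int			level,
	void			*data)
{
	xfs_extlen_t		*blocks = data;

	(*blocks)++;
	return 0;
}

/* Walk every block of the btree behind 'cur' and return the block count. */
static int
xfs_example_count_blocks(
	struct xfs_btree_cur	*cur,
	xfs_extlen_t		*blocks)
{
	*blocks = 0;
	return xfs_btree_visit_blocks(cur, xfs_example_count_blocks_fn, blocks);
}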
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 0f1f165f4048..f2dc1a950c85 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2029,7 +2029,7 @@ xfs_da_grow_inode_int(
2029 error = xfs_bmapi_write(tp, dp, *bno, count, 2029 error = xfs_bmapi_write(tp, dp, *bno, count,
2030 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, 2030 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
2031 args->firstblock, args->total, &map, &nmap, 2031 args->firstblock, args->total, &map, &nmap,
2032 args->flist); 2032 args->dfops);
2033 if (error) 2033 if (error)
2034 return error; 2034 return error;
2035 2035
@@ -2052,7 +2052,7 @@ xfs_da_grow_inode_int(
2052 error = xfs_bmapi_write(tp, dp, b, c, 2052 error = xfs_bmapi_write(tp, dp, b, c,
2053 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, 2053 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
2054 args->firstblock, args->total, 2054 args->firstblock, args->total,
2055 &mapp[mapi], &nmap, args->flist); 2055 &mapp[mapi], &nmap, args->dfops);
2056 if (error) 2056 if (error)
2057 goto out_free_map; 2057 goto out_free_map;
2058 if (nmap < 1) 2058 if (nmap < 1)
@@ -2362,7 +2362,7 @@ xfs_da_shrink_inode(
2362 */ 2362 */
2363 error = xfs_bunmapi(tp, dp, dead_blkno, count, 2363 error = xfs_bunmapi(tp, dp, dead_blkno, count,
2364 xfs_bmapi_aflag(w), 0, args->firstblock, 2364 xfs_bmapi_aflag(w), 0, args->firstblock,
2365 args->flist, &done); 2365 args->dfops, &done);
2366 if (error == -ENOSPC) { 2366 if (error == -ENOSPC) {
2367 if (w != XFS_DATA_FORK) 2367 if (w != XFS_DATA_FORK)
2368 break; 2368 break;
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 6e153e399a77..98c75cbe6ac2 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -19,7 +19,7 @@
19#ifndef __XFS_DA_BTREE_H__ 19#ifndef __XFS_DA_BTREE_H__
20#define __XFS_DA_BTREE_H__ 20#define __XFS_DA_BTREE_H__
21 21
22struct xfs_bmap_free; 22struct xfs_defer_ops;
23struct xfs_inode; 23struct xfs_inode;
24struct xfs_trans; 24struct xfs_trans;
25struct zone; 25struct zone;
@@ -70,7 +70,7 @@ typedef struct xfs_da_args {
70 xfs_ino_t inumber; /* input/output inode number */ 70 xfs_ino_t inumber; /* input/output inode number */
71 struct xfs_inode *dp; /* directory inode to manipulate */ 71 struct xfs_inode *dp; /* directory inode to manipulate */
72 xfs_fsblock_t *firstblock; /* ptr to firstblock for bmap calls */ 72 xfs_fsblock_t *firstblock; /* ptr to firstblock for bmap calls */
73 struct xfs_bmap_free *flist; /* ptr to freelist for bmap_finish */ 73 struct xfs_defer_ops *dfops; /* ptr to freelist for bmap_finish */
74 struct xfs_trans *trans; /* current trans (changes over time) */ 74 struct xfs_trans *trans; /* current trans (changes over time) */
75 xfs_extlen_t total; /* total blocks needed, for 1st bmap */ 75 xfs_extlen_t total; /* total blocks needed, for 1st bmap */
76 int whichfork; /* data or attribute fork */ 76 int whichfork; /* data or attribute fork */
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 685f23b67056..9a492a9e19bd 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -629,6 +629,7 @@ typedef struct xfs_attr_shortform {
629 struct xfs_attr_sf_hdr { /* constant-structure header block */ 629 struct xfs_attr_sf_hdr { /* constant-structure header block */
630 __be16 totsize; /* total bytes in shortform list */ 630 __be16 totsize; /* total bytes in shortform list */
631 __u8 count; /* count of active entries */ 631 __u8 count; /* count of active entries */
632 __u8 padding;
632 } hdr; 633 } hdr;
633 struct xfs_attr_sf_entry { 634 struct xfs_attr_sf_entry {
634 __uint8_t namelen; /* actual length of name (no NULL) */ 635 __uint8_t namelen; /* actual length of name (no NULL) */
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
new file mode 100644
index 000000000000..054a2032fdb3
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -0,0 +1,463 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_log_format.h"
25#include "xfs_trans_resv.h"
26#include "xfs_bit.h"
27#include "xfs_sb.h"
28#include "xfs_mount.h"
29#include "xfs_defer.h"
30#include "xfs_trans.h"
31#include "xfs_trace.h"
32
33/*
34 * Deferred Operations in XFS
35 *
36 * Due to the way locking rules work in XFS, certain transactions (block
37 * mapping and unmapping, typically) have permanent reservations so that
38 * we can roll the transaction to adhere to AG locking order rules and
39 * to unlock buffers between metadata updates. Prior to rmap/reflink,
40 * the mapping code had a mechanism to perform these deferrals for
41 * extents that were going to be freed; this code makes that facility
42 * more generic.
43 *
44 * When adding the reverse mapping and reflink features, it became
45 * necessary to perform complex remapping multi-transactions to comply
46 * with AG locking order rules, and to be able to spread a single
47 * refcount update operation (an operation on an n-block extent can
48 * update as many as n records!) among multiple transactions. XFS can
49 * roll a transaction to facilitate this, but using this facility
50 * requires us to log "intent" items in case log recovery needs to
51 * redo the operation, and to log "done" items to indicate that redo
52 * is not necessary.
53 *
54 * Deferred work is tracked in xfs_defer_pending items. Each pending
55 * item tracks one type of deferred work. Incoming work items (which
56 * have not yet had an intent logged) are attached to a pending item
57 * on the dop_intake list, where they wait for the caller to finish
58 * the deferred operations.
59 *
60 * Finishing a set of deferred operations is an involved process. To
61 * start, we define "rolling a deferred-op transaction" as follows:
62 *
63 * > For each xfs_defer_pending item on the dop_intake list,
64 * - Sort the work items in AG order. XFS locking
65 * order rules require us to lock buffers in AG order.
66 * - Create a log intent item for that type.
67 * - Attach it to the pending item.
68 * - Move the pending item from the dop_intake list to the
69 * dop_pending list.
70 * > Roll the transaction.
71 *
72 * NOTE: To avoid exceeding the transaction reservation, we limit the
73 * number of items that we attach to a given xfs_defer_pending.
74 *
75 * The actual finishing process looks like this:
76 *
77 * > For each xfs_defer_pending in the dop_pending list,
78 * - Roll the deferred-op transaction as above.
79 * - Create a log done item for that type, and attach it to the
80 * log intent item.
81 * - For each work item attached to the log intent item,
82 * * Perform the described action.
83 * * Attach the work item to the log done item.
84 *
85 * The key here is that we must log an intent item for all pending
86 * work items every time we roll the transaction, and that we must log
87 * a done item as soon as the work is completed. With this mechanism
88 * we can perform complex remapping operations, chaining intent items
89 * as needed.
90 *
91 * This is an example of remapping the extent (E, E+B) into file X at
92 * offset A and dealing with the extent (C, C+B) already being mapped
93 * there:
94 * +-------------------------------------------------+
95 * | Unmap file X startblock C offset A length B | t0
96 * | Intent to reduce refcount for extent (C, B) |
97 * | Intent to remove rmap (X, C, A, B) |
98 * | Intent to free extent (D, 1) (bmbt block) |
99 * | Intent to map (X, A, B) at startblock E |
100 * +-------------------------------------------------+
101 * | Map file X startblock E offset A length B | t1
102 * | Done mapping (X, E, A, B) |
103 * | Intent to increase refcount for extent (E, B) |
104 * | Intent to add rmap (X, E, A, B) |
105 * +-------------------------------------------------+
106 * | Reduce refcount for extent (C, B) | t2
107 * | Done reducing refcount for extent (C, B) |
108 * | Increase refcount for extent (E, B) |
109 * | Done increasing refcount for extent (E, B) |
110 * | Intent to free extent (C, B) |
111 * | Intent to free extent (F, 1) (refcountbt block) |
112 * | Intent to remove rmap (F, 1, REFC) |
113 * +-------------------------------------------------+
114 * | Remove rmap (X, C, A, B) | t3
115 * | Done removing rmap (X, C, A, B) |
116 * | Add rmap (X, E, A, B) |
117 * | Done adding rmap (X, E, A, B) |
118 * | Remove rmap (F, 1, REFC) |
119 * | Done removing rmap (F, 1, REFC) |
120 * +-------------------------------------------------+
121 * | Free extent (C, B) | t4
122 * | Done freeing extent (C, B) |
123 * | Free extent (D, 1) |
124 * | Done freeing extent (D, 1) |
125 * | Free extent (F, 1) |
126 * | Done freeing extent (F, 1) |
127 * +-------------------------------------------------+
128 *
129 * If we should crash before t2 commits, log recovery replays
130 * the following intent items:
131 *
132 * - Intent to reduce refcount for extent (C, B)
133 * - Intent to remove rmap (X, C, A, B)
134 * - Intent to free extent (D, 1) (bmbt block)
135 * - Intent to increase refcount for extent (E, B)
136 * - Intent to add rmap (X, E, A, B)
137 *
138 * In the process of recovering, it should also generate and take care
139 * of these intent items:
140 *
141 * - Intent to free extent (C, B)
142 * - Intent to free extent (F, 1) (refcountbt block)
143 * - Intent to remove rmap (F, 1, REFC)
144 */
145
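/*
 * A hypothetical caller-side sketch of the flow described above.  The
 * initialization and cancellation helpers (xfs_defer_init, xfs_defer_cancel)
 * and their signatures are assumed here rather than shown in this hunk;
 * only xfs_defer_finish and xfs_defer_join appear later in this file.
 */
STATIC int
xfs_example_deferred_update(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	struct xfs_defer_ops	dfops;
	xfs_fsblock_t		firstblock;
	int			error;

	/* Assumed initializer: empties the intake and pending lists. */
	xfs_defer_init(&dfops, &firstblock);

	/*
	 * ... queue work items onto the dop_intake list here, e.g. via the
	 * bmap/rmap update paths that now take a struct xfs_defer_ops ...
	 */

	/* Log intents, roll the transaction, and finish all queued work. */
	error = xfs_defer_finish(&tp, &dfops, ip);
	if (error) {
		xfs_defer_cancel(&dfops);
		xfs_trans_cancel(tp);
		return error;
	}

	return xfs_trans_commit(tp);
}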
146static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX];
147
148/*
149 * For each pending item in the intake list, log its intent item and the
150 * associated extents, then add the entire intake list to the end of
151 * the pending list.
152 */
153STATIC void
154xfs_defer_intake_work(
155 struct xfs_trans *tp,
156 struct xfs_defer_ops *dop)
157{
158 struct list_head *li;
159 struct xfs_defer_pending *dfp;
160
161 list_for_each_entry(dfp, &dop->dop_intake, dfp_list) {
162 trace_xfs_defer_intake_work(tp->t_mountp, dfp);
163 dfp->dfp_intent = dfp->dfp_type->create_intent(tp,
164 dfp->dfp_count);
165 list_sort(tp->t_mountp, &dfp->dfp_work,
166 dfp->dfp_type->diff_items);
167 list_for_each(li, &dfp->dfp_work)
168 dfp->dfp_type->log_item(tp, dfp->dfp_intent, li);
169 }
170
171 list_splice_tail_init(&dop->dop_intake, &dop->dop_pending);
172}
173
174/* Abort all the intents that were committed. */
175STATIC void
176xfs_defer_trans_abort(
177 struct xfs_trans *tp,
178 struct xfs_defer_ops *dop,
179 int error)
180{
181 struct xfs_defer_pending *dfp;
182
183 trace_xfs_defer_trans_abort(tp->t_mountp, dop);
184 /*
185 * If the transaction was committed, drop the intent reference
186 * since we're bailing out of here. The other reference is
187 * dropped when the intent hits the AIL. If the transaction
188 * was not committed, the intent is freed by the intent item
189 * unlock handler on abort.
190 */
191 if (!dop->dop_committed)
192 return;
193
194 /* Abort intent items. */
195 list_for_each_entry(dfp, &dop->dop_pending, dfp_list) {
196 trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
197 if (dfp->dfp_committed)
198 dfp->dfp_type->abort_intent(dfp->dfp_intent);
199 }
200
201 /* Shut down FS. */
202 xfs_force_shutdown(tp->t_mountp, (error == -EFSCORRUPTED) ?
203 SHUTDOWN_CORRUPT_INCORE : SHUTDOWN_META_IO_ERROR);
204}
205
206/* Roll a transaction so we can do some deferred op processing. */
207STATIC int
208xfs_defer_trans_roll(
209 struct xfs_trans **tp,
210 struct xfs_defer_ops *dop,
211 struct xfs_inode *ip)
212{
213 int i;
214 int error;
215
216 /* Log all the joined inodes except the one we passed in. */
217 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) {
218 if (dop->dop_inodes[i] == ip)
219 continue;
220 xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
221 }
222
223 trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
224
225 /* Roll the transaction. */
226 error = xfs_trans_roll(tp, ip);
227 if (error) {
228 trace_xfs_defer_trans_roll_error((*tp)->t_mountp, dop, error);
229 xfs_defer_trans_abort(*tp, dop, error);
230 return error;
231 }
232 dop->dop_committed = true;
233
234 /* Rejoin the joined inodes except the one we passed in. */
235 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) {
236 if (dop->dop_inodes[i] == ip)
237 continue;
238 xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
239 }
240
241 return error;
242}
243
244/* Do we have any work items to finish? */
245bool
246xfs_defer_has_unfinished_work(
247 struct xfs_defer_ops *dop)
248{
249 return !list_empty(&dop->dop_pending) || !list_empty(&dop->dop_intake);
250}
251
252/*
253 * Add this inode to the deferred op. Each joined inode is relogged
254 * each time we roll the transaction, in addition to any inode passed
255 * to xfs_defer_finish().
256 */
257int
258xfs_defer_join(
259 struct xfs_defer_ops *dop,
260 struct xfs_inode *ip)
261{
262 int i;
263
264 for (i = 0; i < XFS_DEFER_OPS_NR_INODES; i++) {
265 if (dop->dop_inodes[i] == ip)
266 return 0;
267 else if (dop->dop_inodes[i] == NULL) {
268 dop->dop_inodes[i] = ip;
269 return 0;
270 }
271 }
272
273 return -EFSCORRUPTED;
274}
275
276/*
277 * Finish all the pending work. This involves logging intent items for
278 * any work items that wandered in since the last transaction roll (if
279 * one has even happened), rolling the transaction, and finishing the
280 * work items in the first item on the logged-and-pending list.
281 *
282 * If an inode is provided, relog it to the new transaction.
283 */
284int
285xfs_defer_finish(
286 struct xfs_trans **tp,
287 struct xfs_defer_ops *dop,
288 struct xfs_inode *ip)
289{
290 struct xfs_defer_pending *dfp;
291 struct list_head *li;
292 struct list_head *n;
293 void *done_item = NULL;
294 void *state;
295 int error = 0;
296 void (*cleanup_fn)(struct xfs_trans *, void *, int);
297
298 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
299
300 trace_xfs_defer_finish((*tp)->t_mountp, dop);
301
302 /* Until we run out of pending work to finish... */
303 while (xfs_defer_has_unfinished_work(dop)) {
304 /* Log intents for work items sitting in the intake. */
305 xfs_defer_intake_work(*tp, dop);
306
307 /* Roll the transaction. */
308 error = xfs_defer_trans_roll(tp, dop, ip);
309 if (error)
310 goto out;
311
312 /* Mark all pending intents as committed. */
313 list_for_each_entry_reverse(dfp, &dop->dop_pending, dfp_list) {
314 if (dfp->dfp_committed)
315 break;
316 trace_xfs_defer_pending_commit((*tp)->t_mountp, dfp);
317 dfp->dfp_committed = true;
318 }
319
320 /* Log an intent-done item for the first pending item. */
321 dfp = list_first_entry(&dop->dop_pending,
322 struct xfs_defer_pending, dfp_list);
323 trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp);
324 done_item = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
325 dfp->dfp_count);
326 cleanup_fn = dfp->dfp_type->finish_cleanup;
327
328 /* Finish the work items. */
329 state = NULL;
330 list_for_each_safe(li, n, &dfp->dfp_work) {
331 list_del(li);
332 dfp->dfp_count--;
333 error = dfp->dfp_type->finish_item(*tp, dop, li,
334 done_item, &state);
335 if (error) {
336 /*
337 * Clean up after ourselves and jump out.
338 * xfs_defer_cancel will take care of freeing
339 * all these lists and stuff.
340 */
341 if (cleanup_fn)
342 cleanup_fn(*tp, state, error);
343 xfs_defer_trans_abort(*tp, dop, error);
344 goto out;
345 }
346 }
347 /* Done with the dfp, free it. */
348 list_del(&dfp->dfp_list);
349 kmem_free(dfp);
350
351 if (cleanup_fn)
352 cleanup_fn(*tp, state, error);
353 }
354
355out:
356 if (error)
357 trace_xfs_defer_finish_error((*tp)->t_mountp, dop, error);
358 else
359 trace_xfs_defer_finish_done((*tp)->t_mountp, dop);
360 return error;
361}
362
363/*
364 * Free up any items left in the list.
365 */
366void
367xfs_defer_cancel(
368 struct xfs_defer_ops *dop)
369{
370 struct xfs_defer_pending *dfp;
371 struct xfs_defer_pending *pli;
372 struct list_head *pwi;
373 struct list_head *n;
374
375 trace_xfs_defer_cancel(NULL, dop);
376
377 /*
378 * Free the pending items. Caller should already have arranged
379 * for the intent items to be released.
380 */
381 list_for_each_entry_safe(dfp, pli, &dop->dop_intake, dfp_list) {
382 trace_xfs_defer_intake_cancel(NULL, dfp);
383 list_del(&dfp->dfp_list);
384 list_for_each_safe(pwi, n, &dfp->dfp_work) {
385 list_del(pwi);
386 dfp->dfp_count--;
387 dfp->dfp_type->cancel_item(pwi);
388 }
389 ASSERT(dfp->dfp_count == 0);
390 kmem_free(dfp);
391 }
392 list_for_each_entry_safe(dfp, pli, &dop->dop_pending, dfp_list) {
393 trace_xfs_defer_pending_cancel(NULL, dfp);
394 list_del(&dfp->dfp_list);
395 list_for_each_safe(pwi, n, &dfp->dfp_work) {
396 list_del(pwi);
397 dfp->dfp_count--;
398 dfp->dfp_type->cancel_item(pwi);
399 }
400 ASSERT(dfp->dfp_count == 0);
401 kmem_free(dfp);
402 }
403}
404
405/* Add an item for later deferred processing. */
406void
407xfs_defer_add(
408 struct xfs_defer_ops *dop,
409 enum xfs_defer_ops_type type,
410 struct list_head *li)
411{
412 struct xfs_defer_pending *dfp = NULL;
413
414 /*
 415 * Add the item to the last pending item at the end of the intake list,
 416 * if that item has the same type and still has room. Else, create a
 417 * new pending item at the end of the intake list.
418 */
419 if (!list_empty(&dop->dop_intake)) {
420 dfp = list_last_entry(&dop->dop_intake,
421 struct xfs_defer_pending, dfp_list);
422 if (dfp->dfp_type->type != type ||
423 (dfp->dfp_type->max_items &&
424 dfp->dfp_count >= dfp->dfp_type->max_items))
425 dfp = NULL;
426 }
427 if (!dfp) {
428 dfp = kmem_alloc(sizeof(struct xfs_defer_pending),
429 KM_SLEEP | KM_NOFS);
430 dfp->dfp_type = defer_op_types[type];
431 dfp->dfp_committed = false;
432 dfp->dfp_intent = NULL;
433 dfp->dfp_count = 0;
434 INIT_LIST_HEAD(&dfp->dfp_work);
435 list_add_tail(&dfp->dfp_list, &dop->dop_intake);
436 }
437
438 list_add_tail(li, &dfp->dfp_work);
439 dfp->dfp_count++;
440}
441
 442/* Register a deferred operation type. */
443void
444xfs_defer_init_op_type(
445 const struct xfs_defer_op_type *type)
446{
447 defer_op_types[type->type] = type;
448}
449
450/* Initialize a deferred operation. */
451void
452xfs_defer_init(
453 struct xfs_defer_ops *dop,
454 xfs_fsblock_t *fbp)
455{
456 dop->dop_committed = false;
457 dop->dop_low = false;
458 memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
459 *fbp = NULLFSBLOCK;
460 INIT_LIST_HEAD(&dop->dop_intake);
461 INIT_LIST_HEAD(&dop->dop_pending);
462 trace_xfs_defer_init(NULL, dop);
463}
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
new file mode 100644
index 000000000000..cc3981c48296
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -0,0 +1,97 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#ifndef __XFS_DEFER_H__
21#define __XFS_DEFER_H__
22
23struct xfs_defer_op_type;
24
25/*
26 * Save a log intent item and a list of extents, so that we can replay
27 * whatever action had to happen to the extent list and file the log done
28 * item.
29 */
30struct xfs_defer_pending {
31 const struct xfs_defer_op_type *dfp_type; /* function pointers */
32 struct list_head dfp_list; /* pending items */
33 bool dfp_committed; /* committed trans? */
34 void *dfp_intent; /* log intent item */
35 struct list_head dfp_work; /* work items */
36 unsigned int dfp_count; /* # extent items */
37};
38
39/*
40 * Header for deferred operation list.
41 *
42 * dop_low is used by the allocator to activate the lowspace algorithm -
43 * when free space is running low the extent allocator may choose to
44 * allocate an extent from an AG without leaving sufficient space for
45 * a btree split when inserting the new extent. In this case the allocator
46 * will enable the lowspace algorithm which is supposed to allow further
47 * allocations (such as btree splits and newroots) to allocate from
48 * sequential AGs. In order to avoid locking AGs out of order the lowspace
49 * algorithm will start searching for free space from AG 0. If the correct
50 * transaction reservations have been made then this algorithm will eventually
51 * find all the space it needs.
52 */
53enum xfs_defer_ops_type {
54 XFS_DEFER_OPS_TYPE_RMAP,
55 XFS_DEFER_OPS_TYPE_FREE,
56 XFS_DEFER_OPS_TYPE_MAX,
57};
58
59#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */
60
61struct xfs_defer_ops {
62 bool dop_committed; /* did any trans commit? */
63 bool dop_low; /* alloc in low mode */
64 struct list_head dop_intake; /* unlogged pending work */
65 struct list_head dop_pending; /* logged pending work */
66
67 /* relog these inodes with each roll */
68 struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES];
69};
70
71void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
72 struct list_head *h);
73int xfs_defer_finish(struct xfs_trans **tp, struct xfs_defer_ops *dop,
74 struct xfs_inode *ip);
75void xfs_defer_cancel(struct xfs_defer_ops *dop);
76void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
77bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
78int xfs_defer_join(struct xfs_defer_ops *dop, struct xfs_inode *ip);
79
80/* Description of a deferred type. */
81struct xfs_defer_op_type {
82 enum xfs_defer_ops_type type;
83 unsigned int max_items;
84 void (*abort_intent)(void *);
85 void *(*create_done)(struct xfs_trans *, void *, unsigned int);
86 int (*finish_item)(struct xfs_trans *, struct xfs_defer_ops *,
87 struct list_head *, void *, void **);
88 void (*finish_cleanup)(struct xfs_trans *, void *, int);
89 void (*cancel_item)(struct list_head *);
90 int (*diff_items)(void *, struct list_head *, struct list_head *);
91 void *(*create_intent)(struct xfs_trans *, uint);
92 void (*log_item)(struct xfs_trans *, void *, struct list_head *);
93};
94
95void xfs_defer_init_op_type(const struct xfs_defer_op_type *type);
96
97#endif /* __XFS_DEFER_H__ */
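For orientation, here is a sketch of how a deferred operation type might be wired up through xfs_defer_init_op_type(); the callbacks named here are placeholders, not functions that exist in this patch (the real implementations are the extent-free and rmap-update types added elsewhere in this series).

static const struct xfs_defer_op_type xfs_example_defer_type = {
	.type		= XFS_DEFER_OPS_TYPE_FREE,
	.max_items	= 16,				/* illustrative batch limit */
	.diff_items	= xfs_example_diff_items,	/* sort work items */
	.create_intent	= xfs_example_create_intent,	/* log the intent item */
	.abort_intent	= xfs_example_abort_intent,
	.log_item	= xfs_example_log_item,		/* log one extent */
	.create_done	= xfs_example_create_done,	/* log the done item */
	.finish_item	= xfs_example_finish_item,	/* do the actual work */
	.finish_cleanup	= NULL,				/* optional */
	.cancel_item	= xfs_example_cancel_item,
};

static void
xfs_example_defer_init(void)
{
	/* Registered once at init time so xfs_defer_add() can find it. */
	xfs_defer_init_op_type(&xfs_example_defer_type);
}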
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index af0f9d171f8a..20a96dd5af7e 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -21,6 +21,7 @@
21#include "xfs_log_format.h" 21#include "xfs_log_format.h"
22#include "xfs_trans_resv.h" 22#include "xfs_trans_resv.h"
23#include "xfs_mount.h" 23#include "xfs_mount.h"
24#include "xfs_defer.h"
24#include "xfs_da_format.h" 25#include "xfs_da_format.h"
25#include "xfs_da_btree.h" 26#include "xfs_da_btree.h"
26#include "xfs_inode.h" 27#include "xfs_inode.h"
@@ -259,7 +260,7 @@ xfs_dir_createname(
259 struct xfs_name *name, 260 struct xfs_name *name,
260 xfs_ino_t inum, /* new entry inode number */ 261 xfs_ino_t inum, /* new entry inode number */
261 xfs_fsblock_t *first, /* bmap's firstblock */ 262 xfs_fsblock_t *first, /* bmap's firstblock */
262 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 263 struct xfs_defer_ops *dfops, /* bmap's freeblock list */
263 xfs_extlen_t total) /* bmap's total block count */ 264 xfs_extlen_t total) /* bmap's total block count */
264{ 265{
265 struct xfs_da_args *args; 266 struct xfs_da_args *args;
@@ -286,7 +287,7 @@ xfs_dir_createname(
286 args->inumber = inum; 287 args->inumber = inum;
287 args->dp = dp; 288 args->dp = dp;
288 args->firstblock = first; 289 args->firstblock = first;
289 args->flist = flist; 290 args->dfops = dfops;
290 args->total = total; 291 args->total = total;
291 args->whichfork = XFS_DATA_FORK; 292 args->whichfork = XFS_DATA_FORK;
292 args->trans = tp; 293 args->trans = tp;
@@ -436,7 +437,7 @@ xfs_dir_removename(
436 struct xfs_name *name, 437 struct xfs_name *name,
437 xfs_ino_t ino, 438 xfs_ino_t ino,
438 xfs_fsblock_t *first, /* bmap's firstblock */ 439 xfs_fsblock_t *first, /* bmap's firstblock */
439 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 440 struct xfs_defer_ops *dfops, /* bmap's freeblock list */
440 xfs_extlen_t total) /* bmap's total block count */ 441 xfs_extlen_t total) /* bmap's total block count */
441{ 442{
442 struct xfs_da_args *args; 443 struct xfs_da_args *args;
@@ -458,7 +459,7 @@ xfs_dir_removename(
458 args->inumber = ino; 459 args->inumber = ino;
459 args->dp = dp; 460 args->dp = dp;
460 args->firstblock = first; 461 args->firstblock = first;
461 args->flist = flist; 462 args->dfops = dfops;
462 args->total = total; 463 args->total = total;
463 args->whichfork = XFS_DATA_FORK; 464 args->whichfork = XFS_DATA_FORK;
464 args->trans = tp; 465 args->trans = tp;
@@ -498,7 +499,7 @@ xfs_dir_replace(
498 struct xfs_name *name, /* name of entry to replace */ 499 struct xfs_name *name, /* name of entry to replace */
499 xfs_ino_t inum, /* new inode number */ 500 xfs_ino_t inum, /* new inode number */
500 xfs_fsblock_t *first, /* bmap's firstblock */ 501 xfs_fsblock_t *first, /* bmap's firstblock */
501 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 502 struct xfs_defer_ops *dfops, /* bmap's freeblock list */
502 xfs_extlen_t total) /* bmap's total block count */ 503 xfs_extlen_t total) /* bmap's total block count */
503{ 504{
504 struct xfs_da_args *args; 505 struct xfs_da_args *args;
@@ -523,7 +524,7 @@ xfs_dir_replace(
523 args->inumber = inum; 524 args->inumber = inum;
524 args->dp = dp; 525 args->dp = dp;
525 args->firstblock = first; 526 args->firstblock = first;
526 args->flist = flist; 527 args->dfops = dfops;
527 args->total = total; 528 args->total = total;
528 args->whichfork = XFS_DATA_FORK; 529 args->whichfork = XFS_DATA_FORK;
529 args->trans = tp; 530 args->trans = tp;
@@ -680,7 +681,7 @@ xfs_dir2_shrink_inode(
680 681
681 /* Unmap the fsblock(s). */ 682 /* Unmap the fsblock(s). */
682 error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0, 683 error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0,
683 args->firstblock, args->flist, &done); 684 args->firstblock, args->dfops, &done);
684 if (error) { 685 if (error) {
685 /* 686 /*
686 * ENOSPC actually can happen if we're in a removename with no 687 * ENOSPC actually can happen if we're in a removename with no
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index e55353651f5b..becc926c3e3d 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -18,7 +18,7 @@
18#ifndef __XFS_DIR2_H__ 18#ifndef __XFS_DIR2_H__
19#define __XFS_DIR2_H__ 19#define __XFS_DIR2_H__
20 20
21struct xfs_bmap_free; 21struct xfs_defer_ops;
22struct xfs_da_args; 22struct xfs_da_args;
23struct xfs_inode; 23struct xfs_inode;
24struct xfs_mount; 24struct xfs_mount;
@@ -129,18 +129,18 @@ extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp,
129extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp, 129extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
130 struct xfs_name *name, xfs_ino_t inum, 130 struct xfs_name *name, xfs_ino_t inum,
131 xfs_fsblock_t *first, 131 xfs_fsblock_t *first,
132 struct xfs_bmap_free *flist, xfs_extlen_t tot); 132 struct xfs_defer_ops *dfops, xfs_extlen_t tot);
133extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, 133extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
134 struct xfs_name *name, xfs_ino_t *inum, 134 struct xfs_name *name, xfs_ino_t *inum,
135 struct xfs_name *ci_name); 135 struct xfs_name *ci_name);
136extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, 136extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
137 struct xfs_name *name, xfs_ino_t ino, 137 struct xfs_name *name, xfs_ino_t ino,
138 xfs_fsblock_t *first, 138 xfs_fsblock_t *first,
139 struct xfs_bmap_free *flist, xfs_extlen_t tot); 139 struct xfs_defer_ops *dfops, xfs_extlen_t tot);
140extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, 140extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
141 struct xfs_name *name, xfs_ino_t inum, 141 struct xfs_name *name, xfs_ino_t inum,
142 xfs_fsblock_t *first, 142 xfs_fsblock_t *first,
143 struct xfs_bmap_free *flist, xfs_extlen_t tot); 143 struct xfs_defer_ops *dfops, xfs_extlen_t tot);
144extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, 144extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
145 struct xfs_name *name); 145 struct xfs_name *name);
146 146
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index adb204d40f22..f814d42c73b2 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -455,8 +455,10 @@ xfs_sb_has_compat_feature(
455} 455}
456 456
457#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ 457#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */
458#define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */
458#define XFS_SB_FEAT_RO_COMPAT_ALL \ 459#define XFS_SB_FEAT_RO_COMPAT_ALL \
459 (XFS_SB_FEAT_RO_COMPAT_FINOBT) 460 (XFS_SB_FEAT_RO_COMPAT_FINOBT | \
461 XFS_SB_FEAT_RO_COMPAT_RMAPBT)
460#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL 462#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
461static inline bool 463static inline bool
462xfs_sb_has_ro_compat_feature( 464xfs_sb_has_ro_compat_feature(
@@ -538,6 +540,12 @@ static inline bool xfs_sb_version_hasmetauuid(struct xfs_sb *sbp)
538 (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID); 540 (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID);
539} 541}
540 542
543static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp)
544{
545 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
546 (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT);
547}
548
541/* 549/*
542 * end of superblock version macros 550 * end of superblock version macros
543 */ 551 */
@@ -598,10 +606,10 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
598#define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION) 606#define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION)
599 607
600/* 608/*
601 * Btree number 0 is bno, 1 is cnt. This value gives the size of the 609 * Btree number 0 is bno, 1 is cnt, 2 is rmap. This value gives the size of the
602 * arrays below. 610 * arrays below.
603 */ 611 */
604#define XFS_BTNUM_AGF ((int)XFS_BTNUM_CNTi + 1) 612#define XFS_BTNUM_AGF ((int)XFS_BTNUM_RMAPi + 1)
605 613
606/* 614/*
607 * The second word of agf_levels in the first a.g. overlaps the EFS 615 * The second word of agf_levels in the first a.g. overlaps the EFS
@@ -618,12 +626,10 @@ typedef struct xfs_agf {
618 __be32 agf_seqno; /* sequence # starting from 0 */ 626 __be32 agf_seqno; /* sequence # starting from 0 */
619 __be32 agf_length; /* size in blocks of a.g. */ 627 __be32 agf_length; /* size in blocks of a.g. */
620 /* 628 /*
621 * Freespace information 629 * Freespace and rmap information
622 */ 630 */
623 __be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */ 631 __be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */
624 __be32 agf_spare0; /* spare field */
625 __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */ 632 __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */
626 __be32 agf_spare1; /* spare field */
627 633
628 __be32 agf_flfirst; /* first freelist block's index */ 634 __be32 agf_flfirst; /* first freelist block's index */
629 __be32 agf_fllast; /* last freelist block's index */ 635 __be32 agf_fllast; /* last freelist block's index */
@@ -1308,17 +1314,118 @@ typedef __be32 xfs_inobt_ptr_t;
1308#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) 1314#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
1309 1315
1310/* 1316/*
1311 * The first data block of an AG depends on whether the filesystem was formatted 1317 * Reverse mapping btree format definitions
1312 * with the finobt feature. If so, account for the finobt reserved root btree 1318 *
1313 * block. 1319 * There is a btree for the reverse map per allocation group
1320 */
1321#define XFS_RMAP_CRC_MAGIC 0x524d4233 /* 'RMB3' */
1322
1323/*
1324 * Ownership info for an extent. This is used to create reverse-mapping
1325 * entries.
1314 */ 1326 */
1315#define XFS_PREALLOC_BLOCKS(mp) \ 1327#define XFS_OWNER_INFO_ATTR_FORK (1 << 0)
1328#define XFS_OWNER_INFO_BMBT_BLOCK (1 << 1)
1329struct xfs_owner_info {
1330 uint64_t oi_owner;
1331 xfs_fileoff_t oi_offset;
1332 unsigned int oi_flags;
1333};
1334
1335/*
1336 * Special owner types.
1337 *
 1338 * Seeing as we only support up to 8EB, we can use the upper bit of the owner
 1339 * field to tell us we have a special owner value. We use these for static metadata
1340 * allocated at mkfs/growfs time, as well as for freespace management metadata.
1341 */
1342#define XFS_RMAP_OWN_NULL (-1ULL) /* No owner, for growfs */
1343#define XFS_RMAP_OWN_UNKNOWN (-2ULL) /* Unknown owner, for EFI recovery */
1344#define XFS_RMAP_OWN_FS (-3ULL) /* static fs metadata */
1345#define XFS_RMAP_OWN_LOG (-4ULL) /* static fs metadata */
1346#define XFS_RMAP_OWN_AG (-5ULL) /* AG freespace btree blocks */
1347#define XFS_RMAP_OWN_INOBT (-6ULL) /* Inode btree blocks */
1348#define XFS_RMAP_OWN_INODES (-7ULL) /* Inode chunk */
1349#define XFS_RMAP_OWN_MIN (-8ULL) /* guard */
1350
1351#define XFS_RMAP_NON_INODE_OWNER(owner) (!!((owner) & (1ULL << 63)))
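As an illustration (not code from the patch): because the special owners above are the "negative" 64-bit values, the top bit cleanly separates them from real inode numbers, for example:

	/* XFS_RMAP_OWN_INOBT == -6ULL == 0xFFFFFFFFFFFFFFFA, so bit 63 is set */
	ASSERT(XFS_RMAP_NON_INODE_OWNER(XFS_RMAP_OWN_INOBT));

	/* A real inode-number owner never has bit 63 set */
	ASSERT(!XFS_RMAP_NON_INODE_OWNER(ip->i_ino));	/* ip: some hypothetical inode */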
1352
1353/*
1354 * Data record structure
1355 */
1356struct xfs_rmap_rec {
1357 __be32 rm_startblock; /* extent start block */
1358 __be32 rm_blockcount; /* extent length */
1359 __be64 rm_owner; /* extent owner */
1360 __be64 rm_offset; /* offset within the owner */
1361};
1362
1363/*
1364 * rmap btree record
1365 * rm_offset:63 is the attribute fork flag
1366 * rm_offset:62 is the bmbt block flag
1367 * rm_offset:61 is the unwritten extent flag (same as l0:63 in bmbt)
1368 * rm_offset:54-60 aren't used and should be zero
1369 * rm_offset:0-53 is the block offset within the inode
1370 */
1371#define XFS_RMAP_OFF_ATTR_FORK ((__uint64_t)1ULL << 63)
1372#define XFS_RMAP_OFF_BMBT_BLOCK ((__uint64_t)1ULL << 62)
1373#define XFS_RMAP_OFF_UNWRITTEN ((__uint64_t)1ULL << 61)
1374
1375#define XFS_RMAP_LEN_MAX ((__uint32_t)~0U)
1376#define XFS_RMAP_OFF_FLAGS (XFS_RMAP_OFF_ATTR_FORK | \
1377 XFS_RMAP_OFF_BMBT_BLOCK | \
1378 XFS_RMAP_OFF_UNWRITTEN)
1379#define XFS_RMAP_OFF_MASK ((__uint64_t)0x3FFFFFFFFFFFFFULL)
1380
1381#define XFS_RMAP_OFF(off) ((off) & XFS_RMAP_OFF_MASK)
1382
1383#define XFS_RMAP_IS_BMBT_BLOCK(off) (!!((off) & XFS_RMAP_OFF_BMBT_BLOCK))
1384#define XFS_RMAP_IS_ATTR_FORK(off) (!!((off) & XFS_RMAP_OFF_ATTR_FORK))
 1385#define XFS_RMAP_IS_UNWRITTEN(off) (!!((off) & XFS_RMAP_OFF_UNWRITTEN))
1386
1387#define RMAPBT_STARTBLOCK_BITLEN 32
1388#define RMAPBT_BLOCKCOUNT_BITLEN 32
1389#define RMAPBT_OWNER_BITLEN 64
1390#define RMAPBT_ATTRFLAG_BITLEN 1
1391#define RMAPBT_BMBTFLAG_BITLEN 1
1392#define RMAPBT_EXNTFLAG_BITLEN 1
1393#define RMAPBT_UNUSED_OFFSET_BITLEN 7
1394#define RMAPBT_OFFSET_BITLEN 54
1395
1396#define XFS_RMAP_ATTR_FORK (1 << 0)
1397#define XFS_RMAP_BMBT_BLOCK (1 << 1)
1398#define XFS_RMAP_UNWRITTEN (1 << 2)
1399#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \
1400 XFS_RMAP_BMBT_BLOCK)
1401#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN)
1402struct xfs_rmap_irec {
1403 xfs_agblock_t rm_startblock; /* extent start block */
1404 xfs_extlen_t rm_blockcount; /* extent length */
1405 __uint64_t rm_owner; /* extent owner */
1406 __uint64_t rm_offset; /* offset within the owner */
1407 unsigned int rm_flags; /* state flags */
1408};
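Below is a sketch (an assumption, since the real helper is not shown in this hunk) of how an in-core record's flags and file offset could be packed into the on-disk 64-bit rm_offset field using the masks above; the patch's own helper for this is xfs_rmap_irec_offset_pack(), declared in xfs_rmap.h.

static inline __uint64_t
example_rmap_offset_pack(
	const struct xfs_rmap_irec	*irec)
{
	__uint64_t			x;

	/* The low 54 bits carry the block offset within the owner... */
	x = irec->rm_offset & XFS_RMAP_OFF_MASK;

	/* ...and the state flags are folded into the top bits. */
	if (irec->rm_flags & XFS_RMAP_ATTR_FORK)
		x |= XFS_RMAP_OFF_ATTR_FORK;
	if (irec->rm_flags & XFS_RMAP_BMBT_BLOCK)
		x |= XFS_RMAP_OFF_BMBT_BLOCK;
	if (irec->rm_flags & XFS_RMAP_UNWRITTEN)
		x |= XFS_RMAP_OFF_UNWRITTEN;
	return x;
}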
1409
1410/*
1411 * Key structure
1412 *
1413 * We don't use the length for lookups
1414 */
1415struct xfs_rmap_key {
1416 __be32 rm_startblock; /* extent start block */
1417 __be64 rm_owner; /* extent owner */
1418 __be64 rm_offset; /* offset within the owner */
1419} __attribute__((packed));
1420
1421/* btree pointer type */
1422typedef __be32 xfs_rmap_ptr_t;
1423
1424#define XFS_RMAP_BLOCK(mp) \
1316 (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \ 1425 (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
1317 XFS_FIBT_BLOCK(mp) + 1 : \ 1426 XFS_FIBT_BLOCK(mp) + 1 : \
1318 XFS_IBT_BLOCK(mp) + 1) 1427 XFS_IBT_BLOCK(mp) + 1)
1319 1428
1320
1321
1322/* 1429/*
1323 * BMAP Btree format definitions 1430 * BMAP Btree format definitions
1324 * 1431 *
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index f5ec9c5ccae6..79455058b752 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -206,6 +206,7 @@ typedef struct xfs_fsop_resblks {
206#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ 206#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
207#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */ 207#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */
208#define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */ 208#define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */
209#define XFS_FSOP_GEOM_FLAGS_RMAPBT 0x80000 /* Reverse mapping btree */
209 210
210/* 211/*
211 * Minimum and maximum sizes need for growth checks. 212 * Minimum and maximum sizes need for growth checks.
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 4b1e408169a8..51b4e0de1fdc 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -24,6 +24,7 @@
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_defer.h"
27#include "xfs_inode.h" 28#include "xfs_inode.h"
28#include "xfs_btree.h" 29#include "xfs_btree.h"
29#include "xfs_ialloc.h" 30#include "xfs_ialloc.h"
@@ -39,6 +40,7 @@
39#include "xfs_icache.h" 40#include "xfs_icache.h"
40#include "xfs_trace.h" 41#include "xfs_trace.h"
41#include "xfs_log.h" 42#include "xfs_log.h"
43#include "xfs_rmap.h"
42 44
43 45
44/* 46/*
@@ -614,6 +616,7 @@ xfs_ialloc_ag_alloc(
614 args.tp = tp; 616 args.tp = tp;
615 args.mp = tp->t_mountp; 617 args.mp = tp->t_mountp;
616 args.fsbno = NULLFSBLOCK; 618 args.fsbno = NULLFSBLOCK;
619 xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_INODES);
617 620
618#ifdef DEBUG 621#ifdef DEBUG
619 /* randomly do sparse inode allocations */ 622 /* randomly do sparse inode allocations */
@@ -1817,19 +1820,21 @@ xfs_difree_inode_chunk(
1817 struct xfs_mount *mp, 1820 struct xfs_mount *mp,
1818 xfs_agnumber_t agno, 1821 xfs_agnumber_t agno,
1819 struct xfs_inobt_rec_incore *rec, 1822 struct xfs_inobt_rec_incore *rec,
1820 struct xfs_bmap_free *flist) 1823 struct xfs_defer_ops *dfops)
1821{ 1824{
1822 xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino); 1825 xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino);
1823 int startidx, endidx; 1826 int startidx, endidx;
1824 int nextbit; 1827 int nextbit;
1825 xfs_agblock_t agbno; 1828 xfs_agblock_t agbno;
1826 int contigblk; 1829 int contigblk;
1830 struct xfs_owner_info oinfo;
1827 DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS); 1831 DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
1832 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
1828 1833
1829 if (!xfs_inobt_issparse(rec->ir_holemask)) { 1834 if (!xfs_inobt_issparse(rec->ir_holemask)) {
1830 /* not sparse, calculate extent info directly */ 1835 /* not sparse, calculate extent info directly */
1831 xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno, sagbno), 1836 xfs_bmap_add_free(mp, dfops, XFS_AGB_TO_FSB(mp, agno, sagbno),
1832 mp->m_ialloc_blks); 1837 mp->m_ialloc_blks, &oinfo);
1833 return; 1838 return;
1834 } 1839 }
1835 1840
@@ -1872,8 +1877,8 @@ xfs_difree_inode_chunk(
1872 1877
1873 ASSERT(agbno % mp->m_sb.sb_spino_align == 0); 1878 ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
1874 ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); 1879 ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
1875 xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno, agbno), 1880 xfs_bmap_add_free(mp, dfops, XFS_AGB_TO_FSB(mp, agno, agbno),
1876 contigblk); 1881 contigblk, &oinfo);
1877 1882
1878 /* reset range to current bit and carry on... */ 1883 /* reset range to current bit and carry on... */
1879 startidx = endidx = nextbit; 1884 startidx = endidx = nextbit;
@@ -1889,7 +1894,7 @@ xfs_difree_inobt(
1889 struct xfs_trans *tp, 1894 struct xfs_trans *tp,
1890 struct xfs_buf *agbp, 1895 struct xfs_buf *agbp,
1891 xfs_agino_t agino, 1896 xfs_agino_t agino,
1892 struct xfs_bmap_free *flist, 1897 struct xfs_defer_ops *dfops,
1893 struct xfs_icluster *xic, 1898 struct xfs_icluster *xic,
1894 struct xfs_inobt_rec_incore *orec) 1899 struct xfs_inobt_rec_incore *orec)
1895{ 1900{
@@ -1976,7 +1981,7 @@ xfs_difree_inobt(
1976 goto error0; 1981 goto error0;
1977 } 1982 }
1978 1983
1979 xfs_difree_inode_chunk(mp, agno, &rec, flist); 1984 xfs_difree_inode_chunk(mp, agno, &rec, dfops);
1980 } else { 1985 } else {
1981 xic->deleted = 0; 1986 xic->deleted = 0;
1982 1987
@@ -2121,7 +2126,7 @@ int
2121xfs_difree( 2126xfs_difree(
2122 struct xfs_trans *tp, /* transaction pointer */ 2127 struct xfs_trans *tp, /* transaction pointer */
2123 xfs_ino_t inode, /* inode to be freed */ 2128 xfs_ino_t inode, /* inode to be freed */
2124 struct xfs_bmap_free *flist, /* extents to free */ 2129 struct xfs_defer_ops *dfops, /* extents to free */
2125 struct xfs_icluster *xic) /* cluster info if deleted */ 2130 struct xfs_icluster *xic) /* cluster info if deleted */
2126{ 2131{
2127 /* REFERENCED */ 2132 /* REFERENCED */
@@ -2173,7 +2178,7 @@ xfs_difree(
2173 /* 2178 /*
2174 * Fix up the inode allocation btree. 2179 * Fix up the inode allocation btree.
2175 */ 2180 */
2176 error = xfs_difree_inobt(mp, tp, agbp, agino, flist, xic, &rec); 2181 error = xfs_difree_inobt(mp, tp, agbp, agino, dfops, xic, &rec);
2177 if (error) 2182 if (error)
2178 goto error0; 2183 goto error0;
2179 2184
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 6e450df2979b..0bb89669fc07 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -95,7 +95,7 @@ int /* error */
95xfs_difree( 95xfs_difree(
96 struct xfs_trans *tp, /* transaction pointer */ 96 struct xfs_trans *tp, /* transaction pointer */
97 xfs_ino_t inode, /* inode to be freed */ 97 xfs_ino_t inode, /* inode to be freed */
98 struct xfs_bmap_free *flist, /* extents to free */ 98 struct xfs_defer_ops *dfops, /* extents to free */
99 struct xfs_icluster *ifree); /* cluster info if deleted */ 99 struct xfs_icluster *ifree); /* cluster info if deleted */
100 100
101/* 101/*
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 89c21d771e35..31ca2208c03d 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -32,6 +32,7 @@
32#include "xfs_trace.h" 32#include "xfs_trace.h"
33#include "xfs_cksum.h" 33#include "xfs_cksum.h"
34#include "xfs_trans.h" 34#include "xfs_trans.h"
35#include "xfs_rmap.h"
35 36
36 37
37STATIC int 38STATIC int
@@ -96,6 +97,7 @@ xfs_inobt_alloc_block(
96 memset(&args, 0, sizeof(args)); 97 memset(&args, 0, sizeof(args));
97 args.tp = cur->bc_tp; 98 args.tp = cur->bc_tp;
98 args.mp = cur->bc_mp; 99 args.mp = cur->bc_mp;
100 xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_INOBT);
99 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno); 101 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
100 args.minlen = 1; 102 args.minlen = 1;
101 args.maxlen = 1; 103 args.maxlen = 1;
@@ -125,8 +127,12 @@ xfs_inobt_free_block(
125 struct xfs_btree_cur *cur, 127 struct xfs_btree_cur *cur,
126 struct xfs_buf *bp) 128 struct xfs_buf *bp)
127{ 129{
130 struct xfs_owner_info oinfo;
131
132 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
128 return xfs_free_extent(cur->bc_tp, 133 return xfs_free_extent(cur->bc_tp,
129 XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1); 134 XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
135 &oinfo);
130} 136}
131 137
132STATIC int 138STATIC int
@@ -146,14 +152,6 @@ xfs_inobt_init_key_from_rec(
146} 152}
147 153
148STATIC void 154STATIC void
149xfs_inobt_init_rec_from_key(
150 union xfs_btree_key *key,
151 union xfs_btree_rec *rec)
152{
153 rec->inobt.ir_startino = key->inobt.ir_startino;
154}
155
156STATIC void
157xfs_inobt_init_rec_from_cur( 155xfs_inobt_init_rec_from_cur(
158 struct xfs_btree_cur *cur, 156 struct xfs_btree_cur *cur,
159 union xfs_btree_rec *rec) 157 union xfs_btree_rec *rec)
@@ -314,7 +312,6 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
314 .get_minrecs = xfs_inobt_get_minrecs, 312 .get_minrecs = xfs_inobt_get_minrecs,
315 .get_maxrecs = xfs_inobt_get_maxrecs, 313 .get_maxrecs = xfs_inobt_get_maxrecs,
316 .init_key_from_rec = xfs_inobt_init_key_from_rec, 314 .init_key_from_rec = xfs_inobt_init_key_from_rec,
317 .init_rec_from_key = xfs_inobt_init_rec_from_key,
318 .init_rec_from_cur = xfs_inobt_init_rec_from_cur, 315 .init_rec_from_cur = xfs_inobt_init_rec_from_cur,
319 .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, 316 .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
320 .key_diff = xfs_inobt_key_diff, 317 .key_diff = xfs_inobt_key_diff,
@@ -336,7 +333,6 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
336 .get_minrecs = xfs_inobt_get_minrecs, 333 .get_minrecs = xfs_inobt_get_minrecs,
337 .get_maxrecs = xfs_inobt_get_maxrecs, 334 .get_maxrecs = xfs_inobt_get_maxrecs,
338 .init_key_from_rec = xfs_inobt_init_key_from_rec, 335 .init_key_from_rec = xfs_inobt_init_key_from_rec,
339 .init_rec_from_key = xfs_inobt_init_rec_from_key,
340 .init_rec_from_cur = xfs_inobt_init_rec_from_cur, 336 .init_rec_from_cur = xfs_inobt_init_rec_from_cur,
341 .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur, 337 .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur,
342 .key_diff = xfs_inobt_key_diff, 338 .key_diff = xfs_inobt_key_diff,
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 9d9559eb2835..4b9769e23c83 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -22,6 +22,7 @@
22#include "xfs_log_format.h" 22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_mount.h" 24#include "xfs_mount.h"
25#include "xfs_defer.h"
25#include "xfs_inode.h" 26#include "xfs_inode.h"
26#include "xfs_error.h" 27#include "xfs_error.h"
27#include "xfs_cksum.h" 28#include "xfs_cksum.h"
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index e8f49c029ff0..a6eed43fa7cd 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -110,7 +110,9 @@ static inline uint xlog_get_cycle(char *ptr)
110#define XLOG_REG_TYPE_COMMIT 18 110#define XLOG_REG_TYPE_COMMIT 18
111#define XLOG_REG_TYPE_TRANSHDR 19 111#define XLOG_REG_TYPE_TRANSHDR 19
112#define XLOG_REG_TYPE_ICREATE 20 112#define XLOG_REG_TYPE_ICREATE 20
113#define XLOG_REG_TYPE_MAX 20 113#define XLOG_REG_TYPE_RUI_FORMAT 21
114#define XLOG_REG_TYPE_RUD_FORMAT 22
115#define XLOG_REG_TYPE_MAX 22
114 116
115/* 117/*
116 * Flags to log operation header 118 * Flags to log operation header
@@ -227,6 +229,8 @@ typedef struct xfs_trans_header {
227#define XFS_LI_DQUOT 0x123d 229#define XFS_LI_DQUOT 0x123d
228#define XFS_LI_QUOTAOFF 0x123e 230#define XFS_LI_QUOTAOFF 0x123e
229#define XFS_LI_ICREATE 0x123f 231#define XFS_LI_ICREATE 0x123f
232#define XFS_LI_RUI 0x1240 /* rmap update intent */
233#define XFS_LI_RUD 0x1241
230 234
231#define XFS_LI_TYPE_DESC \ 235#define XFS_LI_TYPE_DESC \
232 { XFS_LI_EFI, "XFS_LI_EFI" }, \ 236 { XFS_LI_EFI, "XFS_LI_EFI" }, \
@@ -236,7 +240,9 @@ typedef struct xfs_trans_header {
236 { XFS_LI_BUF, "XFS_LI_BUF" }, \ 240 { XFS_LI_BUF, "XFS_LI_BUF" }, \
237 { XFS_LI_DQUOT, "XFS_LI_DQUOT" }, \ 241 { XFS_LI_DQUOT, "XFS_LI_DQUOT" }, \
238 { XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" }, \ 242 { XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" }, \
239 { XFS_LI_ICREATE, "XFS_LI_ICREATE" } 243 { XFS_LI_ICREATE, "XFS_LI_ICREATE" }, \
244 { XFS_LI_RUI, "XFS_LI_RUI" }, \
245 { XFS_LI_RUD, "XFS_LI_RUD" }
240 246
241/* 247/*
242 * Inode Log Item Format definitions. 248 * Inode Log Item Format definitions.
@@ -604,6 +610,59 @@ typedef struct xfs_efd_log_format_64 {
604} xfs_efd_log_format_64_t; 610} xfs_efd_log_format_64_t;
605 611
606/* 612/*
613 * RUI/RUD (reverse mapping) log format definitions
614 */
615struct xfs_map_extent {
616 __uint64_t me_owner;
617 __uint64_t me_startblock;
618 __uint64_t me_startoff;
619 __uint32_t me_len;
620 __uint32_t me_flags;
621};
622
623/* rmap me_flags: upper bits are flags, lower byte is type code */
624#define XFS_RMAP_EXTENT_MAP 1
625#define XFS_RMAP_EXTENT_UNMAP 3
626#define XFS_RMAP_EXTENT_CONVERT 5
627#define XFS_RMAP_EXTENT_ALLOC 7
628#define XFS_RMAP_EXTENT_FREE 8
629#define XFS_RMAP_EXTENT_TYPE_MASK 0xFF
630
631#define XFS_RMAP_EXTENT_ATTR_FORK (1U << 31)
632#define XFS_RMAP_EXTENT_BMBT_BLOCK (1U << 30)
633#define XFS_RMAP_EXTENT_UNWRITTEN (1U << 29)
634
635#define XFS_RMAP_EXTENT_FLAGS (XFS_RMAP_EXTENT_TYPE_MASK | \
636 XFS_RMAP_EXTENT_ATTR_FORK | \
637 XFS_RMAP_EXTENT_BMBT_BLOCK | \
638 XFS_RMAP_EXTENT_UNWRITTEN)
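As a hypothetical illustration of this encoding (not code from the patch), a logged rmap "map" update for an attr-fork extent would carry the type code in the low byte of me_flags and the fork flag in the high bits; the owner, block, offset and length values here are placeholders:

	struct xfs_map_extent	me = {
		.me_owner	= ip->i_ino,	/* hypothetical inode owner */
		.me_startblock	= fsbno,	/* hypothetical start fsblock */
		.me_startoff	= off,		/* file offset being mapped */
		.me_len		= len,
		.me_flags	= XFS_RMAP_EXTENT_MAP |
				  XFS_RMAP_EXTENT_ATTR_FORK,
	};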
639
640/*
641 * This is the structure used to lay out an rui log item in the
642 * log. The rui_extents field is a variable size array whose
643 * size is given by rui_nextents.
644 */
645struct xfs_rui_log_format {
646 __uint16_t rui_type; /* rui log item type */
647 __uint16_t rui_size; /* size of this item */
648 __uint32_t rui_nextents; /* # extents to free */
649 __uint64_t rui_id; /* rui identifier */
650 struct xfs_map_extent rui_extents[1]; /* array of extents to rmap */
651};
652
653/*
654 * This is the structure used to lay out an rud log item in the
 655 * log. The rud carries no extent array of its own; it only records
 656 * the id of the corresponding rui log item.
657 */
658struct xfs_rud_log_format {
659 __uint16_t rud_type; /* rud log item type */
660 __uint16_t rud_size; /* size of this item */
661 __uint32_t __pad;
662 __uint64_t rud_rui_id; /* id of corresponding rui */
663};
664
665/*
607 * Dquot Log format definitions. 666 * Dquot Log format definitions.
608 * 667 *
609 * The first two fields must be the type and size fitting into 668 * The first two fields must be the type and size fitting into
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
new file mode 100644
index 000000000000..73d05407d663
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -0,0 +1,1399 @@
1/*
2 * Copyright (c) 2014 Red Hat, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_shared.h"
21#include "xfs_format.h"
22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h"
24#include "xfs_bit.h"
25#include "xfs_sb.h"
26#include "xfs_mount.h"
27#include "xfs_defer.h"
28#include "xfs_da_format.h"
29#include "xfs_da_btree.h"
30#include "xfs_btree.h"
31#include "xfs_trans.h"
32#include "xfs_alloc.h"
33#include "xfs_rmap.h"
34#include "xfs_rmap_btree.h"
35#include "xfs_trans_space.h"
36#include "xfs_trace.h"
37#include "xfs_error.h"
38#include "xfs_extent_busy.h"
39#include "xfs_bmap.h"
40#include "xfs_inode.h"
41
42/*
43 * Lookup the first record less than or equal to [bno, len, owner, offset]
44 * in the btree given by cur.
45 */
46int
47xfs_rmap_lookup_le(
48 struct xfs_btree_cur *cur,
49 xfs_agblock_t bno,
50 xfs_extlen_t len,
51 uint64_t owner,
52 uint64_t offset,
53 unsigned int flags,
54 int *stat)
55{
56 cur->bc_rec.r.rm_startblock = bno;
57 cur->bc_rec.r.rm_blockcount = len;
58 cur->bc_rec.r.rm_owner = owner;
59 cur->bc_rec.r.rm_offset = offset;
60 cur->bc_rec.r.rm_flags = flags;
61 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
62}
63
64/*
65 * Lookup the record exactly matching [bno, len, owner, offset]
66 * in the btree given by cur.
67 */
68int
69xfs_rmap_lookup_eq(
70 struct xfs_btree_cur *cur,
71 xfs_agblock_t bno,
72 xfs_extlen_t len,
73 uint64_t owner,
74 uint64_t offset,
75 unsigned int flags,
76 int *stat)
77{
78 cur->bc_rec.r.rm_startblock = bno;
79 cur->bc_rec.r.rm_blockcount = len;
80 cur->bc_rec.r.rm_owner = owner;
81 cur->bc_rec.r.rm_offset = offset;
82 cur->bc_rec.r.rm_flags = flags;
83 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
84}
85
86/*
87 * Update the record referred to by cur to the value given
88 * by [bno, len, owner, offset].
89 * This either works (return 0) or gets an EFSCORRUPTED error.
90 */
91STATIC int
92xfs_rmap_update(
93 struct xfs_btree_cur *cur,
94 struct xfs_rmap_irec *irec)
95{
96 union xfs_btree_rec rec;
97 int error;
98
99 trace_xfs_rmap_update(cur->bc_mp, cur->bc_private.a.agno,
100 irec->rm_startblock, irec->rm_blockcount,
101 irec->rm_owner, irec->rm_offset, irec->rm_flags);
102
103 rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
104 rec.rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
105 rec.rmap.rm_owner = cpu_to_be64(irec->rm_owner);
106 rec.rmap.rm_offset = cpu_to_be64(
107 xfs_rmap_irec_offset_pack(irec));
108 error = xfs_btree_update(cur, &rec);
109 if (error)
110 trace_xfs_rmap_update_error(cur->bc_mp,
111 cur->bc_private.a.agno, error, _RET_IP_);
112 return error;
113}
114
115int
116xfs_rmap_insert(
117 struct xfs_btree_cur *rcur,
118 xfs_agblock_t agbno,
119 xfs_extlen_t len,
120 uint64_t owner,
121 uint64_t offset,
122 unsigned int flags)
123{
124 int i;
125 int error;
126
127 trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_private.a.agno, agbno,
128 len, owner, offset, flags);
129
130 error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i);
131 if (error)
132 goto done;
133 XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 0, done);
134
135 rcur->bc_rec.r.rm_startblock = agbno;
136 rcur->bc_rec.r.rm_blockcount = len;
137 rcur->bc_rec.r.rm_owner = owner;
138 rcur->bc_rec.r.rm_offset = offset;
139 rcur->bc_rec.r.rm_flags = flags;
140 error = xfs_btree_insert(rcur, &i);
141 if (error)
142 goto done;
143 XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done);
144done:
145 if (error)
146 trace_xfs_rmap_insert_error(rcur->bc_mp,
147 rcur->bc_private.a.agno, error, _RET_IP_);
148 return error;
149}
150
151static int
152xfs_rmap_btrec_to_irec(
153 union xfs_btree_rec *rec,
154 struct xfs_rmap_irec *irec)
155{
156 irec->rm_flags = 0;
157 irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock);
158 irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount);
159 irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner);
160 return xfs_rmap_irec_offset_unpack(be64_to_cpu(rec->rmap.rm_offset),
161 irec);
162}
163
164/*
165 * Get the data from the pointed-to record.
166 */
167int
168xfs_rmap_get_rec(
169 struct xfs_btree_cur *cur,
170 struct xfs_rmap_irec *irec,
171 int *stat)
172{
173 union xfs_btree_rec *rec;
174 int error;
175
176 error = xfs_btree_get_rec(cur, &rec, stat);
177 if (error || !*stat)
178 return error;
179
180 return xfs_rmap_btrec_to_irec(rec, irec);
181}
182
183/*
184 * Find the extent in the rmap btree and remove it.
185 *
186 * The record we find should always be an exact match for the extent that we're
187 * looking for, since we insert them into the btree without modification.
188 *
189 * Special Case #1: when growing the filesystem, we "free" an extent when
190 * growing the last AG. This extent is new space and so it is not tracked as
191 * used space in the btree. The growfs code will pass in an owner of
192 * XFS_RMAP_OWN_NULL to indicate that it expected that there is no owner of this
193 * extent. We verify that - the extent lookup result in a record that does not
194 * overlap.
195 *
196 * Special Case #2: EFIs do not record the owner of the extent, so when
197 * recovering EFIs from the log we pass in XFS_RMAP_OWN_UNKNOWN to tell the rmap
198 * btree to ignore the owner (i.e. wildcard match) so we don't trigger
199 * corruption checks during log recovery.
200 */
201STATIC int
202xfs_rmap_unmap(
203 struct xfs_btree_cur *cur,
204 xfs_agblock_t bno,
205 xfs_extlen_t len,
206 bool unwritten,
207 struct xfs_owner_info *oinfo)
208{
209 struct xfs_mount *mp = cur->bc_mp;
210 struct xfs_rmap_irec ltrec;
211 uint64_t ltoff;
212 int error = 0;
213 int i;
214 uint64_t owner;
215 uint64_t offset;
216 unsigned int flags;
217 bool ignore_off;
218
219 xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
220 ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) ||
221 (flags & XFS_RMAP_BMBT_BLOCK);
222 if (unwritten)
223 flags |= XFS_RMAP_UNWRITTEN;
224 trace_xfs_rmap_unmap(mp, cur->bc_private.a.agno, bno, len,
225 unwritten, oinfo);
226
227 /*
228 * We should always have a left record because there's a static record
229 * for the AG headers at rm_startblock == 0 created by mkfs/growfs that
230 * will not ever be removed from the tree.
231 */
232 error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, &i);
233 if (error)
234 goto out_error;
235 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
236
237 error = xfs_rmap_get_rec(cur, &ltrec, &i);
238 if (error)
239 goto out_error;
240 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
241 trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
242 cur->bc_private.a.agno, ltrec.rm_startblock,
243 ltrec.rm_blockcount, ltrec.rm_owner,
244 ltrec.rm_offset, ltrec.rm_flags);
245 ltoff = ltrec.rm_offset;
246
247 /*
248 * For growfs, the incoming extent must be beyond the left record we
249 * just found as it is new space and won't be used by anyone. This is
250 * just a corruption check as we don't actually do anything with this
251 * extent. Note that we need to use >= instead of > because it might
252 * be the case that the "left" extent goes all the way to EOFS.
253 */
254 if (owner == XFS_RMAP_OWN_NULL) {
255 XFS_WANT_CORRUPTED_GOTO(mp, bno >= ltrec.rm_startblock +
256 ltrec.rm_blockcount, out_error);
257 goto out_done;
258 }
259
260 /* Make sure the unwritten flag matches. */
261 XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
262 (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error);
263
264 /* Make sure the extent we found covers the entire freeing range. */
265 XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
266 ltrec.rm_startblock + ltrec.rm_blockcount >=
267 bno + len, out_error);
268
269 /* Make sure the owner matches what we expect to find in the tree. */
270 XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
271 XFS_RMAP_NON_INODE_OWNER(owner), out_error);
272
273 /* Check the offset, if necessary. */
274 if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
275 if (flags & XFS_RMAP_BMBT_BLOCK) {
276 XFS_WANT_CORRUPTED_GOTO(mp,
277 ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
278 out_error);
279 } else {
280 XFS_WANT_CORRUPTED_GOTO(mp,
281 ltrec.rm_offset <= offset, out_error);
282 XFS_WANT_CORRUPTED_GOTO(mp,
283 ltoff + ltrec.rm_blockcount >= offset + len,
284 out_error);
285 }
286 }
287
288 if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
289 /* exact match, simply remove the record from rmap tree */
290 trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
291 ltrec.rm_startblock, ltrec.rm_blockcount,
292 ltrec.rm_owner, ltrec.rm_offset,
293 ltrec.rm_flags);
294 error = xfs_btree_delete(cur, &i);
295 if (error)
296 goto out_error;
297 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
298 } else if (ltrec.rm_startblock == bno) {
299 /*
300 * overlap left hand side of extent: move the start, trim the
301 * length and update the current record.
302 *
303 * ltbno ltlen
304 * Orig: |oooooooooooooooooooo|
305 * Freeing: |fffffffff|
306 * Result: |rrrrrrrrrr|
307 * bno len
308 */
309 ltrec.rm_startblock += len;
310 ltrec.rm_blockcount -= len;
311 if (!ignore_off)
312 ltrec.rm_offset += len;
313 error = xfs_rmap_update(cur, &ltrec);
314 if (error)
315 goto out_error;
316 } else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) {
317 /*
318 * overlap right hand side of extent: trim the length and update
319 * the current record.
320 *
321 * ltbno ltlen
322 * Orig: |oooooooooooooooooooo|
323 * Freeing: |fffffffff|
324 * Result: |rrrrrrrrrr|
325 * bno len
326 */
327 ltrec.rm_blockcount -= len;
328 error = xfs_rmap_update(cur, &ltrec);
329 if (error)
330 goto out_error;
331 } else {
332
333 /*
334 * overlap middle of extent: trim the length of the existing
335 * record to the length of the new left-extent size, increment
336 * the insertion position so we can insert a new record
337 * containing the remaining right-extent space.
338 *
339 * ltbno ltlen
340 * Orig: |oooooooooooooooooooo|
341 * Freeing: |fffffffff|
342 * Result: |rrrrr| |rrrr|
343 * bno len
344 */
345 xfs_extlen_t orig_len = ltrec.rm_blockcount;
346
347 ltrec.rm_blockcount = bno - ltrec.rm_startblock;
348 error = xfs_rmap_update(cur, &ltrec);
349 if (error)
350 goto out_error;
351
352 error = xfs_btree_increment(cur, 0, &i);
353 if (error)
354 goto out_error;
355
356 cur->bc_rec.r.rm_startblock = bno + len;
357 cur->bc_rec.r.rm_blockcount = orig_len - len -
358 ltrec.rm_blockcount;
359 cur->bc_rec.r.rm_owner = ltrec.rm_owner;
360 if (ignore_off)
361 cur->bc_rec.r.rm_offset = 0;
362 else
363 cur->bc_rec.r.rm_offset = offset + len;
364 cur->bc_rec.r.rm_flags = flags;
365 trace_xfs_rmap_insert(mp, cur->bc_private.a.agno,
366 cur->bc_rec.r.rm_startblock,
367 cur->bc_rec.r.rm_blockcount,
368 cur->bc_rec.r.rm_owner,
369 cur->bc_rec.r.rm_offset,
370 cur->bc_rec.r.rm_flags);
371 error = xfs_btree_insert(cur, &i);
372 if (error)
373 goto out_error;
374 }
375
376out_done:
377 trace_xfs_rmap_unmap_done(mp, cur->bc_private.a.agno, bno, len,
378 unwritten, oinfo);
379out_error:
380 if (error)
381 trace_xfs_rmap_unmap_error(mp, cur->bc_private.a.agno,
382 error, _RET_IP_);
383 return error;
384}
385
386/*
387 * Remove a reference to an extent in the rmap btree.
388 */
389int
390xfs_rmap_free(
391 struct xfs_trans *tp,
392 struct xfs_buf *agbp,
393 xfs_agnumber_t agno,
394 xfs_agblock_t bno,
395 xfs_extlen_t len,
396 struct xfs_owner_info *oinfo)
397{
398 struct xfs_mount *mp = tp->t_mountp;
399 struct xfs_btree_cur *cur;
400 int error;
401
402 if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
403 return 0;
404
405 cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
406
407 error = xfs_rmap_unmap(cur, bno, len, false, oinfo);
408 if (error)
409 goto out_error;
410
411 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
412 return 0;
413
414out_error:
415 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
416 return error;
417}
418
419/*
420 * A mergeable rmap must have the same owner and the same values for
421 * the unwritten, attr_fork, and bmbt flags. The startblock and
422 * offset are checked separately.
423 */
424static bool
425xfs_rmap_is_mergeable(
426 struct xfs_rmap_irec *irec,
427 uint64_t owner,
428 unsigned int flags)
429{
430 if (irec->rm_owner == XFS_RMAP_OWN_NULL)
431 return false;
432 if (irec->rm_owner != owner)
433 return false;
434 if ((flags & XFS_RMAP_UNWRITTEN) ^
435 (irec->rm_flags & XFS_RMAP_UNWRITTEN))
436 return false;
437 if ((flags & XFS_RMAP_ATTR_FORK) ^
438 (irec->rm_flags & XFS_RMAP_ATTR_FORK))
439 return false;
440 if ((flags & XFS_RMAP_BMBT_BLOCK) ^
441 (irec->rm_flags & XFS_RMAP_BMBT_BLOCK))
442 return false;
443 return true;
444}
445
446/*
447 * When we allocate a new block, the first thing we do is add a reference to
448 * the extent in the rmap btree. This takes the form of a [agbno, length,
449 * owner, offset] record. Flags are encoded in the high bits of the offset
450 * field.
451 */
452STATIC int
453xfs_rmap_map(
454 struct xfs_btree_cur *cur,
455 xfs_agblock_t bno,
456 xfs_extlen_t len,
457 bool unwritten,
458 struct xfs_owner_info *oinfo)
459{
460 struct xfs_mount *mp = cur->bc_mp;
461 struct xfs_rmap_irec ltrec;
462 struct xfs_rmap_irec gtrec;
463 int have_gt;
464 int have_lt;
465 int error = 0;
466 int i;
467 uint64_t owner;
468 uint64_t offset;
469 unsigned int flags = 0;
470 bool ignore_off;
471
472 xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
473 ASSERT(owner != 0);
474 ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) ||
475 (flags & XFS_RMAP_BMBT_BLOCK);
476 if (unwritten)
477 flags |= XFS_RMAP_UNWRITTEN;
478 trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
479 unwritten, oinfo);
480
481 /*
482 * For the initial lookup, look for an exact match or the left-adjacent
483 * record for our insertion point. This will also give us the record for
484 * start block contiguity tests.
485 */
486 error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags,
487 &have_lt);
488 if (error)
489 goto out_error;
490 XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error);
491
492 error = xfs_rmap_get_rec(cur, &ltrec, &have_lt);
493 if (error)
494 goto out_error;
495 XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error);
496 trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
497 cur->bc_private.a.agno, ltrec.rm_startblock,
498 ltrec.rm_blockcount, ltrec.rm_owner,
499 ltrec.rm_offset, ltrec.rm_flags);
500
501 if (!xfs_rmap_is_mergeable(&ltrec, owner, flags))
502 have_lt = 0;
503
504 XFS_WANT_CORRUPTED_GOTO(mp,
505 have_lt == 0 ||
506 ltrec.rm_startblock + ltrec.rm_blockcount <= bno, out_error);
507
508 /*
509 * Increment the cursor to see if we have a right-adjacent record to our
510 * insertion point. This will give us the record for end block
511 * contiguity tests.
512 */
513 error = xfs_btree_increment(cur, 0, &have_gt);
514 if (error)
515 goto out_error;
516 if (have_gt) {
517 error = xfs_rmap_get_rec(cur, &gtrec, &have_gt);
518 if (error)
519 goto out_error;
520 XFS_WANT_CORRUPTED_GOTO(mp, have_gt == 1, out_error);
521 XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= gtrec.rm_startblock,
522 out_error);
523 trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
524 cur->bc_private.a.agno, gtrec.rm_startblock,
525 gtrec.rm_blockcount, gtrec.rm_owner,
526 gtrec.rm_offset, gtrec.rm_flags);
527 if (!xfs_rmap_is_mergeable(&gtrec, owner, flags))
528 have_gt = 0;
529 }
530
531 /*
532 * Note: cursor currently points one record to the right of ltrec, even
533 * if there is no record in the tree to the right.
534 */
535 if (have_lt &&
536 ltrec.rm_startblock + ltrec.rm_blockcount == bno &&
537 (ignore_off || ltrec.rm_offset + ltrec.rm_blockcount == offset)) {
538 /*
539 * left edge contiguous, merge into left record.
540 *
541 * ltbno ltlen
542 * orig: |ooooooooo|
543 * adding: |aaaaaaaaa|
544 * result: |rrrrrrrrrrrrrrrrrrr|
545 * bno len
546 */
547 ltrec.rm_blockcount += len;
548 if (have_gt &&
549 bno + len == gtrec.rm_startblock &&
550 (ignore_off || offset + len == gtrec.rm_offset) &&
551 (unsigned long)ltrec.rm_blockcount + len +
552 gtrec.rm_blockcount <= XFS_RMAP_LEN_MAX) {
553 /*
554 * right edge also contiguous, delete right record
555 * and merge into left record.
556 *
557 * ltbno ltlen gtbno gtlen
558 * orig: |ooooooooo| |ooooooooo|
559 * adding: |aaaaaaaaa|
560 * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
561 */
562 ltrec.rm_blockcount += gtrec.rm_blockcount;
563 trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
564 gtrec.rm_startblock,
565 gtrec.rm_blockcount,
566 gtrec.rm_owner,
567 gtrec.rm_offset,
568 gtrec.rm_flags);
569 error = xfs_btree_delete(cur, &i);
570 if (error)
571 goto out_error;
572 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
573 }
574
575 /* point the cursor back to the left record and update */
576 error = xfs_btree_decrement(cur, 0, &have_gt);
577 if (error)
578 goto out_error;
579 error = xfs_rmap_update(cur, &ltrec);
580 if (error)
581 goto out_error;
582 } else if (have_gt &&
583 bno + len == gtrec.rm_startblock &&
584 (ignore_off || offset + len == gtrec.rm_offset)) {
585 /*
586 * right edge contiguous, merge into right record.
587 *
588 * gtbno gtlen
589 * Orig: |ooooooooo|
590 * adding: |aaaaaaaaa|
591 * Result: |rrrrrrrrrrrrrrrrrrr|
592 * bno len
593 */
594 gtrec.rm_startblock = bno;
595 gtrec.rm_blockcount += len;
596 if (!ignore_off)
597 gtrec.rm_offset = offset;
598 error = xfs_rmap_update(cur, &gtrec);
599 if (error)
600 goto out_error;
601 } else {
602 /*
603 * no contiguous edge with identical owner, insert
604 * new record at current cursor position.
605 */
606 cur->bc_rec.r.rm_startblock = bno;
607 cur->bc_rec.r.rm_blockcount = len;
608 cur->bc_rec.r.rm_owner = owner;
609 cur->bc_rec.r.rm_offset = offset;
610 cur->bc_rec.r.rm_flags = flags;
611 trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, len,
612 owner, offset, flags);
613 error = xfs_btree_insert(cur, &i);
614 if (error)
615 goto out_error;
616 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
617 }
618
619 trace_xfs_rmap_map_done(mp, cur->bc_private.a.agno, bno, len,
620 unwritten, oinfo);
621out_error:
622 if (error)
623 trace_xfs_rmap_map_error(mp, cur->bc_private.a.agno,
624 error, _RET_IP_);
625 return error;
626}
627
628/*
629 * Add a reference to an extent in the rmap btree.
630 */
631int
632xfs_rmap_alloc(
633 struct xfs_trans *tp,
634 struct xfs_buf *agbp,
635 xfs_agnumber_t agno,
636 xfs_agblock_t bno,
637 xfs_extlen_t len,
638 struct xfs_owner_info *oinfo)
639{
640 struct xfs_mount *mp = tp->t_mountp;
641 struct xfs_btree_cur *cur;
642 int error;
643
644 if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
645 return 0;
646
647 cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
648 error = xfs_rmap_map(cur, bno, len, false, oinfo);
649 if (error)
650 goto out_error;
651
652 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
653 return 0;
654
655out_error:
656 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
657 return error;
658}
659
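As a side note for readers, here is a minimal sketch of how a caller of xfs_rmap_alloc() might fill in the owner information for an AG-internal (non-inode) extent. The helper name, the single-block length and the XFS_RMAP_OWN_INOBT owner value are illustrative assumptions and are not part of this patch:

/*
 * Editorial sketch, not part of the patch: record rmapbt ownership of one
 * newly allocated inode-btree block.
 */
static int
example_rmap_inobt_block(
        struct xfs_trans        *tp,
        struct xfs_buf          *agbp,
        xfs_agnumber_t          agno,
        xfs_agblock_t           bno)
{
        struct xfs_owner_info   oinfo;

        /* non-inode owner; constant assumed from the on-disk format header */
        xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
        return xfs_rmap_alloc(tp, agbp, agno, bno, 1, &oinfo);
}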
660#define RMAP_LEFT_CONTIG (1 << 0)
661#define RMAP_RIGHT_CONTIG (1 << 1)
662#define RMAP_LEFT_FILLING (1 << 2)
663#define RMAP_RIGHT_FILLING (1 << 3)
664#define RMAP_LEFT_VALID (1 << 6)
665#define RMAP_RIGHT_VALID (1 << 7)
666
667#define LEFT r[0]
668#define RIGHT r[1]
669#define PREV r[2]
670#define NEW r[3]
671
672/*
673 * Convert an unwritten extent to a real extent or vice versa.
674 * Does not handle overlapping extents.
675 */
676STATIC int
677xfs_rmap_convert(
678 struct xfs_btree_cur *cur,
679 xfs_agblock_t bno,
680 xfs_extlen_t len,
681 bool unwritten,
682 struct xfs_owner_info *oinfo)
683{
684 struct xfs_mount *mp = cur->bc_mp;
685 struct xfs_rmap_irec r[4]; /* neighbor extent entries */
686 /* left is 0, right is 1, prev is 2 */
687 /* new is 3 */
688 uint64_t owner;
689 uint64_t offset;
690 uint64_t new_endoff;
691 unsigned int oldext;
692 unsigned int newext;
693 unsigned int flags = 0;
694 int i;
695 int state = 0;
696 int error;
697
698 xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
699 ASSERT(!(XFS_RMAP_NON_INODE_OWNER(owner) ||
700 (flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))));
701 oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0;
702 new_endoff = offset + len;
703 trace_xfs_rmap_convert(mp, cur->bc_private.a.agno, bno, len,
704 unwritten, oinfo);
705
706 /*
707 * For the initial lookup, look for an exact match or the left-adjacent
708 * record for our insertion point. This will also give us the record for
709 * start block contiguity tests.
710 */
711 error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
712 if (error)
713 goto done;
714 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
715
716 error = xfs_rmap_get_rec(cur, &PREV, &i);
717 if (error)
718 goto done;
719 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
720 trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
721 cur->bc_private.a.agno, PREV.rm_startblock,
722 PREV.rm_blockcount, PREV.rm_owner,
723 PREV.rm_offset, PREV.rm_flags);
724
725 ASSERT(PREV.rm_offset <= offset);
726 ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff);
727 ASSERT((PREV.rm_flags & XFS_RMAP_UNWRITTEN) == oldext);
728 newext = ~oldext & XFS_RMAP_UNWRITTEN;
729
730 /*
731 * Set flags determining what part of the previous oldext allocation
732 * extent is being replaced by a newext allocation.
733 */
734 if (PREV.rm_offset == offset)
735 state |= RMAP_LEFT_FILLING;
736 if (PREV.rm_offset + PREV.rm_blockcount == new_endoff)
737 state |= RMAP_RIGHT_FILLING;
738
739 /*
740 * Decrement the cursor to see if we have a left-adjacent record to our
741 * insertion point. This will give us the record for start block
742 * contiguity tests.
743 */
744 error = xfs_btree_decrement(cur, 0, &i);
745 if (error)
746 goto done;
747 if (i) {
748 state |= RMAP_LEFT_VALID;
749 error = xfs_rmap_get_rec(cur, &LEFT, &i);
750 if (error)
751 goto done;
752 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
753 XFS_WANT_CORRUPTED_GOTO(mp,
754 LEFT.rm_startblock + LEFT.rm_blockcount <= bno,
755 done);
756 trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
757 cur->bc_private.a.agno, LEFT.rm_startblock,
758 LEFT.rm_blockcount, LEFT.rm_owner,
759 LEFT.rm_offset, LEFT.rm_flags);
760 if (LEFT.rm_startblock + LEFT.rm_blockcount == bno &&
761 LEFT.rm_offset + LEFT.rm_blockcount == offset &&
762 xfs_rmap_is_mergeable(&LEFT, owner, newext))
763 state |= RMAP_LEFT_CONTIG;
764 }
765
766 /*
767 * Increment the cursor to see if we have a right-adjacent record to our
768 * insertion point. This will give us the record for end block
769 * contiguity tests.
770 */
771 error = xfs_btree_increment(cur, 0, &i);
772 if (error)
773 goto done;
774 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
775 error = xfs_btree_increment(cur, 0, &i);
776 if (error)
777 goto done;
778 if (i) {
779 state |= RMAP_RIGHT_VALID;
780 error = xfs_rmap_get_rec(cur, &RIGHT, &i);
781 if (error)
782 goto done;
783 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
784 XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= RIGHT.rm_startblock,
785 done);
786 trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
787 cur->bc_private.a.agno, RIGHT.rm_startblock,
788 RIGHT.rm_blockcount, RIGHT.rm_owner,
789 RIGHT.rm_offset, RIGHT.rm_flags);
790 if (bno + len == RIGHT.rm_startblock &&
791 offset + len == RIGHT.rm_offset &&
792 xfs_rmap_is_mergeable(&RIGHT, owner, newext))
793 state |= RMAP_RIGHT_CONTIG;
794 }
795
796 /* check that left + prev + right is not too long */
797 if ((state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
798 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) ==
799 (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
800 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG) &&
801 (unsigned long)LEFT.rm_blockcount + len +
802 RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX)
803 state &= ~RMAP_RIGHT_CONTIG;
804
805 trace_xfs_rmap_convert_state(mp, cur->bc_private.a.agno, state,
806 _RET_IP_);
807
808 /* reset the cursor back to PREV */
809 error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
810 if (error)
811 goto done;
812 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
813
814 /*
815 * Switch out based on the FILLING and CONTIG state bits.
816 */
817 switch (state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
818 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) {
819 case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
820 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
821 /*
822 * Setting all of a previous oldext extent to newext.
823 * The left and right neighbors are both contiguous with new.
824 */
825 error = xfs_btree_increment(cur, 0, &i);
826 if (error)
827 goto done;
828 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
829 trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
830 RIGHT.rm_startblock, RIGHT.rm_blockcount,
831 RIGHT.rm_owner, RIGHT.rm_offset,
832 RIGHT.rm_flags);
833 error = xfs_btree_delete(cur, &i);
834 if (error)
835 goto done;
836 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
837 error = xfs_btree_decrement(cur, 0, &i);
838 if (error)
839 goto done;
840 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
841 trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
842 PREV.rm_startblock, PREV.rm_blockcount,
843 PREV.rm_owner, PREV.rm_offset,
844 PREV.rm_flags);
845 error = xfs_btree_delete(cur, &i);
846 if (error)
847 goto done;
848 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
849 error = xfs_btree_decrement(cur, 0, &i);
850 if (error)
851 goto done;
852 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
853 NEW = LEFT;
854 NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount;
855 error = xfs_rmap_update(cur, &NEW);
856 if (error)
857 goto done;
858 break;
859
860 case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
861 /*
862 * Setting all of a previous oldext extent to newext.
863 * The left neighbor is contiguous, the right is not.
864 */
865 trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
866 PREV.rm_startblock, PREV.rm_blockcount,
867 PREV.rm_owner, PREV.rm_offset,
868 PREV.rm_flags);
869 error = xfs_btree_delete(cur, &i);
870 if (error)
871 goto done;
872 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
873 error = xfs_btree_decrement(cur, 0, &i);
874 if (error)
875 goto done;
876 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
877 NEW = LEFT;
878 NEW.rm_blockcount += PREV.rm_blockcount;
879 error = xfs_rmap_update(cur, &NEW);
880 if (error)
881 goto done;
882 break;
883
884 case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
885 /*
886 * Setting all of a previous oldext extent to newext.
887 * The right neighbor is contiguous, the left is not.
888 */
889 error = xfs_btree_increment(cur, 0, &i);
890 if (error)
891 goto done;
892 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
893 trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
894 RIGHT.rm_startblock, RIGHT.rm_blockcount,
895 RIGHT.rm_owner, RIGHT.rm_offset,
896 RIGHT.rm_flags);
897 error = xfs_btree_delete(cur, &i);
898 if (error)
899 goto done;
900 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
901 error = xfs_btree_decrement(cur, 0, &i);
902 if (error)
903 goto done;
904 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
905 NEW = PREV;
906 NEW.rm_blockcount = len + RIGHT.rm_blockcount;
907 NEW.rm_flags = newext;
908 error = xfs_rmap_update(cur, &NEW);
909 if (error)
910 goto done;
911 break;
912
913 case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING:
914 /*
915 * Setting all of a previous oldext extent to newext.
916 * Neither the left nor right neighbors are contiguous with
917 * the new one.
918 */
919 NEW = PREV;
920 NEW.rm_flags = newext;
921 error = xfs_rmap_update(cur, &NEW);
922 if (error)
923 goto done;
924 break;
925
926 case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG:
927 /*
928 * Setting the first part of a previous oldext extent to newext.
929 * The left neighbor is contiguous.
930 */
931 NEW = PREV;
932 NEW.rm_offset += len;
933 NEW.rm_startblock += len;
934 NEW.rm_blockcount -= len;
935 error = xfs_rmap_update(cur, &NEW);
936 if (error)
937 goto done;
938 error = xfs_btree_decrement(cur, 0, &i);
939 if (error)
940 goto done;
941 NEW = LEFT;
942 NEW.rm_blockcount += len;
943 error = xfs_rmap_update(cur, &NEW);
944 if (error)
945 goto done;
946 break;
947
948 case RMAP_LEFT_FILLING:
949 /*
950 * Setting the first part of a previous oldext extent to newext.
951 * The left neighbor is not contiguous.
952 */
953 NEW = PREV;
954 NEW.rm_startblock += len;
955 NEW.rm_offset += len;
956 NEW.rm_blockcount -= len;
957 error = xfs_rmap_update(cur, &NEW);
958 if (error)
959 goto done;
960 NEW.rm_startblock = bno;
961 NEW.rm_owner = owner;
962 NEW.rm_offset = offset;
963 NEW.rm_blockcount = len;
964 NEW.rm_flags = newext;
965 cur->bc_rec.r = NEW;
966 trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno,
967 len, owner, offset, newext);
968 error = xfs_btree_insert(cur, &i);
969 if (error)
970 goto done;
971 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
972 break;
973
974 case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
975 /*
976 * Setting the last part of a previous oldext extent to newext.
977 * The right neighbor is contiguous with the new allocation.
978 */
979 NEW = PREV;
980 NEW.rm_blockcount -= len;
981 error = xfs_rmap_update(cur, &NEW);
982 if (error)
983 goto done;
984 error = xfs_btree_increment(cur, 0, &i);
985 if (error)
986 goto done;
987 NEW = RIGHT;
988 NEW.rm_offset = offset;
989 NEW.rm_startblock = bno;
990 NEW.rm_blockcount += len;
991 error = xfs_rmap_update(cur, &NEW);
992 if (error)
993 goto done;
994 break;
995
996 case RMAP_RIGHT_FILLING:
997 /*
998 * Setting the last part of a previous oldext extent to newext.
999 * The right neighbor is not contiguous.
1000 */
1001 NEW = PREV;
1002 NEW.rm_blockcount -= len;
1003 error = xfs_rmap_update(cur, &NEW);
1004 if (error)
1005 goto done;
1006 error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset,
1007 oldext, &i);
1008 if (error)
1009 goto done;
1010 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1011 NEW.rm_startblock = bno;
1012 NEW.rm_owner = owner;
1013 NEW.rm_offset = offset;
1014 NEW.rm_blockcount = len;
1015 NEW.rm_flags = newext;
1016 cur->bc_rec.r = NEW;
1017 trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno,
1018 len, owner, offset, newext);
1019 error = xfs_btree_insert(cur, &i);
1020 if (error)
1021 goto done;
1022 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1023 break;
1024
1025 case 0:
1026 /*
1027 * Setting the middle part of a previous oldext extent to
1028 * newext. Contiguity is impossible here.
1029 * One extent becomes three extents.
1030 */
1031 /* new right extent - oldext */
1032 NEW.rm_startblock = bno + len;
1033 NEW.rm_owner = owner;
1034 NEW.rm_offset = new_endoff;
1035 NEW.rm_blockcount = PREV.rm_offset + PREV.rm_blockcount -
1036 new_endoff;
1037 NEW.rm_flags = PREV.rm_flags;
1038 error = xfs_rmap_update(cur, &NEW);
1039 if (error)
1040 goto done;
1041 /* new left extent - oldext */
1042 NEW = PREV;
1043 NEW.rm_blockcount = offset - PREV.rm_offset;
1044 cur->bc_rec.r = NEW;
1045 trace_xfs_rmap_insert(mp, cur->bc_private.a.agno,
1046 NEW.rm_startblock, NEW.rm_blockcount,
1047 NEW.rm_owner, NEW.rm_offset,
1048 NEW.rm_flags);
1049 error = xfs_btree_insert(cur, &i);
1050 if (error)
1051 goto done;
1052 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1053 /*
1054 * Reset the cursor to the position of the new extent
1055 * we are about to insert as we can't trust it after
1056 * the previous insert.
1057 */
1058 error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset,
1059 oldext, &i);
1060 if (error)
1061 goto done;
1062 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1063 /* new middle extent - newext */
1064 cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN;
1065 cur->bc_rec.r.rm_flags |= newext;
1066 trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, len,
1067 owner, offset, newext);
1068 error = xfs_btree_insert(cur, &i);
1069 if (error)
1070 goto done;
1071 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1072 break;
1073
1074 case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1075 case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1076 case RMAP_LEFT_FILLING | RMAP_RIGHT_CONTIG:
1077 case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
1078 case RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1079 case RMAP_LEFT_CONTIG:
1080 case RMAP_RIGHT_CONTIG:
1081 /*
1082 * These cases are all impossible.
1083 */
1084 ASSERT(0);
1085 }
1086
1087 trace_xfs_rmap_convert_done(mp, cur->bc_private.a.agno, bno, len,
1088 unwritten, oinfo);
1089done:
1090 if (error)
1091 trace_xfs_rmap_convert_error(cur->bc_mp,
1092 cur->bc_private.a.agno, error, _RET_IP_);
1093 return error;
1094}
1095
1096#undef NEW
1097#undef LEFT
1098#undef RIGHT
1099#undef PREV
1100
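To make the FILLING/CONTIG state machine above concrete, a small worked example (editorial, with made-up numbers):

/*
 * Editorial example: PREV = (startblock 100, blockcount 8, offset 40,
 * UNWRITTEN), LEFT = (92, 8, 32, written), RIGHT = (108, 8, 48, written).
 * Converting bno 100, len 8 sets RMAP_LEFT_FILLING and RMAP_RIGHT_FILLING
 * because the range covers all of PREV, and both CONTIG bits because the
 * neighbours abut in block and offset space and are mergeable.  The first
 * switch case then deletes RIGHT and PREV and grows LEFT into a single
 * record (92, 24, 32, written).
 */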
1101struct xfs_rmap_query_range_info {
1102 xfs_rmap_query_range_fn fn;
1103 void *priv;
1104};
1105
1106/* Format btree record and pass to our callback. */
1107STATIC int
1108xfs_rmap_query_range_helper(
1109 struct xfs_btree_cur *cur,
1110 union xfs_btree_rec *rec,
1111 void *priv)
1112{
1113 struct xfs_rmap_query_range_info *query = priv;
1114 struct xfs_rmap_irec irec;
1115 int error;
1116
1117 error = xfs_rmap_btrec_to_irec(rec, &irec);
1118 if (error)
1119 return error;
1120 return query->fn(cur, &irec, query->priv);
1121}
1122
1123/* Find all rmaps between two keys. */
1124int
1125xfs_rmap_query_range(
1126 struct xfs_btree_cur *cur,
1127 struct xfs_rmap_irec *low_rec,
1128 struct xfs_rmap_irec *high_rec,
1129 xfs_rmap_query_range_fn fn,
1130 void *priv)
1131{
1132 union xfs_btree_irec low_brec;
1133 union xfs_btree_irec high_brec;
1134 struct xfs_rmap_query_range_info query;
1135
1136 low_brec.r = *low_rec;
1137 high_brec.r = *high_rec;
1138 query.priv = priv;
1139 query.fn = fn;
1140 return xfs_btree_query_range(cur, &low_brec, &high_brec,
1141 xfs_rmap_query_range_helper, &query);
1142}
1143
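A minimal usage sketch for the range query interface (editorial; the callback and its counting logic are assumptions, not callers added by this patch):

struct example_owner_count {
        __uint64_t      owner;
        __uint64_t      nr;
};

static int
example_count_owner_fn(
        struct xfs_btree_cur    *cur,
        struct xfs_rmap_irec    *rec,
        void                    *priv)
{
        struct example_owner_count      *oc = priv;

        /* count records in the queried range belonging to one owner */
        if (rec->rm_owner == oc->owner)
                oc->nr++;
        return 0;
}

/* ...caller: error = xfs_rmap_query_range(cur, &low, &high,
 *                      example_count_owner_fn, &oc); */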
1144/* Clean up after calling xfs_rmap_finish_one. */
1145void
1146xfs_rmap_finish_one_cleanup(
1147 struct xfs_trans *tp,
1148 struct xfs_btree_cur *rcur,
1149 int error)
1150{
1151 struct xfs_buf *agbp;
1152
1153 if (rcur == NULL)
1154 return;
1155 agbp = rcur->bc_private.a.agbp;
1156 xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
1157 if (error)
1158 xfs_trans_brelse(tp, agbp);
1159}
1160
1161/*
1162 * Process one of the deferred rmap operations. We pass back the
1163 * btree cursor to maintain our lock on the rmapbt between calls.
1164 * This saves time and eliminates a buffer deadlock between the
1165 * superblock and the AGF because we'll always grab them in the same
1166 * order.
1167 */
1168int
1169xfs_rmap_finish_one(
1170 struct xfs_trans *tp,
1171 enum xfs_rmap_intent_type type,
1172 __uint64_t owner,
1173 int whichfork,
1174 xfs_fileoff_t startoff,
1175 xfs_fsblock_t startblock,
1176 xfs_filblks_t blockcount,
1177 xfs_exntst_t state,
1178 struct xfs_btree_cur **pcur)
1179{
1180 struct xfs_mount *mp = tp->t_mountp;
1181 struct xfs_btree_cur *rcur;
1182 struct xfs_buf *agbp = NULL;
1183 int error = 0;
1184 xfs_agnumber_t agno;
1185 struct xfs_owner_info oinfo;
1186 xfs_agblock_t bno;
1187 bool unwritten;
1188
1189 agno = XFS_FSB_TO_AGNO(mp, startblock);
1190 ASSERT(agno != NULLAGNUMBER);
1191 bno = XFS_FSB_TO_AGBNO(mp, startblock);
1192
1193 trace_xfs_rmap_deferred(mp, agno, type, bno, owner, whichfork,
1194 startoff, blockcount, state);
1195
1196 if (XFS_TEST_ERROR(false, mp,
1197 XFS_ERRTAG_RMAP_FINISH_ONE,
1198 XFS_RANDOM_RMAP_FINISH_ONE))
1199 return -EIO;
1200
1201 /*
1202 * If we haven't gotten a cursor or the cursor AG doesn't match
1203 * the startblock, get one now.
1204 */
1205 rcur = *pcur;
1206 if (rcur != NULL && rcur->bc_private.a.agno != agno) {
1207 xfs_rmap_finish_one_cleanup(tp, rcur, 0);
1208 rcur = NULL;
1209 *pcur = NULL;
1210 }
1211 if (rcur == NULL) {
1212 /*
1213 * Refresh the freelist before we start changing the
1214 * rmapbt, because a shape change could cause us to
1215 * allocate blocks.
1216 */
1217 error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
1218 if (error)
1219 return error;
1220 if (!agbp)
1221 return -EFSCORRUPTED;
1222
1223 rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
1224 if (!rcur) {
1225 error = -ENOMEM;
1226 goto out_cur;
1227 }
1228 }
1229 *pcur = rcur;
1230
1231 xfs_rmap_ino_owner(&oinfo, owner, whichfork, startoff);
1232 unwritten = state == XFS_EXT_UNWRITTEN;
1233 bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, startblock);
1234
1235 switch (type) {
1236 case XFS_RMAP_ALLOC:
1237 case XFS_RMAP_MAP:
1238 error = xfs_rmap_map(rcur, bno, blockcount, unwritten, &oinfo);
1239 break;
1240 case XFS_RMAP_FREE:
1241 case XFS_RMAP_UNMAP:
1242 error = xfs_rmap_unmap(rcur, bno, blockcount, unwritten,
1243 &oinfo);
1244 break;
1245 case XFS_RMAP_CONVERT:
1246 error = xfs_rmap_convert(rcur, bno, blockcount, !unwritten,
1247 &oinfo);
1248 break;
1249 default:
1250 ASSERT(0);
1251 error = -EFSCORRUPTED;
1252 }
1253 return error;
1254
1255out_cur:
1256 xfs_trans_brelse(tp, agbp);
1257
1258 return error;
1259}
1260
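For orientation, a rough sketch of the calling pattern the comment above describes. The intent list and transaction come from the deferred-ops machinery elsewhere in this series, so treat this as an illustration only:

        /* Editorial sketch: process queued intents, reusing one cursor per AG. */
        struct xfs_btree_cur    *rcur = NULL;
        struct xfs_rmap_intent  *ri;
        int                     error = 0;

        list_for_each_entry(ri, &intent_list, ri_list) {        /* assumed list */
                error = xfs_rmap_finish_one(tp, ri->ri_type, ri->ri_owner,
                                ri->ri_whichfork, ri->ri_bmap.br_startoff,
                                ri->ri_bmap.br_startblock,
                                ri->ri_bmap.br_blockcount,
                                ri->ri_bmap.br_state, &rcur);
                if (error)
                        break;
        }
        xfs_rmap_finish_one_cleanup(tp, rcur, error);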
1261/*
1262 * Don't defer an rmap if we aren't an rmap filesystem.
1263 */
1264static bool
1265xfs_rmap_update_is_needed(
1266 struct xfs_mount *mp)
1267{
1268 return xfs_sb_version_hasrmapbt(&mp->m_sb);
1269}
1270
1271/*
1272 * Record an rmap intent; the list is kept sorted first by AG and then by
1273 * increasing age.
1274 */
1275static int
1276__xfs_rmap_add(
1277 struct xfs_mount *mp,
1278 struct xfs_defer_ops *dfops,
1279 enum xfs_rmap_intent_type type,
1280 __uint64_t owner,
1281 int whichfork,
1282 struct xfs_bmbt_irec *bmap)
1283{
1284 struct xfs_rmap_intent *ri;
1285
1286 trace_xfs_rmap_defer(mp, XFS_FSB_TO_AGNO(mp, bmap->br_startblock),
1287 type,
1288 XFS_FSB_TO_AGBNO(mp, bmap->br_startblock),
1289 owner, whichfork,
1290 bmap->br_startoff,
1291 bmap->br_blockcount,
1292 bmap->br_state);
1293
1294 ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_SLEEP | KM_NOFS);
1295 INIT_LIST_HEAD(&ri->ri_list);
1296 ri->ri_type = type;
1297 ri->ri_owner = owner;
1298 ri->ri_whichfork = whichfork;
1299 ri->ri_bmap = *bmap;
1300
1301 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list);
1302 return 0;
1303}
1304
1305/* Map an extent into a file. */
1306int
1307xfs_rmap_map_extent(
1308 struct xfs_mount *mp,
1309 struct xfs_defer_ops *dfops,
1310 struct xfs_inode *ip,
1311 int whichfork,
1312 struct xfs_bmbt_irec *PREV)
1313{
1314 if (!xfs_rmap_update_is_needed(mp))
1315 return 0;
1316
1317 return __xfs_rmap_add(mp, dfops, XFS_RMAP_MAP, ip->i_ino,
1318 whichfork, PREV);
1319}
1320
1321/* Unmap an extent out of a file. */
1322int
1323xfs_rmap_unmap_extent(
1324 struct xfs_mount *mp,
1325 struct xfs_defer_ops *dfops,
1326 struct xfs_inode *ip,
1327 int whichfork,
1328 struct xfs_bmbt_irec *PREV)
1329{
1330 if (!xfs_rmap_update_is_needed(mp))
1331 return 0;
1332
1333 return __xfs_rmap_add(mp, dfops, XFS_RMAP_UNMAP, ip->i_ino,
1334 whichfork, PREV);
1335}
1336
1337/* Convert a data fork extent from unwritten to real or vice versa. */
1338int
1339xfs_rmap_convert_extent(
1340 struct xfs_mount *mp,
1341 struct xfs_defer_ops *dfops,
1342 struct xfs_inode *ip,
1343 int whichfork,
1344 struct xfs_bmbt_irec *PREV)
1345{
1346 if (!xfs_rmap_update_is_needed(mp))
1347 return 0;
1348
1349 return __xfs_rmap_add(mp, dfops, XFS_RMAP_CONVERT, ip->i_ino,
1350 whichfork, PREV);
1351}
1352
1353/* Schedule the creation of an rmap for non-file data. */
1354int
1355xfs_rmap_alloc_extent(
1356 struct xfs_mount *mp,
1357 struct xfs_defer_ops *dfops,
1358 xfs_agnumber_t agno,
1359 xfs_agblock_t bno,
1360 xfs_extlen_t len,
1361 __uint64_t owner)
1362{
1363 struct xfs_bmbt_irec bmap;
1364
1365 if (!xfs_rmap_update_is_needed(mp))
1366 return 0;
1367
1368 bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
1369 bmap.br_blockcount = len;
1370 bmap.br_startoff = 0;
1371 bmap.br_state = XFS_EXT_NORM;
1372
1373 return __xfs_rmap_add(mp, dfops, XFS_RMAP_ALLOC, owner,
1374 XFS_DATA_FORK, &bmap);
1375}
1376
1377/* Schedule the deletion of an rmap for non-file data. */
1378int
1379xfs_rmap_free_extent(
1380 struct xfs_mount *mp,
1381 struct xfs_defer_ops *dfops,
1382 xfs_agnumber_t agno,
1383 xfs_agblock_t bno,
1384 xfs_extlen_t len,
1385 __uint64_t owner)
1386{
1387 struct xfs_bmbt_irec bmap;
1388
1389 if (!xfs_rmap_update_is_needed(mp))
1390 return 0;
1391
1392 bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
1393 bmap.br_blockcount = len;
1394 bmap.br_startoff = 0;
1395 bmap.br_state = XFS_EXT_NORM;
1396
1397 return __xfs_rmap_add(mp, dfops, XFS_RMAP_FREE, owner,
1398 XFS_DATA_FORK, &bmap);
1399}
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
new file mode 100644
index 000000000000..71cf99a4acba
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -0,0 +1,209 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#ifndef __XFS_RMAP_H__
21#define __XFS_RMAP_H__
22
23static inline void
24xfs_rmap_ag_owner(
25 struct xfs_owner_info *oi,
26 uint64_t owner)
27{
28 oi->oi_owner = owner;
29 oi->oi_offset = 0;
30 oi->oi_flags = 0;
31}
32
33static inline void
34xfs_rmap_ino_bmbt_owner(
35 struct xfs_owner_info *oi,
36 xfs_ino_t ino,
37 int whichfork)
38{
39 oi->oi_owner = ino;
40 oi->oi_offset = 0;
41 oi->oi_flags = XFS_OWNER_INFO_BMBT_BLOCK;
42 if (whichfork == XFS_ATTR_FORK)
43 oi->oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
44}
45
46static inline void
47xfs_rmap_ino_owner(
48 struct xfs_owner_info *oi,
49 xfs_ino_t ino,
50 int whichfork,
51 xfs_fileoff_t offset)
52{
53 oi->oi_owner = ino;
54 oi->oi_offset = offset;
55 oi->oi_flags = 0;
56 if (whichfork == XFS_ATTR_FORK)
57 oi->oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
58}
59
60static inline void
61xfs_rmap_skip_owner_update(
62 struct xfs_owner_info *oi)
63{
64 oi->oi_owner = XFS_RMAP_OWN_UNKNOWN;
65}
66
67/* Reverse mapping functions. */
68
69struct xfs_buf;
70
71static inline __u64
72xfs_rmap_irec_offset_pack(
73 const struct xfs_rmap_irec *irec)
74{
75 __u64 x;
76
77 x = XFS_RMAP_OFF(irec->rm_offset);
78 if (irec->rm_flags & XFS_RMAP_ATTR_FORK)
79 x |= XFS_RMAP_OFF_ATTR_FORK;
80 if (irec->rm_flags & XFS_RMAP_BMBT_BLOCK)
81 x |= XFS_RMAP_OFF_BMBT_BLOCK;
82 if (irec->rm_flags & XFS_RMAP_UNWRITTEN)
83 x |= XFS_RMAP_OFF_UNWRITTEN;
84 return x;
85}
86
87static inline int
88xfs_rmap_irec_offset_unpack(
89 __u64 offset,
90 struct xfs_rmap_irec *irec)
91{
92 if (offset & ~(XFS_RMAP_OFF_MASK | XFS_RMAP_OFF_FLAGS))
93 return -EFSCORRUPTED;
94 irec->rm_offset = XFS_RMAP_OFF(offset);
95 if (offset & XFS_RMAP_OFF_ATTR_FORK)
96 irec->rm_flags |= XFS_RMAP_ATTR_FORK;
97 if (offset & XFS_RMAP_OFF_BMBT_BLOCK)
98 irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
99 if (offset & XFS_RMAP_OFF_UNWRITTEN)
100 irec->rm_flags |= XFS_RMAP_UNWRITTEN;
101 return 0;
102}
103
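A quick round-trip example for the two helpers above (editorial; the flag bit values live in the on-disk format header and are not shown here):

/*
 * Editorial example: an attr-fork mapping at file offset 50 packs to
 * (XFS_RMAP_OFF_ATTR_FORK | 50); unpacking that value yields
 * rm_offset == 50 with XFS_RMAP_ATTR_FORK set in rm_flags, and any bit
 * outside XFS_RMAP_OFF_MASK | XFS_RMAP_OFF_FLAGS makes the unpack helper
 * return -EFSCORRUPTED.
 */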
104static inline void
105xfs_owner_info_unpack(
106 struct xfs_owner_info *oinfo,
107 uint64_t *owner,
108 uint64_t *offset,
109 unsigned int *flags)
110{
111 unsigned int r = 0;
112
113 *owner = oinfo->oi_owner;
114 *offset = oinfo->oi_offset;
115 if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
116 r |= XFS_RMAP_ATTR_FORK;
117 if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
118 r |= XFS_RMAP_BMBT_BLOCK;
119 *flags = r;
120}
121
122static inline void
123xfs_owner_info_pack(
124 struct xfs_owner_info *oinfo,
125 uint64_t owner,
126 uint64_t offset,
127 unsigned int flags)
128{
129 oinfo->oi_owner = owner;
130 oinfo->oi_offset = XFS_RMAP_OFF(offset);
131 oinfo->oi_flags = 0;
132 if (flags & XFS_RMAP_ATTR_FORK)
133 oinfo->oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
134 if (flags & XFS_RMAP_BMBT_BLOCK)
135 oinfo->oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
136}
137
138int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp,
139 xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
140 struct xfs_owner_info *oinfo);
141int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
142 xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
143 struct xfs_owner_info *oinfo);
144
145int xfs_rmap_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
146 xfs_extlen_t len, uint64_t owner, uint64_t offset,
147 unsigned int flags, int *stat);
148int xfs_rmap_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
149 xfs_extlen_t len, uint64_t owner, uint64_t offset,
150 unsigned int flags, int *stat);
151int xfs_rmap_insert(struct xfs_btree_cur *rcur, xfs_agblock_t agbno,
152 xfs_extlen_t len, uint64_t owner, uint64_t offset,
153 unsigned int flags);
154int xfs_rmap_get_rec(struct xfs_btree_cur *cur, struct xfs_rmap_irec *irec,
155 int *stat);
156
157typedef int (*xfs_rmap_query_range_fn)(
158 struct xfs_btree_cur *cur,
159 struct xfs_rmap_irec *rec,
160 void *priv);
161
162int xfs_rmap_query_range(struct xfs_btree_cur *cur,
163 struct xfs_rmap_irec *low_rec, struct xfs_rmap_irec *high_rec,
164 xfs_rmap_query_range_fn fn, void *priv);
165
166enum xfs_rmap_intent_type {
167 XFS_RMAP_MAP,
168 XFS_RMAP_MAP_SHARED,
169 XFS_RMAP_UNMAP,
170 XFS_RMAP_UNMAP_SHARED,
171 XFS_RMAP_CONVERT,
172 XFS_RMAP_CONVERT_SHARED,
173 XFS_RMAP_ALLOC,
174 XFS_RMAP_FREE,
175};
176
177struct xfs_rmap_intent {
178 struct list_head ri_list;
179 enum xfs_rmap_intent_type ri_type;
180 __uint64_t ri_owner;
181 int ri_whichfork;
182 struct xfs_bmbt_irec ri_bmap;
183};
184
185/* functions for updating the rmapbt based on bmbt map/unmap operations */
186int xfs_rmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
187 struct xfs_inode *ip, int whichfork,
188 struct xfs_bmbt_irec *imap);
189int xfs_rmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
190 struct xfs_inode *ip, int whichfork,
191 struct xfs_bmbt_irec *imap);
192int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
193 struct xfs_inode *ip, int whichfork,
194 struct xfs_bmbt_irec *imap);
195int xfs_rmap_alloc_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
196 xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
197 __uint64_t owner);
198int xfs_rmap_free_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
199 xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
200 __uint64_t owner);
201
202void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
203 struct xfs_btree_cur *rcur, int error);
204int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type,
205 __uint64_t owner, int whichfork, xfs_fileoff_t startoff,
206 xfs_fsblock_t startblock, xfs_filblks_t blockcount,
207 xfs_exntst_t state, struct xfs_btree_cur **pcur);
208
209#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
new file mode 100644
index 000000000000..bc1faebc84ec
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -0,0 +1,511 @@
1/*
2 * Copyright (c) 2014 Red Hat, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_shared.h"
21#include "xfs_format.h"
22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h"
24#include "xfs_bit.h"
25#include "xfs_sb.h"
26#include "xfs_mount.h"
27#include "xfs_defer.h"
28#include "xfs_inode.h"
29#include "xfs_trans.h"
30#include "xfs_alloc.h"
31#include "xfs_btree.h"
32#include "xfs_rmap.h"
33#include "xfs_rmap_btree.h"
34#include "xfs_trace.h"
35#include "xfs_cksum.h"
36#include "xfs_error.h"
37#include "xfs_extent_busy.h"
38
39/*
40 * Reverse map btree.
41 *
42 * This is a per-ag tree used to track the owner(s) of a given extent. With
43 * reflink it is possible for there to be multiple owners, which is a departure
44 * from classic XFS. Owner records for data extents are inserted when the
45 * extent is mapped and removed when an extent is unmapped. Owner records for
46 * all other block types (i.e. metadata) are inserted when an extent is
47 * allocated and removed when an extent is freed. There can only be one owner
48 * of a metadata extent, usually an inode or some other metadata structure like
49 * an AG btree.
50 *
51 * The rmap btree is part of the free space management, so blocks for the tree
52 * are sourced from the agfl. Hence we need transaction reservation support for
53 * this tree so that the freelist is always large enough. This also impacts on
54 * the minimum space we need to leave free in the AG.
55 *
56 * The tree is ordered by [ag block, owner, offset]. This is a large key size,
57 * but it is the only way to enforce unique keys when a block can be owned by
58 * multiple files at any offset. There's no need to order/search by extent
59 * size for online updating/management of the tree. It is intended that most
60 * reverse lookups will be to find the owner(s) of a particular block, or to
61 * try to recover tree and file data from corrupt primary metadata.
62 */
63
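As a concrete illustration of that ordering (editorial, looking ahead to the reflink case the comment mentions): if block 1000 is mapped by inode 131 at file offset 10 and by inode 204 at offset 0, the rmapbt holds two records whose keys are (1000, 131, 10) and (1000, 204, 0). The [startblock, owner, offset] key keeps them unique and adjacent, which is exactly what a by-owner lookup for a given block needs.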
64static struct xfs_btree_cur *
65xfs_rmapbt_dup_cursor(
66 struct xfs_btree_cur *cur)
67{
68 return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
69 cur->bc_private.a.agbp, cur->bc_private.a.agno);
70}
71
72STATIC void
73xfs_rmapbt_set_root(
74 struct xfs_btree_cur *cur,
75 union xfs_btree_ptr *ptr,
76 int inc)
77{
78 struct xfs_buf *agbp = cur->bc_private.a.agbp;
79 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
80 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
81 int btnum = cur->bc_btnum;
82 struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
83
84 ASSERT(ptr->s != 0);
85
86 agf->agf_roots[btnum] = ptr->s;
87 be32_add_cpu(&agf->agf_levels[btnum], inc);
88 pag->pagf_levels[btnum] += inc;
89 xfs_perag_put(pag);
90
91 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
92}
93
94STATIC int
95xfs_rmapbt_alloc_block(
96 struct xfs_btree_cur *cur,
97 union xfs_btree_ptr *start,
98 union xfs_btree_ptr *new,
99 int *stat)
100{
101 int error;
102 xfs_agblock_t bno;
103
104 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
105
106 /* Allocate the new block from the freelist. If we can't, give up. */
107 error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
108 &bno, 1);
109 if (error) {
110 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
111 return error;
112 }
113
114 trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno,
115 bno, 1);
116 if (bno == NULLAGBLOCK) {
117 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
118 *stat = 0;
119 return 0;
120 }
121
122 xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1,
123 false);
124
125 xfs_trans_agbtree_delta(cur->bc_tp, 1);
126 new->s = cpu_to_be32(bno);
127
128 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
129 *stat = 1;
130 return 0;
131}
132
133STATIC int
134xfs_rmapbt_free_block(
135 struct xfs_btree_cur *cur,
136 struct xfs_buf *bp)
137{
138 struct xfs_buf *agbp = cur->bc_private.a.agbp;
139 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
140 xfs_agblock_t bno;
141 int error;
142
143 bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
144 trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno,
145 bno, 1);
146 error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
147 if (error)
148 return error;
149
150 xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
151 XFS_EXTENT_BUSY_SKIP_DISCARD);
152 xfs_trans_agbtree_delta(cur->bc_tp, -1);
153
154 return 0;
155}
156
157STATIC int
158xfs_rmapbt_get_minrecs(
159 struct xfs_btree_cur *cur,
160 int level)
161{
162 return cur->bc_mp->m_rmap_mnr[level != 0];
163}
164
165STATIC int
166xfs_rmapbt_get_maxrecs(
167 struct xfs_btree_cur *cur,
168 int level)
169{
170 return cur->bc_mp->m_rmap_mxr[level != 0];
171}
172
173STATIC void
174xfs_rmapbt_init_key_from_rec(
175 union xfs_btree_key *key,
176 union xfs_btree_rec *rec)
177{
178 key->rmap.rm_startblock = rec->rmap.rm_startblock;
179 key->rmap.rm_owner = rec->rmap.rm_owner;
180 key->rmap.rm_offset = rec->rmap.rm_offset;
181}
182
183/*
184 * The high key for a reverse mapping record can be computed by shifting
185 * the startblock and offset to the highest value that would still map
186 * to that record. In practice this means that we add blockcount-1 to
187 * the startblock for all records, and if the record is for a data/attr
188 * fork mapping, we add blockcount-1 to the offset too.
189 */
190STATIC void
191xfs_rmapbt_init_high_key_from_rec(
192 union xfs_btree_key *key,
193 union xfs_btree_rec *rec)
194{
195 __uint64_t off;
196 int adj;
197
198 adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;
199
200 key->rmap.rm_startblock = rec->rmap.rm_startblock;
201 be32_add_cpu(&key->rmap.rm_startblock, adj);
202 key->rmap.rm_owner = rec->rmap.rm_owner;
203 key->rmap.rm_offset = rec->rmap.rm_offset;
204 if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
205 XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
206 return;
207 off = be64_to_cpu(key->rmap.rm_offset);
208 off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
209 key->rmap.rm_offset = cpu_to_be64(off);
210}
211
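A short worked example of that rule (editorial): a record with rm_startblock 100, rm_blockcount 8 and rm_offset 50 gets adj = 7, so its high key is startblock 107, offset 57, i.e. the last block and file offset the record still covers. For a non-inode or bmbt-block owner only the startblock is adjusted.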
212STATIC void
213xfs_rmapbt_init_rec_from_cur(
214 struct xfs_btree_cur *cur,
215 union xfs_btree_rec *rec)
216{
217 rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
218 rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
219 rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
220 rec->rmap.rm_offset = cpu_to_be64(
221 xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
222}
223
224STATIC void
225xfs_rmapbt_init_ptr_from_cur(
226 struct xfs_btree_cur *cur,
227 union xfs_btree_ptr *ptr)
228{
229 struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
230
231 ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
232 ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
233
234 ptr->s = agf->agf_roots[cur->bc_btnum];
235}
236
237STATIC __int64_t
238xfs_rmapbt_key_diff(
239 struct xfs_btree_cur *cur,
240 union xfs_btree_key *key)
241{
242 struct xfs_rmap_irec *rec = &cur->bc_rec.r;
243 struct xfs_rmap_key *kp = &key->rmap;
244 __u64 x, y;
245 __int64_t d;
246
247 d = (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
248 if (d)
249 return d;
250
251 x = be64_to_cpu(kp->rm_owner);
252 y = rec->rm_owner;
253 if (x > y)
254 return 1;
255 else if (y > x)
256 return -1;
257
258 x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
259 y = rec->rm_offset;
260 if (x > y)
261 return 1;
262 else if (y > x)
263 return -1;
264 return 0;
265}
266
267STATIC __int64_t
268xfs_rmapbt_diff_two_keys(
269 struct xfs_btree_cur *cur,
270 union xfs_btree_key *k1,
271 union xfs_btree_key *k2)
272{
273 struct xfs_rmap_key *kp1 = &k1->rmap;
274 struct xfs_rmap_key *kp2 = &k2->rmap;
275 __int64_t d;
276 __u64 x, y;
277
278 d = (__int64_t)be32_to_cpu(kp1->rm_startblock) -
279 be32_to_cpu(kp2->rm_startblock);
280 if (d)
281 return d;
282
283 x = be64_to_cpu(kp1->rm_owner);
284 y = be64_to_cpu(kp2->rm_owner);
285 if (x > y)
286 return 1;
287 else if (y > x)
288 return -1;
289
290 x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
291 y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
292 if (x > y)
293 return 1;
294 else if (y > x)
295 return -1;
296 return 0;
297}
298
299static bool
300xfs_rmapbt_verify(
301 struct xfs_buf *bp)
302{
303 struct xfs_mount *mp = bp->b_target->bt_mount;
304 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
305 struct xfs_perag *pag = bp->b_pag;
306 unsigned int level;
307
308 /*
309 * magic number and level verification
310 *
311 * During growfs operations, we can't verify the exact level or owner as
312 * the perag is not fully initialised and hence not attached to the
313 * buffer. In this case, check against the maximum tree depth.
314 *
315 * Similarly, during log recovery we will have a perag structure
316 * attached, but the agf information will not yet have been initialised
317 * from the on disk AGF. Again, we can only check against maximum limits
318 * in this case.
319 */
320 if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC))
321 return false;
322
323 if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
324 return false;
325 if (!xfs_btree_sblock_v5hdr_verify(bp))
326 return false;
327
328 level = be16_to_cpu(block->bb_level);
329 if (pag && pag->pagf_init) {
330 if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
331 return false;
332 } else if (level >= mp->m_rmap_maxlevels)
333 return false;
334
335 return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
336}
337
338static void
339xfs_rmapbt_read_verify(
340 struct xfs_buf *bp)
341{
342 if (!xfs_btree_sblock_verify_crc(bp))
343 xfs_buf_ioerror(bp, -EFSBADCRC);
344 else if (!xfs_rmapbt_verify(bp))
345 xfs_buf_ioerror(bp, -EFSCORRUPTED);
346
347 if (bp->b_error) {
348 trace_xfs_btree_corrupt(bp, _RET_IP_);
349 xfs_verifier_error(bp);
350 }
351}
352
353static void
354xfs_rmapbt_write_verify(
355 struct xfs_buf *bp)
356{
357 if (!xfs_rmapbt_verify(bp)) {
358 trace_xfs_btree_corrupt(bp, _RET_IP_);
359 xfs_buf_ioerror(bp, -EFSCORRUPTED);
360 xfs_verifier_error(bp);
361 return;
362 }
363 xfs_btree_sblock_calc_crc(bp);
364
365}
366
367const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
368 .name = "xfs_rmapbt",
369 .verify_read = xfs_rmapbt_read_verify,
370 .verify_write = xfs_rmapbt_write_verify,
371};
372
373#if defined(DEBUG) || defined(XFS_WARN)
374STATIC int
375xfs_rmapbt_keys_inorder(
376 struct xfs_btree_cur *cur,
377 union xfs_btree_key *k1,
378 union xfs_btree_key *k2)
379{
380 __uint32_t x;
381 __uint32_t y;
382 __uint64_t a;
383 __uint64_t b;
384
385 x = be32_to_cpu(k1->rmap.rm_startblock);
386 y = be32_to_cpu(k2->rmap.rm_startblock);
387 if (x < y)
388 return 1;
389 else if (x > y)
390 return 0;
391 a = be64_to_cpu(k1->rmap.rm_owner);
392 b = be64_to_cpu(k2->rmap.rm_owner);
393 if (a < b)
394 return 1;
395 else if (a > b)
396 return 0;
397 a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
398 b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
399 if (a <= b)
400 return 1;
401 return 0;
402}
403
404STATIC int
405xfs_rmapbt_recs_inorder(
406 struct xfs_btree_cur *cur,
407 union xfs_btree_rec *r1,
408 union xfs_btree_rec *r2)
409{
410 __uint32_t x;
411 __uint32_t y;
412 __uint64_t a;
413 __uint64_t b;
414
415 x = be32_to_cpu(r1->rmap.rm_startblock);
416 y = be32_to_cpu(r2->rmap.rm_startblock);
417 if (x < y)
418 return 1;
419 else if (x > y)
420 return 0;
421 a = be64_to_cpu(r1->rmap.rm_owner);
422 b = be64_to_cpu(r2->rmap.rm_owner);
423 if (a < b)
424 return 1;
425 else if (a > b)
426 return 0;
427 a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
428 b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
429 if (a <= b)
430 return 1;
431 return 0;
432}
433#endif /* DEBUG */
434
435static const struct xfs_btree_ops xfs_rmapbt_ops = {
436 .rec_len = sizeof(struct xfs_rmap_rec),
437 .key_len = 2 * sizeof(struct xfs_rmap_key),
438
439 .dup_cursor = xfs_rmapbt_dup_cursor,
440 .set_root = xfs_rmapbt_set_root,
441 .alloc_block = xfs_rmapbt_alloc_block,
442 .free_block = xfs_rmapbt_free_block,
443 .get_minrecs = xfs_rmapbt_get_minrecs,
444 .get_maxrecs = xfs_rmapbt_get_maxrecs,
445 .init_key_from_rec = xfs_rmapbt_init_key_from_rec,
446 .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec,
447 .init_rec_from_cur = xfs_rmapbt_init_rec_from_cur,
448 .init_ptr_from_cur = xfs_rmapbt_init_ptr_from_cur,
449 .key_diff = xfs_rmapbt_key_diff,
450 .buf_ops = &xfs_rmapbt_buf_ops,
451 .diff_two_keys = xfs_rmapbt_diff_two_keys,
452#if defined(DEBUG) || defined(XFS_WARN)
453 .keys_inorder = xfs_rmapbt_keys_inorder,
454 .recs_inorder = xfs_rmapbt_recs_inorder,
455#endif
456};
457
458/*
459 * Allocate a new reverse mapping btree cursor.
460 */
461struct xfs_btree_cur *
462xfs_rmapbt_init_cursor(
463 struct xfs_mount *mp,
464 struct xfs_trans *tp,
465 struct xfs_buf *agbp,
466 xfs_agnumber_t agno)
467{
468 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
469 struct xfs_btree_cur *cur;
470
471 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
472 cur->bc_tp = tp;
473 cur->bc_mp = mp;
474 /* Overlapping btree; 2 keys per pointer. */
475 cur->bc_btnum = XFS_BTNUM_RMAP;
476 cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING;
477 cur->bc_blocklog = mp->m_sb.sb_blocklog;
478 cur->bc_ops = &xfs_rmapbt_ops;
479 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
480
481 cur->bc_private.a.agbp = agbp;
482 cur->bc_private.a.agno = agno;
483
484 return cur;
485}
486
487/*
488 * Calculate number of records in an rmap btree block.
489 */
490int
491xfs_rmapbt_maxrecs(
492 struct xfs_mount *mp,
493 int blocklen,
494 int leaf)
495{
496 blocklen -= XFS_RMAP_BLOCK_LEN;
497
498 if (leaf)
499 return blocklen / sizeof(struct xfs_rmap_rec);
500 return blocklen /
501 (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
502}
503
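Worked numbers for the record-count calculation above (editorial; the sizes are assumptions taken from the on-disk definitions elsewhere in this series): with 4096-byte blocks, XFS_RMAP_BLOCK_LEN = 56, a 24-byte xfs_rmap_rec and a 20-byte xfs_rmap_key, a leaf block holds (4096 - 56) / 24 = 168 records, while an interior node holds (4096 - 56) / (2 * 20 + 4) = 91 key pairs plus pointers.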
504/* Compute the maximum height of an rmap btree. */
505void
506xfs_rmapbt_compute_maxlevels(
507 struct xfs_mount *mp)
508{
509 mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(mp,
510 mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
511}
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
new file mode 100644
index 000000000000..e73a55357dab
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -0,0 +1,61 @@
1/*
2 * Copyright (c) 2014 Red Hat, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_RMAP_BTREE_H__
19#define __XFS_RMAP_BTREE_H__
20
21struct xfs_buf;
22struct xfs_btree_cur;
23struct xfs_mount;
24
25/* rmaps only exist on crc enabled filesystems */
26#define XFS_RMAP_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN
27
28/*
29 * Record, key, and pointer address macros for btree blocks.
30 *
31 * (note that some of these may appear unused, but they are used in userspace)
32 */
33#define XFS_RMAP_REC_ADDR(block, index) \
34 ((struct xfs_rmap_rec *) \
35 ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
36 (((index) - 1) * sizeof(struct xfs_rmap_rec))))
37
38#define XFS_RMAP_KEY_ADDR(block, index) \
39 ((struct xfs_rmap_key *) \
40 ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
41 ((index) - 1) * 2 * sizeof(struct xfs_rmap_key)))
42
43#define XFS_RMAP_HIGH_KEY_ADDR(block, index) \
44 ((struct xfs_rmap_key *) \
45 ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
46 sizeof(struct xfs_rmap_key) + \
47 ((index) - 1) * 2 * sizeof(struct xfs_rmap_key)))
48
49#define XFS_RMAP_PTR_ADDR(block, index, maxrecs) \
50 ((xfs_rmap_ptr_t *) \
51 ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
52 (maxrecs) * 2 * sizeof(struct xfs_rmap_key) + \
53 ((index) - 1) * sizeof(xfs_rmap_ptr_t)))
54
55struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
56 struct xfs_trans *tp, struct xfs_buf *bp,
57 xfs_agnumber_t agno);
58int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf);
59extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp);
60
61#endif /* __XFS_RMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 12ca86778e02..0e3d4f5ec33c 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -24,6 +24,7 @@
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_defer.h"
27#include "xfs_inode.h" 28#include "xfs_inode.h"
28#include "xfs_ialloc.h" 29#include "xfs_ialloc.h"
29#include "xfs_alloc.h" 30#include "xfs_alloc.h"
@@ -36,6 +37,7 @@
36#include "xfs_alloc_btree.h" 37#include "xfs_alloc_btree.h"
37#include "xfs_ialloc_btree.h" 38#include "xfs_ialloc_btree.h"
38#include "xfs_log.h" 39#include "xfs_log.h"
40#include "xfs_rmap_btree.h"
39 41
40/* 42/*
41 * Physical superblock buffer manipulations. Shared with libxfs in userspace. 43 * Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -729,6 +731,11 @@ xfs_sb_mount_common(
  mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
  mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
 
+ mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 1);
+ mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 0);
+ mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2;
+ mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2;
+
  mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
  mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
  sbp->sb_inopblock);
@@ -738,6 +745,8 @@ xfs_sb_mount_common(
  mp->m_ialloc_min_blks = sbp->sb_spino_align;
  else
  mp->m_ialloc_min_blks = mp->m_ialloc_blks;
+ mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
+ mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp);
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 16002b5ec4eb..0c5b30bd884c 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -38,6 +38,7 @@ extern const struct xfs_buf_ops xfs_agi_buf_ops;
 extern const struct xfs_buf_ops xfs_agf_buf_ops;
 extern const struct xfs_buf_ops xfs_agfl_buf_ops;
 extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
+extern const struct xfs_buf_ops xfs_rmapbt_buf_ops;
 extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
 extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
 extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
@@ -116,6 +117,7 @@ int xfs_log_calc_minimum_size(struct xfs_mount *);
 #define XFS_INO_BTREE_REF 3
 #define XFS_ALLOC_BTREE_REF 2
 #define XFS_BMAP_BTREE_REF 2
+#define XFS_RMAP_BTREE_REF 2
 #define XFS_DIR_BTREE_REF 2
 #define XFS_INO_REF 2
 #define XFS_ATTR_BTREE_REF 1
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 68cb1e7bf2bb..301ef2f4dbd6 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -64,6 +64,30 @@ xfs_calc_buf_res(
 }
 
 /*
+ * Per-extent log reservation for the btree changes involved in freeing or
+ * allocating an extent. In classic XFS there are two trees that will be
+ * modified (bnobt + cntbt). With rmap enabled, a third tree (the rmapbt) is
+ * modified as well. The number of blocks reserved is based on the formula:
+ *
+ * num trees * ((2 blocks/level * max depth) - 1)
+ *
+ * Keep in mind that max depth is calculated separately for each type of tree.
+ */
+static uint
+xfs_allocfree_log_count(
+ struct xfs_mount *mp,
+ uint num_ops)
+{
+ uint blocks;
+
+ blocks = num_ops * 2 * (2 * mp->m_ag_maxlevels - 1);
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);
+
+ return blocks;
+}
+
+/*
  * Logging inodes is really tricksy. They are logged in memory format,
  * which means that what we write into the log doesn't directly translate into
  * the amount of space they use on disk.
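To put the formula in perspective (editorial, with assumed tree depths): for an AG whose free space btrees and rmapbt are each 5 levels deep, one allocate/free operation now reserves 2 * (2 * 5 - 1) = 18 blocks for the bnobt and cntbt plus (2 * 5 - 1) = 9 blocks for the rmapbt, i.e. 27 blocks per operation instead of 18 on a filesystem without the rmap feature.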
@@ -126,7 +150,7 @@ xfs_calc_inode_res(
  */
 STATIC uint
 xfs_calc_finobt_res(
  struct xfs_mount *mp,
  int alloc,
  int modify)
 {
@@ -137,7 +161,7 @@ xfs_calc_finobt_res(
 
  res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
  if (alloc)
- res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
  XFS_FSB_TO_B(mp, 1));
  if (modify)
  res += (uint)XFS_FSB_TO_B(mp, 1);
@@ -153,9 +177,9 @@ xfs_calc_finobt_res(
  * item logged to try to account for the overhead of the transaction mechanism.
  *
  * Note: Most of the reservations underestimate the number of allocation
- * groups into which they could free extents in the xfs_bmap_finish() call.
+ * groups into which they could free extents in the xfs_defer_finish() call.
  * This is because the number in the worst case is quite high and quite
- * unusual. In order to fix this we need to change xfs_bmap_finish() to free
+ * unusual. In order to fix this we need to change xfs_defer_finish() to free
  * extents in only a single AG at a time. This will require changes to the
  * EFI code as well, however, so that the EFI for the extents not freed is
  * logged again in each transaction. See SGI PV #261917.
@@ -188,10 +212,10 @@ xfs_calc_write_reservation(
  xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
  XFS_FSB_TO_B(mp, 1)) +
  xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
  XFS_FSB_TO_B(mp, 1))),
  (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
  XFS_FSB_TO_B(mp, 1))));
 }
197 221
@@ -217,10 +241,10 @@ xfs_calc_itruncate_reservation(
  xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
  XFS_FSB_TO_B(mp, 1))),
  (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
  XFS_FSB_TO_B(mp, 1)) +
  xfs_calc_buf_res(5, 0) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
  XFS_FSB_TO_B(mp, 1)) +
  xfs_calc_buf_res(2 + mp->m_ialloc_blks +
  mp->m_in_maxlevels, 0)));
@@ -247,7 +271,7 @@ xfs_calc_rename_reservation(
  xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
  XFS_FSB_TO_B(mp, 1))),
  (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 3),
  XFS_FSB_TO_B(mp, 1))));
 }
 
@@ -286,7 +310,7 @@ xfs_calc_link_reservation(
  xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
  XFS_FSB_TO_B(mp, 1))),
  (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
  XFS_FSB_TO_B(mp, 1))));
 }
 
@@ -324,7 +348,7 @@ xfs_calc_remove_reservation(
  xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
  XFS_FSB_TO_B(mp, 1))),
  (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
  XFS_FSB_TO_B(mp, 1))));
 }
 
@@ -371,7 +395,7 @@ xfs_calc_create_resv_alloc(
  mp->m_sb.sb_sectsize +
  xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
  xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
  XFS_FSB_TO_B(mp, 1));
 }
 
@@ -399,7 +423,7 @@ xfs_calc_icreate_resv_alloc(
  return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
  mp->m_sb.sb_sectsize +
  xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
  XFS_FSB_TO_B(mp, 1)) +
  xfs_calc_finobt_res(mp, 0, 0);
 }
@@ -483,7 +507,7 @@ xfs_calc_ifree_reservation(
483 xfs_calc_buf_res(1, 0) + 507 xfs_calc_buf_res(1, 0) +
484 xfs_calc_buf_res(2 + mp->m_ialloc_blks + 508 xfs_calc_buf_res(2 + mp->m_ialloc_blks +
485 mp->m_in_maxlevels, 0) + 509 mp->m_in_maxlevels, 0) +
486 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 510 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
487 XFS_FSB_TO_B(mp, 1)) + 511 XFS_FSB_TO_B(mp, 1)) +
488 xfs_calc_finobt_res(mp, 0, 1); 512 xfs_calc_finobt_res(mp, 0, 1);
489} 513}
@@ -513,7 +537,7 @@ xfs_calc_growdata_reservation(
513 struct xfs_mount *mp) 537 struct xfs_mount *mp)
514{ 538{
515 return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + 539 return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
516 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 540 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
517 XFS_FSB_TO_B(mp, 1)); 541 XFS_FSB_TO_B(mp, 1));
518} 542}
519 543
@@ -535,7 +559,7 @@ xfs_calc_growrtalloc_reservation(
535 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 559 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
536 XFS_FSB_TO_B(mp, 1)) + 560 XFS_FSB_TO_B(mp, 1)) +
537 xfs_calc_inode_res(mp, 1) + 561 xfs_calc_inode_res(mp, 1) +
538 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 562 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
539 XFS_FSB_TO_B(mp, 1)); 563 XFS_FSB_TO_B(mp, 1));
540} 564}
541 565
@@ -611,7 +635,7 @@ xfs_calc_addafork_reservation(
611 xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + 635 xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
612 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1, 636 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
613 XFS_FSB_TO_B(mp, 1)) + 637 XFS_FSB_TO_B(mp, 1)) +
614 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 638 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
615 XFS_FSB_TO_B(mp, 1)); 639 XFS_FSB_TO_B(mp, 1));
616} 640}
617 641
@@ -634,7 +658,7 @@ xfs_calc_attrinval_reservation(
634 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), 658 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
635 XFS_FSB_TO_B(mp, 1))), 659 XFS_FSB_TO_B(mp, 1))),
636 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + 660 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
637 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), 661 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
638 XFS_FSB_TO_B(mp, 1)))); 662 XFS_FSB_TO_B(mp, 1))));
639} 663}
640 664
@@ -701,7 +725,7 @@ xfs_calc_attrrm_reservation(
701 XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 725 XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
702 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)), 726 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
703 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + 727 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
704 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), 728 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
705 XFS_FSB_TO_B(mp, 1)))); 729 XFS_FSB_TO_B(mp, 1))));
706} 730}
707 731
diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h
index 797815012c0e..0eb46ed6d404 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.h
+++ b/fs/xfs/libxfs/xfs_trans_resv.h
@@ -68,16 +68,6 @@ struct xfs_trans_resv {
68#define M_RES(mp) (&(mp)->m_resv) 68#define M_RES(mp) (&(mp)->m_resv)
69 69
70/* 70/*
71 * Per-extent log reservation for the allocation btree changes
72 * involved in freeing or allocating an extent.
73 * 2 trees * (2 blocks/level * max depth - 1) * block size
74 */
75#define XFS_ALLOCFREE_LOG_RES(mp,nx) \
76 ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * (mp)->m_ag_maxlevels - 1)))
77#define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
78 ((nx) * (2 * (2 * (mp)->m_ag_maxlevels - 1)))
79
80/*
81 * Per-directory log reservation for any directory change. 71 * Per-directory log reservation for any directory change.
82 * dir blocks: (1 btree block per level + data block + free block) * dblock size 72 * dir blocks: (1 btree block per level + data block + free block) * dblock size
83 * bmap btree: (levels + 2) * max depth * block size 73 * bmap btree: (levels + 2) * max depth * block size
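The reservation hunks above replace the compile-time XFS_ALLOCFREE_LOG_COUNT() macro (deleted from xfs_trans_resv.h here) with a runtime helper, xfs_allocfree_log_count(), so the per-extent block count can grow when the rmap btree feature is enabled. A minimal sketch of such a helper follows, assuming the rmap term simply adds one more btree's worth of blocks per operation; the m_rmap_maxlevels field is an assumption, as the helper body is not shown in this diff:

	/*
	 * Sketch only: per-extent log reservation block count for the
	 * allocation btrees, plus an assumed rmap btree term when the
	 * feature bit is set.
	 */
	static unsigned int
	xfs_allocfree_log_count(
		struct xfs_mount	*mp,
		unsigned int		num_ops)
	{
		unsigned int		blocks;

		/* 2 trees * (2 blocks/level * max depth - 1) per operation */
		blocks = num_ops * 2 * (2 * mp->m_ag_maxlevels - 1);
		/* assumed extra term for the rmap btree, if present */
		if (xfs_sb_version_hasrmapbt(&mp->m_sb))
			blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);

		return blocks;
	}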
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index b79dc66b2ecd..3d503647f26b 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -108,8 +108,8 @@ typedef enum {
108} xfs_lookup_t; 108} xfs_lookup_t;
109 109
110typedef enum { 110typedef enum {
111 XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi, 111 XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi,
112 XFS_BTNUM_FINOi, XFS_BTNUM_MAX 112 XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_MAX
113} xfs_btnum_t; 113} xfs_btnum_t;
114 114
115struct xfs_name { 115struct xfs_name {
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index cd4a850564f2..4ece4f2ffc72 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -25,6 +25,7 @@
25#include "xfs_bit.h" 25#include "xfs_bit.h"
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_da_format.h" 27#include "xfs_da_format.h"
28#include "xfs_defer.h"
28#include "xfs_inode.h" 29#include "xfs_inode.h"
29#include "xfs_btree.h" 30#include "xfs_btree.h"
30#include "xfs_trans.h" 31#include "xfs_trans.h"
@@ -40,6 +41,7 @@
40#include "xfs_trace.h" 41#include "xfs_trace.h"
41#include "xfs_icache.h" 42#include "xfs_icache.h"
42#include "xfs_log.h" 43#include "xfs_log.h"
44#include "xfs_rmap_btree.h"
43 45
44/* Kernel only BMAP related definitions and functions */ 46/* Kernel only BMAP related definitions and functions */
45 47
@@ -79,95 +81,6 @@ xfs_zero_extent(
79 GFP_NOFS, true); 81 GFP_NOFS, true);
80} 82}
81 83
82/* Sort bmap items by AG. */
83static int
84xfs_bmap_free_list_cmp(
85 void *priv,
86 struct list_head *a,
87 struct list_head *b)
88{
89 struct xfs_mount *mp = priv;
90 struct xfs_bmap_free_item *ra;
91 struct xfs_bmap_free_item *rb;
92
93 ra = container_of(a, struct xfs_bmap_free_item, xbfi_list);
94 rb = container_of(b, struct xfs_bmap_free_item, xbfi_list);
95 return XFS_FSB_TO_AGNO(mp, ra->xbfi_startblock) -
96 XFS_FSB_TO_AGNO(mp, rb->xbfi_startblock);
97}
98
99/*
100 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
101 * caller. Frees all the extents that need freeing, which must be done
102 * last due to locking considerations. We never free any extents in
103 * the first transaction.
104 *
105 * If an inode *ip is provided, rejoin it to the transaction if
106 * the transaction was committed.
107 */
108int /* error */
109xfs_bmap_finish(
110 struct xfs_trans **tp, /* transaction pointer addr */
111 struct xfs_bmap_free *flist, /* i/o: list extents to free */
112 struct xfs_inode *ip)
113{
114 struct xfs_efd_log_item *efd; /* extent free data */
115 struct xfs_efi_log_item *efi; /* extent free intention */
116 int error; /* error return value */
117 int committed;/* xact committed or not */
118 struct xfs_bmap_free_item *free; /* free extent item */
119
120 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
121 if (flist->xbf_count == 0)
122 return 0;
123
124 list_sort((*tp)->t_mountp, &flist->xbf_flist, xfs_bmap_free_list_cmp);
125
126 efi = xfs_trans_get_efi(*tp, flist->xbf_count);
127 list_for_each_entry(free, &flist->xbf_flist, xbfi_list)
128 xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock,
129 free->xbfi_blockcount);
130
131 error = __xfs_trans_roll(tp, ip, &committed);
132 if (error) {
133 /*
134 * If the transaction was committed, drop the EFD reference
135 * since we're bailing out of here. The other reference is
136 * dropped when the EFI hits the AIL.
137 *
138 * If the transaction was not committed, the EFI is freed by the
139 * EFI item unlock handler on abort. Also, we have a new
140 * transaction so we should return committed=1 even though we're
141 * returning an error.
142 */
143 if (committed) {
144 xfs_efi_release(efi);
145 xfs_force_shutdown((*tp)->t_mountp,
146 SHUTDOWN_META_IO_ERROR);
147 }
148 return error;
149 }
150
151 /*
152 * Get an EFD and free each extent in the list, logging to the EFD in
153 * the process. The remaining bmap free list is cleaned up by the caller
154 * on error.
155 */
156 efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count);
157 while (!list_empty(&flist->xbf_flist)) {
158 free = list_first_entry(&flist->xbf_flist,
159 struct xfs_bmap_free_item, xbfi_list);
160 error = xfs_trans_free_extent(*tp, efd, free->xbfi_startblock,
161 free->xbfi_blockcount);
162 if (error)
163 return error;
164
165 xfs_bmap_del_free(flist, free);
166 }
167
168 return 0;
169}
170
171int 84int
172xfs_bmap_rtalloc( 85xfs_bmap_rtalloc(
173 struct xfs_bmalloca *ap) /* bmap alloc argument struct */ 86 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
@@ -214,9 +127,9 @@ xfs_bmap_rtalloc(
214 /* 127 /*
215 * Lock out modifications to both the RT bitmap and summary inodes 128 * Lock out modifications to both the RT bitmap and summary inodes
216 */ 129 */
217 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); 130 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
218 xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); 131 xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
219 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); 132 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
220 xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL); 133 xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
221 134
222 /* 135 /*
@@ -773,7 +686,7 @@ xfs_bmap_punch_delalloc_range(
773 xfs_bmbt_irec_t imap; 686 xfs_bmbt_irec_t imap;
774 int nimaps = 1; 687 int nimaps = 1;
775 xfs_fsblock_t firstblock; 688 xfs_fsblock_t firstblock;
776 xfs_bmap_free_t flist; 689 struct xfs_defer_ops dfops;
777 690
778 /* 691 /*
779 * Map the range first and check that it is a delalloc extent 692 * Map the range first and check that it is a delalloc extent
@@ -804,18 +717,18 @@ xfs_bmap_punch_delalloc_range(
804 WARN_ON(imap.br_blockcount == 0); 717 WARN_ON(imap.br_blockcount == 0);
805 718
806 /* 719 /*
807 * Note: while we initialise the firstblock/flist pair, they 720 * Note: while we initialise the firstblock/dfops pair, they
808 * should never be used because blocks should never be 721 * should never be used because blocks should never be
 809	 * allocated or freed for a delalloc extent, and hence we don't 722	 * allocated or freed for a delalloc extent, and hence we don't
 810	 * need to cancel or finish them after the xfs_bunmapi() call. 723	 * need to cancel or finish them after the xfs_bunmapi() call.
811 */ 724 */
812 xfs_bmap_init(&flist, &firstblock); 725 xfs_defer_init(&dfops, &firstblock);
813 error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock, 726 error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
814 &flist, &done); 727 &dfops, &done);
815 if (error) 728 if (error)
816 break; 729 break;
817 730
818 ASSERT(!flist.xbf_count && list_empty(&flist.xbf_flist)); 731 ASSERT(!xfs_defer_has_unfinished_work(&dfops));
819next_block: 732next_block:
820 start_fsb++; 733 start_fsb++;
821 remaining--; 734 remaining--;
@@ -972,7 +885,7 @@ xfs_alloc_file_space(
972 int rt; 885 int rt;
973 xfs_trans_t *tp; 886 xfs_trans_t *tp;
974 xfs_bmbt_irec_t imaps[1], *imapp; 887 xfs_bmbt_irec_t imaps[1], *imapp;
975 xfs_bmap_free_t free_list; 888 struct xfs_defer_ops dfops;
976 uint qblocks, resblks, resrtextents; 889 uint qblocks, resblks, resrtextents;
977 int error; 890 int error;
978 891
@@ -1063,17 +976,17 @@ xfs_alloc_file_space(
1063 976
1064 xfs_trans_ijoin(tp, ip, 0); 977 xfs_trans_ijoin(tp, ip, 0);
1065 978
1066 xfs_bmap_init(&free_list, &firstfsb); 979 xfs_defer_init(&dfops, &firstfsb);
1067 error = xfs_bmapi_write(tp, ip, startoffset_fsb, 980 error = xfs_bmapi_write(tp, ip, startoffset_fsb,
1068 allocatesize_fsb, alloc_type, &firstfsb, 981 allocatesize_fsb, alloc_type, &firstfsb,
1069 resblks, imapp, &nimaps, &free_list); 982 resblks, imapp, &nimaps, &dfops);
1070 if (error) 983 if (error)
1071 goto error0; 984 goto error0;
1072 985
1073 /* 986 /*
1074 * Complete the transaction 987 * Complete the transaction
1075 */ 988 */
1076 error = xfs_bmap_finish(&tp, &free_list, NULL); 989 error = xfs_defer_finish(&tp, &dfops, NULL);
1077 if (error) 990 if (error)
1078 goto error0; 991 goto error0;
1079 992
@@ -1096,7 +1009,7 @@ xfs_alloc_file_space(
1096 return error; 1009 return error;
1097 1010
1098error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 1011error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
1099 xfs_bmap_cancel(&free_list); 1012 xfs_defer_cancel(&dfops);
1100 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); 1013 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
1101 1014
1102error1: /* Just cancel transaction */ 1015error1: /* Just cancel transaction */
@@ -1114,7 +1027,7 @@ xfs_unmap_extent(
1114{ 1027{
1115 struct xfs_mount *mp = ip->i_mount; 1028 struct xfs_mount *mp = ip->i_mount;
1116 struct xfs_trans *tp; 1029 struct xfs_trans *tp;
1117 struct xfs_bmap_free free_list; 1030 struct xfs_defer_ops dfops;
1118 xfs_fsblock_t firstfsb; 1031 xfs_fsblock_t firstfsb;
1119 uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 1032 uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
1120 int error; 1033 int error;
@@ -1133,13 +1046,13 @@ xfs_unmap_extent(
1133 1046
1134 xfs_trans_ijoin(tp, ip, 0); 1047 xfs_trans_ijoin(tp, ip, 0);
1135 1048
1136 xfs_bmap_init(&free_list, &firstfsb); 1049 xfs_defer_init(&dfops, &firstfsb);
1137 error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb, 1050 error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb,
1138 &free_list, done); 1051 &dfops, done);
1139 if (error) 1052 if (error)
1140 goto out_bmap_cancel; 1053 goto out_bmap_cancel;
1141 1054
1142 error = xfs_bmap_finish(&tp, &free_list, NULL); 1055 error = xfs_defer_finish(&tp, &dfops, ip);
1143 if (error) 1056 if (error)
1144 goto out_bmap_cancel; 1057 goto out_bmap_cancel;
1145 1058
@@ -1149,7 +1062,7 @@ out_unlock:
1149 return error; 1062 return error;
1150 1063
1151out_bmap_cancel: 1064out_bmap_cancel:
1152 xfs_bmap_cancel(&free_list); 1065 xfs_defer_cancel(&dfops);
1153out_trans_cancel: 1066out_trans_cancel:
1154 xfs_trans_cancel(tp); 1067 xfs_trans_cancel(tp);
1155 goto out_unlock; 1068 goto out_unlock;
@@ -1338,7 +1251,7 @@ xfs_shift_file_space(
1338 struct xfs_mount *mp = ip->i_mount; 1251 struct xfs_mount *mp = ip->i_mount;
1339 struct xfs_trans *tp; 1252 struct xfs_trans *tp;
1340 int error; 1253 int error;
1341 struct xfs_bmap_free free_list; 1254 struct xfs_defer_ops dfops;
1342 xfs_fsblock_t first_block; 1255 xfs_fsblock_t first_block;
1343 xfs_fileoff_t stop_fsb; 1256 xfs_fileoff_t stop_fsb;
1344 xfs_fileoff_t next_fsb; 1257 xfs_fileoff_t next_fsb;
@@ -1416,19 +1329,19 @@ xfs_shift_file_space(
1416 1329
1417 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1330 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1418 1331
1419 xfs_bmap_init(&free_list, &first_block); 1332 xfs_defer_init(&dfops, &first_block);
1420 1333
1421 /* 1334 /*
1422 * We are using the write transaction in which max 2 bmbt 1335 * We are using the write transaction in which max 2 bmbt
1423 * updates are allowed 1336 * updates are allowed
1424 */ 1337 */
1425 error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb, 1338 error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
1426 &done, stop_fsb, &first_block, &free_list, 1339 &done, stop_fsb, &first_block, &dfops,
1427 direction, XFS_BMAP_MAX_SHIFT_EXTENTS); 1340 direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
1428 if (error) 1341 if (error)
1429 goto out_bmap_cancel; 1342 goto out_bmap_cancel;
1430 1343
1431 error = xfs_bmap_finish(&tp, &free_list, NULL); 1344 error = xfs_defer_finish(&tp, &dfops, NULL);
1432 if (error) 1345 if (error)
1433 goto out_bmap_cancel; 1346 goto out_bmap_cancel;
1434 1347
@@ -1438,7 +1351,7 @@ xfs_shift_file_space(
1438 return error; 1351 return error;
1439 1352
1440out_bmap_cancel: 1353out_bmap_cancel:
1441 xfs_bmap_cancel(&free_list); 1354 xfs_defer_cancel(&dfops);
1442out_trans_cancel: 1355out_trans_cancel:
1443 xfs_trans_cancel(tp); 1356 xfs_trans_cancel(tp);
1444 return error; 1357 return error;
@@ -1622,6 +1535,10 @@ xfs_swap_extents(
1622 __uint64_t tmp; 1535 __uint64_t tmp;
1623 int lock_flags; 1536 int lock_flags;
1624 1537
1538 /* XXX: we can't do this with rmap, will fix later */
1539 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
1540 return -EOPNOTSUPP;
1541
1625 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 1542 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
1626 if (!tempifp) { 1543 if (!tempifp) {
1627 error = -ENOMEM; 1544 error = -ENOMEM;
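The xfs_bmap_util.c changes above all follow the same conversion: the open-coded xfs_bmap_init()/xfs_bmap_finish()/xfs_bmap_cancel() trio becomes xfs_defer_init()/xfs_defer_finish()/xfs_defer_cancel() on a struct xfs_defer_ops, which can carry rmap updates as well as extent frees. A hedged sketch of the caller shape the converted code converges on; the function name, parameters and error labels are illustrative, not taken from any one hunk in this diff:

	static int
	xfs_defer_usage_sketch(
		struct xfs_trans	**tpp,
		struct xfs_inode	*ip,
		xfs_fileoff_t		offset_fsb,
		xfs_filblks_t		len_fsb)
	{
		struct xfs_defer_ops	dfops;
		xfs_fsblock_t		firstblock;
		int			done;
		int			error;

		/* start collecting deferred work for this transaction */
		xfs_defer_init(&dfops, &firstblock);

		error = xfs_bunmapi(*tpp, ip, offset_fsb, len_fsb, 0, 2,
				    &firstblock, &dfops, &done);
		if (error)
			goto out_cancel;

		/* roll the transaction and process the queued intents */
		error = xfs_defer_finish(tpp, &dfops, ip);
		if (error)
			goto out_cancel;

		return xfs_trans_commit(*tpp);

	out_cancel:
		xfs_defer_cancel(&dfops);
		xfs_trans_cancel(*tpp);
		return error;
	}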
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index f20071432ca6..68a621a8e0c0 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -21,7 +21,7 @@
21/* Kernel only BMAP related definitions and functions */ 21/* Kernel only BMAP related definitions and functions */
22 22
23struct xfs_bmbt_irec; 23struct xfs_bmbt_irec;
24struct xfs_bmap_free_item; 24struct xfs_extent_free_item;
25struct xfs_ifork; 25struct xfs_ifork;
26struct xfs_inode; 26struct xfs_inode;
27struct xfs_mount; 27struct xfs_mount;
@@ -40,8 +40,6 @@ int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
40 xfs_bmap_format_t formatter, void *arg); 40 xfs_bmap_format_t formatter, void *arg);
41 41
42/* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */ 42/* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */
43void xfs_bmap_del_free(struct xfs_bmap_free *flist,
44 struct xfs_bmap_free_item *free);
45int xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp, 43int xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp,
46 struct xfs_bmbt_irec *prevp, xfs_extlen_t extsz, 44 struct xfs_bmbt_irec *prevp, xfs_extlen_t extsz,
47 int rt, int eof, int delay, int convert, 45 int rt, int eof, int delay, int convert,
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 272c3f8b6f7d..4ff499aa7338 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -179,7 +179,7 @@ xfs_ioc_trim(
179 * matter as trimming blocks is an advisory interface. 179 * matter as trimming blocks is an advisory interface.
180 */ 180 */
181 if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || 181 if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
182 range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) || 182 range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) ||
183 range.len < mp->m_sb.sb_blocksize) 183 range.len < mp->m_sb.sb_blocksize)
184 return -EINVAL; 184 return -EINVAL;
185 185
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index ccb0811963b2..7a30b8f11db7 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -23,6 +23,7 @@
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
26#include "xfs_inode.h" 27#include "xfs_inode.h"
27#include "xfs_bmap.h" 28#include "xfs_bmap.h"
28#include "xfs_bmap_util.h" 29#include "xfs_bmap_util.h"
@@ -307,7 +308,7 @@ xfs_qm_dqalloc(
307 xfs_buf_t **O_bpp) 308 xfs_buf_t **O_bpp)
308{ 309{
309 xfs_fsblock_t firstblock; 310 xfs_fsblock_t firstblock;
310 xfs_bmap_free_t flist; 311 struct xfs_defer_ops dfops;
311 xfs_bmbt_irec_t map; 312 xfs_bmbt_irec_t map;
312 int nmaps, error; 313 int nmaps, error;
313 xfs_buf_t *bp; 314 xfs_buf_t *bp;
@@ -320,7 +321,7 @@ xfs_qm_dqalloc(
320 /* 321 /*
321 * Initialize the bmap freelist prior to calling bmapi code. 322 * Initialize the bmap freelist prior to calling bmapi code.
322 */ 323 */
323 xfs_bmap_init(&flist, &firstblock); 324 xfs_defer_init(&dfops, &firstblock);
324 xfs_ilock(quotip, XFS_ILOCK_EXCL); 325 xfs_ilock(quotip, XFS_ILOCK_EXCL);
325 /* 326 /*
326 * Return if this type of quotas is turned off while we didn't 327 * Return if this type of quotas is turned off while we didn't
@@ -336,7 +337,7 @@ xfs_qm_dqalloc(
336 error = xfs_bmapi_write(tp, quotip, offset_fsb, 337 error = xfs_bmapi_write(tp, quotip, offset_fsb,
337 XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 338 XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
338 &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp), 339 &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
339 &map, &nmaps, &flist); 340 &map, &nmaps, &dfops);
340 if (error) 341 if (error)
341 goto error0; 342 goto error0;
342 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); 343 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -368,7 +369,7 @@ xfs_qm_dqalloc(
368 dqp->dq_flags & XFS_DQ_ALLTYPES, bp); 369 dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
369 370
370 /* 371 /*
371 * xfs_bmap_finish() may commit the current transaction and 372 * xfs_defer_finish() may commit the current transaction and
372 * start a second transaction if the freelist is not empty. 373 * start a second transaction if the freelist is not empty.
373 * 374 *
374 * Since we still want to modify this buffer, we need to 375 * Since we still want to modify this buffer, we need to
@@ -382,7 +383,7 @@ xfs_qm_dqalloc(
382 383
383 xfs_trans_bhold(tp, bp); 384 xfs_trans_bhold(tp, bp);
384 385
385 error = xfs_bmap_finish(tpp, &flist, NULL); 386 error = xfs_defer_finish(tpp, &dfops, NULL);
386 if (error) 387 if (error)
387 goto error1; 388 goto error1;
388 389
@@ -398,7 +399,7 @@ xfs_qm_dqalloc(
398 return 0; 399 return 0;
399 400
400error1: 401error1:
401 xfs_bmap_cancel(&flist); 402 xfs_defer_cancel(&dfops);
402error0: 403error0:
403 xfs_iunlock(quotip, XFS_ILOCK_EXCL); 404 xfs_iunlock(quotip, XFS_ILOCK_EXCL);
404 405
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 2e4f67f68856..3d224702fbc0 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -90,7 +90,9 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
90#define XFS_ERRTAG_STRATCMPL_IOERR 19 90#define XFS_ERRTAG_STRATCMPL_IOERR 19
91#define XFS_ERRTAG_DIOWRITE_IOERR 20 91#define XFS_ERRTAG_DIOWRITE_IOERR 20
92#define XFS_ERRTAG_BMAPIFORMAT 21 92#define XFS_ERRTAG_BMAPIFORMAT 21
93#define XFS_ERRTAG_MAX 22 93#define XFS_ERRTAG_FREE_EXTENT 22
94#define XFS_ERRTAG_RMAP_FINISH_ONE 23
95#define XFS_ERRTAG_MAX 24
94 96
95/* 97/*
96 * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. 98 * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -117,6 +119,8 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
117#define XFS_RANDOM_STRATCMPL_IOERR (XFS_RANDOM_DEFAULT/10) 119#define XFS_RANDOM_STRATCMPL_IOERR (XFS_RANDOM_DEFAULT/10)
118#define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10) 120#define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10)
119#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT 121#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT
122#define XFS_RANDOM_FREE_EXTENT 1
123#define XFS_RANDOM_RMAP_FINISH_ONE 1
120 124
121#ifdef DEBUG 125#ifdef DEBUG
122extern int xfs_error_test_active; 126extern int xfs_error_test_active;
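Both new injection tags use a random factor of 1, i.e. they fire on every opportunity once enabled. A sketch of how such a tag would typically be consulted at the injection site; the placement in the free-extent path is an assumption, only the XFS_TEST_ERROR() idiom itself is the established pattern:

	/* fail the free-extent path on demand for test coverage */
	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FREE_EXTENT,
			   XFS_RANDOM_FREE_EXTENT))
		return -EIO;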
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index ab779460ecbf..d7bc14906af8 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -20,12 +20,15 @@
20#include "xfs_format.h" 20#include "xfs_format.h"
21#include "xfs_log_format.h" 21#include "xfs_log_format.h"
22#include "xfs_trans_resv.h" 22#include "xfs_trans_resv.h"
23#include "xfs_bit.h"
23#include "xfs_mount.h" 24#include "xfs_mount.h"
24#include "xfs_trans.h" 25#include "xfs_trans.h"
25#include "xfs_trans_priv.h" 26#include "xfs_trans_priv.h"
26#include "xfs_buf_item.h" 27#include "xfs_buf_item.h"
27#include "xfs_extfree_item.h" 28#include "xfs_extfree_item.h"
28#include "xfs_log.h" 29#include "xfs_log.h"
30#include "xfs_btree.h"
31#include "xfs_rmap.h"
29 32
30 33
31kmem_zone_t *xfs_efi_zone; 34kmem_zone_t *xfs_efi_zone;
@@ -486,3 +489,69 @@ xfs_efd_init(
486 489
487 return efdp; 490 return efdp;
488} 491}
492
493/*
494 * Process an extent free intent item that was recovered from
495 * the log. We need to free the extents that it describes.
496 */
497int
498xfs_efi_recover(
499 struct xfs_mount *mp,
500 struct xfs_efi_log_item *efip)
501{
502 struct xfs_efd_log_item *efdp;
503 struct xfs_trans *tp;
504 int i;
505 int error = 0;
506 xfs_extent_t *extp;
507 xfs_fsblock_t startblock_fsb;
508 struct xfs_owner_info oinfo;
509
510 ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
511
512 /*
513 * First check the validity of the extents described by the
514 * EFI. If any are bad, then assume that all are bad and
515 * just toss the EFI.
516 */
517 for (i = 0; i < efip->efi_format.efi_nextents; i++) {
518 extp = &efip->efi_format.efi_extents[i];
519 startblock_fsb = XFS_BB_TO_FSB(mp,
520 XFS_FSB_TO_DADDR(mp, extp->ext_start));
521 if (startblock_fsb == 0 ||
522 extp->ext_len == 0 ||
523 startblock_fsb >= mp->m_sb.sb_dblocks ||
524 extp->ext_len >= mp->m_sb.sb_agblocks) {
525 /*
526 * This will pull the EFI from the AIL and
527 * free the memory associated with it.
528 */
529 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
530 xfs_efi_release(efip);
531 return -EIO;
532 }
533 }
534
535 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
536 if (error)
537 return error;
538 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
539
540 xfs_rmap_skip_owner_update(&oinfo);
541 for (i = 0; i < efip->efi_format.efi_nextents; i++) {
542 extp = &efip->efi_format.efi_extents[i];
543 error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
544 extp->ext_len, &oinfo);
545 if (error)
546 goto abort_error;
547
548 }
549
550 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
551 error = xfs_trans_commit(tp);
552 return error;
553
554abort_error:
555 xfs_trans_cancel(tp);
556 return error;
557}
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 8fa8651705e1..a32c794a86b7 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -98,4 +98,7 @@ int xfs_efi_copy_format(xfs_log_iovec_t *buf,
98void xfs_efi_item_free(xfs_efi_log_item_t *); 98void xfs_efi_item_free(xfs_efi_log_item_t *);
99void xfs_efi_release(struct xfs_efi_log_item *); 99void xfs_efi_release(struct xfs_efi_log_item *);
100 100
101int xfs_efi_recover(struct xfs_mount *mp,
102 struct xfs_efi_log_item *efip);
103
101#endif /* __XFS_EXTFREE_ITEM_H__ */ 104#endif /* __XFS_EXTFREE_ITEM_H__ */
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index a51353a1f87f..4a33a3304369 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -22,6 +22,7 @@
22#include "xfs_trans_resv.h" 22#include "xfs_trans_resv.h"
23#include "xfs_sb.h" 23#include "xfs_sb.h"
24#include "xfs_mount.h" 24#include "xfs_mount.h"
25#include "xfs_defer.h"
25#include "xfs_inode.h" 26#include "xfs_inode.h"
26#include "xfs_bmap.h" 27#include "xfs_bmap.h"
27#include "xfs_bmap_util.h" 28#include "xfs_bmap_util.h"
@@ -385,7 +386,7 @@ xfs_filestream_new_ag(
385 } 386 }
386 387
387 flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | 388 flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
388 (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0); 389 (ap->dfops->dop_low ? XFS_PICK_LOWSPACE : 0);
389 390
390 err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen); 391 err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen);
391 392
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 7191c3878b4a..0f96847b90e1 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -23,6 +23,7 @@
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
26#include "xfs_da_format.h" 27#include "xfs_da_format.h"
27#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
28#include "xfs_inode.h" 29#include "xfs_inode.h"
@@ -32,6 +33,7 @@
32#include "xfs_btree.h" 33#include "xfs_btree.h"
33#include "xfs_alloc_btree.h" 34#include "xfs_alloc_btree.h"
34#include "xfs_alloc.h" 35#include "xfs_alloc.h"
36#include "xfs_rmap_btree.h"
35#include "xfs_ialloc.h" 37#include "xfs_ialloc.h"
36#include "xfs_fsops.h" 38#include "xfs_fsops.h"
37#include "xfs_itable.h" 39#include "xfs_itable.h"
@@ -40,6 +42,7 @@
40#include "xfs_trace.h" 42#include "xfs_trace.h"
41#include "xfs_log.h" 43#include "xfs_log.h"
42#include "xfs_filestream.h" 44#include "xfs_filestream.h"
45#include "xfs_rmap.h"
43 46
44/* 47/*
45 * File system operations 48 * File system operations
@@ -103,7 +106,9 @@ xfs_fs_geometry(
103 (xfs_sb_version_hasfinobt(&mp->m_sb) ? 106 (xfs_sb_version_hasfinobt(&mp->m_sb) ?
104 XFS_FSOP_GEOM_FLAGS_FINOBT : 0) | 107 XFS_FSOP_GEOM_FLAGS_FINOBT : 0) |
105 (xfs_sb_version_hassparseinodes(&mp->m_sb) ? 108 (xfs_sb_version_hassparseinodes(&mp->m_sb) ?
106 XFS_FSOP_GEOM_FLAGS_SPINODES : 0); 109 XFS_FSOP_GEOM_FLAGS_SPINODES : 0) |
110 (xfs_sb_version_hasrmapbt(&mp->m_sb) ?
111 XFS_FSOP_GEOM_FLAGS_RMAPBT : 0);
107 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? 112 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
108 mp->m_sb.sb_logsectsize : BBSIZE; 113 mp->m_sb.sb_logsectsize : BBSIZE;
109 geo->rtsectsize = mp->m_sb.sb_blocksize; 114 geo->rtsectsize = mp->m_sb.sb_blocksize;
@@ -239,10 +244,16 @@ xfs_growfs_data_private(
239 agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp)); 244 agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
240 agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1); 245 agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
241 agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1); 246 agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
247 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
248 agf->agf_roots[XFS_BTNUM_RMAPi] =
249 cpu_to_be32(XFS_RMAP_BLOCK(mp));
250 agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
251 }
252
242 agf->agf_flfirst = cpu_to_be32(1); 253 agf->agf_flfirst = cpu_to_be32(1);
243 agf->agf_fllast = 0; 254 agf->agf_fllast = 0;
244 agf->agf_flcount = 0; 255 agf->agf_flcount = 0;
245 tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp); 256 tmpsize = agsize - mp->m_ag_prealloc_blocks;
246 agf->agf_freeblks = cpu_to_be32(tmpsize); 257 agf->agf_freeblks = cpu_to_be32(tmpsize);
247 agf->agf_longest = cpu_to_be32(tmpsize); 258 agf->agf_longest = cpu_to_be32(tmpsize);
248 if (xfs_sb_version_hascrc(&mp->m_sb)) 259 if (xfs_sb_version_hascrc(&mp->m_sb))
@@ -339,7 +350,7 @@ xfs_growfs_data_private(
339 agno, 0); 350 agno, 0);
340 351
341 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 352 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
342 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 353 arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
343 arec->ar_blockcount = cpu_to_be32( 354 arec->ar_blockcount = cpu_to_be32(
344 agsize - be32_to_cpu(arec->ar_startblock)); 355 agsize - be32_to_cpu(arec->ar_startblock));
345 356
@@ -368,7 +379,7 @@ xfs_growfs_data_private(
368 agno, 0); 379 agno, 0);
369 380
370 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 381 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
371 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 382 arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
372 arec->ar_blockcount = cpu_to_be32( 383 arec->ar_blockcount = cpu_to_be32(
373 agsize - be32_to_cpu(arec->ar_startblock)); 384 agsize - be32_to_cpu(arec->ar_startblock));
374 nfree += be32_to_cpu(arec->ar_blockcount); 385 nfree += be32_to_cpu(arec->ar_blockcount);
@@ -378,6 +389,72 @@ xfs_growfs_data_private(
378 if (error) 389 if (error)
379 goto error0; 390 goto error0;
380 391
392 /* RMAP btree root block */
393 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
394 struct xfs_rmap_rec *rrec;
395 struct xfs_btree_block *block;
396
397 bp = xfs_growfs_get_hdr_buf(mp,
398 XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)),
399 BTOBB(mp->m_sb.sb_blocksize), 0,
400 &xfs_rmapbt_buf_ops);
401 if (!bp) {
402 error = -ENOMEM;
403 goto error0;
404 }
405
406 xfs_btree_init_block(mp, bp, XFS_RMAP_CRC_MAGIC, 0, 0,
407 agno, XFS_BTREE_CRC_BLOCKS);
408 block = XFS_BUF_TO_BLOCK(bp);
409
410
411 /*
 412	 * Mark the AG header regions as static metadata. The BNO
 413	 * btree block is the first block after the headers, so
 414	 * its location defines the size of the region the static
 415	 * metadata consumes.
 416	 *
 417	 * Note: unlike mkfs, we never have to account for log
 418	 * space when growing the data regions.
419 */
420 rrec = XFS_RMAP_REC_ADDR(block, 1);
421 rrec->rm_startblock = 0;
422 rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
423 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
424 rrec->rm_offset = 0;
425 be16_add_cpu(&block->bb_numrecs, 1);
426
427 /* account freespace btree root blocks */
428 rrec = XFS_RMAP_REC_ADDR(block, 2);
429 rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
430 rrec->rm_blockcount = cpu_to_be32(2);
431 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
432 rrec->rm_offset = 0;
433 be16_add_cpu(&block->bb_numrecs, 1);
434
435 /* account inode btree root blocks */
436 rrec = XFS_RMAP_REC_ADDR(block, 3);
437 rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
438 rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
439 XFS_IBT_BLOCK(mp));
440 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
441 rrec->rm_offset = 0;
442 be16_add_cpu(&block->bb_numrecs, 1);
443
444 /* account for rmap btree root */
445 rrec = XFS_RMAP_REC_ADDR(block, 4);
446 rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
447 rrec->rm_blockcount = cpu_to_be32(1);
448 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
449 rrec->rm_offset = 0;
450 be16_add_cpu(&block->bb_numrecs, 1);
451
452 error = xfs_bwrite(bp);
453 xfs_buf_relse(bp);
454 if (error)
455 goto error0;
456 }
457
381 /* 458 /*
382 * INO btree root block 459 * INO btree root block
383 */ 460 */
@@ -435,6 +512,8 @@ xfs_growfs_data_private(
435 * There are new blocks in the old last a.g. 512 * There are new blocks in the old last a.g.
436 */ 513 */
437 if (new) { 514 if (new) {
515 struct xfs_owner_info oinfo;
516
438 /* 517 /*
439 * Change the agi length. 518 * Change the agi length.
440 */ 519 */
@@ -462,14 +541,20 @@ xfs_growfs_data_private(
462 be32_to_cpu(agi->agi_length)); 541 be32_to_cpu(agi->agi_length));
463 542
464 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); 543 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
544
465 /* 545 /*
466 * Free the new space. 546 * Free the new space.
547 *
548 * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that
549 * this doesn't actually exist in the rmap btree.
467 */ 550 */
468 error = xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, agno, 551 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
469 be32_to_cpu(agf->agf_length) - new), new); 552 error = xfs_free_extent(tp,
470 if (error) { 553 XFS_AGB_TO_FSB(mp, agno,
554 be32_to_cpu(agf->agf_length) - new),
555 new, &oinfo);
556 if (error)
471 goto error0; 557 goto error0;
472 }
473 } 558 }
474 559
475 /* 560 /*
@@ -501,6 +586,7 @@ xfs_growfs_data_private(
501 } else 586 } else
502 mp->m_maxicount = 0; 587 mp->m_maxicount = 0;
503 xfs_set_low_space_thresholds(mp); 588 xfs_set_low_space_thresholds(mp);
589 mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
504 590
505 /* update secondary superblocks. */ 591 /* update secondary superblocks. */
506 for (agno = 1; agno < nagcount; agno++) { 592 for (agno = 1; agno < nagcount; agno++) {
@@ -638,7 +724,7 @@ xfs_fs_counts(
638 cnt->allocino = percpu_counter_read_positive(&mp->m_icount); 724 cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
639 cnt->freeino = percpu_counter_read_positive(&mp->m_ifree); 725 cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
640 cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) - 726 cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
641 XFS_ALLOC_SET_ASIDE(mp); 727 mp->m_alloc_set_aside;
642 728
643 spin_lock(&mp->m_sb_lock); 729 spin_lock(&mp->m_sb_lock);
644 cnt->freertx = mp->m_sb.sb_frextents; 730 cnt->freertx = mp->m_sb.sb_frextents;
@@ -726,7 +812,7 @@ xfs_reserve_blocks(
726 error = -ENOSPC; 812 error = -ENOSPC;
727 do { 813 do {
728 free = percpu_counter_sum(&mp->m_fdblocks) - 814 free = percpu_counter_sum(&mp->m_fdblocks) -
729 XFS_ALLOC_SET_ASIDE(mp); 815 mp->m_alloc_set_aside;
730 if (!free) 816 if (!free)
731 break; 817 break;
732 818
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8825bcfd314c..e08eaea6327b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -25,6 +25,7 @@
25#include "xfs_trans_resv.h" 25#include "xfs_trans_resv.h"
26#include "xfs_sb.h" 26#include "xfs_sb.h"
27#include "xfs_mount.h" 27#include "xfs_mount.h"
28#include "xfs_defer.h"
28#include "xfs_inode.h" 29#include "xfs_inode.h"
29#include "xfs_da_format.h" 30#include "xfs_da_format.h"
30#include "xfs_da_btree.h" 31#include "xfs_da_btree.h"
@@ -1122,7 +1123,7 @@ xfs_create(
1122 struct xfs_inode *ip = NULL; 1123 struct xfs_inode *ip = NULL;
1123 struct xfs_trans *tp = NULL; 1124 struct xfs_trans *tp = NULL;
1124 int error; 1125 int error;
1125 xfs_bmap_free_t free_list; 1126 struct xfs_defer_ops dfops;
1126 xfs_fsblock_t first_block; 1127 xfs_fsblock_t first_block;
1127 bool unlock_dp_on_error = false; 1128 bool unlock_dp_on_error = false;
1128 prid_t prid; 1129 prid_t prid;
@@ -1182,7 +1183,7 @@ xfs_create(
1182 XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); 1183 XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
1183 unlock_dp_on_error = true; 1184 unlock_dp_on_error = true;
1184 1185
1185 xfs_bmap_init(&free_list, &first_block); 1186 xfs_defer_init(&dfops, &first_block);
1186 1187
1187 /* 1188 /*
1188 * Reserve disk quota and the inode. 1189 * Reserve disk quota and the inode.
@@ -1219,7 +1220,7 @@ xfs_create(
1219 unlock_dp_on_error = false; 1220 unlock_dp_on_error = false;
1220 1221
1221 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 1222 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
1222 &first_block, &free_list, resblks ? 1223 &first_block, &dfops, resblks ?
1223 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 1224 resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
1224 if (error) { 1225 if (error) {
1225 ASSERT(error != -ENOSPC); 1226 ASSERT(error != -ENOSPC);
@@ -1253,7 +1254,7 @@ xfs_create(
1253 */ 1254 */
1254 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); 1255 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
1255 1256
1256 error = xfs_bmap_finish(&tp, &free_list, NULL); 1257 error = xfs_defer_finish(&tp, &dfops, NULL);
1257 if (error) 1258 if (error)
1258 goto out_bmap_cancel; 1259 goto out_bmap_cancel;
1259 1260
@@ -1269,7 +1270,7 @@ xfs_create(
1269 return 0; 1270 return 0;
1270 1271
1271 out_bmap_cancel: 1272 out_bmap_cancel:
1272 xfs_bmap_cancel(&free_list); 1273 xfs_defer_cancel(&dfops);
1273 out_trans_cancel: 1274 out_trans_cancel:
1274 xfs_trans_cancel(tp); 1275 xfs_trans_cancel(tp);
1275 out_release_inode: 1276 out_release_inode:
@@ -1401,7 +1402,7 @@ xfs_link(
1401 xfs_mount_t *mp = tdp->i_mount; 1402 xfs_mount_t *mp = tdp->i_mount;
1402 xfs_trans_t *tp; 1403 xfs_trans_t *tp;
1403 int error; 1404 int error;
1404 xfs_bmap_free_t free_list; 1405 struct xfs_defer_ops dfops;
1405 xfs_fsblock_t first_block; 1406 xfs_fsblock_t first_block;
1406 int resblks; 1407 int resblks;
1407 1408
@@ -1452,7 +1453,7 @@ xfs_link(
1452 goto error_return; 1453 goto error_return;
1453 } 1454 }
1454 1455
1455 xfs_bmap_init(&free_list, &first_block); 1456 xfs_defer_init(&dfops, &first_block);
1456 1457
1457 /* 1458 /*
1458 * Handle initial link state of O_TMPFILE inode 1459 * Handle initial link state of O_TMPFILE inode
@@ -1464,7 +1465,7 @@ xfs_link(
1464 } 1465 }
1465 1466
1466 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, 1467 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
1467 &first_block, &free_list, resblks); 1468 &first_block, &dfops, resblks);
1468 if (error) 1469 if (error)
1469 goto error_return; 1470 goto error_return;
1470 xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1471 xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -1482,9 +1483,9 @@ xfs_link(
1482 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 1483 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
1483 xfs_trans_set_sync(tp); 1484 xfs_trans_set_sync(tp);
1484 1485
1485 error = xfs_bmap_finish(&tp, &free_list, NULL); 1486 error = xfs_defer_finish(&tp, &dfops, NULL);
1486 if (error) { 1487 if (error) {
1487 xfs_bmap_cancel(&free_list); 1488 xfs_defer_cancel(&dfops);
1488 goto error_return; 1489 goto error_return;
1489 } 1490 }
1490 1491
@@ -1526,7 +1527,7 @@ xfs_itruncate_extents(
1526{ 1527{
1527 struct xfs_mount *mp = ip->i_mount; 1528 struct xfs_mount *mp = ip->i_mount;
1528 struct xfs_trans *tp = *tpp; 1529 struct xfs_trans *tp = *tpp;
1529 xfs_bmap_free_t free_list; 1530 struct xfs_defer_ops dfops;
1530 xfs_fsblock_t first_block; 1531 xfs_fsblock_t first_block;
1531 xfs_fileoff_t first_unmap_block; 1532 xfs_fileoff_t first_unmap_block;
1532 xfs_fileoff_t last_block; 1533 xfs_fileoff_t last_block;
@@ -1562,12 +1563,12 @@ xfs_itruncate_extents(
1562 ASSERT(first_unmap_block < last_block); 1563 ASSERT(first_unmap_block < last_block);
1563 unmap_len = last_block - first_unmap_block + 1; 1564 unmap_len = last_block - first_unmap_block + 1;
1564 while (!done) { 1565 while (!done) {
1565 xfs_bmap_init(&free_list, &first_block); 1566 xfs_defer_init(&dfops, &first_block);
1566 error = xfs_bunmapi(tp, ip, 1567 error = xfs_bunmapi(tp, ip,
1567 first_unmap_block, unmap_len, 1568 first_unmap_block, unmap_len,
1568 xfs_bmapi_aflag(whichfork), 1569 xfs_bmapi_aflag(whichfork),
1569 XFS_ITRUNC_MAX_EXTENTS, 1570 XFS_ITRUNC_MAX_EXTENTS,
1570 &first_block, &free_list, 1571 &first_block, &dfops,
1571 &done); 1572 &done);
1572 if (error) 1573 if (error)
1573 goto out_bmap_cancel; 1574 goto out_bmap_cancel;
@@ -1576,7 +1577,7 @@ xfs_itruncate_extents(
1576 * Duplicate the transaction that has the permanent 1577 * Duplicate the transaction that has the permanent
1577 * reservation and commit the old transaction. 1578 * reservation and commit the old transaction.
1578 */ 1579 */
1579 error = xfs_bmap_finish(&tp, &free_list, ip); 1580 error = xfs_defer_finish(&tp, &dfops, ip);
1580 if (error) 1581 if (error)
1581 goto out_bmap_cancel; 1582 goto out_bmap_cancel;
1582 1583
@@ -1602,7 +1603,7 @@ out_bmap_cancel:
1602 * the transaction can be properly aborted. We just need to make sure 1603 * the transaction can be properly aborted. We just need to make sure
1603 * we're not holding any resources that we were not when we came in. 1604 * we're not holding any resources that we were not when we came in.
1604 */ 1605 */
1605 xfs_bmap_cancel(&free_list); 1606 xfs_defer_cancel(&dfops);
1606 goto out; 1607 goto out;
1607} 1608}
1608 1609
@@ -1743,7 +1744,7 @@ STATIC int
1743xfs_inactive_ifree( 1744xfs_inactive_ifree(
1744 struct xfs_inode *ip) 1745 struct xfs_inode *ip)
1745{ 1746{
1746 xfs_bmap_free_t free_list; 1747 struct xfs_defer_ops dfops;
1747 xfs_fsblock_t first_block; 1748 xfs_fsblock_t first_block;
1748 struct xfs_mount *mp = ip->i_mount; 1749 struct xfs_mount *mp = ip->i_mount;
1749 struct xfs_trans *tp; 1750 struct xfs_trans *tp;
@@ -1780,8 +1781,8 @@ xfs_inactive_ifree(
1780 xfs_ilock(ip, XFS_ILOCK_EXCL); 1781 xfs_ilock(ip, XFS_ILOCK_EXCL);
1781 xfs_trans_ijoin(tp, ip, 0); 1782 xfs_trans_ijoin(tp, ip, 0);
1782 1783
1783 xfs_bmap_init(&free_list, &first_block); 1784 xfs_defer_init(&dfops, &first_block);
1784 error = xfs_ifree(tp, ip, &free_list); 1785 error = xfs_ifree(tp, ip, &dfops);
1785 if (error) { 1786 if (error) {
1786 /* 1787 /*
1787 * If we fail to free the inode, shut down. The cancel 1788 * If we fail to free the inode, shut down. The cancel
@@ -1807,11 +1808,11 @@ xfs_inactive_ifree(
1807 * Just ignore errors at this point. There is nothing we can do except 1808 * Just ignore errors at this point. There is nothing we can do except
1808 * to try to keep going. Make sure it's not a silent error. 1809 * to try to keep going. Make sure it's not a silent error.
1809 */ 1810 */
1810 error = xfs_bmap_finish(&tp, &free_list, NULL); 1811 error = xfs_defer_finish(&tp, &dfops, NULL);
1811 if (error) { 1812 if (error) {
1812 xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", 1813 xfs_notice(mp, "%s: xfs_defer_finish returned error %d",
1813 __func__, error); 1814 __func__, error);
1814 xfs_bmap_cancel(&free_list); 1815 xfs_defer_cancel(&dfops);
1815 } 1816 }
1816 error = xfs_trans_commit(tp); 1817 error = xfs_trans_commit(tp);
1817 if (error) 1818 if (error)
@@ -2367,7 +2368,7 @@ int
2367xfs_ifree( 2368xfs_ifree(
2368 xfs_trans_t *tp, 2369 xfs_trans_t *tp,
2369 xfs_inode_t *ip, 2370 xfs_inode_t *ip,
2370 xfs_bmap_free_t *flist) 2371 struct xfs_defer_ops *dfops)
2371{ 2372{
2372 int error; 2373 int error;
2373 struct xfs_icluster xic = { 0 }; 2374 struct xfs_icluster xic = { 0 };
@@ -2386,7 +2387,7 @@ xfs_ifree(
2386 if (error) 2387 if (error)
2387 return error; 2388 return error;
2388 2389
2389 error = xfs_difree(tp, ip->i_ino, flist, &xic); 2390 error = xfs_difree(tp, ip->i_ino, dfops, &xic);
2390 if (error) 2391 if (error)
2391 return error; 2392 return error;
2392 2393
@@ -2474,7 +2475,7 @@ xfs_iunpin_wait(
2474 * directory entry. 2475 * directory entry.
2475 * 2476 *
2476 * This is still safe from a transactional point of view - it is not until we 2477 * This is still safe from a transactional point of view - it is not until we
2477 * get to xfs_bmap_finish() that we have the possibility of multiple 2478 * get to xfs_defer_finish() that we have the possibility of multiple
2478 * transactions in this operation. Hence as long as we remove the directory 2479 * transactions in this operation. Hence as long as we remove the directory
2479 * entry and drop the link count in the first transaction of the remove 2480 * entry and drop the link count in the first transaction of the remove
2480 * operation, there are no transactional constraints on the ordering here. 2481 * operation, there are no transactional constraints on the ordering here.
@@ -2489,7 +2490,7 @@ xfs_remove(
2489 xfs_trans_t *tp = NULL; 2490 xfs_trans_t *tp = NULL;
2490 int is_dir = S_ISDIR(VFS_I(ip)->i_mode); 2491 int is_dir = S_ISDIR(VFS_I(ip)->i_mode);
2491 int error = 0; 2492 int error = 0;
2492 xfs_bmap_free_t free_list; 2493 struct xfs_defer_ops dfops;
2493 xfs_fsblock_t first_block; 2494 xfs_fsblock_t first_block;
2494 uint resblks; 2495 uint resblks;
2495 2496
@@ -2571,9 +2572,9 @@ xfs_remove(
2571 if (error) 2572 if (error)
2572 goto out_trans_cancel; 2573 goto out_trans_cancel;
2573 2574
2574 xfs_bmap_init(&free_list, &first_block); 2575 xfs_defer_init(&dfops, &first_block);
2575 error = xfs_dir_removename(tp, dp, name, ip->i_ino, 2576 error = xfs_dir_removename(tp, dp, name, ip->i_ino,
2576 &first_block, &free_list, resblks); 2577 &first_block, &dfops, resblks);
2577 if (error) { 2578 if (error) {
2578 ASSERT(error != -ENOENT); 2579 ASSERT(error != -ENOENT);
2579 goto out_bmap_cancel; 2580 goto out_bmap_cancel;
@@ -2587,7 +2588,7 @@ xfs_remove(
2587 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 2588 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2588 xfs_trans_set_sync(tp); 2589 xfs_trans_set_sync(tp);
2589 2590
2590 error = xfs_bmap_finish(&tp, &free_list, NULL); 2591 error = xfs_defer_finish(&tp, &dfops, NULL);
2591 if (error) 2592 if (error)
2592 goto out_bmap_cancel; 2593 goto out_bmap_cancel;
2593 2594
@@ -2601,7 +2602,7 @@ xfs_remove(
2601 return 0; 2602 return 0;
2602 2603
2603 out_bmap_cancel: 2604 out_bmap_cancel:
2604 xfs_bmap_cancel(&free_list); 2605 xfs_defer_cancel(&dfops);
2605 out_trans_cancel: 2606 out_trans_cancel:
2606 xfs_trans_cancel(tp); 2607 xfs_trans_cancel(tp);
2607 std_return: 2608 std_return:
@@ -2662,7 +2663,7 @@ xfs_sort_for_rename(
2662static int 2663static int
2663xfs_finish_rename( 2664xfs_finish_rename(
2664 struct xfs_trans *tp, 2665 struct xfs_trans *tp,
2665 struct xfs_bmap_free *free_list) 2666 struct xfs_defer_ops *dfops)
2666{ 2667{
2667 int error; 2668 int error;
2668 2669
@@ -2673,9 +2674,9 @@ xfs_finish_rename(
2673 if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 2674 if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2674 xfs_trans_set_sync(tp); 2675 xfs_trans_set_sync(tp);
2675 2676
2676 error = xfs_bmap_finish(&tp, free_list, NULL); 2677 error = xfs_defer_finish(&tp, dfops, NULL);
2677 if (error) { 2678 if (error) {
2678 xfs_bmap_cancel(free_list); 2679 xfs_defer_cancel(dfops);
2679 xfs_trans_cancel(tp); 2680 xfs_trans_cancel(tp);
2680 return error; 2681 return error;
2681 } 2682 }
@@ -2697,7 +2698,7 @@ xfs_cross_rename(
2697 struct xfs_inode *dp2, 2698 struct xfs_inode *dp2,
2698 struct xfs_name *name2, 2699 struct xfs_name *name2,
2699 struct xfs_inode *ip2, 2700 struct xfs_inode *ip2,
2700 struct xfs_bmap_free *free_list, 2701 struct xfs_defer_ops *dfops,
2701 xfs_fsblock_t *first_block, 2702 xfs_fsblock_t *first_block,
2702 int spaceres) 2703 int spaceres)
2703{ 2704{
@@ -2709,14 +2710,14 @@ xfs_cross_rename(
2709 /* Swap inode number for dirent in first parent */ 2710 /* Swap inode number for dirent in first parent */
2710 error = xfs_dir_replace(tp, dp1, name1, 2711 error = xfs_dir_replace(tp, dp1, name1,
2711 ip2->i_ino, 2712 ip2->i_ino,
2712 first_block, free_list, spaceres); 2713 first_block, dfops, spaceres);
2713 if (error) 2714 if (error)
2714 goto out_trans_abort; 2715 goto out_trans_abort;
2715 2716
2716 /* Swap inode number for dirent in second parent */ 2717 /* Swap inode number for dirent in second parent */
2717 error = xfs_dir_replace(tp, dp2, name2, 2718 error = xfs_dir_replace(tp, dp2, name2,
2718 ip1->i_ino, 2719 ip1->i_ino,
2719 first_block, free_list, spaceres); 2720 first_block, dfops, spaceres);
2720 if (error) 2721 if (error)
2721 goto out_trans_abort; 2722 goto out_trans_abort;
2722 2723
@@ -2731,7 +2732,7 @@ xfs_cross_rename(
2731 if (S_ISDIR(VFS_I(ip2)->i_mode)) { 2732 if (S_ISDIR(VFS_I(ip2)->i_mode)) {
2732 error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot, 2733 error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
2733 dp1->i_ino, first_block, 2734 dp1->i_ino, first_block,
2734 free_list, spaceres); 2735 dfops, spaceres);
2735 if (error) 2736 if (error)
2736 goto out_trans_abort; 2737 goto out_trans_abort;
2737 2738
@@ -2758,7 +2759,7 @@ xfs_cross_rename(
2758 if (S_ISDIR(VFS_I(ip1)->i_mode)) { 2759 if (S_ISDIR(VFS_I(ip1)->i_mode)) {
2759 error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot, 2760 error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
2760 dp2->i_ino, first_block, 2761 dp2->i_ino, first_block,
2761 free_list, spaceres); 2762 dfops, spaceres);
2762 if (error) 2763 if (error)
2763 goto out_trans_abort; 2764 goto out_trans_abort;
2764 2765
@@ -2797,10 +2798,10 @@ xfs_cross_rename(
2797 } 2798 }
2798 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2799 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2799 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); 2800 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
2800 return xfs_finish_rename(tp, free_list); 2801 return xfs_finish_rename(tp, dfops);
2801 2802
2802out_trans_abort: 2803out_trans_abort:
2803 xfs_bmap_cancel(free_list); 2804 xfs_defer_cancel(dfops);
2804 xfs_trans_cancel(tp); 2805 xfs_trans_cancel(tp);
2805 return error; 2806 return error;
2806} 2807}
@@ -2855,7 +2856,7 @@ xfs_rename(
2855{ 2856{
2856 struct xfs_mount *mp = src_dp->i_mount; 2857 struct xfs_mount *mp = src_dp->i_mount;
2857 struct xfs_trans *tp; 2858 struct xfs_trans *tp;
2858 struct xfs_bmap_free free_list; 2859 struct xfs_defer_ops dfops;
2859 xfs_fsblock_t first_block; 2860 xfs_fsblock_t first_block;
2860 struct xfs_inode *wip = NULL; /* whiteout inode */ 2861 struct xfs_inode *wip = NULL; /* whiteout inode */
2861 struct xfs_inode *inodes[__XFS_SORT_INODES]; 2862 struct xfs_inode *inodes[__XFS_SORT_INODES];
@@ -2944,13 +2945,13 @@ xfs_rename(
2944 goto out_trans_cancel; 2945 goto out_trans_cancel;
2945 } 2946 }
2946 2947
2947 xfs_bmap_init(&free_list, &first_block); 2948 xfs_defer_init(&dfops, &first_block);
2948 2949
2949 /* RENAME_EXCHANGE is unique from here on. */ 2950 /* RENAME_EXCHANGE is unique from here on. */
2950 if (flags & RENAME_EXCHANGE) 2951 if (flags & RENAME_EXCHANGE)
2951 return xfs_cross_rename(tp, src_dp, src_name, src_ip, 2952 return xfs_cross_rename(tp, src_dp, src_name, src_ip,
2952 target_dp, target_name, target_ip, 2953 target_dp, target_name, target_ip,
2953 &free_list, &first_block, spaceres); 2954 &dfops, &first_block, spaceres);
2954 2955
2955 /* 2956 /*
2956 * Set up the target. 2957 * Set up the target.
@@ -2972,7 +2973,7 @@ xfs_rename(
2972 */ 2973 */
2973 error = xfs_dir_createname(tp, target_dp, target_name, 2974 error = xfs_dir_createname(tp, target_dp, target_name,
2974 src_ip->i_ino, &first_block, 2975 src_ip->i_ino, &first_block,
2975 &free_list, spaceres); 2976 &dfops, spaceres);
2976 if (error) 2977 if (error)
2977 goto out_bmap_cancel; 2978 goto out_bmap_cancel;
2978 2979
@@ -3012,7 +3013,7 @@ xfs_rename(
3012 */ 3013 */
3013 error = xfs_dir_replace(tp, target_dp, target_name, 3014 error = xfs_dir_replace(tp, target_dp, target_name,
3014 src_ip->i_ino, 3015 src_ip->i_ino,
3015 &first_block, &free_list, spaceres); 3016 &first_block, &dfops, spaceres);
3016 if (error) 3017 if (error)
3017 goto out_bmap_cancel; 3018 goto out_bmap_cancel;
3018 3019
@@ -3047,7 +3048,7 @@ xfs_rename(
3047 */ 3048 */
3048 error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, 3049 error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
3049 target_dp->i_ino, 3050 target_dp->i_ino,
3050 &first_block, &free_list, spaceres); 3051 &first_block, &dfops, spaceres);
3051 ASSERT(error != -EEXIST); 3052 ASSERT(error != -EEXIST);
3052 if (error) 3053 if (error)
3053 goto out_bmap_cancel; 3054 goto out_bmap_cancel;
@@ -3086,10 +3087,10 @@ xfs_rename(
3086 */ 3087 */
3087 if (wip) { 3088 if (wip) {
3088 error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino, 3089 error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
3089 &first_block, &free_list, spaceres); 3090 &first_block, &dfops, spaceres);
3090 } else 3091 } else
3091 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, 3092 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
3092 &first_block, &free_list, spaceres); 3093 &first_block, &dfops, spaceres);
3093 if (error) 3094 if (error)
3094 goto out_bmap_cancel; 3095 goto out_bmap_cancel;
3095 3096
@@ -3124,13 +3125,13 @@ xfs_rename(
3124 if (new_parent) 3125 if (new_parent)
3125 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); 3126 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
3126 3127
3127 error = xfs_finish_rename(tp, &free_list); 3128 error = xfs_finish_rename(tp, &dfops);
3128 if (wip) 3129 if (wip)
3129 IRELE(wip); 3130 IRELE(wip);
3130 return error; 3131 return error;
3131 3132
3132out_bmap_cancel: 3133out_bmap_cancel:
3133 xfs_bmap_cancel(&free_list); 3134 xfs_defer_cancel(&dfops);
3134out_trans_cancel: 3135out_trans_cancel:
3135 xfs_trans_cancel(tp); 3136 xfs_trans_cancel(tp);
3136out_release_wip: 3137out_release_wip:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 8eb78ec4a6e2..e1a411e08f00 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -27,7 +27,7 @@
27struct xfs_dinode; 27struct xfs_dinode;
28struct xfs_inode; 28struct xfs_inode;
29struct xfs_buf; 29struct xfs_buf;
30struct xfs_bmap_free; 30struct xfs_defer_ops;
31struct xfs_bmbt_irec; 31struct xfs_bmbt_irec;
32struct xfs_inode_log_item; 32struct xfs_inode_log_item;
33struct xfs_mount; 33struct xfs_mount;
@@ -398,7 +398,7 @@ uint xfs_ilock_attr_map_shared(struct xfs_inode *);
398 398
399uint xfs_ip2xflags(struct xfs_inode *); 399uint xfs_ip2xflags(struct xfs_inode *);
400int xfs_ifree(struct xfs_trans *, xfs_inode_t *, 400int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
401 struct xfs_bmap_free *); 401 struct xfs_defer_ops *);
402int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *, 402int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
403 int, xfs_fsize_t); 403 int, xfs_fsize_t);
404void xfs_iext_realloc(xfs_inode_t *, int, int); 404void xfs_iext_realloc(xfs_inode_t *, int, int);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 9a7c87809d3b..cf46658392ce 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -387,6 +387,7 @@ xfs_attrlist_by_handle(
387{ 387{
388 int error = -ENOMEM; 388 int error = -ENOMEM;
389 attrlist_cursor_kern_t *cursor; 389 attrlist_cursor_kern_t *cursor;
390 struct xfs_fsop_attrlist_handlereq __user *p = arg;
390 xfs_fsop_attrlist_handlereq_t al_hreq; 391 xfs_fsop_attrlist_handlereq_t al_hreq;
391 struct dentry *dentry; 392 struct dentry *dentry;
392 char *kbuf; 393 char *kbuf;
@@ -419,6 +420,11 @@ xfs_attrlist_by_handle(
419 if (error) 420 if (error)
420 goto out_kfree; 421 goto out_kfree;
421 422
423 if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) {
424 error = -EFAULT;
425 goto out_kfree;
426 }
427
422 if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) 428 if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
423 error = -EFAULT; 429 error = -EFAULT;
424 430
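The xfs_attrlist_by_handle() change above writes the kernel's updated attribute-list cursor back into the caller's request structure, so repeated XFS_IOC_ATTRLIST_BY_HANDLE calls can resume where the previous one stopped instead of re-listing from the beginning. A rough userspace sketch of that loop follows; the structure and field names are taken from the XFS uapi headers as commonly published and should be treated as illustrative rather than authoritative:

    /* Illustrative only: list an inode's extended attributes by handle. */
    char buf[XATTR_LIST_MAX];
    struct attrlist *al = (struct attrlist *)buf;
    struct xfs_fsop_attrlist_handlereq alhreq = {
            .hreq   = hreq,          /* handle obtained earlier, e.g. path-to-handle */
            .flags  = 0,
            .buflen = sizeof(buf),
            .buffer = buf,
    };

    do {
            if (ioctl(fsfd, XFS_IOC_ATTRLIST_BY_HANDLE, &alhreq) < 0)
                    break;
            /* walk the al->al_count entries returned in buf here */
            /*
             * With the copy_to_user() added above, alhreq.pos now carries
             * the kernel's updated cursor, so the next call continues after
             * the last name returned rather than restarting.
             */
    } while (al->al_more);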
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 620fc9120444..2114d53df433 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -23,6 +23,7 @@
23#include "xfs_log_format.h" 23#include "xfs_log_format.h"
24#include "xfs_trans_resv.h" 24#include "xfs_trans_resv.h"
25#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
26#include "xfs_inode.h" 27#include "xfs_inode.h"
27#include "xfs_btree.h" 28#include "xfs_btree.h"
28#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
@@ -128,7 +129,7 @@ xfs_iomap_write_direct(
128 int quota_flag; 129 int quota_flag;
129 int rt; 130 int rt;
130 xfs_trans_t *tp; 131 xfs_trans_t *tp;
131 xfs_bmap_free_t free_list; 132 struct xfs_defer_ops dfops;
132 uint qblocks, resblks, resrtextents; 133 uint qblocks, resblks, resrtextents;
133 int error; 134 int error;
134 int lockmode; 135 int lockmode;
@@ -231,18 +232,18 @@ xfs_iomap_write_direct(
231 * From this point onwards we overwrite the imap pointer that the 232 * From this point onwards we overwrite the imap pointer that the
232 * caller gave to us. 233 * caller gave to us.
233 */ 234 */
234 xfs_bmap_init(&free_list, &firstfsb); 235 xfs_defer_init(&dfops, &firstfsb);
235 nimaps = 1; 236 nimaps = 1;
236 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, 237 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
237 bmapi_flags, &firstfsb, resblks, imap, 238 bmapi_flags, &firstfsb, resblks, imap,
238 &nimaps, &free_list); 239 &nimaps, &dfops);
239 if (error) 240 if (error)
240 goto out_bmap_cancel; 241 goto out_bmap_cancel;
241 242
242 /* 243 /*
243 * Complete the transaction 244 * Complete the transaction
244 */ 245 */
245 error = xfs_bmap_finish(&tp, &free_list, NULL); 246 error = xfs_defer_finish(&tp, &dfops, NULL);
246 if (error) 247 if (error)
247 goto out_bmap_cancel; 248 goto out_bmap_cancel;
248 249
@@ -266,7 +267,7 @@ out_unlock:
266 return error; 267 return error;
267 268
268out_bmap_cancel: 269out_bmap_cancel:
269 xfs_bmap_cancel(&free_list); 270 xfs_defer_cancel(&dfops);
270 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); 271 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
271out_trans_cancel: 272out_trans_cancel:
272 xfs_trans_cancel(tp); 273 xfs_trans_cancel(tp);
@@ -685,7 +686,7 @@ xfs_iomap_write_allocate(
685 xfs_fileoff_t offset_fsb, last_block; 686 xfs_fileoff_t offset_fsb, last_block;
686 xfs_fileoff_t end_fsb, map_start_fsb; 687 xfs_fileoff_t end_fsb, map_start_fsb;
687 xfs_fsblock_t first_block; 688 xfs_fsblock_t first_block;
688 xfs_bmap_free_t free_list; 689 struct xfs_defer_ops dfops;
689 xfs_filblks_t count_fsb; 690 xfs_filblks_t count_fsb;
690 xfs_trans_t *tp; 691 xfs_trans_t *tp;
691 int nimaps; 692 int nimaps;
@@ -727,7 +728,7 @@ xfs_iomap_write_allocate(
727 xfs_ilock(ip, XFS_ILOCK_EXCL); 728 xfs_ilock(ip, XFS_ILOCK_EXCL);
728 xfs_trans_ijoin(tp, ip, 0); 729 xfs_trans_ijoin(tp, ip, 0);
729 730
730 xfs_bmap_init(&free_list, &first_block); 731 xfs_defer_init(&dfops, &first_block);
731 732
732 /* 733 /*
733 * it is possible that the extents have changed since 734 * it is possible that the extents have changed since
@@ -783,11 +784,11 @@ xfs_iomap_write_allocate(
783 error = xfs_bmapi_write(tp, ip, map_start_fsb, 784 error = xfs_bmapi_write(tp, ip, map_start_fsb,
784 count_fsb, 0, &first_block, 785 count_fsb, 0, &first_block,
785 nres, imap, &nimaps, 786 nres, imap, &nimaps,
786 &free_list); 787 &dfops);
787 if (error) 788 if (error)
788 goto trans_cancel; 789 goto trans_cancel;
789 790
790 error = xfs_bmap_finish(&tp, &free_list, NULL); 791 error = xfs_defer_finish(&tp, &dfops, NULL);
791 if (error) 792 if (error)
792 goto trans_cancel; 793 goto trans_cancel;
793 794
@@ -821,7 +822,7 @@ xfs_iomap_write_allocate(
821 } 822 }
822 823
823trans_cancel: 824trans_cancel:
824 xfs_bmap_cancel(&free_list); 825 xfs_defer_cancel(&dfops);
825 xfs_trans_cancel(tp); 826 xfs_trans_cancel(tp);
826error0: 827error0:
827 xfs_iunlock(ip, XFS_ILOCK_EXCL); 828 xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -842,7 +843,7 @@ xfs_iomap_write_unwritten(
842 int nimaps; 843 int nimaps;
843 xfs_trans_t *tp; 844 xfs_trans_t *tp;
844 xfs_bmbt_irec_t imap; 845 xfs_bmbt_irec_t imap;
845 xfs_bmap_free_t free_list; 846 struct xfs_defer_ops dfops;
846 xfs_fsize_t i_size; 847 xfs_fsize_t i_size;
847 uint resblks; 848 uint resblks;
848 int error; 849 int error;
@@ -886,11 +887,11 @@ xfs_iomap_write_unwritten(
886 /* 887 /*
887 * Modify the unwritten extent state of the buffer. 888 * Modify the unwritten extent state of the buffer.
888 */ 889 */
889 xfs_bmap_init(&free_list, &firstfsb); 890 xfs_defer_init(&dfops, &firstfsb);
890 nimaps = 1; 891 nimaps = 1;
891 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, 892 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
892 XFS_BMAPI_CONVERT, &firstfsb, resblks, 893 XFS_BMAPI_CONVERT, &firstfsb, resblks,
893 &imap, &nimaps, &free_list); 894 &imap, &nimaps, &dfops);
894 if (error) 895 if (error)
895 goto error_on_bmapi_transaction; 896 goto error_on_bmapi_transaction;
896 897
@@ -909,7 +910,7 @@ xfs_iomap_write_unwritten(
909 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 910 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
910 } 911 }
911 912
912 error = xfs_bmap_finish(&tp, &free_list, NULL); 913 error = xfs_defer_finish(&tp, &dfops, NULL);
913 if (error) 914 if (error)
914 goto error_on_bmapi_transaction; 915 goto error_on_bmapi_transaction;
915 916
@@ -936,7 +937,7 @@ xfs_iomap_write_unwritten(
936 return 0; 937 return 0;
937 938
938error_on_bmapi_transaction: 939error_on_bmapi_transaction:
939 xfs_bmap_cancel(&free_list); 940 xfs_defer_cancel(&dfops);
940 xfs_trans_cancel(tp); 941 xfs_trans_cancel(tp);
941 xfs_iunlock(ip, XFS_ILOCK_EXCL); 942 xfs_iunlock(ip, XFS_ILOCK_EXCL);
942 return error; 943 return error;
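All three xfs_iomap.c hunks above follow the same mechanical conversion that runs through this patch: the per-transaction xfs_bmap_free list and its init/finish/cancel helpers become the generic deferred-operations structure, which the rmap code then uses to queue rmapbt updates alongside extent frees. A condensed sketch of the converted caller pattern is below; the function name and argument plumbing are hypothetical, while the xfs_defer_* and xfs_bmapi_write() calls are the ones used in the hunks above:

    /* Sketch only: mirrors the converted xfs_iomap_write_direct() flow. */
    STATIC int
    xfs_example_write_blocks(
            struct xfs_trans        *tp,    /* already allocated and reserved */
            struct xfs_inode        *ip,    /* already locked and joined to tp */
            xfs_fileoff_t           offset_fsb,
            xfs_filblks_t           count_fsb,
            int                     bmapi_flags,
            uint                    resblks,
            struct xfs_bmbt_irec    *imap,
            int                     *nimaps)
    {
            struct xfs_defer_ops    dfops;
            xfs_fsblock_t           firstfsb;
            int                     error;

            /* start collecting deferred work (extent frees, rmap updates, ...) */
            xfs_defer_init(&dfops, &firstfsb);

            error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flags,
                                    &firstfsb, resblks, imap, nimaps, &dfops);
            if (error)
                    goto out_cancel;

            /* roll the transaction until all deferred items are finished */
            error = xfs_defer_finish(&tp, &dfops, NULL);
            if (error)
                    goto out_cancel;

            return xfs_trans_commit(tp);

    out_cancel:
            xfs_defer_cancel(&dfops);
            xfs_trans_cancel(tp);
            return error;
    }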
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 835997843846..e8638fd2c0c3 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -43,6 +43,7 @@
43#include "xfs_bmap_btree.h" 43#include "xfs_bmap_btree.h"
44#include "xfs_error.h" 44#include "xfs_error.h"
45#include "xfs_dir2.h" 45#include "xfs_dir2.h"
46#include "xfs_rmap_item.h"
46 47
47#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) 48#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1)
48 49
@@ -1911,6 +1912,8 @@ xlog_recover_reorder_trans(
1911 case XFS_LI_QUOTAOFF: 1912 case XFS_LI_QUOTAOFF:
1912 case XFS_LI_EFD: 1913 case XFS_LI_EFD:
1913 case XFS_LI_EFI: 1914 case XFS_LI_EFI:
1915 case XFS_LI_RUI:
1916 case XFS_LI_RUD:
1914 trace_xfs_log_recover_item_reorder_tail(log, 1917 trace_xfs_log_recover_item_reorder_tail(log,
1915 trans, item, pass); 1918 trans, item, pass);
1916 list_move_tail(&item->ri_list, &inode_list); 1919 list_move_tail(&item->ri_list, &inode_list);
@@ -2228,6 +2231,7 @@ xlog_recover_get_buf_lsn(
2228 case XFS_ABTC_CRC_MAGIC: 2231 case XFS_ABTC_CRC_MAGIC:
2229 case XFS_ABTB_MAGIC: 2232 case XFS_ABTB_MAGIC:
2230 case XFS_ABTC_MAGIC: 2233 case XFS_ABTC_MAGIC:
2234 case XFS_RMAP_CRC_MAGIC:
2231 case XFS_IBT_CRC_MAGIC: 2235 case XFS_IBT_CRC_MAGIC:
2232 case XFS_IBT_MAGIC: { 2236 case XFS_IBT_MAGIC: {
2233 struct xfs_btree_block *btb = blk; 2237 struct xfs_btree_block *btb = blk;
@@ -2396,6 +2400,9 @@ xlog_recover_validate_buf_type(
2396 case XFS_BMAP_MAGIC: 2400 case XFS_BMAP_MAGIC:
2397 bp->b_ops = &xfs_bmbt_buf_ops; 2401 bp->b_ops = &xfs_bmbt_buf_ops;
2398 break; 2402 break;
2403 case XFS_RMAP_CRC_MAGIC:
2404 bp->b_ops = &xfs_rmapbt_buf_ops;
2405 break;
2399 default: 2406 default:
2400 xfs_warn(mp, "Bad btree block magic!"); 2407 xfs_warn(mp, "Bad btree block magic!");
2401 ASSERT(0); 2408 ASSERT(0);
@@ -3415,6 +3422,99 @@ xlog_recover_efd_pass2(
3415} 3422}
3416 3423
3417/* 3424/*
3425 * This routine is called to create an in-core extent rmap update
3426 * item from the rui format structure which was logged on disk.
3427 * It allocates an in-core rui, copies the extents from the format
3428 * structure into it, and adds the rui to the AIL with the given
3429 * LSN.
3430 */
3431STATIC int
3432xlog_recover_rui_pass2(
3433 struct xlog *log,
3434 struct xlog_recover_item *item,
3435 xfs_lsn_t lsn)
3436{
3437 int error;
3438 struct xfs_mount *mp = log->l_mp;
3439 struct xfs_rui_log_item *ruip;
3440 struct xfs_rui_log_format *rui_formatp;
3441
3442 rui_formatp = item->ri_buf[0].i_addr;
3443
3444 ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
3445 error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
3446 if (error) {
3447 xfs_rui_item_free(ruip);
3448 return error;
3449 }
3450 atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
3451
3452 spin_lock(&log->l_ailp->xa_lock);
3453 /*
3454 * The RUI has two references. One for the RUD and one for RUI to ensure
3455 * it makes it into the AIL. Insert the RUI into the AIL directly and
3456 * drop the RUI reference. Note that xfs_trans_ail_update() drops the
3457 * AIL lock.
3458 */
3459 xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn);
3460 xfs_rui_release(ruip);
3461 return 0;
3462}
3463
3464
3465/*
3466 * This routine is called when an RUD format structure is found in a committed
3467 * transaction in the log. Its purpose is to cancel the corresponding RUI if it
3468 * was still in the log. To do this it searches the AIL for the RUI with an id
3469 * equal to that in the RUD format structure. If we find it we drop the RUD
3470 * reference, which removes the RUI from the AIL and frees it.
3471 */
3472STATIC int
3473xlog_recover_rud_pass2(
3474 struct xlog *log,
3475 struct xlog_recover_item *item)
3476{
3477 struct xfs_rud_log_format *rud_formatp;
3478 struct xfs_rui_log_item *ruip = NULL;
3479 struct xfs_log_item *lip;
3480 __uint64_t rui_id;
3481 struct xfs_ail_cursor cur;
3482 struct xfs_ail *ailp = log->l_ailp;
3483
3484 rud_formatp = item->ri_buf[0].i_addr;
3485 ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
3486 rui_id = rud_formatp->rud_rui_id;
3487
3488 /*
3489 * Search for the RUI with the id in the RUD format structure in the
3490 * AIL.
3491 */
3492 spin_lock(&ailp->xa_lock);
3493 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
3494 while (lip != NULL) {
3495 if (lip->li_type == XFS_LI_RUI) {
3496 ruip = (struct xfs_rui_log_item *)lip;
3497 if (ruip->rui_format.rui_id == rui_id) {
3498 /*
3499 * Drop the RUD reference to the RUI. This
3500 * removes the RUI from the AIL and frees it.
3501 */
3502 spin_unlock(&ailp->xa_lock);
3503 xfs_rui_release(ruip);
3504 spin_lock(&ailp->xa_lock);
3505 break;
3506 }
3507 }
3508 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3509 }
3510
3511 xfs_trans_ail_cursor_done(&cur);
3512 spin_unlock(&ailp->xa_lock);
3513
3514 return 0;
3515}
3516
3517/*
3418 * This routine is called when an inode create format structure is found in a 3518 * This routine is called when an inode create format structure is found in a
3419 * committed transaction in the log. It's purpose is to initialise the inodes 3519 * committed transaction in the log. It's purpose is to initialise the inodes
3420 * being allocated on disk. This requires us to get inode cluster buffers that 3520 * being allocated on disk. This requires us to get inode cluster buffers that
@@ -3639,6 +3739,8 @@ xlog_recover_ra_pass2(
3639 case XFS_LI_EFI: 3739 case XFS_LI_EFI:
3640 case XFS_LI_EFD: 3740 case XFS_LI_EFD:
3641 case XFS_LI_QUOTAOFF: 3741 case XFS_LI_QUOTAOFF:
3742 case XFS_LI_RUI:
3743 case XFS_LI_RUD:
3642 default: 3744 default:
3643 break; 3745 break;
3644 } 3746 }
@@ -3662,6 +3764,8 @@ xlog_recover_commit_pass1(
3662 case XFS_LI_EFD: 3764 case XFS_LI_EFD:
3663 case XFS_LI_DQUOT: 3765 case XFS_LI_DQUOT:
3664 case XFS_LI_ICREATE: 3766 case XFS_LI_ICREATE:
3767 case XFS_LI_RUI:
3768 case XFS_LI_RUD:
3665 /* nothing to do in pass 1 */ 3769 /* nothing to do in pass 1 */
3666 return 0; 3770 return 0;
3667 default: 3771 default:
@@ -3692,6 +3796,10 @@ xlog_recover_commit_pass2(
3692 return xlog_recover_efi_pass2(log, item, trans->r_lsn); 3796 return xlog_recover_efi_pass2(log, item, trans->r_lsn);
3693 case XFS_LI_EFD: 3797 case XFS_LI_EFD:
3694 return xlog_recover_efd_pass2(log, item); 3798 return xlog_recover_efd_pass2(log, item);
3799 case XFS_LI_RUI:
3800 return xlog_recover_rui_pass2(log, item, trans->r_lsn);
3801 case XFS_LI_RUD:
3802 return xlog_recover_rud_pass2(log, item);
3695 case XFS_LI_DQUOT: 3803 case XFS_LI_DQUOT:
3696 return xlog_recover_dquot_pass2(log, buffer_list, item, 3804 return xlog_recover_dquot_pass2(log, buffer_list, item,
3697 trans->r_lsn); 3805 trans->r_lsn);
@@ -4164,126 +4272,156 @@ xlog_recover_process_data(
4164 return 0; 4272 return 0;
4165} 4273}
4166 4274
4167/* 4275/* Recover the EFI if necessary. */
4168 * Process an extent free intent item that was recovered from
4169 * the log. We need to free the extents that it describes.
4170 */
4171STATIC int 4276STATIC int
4172xlog_recover_process_efi( 4277xlog_recover_process_efi(
4173 xfs_mount_t *mp, 4278 struct xfs_mount *mp,
4174 xfs_efi_log_item_t *efip) 4279 struct xfs_ail *ailp,
4280 struct xfs_log_item *lip)
4175{ 4281{
4176 xfs_efd_log_item_t *efdp; 4282 struct xfs_efi_log_item *efip;
4177 xfs_trans_t *tp; 4283 int error;
4178 int i;
4179 int error = 0;
4180 xfs_extent_t *extp;
4181 xfs_fsblock_t startblock_fsb;
4182
4183 ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
4184 4284
4185 /* 4285 /*
4186 * First check the validity of the extents described by the 4286 * Skip EFIs that we've already processed.
4187 * EFI. If any are bad, then assume that all are bad and
4188 * just toss the EFI.
4189 */ 4287 */
4190 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 4288 efip = container_of(lip, struct xfs_efi_log_item, efi_item);
4191 extp = &(efip->efi_format.efi_extents[i]); 4289 if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
4192 startblock_fsb = XFS_BB_TO_FSB(mp, 4290 return 0;
4193 XFS_FSB_TO_DADDR(mp, extp->ext_start));
4194 if ((startblock_fsb == 0) ||
4195 (extp->ext_len == 0) ||
4196 (startblock_fsb >= mp->m_sb.sb_dblocks) ||
4197 (extp->ext_len >= mp->m_sb.sb_agblocks)) {
4198 /*
4199 * This will pull the EFI from the AIL and
4200 * free the memory associated with it.
4201 */
4202 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
4203 xfs_efi_release(efip);
4204 return -EIO;
4205 }
4206 }
4207 4291
4208 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); 4292 spin_unlock(&ailp->xa_lock);
4209 if (error) 4293 error = xfs_efi_recover(mp, efip);
4210 return error; 4294 spin_lock(&ailp->xa_lock);
4211 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
4212 4295
4213 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 4296 return error;
4214 extp = &(efip->efi_format.efi_extents[i]); 4297}
4215 error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
4216 extp->ext_len);
4217 if (error)
4218 goto abort_error;
4219 4298
4220 } 4299/* Release the EFI since we're cancelling everything. */
4300STATIC void
4301xlog_recover_cancel_efi(
4302 struct xfs_mount *mp,
4303 struct xfs_ail *ailp,
4304 struct xfs_log_item *lip)
4305{
4306 struct xfs_efi_log_item *efip;
4221 4307
4222 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); 4308 efip = container_of(lip, struct xfs_efi_log_item, efi_item);
4223 error = xfs_trans_commit(tp); 4309
4224 return error; 4310 spin_unlock(&ailp->xa_lock);
4311 xfs_efi_release(efip);
4312 spin_lock(&ailp->xa_lock);
4313}
4314
4315/* Recover the RUI if necessary. */
4316STATIC int
4317xlog_recover_process_rui(
4318 struct xfs_mount *mp,
4319 struct xfs_ail *ailp,
4320 struct xfs_log_item *lip)
4321{
4322 struct xfs_rui_log_item *ruip;
4323 int error;
4324
4325 /*
4326 * Skip RUIs that we've already processed.
4327 */
4328 ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
4329 if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
4330 return 0;
4331
4332 spin_unlock(&ailp->xa_lock);
4333 error = xfs_rui_recover(mp, ruip);
4334 spin_lock(&ailp->xa_lock);
4225 4335
4226abort_error:
4227 xfs_trans_cancel(tp);
4228 return error; 4336 return error;
4229} 4337}
4230 4338
4339/* Release the RUI since we're cancelling everything. */
4340STATIC void
4341xlog_recover_cancel_rui(
4342 struct xfs_mount *mp,
4343 struct xfs_ail *ailp,
4344 struct xfs_log_item *lip)
4345{
4346 struct xfs_rui_log_item *ruip;
4347
4348 ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
4349
4350 spin_unlock(&ailp->xa_lock);
4351 xfs_rui_release(ruip);
4352 spin_lock(&ailp->xa_lock);
4353}
4354
4355/* Is this log item a deferred action intent? */
4356static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
4357{
4358 switch (lip->li_type) {
4359 case XFS_LI_EFI:
4360 case XFS_LI_RUI:
4361 return true;
4362 default:
4363 return false;
4364 }
4365}
4366
4231/* 4367/*
4232 * When this is called, all of the EFIs which did not have 4368 * When this is called, all of the log intent items which did not have
4233 * corresponding EFDs should be in the AIL. What we do now 4369 * corresponding log done items should be in the AIL. What we do now
4234 * is free the extents associated with each one. 4370 * is update the data structures associated with each one.
4235 * 4371 *
4236 * Since we process the EFIs in normal transactions, they 4372 * Since we process the log intent items in normal transactions, they
4237 * will be removed at some point after the commit. This prevents 4373 * will be removed at some point after the commit. This prevents us
4238 * us from just walking down the list processing each one. 4374 * from just walking down the list processing each one. We'll use a
4239 * We'll use a flag in the EFI to skip those that we've already 4375 * flag in the intent item to skip those that we've already processed
4240 * processed and use the AIL iteration mechanism's generation 4376 * and use the AIL iteration mechanism's generation count to try to
4241 * count to try to speed this up at least a bit. 4377 * speed this up at least a bit.
4242 * 4378 *
4243 * When we start, we know that the EFIs are the only things in 4379 * When we start, we know that the intents are the only things in the
4244 * the AIL. As we process them, however, other items are added 4380 * AIL. As we process them, however, other items are added to the
4245 * to the AIL. Since everything added to the AIL must come after 4381 * AIL.
4246 * everything already in the AIL, we stop processing as soon as
4247 * we see something other than an EFI in the AIL.
4248 */ 4382 */
4249STATIC int 4383STATIC int
4250xlog_recover_process_efis( 4384xlog_recover_process_intents(
4251 struct xlog *log) 4385 struct xlog *log)
4252{ 4386{
4253 struct xfs_log_item *lip; 4387 struct xfs_log_item *lip;
4254 struct xfs_efi_log_item *efip;
4255 int error = 0; 4388 int error = 0;
4256 struct xfs_ail_cursor cur; 4389 struct xfs_ail_cursor cur;
4257 struct xfs_ail *ailp; 4390 struct xfs_ail *ailp;
4391 xfs_lsn_t last_lsn;
4258 4392
4259 ailp = log->l_ailp; 4393 ailp = log->l_ailp;
4260 spin_lock(&ailp->xa_lock); 4394 spin_lock(&ailp->xa_lock);
4261 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); 4395 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
4396 last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
4262 while (lip != NULL) { 4397 while (lip != NULL) {
4263 /* 4398 /*
4264 * We're done when we see something other than an EFI. 4399 * We're done when we see something other than an intent.
4265 * There should be no EFIs left in the AIL now. 4400 * There should be no intents left in the AIL now.
4266 */ 4401 */
4267 if (lip->li_type != XFS_LI_EFI) { 4402 if (!xlog_item_is_intent(lip)) {
4268#ifdef DEBUG 4403#ifdef DEBUG
4269 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) 4404 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
4270 ASSERT(lip->li_type != XFS_LI_EFI); 4405 ASSERT(!xlog_item_is_intent(lip));
4271#endif 4406#endif
4272 break; 4407 break;
4273 } 4408 }
4274 4409
4275 /* 4410 /*
4276 * Skip EFIs that we've already processed. 4411 * We should never see a redo item with a LSN higher than
4412 * the last transaction we found in the log at the start
4413 * of recovery.
4277 */ 4414 */
4278 efip = container_of(lip, struct xfs_efi_log_item, efi_item); 4415 ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0);
4279 if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) {
4280 lip = xfs_trans_ail_cursor_next(ailp, &cur);
4281 continue;
4282 }
4283 4416
4284 spin_unlock(&ailp->xa_lock); 4417 switch (lip->li_type) {
4285 error = xlog_recover_process_efi(log->l_mp, efip); 4418 case XFS_LI_EFI:
4286 spin_lock(&ailp->xa_lock); 4419 error = xlog_recover_process_efi(log->l_mp, ailp, lip);
4420 break;
4421 case XFS_LI_RUI:
4422 error = xlog_recover_process_rui(log->l_mp, ailp, lip);
4423 break;
4424 }
4287 if (error) 4425 if (error)
4288 goto out; 4426 goto out;
4289 lip = xfs_trans_ail_cursor_next(ailp, &cur); 4427 lip = xfs_trans_ail_cursor_next(ailp, &cur);
@@ -4295,15 +4433,14 @@ out:
4295} 4433}
4296 4434
4297/* 4435/*
4298 * A cancel occurs when the mount has failed and we're bailing out. Release all 4436 * A cancel occurs when the mount has failed and we're bailing out.
4299 * pending EFIs so they don't pin the AIL. 4437 * Release all pending log intent items so they don't pin the AIL.
4300 */ 4438 */
4301STATIC int 4439STATIC int
4302xlog_recover_cancel_efis( 4440xlog_recover_cancel_intents(
4303 struct xlog *log) 4441 struct xlog *log)
4304{ 4442{
4305 struct xfs_log_item *lip; 4443 struct xfs_log_item *lip;
4306 struct xfs_efi_log_item *efip;
4307 int error = 0; 4444 int error = 0;
4308 struct xfs_ail_cursor cur; 4445 struct xfs_ail_cursor cur;
4309 struct xfs_ail *ailp; 4446 struct xfs_ail *ailp;
@@ -4313,22 +4450,25 @@ xlog_recover_cancel_efis(
4313 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); 4450 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
4314 while (lip != NULL) { 4451 while (lip != NULL) {
4315 /* 4452 /*
4316 * We're done when we see something other than an EFI. 4453 * We're done when we see something other than an intent.
4317 * There should be no EFIs left in the AIL now. 4454 * There should be no intents left in the AIL now.
4318 */ 4455 */
4319 if (lip->li_type != XFS_LI_EFI) { 4456 if (!xlog_item_is_intent(lip)) {
4320#ifdef DEBUG 4457#ifdef DEBUG
4321 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) 4458 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
4322 ASSERT(lip->li_type != XFS_LI_EFI); 4459 ASSERT(!xlog_item_is_intent(lip));
4323#endif 4460#endif
4324 break; 4461 break;
4325 } 4462 }
4326 4463
4327 efip = container_of(lip, struct xfs_efi_log_item, efi_item); 4464 switch (lip->li_type) {
4328 4465 case XFS_LI_EFI:
4329 spin_unlock(&ailp->xa_lock); 4466 xlog_recover_cancel_efi(log->l_mp, ailp, lip);
4330 xfs_efi_release(efip); 4467 break;
4331 spin_lock(&ailp->xa_lock); 4468 case XFS_LI_RUI:
4469 xlog_recover_cancel_rui(log->l_mp, ailp, lip);
4470 break;
4471 }
4332 4472
4333 lip = xfs_trans_ail_cursor_next(ailp, &cur); 4473 lip = xfs_trans_ail_cursor_next(ailp, &cur);
4334 } 4474 }
@@ -5023,6 +5163,7 @@ xlog_do_recover(
5023 xfs_warn(mp, "Failed post-recovery per-ag init: %d", error); 5163 xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
5024 return error; 5164 return error;
5025 } 5165 }
5166 mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
5026 5167
5027 xlog_recover_check_summary(log); 5168 xlog_recover_check_summary(log);
5028 5169
@@ -5139,16 +5280,17 @@ xlog_recover_finish(
5139 */ 5280 */
5140 if (log->l_flags & XLOG_RECOVERY_NEEDED) { 5281 if (log->l_flags & XLOG_RECOVERY_NEEDED) {
5141 int error; 5282 int error;
5142 error = xlog_recover_process_efis(log); 5283 error = xlog_recover_process_intents(log);
5143 if (error) { 5284 if (error) {
5144 xfs_alert(log->l_mp, "Failed to recover EFIs"); 5285 xfs_alert(log->l_mp, "Failed to recover intents");
5145 return error; 5286 return error;
5146 } 5287 }
5288
5147 /* 5289 /*
5148 * Sync the log to get all the EFIs out of the AIL. 5290 * Sync the log to get all the intents out of the AIL.
5149 * This isn't absolutely necessary, but it helps in 5291 * This isn't absolutely necessary, but it helps in
5150 * case the unlink transactions would have problems 5292 * case the unlink transactions would have problems
5151 * pushing the EFIs out of the way. 5293 * pushing the intents out of the way.
5152 */ 5294 */
5153 xfs_log_force(log->l_mp, XFS_LOG_SYNC); 5295 xfs_log_force(log->l_mp, XFS_LOG_SYNC);
5154 5296
@@ -5173,7 +5315,7 @@ xlog_recover_cancel(
5173 int error = 0; 5315 int error = 0;
5174 5316
5175 if (log->l_flags & XLOG_RECOVERY_NEEDED) 5317 if (log->l_flags & XLOG_RECOVERY_NEEDED)
5176 error = xlog_recover_cancel_efis(log); 5318 error = xlog_recover_cancel_intents(log);
5177 5319
5178 return error; 5320 return error;
5179} 5321}
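The recovery rework above replaces the EFI-only loops with a generic intent walk, so a further intent type only has to be added in three places: xlog_item_is_intent(), the process switch, and the cancel switch. A hypothetical illustration follows; XFS_LI_FOO and the xlog_recover_*_foo() helpers do not exist and merely mark the hook points:

    /* Teach the AIL walk that the new type is an intent. */
    static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
    {
            switch (lip->li_type) {
            case XFS_LI_EFI:
            case XFS_LI_RUI:
            case XFS_LI_FOO:                /* hypothetical new intent */
                    return true;
            default:
                    return false;
            }
    }

    /* ...and dispatch it in xlog_recover_process_intents(): */
            switch (lip->li_type) {
            case XFS_LI_EFI:
                    error = xlog_recover_process_efi(log->l_mp, ailp, lip);
                    break;
            case XFS_LI_RUI:
                    error = xlog_recover_process_rui(log->l_mp, ailp, lip);
                    break;
            case XFS_LI_FOO:
                    error = xlog_recover_process_foo(log->l_mp, ailp, lip);
                    break;
            }

A matching xlog_recover_cancel_foo() case would go in xlog_recover_cancel_intents() in the same way.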
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 970c19ba2f56..faeead671f9f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -24,6 +24,7 @@
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_defer.h"
27#include "xfs_da_format.h" 28#include "xfs_da_format.h"
28#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
29#include "xfs_inode.h" 30#include "xfs_inode.h"
@@ -41,6 +42,7 @@
41#include "xfs_trace.h" 42#include "xfs_trace.h"
42#include "xfs_icache.h" 43#include "xfs_icache.h"
43#include "xfs_sysfs.h" 44#include "xfs_sysfs.h"
45#include "xfs_rmap_btree.h"
44 46
45 47
46static DEFINE_MUTEX(xfs_uuid_table_mutex); 48static DEFINE_MUTEX(xfs_uuid_table_mutex);
@@ -230,6 +232,8 @@ xfs_initialize_perag(
230 232
231 if (maxagi) 233 if (maxagi)
232 *maxagi = index; 234 *maxagi = index;
235
236 mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
233 return 0; 237 return 0;
234 238
235out_unwind: 239out_unwind:
@@ -679,6 +683,7 @@ xfs_mountfs(
679 xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); 683 xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
680 xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); 684 xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
681 xfs_ialloc_compute_maxlevels(mp); 685 xfs_ialloc_compute_maxlevels(mp);
686 xfs_rmapbt_compute_maxlevels(mp);
682 687
683 xfs_set_maxicount(mp); 688 xfs_set_maxicount(mp);
684 689
@@ -1216,7 +1221,7 @@ xfs_mod_fdblocks(
1216 batch = XFS_FDBLOCKS_BATCH; 1221 batch = XFS_FDBLOCKS_BATCH;
1217 1222
1218 __percpu_counter_add(&mp->m_fdblocks, delta, batch); 1223 __percpu_counter_add(&mp->m_fdblocks, delta, batch);
1219 if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp), 1224 if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
1220 XFS_FDBLOCKS_BATCH) >= 0) { 1225 XFS_FDBLOCKS_BATCH) >= 0) {
1221 /* we had space! */ 1226 /* we had space! */
1222 return 0; 1227 return 0;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index c1b798c72126..b36676cde103 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -116,9 +116,15 @@ typedef struct xfs_mount {
116 uint m_bmap_dmnr[2]; /* min bmap btree records */ 116 uint m_bmap_dmnr[2]; /* min bmap btree records */
117 uint m_inobt_mxr[2]; /* max inobt btree records */ 117 uint m_inobt_mxr[2]; /* max inobt btree records */
118 uint m_inobt_mnr[2]; /* min inobt btree records */ 118 uint m_inobt_mnr[2]; /* min inobt btree records */
119 uint m_rmap_mxr[2]; /* max rmap btree records */
120 uint m_rmap_mnr[2]; /* min rmap btree records */
119 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ 121 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
120 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ 122 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
121 uint m_in_maxlevels; /* max inobt btree levels. */ 123 uint m_in_maxlevels; /* max inobt btree levels. */
124 uint m_rmap_maxlevels; /* max rmap btree levels */
125 xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */
126 uint m_alloc_set_aside; /* space we can't use */
127 uint m_ag_max_usable; /* max space per AG */
122 struct radix_tree_root m_perag_tree; /* per-ag accounting info */ 128 struct radix_tree_root m_perag_tree; /* per-ag accounting info */
123 spinlock_t m_perag_lock; /* lock for m_perag_tree */ 129 spinlock_t m_perag_lock; /* lock for m_perag_tree */
124 struct mutex m_growlock; /* growfs mutex */ 130 struct mutex m_growlock; /* growfs mutex */
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 0cc8d8f74356..69e2986a3776 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -49,11 +49,14 @@ xfs_check_ondisk_structs(void)
49 XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56); 49 XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56);
50 XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4); 50 XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4);
51 XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16); 51 XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16);
52 XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key, 20);
53 XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec, 24);
52 XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp, 8); 54 XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp, 8);
53 XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t, 8); 55 XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t, 8);
54 XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t, 4); 56 XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t, 4);
55 XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t, 8); 57 XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t, 8);
56 XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t, 4); 58 XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t, 4);
59 XFS_CHECK_STRUCT_SIZE(xfs_rmap_ptr_t, 4);
57 60
58 /* dir/attr trees */ 61 /* dir/attr trees */
59 XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr, 80); 62 XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr, 80);
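The three xfs_ondisk.h additions pin the on-disk sizes of the new rmap btree key, record, and pointer so that an accidental packing or typedef change fails the build rather than writing incompatible metadata. XFS_CHECK_STRUCT_SIZE is a compile-time assertion roughly along these lines (an approximation; the real macro is defined at the top of xfs_ondisk.h):

    #define XFS_CHECK_STRUCT_SIZE(structname, size) \
            BUILD_BUG_ON_MSG(sizeof(structname) != (size), \
                    "XFS: sizeof(" #structname ") is wrong, expected " #size)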
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
new file mode 100644
index 000000000000..2500f28689d5
--- /dev/null
+++ b/fs/xfs/xfs_rmap_item.c
@@ -0,0 +1,536 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_format.h"
23#include "xfs_log_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_bit.h"
26#include "xfs_mount.h"
27#include "xfs_defer.h"
28#include "xfs_trans.h"
29#include "xfs_trans_priv.h"
30#include "xfs_buf_item.h"
31#include "xfs_rmap_item.h"
32#include "xfs_log.h"
33#include "xfs_rmap.h"
34
35
36kmem_zone_t *xfs_rui_zone;
37kmem_zone_t *xfs_rud_zone;
38
39static inline struct xfs_rui_log_item *RUI_ITEM(struct xfs_log_item *lip)
40{
41 return container_of(lip, struct xfs_rui_log_item, rui_item);
42}
43
44void
45xfs_rui_item_free(
46 struct xfs_rui_log_item *ruip)
47{
48 if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS)
49 kmem_free(ruip);
50 else
51 kmem_zone_free(xfs_rui_zone, ruip);
52}
53
54/*
55 * This returns the number of iovecs needed to log the given rui item.
56 * We only need 1 iovec for an rui item. It just logs the rui_log_format
57 * structure.
58 */
59static inline int
60xfs_rui_item_sizeof(
61 struct xfs_rui_log_item *ruip)
62{
63 return sizeof(struct xfs_rui_log_format) +
64 (ruip->rui_format.rui_nextents - 1) *
65 sizeof(struct xfs_map_extent);
66}
67
68STATIC void
69xfs_rui_item_size(
70 struct xfs_log_item *lip,
71 int *nvecs,
72 int *nbytes)
73{
74 *nvecs += 1;
75 *nbytes += xfs_rui_item_sizeof(RUI_ITEM(lip));
76}
77
78/*
79 * This is called to fill in the vector of log iovecs for the
80 * given rui log item. We use only 1 iovec, and we point that
81 * at the rui_log_format structure embedded in the rui item.
82 * It is at this point that we assert that all of the extent
83 * slots in the rui item have been filled.
84 */
85STATIC void
86xfs_rui_item_format(
87 struct xfs_log_item *lip,
88 struct xfs_log_vec *lv)
89{
90 struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
91 struct xfs_log_iovec *vecp = NULL;
92
93 ASSERT(atomic_read(&ruip->rui_next_extent) ==
94 ruip->rui_format.rui_nextents);
95
96 ruip->rui_format.rui_type = XFS_LI_RUI;
97 ruip->rui_format.rui_size = 1;
98
99 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format,
100 xfs_rui_item_sizeof(ruip));
101}
102
103/*
104 * Pinning has no meaning for an rui item, so just return.
105 */
106STATIC void
107xfs_rui_item_pin(
108 struct xfs_log_item *lip)
109{
110}
111
112/*
113 * The unpin operation is the last place an RUI is manipulated in the log. It is
114 * either inserted in the AIL or aborted in the event of a log I/O error. In
115 * either case, the RUI transaction has been successfully committed to make it
116 * this far. Therefore, we expect whoever committed the RUI to either construct
117 * and commit the RUD or drop the RUD's reference in the event of error. Simply
118 * drop the log's RUI reference now that the log is done with it.
119 */
120STATIC void
121xfs_rui_item_unpin(
122 struct xfs_log_item *lip,
123 int remove)
124{
125 struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
126
127 xfs_rui_release(ruip);
128}
129
130/*
131 * RUI items have no locking or pushing. However, since RUIs are pulled from
132 * the AIL when their corresponding RUDs are committed to disk, their situation
133 * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
134 * will eventually flush the log. This should help in getting the RUI out of
135 * the AIL.
136 */
137STATIC uint
138xfs_rui_item_push(
139 struct xfs_log_item *lip,
140 struct list_head *buffer_list)
141{
142 return XFS_ITEM_PINNED;
143}
144
145/*
146 * The RUI has been either committed or aborted if the transaction has been
147 * cancelled. If the transaction was cancelled, an RUD isn't going to be
148 * constructed and thus we free the RUI here directly.
149 */
150STATIC void
151xfs_rui_item_unlock(
152 struct xfs_log_item *lip)
153{
154 if (lip->li_flags & XFS_LI_ABORTED)
155 xfs_rui_item_free(RUI_ITEM(lip));
156}
157
158/*
159 * The RUI is logged only once and cannot be moved in the log, so simply return
160 * the lsn at which it's been logged.
161 */
162STATIC xfs_lsn_t
163xfs_rui_item_committed(
164 struct xfs_log_item *lip,
165 xfs_lsn_t lsn)
166{
167 return lsn;
168}
169
170/*
171 * The RUI dependency tracking op doesn't do squat. It can't because
172 * it doesn't know where the free extent is coming from. The dependency
173 * tracking has to be handled by the "enclosing" metadata object. For
174 * example, for inodes, the inode is locked throughout the extent freeing
175 * so the dependency should be recorded there.
176 */
177STATIC void
178xfs_rui_item_committing(
179 struct xfs_log_item *lip,
180 xfs_lsn_t lsn)
181{
182}
183
184/*
185 * This is the ops vector shared by all rui log items.
186 */
187static const struct xfs_item_ops xfs_rui_item_ops = {
188 .iop_size = xfs_rui_item_size,
189 .iop_format = xfs_rui_item_format,
190 .iop_pin = xfs_rui_item_pin,
191 .iop_unpin = xfs_rui_item_unpin,
192 .iop_unlock = xfs_rui_item_unlock,
193 .iop_committed = xfs_rui_item_committed,
194 .iop_push = xfs_rui_item_push,
195 .iop_committing = xfs_rui_item_committing,
196};
197
198/*
199 * Allocate and initialize an rui item with the given number of extents.
200 */
201struct xfs_rui_log_item *
202xfs_rui_init(
203 struct xfs_mount *mp,
204 uint nextents)
205
206{
207 struct xfs_rui_log_item *ruip;
208 uint size;
209
210 ASSERT(nextents > 0);
211 if (nextents > XFS_RUI_MAX_FAST_EXTENTS) {
212 size = (uint)(sizeof(struct xfs_rui_log_item) +
213 ((nextents - 1) * sizeof(struct xfs_map_extent)));
214 ruip = kmem_zalloc(size, KM_SLEEP);
215 } else {
216 ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP);
217 }
218
219 xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops);
220 ruip->rui_format.rui_nextents = nextents;
221 ruip->rui_format.rui_id = (uintptr_t)(void *)ruip;
222 atomic_set(&ruip->rui_next_extent, 0);
223 atomic_set(&ruip->rui_refcount, 2);
224
225 return ruip;
226}
227
228/*
229 * Copy an RUI format buffer from the given buf, and into the destination
230 * RUI format structure. The RUI/RUD items were designed not to need any
231 * special alignment handling.
232 */
233int
234xfs_rui_copy_format(
235 struct xfs_log_iovec *buf,
236 struct xfs_rui_log_format *dst_rui_fmt)
237{
238 struct xfs_rui_log_format *src_rui_fmt;
239 uint len;
240
241 src_rui_fmt = buf->i_addr;
242 len = sizeof(struct xfs_rui_log_format) +
243 (src_rui_fmt->rui_nextents - 1) *
244 sizeof(struct xfs_map_extent);
245
246 if (buf->i_len != len)
247 return -EFSCORRUPTED;
248
249 memcpy((char *)dst_rui_fmt, (char *)src_rui_fmt, len);
250 return 0;
251}
252
253/*
254 * Freeing the RUI requires that we remove it from the AIL if it has already
255 * been placed there. However, the RUI may not yet have been placed in the AIL
256 * when called by xfs_rui_release() from RUD processing due to the ordering of
257 * committed vs unpin operations in bulk insert operations. Hence the reference
258 * count to ensure only the last caller frees the RUI.
259 */
260void
261xfs_rui_release(
262 struct xfs_rui_log_item *ruip)
263{
264 if (atomic_dec_and_test(&ruip->rui_refcount)) {
265 xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
266 xfs_rui_item_free(ruip);
267 }
268}
269
270static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip)
271{
272 return container_of(lip, struct xfs_rud_log_item, rud_item);
273}
274
275STATIC void
276xfs_rud_item_size(
277 struct xfs_log_item *lip,
278 int *nvecs,
279 int *nbytes)
280{
281 *nvecs += 1;
282 *nbytes += sizeof(struct xfs_rud_log_format);
283}
284
285/*
286 * This is called to fill in the vector of log iovecs for the
287 * given rud log item. We use only 1 iovec, and we point that
288 * at the rud_log_format structure embedded in the rud item.
289 * It is at this point that we assert that all of the extent
290 * slots in the rud item have been filled.
291 */
292STATIC void
293xfs_rud_item_format(
294 struct xfs_log_item *lip,
295 struct xfs_log_vec *lv)
296{
297 struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
298 struct xfs_log_iovec *vecp = NULL;
299
300 rudp->rud_format.rud_type = XFS_LI_RUD;
301 rudp->rud_format.rud_size = 1;
302
303 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format,
304 sizeof(struct xfs_rud_log_format));
305}
306
307/*
308 * Pinning has no meaning for an rud item, so just return.
309 */
310STATIC void
311xfs_rud_item_pin(
312 struct xfs_log_item *lip)
313{
314}
315
316/*
317 * Since pinning has no meaning for an rud item, unpinning does
318 * not either.
319 */
320STATIC void
321xfs_rud_item_unpin(
322 struct xfs_log_item *lip,
323 int remove)
324{
325}
326
327/*
328 * There isn't much you can do to push on an rud item. It is simply stuck
329 * waiting for the log to be flushed to disk.
330 */
331STATIC uint
332xfs_rud_item_push(
333 struct xfs_log_item *lip,
334 struct list_head *buffer_list)
335{
336 return XFS_ITEM_PINNED;
337}
338
339/*
340 * The RUD is either committed or aborted if the transaction is cancelled. If
341 * the transaction is cancelled, drop our reference to the RUI and free the
342 * RUD.
343 */
344STATIC void
345xfs_rud_item_unlock(
346 struct xfs_log_item *lip)
347{
348 struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
349
350 if (lip->li_flags & XFS_LI_ABORTED) {
351 xfs_rui_release(rudp->rud_ruip);
352 kmem_zone_free(xfs_rud_zone, rudp);
353 }
354}
355
356/*
357 * When the rud item is committed to disk, all we need to do is delete our
358 * reference to our partner rui item and then free ourselves. Since we're
359 * freeing ourselves we must return -1 to keep the transaction code from
360 * further referencing this item.
361 */
362STATIC xfs_lsn_t
363xfs_rud_item_committed(
364 struct xfs_log_item *lip,
365 xfs_lsn_t lsn)
366{
367 struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
368
369 /*
370 * Drop the RUI reference regardless of whether the RUD has been
371 * aborted. Once the RUD transaction is constructed, it is the sole
372 * responsibility of the RUD to release the RUI (even if the RUI is
373 * aborted due to log I/O error).
374 */
375 xfs_rui_release(rudp->rud_ruip);
376 kmem_zone_free(xfs_rud_zone, rudp);
377
378 return (xfs_lsn_t)-1;
379}
380
381/*
382 * The RUD dependency tracking op doesn't do squat. It can't because
383 * it doesn't know where the free extent is coming from. The dependency
384 * tracking has to be handled by the "enclosing" metadata object. For
385 * example, for inodes, the inode is locked throughout the extent freeing
386 * so the dependency should be recorded there.
387 */
388STATIC void
389xfs_rud_item_committing(
390 struct xfs_log_item *lip,
391 xfs_lsn_t lsn)
392{
393}
394
395/*
396 * This is the ops vector shared by all rud log items.
397 */
398static const struct xfs_item_ops xfs_rud_item_ops = {
399 .iop_size = xfs_rud_item_size,
400 .iop_format = xfs_rud_item_format,
401 .iop_pin = xfs_rud_item_pin,
402 .iop_unpin = xfs_rud_item_unpin,
403 .iop_unlock = xfs_rud_item_unlock,
404 .iop_committed = xfs_rud_item_committed,
405 .iop_push = xfs_rud_item_push,
406 .iop_committing = xfs_rud_item_committing,
407};
408
409/*
410 * Allocate and initialize an rud item with the given number of extents.
411 */
412struct xfs_rud_log_item *
413xfs_rud_init(
414 struct xfs_mount *mp,
415 struct xfs_rui_log_item *ruip)
416
417{
418 struct xfs_rud_log_item *rudp;
419
420 rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
421 xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops);
422 rudp->rud_ruip = ruip;
423 rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
424
425 return rudp;
426}
427
428/*
429 * Process an rmap update intent item that was recovered from the log.
430 * We need to update the rmapbt.
431 */
432int
433xfs_rui_recover(
434 struct xfs_mount *mp,
435 struct xfs_rui_log_item *ruip)
436{
437 int i;
438 int error = 0;
439 struct xfs_map_extent *rmap;
440 xfs_fsblock_t startblock_fsb;
441 bool op_ok;
442 struct xfs_rud_log_item *rudp;
443 enum xfs_rmap_intent_type type;
444 int whichfork;
445 xfs_exntst_t state;
446 struct xfs_trans *tp;
447 struct xfs_btree_cur *rcur = NULL;
448
449 ASSERT(!test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags));
450
451 /*
452 * First check the validity of the extents described by the
453 * RUI. If any are bad, then assume that all are bad and
454 * just toss the RUI.
455 */
456 for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
457 rmap = &ruip->rui_format.rui_extents[i];
458 startblock_fsb = XFS_BB_TO_FSB(mp,
459 XFS_FSB_TO_DADDR(mp, rmap->me_startblock));
460 switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
461 case XFS_RMAP_EXTENT_MAP:
462 case XFS_RMAP_EXTENT_UNMAP:
463 case XFS_RMAP_EXTENT_CONVERT:
464 case XFS_RMAP_EXTENT_ALLOC:
465 case XFS_RMAP_EXTENT_FREE:
466 op_ok = true;
467 break;
468 default:
469 op_ok = false;
470 break;
471 }
472 if (!op_ok || startblock_fsb == 0 ||
473 rmap->me_len == 0 ||
474 startblock_fsb >= mp->m_sb.sb_dblocks ||
475 rmap->me_len >= mp->m_sb.sb_agblocks ||
476 (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)) {
477 /*
478 * This will pull the RUI from the AIL and
479 * free the memory associated with it.
480 */
481 set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
482 xfs_rui_release(ruip);
483 return -EIO;
484 }
485 }
486
487 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
488 if (error)
489 return error;
490 rudp = xfs_trans_get_rud(tp, ruip);
491
492 for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
493 rmap = &ruip->rui_format.rui_extents[i];
494 state = (rmap->me_flags & XFS_RMAP_EXTENT_UNWRITTEN) ?
495 XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
496 whichfork = (rmap->me_flags & XFS_RMAP_EXTENT_ATTR_FORK) ?
497 XFS_ATTR_FORK : XFS_DATA_FORK;
498 switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
499 case XFS_RMAP_EXTENT_MAP:
500 type = XFS_RMAP_MAP;
501 break;
502 case XFS_RMAP_EXTENT_UNMAP:
503 type = XFS_RMAP_UNMAP;
504 break;
505 case XFS_RMAP_EXTENT_CONVERT:
506 type = XFS_RMAP_CONVERT;
507 break;
508 case XFS_RMAP_EXTENT_ALLOC:
509 type = XFS_RMAP_ALLOC;
510 break;
511 case XFS_RMAP_EXTENT_FREE:
512 type = XFS_RMAP_FREE;
513 break;
514 default:
515 error = -EFSCORRUPTED;
516 goto abort_error;
517 }
518 error = xfs_trans_log_finish_rmap_update(tp, rudp, type,
519 rmap->me_owner, whichfork,
520 rmap->me_startoff, rmap->me_startblock,
521 rmap->me_len, state, &rcur);
522 if (error)
523 goto abort_error;
524
525 }
526
527 xfs_rmap_finish_one_cleanup(tp, rcur, error);
528 set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
529 error = xfs_trans_commit(tp);
530 return error;
531
532abort_error:
533 xfs_rmap_finish_one_cleanup(tp, rcur, error);
534 xfs_trans_cancel(tp);
535 return error;
536}
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
new file mode 100644
index 000000000000..aefcc3a318a5
--- /dev/null
+++ b/fs/xfs/xfs_rmap_item.h
@@ -0,0 +1,95 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#ifndef __XFS_RMAP_ITEM_H__
21#define __XFS_RMAP_ITEM_H__
22
23/*
24 * There are (currently) three pairs of rmap btree redo item types: map, unmap,
25 * and convert. The common abbreviations for these are RUI (rmap update
26 * intent) and RUD (rmap update done). The redo item type is encoded in the
27 * flags field of each xfs_map_extent.
28 *
29 * *I items should be recorded in the *first* of a series of rolled
30 * transactions, and the *D items should be recorded in the same transaction
31 * that records the associated rmapbt updates. Typically, the first
32 * transaction will record a bmbt update, followed by some number of
33 * transactions containing rmapbt updates, and finally transactions with any
34 * bnobt/cntbt updates.
35 *
36 * Should the system crash after the commit of the first transaction but
37 * before the commit of the final transaction in a series, log recovery will
38 * use the redo information recorded by the intent items to replay the
39 * (rmapbt/bnobt/cntbt) metadata updates in the non-first transaction.
40 */
41
42/* kernel only RUI/RUD definitions */
43
44struct xfs_mount;
45struct kmem_zone;
46
47/*
48 * Max number of extents in fast allocation path.
49 */
50#define XFS_RUI_MAX_FAST_EXTENTS 16
51
52/*
53 * Define RUI flag bits. Manipulated by set/clear/test_bit operators.
54 */
55#define XFS_RUI_RECOVERED 1
56
57/*
58 * This is the "rmap update intent" log item. It is used to log the fact that
59 * some reverse mappings need to change. It is used in conjunction with the
60 * "rmap update done" log item described below.
61 *
62 * These log items follow the same rules as struct xfs_efi_log_item; see the
63 * comments about that structure (in xfs_extfree_item.h) for more details.
64 */
65struct xfs_rui_log_item {
66 struct xfs_log_item rui_item;
67 atomic_t rui_refcount;
68 atomic_t rui_next_extent;
69 unsigned long rui_flags; /* misc flags */
70 struct xfs_rui_log_format rui_format;
71};
72
73/*
74 * This is the "rmap update done" log item. It is used to log the fact that
75 * some rmapbt updates mentioned in an earlier rui item have been performed.
76 */
77struct xfs_rud_log_item {
78 struct xfs_log_item rud_item;
79 struct xfs_rui_log_item *rud_ruip;
80 struct xfs_rud_log_format rud_format;
81};
82
83extern struct kmem_zone *xfs_rui_zone;
84extern struct kmem_zone *xfs_rud_zone;
85
86struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint);
87struct xfs_rud_log_item *xfs_rud_init(struct xfs_mount *,
88 struct xfs_rui_log_item *);
89int xfs_rui_copy_format(struct xfs_log_iovec *buf,
90 struct xfs_rui_log_format *dst_rui_fmt);
91void xfs_rui_item_free(struct xfs_rui_log_item *);
92void xfs_rui_release(struct xfs_rui_log_item *);
93int xfs_rui_recover(struct xfs_mount *mp, struct xfs_rui_log_item *ruip);
94
95#endif /* __XFS_RMAP_ITEM_H__ */
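The ordering rules in the header comment above (intent items in the first transaction of a chain, done items alongside the actual rmapbt updates) can be read as a timeline. The sketch below is illustrative only: in normal operation the xfs_defer machinery drives these steps, and the explicit calls shown are the ones xfs_rui_recover() uses when replaying the log.

    /*
     * Illustrative transaction chain for one deferred rmap update.
     *
     * Transaction 1: log the bmbt change plus an RUI recording the pending
     *                rmapbt work, then roll the transaction.
     * Transaction 2: create the RUD, apply the rmapbt update, log the RUD,
     *                then commit.  A crash between the two commits lets log
     *                recovery replay the rmapbt update from the RUI.
     */
    ruip = xfs_rui_init(mp, nextents);              /* intent */
    /* ... RUI logged in the first transaction, transaction rolled ... */
    rudp = xfs_trans_get_rud(tp, ruip);             /* done item */
    error = xfs_trans_log_finish_rmap_update(tp, rudp, XFS_RMAP_MAP,
                    owner, XFS_DATA_FORK, startoff, startblock, len,
                    XFS_EXT_NORM, &rcur);           /* apply + log one update */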
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 3938b37d1043..802bcc326d9f 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -23,6 +23,7 @@
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_bit.h" 24#include "xfs_bit.h"
25#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
26#include "xfs_inode.h" 27#include "xfs_inode.h"
27#include "xfs_bmap.h" 28#include "xfs_bmap.h"
28#include "xfs_bmap_util.h" 29#include "xfs_bmap_util.h"
@@ -769,7 +770,7 @@ xfs_growfs_rt_alloc(
769 xfs_daddr_t d; /* disk block address */ 770 xfs_daddr_t d; /* disk block address */
770 int error; /* error return value */ 771 int error; /* error return value */
771 xfs_fsblock_t firstblock;/* first block allocated in xaction */ 772 xfs_fsblock_t firstblock;/* first block allocated in xaction */
772 struct xfs_bmap_free flist; /* list of freed blocks */ 773 struct xfs_defer_ops dfops; /* list of freed blocks */
773 xfs_fsblock_t fsbno; /* filesystem block for bno */ 774 xfs_fsblock_t fsbno; /* filesystem block for bno */
774 struct xfs_bmbt_irec map; /* block map output */ 775 struct xfs_bmbt_irec map; /* block map output */
775 int nmap; /* number of block maps */ 776 int nmap; /* number of block maps */
@@ -794,14 +795,14 @@ xfs_growfs_rt_alloc(
794 xfs_ilock(ip, XFS_ILOCK_EXCL); 795 xfs_ilock(ip, XFS_ILOCK_EXCL);
795 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 796 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
796 797
797 xfs_bmap_init(&flist, &firstblock); 798 xfs_defer_init(&dfops, &firstblock);
798 /* 799 /*
799 * Allocate blocks to the bitmap file. 800 * Allocate blocks to the bitmap file.
800 */ 801 */
801 nmap = 1; 802 nmap = 1;
802 error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, 803 error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
803 XFS_BMAPI_METADATA, &firstblock, 804 XFS_BMAPI_METADATA, &firstblock,
804 resblks, &map, &nmap, &flist); 805 resblks, &map, &nmap, &dfops);
805 if (!error && nmap < 1) 806 if (!error && nmap < 1)
806 error = -ENOSPC; 807 error = -ENOSPC;
807 if (error) 808 if (error)
@@ -809,7 +810,7 @@ xfs_growfs_rt_alloc(
809 /* 810 /*
810 * Free any blocks freed up in the transaction, then commit. 811 * Free any blocks freed up in the transaction, then commit.
811 */ 812 */
812 error = xfs_bmap_finish(&tp, &flist, NULL); 813 error = xfs_defer_finish(&tp, &dfops, NULL);
813 if (error) 814 if (error)
814 goto out_bmap_cancel; 815 goto out_bmap_cancel;
815 error = xfs_trans_commit(tp); 816 error = xfs_trans_commit(tp);
@@ -862,7 +863,7 @@ xfs_growfs_rt_alloc(
862 return 0; 863 return 0;
863 864
864out_bmap_cancel: 865out_bmap_cancel:
865 xfs_bmap_cancel(&flist); 866 xfs_defer_cancel(&dfops);
866out_trans_cancel: 867out_trans_cancel:
867 xfs_trans_cancel(tp); 868 xfs_trans_cancel(tp);
868 return error; 869 return error;
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index d266e835ecc3..6e812fe0fd43 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -61,6 +61,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
61 { "bmbt2", XFSSTAT_END_BMBT_V2 }, 61 { "bmbt2", XFSSTAT_END_BMBT_V2 },
62 { "ibt2", XFSSTAT_END_IBT_V2 }, 62 { "ibt2", XFSSTAT_END_IBT_V2 },
63 { "fibt2", XFSSTAT_END_FIBT_V2 }, 63 { "fibt2", XFSSTAT_END_FIBT_V2 },
64 { "rmapbt", XFSSTAT_END_RMAP_V2 },
64 /* we print both series of quota information together */ 65 /* we print both series of quota information together */
65 { "qm", XFSSTAT_END_QM }, 66 { "qm", XFSSTAT_END_QM },
66 }; 67 };
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 483b0eff1988..657865f51e78 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -197,7 +197,23 @@ struct xfsstats {
197 __uint32_t xs_fibt_2_alloc; 197 __uint32_t xs_fibt_2_alloc;
198 __uint32_t xs_fibt_2_free; 198 __uint32_t xs_fibt_2_free;
199 __uint32_t xs_fibt_2_moves; 199 __uint32_t xs_fibt_2_moves;
200#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_FIBT_V2+6) 200#define XFSSTAT_END_RMAP_V2 (XFSSTAT_END_FIBT_V2+15)
201 __uint32_t xs_rmap_2_lookup;
202 __uint32_t xs_rmap_2_compare;
203 __uint32_t xs_rmap_2_insrec;
204 __uint32_t xs_rmap_2_delrec;
205 __uint32_t xs_rmap_2_newroot;
206 __uint32_t xs_rmap_2_killroot;
207 __uint32_t xs_rmap_2_increment;
208 __uint32_t xs_rmap_2_decrement;
209 __uint32_t xs_rmap_2_lshift;
210 __uint32_t xs_rmap_2_rshift;
211 __uint32_t xs_rmap_2_split;
212 __uint32_t xs_rmap_2_join;
213 __uint32_t xs_rmap_2_alloc;
214 __uint32_t xs_rmap_2_free;
215 __uint32_t xs_rmap_2_moves;
216#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_RMAP_V2+6)
201 __uint32_t xs_qm_dqreclaims; 217 __uint32_t xs_qm_dqreclaims;
202 __uint32_t xs_qm_dqreclaim_misses; 218 __uint32_t xs_qm_dqreclaim_misses;
203 __uint32_t xs_qm_dquot_dups; 219 __uint32_t xs_qm_dquot_dups;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 0303f1005f88..24ef83ef04de 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -46,6 +46,7 @@
46#include "xfs_quota.h" 46#include "xfs_quota.h"
47#include "xfs_sysfs.h" 47#include "xfs_sysfs.h"
48#include "xfs_ondisk.h" 48#include "xfs_ondisk.h"
49#include "xfs_rmap_item.h"
49 50
50#include <linux/namei.h> 51#include <linux/namei.h>
51#include <linux/init.h> 52#include <linux/init.h>
@@ -1075,7 +1076,7 @@ xfs_fs_statfs(
1075 statp->f_blocks = sbp->sb_dblocks - lsize; 1076 statp->f_blocks = sbp->sb_dblocks - lsize;
1076 spin_unlock(&mp->m_sb_lock); 1077 spin_unlock(&mp->m_sb_lock);
1077 1078
1078 statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp); 1079 statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
1079 statp->f_bavail = statp->f_bfree; 1080 statp->f_bavail = statp->f_bfree;
1080 1081
1081 fakeinos = statp->f_bfree << sbp->sb_inopblog; 1082 fakeinos = statp->f_bfree << sbp->sb_inopblog;
@@ -1573,6 +1574,10 @@ xfs_fs_fill_super(
1573 } 1574 }
1574 } 1575 }
1575 1576
1577 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
1578 xfs_alert(mp,
1579 "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!");
1580
1576 error = xfs_mountfs(mp); 1581 error = xfs_mountfs(mp);
1577 if (error) 1582 if (error)
1578 goto out_filestream_unmount; 1583 goto out_filestream_unmount;
@@ -1697,7 +1702,7 @@ xfs_init_zones(void)
1697 goto out_free_ioend_bioset; 1702 goto out_free_ioend_bioset;
1698 1703
1699 xfs_bmap_free_item_zone = kmem_zone_init( 1704 xfs_bmap_free_item_zone = kmem_zone_init(
1700 sizeof(struct xfs_bmap_free_item), 1705 sizeof(struct xfs_extent_free_item),
1701 "xfs_bmap_free_item"); 1706 "xfs_bmap_free_item");
1702 if (!xfs_bmap_free_item_zone) 1707 if (!xfs_bmap_free_item_zone)
1703 goto out_destroy_log_ticket_zone; 1708 goto out_destroy_log_ticket_zone;
@@ -1765,8 +1770,24 @@ xfs_init_zones(void)
1765 if (!xfs_icreate_zone) 1770 if (!xfs_icreate_zone)
1766 goto out_destroy_ili_zone; 1771 goto out_destroy_ili_zone;
1767 1772
1773 xfs_rud_zone = kmem_zone_init(sizeof(struct xfs_rud_log_item),
1774 "xfs_rud_item");
1775 if (!xfs_rud_zone)
1776 goto out_destroy_icreate_zone;
1777
1778 xfs_rui_zone = kmem_zone_init((sizeof(struct xfs_rui_log_item) +
1779 ((XFS_RUI_MAX_FAST_EXTENTS - 1) *
1780 sizeof(struct xfs_map_extent))),
1781 "xfs_rui_item");
1782 if (!xfs_rui_zone)
1783 goto out_destroy_rud_zone;
1784
1768 return 0; 1785 return 0;
1769 1786
1787 out_destroy_rud_zone:
1788 kmem_zone_destroy(xfs_rud_zone);
1789 out_destroy_icreate_zone:
1790 kmem_zone_destroy(xfs_icreate_zone);
1770 out_destroy_ili_zone: 1791 out_destroy_ili_zone:
1771 kmem_zone_destroy(xfs_ili_zone); 1792 kmem_zone_destroy(xfs_ili_zone);
1772 out_destroy_inode_zone: 1793 out_destroy_inode_zone:
@@ -1805,6 +1826,8 @@ xfs_destroy_zones(void)
1805 * destroy caches. 1826 * destroy caches.
1806 */ 1827 */
1807 rcu_barrier(); 1828 rcu_barrier();
1829 kmem_zone_destroy(xfs_rui_zone);
1830 kmem_zone_destroy(xfs_rud_zone);
1808 kmem_zone_destroy(xfs_icreate_zone); 1831 kmem_zone_destroy(xfs_icreate_zone);
1809 kmem_zone_destroy(xfs_ili_zone); 1832 kmem_zone_destroy(xfs_ili_zone);
1810 kmem_zone_destroy(xfs_inode_zone); 1833 kmem_zone_destroy(xfs_inode_zone);
@@ -1854,6 +1877,9 @@ init_xfs_fs(void)
1854 printk(KERN_INFO XFS_VERSION_STRING " with " 1877 printk(KERN_INFO XFS_VERSION_STRING " with "
1855 XFS_BUILD_OPTIONS " enabled\n"); 1878 XFS_BUILD_OPTIONS " enabled\n");
1856 1879
1880 xfs_extent_free_init_defer_op();
1881 xfs_rmap_update_init_defer_op();
1882
1857 xfs_dir_startup(); 1883 xfs_dir_startup();
1858 1884
1859 error = xfs_init_zones(); 1885 error = xfs_init_zones();
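
The xfs_super.c hunks above add the RUI/RUD item caches to xfs_init_zones() and xfs_destroy_zones() using the usual goto-unwind error handling: a failed allocation jumps to a label that tears down everything created before it, and the destroy path frees caches in reverse creation order. Below is a minimal userspace sketch of that pattern; the allocator names are stand-ins, not the kernel's kmem_zone API.

#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for the item caches created in xfs_init_zones(). */
static void *zone_a, *zone_b, *zone_c;

/* Allocate caches in order; on failure, unwind everything set up so far. */
static int init_zones(void)
{
        zone_a = malloc(64);
        if (!zone_a)
                goto out;
        zone_b = malloc(64);
        if (!zone_b)
                goto out_destroy_a;
        zone_c = malloc(64);            /* e.g. the new rud/rui item zones */
        if (!zone_c)
                goto out_destroy_b;
        return 0;

out_destroy_b:
        free(zone_b);
out_destroy_a:
        free(zone_a);
out:
        return -1;
}

/* Teardown mirrors init in reverse order, as xfs_destroy_zones() does. */
static void destroy_zones(void)
{
        free(zone_c);
        free(zone_b);
        free(zone_a);
}

int main(void)
{
        if (init_zones())
                return 1;
        destroy_zones();
        return 0;
}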
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 08a46c6181fd..58142aeeeea6 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -26,6 +26,7 @@
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_da_format.h" 27#include "xfs_da_format.h"
28#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
29#include "xfs_defer.h"
29#include "xfs_dir2.h" 30#include "xfs_dir2.h"
30#include "xfs_inode.h" 31#include "xfs_inode.h"
31#include "xfs_ialloc.h" 32#include "xfs_ialloc.h"
@@ -172,7 +173,7 @@ xfs_symlink(
172 struct xfs_inode *ip = NULL; 173 struct xfs_inode *ip = NULL;
173 int error = 0; 174 int error = 0;
174 int pathlen; 175 int pathlen;
175 struct xfs_bmap_free free_list; 176 struct xfs_defer_ops dfops;
176 xfs_fsblock_t first_block; 177 xfs_fsblock_t first_block;
177 bool unlock_dp_on_error = false; 178 bool unlock_dp_on_error = false;
178 xfs_fileoff_t first_fsb; 179 xfs_fileoff_t first_fsb;
@@ -269,7 +270,7 @@ xfs_symlink(
269 * Initialize the bmap freelist prior to calling either 270 * Initialize the bmap freelist prior to calling either
270 * bmapi or the directory create code. 271 * bmapi or the directory create code.
271 */ 272 */
272 xfs_bmap_init(&free_list, &first_block); 273 xfs_defer_init(&dfops, &first_block);
273 274
274 /* 275 /*
275 * Allocate an inode for the symlink. 276 * Allocate an inode for the symlink.
@@ -313,7 +314,7 @@ xfs_symlink(
313 314
314 error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks, 315 error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
315 XFS_BMAPI_METADATA, &first_block, resblks, 316 XFS_BMAPI_METADATA, &first_block, resblks,
316 mval, &nmaps, &free_list); 317 mval, &nmaps, &dfops);
317 if (error) 318 if (error)
318 goto out_bmap_cancel; 319 goto out_bmap_cancel;
319 320
@@ -361,7 +362,7 @@ xfs_symlink(
361 * Create the directory entry for the symlink. 362 * Create the directory entry for the symlink.
362 */ 363 */
363 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, 364 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
364 &first_block, &free_list, resblks); 365 &first_block, &dfops, resblks);
365 if (error) 366 if (error)
366 goto out_bmap_cancel; 367 goto out_bmap_cancel;
367 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 368 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -376,7 +377,7 @@ xfs_symlink(
376 xfs_trans_set_sync(tp); 377 xfs_trans_set_sync(tp);
377 } 378 }
378 379
379 error = xfs_bmap_finish(&tp, &free_list, NULL); 380 error = xfs_defer_finish(&tp, &dfops, NULL);
380 if (error) 381 if (error)
381 goto out_bmap_cancel; 382 goto out_bmap_cancel;
382 383
@@ -392,7 +393,7 @@ xfs_symlink(
392 return 0; 393 return 0;
393 394
394out_bmap_cancel: 395out_bmap_cancel:
395 xfs_bmap_cancel(&free_list); 396 xfs_defer_cancel(&dfops);
396out_trans_cancel: 397out_trans_cancel:
397 xfs_trans_cancel(tp); 398 xfs_trans_cancel(tp);
398out_release_inode: 399out_release_inode:
@@ -426,7 +427,7 @@ xfs_inactive_symlink_rmt(
426 int done; 427 int done;
427 int error; 428 int error;
428 xfs_fsblock_t first_block; 429 xfs_fsblock_t first_block;
429 xfs_bmap_free_t free_list; 430 struct xfs_defer_ops dfops;
430 int i; 431 int i;
431 xfs_mount_t *mp; 432 xfs_mount_t *mp;
432 xfs_bmbt_irec_t mval[XFS_SYMLINK_MAPS]; 433 xfs_bmbt_irec_t mval[XFS_SYMLINK_MAPS];
@@ -465,7 +466,7 @@ xfs_inactive_symlink_rmt(
465 * Find the block(s) so we can inval and unmap them. 466 * Find the block(s) so we can inval and unmap them.
466 */ 467 */
467 done = 0; 468 done = 0;
468 xfs_bmap_init(&free_list, &first_block); 469 xfs_defer_init(&dfops, &first_block);
469 nmaps = ARRAY_SIZE(mval); 470 nmaps = ARRAY_SIZE(mval);
470 error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size), 471 error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size),
471 mval, &nmaps, 0); 472 mval, &nmaps, 0);
@@ -485,17 +486,17 @@ xfs_inactive_symlink_rmt(
485 xfs_trans_binval(tp, bp); 486 xfs_trans_binval(tp, bp);
486 } 487 }
487 /* 488 /*
488 * Unmap the dead block(s) to the free_list. 489 * Unmap the dead block(s) to the dfops.
489 */ 490 */
490 error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps, 491 error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps,
491 &first_block, &free_list, &done); 492 &first_block, &dfops, &done);
492 if (error) 493 if (error)
493 goto error_bmap_cancel; 494 goto error_bmap_cancel;
494 ASSERT(done); 495 ASSERT(done);
495 /* 496 /*
496 * Commit the first transaction. This logs the EFI and the inode. 497 * Commit the first transaction. This logs the EFI and the inode.
497 */ 498 */
498 error = xfs_bmap_finish(&tp, &free_list, ip); 499 error = xfs_defer_finish(&tp, &dfops, ip);
499 if (error) 500 if (error)
500 goto error_bmap_cancel; 501 goto error_bmap_cancel;
501 /* 502 /*
@@ -525,7 +526,7 @@ xfs_inactive_symlink_rmt(
525 return 0; 526 return 0;
526 527
527error_bmap_cancel: 528error_bmap_cancel:
528 xfs_bmap_cancel(&free_list); 529 xfs_defer_cancel(&dfops);
529error_trans_cancel: 530error_trans_cancel:
530 xfs_trans_cancel(tp); 531 xfs_trans_cancel(tp);
531error_unlock: 532error_unlock:
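
The xfs_symlink.c conversion above is representative of how callers move from the old bmap freelist to the generic deferred-ops mechanism: xfs_defer_init() replaces xfs_bmap_init(), work is queued against the dfops while the transaction runs, and xfs_defer_finish()/xfs_defer_cancel() replace xfs_bmap_finish()/xfs_bmap_cancel() on the commit and error paths. A simplified userspace sketch of that call pattern follows; the defer_* helpers here are stand-ins that only mirror the shape of the kernel API.

#include <stdio.h>

/* Stand-in for struct xfs_defer_ops: a count of pending work items. */
struct defer_ops {
        int pending;
        int error;
};

static void defer_init(struct defer_ops *dfops) { dfops->pending = 0; dfops->error = 0; }
static void defer_add(struct defer_ops *dfops)  { dfops->pending++; }

static int defer_finish(struct defer_ops *dfops)
{
        /*
         * The real xfs_defer_finish() rolls the transaction and processes
         * each pending intent; here we simply drain the count.
         */
        dfops->pending = 0;
        return dfops->error;
}

static void defer_cancel(struct defer_ops *dfops) { dfops->pending = 0; }

/* Caller shape from xfs_symlink(): init, queue work, then finish or cancel. */
static int do_op(int fail)
{
        struct defer_ops dfops;

        defer_init(&dfops);
        defer_add(&dfops);              /* e.g. bmapi_write queues deferred frees/rmaps */
        if (fail) {
                defer_cancel(&dfops);   /* the out_bmap_cancel error path */
                return -1;
        }
        return defer_finish(&dfops);    /* commit all deferred intents */
}

int main(void)
{
        int ok = do_op(0);
        int cancelled = do_op(1);

        printf("ok=%d cancelled=%d\n", ok, cancelled);
        return 0;
}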
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 13a029806805..7f17ae6d709a 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -22,7 +22,9 @@
22#include "xfs_log_format.h" 22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_mount.h" 24#include "xfs_mount.h"
25#include "xfs_defer.h"
25#include "xfs_da_format.h" 26#include "xfs_da_format.h"
27#include "xfs_defer.h"
26#include "xfs_inode.h" 28#include "xfs_inode.h"
27#include "xfs_btree.h" 29#include "xfs_btree.h"
28#include "xfs_da_btree.h" 30#include "xfs_da_btree.h"
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 145169093fe0..551b7e26980c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -38,6 +38,7 @@ struct xlog_recover_item;
38struct xfs_buf_log_format; 38struct xfs_buf_log_format;
39struct xfs_inode_log_format; 39struct xfs_inode_log_format;
40struct xfs_bmbt_irec; 40struct xfs_bmbt_irec;
41struct xfs_btree_cur;
41 42
42DECLARE_EVENT_CLASS(xfs_attr_list_class, 43DECLARE_EVENT_CLASS(xfs_attr_list_class,
43 TP_PROTO(struct xfs_attr_list_context *ctx), 44 TP_PROTO(struct xfs_attr_list_context *ctx),
@@ -2185,6 +2186,379 @@ DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
2185DEFINE_DISCARD_EVENT(xfs_discard_exclude); 2186DEFINE_DISCARD_EVENT(xfs_discard_exclude);
2186DEFINE_DISCARD_EVENT(xfs_discard_busy); 2187DEFINE_DISCARD_EVENT(xfs_discard_busy);
2187 2188
2189/* btree cursor events */
2190DECLARE_EVENT_CLASS(xfs_btree_cur_class,
2191 TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp),
2192 TP_ARGS(cur, level, bp),
2193 TP_STRUCT__entry(
2194 __field(dev_t, dev)
2195 __field(xfs_btnum_t, btnum)
2196 __field(int, level)
2197 __field(int, nlevels)
2198 __field(int, ptr)
2199 __field(xfs_daddr_t, daddr)
2200 ),
2201 TP_fast_assign(
2202 __entry->dev = cur->bc_mp->m_super->s_dev;
2203 __entry->btnum = cur->bc_btnum;
2204 __entry->level = level;
2205 __entry->nlevels = cur->bc_nlevels;
2206 __entry->ptr = cur->bc_ptrs[level];
2207 __entry->daddr = bp ? bp->b_bn : -1;
2208 ),
2209 TP_printk("dev %d:%d btnum %d level %d/%d ptr %d daddr 0x%llx",
2210 MAJOR(__entry->dev), MINOR(__entry->dev),
2211 __entry->btnum,
2212 __entry->level,
2213 __entry->nlevels,
2214 __entry->ptr,
2215 (unsigned long long)__entry->daddr)
2216)
2217
2218#define DEFINE_BTREE_CUR_EVENT(name) \
2219DEFINE_EVENT(xfs_btree_cur_class, name, \
2220 TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp), \
2221 TP_ARGS(cur, level, bp))
2222DEFINE_BTREE_CUR_EVENT(xfs_btree_updkeys);
2223DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range);
2224
2225/* deferred ops */
2226struct xfs_defer_pending;
2227struct xfs_defer_intake;
2228struct xfs_defer_ops;
2229
2230DECLARE_EVENT_CLASS(xfs_defer_class,
2231 TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop),
2232 TP_ARGS(mp, dop),
2233 TP_STRUCT__entry(
2234 __field(dev_t, dev)
2235 __field(void *, dop)
2236 __field(bool, committed)
2237 __field(bool, low)
2238 ),
2239 TP_fast_assign(
2240 __entry->dev = mp ? mp->m_super->s_dev : 0;
2241 __entry->dop = dop;
2242 __entry->committed = dop->dop_committed;
2243 __entry->low = dop->dop_low;
2244 ),
2245 TP_printk("dev %d:%d ops %p committed %d low %d\n",
2246 MAJOR(__entry->dev), MINOR(__entry->dev),
2247 __entry->dop,
2248 __entry->committed,
2249 __entry->low)
2250)
2251#define DEFINE_DEFER_EVENT(name) \
2252DEFINE_EVENT(xfs_defer_class, name, \
2253 TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop), \
2254 TP_ARGS(mp, dop))
2255
2256DECLARE_EVENT_CLASS(xfs_defer_error_class,
2257 TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, int error),
2258 TP_ARGS(mp, dop, error),
2259 TP_STRUCT__entry(
2260 __field(dev_t, dev)
2261 __field(void *, dop)
2262 __field(bool, committed)
2263 __field(bool, low)
2264 __field(int, error)
2265 ),
2266 TP_fast_assign(
2267 __entry->dev = mp ? mp->m_super->s_dev : 0;
2268 __entry->dop = dop;
2269 __entry->committed = dop->dop_committed;
2270 __entry->low = dop->dop_low;
2271 __entry->error = error;
2272 ),
2273 TP_printk("dev %d:%d ops %p committed %d low %d err %d\n",
2274 MAJOR(__entry->dev), MINOR(__entry->dev),
2275 __entry->dop,
2276 __entry->committed,
2277 __entry->low,
2278 __entry->error)
2279)
2280#define DEFINE_DEFER_ERROR_EVENT(name) \
2281DEFINE_EVENT(xfs_defer_error_class, name, \
2282 TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, int error), \
2283 TP_ARGS(mp, dop, error))
2284
2285DECLARE_EVENT_CLASS(xfs_defer_pending_class,
2286 TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp),
2287 TP_ARGS(mp, dfp),
2288 TP_STRUCT__entry(
2289 __field(dev_t, dev)
2290 __field(int, type)
2291 __field(void *, intent)
2292 __field(bool, committed)
2293 __field(int, nr)
2294 ),
2295 TP_fast_assign(
2296 __entry->dev = mp ? mp->m_super->s_dev : 0;
2297 __entry->type = dfp->dfp_type->type;
2298 __entry->intent = dfp->dfp_intent;
2299 __entry->committed = dfp->dfp_committed;
2300 __entry->nr = dfp->dfp_count;
2301 ),
2302 TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n",
2303 MAJOR(__entry->dev), MINOR(__entry->dev),
2304 __entry->type,
2305 __entry->intent,
2306 __entry->committed,
2307 __entry->nr)
2308)
2309#define DEFINE_DEFER_PENDING_EVENT(name) \
2310DEFINE_EVENT(xfs_defer_pending_class, name, \
2311 TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp), \
2312 TP_ARGS(mp, dfp))
2313
2314DECLARE_EVENT_CLASS(xfs_phys_extent_deferred_class,
2315 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
2316 int type, xfs_agblock_t agbno, xfs_extlen_t len),
2317 TP_ARGS(mp, agno, type, agbno, len),
2318 TP_STRUCT__entry(
2319 __field(dev_t, dev)
2320 __field(xfs_agnumber_t, agno)
2321 __field(int, type)
2322 __field(xfs_agblock_t, agbno)
2323 __field(xfs_extlen_t, len)
2324 ),
2325 TP_fast_assign(
2326 __entry->dev = mp->m_super->s_dev;
2327 __entry->agno = agno;
2328 __entry->type = type;
2329 __entry->agbno = agbno;
2330 __entry->len = len;
2331 ),
2332 TP_printk("dev %d:%d op %d agno %u agbno %u len %u",
2333 MAJOR(__entry->dev), MINOR(__entry->dev),
2334 __entry->type,
2335 __entry->agno,
2336 __entry->agbno,
2337 __entry->len)
2338);
2339#define DEFINE_PHYS_EXTENT_DEFERRED_EVENT(name) \
2340DEFINE_EVENT(xfs_phys_extent_deferred_class, name, \
2341 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
2342 int type, \
2343 xfs_agblock_t bno, \
2344 xfs_extlen_t len), \
2345 TP_ARGS(mp, agno, type, bno, len))
2346
2347DECLARE_EVENT_CLASS(xfs_map_extent_deferred_class,
2348 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
2349 int op,
2350 xfs_agblock_t agbno,
2351 xfs_ino_t ino,
2352 int whichfork,
2353 xfs_fileoff_t offset,
2354 xfs_filblks_t len,
2355 xfs_exntst_t state),
2356 TP_ARGS(mp, agno, op, agbno, ino, whichfork, offset, len, state),
2357 TP_STRUCT__entry(
2358 __field(dev_t, dev)
2359 __field(xfs_agnumber_t, agno)
2360 __field(xfs_ino_t, ino)
2361 __field(xfs_agblock_t, agbno)
2362 __field(int, whichfork)
2363 __field(xfs_fileoff_t, l_loff)
2364 __field(xfs_filblks_t, l_len)
2365 __field(xfs_exntst_t, l_state)
2366 __field(int, op)
2367 ),
2368 TP_fast_assign(
2369 __entry->dev = mp->m_super->s_dev;
2370 __entry->agno = agno;
2371 __entry->ino = ino;
2372 __entry->agbno = agbno;
2373 __entry->whichfork = whichfork;
2374 __entry->l_loff = offset;
2375 __entry->l_len = len;
2376 __entry->l_state = state;
2377 __entry->op = op;
2378 ),
2379 TP_printk("dev %d:%d op %d agno %u agbno %u owner %lld %s offset %llu len %llu state %d",
2380 MAJOR(__entry->dev), MINOR(__entry->dev),
2381 __entry->op,
2382 __entry->agno,
2383 __entry->agbno,
2384 __entry->ino,
2385 __entry->whichfork == XFS_ATTR_FORK ? "attr" : "data",
2386 __entry->l_loff,
2387 __entry->l_len,
2388 __entry->l_state)
2389);
2390#define DEFINE_MAP_EXTENT_DEFERRED_EVENT(name) \
2391DEFINE_EVENT(xfs_map_extent_deferred_class, name, \
2392 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
2393 int op, \
2394 xfs_agblock_t agbno, \
2395 xfs_ino_t ino, \
2396 int whichfork, \
2397 xfs_fileoff_t offset, \
2398 xfs_filblks_t len, \
2399 xfs_exntst_t state), \
2400 TP_ARGS(mp, agno, op, agbno, ino, whichfork, offset, len, state))
2401
2402DEFINE_DEFER_EVENT(xfs_defer_init);
2403DEFINE_DEFER_EVENT(xfs_defer_cancel);
2404DEFINE_DEFER_EVENT(xfs_defer_trans_roll);
2405DEFINE_DEFER_EVENT(xfs_defer_trans_abort);
2406DEFINE_DEFER_EVENT(xfs_defer_finish);
2407DEFINE_DEFER_EVENT(xfs_defer_finish_done);
2408
2409DEFINE_DEFER_ERROR_EVENT(xfs_defer_trans_roll_error);
2410DEFINE_DEFER_ERROR_EVENT(xfs_defer_finish_error);
2411DEFINE_DEFER_ERROR_EVENT(xfs_defer_op_finish_error);
2412
2413DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_work);
2414DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_cancel);
2415DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_commit);
2416DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_cancel);
2417DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish);
2418DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort);
2419
2420#define DEFINE_BMAP_FREE_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT
2421DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_defer);
2422DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_deferred);
2423
2424/* rmap tracepoints */
2425DECLARE_EVENT_CLASS(xfs_rmap_class,
2426 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
2427 xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten,
2428 struct xfs_owner_info *oinfo),
2429 TP_ARGS(mp, agno, agbno, len, unwritten, oinfo),
2430 TP_STRUCT__entry(
2431 __field(dev_t, dev)
2432 __field(xfs_agnumber_t, agno)
2433 __field(xfs_agblock_t, agbno)
2434 __field(xfs_extlen_t, len)
2435 __field(uint64_t, owner)
2436 __field(uint64_t, offset)
2437 __field(unsigned long, flags)
2438 ),
2439 TP_fast_assign(
2440 __entry->dev = mp->m_super->s_dev;
2441 __entry->agno = agno;
2442 __entry->agbno = agbno;
2443 __entry->len = len;
2444 __entry->owner = oinfo->oi_owner;
2445 __entry->offset = oinfo->oi_offset;
2446 __entry->flags = oinfo->oi_flags;
2447 if (unwritten)
2448 __entry->flags |= XFS_RMAP_UNWRITTEN;
2449 ),
2450 TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%lx",
2451 MAJOR(__entry->dev), MINOR(__entry->dev),
2452 __entry->agno,
2453 __entry->agbno,
2454 __entry->len,
2455 __entry->owner,
2456 __entry->offset,
2457 __entry->flags)
2458);
2459#define DEFINE_RMAP_EVENT(name) \
2460DEFINE_EVENT(xfs_rmap_class, name, \
2461 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
2462 xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten, \
2463 struct xfs_owner_info *oinfo), \
2464 TP_ARGS(mp, agno, agbno, len, unwritten, oinfo))
2465
2466/* simple AG-based error/%ip tracepoint class */
2467DECLARE_EVENT_CLASS(xfs_ag_error_class,
2468 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error,
2469 unsigned long caller_ip),
2470 TP_ARGS(mp, agno, error, caller_ip),
2471 TP_STRUCT__entry(
2472 __field(dev_t, dev)
2473 __field(xfs_agnumber_t, agno)
2474 __field(int, error)
2475 __field(unsigned long, caller_ip)
2476 ),
2477 TP_fast_assign(
2478 __entry->dev = mp->m_super->s_dev;
2479 __entry->agno = agno;
2480 __entry->error = error;
2481 __entry->caller_ip = caller_ip;
2482 ),
2483 TP_printk("dev %d:%d agno %u error %d caller %ps",
2484 MAJOR(__entry->dev), MINOR(__entry->dev),
2485 __entry->agno,
2486 __entry->error,
2487 (char *)__entry->caller_ip)
2488);
2489
2490#define DEFINE_AG_ERROR_EVENT(name) \
2491DEFINE_EVENT(xfs_ag_error_class, name, \
2492 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, \
2493 unsigned long caller_ip), \
2494 TP_ARGS(mp, agno, error, caller_ip))
2495
2496DEFINE_RMAP_EVENT(xfs_rmap_unmap);
2497DEFINE_RMAP_EVENT(xfs_rmap_unmap_done);
2498DEFINE_AG_ERROR_EVENT(xfs_rmap_unmap_error);
2499DEFINE_RMAP_EVENT(xfs_rmap_map);
2500DEFINE_RMAP_EVENT(xfs_rmap_map_done);
2501DEFINE_AG_ERROR_EVENT(xfs_rmap_map_error);
2502DEFINE_RMAP_EVENT(xfs_rmap_convert);
2503DEFINE_RMAP_EVENT(xfs_rmap_convert_done);
2504DEFINE_AG_ERROR_EVENT(xfs_rmap_convert_error);
2505DEFINE_AG_ERROR_EVENT(xfs_rmap_convert_state);
2506
2507DECLARE_EVENT_CLASS(xfs_rmapbt_class,
2508 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
2509 xfs_agblock_t agbno, xfs_extlen_t len,
2510 uint64_t owner, uint64_t offset, unsigned int flags),
2511 TP_ARGS(mp, agno, agbno, len, owner, offset, flags),
2512 TP_STRUCT__entry(
2513 __field(dev_t, dev)
2514 __field(xfs_agnumber_t, agno)
2515 __field(xfs_agblock_t, agbno)
2516 __field(xfs_extlen_t, len)
2517 __field(uint64_t, owner)
2518 __field(uint64_t, offset)
2519 __field(unsigned int, flags)
2520 ),
2521 TP_fast_assign(
2522 __entry->dev = mp->m_super->s_dev;
2523 __entry->agno = agno;
2524 __entry->agbno = agbno;
2525 __entry->len = len;
2526 __entry->owner = owner;
2527 __entry->offset = offset;
2528 __entry->flags = flags;
2529 ),
2530 TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%x",
2531 MAJOR(__entry->dev), MINOR(__entry->dev),
2532 __entry->agno,
2533 __entry->agbno,
2534 __entry->len,
2535 __entry->owner,
2536 __entry->offset,
2537 __entry->flags)
2538);
2539#define DEFINE_RMAPBT_EVENT(name) \
2540DEFINE_EVENT(xfs_rmapbt_class, name, \
2541 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
2542 xfs_agblock_t agbno, xfs_extlen_t len, \
2543 uint64_t owner, uint64_t offset, unsigned int flags), \
2544 TP_ARGS(mp, agno, agbno, len, owner, offset, flags))
2545
2546#define DEFINE_RMAP_DEFERRED_EVENT DEFINE_MAP_EXTENT_DEFERRED_EVENT
2547DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_defer);
2548DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_deferred);
2549
2550DEFINE_BUSY_EVENT(xfs_rmapbt_alloc_block);
2551DEFINE_BUSY_EVENT(xfs_rmapbt_free_block);
2552DEFINE_RMAPBT_EVENT(xfs_rmap_update);
2553DEFINE_RMAPBT_EVENT(xfs_rmap_insert);
2554DEFINE_RMAPBT_EVENT(xfs_rmap_delete);
2555DEFINE_AG_ERROR_EVENT(xfs_rmap_insert_error);
2556DEFINE_AG_ERROR_EVENT(xfs_rmap_delete_error);
2557DEFINE_AG_ERROR_EVENT(xfs_rmap_update_error);
2558DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_result);
2559DEFINE_RMAPBT_EVENT(xfs_rmap_find_right_neighbor_result);
2560DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_result);
2561
2188#endif /* _TRACE_XFS_H */ 2562#endif /* _TRACE_XFS_H */
2189 2563
2190#undef TRACE_INCLUDE_PATH 2564#undef TRACE_INCLUDE_PATH
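
The tracepoint additions above all follow the DECLARE_EVENT_CLASS/DEFINE_EVENT split: the record layout, field assignment and printk format are written once per class, and each DEFINE_*_EVENT() wrapper stamps out a named trace_<name>() entry point that reuses them. The sketch below imitates that structure in plain C with one shared helper plus a macro-generated wrapper per event; it is illustrative only and not the ftrace machinery.

#include <stdio.h>

/*
 * Shared "class" body, analogous to xfs_rmap_class's TP_fast_assign
 * and TP_printk sections being written once for all rmap events.
 */
static void rmap_class_print(const char *event, unsigned agno,
                             unsigned agbno, unsigned len)
{
        printf("%s: agno %u agbno %u len %u\n", event, agno, agbno, len);
}

/* Analogous to DEFINE_RMAP_EVENT(): generate a named wrapper per event. */
#define DEFINE_RMAP_EVENT(name)                                         \
static void trace_##name(unsigned agno, unsigned agbno, unsigned len)  \
{                                                                       \
        rmap_class_print(#name, agno, agbno, len);                      \
}

DEFINE_RMAP_EVENT(xfs_rmap_map)
DEFINE_RMAP_EVENT(xfs_rmap_unmap)

int main(void)
{
        trace_xfs_rmap_map(1, 128, 16);
        trace_xfs_rmap_unmap(1, 128, 16);
        return 0;
}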
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 9b2b9fa89331..e2bf86aad33d 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -33,6 +33,9 @@ struct xfs_trans;
33struct xfs_trans_res; 33struct xfs_trans_res;
34struct xfs_dquot_acct; 34struct xfs_dquot_acct;
35struct xfs_busy_extent; 35struct xfs_busy_extent;
36struct xfs_rud_log_item;
37struct xfs_rui_log_item;
38struct xfs_btree_cur;
36 39
37typedef struct xfs_log_item { 40typedef struct xfs_log_item {
38 struct list_head li_ail; /* AIL pointers */ 41 struct list_head li_ail; /* AIL pointers */
@@ -210,17 +213,14 @@ void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
210void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint); 213void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
211void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); 214void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
212void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); 215void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
213struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint); 216
214void xfs_trans_log_efi_extent(xfs_trans_t *, 217void xfs_extent_free_init_defer_op(void);
215 struct xfs_efi_log_item *, 218struct xfs_efd_log_item *xfs_trans_get_efd(struct xfs_trans *,
216 xfs_fsblock_t,
217 xfs_extlen_t);
218struct xfs_efd_log_item *xfs_trans_get_efd(xfs_trans_t *,
219 struct xfs_efi_log_item *, 219 struct xfs_efi_log_item *,
220 uint); 220 uint);
221int xfs_trans_free_extent(struct xfs_trans *, 221int xfs_trans_free_extent(struct xfs_trans *,
222 struct xfs_efd_log_item *, xfs_fsblock_t, 222 struct xfs_efd_log_item *, xfs_fsblock_t,
223 xfs_extlen_t); 223 xfs_extlen_t, struct xfs_owner_info *);
224int xfs_trans_commit(struct xfs_trans *); 224int xfs_trans_commit(struct xfs_trans *);
225int __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *); 225int __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *);
226int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); 226int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
@@ -236,4 +236,16 @@ void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
236extern kmem_zone_t *xfs_trans_zone; 236extern kmem_zone_t *xfs_trans_zone;
237extern kmem_zone_t *xfs_log_item_desc_zone; 237extern kmem_zone_t *xfs_log_item_desc_zone;
238 238
239/* rmap updates */
240enum xfs_rmap_intent_type;
241
242void xfs_rmap_update_init_defer_op(void);
243struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp,
244 struct xfs_rui_log_item *ruip);
245int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
246 struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type,
247 __uint64_t owner, int whichfork, xfs_fileoff_t startoff,
248 xfs_fsblock_t startblock, xfs_filblks_t blockcount,
249 xfs_exntst_t state, struct xfs_btree_cur **pcur);
250
239#endif /* __XFS_TRANS_H__ */ 251#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index a96ae540eb62..459ddec137a4 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -21,66 +21,15 @@
21#include "xfs_format.h" 21#include "xfs_format.h"
22#include "xfs_log_format.h" 22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_bit.h"
24#include "xfs_mount.h" 25#include "xfs_mount.h"
26#include "xfs_defer.h"
25#include "xfs_trans.h" 27#include "xfs_trans.h"
26#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
27#include "xfs_extfree_item.h" 29#include "xfs_extfree_item.h"
28#include "xfs_alloc.h" 30#include "xfs_alloc.h"
29 31#include "xfs_bmap.h"
30/* 32#include "xfs_trace.h"
31 * This routine is called to allocate an "extent free intention"
32 * log item that will hold nextents worth of extents. The
33 * caller must use all nextents extents, because we are not
34 * flexible about this at all.
35 */
36xfs_efi_log_item_t *
37xfs_trans_get_efi(xfs_trans_t *tp,
38 uint nextents)
39{
40 xfs_efi_log_item_t *efip;
41
42 ASSERT(tp != NULL);
43 ASSERT(nextents > 0);
44
45 efip = xfs_efi_init(tp->t_mountp, nextents);
46 ASSERT(efip != NULL);
47
48 /*
49 * Get a log_item_desc to point at the new item.
50 */
51 xfs_trans_add_item(tp, &efip->efi_item);
52 return efip;
53}
54
55/*
56 * This routine is called to indicate that the described
57 * extent is to be logged as needing to be freed. It should
58 * be called once for each extent to be freed.
59 */
60void
61xfs_trans_log_efi_extent(xfs_trans_t *tp,
62 xfs_efi_log_item_t *efip,
63 xfs_fsblock_t start_block,
64 xfs_extlen_t ext_len)
65{
66 uint next_extent;
67 xfs_extent_t *extp;
68
69 tp->t_flags |= XFS_TRANS_DIRTY;
70 efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
71
72 /*
73 * atomic_inc_return gives us the value after the increment;
74 * we want to use it as an array index so we need to subtract 1 from
75 * it.
76 */
77 next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
78 ASSERT(next_extent < efip->efi_format.efi_nextents);
79 extp = &(efip->efi_format.efi_extents[next_extent]);
80 extp->ext_start = start_block;
81 extp->ext_len = ext_len;
82}
83
84 33
85/* 34/*
86 * This routine is called to allocate an "extent free done" 35 * This routine is called to allocate an "extent free done"
@@ -88,12 +37,12 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp,
88 * caller must use all nextents extents, because we are not 37 * caller must use all nextents extents, because we are not
89 * flexible about this at all. 38 * flexible about this at all.
90 */ 39 */
91xfs_efd_log_item_t * 40struct xfs_efd_log_item *
92xfs_trans_get_efd(xfs_trans_t *tp, 41xfs_trans_get_efd(struct xfs_trans *tp,
93 xfs_efi_log_item_t *efip, 42 struct xfs_efi_log_item *efip,
94 uint nextents) 43 uint nextents)
95{ 44{
96 xfs_efd_log_item_t *efdp; 45 struct xfs_efd_log_item *efdp;
97 46
98 ASSERT(tp != NULL); 47 ASSERT(tp != NULL);
99 ASSERT(nextents > 0); 48 ASSERT(nextents > 0);
@@ -118,13 +67,19 @@ xfs_trans_free_extent(
118 struct xfs_trans *tp, 67 struct xfs_trans *tp,
119 struct xfs_efd_log_item *efdp, 68 struct xfs_efd_log_item *efdp,
120 xfs_fsblock_t start_block, 69 xfs_fsblock_t start_block,
121 xfs_extlen_t ext_len) 70 xfs_extlen_t ext_len,
71 struct xfs_owner_info *oinfo)
122{ 72{
73 struct xfs_mount *mp = tp->t_mountp;
123 uint next_extent; 74 uint next_extent;
75 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block);
76 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, start_block);
124 struct xfs_extent *extp; 77 struct xfs_extent *extp;
125 int error; 78 int error;
126 79
127 error = xfs_free_extent(tp, start_block, ext_len); 80 trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
81
82 error = xfs_free_extent(tp, start_block, ext_len, oinfo);
128 83
129 /* 84 /*
130 * Mark the transaction dirty, even on error. This ensures the 85 * Mark the transaction dirty, even on error. This ensures the
@@ -145,3 +100,139 @@ xfs_trans_free_extent(
145 100
146 return error; 101 return error;
147} 102}
103
104/* Sort bmap items by AG. */
105static int
106xfs_extent_free_diff_items(
107 void *priv,
108 struct list_head *a,
109 struct list_head *b)
110{
111 struct xfs_mount *mp = priv;
112 struct xfs_extent_free_item *ra;
113 struct xfs_extent_free_item *rb;
114
115 ra = container_of(a, struct xfs_extent_free_item, xefi_list);
116 rb = container_of(b, struct xfs_extent_free_item, xefi_list);
117 return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
118 XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
119}
120
121/* Get an EFI. */
122STATIC void *
123xfs_extent_free_create_intent(
124 struct xfs_trans *tp,
125 unsigned int count)
126{
127 struct xfs_efi_log_item *efip;
128
129 ASSERT(tp != NULL);
130 ASSERT(count > 0);
131
132 efip = xfs_efi_init(tp->t_mountp, count);
133 ASSERT(efip != NULL);
134
135 /*
136 * Get a log_item_desc to point at the new item.
137 */
138 xfs_trans_add_item(tp, &efip->efi_item);
139 return efip;
140}
141
142/* Log a free extent to the intent item. */
143STATIC void
144xfs_extent_free_log_item(
145 struct xfs_trans *tp,
146 void *intent,
147 struct list_head *item)
148{
149 struct xfs_efi_log_item *efip = intent;
150 struct xfs_extent_free_item *free;
151 uint next_extent;
152 struct xfs_extent *extp;
153
154 free = container_of(item, struct xfs_extent_free_item, xefi_list);
155
156 tp->t_flags |= XFS_TRANS_DIRTY;
157 efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
158
159 /*
160 * atomic_inc_return gives us the value after the increment;
161 * we want to use it as an array index so we need to subtract 1 from
162 * it.
163 */
164 next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
165 ASSERT(next_extent < efip->efi_format.efi_nextents);
166 extp = &efip->efi_format.efi_extents[next_extent];
167 extp->ext_start = free->xefi_startblock;
168 extp->ext_len = free->xefi_blockcount;
169}
170
171/* Get an EFD so we can process all the free extents. */
172STATIC void *
173xfs_extent_free_create_done(
174 struct xfs_trans *tp,
175 void *intent,
176 unsigned int count)
177{
178 return xfs_trans_get_efd(tp, intent, count);
179}
180
181/* Process a free extent. */
182STATIC int
183xfs_extent_free_finish_item(
184 struct xfs_trans *tp,
185 struct xfs_defer_ops *dop,
186 struct list_head *item,
187 void *done_item,
188 void **state)
189{
190 struct xfs_extent_free_item *free;
191 int error;
192
193 free = container_of(item, struct xfs_extent_free_item, xefi_list);
194 error = xfs_trans_free_extent(tp, done_item,
195 free->xefi_startblock,
196 free->xefi_blockcount,
197 &free->xefi_oinfo);
198 kmem_free(free);
199 return error;
200}
201
202/* Abort all pending EFIs. */
203STATIC void
204xfs_extent_free_abort_intent(
205 void *intent)
206{
207 xfs_efi_release(intent);
208}
209
210/* Cancel a free extent. */
211STATIC void
212xfs_extent_free_cancel_item(
213 struct list_head *item)
214{
215 struct xfs_extent_free_item *free;
216
217 free = container_of(item, struct xfs_extent_free_item, xefi_list);
218 kmem_free(free);
219}
220
221static const struct xfs_defer_op_type xfs_extent_free_defer_type = {
222 .type = XFS_DEFER_OPS_TYPE_FREE,
223 .max_items = XFS_EFI_MAX_FAST_EXTENTS,
224 .diff_items = xfs_extent_free_diff_items,
225 .create_intent = xfs_extent_free_create_intent,
226 .abort_intent = xfs_extent_free_abort_intent,
227 .log_item = xfs_extent_free_log_item,
228 .create_done = xfs_extent_free_create_done,
229 .finish_item = xfs_extent_free_finish_item,
230 .cancel_item = xfs_extent_free_cancel_item,
231};
232
233/* Register the deferred op type. */
234void
235xfs_extent_free_init_defer_op(void)
236{
237 xfs_defer_init_op_type(&xfs_extent_free_defer_type);
238}
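
The rewritten xfs_trans_extfree.c turns the old open-coded EFI/EFD helpers into a set of callbacks collected in a const struct xfs_defer_op_type and registered via xfs_defer_init_op_type(), so the generic deferred-ops code can create intents, log items, and finish or cancel them without knowing about extent frees specifically. A minimal userspace sketch of that register-then-dispatch vtable pattern, with stand-in types and a single callback pair:

#include <stdio.h>

/* Stand-in for the xfs_defer_op_type vtable registered above. */
struct defer_op_type {
        int type;
        void (*create_intent)(unsigned count);
        int (*finish_item)(int item);
};

#define DEFER_TYPE_MAX 2
static const struct defer_op_type *defer_op_types[DEFER_TYPE_MAX];

/* Mirrors xfs_defer_init_op_type(): record the vtable by its type index. */
static void defer_init_op_type(const struct defer_op_type *type)
{
        defer_op_types[type->type] = type;
}

static void extfree_create_intent(unsigned count)
{
        printf("intent (EFI) covering %u extents\n", count);
}

static int extfree_finish_item(int item)
{
        printf("freed extent %d\n", item);
        return 0;
}

static const struct defer_op_type extent_free_defer_type = {
        .type           = 0,    /* stands in for XFS_DEFER_OPS_TYPE_FREE */
        .create_intent  = extfree_create_intent,
        .finish_item    = extfree_finish_item,
};

int main(void)
{
        const struct defer_op_type *ops;

        defer_init_op_type(&extent_free_defer_type);

        /* Generic defer code looks up the registered ops and dispatches. */
        ops = defer_op_types[0];
        ops->create_intent(1);
        return ops->finish_item(42);
}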
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c
new file mode 100644
index 000000000000..5a50ef881568
--- /dev/null
+++ b/fs/xfs/xfs_trans_rmap.c
@@ -0,0 +1,271 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_log_format.h"
25#include "xfs_trans_resv.h"
26#include "xfs_mount.h"
27#include "xfs_defer.h"
28#include "xfs_trans.h"
29#include "xfs_trans_priv.h"
30#include "xfs_rmap_item.h"
31#include "xfs_alloc.h"
32#include "xfs_rmap.h"
33
34/* Set the map extent flags for this reverse mapping. */
35static void
36xfs_trans_set_rmap_flags(
37 struct xfs_map_extent *rmap,
38 enum xfs_rmap_intent_type type,
39 int whichfork,
40 xfs_exntst_t state)
41{
42 rmap->me_flags = 0;
43 if (state == XFS_EXT_UNWRITTEN)
44 rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
45 if (whichfork == XFS_ATTR_FORK)
46 rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
47 switch (type) {
48 case XFS_RMAP_MAP:
49 rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
50 break;
51 case XFS_RMAP_UNMAP:
52 rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
53 break;
54 case XFS_RMAP_CONVERT:
55 rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
56 break;
57 case XFS_RMAP_ALLOC:
58 rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
59 break;
60 case XFS_RMAP_FREE:
61 rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
62 break;
63 default:
64 ASSERT(0);
65 }
66}
67
68struct xfs_rud_log_item *
69xfs_trans_get_rud(
70 struct xfs_trans *tp,
71 struct xfs_rui_log_item *ruip)
72{
73 struct xfs_rud_log_item *rudp;
74
75 rudp = xfs_rud_init(tp->t_mountp, ruip);
76 xfs_trans_add_item(tp, &rudp->rud_item);
77 return rudp;
78}
79
80/*
81 * Finish an rmap update and log it to the RUD. Note that the transaction is
82 * marked dirty regardless of whether the rmap update succeeds or fails to
83 * support the RUI/RUD lifecycle rules.
84 */
85int
86xfs_trans_log_finish_rmap_update(
87 struct xfs_trans *tp,
88 struct xfs_rud_log_item *rudp,
89 enum xfs_rmap_intent_type type,
90 __uint64_t owner,
91 int whichfork,
92 xfs_fileoff_t startoff,
93 xfs_fsblock_t startblock,
94 xfs_filblks_t blockcount,
95 xfs_exntst_t state,
96 struct xfs_btree_cur **pcur)
97{
98 int error;
99
100 error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
101 startblock, blockcount, state, pcur);
102
103 /*
104 * Mark the transaction dirty, even on error. This ensures the
105 * transaction is aborted, which:
106 *
107 * 1.) releases the RUI and frees the RUD
108 * 2.) shuts down the filesystem
109 */
110 tp->t_flags |= XFS_TRANS_DIRTY;
111 rudp->rud_item.li_desc->lid_flags |= XFS_LID_DIRTY;
112
113 return error;
114}
115
116/* Sort rmap intents by AG. */
117static int
118xfs_rmap_update_diff_items(
119 void *priv,
120 struct list_head *a,
121 struct list_head *b)
122{
123 struct xfs_mount *mp = priv;
124 struct xfs_rmap_intent *ra;
125 struct xfs_rmap_intent *rb;
126
127 ra = container_of(a, struct xfs_rmap_intent, ri_list);
128 rb = container_of(b, struct xfs_rmap_intent, ri_list);
129 return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
130 XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
131}
132
133/* Get an RUI. */
134STATIC void *
135xfs_rmap_update_create_intent(
136 struct xfs_trans *tp,
137 unsigned int count)
138{
139 struct xfs_rui_log_item *ruip;
140
141 ASSERT(tp != NULL);
142 ASSERT(count > 0);
143
144 ruip = xfs_rui_init(tp->t_mountp, count);
145 ASSERT(ruip != NULL);
146
147 /*
148 * Get a log_item_desc to point at the new item.
149 */
150 xfs_trans_add_item(tp, &ruip->rui_item);
151 return ruip;
152}
153
154/* Log rmap updates in the intent item. */
155STATIC void
156xfs_rmap_update_log_item(
157 struct xfs_trans *tp,
158 void *intent,
159 struct list_head *item)
160{
161 struct xfs_rui_log_item *ruip = intent;
162 struct xfs_rmap_intent *rmap;
163 uint next_extent;
164 struct xfs_map_extent *map;
165
166 rmap = container_of(item, struct xfs_rmap_intent, ri_list);
167
168 tp->t_flags |= XFS_TRANS_DIRTY;
169 ruip->rui_item.li_desc->lid_flags |= XFS_LID_DIRTY;
170
171 /*
172 * atomic_inc_return gives us the value after the increment;
173 * we want to use it as an array index so we need to subtract 1 from
174 * it.
175 */
176 next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
177 ASSERT(next_extent < ruip->rui_format.rui_nextents);
178 map = &ruip->rui_format.rui_extents[next_extent];
179 map->me_owner = rmap->ri_owner;
180 map->me_startblock = rmap->ri_bmap.br_startblock;
181 map->me_startoff = rmap->ri_bmap.br_startoff;
182 map->me_len = rmap->ri_bmap.br_blockcount;
183 xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
184 rmap->ri_bmap.br_state);
185}
186
187/* Get an RUD so we can process all the deferred rmap updates. */
188STATIC void *
189xfs_rmap_update_create_done(
190 struct xfs_trans *tp,
191 void *intent,
192 unsigned int count)
193{
194 return xfs_trans_get_rud(tp, intent);
195}
196
197/* Process a deferred rmap update. */
198STATIC int
199xfs_rmap_update_finish_item(
200 struct xfs_trans *tp,
201 struct xfs_defer_ops *dop,
202 struct list_head *item,
203 void *done_item,
204 void **state)
205{
206 struct xfs_rmap_intent *rmap;
207 int error;
208
209 rmap = container_of(item, struct xfs_rmap_intent, ri_list);
210 error = xfs_trans_log_finish_rmap_update(tp, done_item,
211 rmap->ri_type,
212 rmap->ri_owner, rmap->ri_whichfork,
213 rmap->ri_bmap.br_startoff,
214 rmap->ri_bmap.br_startblock,
215 rmap->ri_bmap.br_blockcount,
216 rmap->ri_bmap.br_state,
217 (struct xfs_btree_cur **)state);
218 kmem_free(rmap);
219 return error;
220}
221
222/* Clean up after processing deferred rmaps. */
223STATIC void
224xfs_rmap_update_finish_cleanup(
225 struct xfs_trans *tp,
226 void *state,
227 int error)
228{
229 struct xfs_btree_cur *rcur = state;
230
231 xfs_rmap_finish_one_cleanup(tp, rcur, error);
232}
233
234/* Abort all pending RUIs. */
235STATIC void
236xfs_rmap_update_abort_intent(
237 void *intent)
238{
239 xfs_rui_release(intent);
240}
241
242/* Cancel a deferred rmap update. */
243STATIC void
244xfs_rmap_update_cancel_item(
245 struct list_head *item)
246{
247 struct xfs_rmap_intent *rmap;
248
249 rmap = container_of(item, struct xfs_rmap_intent, ri_list);
250 kmem_free(rmap);
251}
252
253static const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
254 .type = XFS_DEFER_OPS_TYPE_RMAP,
255 .max_items = XFS_RUI_MAX_FAST_EXTENTS,
256 .diff_items = xfs_rmap_update_diff_items,
257 .create_intent = xfs_rmap_update_create_intent,
258 .abort_intent = xfs_rmap_update_abort_intent,
259 .log_item = xfs_rmap_update_log_item,
260 .create_done = xfs_rmap_update_create_done,
261 .finish_item = xfs_rmap_update_finish_item,
262 .finish_cleanup = xfs_rmap_update_finish_cleanup,
263 .cancel_item = xfs_rmap_update_cancel_item,
264};
265
266/* Register the deferred op type. */
267void
268xfs_rmap_update_init_defer_op(void)
269{
270 xfs_defer_init_op_type(&xfs_rmap_update_defer_type);
271}
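
The new xfs_trans_rmap.c wires reverse-mapping updates into the same deferred-ops framework: xfs_trans_set_rmap_flags() packs the intent type, fork and extent state into the logged map-extent flags, and xfs_trans_log_finish_rmap_update() dirties the transaction even on error so that an abort releases the RUI and frees the RUD. The standalone sketch below mirrors only the flag-packing step; the bit values are illustrative stand-ins, not the on-disk XFS_RMAP_EXTENT_* definitions.

#include <stdio.h>

/* Illustrative flag bits; the real ones live in the log format header. */
#define RMAP_EXTENT_MAP         (1u << 0)
#define RMAP_EXTENT_UNMAP       (1u << 1)
#define RMAP_EXTENT_CONVERT     (1u << 2)
#define RMAP_EXTENT_ATTR_FORK   (1u << 3)
#define RMAP_EXTENT_UNWRITTEN   (1u << 4)

enum rmap_intent_type { RMAP_MAP, RMAP_UNMAP, RMAP_CONVERT };

/* Pack intent type, fork and extent state into one flags word. */
static unsigned int set_rmap_flags(enum rmap_intent_type type,
                                   int attr_fork, int unwritten)
{
        unsigned int flags = 0;

        if (unwritten)
                flags |= RMAP_EXTENT_UNWRITTEN;
        if (attr_fork)
                flags |= RMAP_EXTENT_ATTR_FORK;
        switch (type) {
        case RMAP_MAP:
                flags |= RMAP_EXTENT_MAP;
                break;
        case RMAP_UNMAP:
                flags |= RMAP_EXTENT_UNMAP;
                break;
        case RMAP_CONVERT:
                flags |= RMAP_EXTENT_CONVERT;
                break;
        }
        return flags;
}

int main(void)
{
        printf("map/data/written   -> 0x%x\n", set_rmap_flags(RMAP_MAP, 0, 0));
        printf("unmap/attr/unwrit  -> 0x%x\n", set_rmap_flags(RMAP_UNMAP, 1, 1));
        return 0;
}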