summaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 16:15:12 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 16:15:12 -0500
commit808eb24e0e0939b487bf90e3888a9636f1c83acb (patch)
tree202a53483f371844a116d4d70f37c16d92583451 /fs/xfs
parentae9a8c4bdc91202b4236372eed53c54d2297c71b (diff)
parent2d1d1da3d9cc387262193e83f0a96d753b040720 (diff)
Merge tag 'xfs-4.15-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong: "xfs: great scads of new stuff for 4.15. This merge cycle, we're making some substantive changes to XFS. The in-core extent mappings have been refactored to use proper iterators and a btree to handle heavily fragmented files without needing high-order memory allocations; some important log recovery bug fixes; and the first part of the online fsck functionality. (The online fsck feature is disabled by default and more pieces of it will be coming in future release cycles.) This giant pile of patches has been run through a full xfstests run over the weekend and through a quick xfstests run against this morning's master, with no major failures reported. New in this version: - Refactor the incore extent map manipulations to use a cursor instead of directly modifying extent data. - Refactor the incore extent map cursor to use an in-memory btree instead of a single high-order allocation. This eliminates a major source of complaints about insufficient memory when opening a heavily fragmented file into a system whose memory is also heavily fragmented. - Fix a longstanding bug where deleting a file with a complex extended attribute btree incorrectly handled memory pointers, which could lead to memory corruption. - Improve metadata validation to eliminate crashing problems found while fuzzing xfs. - Move the error injection tag definitions into libxfs to be shared with userspace components. - Fix some log recovery bugs where we'd underflow log block position vector and incorrectly fail log recovery. - Drain the buffer lru after log recovery to force recovered buffers back through the verifiers after mount. On a v4 filesystem the log never attaches verifiers during log replay (v5 does), so we could end up with buffers marked verified but without having ever been verified. - Fix various other bugs. - Introduce the first part of a new online fsck tool. 
The new fsck tool will be able to iterate every piece of metadata in the filesystem to look for obvious errors and corruptions. In the next release cycle the checking will be extended to cross-reference with the other fs metadata, so this feature should only be used by the developers in the mean time" * tag 'xfs-4.15-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (131 commits) xfs: on failed mount, force-reclaim inodes after unmounting quota controls xfs: check the uniqueness of the AGFL entries xfs: remove u_int* type usage xfs: handle zero entries case in xfs_iext_rebalance_leaf xfs: add comments documenting the rebalance algorithm xfs: trivial indentation fixup for xfs_iext_remove_node xfs: remove a superflous assignment in xfs_iext_remove_node xfs: add some comments to xfs_iext_insert/xfs_iext_insert_node xfs: fix number of records handling in xfs_iext_split_leaf fs/xfs: Remove NULL check before kmem_cache_destroy xfs: only check da node header padding on v5 filesystems xfs: fix btree scrub deref check xfs: fix uninitialized return values in scrub code xfs: pass inode number to xfs_scrub_ino_set_{preen,warning} xfs: refactor the directory data block bestfree checks xfs: mark xlog_verify_dest_ptr STATIC xfs: mark xlog_recover_check_summary STATIC xfs: mark xfs_btree_check_lblock and xfs_btree_check_ptr static xfs: remove unreachable error injection code in xfs_qm_dqget xfs: remove unused debug counts for xfs_lock_inodes ...
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Kconfig17
-rw-r--r--fs/xfs/Makefile29
-rw-r--r--fs/xfs/kmem.h3
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c1
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c50
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h4
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c6
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c2061
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h66
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c250
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.h22
-rw-r--r--fs/xfs/libxfs/xfs_btree.c259
-rw-r--r--fs/xfs/libxfs/xfs_btree.h32
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c22
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c24
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h17
-rw-r--r--fs/xfs/libxfs/xfs_errortag.h106
-rw-r--r--fs/xfs/libxfs/xfs_format.h37
-rw-r--r--fs/xfs/libxfs/xfs_fs.h77
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c91
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h7
-rw-r--r--fs/xfs/libxfs/xfs_iext_tree.c1043
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c1
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c1333
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h138
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h24
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c1
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c1
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c13
-rw-r--r--fs/xfs/libxfs/xfs_types.h22
-rw-r--r--fs/xfs/scrub/agheader.c658
-rw-r--r--fs/xfs/scrub/alloc.c102
-rw-r--r--fs/xfs/scrub/attr.c471
-rw-r--r--fs/xfs/scrub/bmap.c363
-rw-r--r--fs/xfs/scrub/btree.c516
-rw-r--r--fs/xfs/scrub/btree.h57
-rw-r--r--fs/xfs/scrub/common.c574
-rw-r--r--fs/xfs/scrub/common.h144
-rw-r--r--fs/xfs/scrub/dabtree.c591
-rw-r--r--fs/xfs/scrub/dabtree.h59
-rw-r--r--fs/xfs/scrub/dir.c816
-rw-r--r--fs/xfs/scrub/ialloc.c337
-rw-r--r--fs/xfs/scrub/inode.c611
-rw-r--r--fs/xfs/scrub/parent.c317
-rw-r--r--fs/xfs/scrub/quota.c304
-rw-r--r--fs/xfs/scrub/refcount.c99
-rw-r--r--fs/xfs/scrub/rmap.c138
-rw-r--r--fs/xfs/scrub/rtbitmap.c108
-rw-r--r--fs/xfs/scrub/scrub.c392
-rw-r--r--fs/xfs/scrub/scrub.h115
-rw-r--r--fs/xfs/scrub/symlink.c92
-rw-r--r--fs/xfs/scrub/trace.c59
-rw-r--r--fs/xfs/scrub/trace.h499
-rw-r--r--fs/xfs/scrub/xfs_scrub.h29
-rw-r--r--fs/xfs/xfs.h1
-rw-r--r--fs/xfs/xfs_attr.h5
-rw-r--r--fs/xfs/xfs_attr_inactive.c69
-rw-r--r--fs/xfs/xfs_attr_list.c161
-rw-r--r--fs/xfs/xfs_bmap_util.c746
-rw-r--r--fs/xfs/xfs_bmap_util.h10
-rw-r--r--fs/xfs/xfs_buf.c16
-rw-r--r--fs/xfs/xfs_buf.h5
-rw-r--r--fs/xfs/xfs_dir2_readdir.c10
-rw-r--r--fs/xfs/xfs_dquot.c21
-rw-r--r--fs/xfs/xfs_error.c6
-rw-r--r--fs/xfs/xfs_error.h81
-rw-r--r--fs/xfs/xfs_file.c2
-rw-r--r--fs/xfs/xfs_icache.c2
-rw-r--r--fs/xfs/xfs_inode.c33
-rw-r--r--fs/xfs/xfs_inode.h4
-rw-r--r--fs/xfs/xfs_inode_item.c29
-rw-r--r--fs/xfs/xfs_inode_item.h2
-rw-r--r--fs/xfs/xfs_ioctl.c158
-rw-r--r--fs/xfs/xfs_ioctl.h4
-rw-r--r--fs/xfs/xfs_ioctl32.c1
-rw-r--r--fs/xfs/xfs_iomap.c15
-rw-r--r--fs/xfs/xfs_iops.c52
-rw-r--r--fs/xfs/xfs_itable.c13
-rw-r--r--fs/xfs/xfs_itable.h2
-rw-r--r--fs/xfs/xfs_linux.h21
-rw-r--r--fs/xfs/xfs_log.c33
-rw-r--r--fs/xfs/xfs_log_recover.c62
-rw-r--r--fs/xfs/xfs_mount.c15
-rw-r--r--fs/xfs/xfs_reflink.c108
-rw-r--r--fs/xfs/xfs_rtalloc.h2
-rw-r--r--fs/xfs/xfs_trace.h64
-rw-r--r--fs/xfs/xfs_trans_ail.c22
87 files changed, 10972 insertions, 4011 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 1b98cfa342ab..f42fcf1b5465 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -71,6 +71,23 @@ config XFS_RT
71 71
72 If unsure, say N. 72 If unsure, say N.
73 73
74config XFS_ONLINE_SCRUB
75 bool "XFS online metadata check support"
76 default n
77 depends on XFS_FS
78 help
79 If you say Y here you will be able to check metadata on a
80 mounted XFS filesystem. This feature is intended to reduce
81 filesystem downtime by supplementing xfs_repair. The key
82 advantage here is to look for problems proactively so that
83 they can be dealt with in a controlled manner.
84
85 This feature is considered EXPERIMENTAL. Use with caution!
86
87 See the xfs_scrub man page in section 8 for additional information.
88
89 If unsure, say N.
90
74config XFS_WARN 91config XFS_WARN
75 bool "XFS Verbose Warnings" 92 bool "XFS Verbose Warnings"
76 depends on XFS_FS && !XFS_DEBUG 93 depends on XFS_FS && !XFS_DEBUG
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index a6e955bfead8..7ceb41a9786a 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -49,6 +49,7 @@ xfs-y += $(addprefix libxfs/, \
49 xfs_dquot_buf.o \ 49 xfs_dquot_buf.o \
50 xfs_ialloc.o \ 50 xfs_ialloc.o \
51 xfs_ialloc_btree.o \ 51 xfs_ialloc_btree.o \
52 xfs_iext_tree.o \
52 xfs_inode_fork.o \ 53 xfs_inode_fork.o \
53 xfs_inode_buf.o \ 54 xfs_inode_buf.o \
54 xfs_log_rlimit.o \ 55 xfs_log_rlimit.o \
@@ -135,3 +136,31 @@ xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
135xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o 136xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
136xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o 137xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
137xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o 138xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o
139
140# online scrub/repair
141ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
142
143# Tracepoints like to blow up, so build that before everything else
144
145xfs-y += $(addprefix scrub/, \
146 trace.o \
147 agheader.o \
148 alloc.o \
149 attr.o \
150 bmap.o \
151 btree.o \
152 common.o \
153 dabtree.o \
154 dir.o \
155 ialloc.o \
156 inode.o \
157 parent.o \
158 refcount.o \
159 rmap.o \
160 scrub.o \
161 symlink.o \
162 )
163
164xfs-$(CONFIG_XFS_RT) += scrub/rtbitmap.o
165xfs-$(CONFIG_XFS_QUOTA) += scrub/quota.o
166endif
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 4d85992d75b2..758f37ac5ad3 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -119,8 +119,7 @@ kmem_zone_free(kmem_zone_t *zone, void *ptr)
119static inline void 119static inline void
120kmem_zone_destroy(kmem_zone_t *zone) 120kmem_zone_destroy(kmem_zone_t *zone)
121{ 121{
122 if (zone) 122 kmem_cache_destroy(zone);
123 kmem_cache_destroy(zone);
124} 123}
125 124
126extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t); 125extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t);
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index df3e600835e8..2291f4224e24 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -27,6 +27,7 @@
27#include "xfs_mount.h" 27#include "xfs_mount.h"
28#include "xfs_defer.h" 28#include "xfs_defer.h"
29#include "xfs_alloc.h" 29#include "xfs_alloc.h"
30#include "xfs_errortag.h"
30#include "xfs_error.h" 31#include "xfs_error.h"
31#include "xfs_trace.h" 32#include "xfs_trace.h"
32#include "xfs_cksum.h" 33#include "xfs_cksum.h"
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index f965ce832bc0..0da80019a917 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -31,6 +31,7 @@
31#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
32#include "xfs_alloc.h" 32#include "xfs_alloc.h"
33#include "xfs_extent_busy.h" 33#include "xfs_extent_busy.h"
34#include "xfs_errortag.h"
34#include "xfs_error.h" 35#include "xfs_error.h"
35#include "xfs_cksum.h" 36#include "xfs_cksum.h"
36#include "xfs_trace.h" 37#include "xfs_trace.h"
@@ -2931,3 +2932,52 @@ xfs_alloc_query_all(
2931 query.fn = fn; 2932 query.fn = fn;
2932 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query); 2933 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
2933} 2934}
2935
2936/* Find the size of the AG, in blocks. */
2937xfs_agblock_t
2938xfs_ag_block_count(
2939 struct xfs_mount *mp,
2940 xfs_agnumber_t agno)
2941{
2942 ASSERT(agno < mp->m_sb.sb_agcount);
2943
2944 if (agno < mp->m_sb.sb_agcount - 1)
2945 return mp->m_sb.sb_agblocks;
2946 return mp->m_sb.sb_dblocks - (agno * mp->m_sb.sb_agblocks);
2947}
2948
2949/*
2950 * Verify that an AG block number pointer neither points outside the AG
2951 * nor points at static metadata.
2952 */
2953bool
2954xfs_verify_agbno(
2955 struct xfs_mount *mp,
2956 xfs_agnumber_t agno,
2957 xfs_agblock_t agbno)
2958{
2959 xfs_agblock_t eoag;
2960
2961 eoag = xfs_ag_block_count(mp, agno);
2962 if (agbno >= eoag)
2963 return false;
2964 if (agbno <= XFS_AGFL_BLOCK(mp))
2965 return false;
2966 return true;
2967}
2968
2969/*
2970 * Verify that an FS block number pointer neither points outside the
2971 * filesystem nor points at static AG metadata.
2972 */
2973bool
2974xfs_verify_fsbno(
2975 struct xfs_mount *mp,
2976 xfs_fsblock_t fsbno)
2977{
2978 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno);
2979
2980 if (agno >= mp->m_sb.sb_agcount)
2981 return false;
2982 return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno));
2983}
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index ef26edc2e938..7ba2d129d504 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -232,5 +232,9 @@ int xfs_alloc_query_range(struct xfs_btree_cur *cur,
232 xfs_alloc_query_range_fn fn, void *priv); 232 xfs_alloc_query_range_fn fn, void *priv);
233int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn, 233int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn,
234 void *priv); 234 void *priv);
235xfs_agblock_t xfs_ag_block_count(struct xfs_mount *mp, xfs_agnumber_t agno);
236bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno,
237 xfs_agblock_t agbno);
238bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno);
235 239
236#endif /* __XFS_ALLOC_H__ */ 240#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 5c16db86b38f..53cc8b986eac 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -397,13 +397,9 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
397 /* rounded down */ 397 /* rounded down */
398 offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3; 398 offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3;
399 399
400 switch (dp->i_d.di_format) { 400 if (dp->i_d.di_format == XFS_DINODE_FMT_DEV) {
401 case XFS_DINODE_FMT_DEV:
402 minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; 401 minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
403 return (offset >= minforkoff) ? minforkoff : 0; 402 return (offset >= minforkoff) ? minforkoff : 0;
404 case XFS_DINODE_FMT_UUID:
405 minforkoff = roundup(sizeof(uuid_t), 8) >> 3;
406 return (offset >= minforkoff) ? minforkoff : 0;
407 } 403 }
408 404
409 /* 405 /*
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 89263797cf32..08df809e2315 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -38,6 +38,7 @@
38#include "xfs_bmap_util.h" 38#include "xfs_bmap_util.h"
39#include "xfs_bmap_btree.h" 39#include "xfs_bmap_btree.h"
40#include "xfs_rtalloc.h" 40#include "xfs_rtalloc.h"
41#include "xfs_errortag.h"
41#include "xfs_error.h" 42#include "xfs_error.h"
42#include "xfs_quota.h" 43#include "xfs_quota.h"
43#include "xfs_trans_space.h" 44#include "xfs_trans_space.h"
@@ -112,28 +113,21 @@ xfs_bmap_compute_maxlevels(
112STATIC int /* error */ 113STATIC int /* error */
113xfs_bmbt_lookup_eq( 114xfs_bmbt_lookup_eq(
114 struct xfs_btree_cur *cur, 115 struct xfs_btree_cur *cur,
115 xfs_fileoff_t off, 116 struct xfs_bmbt_irec *irec,
116 xfs_fsblock_t bno,
117 xfs_filblks_t len,
118 int *stat) /* success/failure */ 117 int *stat) /* success/failure */
119{ 118{
120 cur->bc_rec.b.br_startoff = off; 119 cur->bc_rec.b = *irec;
121 cur->bc_rec.b.br_startblock = bno;
122 cur->bc_rec.b.br_blockcount = len;
123 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 120 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
124} 121}
125 122
126STATIC int /* error */ 123STATIC int /* error */
127xfs_bmbt_lookup_ge( 124xfs_bmbt_lookup_first(
128 struct xfs_btree_cur *cur, 125 struct xfs_btree_cur *cur,
129 xfs_fileoff_t off,
130 xfs_fsblock_t bno,
131 xfs_filblks_t len,
132 int *stat) /* success/failure */ 126 int *stat) /* success/failure */
133{ 127{
134 cur->bc_rec.b.br_startoff = off; 128 cur->bc_rec.b.br_startoff = 0;
135 cur->bc_rec.b.br_startblock = bno; 129 cur->bc_rec.b.br_startblock = 0;
136 cur->bc_rec.b.br_blockcount = len; 130 cur->bc_rec.b.br_blockcount = 0;
137 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 131 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
138} 132}
139 133
@@ -160,21 +154,17 @@ static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
160} 154}
161 155
162/* 156/*
163 * Update the record referred to by cur to the value given 157 * Update the record referred to by cur to the value given by irec
164 * by [off, bno, len, state].
165 * This either works (return 0) or gets an EFSCORRUPTED error. 158 * This either works (return 0) or gets an EFSCORRUPTED error.
166 */ 159 */
167STATIC int 160STATIC int
168xfs_bmbt_update( 161xfs_bmbt_update(
169 struct xfs_btree_cur *cur, 162 struct xfs_btree_cur *cur,
170 xfs_fileoff_t off, 163 struct xfs_bmbt_irec *irec)
171 xfs_fsblock_t bno,
172 xfs_filblks_t len,
173 xfs_exntst_t state)
174{ 164{
175 union xfs_btree_rec rec; 165 union xfs_btree_rec rec;
176 166
177 xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state); 167 xfs_bmbt_disk_set_all(&rec.bmbt, irec);
178 return xfs_btree_update(cur, &rec); 168 return xfs_btree_update(cur, &rec);
179} 169}
180 170
@@ -242,7 +232,6 @@ xfs_bmap_forkoff_reset(
242{ 232{
243 if (whichfork == XFS_ATTR_FORK && 233 if (whichfork == XFS_ATTR_FORK &&
244 ip->i_d.di_format != XFS_DINODE_FMT_DEV && 234 ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
245 ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
246 ip->i_d.di_format != XFS_DINODE_FMT_BTREE) { 235 ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
247 uint dfl_forkoff = xfs_default_attroffset(ip) >> 3; 236 uint dfl_forkoff = xfs_default_attroffset(ip) >> 3;
248 237
@@ -499,31 +488,6 @@ error_norelse:
499} 488}
500 489
501/* 490/*
502 * Add bmap trace insert entries for all the contents of the extent records.
503 */
504void
505xfs_bmap_trace_exlist(
506 xfs_inode_t *ip, /* incore inode pointer */
507 xfs_extnum_t cnt, /* count of entries in the list */
508 int whichfork, /* data or attr or cow fork */
509 unsigned long caller_ip)
510{
511 xfs_extnum_t idx; /* extent record index */
512 xfs_ifork_t *ifp; /* inode fork pointer */
513 int state = 0;
514
515 if (whichfork == XFS_ATTR_FORK)
516 state |= BMAP_ATTRFORK;
517 else if (whichfork == XFS_COW_FORK)
518 state |= BMAP_COWFORK;
519
520 ifp = XFS_IFORK_PTR(ip, whichfork);
521 ASSERT(cnt == xfs_iext_count(ifp));
522 for (idx = 0; idx < cnt; idx++)
523 trace_xfs_extlist(ip, idx, state, caller_ip);
524}
525
526/*
527 * Validate that the bmbt_irecs being returned from bmapi are valid 491 * Validate that the bmbt_irecs being returned from bmapi are valid
528 * given the caller's original parameters. Specifically check the 492 * given the caller's original parameters. Specifically check the
529 * ranges of the returned irecs to ensure that they only extend beyond 493 * ranges of the returned irecs to ensure that they only extend beyond
@@ -657,8 +621,8 @@ xfs_bmap_btree_to_extents(
657 cbno = be64_to_cpu(*pp); 621 cbno = be64_to_cpu(*pp);
658 *logflagsp = 0; 622 *logflagsp = 0;
659#ifdef DEBUG 623#ifdef DEBUG
660 if ((error = xfs_btree_check_lptr(cur, cbno, 1))) 624 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
661 return error; 625 xfs_btree_check_lptr(cur, cbno, 1));
662#endif 626#endif
663 error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, 627 error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
664 &xfs_bmbt_buf_ops); 628 &xfs_bmbt_buf_ops);
@@ -703,14 +667,14 @@ xfs_bmap_extents_to_btree(
703 xfs_bmbt_rec_t *arp; /* child record pointer */ 667 xfs_bmbt_rec_t *arp; /* child record pointer */
704 struct xfs_btree_block *block; /* btree root block */ 668 struct xfs_btree_block *block; /* btree root block */
705 xfs_btree_cur_t *cur; /* bmap btree cursor */ 669 xfs_btree_cur_t *cur; /* bmap btree cursor */
706 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
707 int error; /* error return value */ 670 int error; /* error return value */
708 xfs_extnum_t i, cnt; /* extent record index */
709 xfs_ifork_t *ifp; /* inode fork pointer */ 671 xfs_ifork_t *ifp; /* inode fork pointer */
710 xfs_bmbt_key_t *kp; /* root block key pointer */ 672 xfs_bmbt_key_t *kp; /* root block key pointer */
711 xfs_mount_t *mp; /* mount structure */ 673 xfs_mount_t *mp; /* mount structure */
712 xfs_extnum_t nextents; /* number of file extents */
713 xfs_bmbt_ptr_t *pp; /* root block address pointer */ 674 xfs_bmbt_ptr_t *pp; /* root block address pointer */
675 struct xfs_iext_cursor icur;
676 struct xfs_bmbt_irec rec;
677 xfs_extnum_t cnt = 0;
714 678
715 mp = ip->i_mount; 679 mp = ip->i_mount;
716 ASSERT(whichfork != XFS_COW_FORK); 680 ASSERT(whichfork != XFS_COW_FORK);
@@ -789,15 +753,12 @@ xfs_bmap_extents_to_btree(
789 XFS_BTNUM_BMAP, 0, 0, ip->i_ino, 753 XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
790 XFS_BTREE_LONG_PTRS); 754 XFS_BTREE_LONG_PTRS);
791 755
792 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); 756 for_each_xfs_iext(ifp, &icur, &rec) {
793 nextents = xfs_iext_count(ifp); 757 if (isnullstartblock(rec.br_startblock))
794 for (cnt = i = 0; i < nextents; i++) { 758 continue;
795 ep = xfs_iext_get_ext(ifp, i); 759 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
796 if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) { 760 xfs_bmbt_disk_set_all(arp, &rec);
797 arp->l0 = cpu_to_be64(ep->l0); 761 cnt++;
798 arp->l1 = cpu_to_be64(ep->l1);
799 arp++; cnt++;
800 }
801 } 762 }
802 ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork)); 763 ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
803 xfs_btree_set_numrecs(ablock, cnt); 764 xfs_btree_set_numrecs(ablock, cnt);
@@ -845,6 +806,8 @@ xfs_bmap_local_to_extents_empty(
845 xfs_bmap_forkoff_reset(ip, whichfork); 806 xfs_bmap_forkoff_reset(ip, whichfork);
846 ifp->if_flags &= ~XFS_IFINLINE; 807 ifp->if_flags &= ~XFS_IFINLINE;
847 ifp->if_flags |= XFS_IFEXTENTS; 808 ifp->if_flags |= XFS_IFEXTENTS;
809 ifp->if_u1.if_root = NULL;
810 ifp->if_height = 0;
848 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); 811 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
849} 812}
850 813
@@ -868,6 +831,7 @@ xfs_bmap_local_to_extents(
868 xfs_alloc_arg_t args; /* allocation arguments */ 831 xfs_alloc_arg_t args; /* allocation arguments */
869 xfs_buf_t *bp; /* buffer for extent block */ 832 xfs_buf_t *bp; /* buffer for extent block */
870 struct xfs_bmbt_irec rec; 833 struct xfs_bmbt_irec rec;
834 struct xfs_iext_cursor icur;
871 835
872 /* 836 /*
873 * We don't want to deal with the case of keeping inode data inline yet. 837 * We don't want to deal with the case of keeping inode data inline yet.
@@ -885,8 +849,7 @@ xfs_bmap_local_to_extents(
885 849
886 flags = 0; 850 flags = 0;
887 error = 0; 851 error = 0;
888 ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == 852 ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE);
889 XFS_IFINLINE);
890 memset(&args, 0, sizeof(args)); 853 memset(&args, 0, sizeof(args));
891 args.tp = tp; 854 args.tp = tp;
892 args.mp = ip->i_mount; 855 args.mp = ip->i_mount;
@@ -930,15 +893,16 @@ xfs_bmap_local_to_extents(
930 xfs_bmap_local_to_extents_empty(ip, whichfork); 893 xfs_bmap_local_to_extents_empty(ip, whichfork);
931 flags |= XFS_ILOG_CORE; 894 flags |= XFS_ILOG_CORE;
932 895
896 ifp->if_u1.if_root = NULL;
897 ifp->if_height = 0;
898
933 rec.br_startoff = 0; 899 rec.br_startoff = 0;
934 rec.br_startblock = args.fsbno; 900 rec.br_startblock = args.fsbno;
935 rec.br_blockcount = 1; 901 rec.br_blockcount = 1;
936 rec.br_state = XFS_EXT_NORM; 902 rec.br_state = XFS_EXT_NORM;
937 xfs_iext_insert(ip, 0, 1, &rec, 0); 903 xfs_iext_first(ifp, &icur);
904 xfs_iext_insert(ip, &icur, &rec, 0);
938 905
939 trace_xfs_bmap_post_update(ip, 0,
940 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
941 _THIS_IP_);
942 XFS_IFORK_NEXT_SET(ip, whichfork, 1); 906 XFS_IFORK_NEXT_SET(ip, whichfork, 1);
943 ip->i_d.di_nblocks = 1; 907 ip->i_d.di_nblocks = 1;
944 xfs_trans_mod_dquot_byino(tp, ip, 908 xfs_trans_mod_dquot_byino(tp, ip,
@@ -973,7 +937,8 @@ xfs_bmap_add_attrfork_btree(
973 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); 937 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
974 cur->bc_private.b.dfops = dfops; 938 cur->bc_private.b.dfops = dfops;
975 cur->bc_private.b.firstblock = *firstblock; 939 cur->bc_private.b.firstblock = *firstblock;
976 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) 940 error = xfs_bmbt_lookup_first(cur, &stat);
941 if (error)
977 goto error0; 942 goto error0;
978 /* must be at least one entry */ 943 /* must be at least one entry */
979 XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0); 944 XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
@@ -1124,9 +1089,6 @@ xfs_bmap_add_attrfork(
1124 case XFS_DINODE_FMT_DEV: 1089 case XFS_DINODE_FMT_DEV:
1125 ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; 1090 ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
1126 break; 1091 break;
1127 case XFS_DINODE_FMT_UUID:
1128 ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
1129 break;
1130 case XFS_DINODE_FMT_LOCAL: 1092 case XFS_DINODE_FMT_LOCAL:
1131 case XFS_DINODE_FMT_EXTENTS: 1093 case XFS_DINODE_FMT_EXTENTS:
1132 case XFS_DINODE_FMT_BTREE: 1094 case XFS_DINODE_FMT_BTREE:
@@ -1206,32 +1168,35 @@ trans_cancel:
1206 */ 1168 */
1207 1169
1208/* 1170/*
1209 * Read in the extents to if_extents. 1171 * Read in extents from a btree-format inode.
1210 * All inode fields are set up by caller, we just traverse the btree
1211 * and copy the records in. If the file system cannot contain unwritten
1212 * extents, the records are checked for no "state" flags.
1213 */ 1172 */
1214int /* error */ 1173int
1215xfs_bmap_read_extents( 1174xfs_iread_extents(
1216 xfs_trans_t *tp, /* transaction pointer */ 1175 struct xfs_trans *tp,
1217 xfs_inode_t *ip, /* incore inode */ 1176 struct xfs_inode *ip,
1218 int whichfork) /* data or attr fork */ 1177 int whichfork)
1219{ 1178{
1220 struct xfs_btree_block *block; /* current btree block */ 1179 struct xfs_mount *mp = ip->i_mount;
1221 xfs_fsblock_t bno; /* block # of "block" */ 1180 int state = xfs_bmap_fork_to_state(whichfork);
1222 xfs_buf_t *bp; /* buffer for "block" */ 1181 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
1223 int error; /* error return value */ 1182 xfs_extnum_t nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
1224 xfs_extnum_t i, j; /* index into the extents list */ 1183 struct xfs_btree_block *block = ifp->if_broot;
1225 xfs_ifork_t *ifp; /* fork structure */ 1184 struct xfs_iext_cursor icur;
1226 int level; /* btree level, for checking */ 1185 struct xfs_bmbt_irec new;
1227 xfs_mount_t *mp; /* file system mount structure */ 1186 xfs_fsblock_t bno;
1228 __be64 *pp; /* pointer to block address */ 1187 struct xfs_buf *bp;
1229 /* REFERENCED */ 1188 xfs_extnum_t i, j;
1230 xfs_extnum_t room; /* number of entries there's room for */ 1189 int level;
1190 __be64 *pp;
1191 int error;
1192
1193 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1194
1195 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
1196 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
1197 return -EFSCORRUPTED;
1198 }
1231 1199
1232 mp = ip->i_mount;
1233 ifp = XFS_IFORK_PTR(ip, whichfork);
1234 block = ifp->if_broot;
1235 /* 1200 /*
1236 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. 1201 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
1237 */ 1202 */
@@ -1248,21 +1213,23 @@ xfs_bmap_read_extents(
1248 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 1213 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
1249 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); 1214 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
1250 if (error) 1215 if (error)
1251 return error; 1216 goto out;
1252 block = XFS_BUF_TO_BLOCK(bp); 1217 block = XFS_BUF_TO_BLOCK(bp);
1253 if (level == 0) 1218 if (level == 0)
1254 break; 1219 break;
1255 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); 1220 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
1256 bno = be64_to_cpu(*pp); 1221 bno = be64_to_cpu(*pp);
1257 XFS_WANT_CORRUPTED_GOTO(mp, 1222 XFS_WANT_CORRUPTED_GOTO(mp,
1258 XFS_FSB_SANITY_CHECK(mp, bno), error0); 1223 XFS_FSB_SANITY_CHECK(mp, bno), out_brelse);
1259 xfs_trans_brelse(tp, bp); 1224 xfs_trans_brelse(tp, bp);
1260 } 1225 }
1226
1261 /* 1227 /*
1262 * Here with bp and block set to the leftmost leaf node in the tree. 1228 * Here with bp and block set to the leftmost leaf node in the tree.
1263 */ 1229 */
1264 room = xfs_iext_count(ifp);
1265 i = 0; 1230 i = 0;
1231 xfs_iext_first(ifp, &icur);
1232
1266 /* 1233 /*
1267 * Loop over all leaf nodes. Copy information to the extent records. 1234 * Loop over all leaf nodes. Copy information to the extent records.
1268 */ 1235 */
@@ -1272,14 +1239,15 @@ xfs_bmap_read_extents(
1272 xfs_extnum_t num_recs; 1239 xfs_extnum_t num_recs;
1273 1240
1274 num_recs = xfs_btree_get_numrecs(block); 1241 num_recs = xfs_btree_get_numrecs(block);
1275 if (unlikely(i + num_recs > room)) { 1242 if (unlikely(i + num_recs > nextents)) {
1276 ASSERT(i + num_recs <= room); 1243 ASSERT(i + num_recs <= nextents);
1277 xfs_warn(ip->i_mount, 1244 xfs_warn(ip->i_mount,
1278 "corrupt dinode %Lu, (btree extents).", 1245 "corrupt dinode %Lu, (btree extents).",
1279 (unsigned long long) ip->i_ino); 1246 (unsigned long long) ip->i_ino);
1280 XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)", 1247 XFS_CORRUPTION_ERROR(__func__,
1281 XFS_ERRLEVEL_LOW, ip->i_mount, block); 1248 XFS_ERRLEVEL_LOW, ip->i_mount, block);
1282 goto error0; 1249 error = -EFSCORRUPTED;
1250 goto out_brelse;
1283 } 1251 }
1284 /* 1252 /*
1285 * Read-ahead the next leaf block, if any. 1253 * Read-ahead the next leaf block, if any.
@@ -1292,15 +1260,17 @@ xfs_bmap_read_extents(
1292 * Copy records into the extent records. 1260 * Copy records into the extent records.
1293 */ 1261 */
1294 frp = XFS_BMBT_REC_ADDR(mp, block, 1); 1262 frp = XFS_BMBT_REC_ADDR(mp, block, 1);
1295 for (j = 0; j < num_recs; j++, i++, frp++) { 1263 for (j = 0; j < num_recs; j++, frp++, i++) {
1296 xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); 1264 xfs_bmbt_disk_get_all(frp, &new);
1297 trp->l0 = be64_to_cpu(frp->l0); 1265 if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) {
1298 trp->l1 = be64_to_cpu(frp->l1);
1299 if (!xfs_bmbt_validate_extent(mp, whichfork, trp)) {
1300 XFS_ERROR_REPORT("xfs_bmap_read_extents(2)", 1266 XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
1301 XFS_ERRLEVEL_LOW, mp); 1267 XFS_ERRLEVEL_LOW, mp);
1302 goto error0; 1268 error = -EFSCORRUPTED;
1269 goto out_brelse;
1303 } 1270 }
1271 xfs_iext_insert(ip, &icur, &new, state);
1272 trace_xfs_read_extent(ip, &icur, state, _THIS_IP_);
1273 xfs_iext_next(ifp, &icur);
1304 } 1274 }
1305 xfs_trans_brelse(tp, bp); 1275 xfs_trans_brelse(tp, bp);
1306 bno = nextbno; 1276 bno = nextbno;
@@ -1312,71 +1282,74 @@ xfs_bmap_read_extents(
1312 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 1282 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
1313 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); 1283 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
1314 if (error) 1284 if (error)
1315 return error; 1285 goto out;
1316 block = XFS_BUF_TO_BLOCK(bp); 1286 block = XFS_BUF_TO_BLOCK(bp);
1317 } 1287 }
1318 if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) 1288
1319 return -EFSCORRUPTED; 1289 if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) {
1290 error = -EFSCORRUPTED;
1291 goto out;
1292 }
1320 ASSERT(i == xfs_iext_count(ifp)); 1293 ASSERT(i == xfs_iext_count(ifp));
1321 XFS_BMAP_TRACE_EXLIST(ip, i, whichfork); 1294
1295 ifp->if_flags |= XFS_IFEXTENTS;
1322 return 0; 1296 return 0;
1323error0: 1297
1298out_brelse:
1324 xfs_trans_brelse(tp, bp); 1299 xfs_trans_brelse(tp, bp);
1325 return -EFSCORRUPTED; 1300out:
1301 xfs_iext_destroy(ifp);
1302 return error;
1326} 1303}
1327 1304
1328/* 1305/*
1329 * Returns the file-relative block number of the first unused block(s) 1306 * Returns the relative block number of the first unused block(s) in the given
1330 * in the file with at least "len" logically contiguous blocks free. 1307 * fork with at least "len" logically contiguous blocks free. This is the
1331 * This is the lowest-address hole if the file has holes, else the first block 1308 * lowest-address hole if the fork has holes, else the first block past the end
1332 * past the end of file. 1309 * of fork. Return 0 if the fork is currently local (in-inode).
1333 * Return 0 if the file is currently local (in-inode).
1334 */ 1310 */
1335int /* error */ 1311int /* error */
1336xfs_bmap_first_unused( 1312xfs_bmap_first_unused(
1337 xfs_trans_t *tp, /* transaction pointer */ 1313 struct xfs_trans *tp, /* transaction pointer */
1338 xfs_inode_t *ip, /* incore inode */ 1314 struct xfs_inode *ip, /* incore inode */
1339 xfs_extlen_t len, /* size of hole to find */ 1315 xfs_extlen_t len, /* size of hole to find */
1340 xfs_fileoff_t *first_unused, /* unused block */ 1316 xfs_fileoff_t *first_unused, /* unused block */
1341 int whichfork) /* data or attr fork */ 1317 int whichfork) /* data or attr fork */
1342{ 1318{
1343 int error; /* error return value */ 1319 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
1344 int idx; /* extent record index */ 1320 struct xfs_bmbt_irec got;
1345 xfs_ifork_t *ifp; /* inode fork pointer */ 1321 struct xfs_iext_cursor icur;
1346 xfs_fileoff_t lastaddr; /* last block number seen */ 1322 xfs_fileoff_t lastaddr = 0;
1347 xfs_fileoff_t lowest; /* lowest useful block */ 1323 xfs_fileoff_t lowest, max;
1348 xfs_fileoff_t max; /* starting useful block */ 1324 int error;
1349 xfs_extnum_t nextents; /* number of extent entries */
1350 1325
1351 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE || 1326 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
1352 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS || 1327 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
1353 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); 1328 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
1329
1354 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 1330 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1355 *first_unused = 0; 1331 *first_unused = 0;
1356 return 0; 1332 return 0;
1357 } 1333 }
1358 ifp = XFS_IFORK_PTR(ip, whichfork);
1359 if (!(ifp->if_flags & XFS_IFEXTENTS) &&
1360 (error = xfs_iread_extents(tp, ip, whichfork)))
1361 return error;
1362 lowest = *first_unused;
1363 nextents = xfs_iext_count(ifp);
1364 for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
1365 struct xfs_bmbt_irec got;
1366 1334
1367 xfs_iext_get_extent(ifp, idx, &got); 1335 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1336 error = xfs_iread_extents(tp, ip, whichfork);
1337 if (error)
1338 return error;
1339 }
1368 1340
1341 lowest = max = *first_unused;
1342 for_each_xfs_iext(ifp, &icur, &got) {
1369 /* 1343 /*
1370 * See if the hole before this extent will work. 1344 * See if the hole before this extent will work.
1371 */ 1345 */
1372 if (got.br_startoff >= lowest + len && 1346 if (got.br_startoff >= lowest + len &&
1373 got.br_startoff - max >= len) { 1347 got.br_startoff - max >= len)
1374 *first_unused = max; 1348 break;
1375 return 0;
1376 }
1377 lastaddr = got.br_startoff + got.br_blockcount; 1349 lastaddr = got.br_startoff + got.br_blockcount;
1378 max = XFS_FILEOFF_MAX(lastaddr, lowest); 1350 max = XFS_FILEOFF_MAX(lastaddr, lowest);
1379 } 1351 }
1352
1380 *first_unused = max; 1353 *first_unused = max;
1381 return 0; 1354 return 0;
1382} 1355}
@@ -1396,7 +1369,7 @@ xfs_bmap_last_before(
1396{ 1369{
1397 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 1370 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
1398 struct xfs_bmbt_irec got; 1371 struct xfs_bmbt_irec got;
1399 xfs_extnum_t idx; 1372 struct xfs_iext_cursor icur;
1400 int error; 1373 int error;
1401 1374
1402 switch (XFS_IFORK_FORMAT(ip, whichfork)) { 1375 switch (XFS_IFORK_FORMAT(ip, whichfork)) {
@@ -1416,17 +1389,8 @@ xfs_bmap_last_before(
1416 return error; 1389 return error;
1417 } 1390 }
1418 1391
1419 if (xfs_iext_lookup_extent(ip, ifp, *last_block - 1, &idx, &got)) { 1392 if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
1420 if (got.br_startoff <= *last_block - 1) 1393 *last_block = 0;
1421 return 0;
1422 }
1423
1424 if (xfs_iext_get_extent(ifp, idx - 1, &got)) {
1425 *last_block = got.br_startoff + got.br_blockcount;
1426 return 0;
1427 }
1428
1429 *last_block = 0;
1430 return 0; 1394 return 0;
1431} 1395}
1432 1396
@@ -1439,8 +1403,8 @@ xfs_bmap_last_extent(
1439 int *is_empty) 1403 int *is_empty)
1440{ 1404{
1441 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 1405 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
1406 struct xfs_iext_cursor icur;
1442 int error; 1407 int error;
1443 int nextents;
1444 1408
1445 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 1409 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1446 error = xfs_iread_extents(tp, ip, whichfork); 1410 error = xfs_iread_extents(tp, ip, whichfork);
@@ -1448,14 +1412,11 @@ xfs_bmap_last_extent(
1448 return error; 1412 return error;
1449 } 1413 }
1450 1414
1451 nextents = xfs_iext_count(ifp); 1415 xfs_iext_last(ifp, &icur);
1452 if (nextents == 0) { 1416 if (!xfs_iext_get_extent(ifp, &icur, rec))
1453 *is_empty = 1; 1417 *is_empty = 1;
1454 return 0; 1418 else
1455 } 1419 *is_empty = 0;
1456
1457 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
1458 *is_empty = 0;
1459 return 0; 1420 return 0;
1460} 1421}
1461 1422
@@ -1540,10 +1501,10 @@ xfs_bmap_one_block(
1540 xfs_inode_t *ip, /* incore inode */ 1501 xfs_inode_t *ip, /* incore inode */
1541 int whichfork) /* data or attr fork */ 1502 int whichfork) /* data or attr fork */
1542{ 1503{
1543 xfs_bmbt_rec_host_t *ep; /* ptr to fork's extent */
1544 xfs_ifork_t *ifp; /* inode fork pointer */ 1504 xfs_ifork_t *ifp; /* inode fork pointer */
1545 int rval; /* return value */ 1505 int rval; /* return value */
1546 xfs_bmbt_irec_t s; /* internal version of extent */ 1506 xfs_bmbt_irec_t s; /* internal version of extent */
1507 struct xfs_iext_cursor icur;
1547 1508
1548#ifndef DEBUG 1509#ifndef DEBUG
1549 if (whichfork == XFS_DATA_FORK) 1510 if (whichfork == XFS_DATA_FORK)
@@ -1555,8 +1516,8 @@ xfs_bmap_one_block(
1555 return 0; 1516 return 0;
1556 ifp = XFS_IFORK_PTR(ip, whichfork); 1517 ifp = XFS_IFORK_PTR(ip, whichfork);
1557 ASSERT(ifp->if_flags & XFS_IFEXTENTS); 1518 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1558 ep = xfs_iext_get_ext(ifp, 0); 1519 xfs_iext_first(ifp, &icur);
1559 xfs_bmbt_get_all(ep, &s); 1520 xfs_iext_get_extent(ifp, &icur, &s);
1560 rval = s.br_startoff == 0 && s.br_blockcount == 1; 1521 rval = s.br_startoff == 0 && s.br_blockcount == 1;
1561 if (rval && whichfork == XFS_DATA_FORK) 1522 if (rval && whichfork == XFS_DATA_FORK)
1562 ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize); 1523 ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
@@ -1576,8 +1537,6 @@ xfs_bmap_add_extent_delay_real(
1576 int whichfork) 1537 int whichfork)
1577{ 1538{
1578 struct xfs_bmbt_irec *new = &bma->got; 1539 struct xfs_bmbt_irec *new = &bma->got;
1579 int diff; /* temp value */
1580 xfs_bmbt_rec_host_t *ep; /* extent entry for idx */
1581 int error; /* error return value */ 1540 int error; /* error return value */
1582 int i; /* temp state */ 1541 int i; /* temp state */
1583 xfs_ifork_t *ifp; /* inode fork pointer */ 1542 xfs_ifork_t *ifp; /* inode fork pointer */
@@ -1585,14 +1544,14 @@ xfs_bmap_add_extent_delay_real(
1585 xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ 1544 xfs_bmbt_irec_t r[3]; /* neighbor extent entries */
1586 /* left is 0, right is 1, prev is 2 */ 1545 /* left is 0, right is 1, prev is 2 */
1587 int rval=0; /* return value (logging flags) */ 1546 int rval=0; /* return value (logging flags) */
1588 int state = 0;/* state bits, accessed thru macros */ 1547 int state = xfs_bmap_fork_to_state(whichfork);
1589 xfs_filblks_t da_new; /* new count del alloc blocks used */ 1548 xfs_filblks_t da_new; /* new count del alloc blocks used */
1590 xfs_filblks_t da_old; /* old count del alloc blocks used */ 1549 xfs_filblks_t da_old; /* old count del alloc blocks used */
1591 xfs_filblks_t temp=0; /* value for da_new calculations */ 1550 xfs_filblks_t temp=0; /* value for da_new calculations */
1592 xfs_filblks_t temp2=0;/* value for da_new calculations */
1593 int tmp_rval; /* partial logging flags */ 1551 int tmp_rval; /* partial logging flags */
1594 struct xfs_mount *mp; 1552 struct xfs_mount *mp;
1595 xfs_extnum_t *nextents; 1553 xfs_extnum_t *nextents;
1554 struct xfs_bmbt_irec old;
1596 1555
1597 mp = bma->ip->i_mount; 1556 mp = bma->ip->i_mount;
1598 ifp = XFS_IFORK_PTR(bma->ip, whichfork); 1557 ifp = XFS_IFORK_PTR(bma->ip, whichfork);
@@ -1600,8 +1559,6 @@ xfs_bmap_add_extent_delay_real(
1600 nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents : 1559 nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents :
1601 &bma->ip->i_d.di_nextents); 1560 &bma->ip->i_d.di_nextents);
1602 1561
1603 ASSERT(bma->idx >= 0);
1604 ASSERT(bma->idx <= xfs_iext_count(ifp));
1605 ASSERT(!isnullstartblock(new->br_startblock)); 1562 ASSERT(!isnullstartblock(new->br_startblock));
1606 ASSERT(!bma->cur || 1563 ASSERT(!bma->cur ||
1607 (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); 1564 (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
@@ -1612,15 +1569,12 @@ xfs_bmap_add_extent_delay_real(
1612#define RIGHT r[1] 1569#define RIGHT r[1]
1613#define PREV r[2] 1570#define PREV r[2]
1614 1571
1615 if (whichfork == XFS_COW_FORK)
1616 state |= BMAP_COWFORK;
1617
1618 /* 1572 /*
1619 * Set up a bunch of variables to make the tests simpler. 1573 * Set up a bunch of variables to make the tests simpler.
1620 */ 1574 */
1621 ep = xfs_iext_get_ext(ifp, bma->idx); 1575 xfs_iext_get_extent(ifp, &bma->icur, &PREV);
1622 xfs_bmbt_get_all(ep, &PREV);
1623 new_endoff = new->br_startoff + new->br_blockcount; 1576 new_endoff = new->br_startoff + new->br_blockcount;
1577 ASSERT(isnullstartblock(PREV.br_startblock));
1624 ASSERT(PREV.br_startoff <= new->br_startoff); 1578 ASSERT(PREV.br_startoff <= new->br_startoff);
1625 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); 1579 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1626 1580
@@ -1640,10 +1594,8 @@ xfs_bmap_add_extent_delay_real(
1640 * Check and set flags if this segment has a left neighbor. 1594 * Check and set flags if this segment has a left neighbor.
1641 * Don't set contiguous if the combined extent would be too large. 1595 * Don't set contiguous if the combined extent would be too large.
1642 */ 1596 */
1643 if (bma->idx > 0) { 1597 if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
1644 state |= BMAP_LEFT_VALID; 1598 state |= BMAP_LEFT_VALID;
1645 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT);
1646
1647 if (isnullstartblock(LEFT.br_startblock)) 1599 if (isnullstartblock(LEFT.br_startblock))
1648 state |= BMAP_LEFT_DELAY; 1600 state |= BMAP_LEFT_DELAY;
1649 } 1601 }
@@ -1660,10 +1612,8 @@ xfs_bmap_add_extent_delay_real(
1660 * Don't set contiguous if the combined extent would be too large. 1612 * Don't set contiguous if the combined extent would be too large.
1661 * Also check for all-three-contiguous being too large. 1613 * Also check for all-three-contiguous being too large.
1662 */ 1614 */
1663 if (bma->idx < xfs_iext_count(ifp) - 1) { 1615 if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
1664 state |= BMAP_RIGHT_VALID; 1616 state |= BMAP_RIGHT_VALID;
1665 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
1666
1667 if (isnullstartblock(RIGHT.br_startblock)) 1617 if (isnullstartblock(RIGHT.br_startblock))
1668 state |= BMAP_RIGHT_DELAY; 1618 state |= BMAP_RIGHT_DELAY;
1669 } 1619 }
@@ -1693,22 +1643,19 @@ xfs_bmap_add_extent_delay_real(
1693 * Filling in all of a previously delayed allocation extent. 1643 * Filling in all of a previously delayed allocation extent.
1694 * The left and right neighbors are both contiguous with new. 1644 * The left and right neighbors are both contiguous with new.
1695 */ 1645 */
1696 bma->idx--; 1646 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
1697 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); 1647
1698 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), 1648 xfs_iext_remove(bma->ip, &bma->icur, state);
1699 LEFT.br_blockcount + PREV.br_blockcount + 1649 xfs_iext_remove(bma->ip, &bma->icur, state);
1700 RIGHT.br_blockcount); 1650 xfs_iext_prev(ifp, &bma->icur);
1701 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); 1651 xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1702
1703 xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
1704 (*nextents)--; 1652 (*nextents)--;
1653
1705 if (bma->cur == NULL) 1654 if (bma->cur == NULL)
1706 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 1655 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1707 else { 1656 else {
1708 rval = XFS_ILOG_CORE; 1657 rval = XFS_ILOG_CORE;
1709 error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, 1658 error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1710 RIGHT.br_startblock,
1711 RIGHT.br_blockcount, &i);
1712 if (error) 1659 if (error)
1713 goto done; 1660 goto done;
1714 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 1661 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -1720,11 +1667,7 @@ xfs_bmap_add_extent_delay_real(
1720 if (error) 1667 if (error)
1721 goto done; 1668 goto done;
1722 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 1669 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1723 error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, 1670 error = xfs_bmbt_update(bma->cur, &LEFT);
1724 LEFT.br_startblock,
1725 LEFT.br_blockcount +
1726 PREV.br_blockcount +
1727 RIGHT.br_blockcount, LEFT.br_state);
1728 if (error) 1671 if (error)
1729 goto done; 1672 goto done;
1730 } 1673 }
@@ -1735,28 +1678,22 @@ xfs_bmap_add_extent_delay_real(
1735 * Filling in all of a previously delayed allocation extent. 1678 * Filling in all of a previously delayed allocation extent.
1736 * The left neighbor is contiguous, the right is not. 1679 * The left neighbor is contiguous, the right is not.
1737 */ 1680 */
1738 bma->idx--; 1681 old = LEFT;
1682 LEFT.br_blockcount += PREV.br_blockcount;
1739 1683
1740 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); 1684 xfs_iext_remove(bma->ip, &bma->icur, state);
1741 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), 1685 xfs_iext_prev(ifp, &bma->icur);
1742 LEFT.br_blockcount + PREV.br_blockcount); 1686 xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1743 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1744 1687
1745 xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
1746 if (bma->cur == NULL) 1688 if (bma->cur == NULL)
1747 rval = XFS_ILOG_DEXT; 1689 rval = XFS_ILOG_DEXT;
1748 else { 1690 else {
1749 rval = 0; 1691 rval = 0;
1750 error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, 1692 error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1751 LEFT.br_startblock, LEFT.br_blockcount,
1752 &i);
1753 if (error) 1693 if (error)
1754 goto done; 1694 goto done;
1755 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 1695 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1756 error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, 1696 error = xfs_bmbt_update(bma->cur, &LEFT);
1757 LEFT.br_startblock,
1758 LEFT.br_blockcount +
1759 PREV.br_blockcount, LEFT.br_state);
1760 if (error) 1697 if (error)
1761 goto done; 1698 goto done;
1762 } 1699 }
@@ -1767,27 +1704,23 @@ xfs_bmap_add_extent_delay_real(
1767 * Filling in all of a previously delayed allocation extent. 1704 * Filling in all of a previously delayed allocation extent.
1768 * The right neighbor is contiguous, the left is not. 1705 * The right neighbor is contiguous, the left is not.
1769 */ 1706 */
1770 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); 1707 PREV.br_startblock = new->br_startblock;
1771 xfs_bmbt_set_startblock(ep, new->br_startblock); 1708 PREV.br_blockcount += RIGHT.br_blockcount;
1772 xfs_bmbt_set_blockcount(ep, 1709
1773 PREV.br_blockcount + RIGHT.br_blockcount); 1710 xfs_iext_next(ifp, &bma->icur);
1774 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); 1711 xfs_iext_remove(bma->ip, &bma->icur, state);
1712 xfs_iext_prev(ifp, &bma->icur);
1713 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1775 1714
1776 xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
1777 if (bma->cur == NULL) 1715 if (bma->cur == NULL)
1778 rval = XFS_ILOG_DEXT; 1716 rval = XFS_ILOG_DEXT;
1779 else { 1717 else {
1780 rval = 0; 1718 rval = 0;
1781 error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, 1719 error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1782 RIGHT.br_startblock,
1783 RIGHT.br_blockcount, &i);
1784 if (error) 1720 if (error)
1785 goto done; 1721 goto done;
1786 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 1722 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1787 error = xfs_bmbt_update(bma->cur, PREV.br_startoff, 1723 error = xfs_bmbt_update(bma->cur, &PREV);
1788 new->br_startblock,
1789 PREV.br_blockcount +
1790 RIGHT.br_blockcount, PREV.br_state);
1791 if (error) 1724 if (error)
1792 goto done; 1725 goto done;
1793 } 1726 }
@@ -1799,23 +1732,19 @@ xfs_bmap_add_extent_delay_real(
1799 * Neither the left nor right neighbors are contiguous with 1732 * Neither the left nor right neighbors are contiguous with
1800 * the new one. 1733 * the new one.
1801 */ 1734 */
1802 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); 1735 PREV.br_startblock = new->br_startblock;
1803 xfs_bmbt_set_startblock(ep, new->br_startblock); 1736 PREV.br_state = new->br_state;
1804 xfs_bmbt_set_state(ep, new->br_state); 1737 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1805 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1806 1738
1807 (*nextents)++; 1739 (*nextents)++;
1808 if (bma->cur == NULL) 1740 if (bma->cur == NULL)
1809 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 1741 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1810 else { 1742 else {
1811 rval = XFS_ILOG_CORE; 1743 rval = XFS_ILOG_CORE;
1812 error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, 1744 error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1813 new->br_startblock, new->br_blockcount,
1814 &i);
1815 if (error) 1745 if (error)
1816 goto done; 1746 goto done;
1817 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 1747 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1818 bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
1819 error = xfs_btree_insert(bma->cur, &i); 1748 error = xfs_btree_insert(bma->cur, &i);
1820 if (error) 1749 if (error)
1821 goto done; 1750 goto done;
@@ -1828,40 +1757,33 @@ xfs_bmap_add_extent_delay_real(
1828 * Filling in the first part of a previous delayed allocation. 1757 * Filling in the first part of a previous delayed allocation.
1829 * The left neighbor is contiguous. 1758 * The left neighbor is contiguous.
1830 */ 1759 */
1831 trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_); 1760 old = LEFT;
1832 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1),
1833 LEFT.br_blockcount + new->br_blockcount);
1834 xfs_bmbt_set_startoff(ep,
1835 PREV.br_startoff + new->br_blockcount);
1836 trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
1837
1838 temp = PREV.br_blockcount - new->br_blockcount; 1761 temp = PREV.br_blockcount - new->br_blockcount;
1839 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); 1762 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1840 xfs_bmbt_set_blockcount(ep, temp); 1763 startblockval(PREV.br_startblock));
1764
1765 LEFT.br_blockcount += new->br_blockcount;
1766
1767 PREV.br_blockcount = temp;
1768 PREV.br_startoff += new->br_blockcount;
1769 PREV.br_startblock = nullstartblock(da_new);
1770
1771 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1772 xfs_iext_prev(ifp, &bma->icur);
1773 xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1774
1841 if (bma->cur == NULL) 1775 if (bma->cur == NULL)
1842 rval = XFS_ILOG_DEXT; 1776 rval = XFS_ILOG_DEXT;
1843 else { 1777 else {
1844 rval = 0; 1778 rval = 0;
1845 error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, 1779 error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1846 LEFT.br_startblock, LEFT.br_blockcount,
1847 &i);
1848 if (error) 1780 if (error)
1849 goto done; 1781 goto done;
1850 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 1782 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1851 error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, 1783 error = xfs_bmbt_update(bma->cur, &LEFT);
1852 LEFT.br_startblock,
1853 LEFT.br_blockcount +
1854 new->br_blockcount,
1855 LEFT.br_state);
1856 if (error) 1784 if (error)
1857 goto done; 1785 goto done;
1858 } 1786 }
1859 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1860 startblockval(PREV.br_startblock));
1861 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
1862 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1863
1864 bma->idx--;
1865 break; 1787 break;
1866 1788
1867 case BMAP_LEFT_FILLING: 1789 case BMAP_LEFT_FILLING:
@@ -1869,23 +1791,16 @@ xfs_bmap_add_extent_delay_real(
1869 * Filling in the first part of a previous delayed allocation. 1791 * Filling in the first part of a previous delayed allocation.
1870 * The left neighbor is not contiguous. 1792 * The left neighbor is not contiguous.
1871 */ 1793 */
1872 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); 1794 xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1873 xfs_bmbt_set_startoff(ep, new_endoff);
1874 temp = PREV.br_blockcount - new->br_blockcount;
1875 xfs_bmbt_set_blockcount(ep, temp);
1876 xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
1877 (*nextents)++; 1795 (*nextents)++;
1878 if (bma->cur == NULL) 1796 if (bma->cur == NULL)
1879 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 1797 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1880 else { 1798 else {
1881 rval = XFS_ILOG_CORE; 1799 rval = XFS_ILOG_CORE;
1882 error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, 1800 error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1883 new->br_startblock, new->br_blockcount,
1884 &i);
1885 if (error) 1801 if (error)
1886 goto done; 1802 goto done;
1887 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 1803 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1888 bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
1889 error = xfs_btree_insert(bma->cur, &i); 1804 error = xfs_btree_insert(bma->cur, &i);
1890 if (error) 1805 if (error)
1891 goto done; 1806 goto done;
@@ -1900,12 +1815,18 @@ xfs_bmap_add_extent_delay_real(
1900 if (error) 1815 if (error)
1901 goto done; 1816 goto done;
1902 } 1817 }
1818
1819 temp = PREV.br_blockcount - new->br_blockcount;
1903 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), 1820 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1904 startblockval(PREV.br_startblock) - 1821 startblockval(PREV.br_startblock) -
1905 (bma->cur ? bma->cur->bc_private.b.allocated : 0)); 1822 (bma->cur ? bma->cur->bc_private.b.allocated : 0));
1906 ep = xfs_iext_get_ext(ifp, bma->idx + 1); 1823
1907 xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); 1824 PREV.br_startoff = new_endoff;
1908 trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); 1825 PREV.br_blockcount = temp;
1826 PREV.br_startblock = nullstartblock(da_new);
1827 xfs_iext_next(ifp, &bma->icur);
1828 xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1829 xfs_iext_prev(ifp, &bma->icur);
1909 break; 1830 break;
1910 1831
1911 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 1832 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1913,40 +1834,34 @@ xfs_bmap_add_extent_delay_real(
1913 * Filling in the last part of a previous delayed allocation. 1834 * Filling in the last part of a previous delayed allocation.
1914 * The right neighbor is contiguous with the new allocation. 1835 * The right neighbor is contiguous with the new allocation.
1915 */ 1836 */
1916 temp = PREV.br_blockcount - new->br_blockcount; 1837 old = RIGHT;
1917 trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_); 1838 RIGHT.br_startoff = new->br_startoff;
1918 xfs_bmbt_set_blockcount(ep, temp); 1839 RIGHT.br_startblock = new->br_startblock;
1919 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1), 1840 RIGHT.br_blockcount += new->br_blockcount;
1920 new->br_startoff, new->br_startblock, 1841
1921 new->br_blockcount + RIGHT.br_blockcount,
1922 RIGHT.br_state);
1923 trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
1924 if (bma->cur == NULL) 1842 if (bma->cur == NULL)
1925 rval = XFS_ILOG_DEXT; 1843 rval = XFS_ILOG_DEXT;
1926 else { 1844 else {
1927 rval = 0; 1845 rval = 0;
1928 error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, 1846 error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1929 RIGHT.br_startblock,
1930 RIGHT.br_blockcount, &i);
1931 if (error) 1847 if (error)
1932 goto done; 1848 goto done;
1933 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 1849 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1934 error = xfs_bmbt_update(bma->cur, new->br_startoff, 1850 error = xfs_bmbt_update(bma->cur, &RIGHT);
1935 new->br_startblock,
1936 new->br_blockcount +
1937 RIGHT.br_blockcount,
1938 RIGHT.br_state);
1939 if (error) 1851 if (error)
1940 goto done; 1852 goto done;
1941 } 1853 }
1942 1854
1855 temp = PREV.br_blockcount - new->br_blockcount;
1943 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), 1856 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1944 startblockval(PREV.br_startblock)); 1857 startblockval(PREV.br_startblock));
1945 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1946 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
1947 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1948 1858
1949 bma->idx++; 1859 PREV.br_blockcount = temp;
1860 PREV.br_startblock = nullstartblock(da_new);
1861
1862 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1863 xfs_iext_next(ifp, &bma->icur);
1864 xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
1950 break; 1865 break;
1951 1866
1952 case BMAP_RIGHT_FILLING: 1867 case BMAP_RIGHT_FILLING:
@@ -1954,22 +1869,16 @@ xfs_bmap_add_extent_delay_real(
1954 * Filling in the last part of a previous delayed allocation. 1869 * Filling in the last part of a previous delayed allocation.
1955 * The right neighbor is not contiguous. 1870 * The right neighbor is not contiguous.
1956 */ 1871 */
1957 temp = PREV.br_blockcount - new->br_blockcount; 1872 xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1958 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1959 xfs_bmbt_set_blockcount(ep, temp);
1960 xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
1961 (*nextents)++; 1873 (*nextents)++;
1962 if (bma->cur == NULL) 1874 if (bma->cur == NULL)
1963 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 1875 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1964 else { 1876 else {
1965 rval = XFS_ILOG_CORE; 1877 rval = XFS_ILOG_CORE;
1966 error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, 1878 error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1967 new->br_startblock, new->br_blockcount,
1968 &i);
1969 if (error) 1879 if (error)
1970 goto done; 1880 goto done;
1971 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 1881 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1972 bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
1973 error = xfs_btree_insert(bma->cur, &i); 1882 error = xfs_btree_insert(bma->cur, &i);
1974 if (error) 1883 if (error)
1975 goto done; 1884 goto done;
@@ -1984,14 +1893,16 @@ xfs_bmap_add_extent_delay_real(
1984 if (error) 1893 if (error)
1985 goto done; 1894 goto done;
1986 } 1895 }
1896
1897 temp = PREV.br_blockcount - new->br_blockcount;
1987 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), 1898 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1988 startblockval(PREV.br_startblock) - 1899 startblockval(PREV.br_startblock) -
1989 (bma->cur ? bma->cur->bc_private.b.allocated : 0)); 1900 (bma->cur ? bma->cur->bc_private.b.allocated : 0));
1990 ep = xfs_iext_get_ext(ifp, bma->idx);
1991 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
1992 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1993 1901
1994 bma->idx++; 1902 PREV.br_startblock = nullstartblock(da_new);
1903 PREV.br_blockcount = temp;
1904 xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1905 xfs_iext_next(ifp, &bma->icur);
1995 break; 1906 break;
1996 1907
1997 case 0: 1908 case 0:
@@ -2015,30 +1926,40 @@ xfs_bmap_add_extent_delay_real(
2015 * PREV @ idx LEFT RIGHT 1926 * PREV @ idx LEFT RIGHT
2016 * inserted at idx + 1 1927 * inserted at idx + 1
2017 */ 1928 */
2018 temp = new->br_startoff - PREV.br_startoff; 1929 old = PREV;
2019 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; 1930
2020 trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_); 1931 /* LEFT is the new middle */
2021 xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */
2022 LEFT = *new; 1932 LEFT = *new;
1933
1934 /* RIGHT is the new right */
2023 RIGHT.br_state = PREV.br_state; 1935 RIGHT.br_state = PREV.br_state;
2024 RIGHT.br_startblock = nullstartblock(
2025 (int)xfs_bmap_worst_indlen(bma->ip, temp2));
2026 RIGHT.br_startoff = new_endoff; 1936 RIGHT.br_startoff = new_endoff;
2027 RIGHT.br_blockcount = temp2; 1937 RIGHT.br_blockcount =
2028 /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ 1938 PREV.br_startoff + PREV.br_blockcount - new_endoff;
2029 xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state); 1939 RIGHT.br_startblock =
1940 nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1941 RIGHT.br_blockcount));
1942
1943 /* truncate PREV */
1944 PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1945 PREV.br_startblock =
1946 nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1947 PREV.br_blockcount));
1948 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1949
1950 xfs_iext_next(ifp, &bma->icur);
1951 xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1952 xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
2030 (*nextents)++; 1953 (*nextents)++;
1954
2031 if (bma->cur == NULL) 1955 if (bma->cur == NULL)
2032 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 1956 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2033 else { 1957 else {
2034 rval = XFS_ILOG_CORE; 1958 rval = XFS_ILOG_CORE;
2035 error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, 1959 error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
2036 new->br_startblock, new->br_blockcount,
2037 &i);
2038 if (error) 1960 if (error)
2039 goto done; 1961 goto done;
2040 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 1962 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2041 bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2042 error = xfs_btree_insert(bma->cur, &i); 1963 error = xfs_btree_insert(bma->cur, &i);
2043 if (error) 1964 if (error)
2044 goto done; 1965 goto done;
@@ -2053,30 +1974,9 @@ xfs_bmap_add_extent_delay_real(
2053 if (error) 1974 if (error)
2054 goto done; 1975 goto done;
2055 } 1976 }
2056 temp = xfs_bmap_worst_indlen(bma->ip, temp);
2057 temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
2058 diff = (int)(temp + temp2 -
2059 (startblockval(PREV.br_startblock) -
2060 (bma->cur ?
2061 bma->cur->bc_private.b.allocated : 0)));
2062 if (diff > 0) {
2063 error = xfs_mod_fdblocks(bma->ip->i_mount,
2064 -((int64_t)diff), false);
2065 ASSERT(!error);
2066 if (error)
2067 goto done;
2068 }
2069
2070 ep = xfs_iext_get_ext(ifp, bma->idx);
2071 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
2072 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2073 trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
2074 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2),
2075 nullstartblock((int)temp2));
2076 trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
2077 1977
2078 bma->idx++; 1978 da_new = startblockval(PREV.br_startblock) +
2079 da_new = temp + temp2; 1979 startblockval(RIGHT.br_startblock);
2080 break; 1980 break;
2081 1981
2082 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 1982 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
@@ -2110,19 +2010,17 @@ xfs_bmap_add_extent_delay_real(
2110 goto done; 2010 goto done;
2111 } 2011 }
2112 2012
2113 /* adjust for changes in reserved delayed indirect blocks */ 2013 if (bma->cur) {
2114 if (da_old || da_new) { 2014 da_new += bma->cur->bc_private.b.allocated;
2115 temp = da_new; 2015 bma->cur->bc_private.b.allocated = 0;
2116 if (bma->cur)
2117 temp += bma->cur->bc_private.b.allocated;
2118 if (temp < da_old)
2119 xfs_mod_fdblocks(bma->ip->i_mount,
2120 (int64_t)(da_old - temp), false);
2121 } 2016 }
2122 2017
2123 /* clear out the allocated field, done with it now in any case. */ 2018 /* adjust for changes in reserved delayed indirect blocks */
2124 if (bma->cur) 2019 if (da_new != da_old) {
2125 bma->cur->bc_private.b.allocated = 0; 2020 ASSERT(state == 0 || da_new < da_old);
2021 error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
2022 false);
2023 }
2126 2024
2127 xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); 2025 xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
2128done: 2026done:
@@ -2142,7 +2040,7 @@ xfs_bmap_add_extent_unwritten_real(
2142 struct xfs_trans *tp, 2040 struct xfs_trans *tp,
2143 xfs_inode_t *ip, /* incore inode pointer */ 2041 xfs_inode_t *ip, /* incore inode pointer */
2144 int whichfork, 2042 int whichfork,
2145 xfs_extnum_t *idx, /* extent number to update/insert */ 2043 struct xfs_iext_cursor *icur,
2146 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 2044 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
2147 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 2045 xfs_bmbt_irec_t *new, /* new data to add to file extents */
2148 xfs_fsblock_t *first, /* pointer to firstblock variable */ 2046 xfs_fsblock_t *first, /* pointer to firstblock variable */
@@ -2150,28 +2048,22 @@ xfs_bmap_add_extent_unwritten_real(
2150 int *logflagsp) /* inode logging flags */ 2048 int *logflagsp) /* inode logging flags */
2151{ 2049{
2152 xfs_btree_cur_t *cur; /* btree cursor */ 2050 xfs_btree_cur_t *cur; /* btree cursor */
2153 xfs_bmbt_rec_host_t *ep; /* extent entry for idx */
2154 int error; /* error return value */ 2051 int error; /* error return value */
2155 int i; /* temp state */ 2052 int i; /* temp state */
2156 xfs_ifork_t *ifp; /* inode fork pointer */ 2053 xfs_ifork_t *ifp; /* inode fork pointer */
2157 xfs_fileoff_t new_endoff; /* end offset of new entry */ 2054 xfs_fileoff_t new_endoff; /* end offset of new entry */
2158 xfs_exntst_t newext; /* new extent state */
2159 xfs_exntst_t oldext; /* old extent state */
2160 xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ 2055 xfs_bmbt_irec_t r[3]; /* neighbor extent entries */
2161 /* left is 0, right is 1, prev is 2 */ 2056 /* left is 0, right is 1, prev is 2 */
2162 int rval=0; /* return value (logging flags) */ 2057 int rval=0; /* return value (logging flags) */
2163 int state = 0;/* state bits, accessed thru macros */ 2058 int state = xfs_bmap_fork_to_state(whichfork);
2164 struct xfs_mount *mp = ip->i_mount; 2059 struct xfs_mount *mp = ip->i_mount;
2060 struct xfs_bmbt_irec old;
2165 2061
2166 *logflagsp = 0; 2062 *logflagsp = 0;
2167 2063
2168 cur = *curp; 2064 cur = *curp;
2169 ifp = XFS_IFORK_PTR(ip, whichfork); 2065 ifp = XFS_IFORK_PTR(ip, whichfork);
2170 if (whichfork == XFS_COW_FORK)
2171 state |= BMAP_COWFORK;
2172 2066
2173 ASSERT(*idx >= 0);
2174 ASSERT(*idx <= xfs_iext_count(ifp));
2175 ASSERT(!isnullstartblock(new->br_startblock)); 2067 ASSERT(!isnullstartblock(new->br_startblock));
2176 2068
2177 XFS_STATS_INC(mp, xs_add_exlist); 2069 XFS_STATS_INC(mp, xs_add_exlist);
@@ -2184,12 +2076,8 @@ xfs_bmap_add_extent_unwritten_real(
2184 * Set up a bunch of variables to make the tests simpler. 2076 * Set up a bunch of variables to make the tests simpler.
2185 */ 2077 */
2186 error = 0; 2078 error = 0;
2187 ep = xfs_iext_get_ext(ifp, *idx); 2079 xfs_iext_get_extent(ifp, icur, &PREV);
2188 xfs_bmbt_get_all(ep, &PREV); 2080 ASSERT(new->br_state != PREV.br_state);
2189 newext = new->br_state;
2190 oldext = (newext == XFS_EXT_UNWRITTEN) ?
2191 XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
2192 ASSERT(PREV.br_state == oldext);
2193 new_endoff = new->br_startoff + new->br_blockcount; 2081 new_endoff = new->br_startoff + new->br_blockcount;
2194 ASSERT(PREV.br_startoff <= new->br_startoff); 2082 ASSERT(PREV.br_startoff <= new->br_startoff);
2195 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); 2083 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
@@ -2207,10 +2095,8 @@ xfs_bmap_add_extent_unwritten_real(
2207 * Check and set flags if this segment has a left neighbor. 2095 * Check and set flags if this segment has a left neighbor.
2208 * Don't set contiguous if the combined extent would be too large. 2096 * Don't set contiguous if the combined extent would be too large.
2209 */ 2097 */
2210 if (*idx > 0) { 2098 if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
2211 state |= BMAP_LEFT_VALID; 2099 state |= BMAP_LEFT_VALID;
2212 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
2213
2214 if (isnullstartblock(LEFT.br_startblock)) 2100 if (isnullstartblock(LEFT.br_startblock))
2215 state |= BMAP_LEFT_DELAY; 2101 state |= BMAP_LEFT_DELAY;
2216 } 2102 }
@@ -2218,7 +2104,7 @@ xfs_bmap_add_extent_unwritten_real(
2218 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && 2104 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2219 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && 2105 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2220 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && 2106 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2221 LEFT.br_state == newext && 2107 LEFT.br_state == new->br_state &&
2222 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) 2108 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2223 state |= BMAP_LEFT_CONTIG; 2109 state |= BMAP_LEFT_CONTIG;
2224 2110
@@ -2227,9 +2113,8 @@ xfs_bmap_add_extent_unwritten_real(
2227 * Don't set contiguous if the combined extent would be too large. 2113 * Don't set contiguous if the combined extent would be too large.
2228 * Also check for all-three-contiguous being too large. 2114 * Also check for all-three-contiguous being too large.
2229 */ 2115 */
2230 if (*idx < xfs_iext_count(ifp) - 1) { 2116 if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
2231 state |= BMAP_RIGHT_VALID; 2117 state |= BMAP_RIGHT_VALID;
2232 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
2233 if (isnullstartblock(RIGHT.br_startblock)) 2118 if (isnullstartblock(RIGHT.br_startblock))
2234 state |= BMAP_RIGHT_DELAY; 2119 state |= BMAP_RIGHT_DELAY;
2235 } 2120 }
@@ -2237,7 +2122,7 @@ xfs_bmap_add_extent_unwritten_real(
2237 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && 2122 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2238 new_endoff == RIGHT.br_startoff && 2123 new_endoff == RIGHT.br_startoff &&
2239 new->br_startblock + new->br_blockcount == RIGHT.br_startblock && 2124 new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2240 newext == RIGHT.br_state && 2125 new->br_state == RIGHT.br_state &&
2241 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && 2126 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2242 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | 2127 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2243 BMAP_RIGHT_FILLING)) != 2128 BMAP_RIGHT_FILLING)) !=
@@ -2258,24 +2143,20 @@ xfs_bmap_add_extent_unwritten_real(
2258 * Setting all of a previous oldext extent to newext. 2143 * Setting all of a previous oldext extent to newext.
2259 * The left and right neighbors are both contiguous with new. 2144 * The left and right neighbors are both contiguous with new.
2260 */ 2145 */
2261 --*idx; 2146 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2262
2263 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2264 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
2265 LEFT.br_blockcount + PREV.br_blockcount +
2266 RIGHT.br_blockcount);
2267 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2268 2147
2269 xfs_iext_remove(ip, *idx + 1, 2, state); 2148 xfs_iext_remove(ip, icur, state);
2149 xfs_iext_remove(ip, icur, state);
2150 xfs_iext_prev(ifp, icur);
2151 xfs_iext_update_extent(ip, state, icur, &LEFT);
2270 XFS_IFORK_NEXT_SET(ip, whichfork, 2152 XFS_IFORK_NEXT_SET(ip, whichfork,
2271 XFS_IFORK_NEXTENTS(ip, whichfork) - 2); 2153 XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
2272 if (cur == NULL) 2154 if (cur == NULL)
2273 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2155 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2274 else { 2156 else {
2275 rval = XFS_ILOG_CORE; 2157 rval = XFS_ILOG_CORE;
2276 if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, 2158 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2277 RIGHT.br_startblock, 2159 if (error)
2278 RIGHT.br_blockcount, &i)))
2279 goto done; 2160 goto done;
2280 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2161 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2281 if ((error = xfs_btree_delete(cur, &i))) 2162 if ((error = xfs_btree_delete(cur, &i)))
@@ -2290,10 +2171,8 @@ xfs_bmap_add_extent_unwritten_real(
2290 if ((error = xfs_btree_decrement(cur, 0, &i))) 2171 if ((error = xfs_btree_decrement(cur, 0, &i)))
2291 goto done; 2172 goto done;
2292 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2173 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2293 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 2174 error = xfs_bmbt_update(cur, &LEFT);
2294 LEFT.br_startblock, 2175 if (error)
2295 LEFT.br_blockcount + PREV.br_blockcount +
2296 RIGHT.br_blockcount, LEFT.br_state)))
2297 goto done; 2176 goto done;
2298 } 2177 }
2299 break; 2178 break;
@@ -2303,23 +2182,19 @@ xfs_bmap_add_extent_unwritten_real(
2303 * Setting all of a previous oldext extent to newext. 2182 * Setting all of a previous oldext extent to newext.
2304 * The left neighbor is contiguous, the right is not. 2183 * The left neighbor is contiguous, the right is not.
2305 */ 2184 */
2306 --*idx; 2185 LEFT.br_blockcount += PREV.br_blockcount;
2307 2186
2308 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2187 xfs_iext_remove(ip, icur, state);
2309 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), 2188 xfs_iext_prev(ifp, icur);
2310 LEFT.br_blockcount + PREV.br_blockcount); 2189 xfs_iext_update_extent(ip, state, icur, &LEFT);
2311 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2312
2313 xfs_iext_remove(ip, *idx + 1, 1, state);
2314 XFS_IFORK_NEXT_SET(ip, whichfork, 2190 XFS_IFORK_NEXT_SET(ip, whichfork,
2315 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 2191 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2316 if (cur == NULL) 2192 if (cur == NULL)
2317 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2193 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2318 else { 2194 else {
2319 rval = XFS_ILOG_CORE; 2195 rval = XFS_ILOG_CORE;
2320 if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, 2196 error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2321 PREV.br_startblock, PREV.br_blockcount, 2197 if (error)
2322 &i)))
2323 goto done; 2198 goto done;
2324 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2199 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2325 if ((error = xfs_btree_delete(cur, &i))) 2200 if ((error = xfs_btree_delete(cur, &i)))
@@ -2328,10 +2203,8 @@ xfs_bmap_add_extent_unwritten_real(
2328 if ((error = xfs_btree_decrement(cur, 0, &i))) 2203 if ((error = xfs_btree_decrement(cur, 0, &i)))
2329 goto done; 2204 goto done;
2330 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2205 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2331 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 2206 error = xfs_bmbt_update(cur, &LEFT);
2332 LEFT.br_startblock, 2207 if (error)
2333 LEFT.br_blockcount + PREV.br_blockcount,
2334 LEFT.br_state)))
2335 goto done; 2208 goto done;
2336 } 2209 }
2337 break; 2210 break;
@@ -2341,21 +2214,22 @@ xfs_bmap_add_extent_unwritten_real(
2341 * Setting all of a previous oldext extent to newext. 2214 * Setting all of a previous oldext extent to newext.
2342 * The right neighbor is contiguous, the left is not. 2215 * The right neighbor is contiguous, the left is not.
2343 */ 2216 */
2344 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2217 PREV.br_blockcount += RIGHT.br_blockcount;
2345 xfs_bmbt_set_blockcount(ep, 2218 PREV.br_state = new->br_state;
2346 PREV.br_blockcount + RIGHT.br_blockcount); 2219
2347 xfs_bmbt_set_state(ep, newext); 2220 xfs_iext_next(ifp, icur);
2348 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2221 xfs_iext_remove(ip, icur, state);
2349 xfs_iext_remove(ip, *idx + 1, 1, state); 2222 xfs_iext_prev(ifp, icur);
2223 xfs_iext_update_extent(ip, state, icur, &PREV);
2224
2350 XFS_IFORK_NEXT_SET(ip, whichfork, 2225 XFS_IFORK_NEXT_SET(ip, whichfork,
2351 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 2226 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2352 if (cur == NULL) 2227 if (cur == NULL)
2353 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2228 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2354 else { 2229 else {
2355 rval = XFS_ILOG_CORE; 2230 rval = XFS_ILOG_CORE;
2356 if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, 2231 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2357 RIGHT.br_startblock, 2232 if (error)
2358 RIGHT.br_blockcount, &i)))
2359 goto done; 2233 goto done;
2360 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2234 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2361 if ((error = xfs_btree_delete(cur, &i))) 2235 if ((error = xfs_btree_delete(cur, &i)))
@@ -2364,10 +2238,8 @@ xfs_bmap_add_extent_unwritten_real(
2364 if ((error = xfs_btree_decrement(cur, 0, &i))) 2238 if ((error = xfs_btree_decrement(cur, 0, &i)))
2365 goto done; 2239 goto done;
2366 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2240 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2367 if ((error = xfs_bmbt_update(cur, new->br_startoff, 2241 error = xfs_bmbt_update(cur, &PREV);
2368 new->br_startblock, 2242 if (error)
2369 new->br_blockcount + RIGHT.br_blockcount,
2370 newext)))
2371 goto done; 2243 goto done;
2372 } 2244 }
2373 break; 2245 break;
@@ -2378,22 +2250,19 @@ xfs_bmap_add_extent_unwritten_real(
2378 * Neither the left nor right neighbors are contiguous with 2250 * Neither the left nor right neighbors are contiguous with
2379 * the new one. 2251 * the new one.
2380 */ 2252 */
2381 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2253 PREV.br_state = new->br_state;
2382 xfs_bmbt_set_state(ep, newext); 2254 xfs_iext_update_extent(ip, state, icur, &PREV);
2383 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2384 2255
2385 if (cur == NULL) 2256 if (cur == NULL)
2386 rval = XFS_ILOG_DEXT; 2257 rval = XFS_ILOG_DEXT;
2387 else { 2258 else {
2388 rval = 0; 2259 rval = 0;
2389 if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, 2260 error = xfs_bmbt_lookup_eq(cur, new, &i);
2390 new->br_startblock, new->br_blockcount, 2261 if (error)
2391 &i)))
2392 goto done; 2262 goto done;
2393 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2263 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2394 if ((error = xfs_bmbt_update(cur, new->br_startoff, 2264 error = xfs_bmbt_update(cur, &PREV);
2395 new->br_startblock, new->br_blockcount, 2265 if (error)
2396 newext)))
2397 goto done; 2266 goto done;
2398 } 2267 }
2399 break; 2268 break;
@@ -2403,43 +2272,32 @@ xfs_bmap_add_extent_unwritten_real(
2403 * Setting the first part of a previous oldext extent to newext. 2272 * Setting the first part of a previous oldext extent to newext.
2404 * The left neighbor is contiguous. 2273 * The left neighbor is contiguous.
2405 */ 2274 */
2406 trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); 2275 LEFT.br_blockcount += new->br_blockcount;
2407 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), 2276
2408 LEFT.br_blockcount + new->br_blockcount); 2277 old = PREV;
2409 xfs_bmbt_set_startoff(ep, 2278 PREV.br_startoff += new->br_blockcount;
2410 PREV.br_startoff + new->br_blockcount); 2279 PREV.br_startblock += new->br_blockcount;
2411 trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); 2280 PREV.br_blockcount -= new->br_blockcount;
2412 2281
2413 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2282 xfs_iext_update_extent(ip, state, icur, &PREV);
2414 xfs_bmbt_set_startblock(ep, 2283 xfs_iext_prev(ifp, icur);
2415 new->br_startblock + new->br_blockcount); 2284 xfs_iext_update_extent(ip, state, icur, &LEFT);
2416 xfs_bmbt_set_blockcount(ep,
2417 PREV.br_blockcount - new->br_blockcount);
2418 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2419
2420 --*idx;
2421 2285
2422 if (cur == NULL) 2286 if (cur == NULL)
2423 rval = XFS_ILOG_DEXT; 2287 rval = XFS_ILOG_DEXT;
2424 else { 2288 else {
2425 rval = 0; 2289 rval = 0;
2426 if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, 2290 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2427 PREV.br_startblock, PREV.br_blockcount, 2291 if (error)
2428 &i)))
2429 goto done; 2292 goto done;
2430 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2293 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2431 if ((error = xfs_bmbt_update(cur, 2294 error = xfs_bmbt_update(cur, &PREV);
2432 PREV.br_startoff + new->br_blockcount, 2295 if (error)
2433 PREV.br_startblock + new->br_blockcount,
2434 PREV.br_blockcount - new->br_blockcount,
2435 oldext)))
2436 goto done; 2296 goto done;
2437 if ((error = xfs_btree_decrement(cur, 0, &i))) 2297 error = xfs_btree_decrement(cur, 0, &i);
2298 if (error)
2438 goto done; 2299 goto done;
2439 error = xfs_bmbt_update(cur, LEFT.br_startoff, 2300 error = xfs_bmbt_update(cur, &LEFT);
2440 LEFT.br_startblock,
2441 LEFT.br_blockcount + new->br_blockcount,
2442 LEFT.br_state);
2443 if (error) 2301 if (error)
2444 goto done; 2302 goto done;
2445 } 2303 }
@@ -2450,32 +2308,25 @@ xfs_bmap_add_extent_unwritten_real(
2450 * Setting the first part of a previous oldext extent to newext. 2308 * Setting the first part of a previous oldext extent to newext.
2451 * The left neighbor is not contiguous. 2309 * The left neighbor is not contiguous.
2452 */ 2310 */
2453 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2311 old = PREV;
2454 ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); 2312 PREV.br_startoff += new->br_blockcount;
2455 xfs_bmbt_set_startoff(ep, new_endoff); 2313 PREV.br_startblock += new->br_blockcount;
2456 xfs_bmbt_set_blockcount(ep, 2314 PREV.br_blockcount -= new->br_blockcount;
2457 PREV.br_blockcount - new->br_blockcount); 2315
2458 xfs_bmbt_set_startblock(ep, 2316 xfs_iext_update_extent(ip, state, icur, &PREV);
2459 new->br_startblock + new->br_blockcount); 2317 xfs_iext_insert(ip, icur, new, state);
2460 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2461
2462 xfs_iext_insert(ip, *idx, 1, new, state);
2463 XFS_IFORK_NEXT_SET(ip, whichfork, 2318 XFS_IFORK_NEXT_SET(ip, whichfork,
2464 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 2319 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2465 if (cur == NULL) 2320 if (cur == NULL)
2466 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2321 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2467 else { 2322 else {
2468 rval = XFS_ILOG_CORE; 2323 rval = XFS_ILOG_CORE;
2469 if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, 2324 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2470 PREV.br_startblock, PREV.br_blockcount, 2325 if (error)
2471 &i)))
2472 goto done; 2326 goto done;
2473 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2327 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2474 if ((error = xfs_bmbt_update(cur, 2328 error = xfs_bmbt_update(cur, &PREV);
2475 PREV.br_startoff + new->br_blockcount, 2329 if (error)
2476 PREV.br_startblock + new->br_blockcount,
2477 PREV.br_blockcount - new->br_blockcount,
2478 oldext)))
2479 goto done; 2330 goto done;
2480 cur->bc_rec.b = *new; 2331 cur->bc_rec.b = *new;
2481 if ((error = xfs_btree_insert(cur, &i))) 2332 if ((error = xfs_btree_insert(cur, &i)))
@@ -2489,39 +2340,33 @@ xfs_bmap_add_extent_unwritten_real(
2489 * Setting the last part of a previous oldext extent to newext. 2340 * Setting the last part of a previous oldext extent to newext.
2490 * The right neighbor is contiguous with the new allocation. 2341 * The right neighbor is contiguous with the new allocation.
2491 */ 2342 */
2492 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2343 old = PREV;
2493 xfs_bmbt_set_blockcount(ep, 2344 PREV.br_blockcount -= new->br_blockcount;
2494 PREV.br_blockcount - new->br_blockcount);
2495 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2496 2345
2497 ++*idx; 2346 RIGHT.br_startoff = new->br_startoff;
2347 RIGHT.br_startblock = new->br_startblock;
2348 RIGHT.br_blockcount += new->br_blockcount;
2498 2349
2499 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2350 xfs_iext_update_extent(ip, state, icur, &PREV);
2500 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), 2351 xfs_iext_next(ifp, icur);
2501 new->br_startoff, new->br_startblock, 2352 xfs_iext_update_extent(ip, state, icur, &RIGHT);
2502 new->br_blockcount + RIGHT.br_blockcount, newext);
2503 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2504 2353
2505 if (cur == NULL) 2354 if (cur == NULL)
2506 rval = XFS_ILOG_DEXT; 2355 rval = XFS_ILOG_DEXT;
2507 else { 2356 else {
2508 rval = 0; 2357 rval = 0;
2509 if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, 2358 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2510 PREV.br_startblock, 2359 if (error)
2511 PREV.br_blockcount, &i)))
2512 goto done; 2360 goto done;
2513 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2361 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2514 if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 2362 error = xfs_bmbt_update(cur, &PREV);
2515 PREV.br_startblock, 2363 if (error)
2516 PREV.br_blockcount - new->br_blockcount,
2517 oldext)))
2518 goto done; 2364 goto done;
2519 if ((error = xfs_btree_increment(cur, 0, &i))) 2365 error = xfs_btree_increment(cur, 0, &i);
2366 if (error)
2520 goto done; 2367 goto done;
2521 if ((error = xfs_bmbt_update(cur, new->br_startoff, 2368 error = xfs_bmbt_update(cur, &RIGHT);
2522 new->br_startblock, 2369 if (error)
2523 new->br_blockcount + RIGHT.br_blockcount,
2524 newext)))
2525 goto done; 2370 goto done;
2526 } 2371 }
2527 break; 2372 break;
@@ -2531,13 +2376,12 @@ xfs_bmap_add_extent_unwritten_real(
2531 * Setting the last part of a previous oldext extent to newext. 2376 * Setting the last part of a previous oldext extent to newext.
2532 * The right neighbor is not contiguous. 2377 * The right neighbor is not contiguous.
2533 */ 2378 */
2534 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2379 old = PREV;
2535 xfs_bmbt_set_blockcount(ep, 2380 PREV.br_blockcount -= new->br_blockcount;
2536 PREV.br_blockcount - new->br_blockcount);
2537 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2538 2381
2539 ++*idx; 2382 xfs_iext_update_extent(ip, state, icur, &PREV);
2540 xfs_iext_insert(ip, *idx, 1, new, state); 2383 xfs_iext_next(ifp, icur);
2384 xfs_iext_insert(ip, icur, new, state);
2541 2385
2542 XFS_IFORK_NEXT_SET(ip, whichfork, 2386 XFS_IFORK_NEXT_SET(ip, whichfork,
2543 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 2387 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
@@ -2545,22 +2389,17 @@ xfs_bmap_add_extent_unwritten_real(
2545 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2389 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2546 else { 2390 else {
2547 rval = XFS_ILOG_CORE; 2391 rval = XFS_ILOG_CORE;
2548 if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, 2392 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2549 PREV.br_startblock, PREV.br_blockcount, 2393 if (error)
2550 &i)))
2551 goto done; 2394 goto done;
2552 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2395 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2553 if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 2396 error = xfs_bmbt_update(cur, &PREV);
2554 PREV.br_startblock, 2397 if (error)
2555 PREV.br_blockcount - new->br_blockcount,
2556 oldext)))
2557 goto done; 2398 goto done;
2558 if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, 2399 error = xfs_bmbt_lookup_eq(cur, new, &i);
2559 new->br_startblock, new->br_blockcount, 2400 if (error)
2560 &i)))
2561 goto done; 2401 goto done;
2562 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 2402 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2563 cur->bc_rec.b.br_state = XFS_EXT_NORM;
2564 if ((error = xfs_btree_insert(cur, &i))) 2403 if ((error = xfs_btree_insert(cur, &i)))
2565 goto done; 2404 goto done;
2566 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2405 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -2573,20 +2412,20 @@ xfs_bmap_add_extent_unwritten_real(
2573 * newext. Contiguity is impossible here. 2412 * newext. Contiguity is impossible here.
2574 * One extent becomes three extents. 2413 * One extent becomes three extents.
2575 */ 2414 */
2576 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2415 old = PREV;
2577 xfs_bmbt_set_blockcount(ep, 2416 PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2578 new->br_startoff - PREV.br_startoff);
2579 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2580 2417
2581 r[0] = *new; 2418 r[0] = *new;
2582 r[1].br_startoff = new_endoff; 2419 r[1].br_startoff = new_endoff;
2583 r[1].br_blockcount = 2420 r[1].br_blockcount =
2584 PREV.br_startoff + PREV.br_blockcount - new_endoff; 2421 old.br_startoff + old.br_blockcount - new_endoff;
2585 r[1].br_startblock = new->br_startblock + new->br_blockcount; 2422 r[1].br_startblock = new->br_startblock + new->br_blockcount;
2586 r[1].br_state = oldext; 2423 r[1].br_state = PREV.br_state;
2587 2424
2588 ++*idx; 2425 xfs_iext_update_extent(ip, state, icur, &PREV);
2589 xfs_iext_insert(ip, *idx, 2, &r[0], state); 2426 xfs_iext_next(ifp, icur);
2427 xfs_iext_insert(ip, icur, &r[1], state);
2428 xfs_iext_insert(ip, icur, &r[0], state);
2590 2429
2591 XFS_IFORK_NEXT_SET(ip, whichfork, 2430 XFS_IFORK_NEXT_SET(ip, whichfork,
2592 XFS_IFORK_NEXTENTS(ip, whichfork) + 2); 2431 XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
@@ -2594,20 +2433,16 @@ xfs_bmap_add_extent_unwritten_real(
2594 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2433 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2595 else { 2434 else {
2596 rval = XFS_ILOG_CORE; 2435 rval = XFS_ILOG_CORE;
2597 if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, 2436 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2598 PREV.br_startblock, PREV.br_blockcount, 2437 if (error)
2599 &i)))
2600 goto done; 2438 goto done;
2601 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2439 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2602 /* new right extent - oldext */ 2440 /* new right extent - oldext */
2603 if ((error = xfs_bmbt_update(cur, r[1].br_startoff, 2441 error = xfs_bmbt_update(cur, &r[1]);
2604 r[1].br_startblock, r[1].br_blockcount, 2442 if (error)
2605 r[1].br_state)))
2606 goto done; 2443 goto done;
2607 /* new left extent - oldext */ 2444 /* new left extent - oldext */
2608 cur->bc_rec.b = PREV; 2445 cur->bc_rec.b = PREV;
2609 cur->bc_rec.b.br_blockcount =
2610 new->br_startoff - PREV.br_startoff;
2611 if ((error = xfs_btree_insert(cur, &i))) 2446 if ((error = xfs_btree_insert(cur, &i)))
2612 goto done; 2447 goto done;
2613 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2448 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -2616,13 +2451,11 @@ xfs_bmap_add_extent_unwritten_real(
2616 * we are about to insert as we can't trust it after 2451 * we are about to insert as we can't trust it after
2617 * the previous insert. 2452 * the previous insert.
2618 */ 2453 */
2619 if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, 2454 error = xfs_bmbt_lookup_eq(cur, new, &i);
2620 new->br_startblock, new->br_blockcount, 2455 if (error)
2621 &i)))
2622 goto done; 2456 goto done;
2623 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 2457 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2624 /* new middle extent - newext */ 2458 /* new middle extent - newext */
2625 cur->bc_rec.b.br_state = new->br_state;
2626 if ((error = xfs_btree_insert(cur, &i))) 2459 if ((error = xfs_btree_insert(cur, &i)))
2627 goto done; 2460 goto done;
2628 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2461 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -2681,7 +2514,7 @@ STATIC void
2681xfs_bmap_add_extent_hole_delay( 2514xfs_bmap_add_extent_hole_delay(
2682 xfs_inode_t *ip, /* incore inode pointer */ 2515 xfs_inode_t *ip, /* incore inode pointer */
2683 int whichfork, 2516 int whichfork,
2684 xfs_extnum_t *idx, /* extent number to update/insert */ 2517 struct xfs_iext_cursor *icur,
2685 xfs_bmbt_irec_t *new) /* new data to add to file extents */ 2518 xfs_bmbt_irec_t *new) /* new data to add to file extents */
2686{ 2519{
2687 xfs_ifork_t *ifp; /* inode fork pointer */ 2520 xfs_ifork_t *ifp; /* inode fork pointer */
@@ -2689,22 +2522,17 @@ xfs_bmap_add_extent_hole_delay(
2689 xfs_filblks_t newlen=0; /* new indirect size */ 2522 xfs_filblks_t newlen=0; /* new indirect size */
2690 xfs_filblks_t oldlen=0; /* old indirect size */ 2523 xfs_filblks_t oldlen=0; /* old indirect size */
2691 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 2524 xfs_bmbt_irec_t right; /* right neighbor extent entry */
2692 int state; /* state bits, accessed thru macros */ 2525 int state = xfs_bmap_fork_to_state(whichfork);
2693 xfs_filblks_t temp=0; /* temp for indirect calculations */ 2526 xfs_filblks_t temp; /* temp for indirect calculations */
2694 2527
2695 ifp = XFS_IFORK_PTR(ip, whichfork); 2528 ifp = XFS_IFORK_PTR(ip, whichfork);
2696 state = 0;
2697 if (whichfork == XFS_COW_FORK)
2698 state |= BMAP_COWFORK;
2699 ASSERT(isnullstartblock(new->br_startblock)); 2529 ASSERT(isnullstartblock(new->br_startblock));
2700 2530
2701 /* 2531 /*
2702 * Check and set flags if this segment has a left neighbor 2532 * Check and set flags if this segment has a left neighbor
2703 */ 2533 */
2704 if (*idx > 0) { 2534 if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2705 state |= BMAP_LEFT_VALID; 2535 state |= BMAP_LEFT_VALID;
2706 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
2707
2708 if (isnullstartblock(left.br_startblock)) 2536 if (isnullstartblock(left.br_startblock))
2709 state |= BMAP_LEFT_DELAY; 2537 state |= BMAP_LEFT_DELAY;
2710 } 2538 }
@@ -2713,10 +2541,8 @@ xfs_bmap_add_extent_hole_delay(
2713 * Check and set flags if the current (right) segment exists. 2541 * Check and set flags if the current (right) segment exists.
2714 * If it doesn't exist, we're converting the hole at end-of-file. 2542 * If it doesn't exist, we're converting the hole at end-of-file.
2715 */ 2543 */
2716 if (*idx < xfs_iext_count(ifp)) { 2544 if (xfs_iext_get_extent(ifp, icur, &right)) {
2717 state |= BMAP_RIGHT_VALID; 2545 state |= BMAP_RIGHT_VALID;
2718 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
2719
2720 if (isnullstartblock(right.br_startblock)) 2546 if (isnullstartblock(right.br_startblock))
2721 state |= BMAP_RIGHT_DELAY; 2547 state |= BMAP_RIGHT_DELAY;
2722 } 2548 }
@@ -2748,22 +2574,20 @@ xfs_bmap_add_extent_hole_delay(
2748 * on the left and on the right. 2574 * on the left and on the right.
2749 * Merge all three into a single extent record. 2575 * Merge all three into a single extent record.
2750 */ 2576 */
2751 --*idx;
2752 temp = left.br_blockcount + new->br_blockcount + 2577 temp = left.br_blockcount + new->br_blockcount +
2753 right.br_blockcount; 2578 right.br_blockcount;
2754 2579
2755 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2756 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
2757 oldlen = startblockval(left.br_startblock) + 2580 oldlen = startblockval(left.br_startblock) +
2758 startblockval(new->br_startblock) + 2581 startblockval(new->br_startblock) +
2759 startblockval(right.br_startblock); 2582 startblockval(right.br_startblock);
2760 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2583 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2761 oldlen); 2584 oldlen);
2762 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), 2585 left.br_startblock = nullstartblock(newlen);
2763 nullstartblock((int)newlen)); 2586 left.br_blockcount = temp;
2764 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2765 2587
2766 xfs_iext_remove(ip, *idx + 1, 1, state); 2588 xfs_iext_remove(ip, icur, state);
2589 xfs_iext_prev(ifp, icur);
2590 xfs_iext_update_extent(ip, state, icur, &left);
2767 break; 2591 break;
2768 2592
2769 case BMAP_LEFT_CONTIG: 2593 case BMAP_LEFT_CONTIG:
@@ -2772,18 +2596,17 @@ xfs_bmap_add_extent_hole_delay(
2772 * on the left. 2596 * on the left.
2773 * Merge the new allocation with the left neighbor. 2597 * Merge the new allocation with the left neighbor.
2774 */ 2598 */
2775 --*idx;
2776 temp = left.br_blockcount + new->br_blockcount; 2599 temp = left.br_blockcount + new->br_blockcount;
2777 2600
2778 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2779 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
2780 oldlen = startblockval(left.br_startblock) + 2601 oldlen = startblockval(left.br_startblock) +
2781 startblockval(new->br_startblock); 2602 startblockval(new->br_startblock);
2782 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2603 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2783 oldlen); 2604 oldlen);
2784 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), 2605 left.br_blockcount = temp;
2785 nullstartblock((int)newlen)); 2606 left.br_startblock = nullstartblock(newlen);
2786 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2607
2608 xfs_iext_prev(ifp, icur);
2609 xfs_iext_update_extent(ip, state, icur, &left);
2787 break; 2610 break;
2788 2611
2789 case BMAP_RIGHT_CONTIG: 2612 case BMAP_RIGHT_CONTIG:
@@ -2792,16 +2615,15 @@ xfs_bmap_add_extent_hole_delay(
2792 * on the right. 2615 * on the right.
2793 * Merge the new allocation with the right neighbor. 2616 * Merge the new allocation with the right neighbor.
2794 */ 2617 */
2795 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2796 temp = new->br_blockcount + right.br_blockcount; 2618 temp = new->br_blockcount + right.br_blockcount;
2797 oldlen = startblockval(new->br_startblock) + 2619 oldlen = startblockval(new->br_startblock) +
2798 startblockval(right.br_startblock); 2620 startblockval(right.br_startblock);
2799 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2621 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2800 oldlen); 2622 oldlen);
2801 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), 2623 right.br_startoff = new->br_startoff;
2802 new->br_startoff, 2624 right.br_startblock = nullstartblock(newlen);
2803 nullstartblock((int)newlen), temp, right.br_state); 2625 right.br_blockcount = temp;
2804 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2626 xfs_iext_update_extent(ip, state, icur, &right);
2805 break; 2627 break;
2806 2628
2807 case 0: 2629 case 0:
@@ -2811,7 +2633,7 @@ xfs_bmap_add_extent_hole_delay(
2811 * Insert a new entry. 2633 * Insert a new entry.
2812 */ 2634 */
2813 oldlen = newlen = 0; 2635 oldlen = newlen = 0;
2814 xfs_iext_insert(ip, *idx, 1, new, state); 2636 xfs_iext_insert(ip, icur, new, state);
2815 break; 2637 break;
2816 } 2638 }
2817 if (oldlen != newlen) { 2639 if (oldlen != newlen) {
@@ -2832,7 +2654,7 @@ xfs_bmap_add_extent_hole_real(
2832 struct xfs_trans *tp, 2654 struct xfs_trans *tp,
2833 struct xfs_inode *ip, 2655 struct xfs_inode *ip,
2834 int whichfork, 2656 int whichfork,
2835 xfs_extnum_t *idx, 2657 struct xfs_iext_cursor *icur,
2836 struct xfs_btree_cur **curp, 2658 struct xfs_btree_cur **curp,
2837 struct xfs_bmbt_irec *new, 2659 struct xfs_bmbt_irec *new,
2838 xfs_fsblock_t *first, 2660 xfs_fsblock_t *first,
@@ -2847,27 +2669,19 @@ xfs_bmap_add_extent_hole_real(
2847 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 2669 xfs_bmbt_irec_t left; /* left neighbor extent entry */
2848 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 2670 xfs_bmbt_irec_t right; /* right neighbor extent entry */
2849 int rval=0; /* return value (logging flags) */ 2671 int rval=0; /* return value (logging flags) */
2850 int state; /* state bits, accessed thru macros */ 2672 int state = xfs_bmap_fork_to_state(whichfork);
2673 struct xfs_bmbt_irec old;
2851 2674
2852 ASSERT(*idx >= 0);
2853 ASSERT(*idx <= xfs_iext_count(ifp));
2854 ASSERT(!isnullstartblock(new->br_startblock)); 2675 ASSERT(!isnullstartblock(new->br_startblock));
2855 ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); 2676 ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
2856 2677
2857 XFS_STATS_INC(mp, xs_add_exlist); 2678 XFS_STATS_INC(mp, xs_add_exlist);
2858 2679
2859 state = 0;
2860 if (whichfork == XFS_ATTR_FORK)
2861 state |= BMAP_ATTRFORK;
2862 if (whichfork == XFS_COW_FORK)
2863 state |= BMAP_COWFORK;
2864
2865 /* 2680 /*
2866 * Check and set flags if this segment has a left neighbor. 2681 * Check and set flags if this segment has a left neighbor.
2867 */ 2682 */
2868 if (*idx > 0) { 2683 if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2869 state |= BMAP_LEFT_VALID; 2684 state |= BMAP_LEFT_VALID;
2870 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
2871 if (isnullstartblock(left.br_startblock)) 2685 if (isnullstartblock(left.br_startblock))
2872 state |= BMAP_LEFT_DELAY; 2686 state |= BMAP_LEFT_DELAY;
2873 } 2687 }
@@ -2876,9 +2690,8 @@ xfs_bmap_add_extent_hole_real(
2876 * Check and set flags if this segment has a current value. 2690 * Check and set flags if this segment has a current value.
2877 * Not true if we're inserting into the "hole" at eof. 2691 * Not true if we're inserting into the "hole" at eof.
2878 */ 2692 */
2879 if (*idx < xfs_iext_count(ifp)) { 2693 if (xfs_iext_get_extent(ifp, icur, &right)) {
2880 state |= BMAP_RIGHT_VALID; 2694 state |= BMAP_RIGHT_VALID;
2881 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
2882 if (isnullstartblock(right.br_startblock)) 2695 if (isnullstartblock(right.br_startblock))
2883 state |= BMAP_RIGHT_DELAY; 2696 state |= BMAP_RIGHT_DELAY;
2884 } 2697 }
@@ -2915,14 +2728,11 @@ xfs_bmap_add_extent_hole_real(
2915 * left and on the right. 2728 * left and on the right.
2916 * Merge all three into a single extent record. 2729 * Merge all three into a single extent record.
2917 */ 2730 */
2918 --*idx; 2731 left.br_blockcount += new->br_blockcount + right.br_blockcount;
2919 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2920 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
2921 left.br_blockcount + new->br_blockcount +
2922 right.br_blockcount);
2923 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2924 2732
2925 xfs_iext_remove(ip, *idx + 1, 1, state); 2733 xfs_iext_remove(ip, icur, state);
2734 xfs_iext_prev(ifp, icur);
2735 xfs_iext_update_extent(ip, state, icur, &left);
2926 2736
2927 XFS_IFORK_NEXT_SET(ip, whichfork, 2737 XFS_IFORK_NEXT_SET(ip, whichfork,
2928 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 2738 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
@@ -2930,9 +2740,7 @@ xfs_bmap_add_extent_hole_real(
2930 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 2740 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2931 } else { 2741 } else {
2932 rval = XFS_ILOG_CORE; 2742 rval = XFS_ILOG_CORE;
2933 error = xfs_bmbt_lookup_eq(cur, right.br_startoff, 2743 error = xfs_bmbt_lookup_eq(cur, &right, &i);
2934 right.br_startblock, right.br_blockcount,
2935 &i);
2936 if (error) 2744 if (error)
2937 goto done; 2745 goto done;
2938 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2746 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -2944,12 +2752,7 @@ xfs_bmap_add_extent_hole_real(
2944 if (error) 2752 if (error)
2945 goto done; 2753 goto done;
2946 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2754 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2947 error = xfs_bmbt_update(cur, left.br_startoff, 2755 error = xfs_bmbt_update(cur, &left);
2948 left.br_startblock,
2949 left.br_blockcount +
2950 new->br_blockcount +
2951 right.br_blockcount,
2952 left.br_state);
2953 if (error) 2756 if (error)
2954 goto done; 2757 goto done;
2955 } 2758 }
@@ -2961,27 +2764,21 @@ xfs_bmap_add_extent_hole_real(
2961 * on the left. 2764 * on the left.
2962 * Merge the new allocation with the left neighbor. 2765 * Merge the new allocation with the left neighbor.
2963 */ 2766 */
2964 --*idx; 2767 old = left;
2965 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2768 left.br_blockcount += new->br_blockcount;
2966 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), 2769
2967 left.br_blockcount + new->br_blockcount); 2770 xfs_iext_prev(ifp, icur);
2968 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2771 xfs_iext_update_extent(ip, state, icur, &left);
2969 2772
2970 if (cur == NULL) { 2773 if (cur == NULL) {
2971 rval = xfs_ilog_fext(whichfork); 2774 rval = xfs_ilog_fext(whichfork);
2972 } else { 2775 } else {
2973 rval = 0; 2776 rval = 0;
2974 error = xfs_bmbt_lookup_eq(cur, left.br_startoff, 2777 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2975 left.br_startblock, left.br_blockcount,
2976 &i);
2977 if (error) 2778 if (error)
2978 goto done; 2779 goto done;
2979 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2780 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2980 error = xfs_bmbt_update(cur, left.br_startoff, 2781 error = xfs_bmbt_update(cur, &left);
2981 left.br_startblock,
2982 left.br_blockcount +
2983 new->br_blockcount,
2984 left.br_state);
2985 if (error) 2782 if (error)
2986 goto done; 2783 goto done;
2987 } 2784 }
@@ -2993,29 +2790,22 @@ xfs_bmap_add_extent_hole_real(
2993 * on the right. 2790 * on the right.
2994 * Merge the new allocation with the right neighbor. 2791 * Merge the new allocation with the right neighbor.
2995 */ 2792 */
2996 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 2793 old = right;
2997 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), 2794
2998 new->br_startoff, new->br_startblock, 2795 right.br_startoff = new->br_startoff;
2999 new->br_blockcount + right.br_blockcount, 2796 right.br_startblock = new->br_startblock;
3000 right.br_state); 2797 right.br_blockcount += new->br_blockcount;
3001 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2798 xfs_iext_update_extent(ip, state, icur, &right);
3002 2799
3003 if (cur == NULL) { 2800 if (cur == NULL) {
3004 rval = xfs_ilog_fext(whichfork); 2801 rval = xfs_ilog_fext(whichfork);
3005 } else { 2802 } else {
3006 rval = 0; 2803 rval = 0;
3007 error = xfs_bmbt_lookup_eq(cur, 2804 error = xfs_bmbt_lookup_eq(cur, &old, &i);
3008 right.br_startoff,
3009 right.br_startblock,
3010 right.br_blockcount, &i);
3011 if (error) 2805 if (error)
3012 goto done; 2806 goto done;
3013 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2807 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3014 error = xfs_bmbt_update(cur, new->br_startoff, 2808 error = xfs_bmbt_update(cur, &right);
3015 new->br_startblock,
3016 new->br_blockcount +
3017 right.br_blockcount,
3018 right.br_state);
3019 if (error) 2809 if (error)
3020 goto done; 2810 goto done;
3021 } 2811 }
@@ -3027,21 +2817,17 @@ xfs_bmap_add_extent_hole_real(
3027 * real allocation. 2817 * real allocation.
3028 * Insert a new entry. 2818 * Insert a new entry.
3029 */ 2819 */
3030 xfs_iext_insert(ip, *idx, 1, new, state); 2820 xfs_iext_insert(ip, icur, new, state);
3031 XFS_IFORK_NEXT_SET(ip, whichfork, 2821 XFS_IFORK_NEXT_SET(ip, whichfork,
3032 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 2822 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
3033 if (cur == NULL) { 2823 if (cur == NULL) {
3034 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 2824 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
3035 } else { 2825 } else {
3036 rval = XFS_ILOG_CORE; 2826 rval = XFS_ILOG_CORE;
3037 error = xfs_bmbt_lookup_eq(cur, 2827 error = xfs_bmbt_lookup_eq(cur, new, &i);
3038 new->br_startoff,
3039 new->br_startblock,
3040 new->br_blockcount, &i);
3041 if (error) 2828 if (error)
3042 goto done; 2829 goto done;
3043 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 2830 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
3044 cur->bc_rec.b.br_state = new->br_state;
3045 error = xfs_btree_insert(cur, &i); 2831 error = xfs_btree_insert(cur, &i);
3046 if (error) 2832 if (error)
3047 goto done; 2833 goto done;
@@ -3981,7 +3767,7 @@ xfs_bmapi_read(
3981 struct xfs_bmbt_irec got; 3767 struct xfs_bmbt_irec got;
3982 xfs_fileoff_t obno; 3768 xfs_fileoff_t obno;
3983 xfs_fileoff_t end; 3769 xfs_fileoff_t end;
3984 xfs_extnum_t idx; 3770 struct xfs_iext_cursor icur;
3985 int error; 3771 int error;
3986 bool eof = false; 3772 bool eof = false;
3987 int n = 0; 3773 int n = 0;
@@ -4023,7 +3809,7 @@ xfs_bmapi_read(
4023 return error; 3809 return error;
4024 } 3810 }
4025 3811
4026 if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) 3812 if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
4027 eof = true; 3813 eof = true;
4028 end = bno + len; 3814 end = bno + len;
4029 obno = bno; 3815 obno = bno;
@@ -4055,7 +3841,7 @@ xfs_bmapi_read(
4055 break; 3841 break;
4056 3842
4057 /* Else go on to the next record. */ 3843 /* Else go on to the next record. */
4058 if (!xfs_iext_get_extent(ifp, ++idx, &got)) 3844 if (!xfs_iext_next_extent(ifp, &icur, &got))
4059 eof = true; 3845 eof = true;
4060 } 3846 }
4061 *nmap = n; 3847 *nmap = n;
@@ -4083,7 +3869,7 @@ xfs_bmapi_reserve_delalloc(
4083 xfs_filblks_t len, 3869 xfs_filblks_t len,
4084 xfs_filblks_t prealloc, 3870 xfs_filblks_t prealloc,
4085 struct xfs_bmbt_irec *got, 3871 struct xfs_bmbt_irec *got,
4086 xfs_extnum_t *lastx, 3872 struct xfs_iext_cursor *icur,
4087 int eof) 3873 int eof)
4088{ 3874{
4089 struct xfs_mount *mp = ip->i_mount; 3875 struct xfs_mount *mp = ip->i_mount;
@@ -4113,7 +3899,7 @@ xfs_bmapi_reserve_delalloc(
4113 if (extsz) { 3899 if (extsz) {
4114 struct xfs_bmbt_irec prev; 3900 struct xfs_bmbt_irec prev;
4115 3901
4116 if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev)) 3902 if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
4117 prev.br_startoff = NULLFILEOFF; 3903 prev.br_startoff = NULLFILEOFF;
4118 3904
4119 error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof, 3905 error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof,
@@ -4162,7 +3948,7 @@ xfs_bmapi_reserve_delalloc(
4162 got->br_blockcount = alen; 3948 got->br_blockcount = alen;
4163 got->br_state = XFS_EXT_NORM; 3949 got->br_state = XFS_EXT_NORM;
4164 3950
4165 xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); 3951 xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
4166 3952
4167 /* 3953 /*
4168 * Tag the inode if blocks were preallocated. Note that COW fork 3954 * Tag the inode if blocks were preallocated. Note that COW fork
@@ -4207,10 +3993,7 @@ xfs_bmapi_allocate(
4207 if (bma->wasdel) { 3993 if (bma->wasdel) {
4208 bma->length = (xfs_extlen_t)bma->got.br_blockcount; 3994 bma->length = (xfs_extlen_t)bma->got.br_blockcount;
4209 bma->offset = bma->got.br_startoff; 3995 bma->offset = bma->got.br_startoff;
4210 if (bma->idx) { 3996 xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev);
4211 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
4212 &bma->prev);
4213 }
4214 } else { 3997 } else {
4215 bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN); 3998 bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
4216 if (!bma->eof) 3999 if (!bma->eof)
@@ -4295,7 +4078,7 @@ xfs_bmapi_allocate(
4295 error = xfs_bmap_add_extent_delay_real(bma, whichfork); 4078 error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4296 else 4079 else
4297 error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip, 4080 error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4298 whichfork, &bma->idx, &bma->cur, &bma->got, 4081 whichfork, &bma->icur, &bma->cur, &bma->got,
4299 bma->firstblock, bma->dfops, &bma->logflags); 4082 bma->firstblock, bma->dfops, &bma->logflags);
4300 4083
4301 bma->logflags |= tmp_logflags; 4084 bma->logflags |= tmp_logflags;
@@ -4307,7 +4090,7 @@ xfs_bmapi_allocate(
4307 * or xfs_bmap_add_extent_hole_real might have merged it into one of 4090 * or xfs_bmap_add_extent_hole_real might have merged it into one of
4308 * the neighbouring ones. 4091 * the neighbouring ones.
4309 */ 4092 */
4310 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got); 4093 xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4311 4094
4312 ASSERT(bma->got.br_startoff <= bma->offset); 4095 ASSERT(bma->got.br_startoff <= bma->offset);
4313 ASSERT(bma->got.br_startoff + bma->got.br_blockcount >= 4096 ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
@@ -4365,8 +4148,8 @@ xfs_bmapi_convert_unwritten(
4365 } 4148 }
4366 4149
4367 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork, 4150 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4368 &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops, 4151 &bma->icur, &bma->cur, mval, bma->firstblock,
4369 &tmp_logflags); 4152 bma->dfops, &tmp_logflags);
4370 /* 4153 /*
4371 * Log the inode core unconditionally in the unwritten extent conversion 4154 * Log the inode core unconditionally in the unwritten extent conversion
4372 * path because the conversion might not have done so (e.g., if the 4155 * path because the conversion might not have done so (e.g., if the
@@ -4388,7 +4171,7 @@ xfs_bmapi_convert_unwritten(
4388 * xfs_bmap_add_extent_unwritten_real might have merged it into one 4171 * xfs_bmap_add_extent_unwritten_real might have merged it into one
4389 * of the neighbouring ones. 4172 * of the neighbouring ones.
4390 */ 4173 */
4391 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got); 4174 xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4392 4175
4393 /* 4176 /*
4394 * We may have combined previously unwritten space with written space, 4177 * We may have combined previously unwritten space with written space,
@@ -4507,9 +4290,9 @@ xfs_bmapi_write(
4507 end = bno + len; 4290 end = bno + len;
4508 obno = bno; 4291 obno = bno;
4509 4292
4510 if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.idx, &bma.got)) 4293 if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4511 eof = true; 4294 eof = true;
4512 if (!xfs_iext_get_extent(ifp, bma.idx - 1, &bma.prev)) 4295 if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4513 bma.prev.br_startoff = NULLFILEOFF; 4296 bma.prev.br_startoff = NULLFILEOFF;
4514 bma.tp = tp; 4297 bma.tp = tp;
4515 bma.ip = ip; 4298 bma.ip = ip;
@@ -4551,7 +4334,8 @@ xfs_bmapi_write(
4551 * First, deal with the hole before the allocated space 4334 * First, deal with the hole before the allocated space
4552 * that we found, if any. 4335 * that we found, if any.
4553 */ 4336 */
4554 if (need_alloc || wasdelay) { 4337 if ((need_alloc || wasdelay) &&
4338 !(flags & XFS_BMAPI_CONVERT_ONLY)) {
4555 bma.eof = eof; 4339 bma.eof = eof;
4556 bma.conv = !!(flags & XFS_BMAPI_CONVERT); 4340 bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4557 bma.wasdel = wasdelay; 4341 bma.wasdel = wasdelay;
@@ -4614,7 +4398,7 @@ xfs_bmapi_write(
4614 4398
4615 /* Else go on to the next record. */ 4399 /* Else go on to the next record. */
4616 bma.prev = bma.got; 4400 bma.prev = bma.got;
4617 if (!xfs_iext_get_extent(ifp, ++bma.idx, &bma.got)) 4401 if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4618 eof = true; 4402 eof = true;
4619 } 4403 }
4620 *nmap = n; 4404 *nmap = n;
@@ -4687,7 +4471,7 @@ xfs_bmapi_remap(
4687 struct xfs_btree_cur *cur = NULL; 4471 struct xfs_btree_cur *cur = NULL;
4688 xfs_fsblock_t firstblock = NULLFSBLOCK; 4472 xfs_fsblock_t firstblock = NULLFSBLOCK;
4689 struct xfs_bmbt_irec got; 4473 struct xfs_bmbt_irec got;
4690 xfs_extnum_t idx; 4474 struct xfs_iext_cursor icur;
4691 int logflags = 0, error; 4475 int logflags = 0, error;
4692 4476
4693 ASSERT(len > 0); 4477 ASSERT(len > 0);
@@ -4711,7 +4495,7 @@ xfs_bmapi_remap(
4711 return error; 4495 return error;
4712 } 4496 }
4713 4497
4714 if (xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) { 4498 if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4715 /* make sure we only reflink into a hole. */ 4499 /* make sure we only reflink into a hole. */
4716 ASSERT(got.br_startoff > bno); 4500 ASSERT(got.br_startoff > bno);
4717 ASSERT(got.br_startoff - bno >= len); 4501 ASSERT(got.br_startoff - bno >= len);
@@ -4732,8 +4516,8 @@ xfs_bmapi_remap(
4732 got.br_blockcount = len; 4516 got.br_blockcount = len;
4733 got.br_state = XFS_EXT_NORM; 4517 got.br_state = XFS_EXT_NORM;
4734 4518
4735 error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &idx, &cur, 4519 error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &icur,
4736 &got, &firstblock, dfops, &logflags); 4520 &cur, &got, &firstblock, dfops, &logflags);
4737 if (error) 4521 if (error)
4738 goto error0; 4522 goto error0;
4739 4523
@@ -4849,7 +4633,7 @@ int
4849xfs_bmap_del_extent_delay( 4633xfs_bmap_del_extent_delay(
4850 struct xfs_inode *ip, 4634 struct xfs_inode *ip,
4851 int whichfork, 4635 int whichfork,
4852 xfs_extnum_t *idx, 4636 struct xfs_iext_cursor *icur,
4853 struct xfs_bmbt_irec *got, 4637 struct xfs_bmbt_irec *got,
4854 struct xfs_bmbt_irec *del) 4638 struct xfs_bmbt_irec *del)
4855{ 4639{
@@ -4859,7 +4643,8 @@ xfs_bmap_del_extent_delay(
4859 int64_t da_old, da_new, da_diff = 0; 4643 int64_t da_old, da_new, da_diff = 0;
4860 xfs_fileoff_t del_endoff, got_endoff; 4644 xfs_fileoff_t del_endoff, got_endoff;
4861 xfs_filblks_t got_indlen, new_indlen, stolen; 4645 xfs_filblks_t got_indlen, new_indlen, stolen;
4862 int error = 0, state = 0; 4646 int state = xfs_bmap_fork_to_state(whichfork);
4647 int error = 0;
4863 bool isrt; 4648 bool isrt;
4864 4649
4865 XFS_STATS_INC(mp, xs_del_exlist); 4650 XFS_STATS_INC(mp, xs_del_exlist);
@@ -4870,8 +4655,6 @@ xfs_bmap_del_extent_delay(
4870 da_old = startblockval(got->br_startblock); 4655 da_old = startblockval(got->br_startblock);
4871 da_new = 0; 4656 da_new = 0;
4872 4657
4873 ASSERT(*idx >= 0);
4874 ASSERT(*idx <= xfs_iext_count(ifp));
4875 ASSERT(del->br_blockcount > 0); 4658 ASSERT(del->br_blockcount > 0);
4876 ASSERT(got->br_startoff <= del->br_startoff); 4659 ASSERT(got->br_startoff <= del->br_startoff);
4877 ASSERT(got_endoff >= del_endoff); 4660 ASSERT(got_endoff >= del_endoff);
@@ -4895,46 +4678,39 @@ xfs_bmap_del_extent_delay(
4895 return error; 4678 return error;
4896 ip->i_delayed_blks -= del->br_blockcount; 4679 ip->i_delayed_blks -= del->br_blockcount;
4897 4680
4898 if (whichfork == XFS_COW_FORK)
4899 state |= BMAP_COWFORK;
4900
4901 if (got->br_startoff == del->br_startoff) 4681 if (got->br_startoff == del->br_startoff)
4902 state |= BMAP_LEFT_CONTIG; 4682 state |= BMAP_LEFT_FILLING;
4903 if (got_endoff == del_endoff) 4683 if (got_endoff == del_endoff)
4904 state |= BMAP_RIGHT_CONTIG; 4684 state |= BMAP_RIGHT_FILLING;
4905 4685
4906 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { 4686 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4907 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 4687 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4908 /* 4688 /*
4909 * Matches the whole extent. Delete the entry. 4689 * Matches the whole extent. Delete the entry.
4910 */ 4690 */
4911 xfs_iext_remove(ip, *idx, 1, state); 4691 xfs_iext_remove(ip, icur, state);
4912 --*idx; 4692 xfs_iext_prev(ifp, icur);
4913 break; 4693 break;
4914 case BMAP_LEFT_CONTIG: 4694 case BMAP_LEFT_FILLING:
4915 /* 4695 /*
4916 * Deleting the first part of the extent. 4696 * Deleting the first part of the extent.
4917 */ 4697 */
4918 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4919 got->br_startoff = del_endoff; 4698 got->br_startoff = del_endoff;
4920 got->br_blockcount -= del->br_blockcount; 4699 got->br_blockcount -= del->br_blockcount;
4921 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, 4700 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4922 got->br_blockcount), da_old); 4701 got->br_blockcount), da_old);
4923 got->br_startblock = nullstartblock((int)da_new); 4702 got->br_startblock = nullstartblock((int)da_new);
4924 xfs_iext_update_extent(ifp, *idx, got); 4703 xfs_iext_update_extent(ip, state, icur, got);
4925 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4926 break; 4704 break;
4927 case BMAP_RIGHT_CONTIG: 4705 case BMAP_RIGHT_FILLING:
4928 /* 4706 /*
4929 * Deleting the last part of the extent. 4707 * Deleting the last part of the extent.
4930 */ 4708 */
4931 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4932 got->br_blockcount = got->br_blockcount - del->br_blockcount; 4709 got->br_blockcount = got->br_blockcount - del->br_blockcount;
4933 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, 4710 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4934 got->br_blockcount), da_old); 4711 got->br_blockcount), da_old);
4935 got->br_startblock = nullstartblock((int)da_new); 4712 got->br_startblock = nullstartblock((int)da_new);
4936 xfs_iext_update_extent(ifp, *idx, got); 4713 xfs_iext_update_extent(ip, state, icur, got);
4937 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4938 break; 4714 break;
4939 case 0: 4715 case 0:
4940 /* 4716 /*
@@ -4946,8 +4722,6 @@ xfs_bmap_del_extent_delay(
4946 * Warn if either of the new indlen reservations is zero as this 4722 * Warn if either of the new indlen reservations is zero as this
4947 * can lead to delalloc problems. 4723 * can lead to delalloc problems.
4948 */ 4724 */
4949 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4950
4951 got->br_blockcount = del->br_startoff - got->br_startoff; 4725 got->br_blockcount = del->br_startoff - got->br_startoff;
4952 got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount); 4726 got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4953 4727
@@ -4959,15 +4733,14 @@ xfs_bmap_del_extent_delay(
4959 del->br_blockcount); 4733 del->br_blockcount);
4960 4734
4961 got->br_startblock = nullstartblock((int)got_indlen); 4735 got->br_startblock = nullstartblock((int)got_indlen);
4962 xfs_iext_update_extent(ifp, *idx, got);
4963 trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_);
4964 4736
4965 new.br_startoff = del_endoff; 4737 new.br_startoff = del_endoff;
4966 new.br_state = got->br_state; 4738 new.br_state = got->br_state;
4967 new.br_startblock = nullstartblock((int)new_indlen); 4739 new.br_startblock = nullstartblock((int)new_indlen);
4968 4740
4969 ++*idx; 4741 xfs_iext_update_extent(ip, state, icur, got);
4970 xfs_iext_insert(ip, *idx, 1, &new, state); 4742 xfs_iext_next(ifp, icur);
4743 xfs_iext_insert(ip, icur, &new, state);
4971 4744
4972 da_new = got_indlen + new_indlen - stolen; 4745 da_new = got_indlen + new_indlen - stolen;
4973 del->br_blockcount -= stolen; 4746 del->br_blockcount -= stolen;
@@ -4986,7 +4759,7 @@ xfs_bmap_del_extent_delay(
4986void 4759void
4987xfs_bmap_del_extent_cow( 4760xfs_bmap_del_extent_cow(
4988 struct xfs_inode *ip, 4761 struct xfs_inode *ip,
4989 xfs_extnum_t *idx, 4762 struct xfs_iext_cursor *icur,
4990 struct xfs_bmbt_irec *got, 4763 struct xfs_bmbt_irec *got,
4991 struct xfs_bmbt_irec *del) 4764 struct xfs_bmbt_irec *del)
4992{ 4765{
@@ -5001,75 +4774,67 @@ xfs_bmap_del_extent_cow(
5001 del_endoff = del->br_startoff + del->br_blockcount; 4774 del_endoff = del->br_startoff + del->br_blockcount;
5002 got_endoff = got->br_startoff + got->br_blockcount; 4775 got_endoff = got->br_startoff + got->br_blockcount;
5003 4776
5004 ASSERT(*idx >= 0);
5005 ASSERT(*idx <= xfs_iext_count(ifp));
5006 ASSERT(del->br_blockcount > 0); 4777 ASSERT(del->br_blockcount > 0);
5007 ASSERT(got->br_startoff <= del->br_startoff); 4778 ASSERT(got->br_startoff <= del->br_startoff);
5008 ASSERT(got_endoff >= del_endoff); 4779 ASSERT(got_endoff >= del_endoff);
5009 ASSERT(!isnullstartblock(got->br_startblock)); 4780 ASSERT(!isnullstartblock(got->br_startblock));
5010 4781
5011 if (got->br_startoff == del->br_startoff) 4782 if (got->br_startoff == del->br_startoff)
5012 state |= BMAP_LEFT_CONTIG; 4783 state |= BMAP_LEFT_FILLING;
5013 if (got_endoff == del_endoff) 4784 if (got_endoff == del_endoff)
5014 state |= BMAP_RIGHT_CONTIG; 4785 state |= BMAP_RIGHT_FILLING;
5015 4786
5016 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { 4787 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5017 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 4788 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5018 /* 4789 /*
5019 * Matches the whole extent. Delete the entry. 4790 * Matches the whole extent. Delete the entry.
5020 */ 4791 */
5021 xfs_iext_remove(ip, *idx, 1, state); 4792 xfs_iext_remove(ip, icur, state);
5022 --*idx; 4793 xfs_iext_prev(ifp, icur);
5023 break; 4794 break;
5024 case BMAP_LEFT_CONTIG: 4795 case BMAP_LEFT_FILLING:
5025 /* 4796 /*
5026 * Deleting the first part of the extent. 4797 * Deleting the first part of the extent.
5027 */ 4798 */
5028 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
5029 got->br_startoff = del_endoff; 4799 got->br_startoff = del_endoff;
5030 got->br_blockcount -= del->br_blockcount; 4800 got->br_blockcount -= del->br_blockcount;
5031 got->br_startblock = del->br_startblock + del->br_blockcount; 4801 got->br_startblock = del->br_startblock + del->br_blockcount;
5032 xfs_iext_update_extent(ifp, *idx, got); 4802 xfs_iext_update_extent(ip, state, icur, got);
5033 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5034 break; 4803 break;
5035 case BMAP_RIGHT_CONTIG: 4804 case BMAP_RIGHT_FILLING:
5036 /* 4805 /*
5037 * Deleting the last part of the extent. 4806 * Deleting the last part of the extent.
5038 */ 4807 */
5039 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
5040 got->br_blockcount -= del->br_blockcount; 4808 got->br_blockcount -= del->br_blockcount;
5041 xfs_iext_update_extent(ifp, *idx, got); 4809 xfs_iext_update_extent(ip, state, icur, got);
5042 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5043 break; 4810 break;
5044 case 0: 4811 case 0:
5045 /* 4812 /*
5046 * Deleting the middle of the extent. 4813 * Deleting the middle of the extent.
5047 */ 4814 */
5048 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
5049 got->br_blockcount = del->br_startoff - got->br_startoff; 4815 got->br_blockcount = del->br_startoff - got->br_startoff;
5050 xfs_iext_update_extent(ifp, *idx, got);
5051 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5052 4816
5053 new.br_startoff = del_endoff; 4817 new.br_startoff = del_endoff;
5054 new.br_blockcount = got_endoff - del_endoff; 4818 new.br_blockcount = got_endoff - del_endoff;
5055 new.br_state = got->br_state; 4819 new.br_state = got->br_state;
5056 new.br_startblock = del->br_startblock + del->br_blockcount; 4820 new.br_startblock = del->br_startblock + del->br_blockcount;
5057 4821
5058 ++*idx; 4822 xfs_iext_update_extent(ip, state, icur, got);
5059 xfs_iext_insert(ip, *idx, 1, &new, state); 4823 xfs_iext_next(ifp, icur);
4824 xfs_iext_insert(ip, icur, &new, state);
5060 break; 4825 break;
5061 } 4826 }
5062} 4827}
5063 4828
5064/* 4829/*
5065 * Called by xfs_bmapi to update file extent records and the btree 4830 * Called by xfs_bmapi to update file extent records and the btree
5066 * after removing space (or undoing a delayed allocation). 4831 * after removing space.
5067 */ 4832 */
5068STATIC int /* error */ 4833STATIC int /* error */
5069xfs_bmap_del_extent( 4834xfs_bmap_del_extent_real(
5070 xfs_inode_t *ip, /* incore inode pointer */ 4835 xfs_inode_t *ip, /* incore inode pointer */
5071 xfs_trans_t *tp, /* current transaction pointer */ 4836 xfs_trans_t *tp, /* current transaction pointer */
5072 xfs_extnum_t *idx, /* extent number to update/delete */ 4837 struct xfs_iext_cursor *icur,
5073 struct xfs_defer_ops *dfops, /* list of extents to be freed */ 4838 struct xfs_defer_ops *dfops, /* list of extents to be freed */
5074 xfs_btree_cur_t *cur, /* if null, not a btree */ 4839 xfs_btree_cur_t *cur, /* if null, not a btree */
5075 xfs_bmbt_irec_t *del, /* data to remove from extents */ 4840 xfs_bmbt_irec_t *del, /* data to remove from extents */
@@ -5077,16 +4842,12 @@ xfs_bmap_del_extent(
5077 int whichfork, /* data or attr fork */ 4842 int whichfork, /* data or attr fork */
5078 int bflags) /* bmapi flags */ 4843 int bflags) /* bmapi flags */
5079{ 4844{
5080 xfs_filblks_t da_new; /* new delay-alloc indirect blocks */
5081 xfs_filblks_t da_old; /* old delay-alloc indirect blocks */
5082 xfs_fsblock_t del_endblock=0; /* first block past del */ 4845 xfs_fsblock_t del_endblock=0; /* first block past del */
5083 xfs_fileoff_t del_endoff; /* first offset past del */ 4846 xfs_fileoff_t del_endoff; /* first offset past del */
5084 int delay; /* current block is delayed allocated */
5085 int do_fx; /* free extent at end of routine */ 4847 int do_fx; /* free extent at end of routine */
5086 xfs_bmbt_rec_host_t *ep; /* current extent entry pointer */
5087 int error; /* error return value */ 4848 int error; /* error return value */
5088 int flags; /* inode logging flags */ 4849 int flags = 0;/* inode logging flags */
5089 xfs_bmbt_irec_t got; /* current extent entry */ 4850 struct xfs_bmbt_irec got; /* current extent entry */
5090 xfs_fileoff_t got_endoff; /* first offset past got */ 4851 xfs_fileoff_t got_endoff; /* first offset past got */
5091 int i; /* temp state */ 4852 int i; /* temp state */
5092 xfs_ifork_t *ifp; /* inode fork pointer */ 4853 xfs_ifork_t *ifp; /* inode fork pointer */
@@ -5095,103 +4856,81 @@ xfs_bmap_del_extent(
5095 xfs_bmbt_irec_t new; /* new record to be inserted */ 4856 xfs_bmbt_irec_t new; /* new record to be inserted */
5096 /* REFERENCED */ 4857 /* REFERENCED */
5097 uint qfield; /* quota field to update */ 4858 uint qfield; /* quota field to update */
5098 xfs_filblks_t temp; /* for indirect length calculations */ 4859 int state = xfs_bmap_fork_to_state(whichfork);
5099 xfs_filblks_t temp2; /* for indirect length calculations */ 4860 struct xfs_bmbt_irec old;
5100 int state = 0;
5101 4861
5102 mp = ip->i_mount; 4862 mp = ip->i_mount;
5103 XFS_STATS_INC(mp, xs_del_exlist); 4863 XFS_STATS_INC(mp, xs_del_exlist);
5104 4864
5105 if (whichfork == XFS_ATTR_FORK)
5106 state |= BMAP_ATTRFORK;
5107 else if (whichfork == XFS_COW_FORK)
5108 state |= BMAP_COWFORK;
5109
5110 ifp = XFS_IFORK_PTR(ip, whichfork); 4865 ifp = XFS_IFORK_PTR(ip, whichfork);
5111 ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp)));
5112 ASSERT(del->br_blockcount > 0); 4866 ASSERT(del->br_blockcount > 0);
5113 ep = xfs_iext_get_ext(ifp, *idx); 4867 xfs_iext_get_extent(ifp, icur, &got);
5114 xfs_bmbt_get_all(ep, &got);
5115 ASSERT(got.br_startoff <= del->br_startoff); 4868 ASSERT(got.br_startoff <= del->br_startoff);
5116 del_endoff = del->br_startoff + del->br_blockcount; 4869 del_endoff = del->br_startoff + del->br_blockcount;
5117 got_endoff = got.br_startoff + got.br_blockcount; 4870 got_endoff = got.br_startoff + got.br_blockcount;
5118 ASSERT(got_endoff >= del_endoff); 4871 ASSERT(got_endoff >= del_endoff);
5119 delay = isnullstartblock(got.br_startblock); 4872 ASSERT(!isnullstartblock(got.br_startblock));
5120 ASSERT(isnullstartblock(del->br_startblock) == delay);
5121 flags = 0;
5122 qfield = 0; 4873 qfield = 0;
5123 error = 0; 4874 error = 0;
4875
5124 /* 4876 /*
5125 * If deleting a real allocation, must free up the disk space. 4877 * If it's the case where the directory code is running with no block
4878 * reservation, and the deleted block is in the middle of its extent,
4879 * and the resulting insert of an extent would cause transformation to
4880 * btree format, then reject it. The calling code will then swap blocks
4881 * around instead. We have to do this now, rather than waiting for the
4882 * conversion to btree format, since the transaction will be dirty then.
5126 */ 4883 */
5127 if (!delay) { 4884 if (tp->t_blk_res == 0 &&
5128 flags = XFS_ILOG_CORE; 4885 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
5129 /* 4886 XFS_IFORK_NEXTENTS(ip, whichfork) >=
5130 * Realtime allocation. Free it and record di_nblocks update. 4887 XFS_IFORK_MAXEXT(ip, whichfork) &&
5131 */ 4888 del->br_startoff > got.br_startoff && del_endoff < got_endoff)
5132 if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { 4889 return -ENOSPC;
5133 xfs_fsblock_t bno; 4890
5134 xfs_filblks_t len; 4891 flags = XFS_ILOG_CORE;
5135 4892 if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
5136 ASSERT(do_mod(del->br_blockcount, 4893 xfs_fsblock_t bno;
5137 mp->m_sb.sb_rextsize) == 0); 4894 xfs_filblks_t len;
5138 ASSERT(do_mod(del->br_startblock, 4895
5139 mp->m_sb.sb_rextsize) == 0); 4896 ASSERT(do_mod(del->br_blockcount, mp->m_sb.sb_rextsize) == 0);
5140 bno = del->br_startblock; 4897 ASSERT(do_mod(del->br_startblock, mp->m_sb.sb_rextsize) == 0);
5141 len = del->br_blockcount; 4898 bno = del->br_startblock;
5142 do_div(bno, mp->m_sb.sb_rextsize); 4899 len = del->br_blockcount;
5143 do_div(len, mp->m_sb.sb_rextsize); 4900 do_div(bno, mp->m_sb.sb_rextsize);
5144 error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); 4901 do_div(len, mp->m_sb.sb_rextsize);
5145 if (error) 4902 error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
5146 goto done; 4903 if (error)
5147 do_fx = 0; 4904 goto done;
5148 nblks = len * mp->m_sb.sb_rextsize;
5149 qfield = XFS_TRANS_DQ_RTBCOUNT;
5150 }
5151 /*
5152 * Ordinary allocation.
5153 */
5154 else {
5155 do_fx = 1;
5156 nblks = del->br_blockcount;
5157 qfield = XFS_TRANS_DQ_BCOUNT;
5158 }
5159 /*
5160 * Set up del_endblock and cur for later.
5161 */
5162 del_endblock = del->br_startblock + del->br_blockcount;
5163 if (cur) {
5164 if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5165 got.br_startblock, got.br_blockcount,
5166 &i)))
5167 goto done;
5168 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
5169 }
5170 da_old = da_new = 0;
5171 } else {
5172 da_old = startblockval(got.br_startblock);
5173 da_new = 0;
5174 nblks = 0;
5175 do_fx = 0; 4905 do_fx = 0;
4906 nblks = len * mp->m_sb.sb_rextsize;
4907 qfield = XFS_TRANS_DQ_RTBCOUNT;
4908 } else {
4909 do_fx = 1;
4910 nblks = del->br_blockcount;
4911 qfield = XFS_TRANS_DQ_BCOUNT;
5176 } 4912 }
5177 4913
5178 /* 4914 del_endblock = del->br_startblock + del->br_blockcount;
5179 * Set flag value to use in switch statement. 4915 if (cur) {
5180 * Left-contig is 2, right-contig is 1. 4916 error = xfs_bmbt_lookup_eq(cur, &got, &i);
5181 */ 4917 if (error)
5182 switch (((got.br_startoff == del->br_startoff) << 1) | 4918 goto done;
5183 (got_endoff == del_endoff)) { 4919 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
5184 case 3: 4920 }
4921
4922 if (got.br_startoff == del->br_startoff)
4923 state |= BMAP_LEFT_FILLING;
4924 if (got_endoff == del_endoff)
4925 state |= BMAP_RIGHT_FILLING;
4926
4927 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4928 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5185 /* 4929 /*
5186 * Matches the whole extent. Delete the entry. 4930 * Matches the whole extent. Delete the entry.
5187 */ 4931 */
5188 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 4932 xfs_iext_remove(ip, icur, state);
5189 xfs_iext_remove(ip, *idx, 1, 4933 xfs_iext_prev(ifp, icur);
5190 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
5191 --*idx;
5192 if (delay)
5193 break;
5194
5195 XFS_IFORK_NEXT_SET(ip, whichfork, 4934 XFS_IFORK_NEXT_SET(ip, whichfork,
5196 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 4935 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
5197 flags |= XFS_ILOG_CORE; 4936 flags |= XFS_ILOG_CORE;
@@ -5203,168 +4942,106 @@ xfs_bmap_del_extent(
5203 goto done; 4942 goto done;
5204 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 4943 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
5205 break; 4944 break;
5206 4945 case BMAP_LEFT_FILLING:
5207 case 2:
5208 /* 4946 /*
5209 * Deleting the first part of the extent. 4947 * Deleting the first part of the extent.
5210 */ 4948 */
5211 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 4949 got.br_startoff = del_endoff;
5212 xfs_bmbt_set_startoff(ep, del_endoff); 4950 got.br_startblock = del_endblock;
5213 temp = got.br_blockcount - del->br_blockcount; 4951 got.br_blockcount -= del->br_blockcount;
5214 xfs_bmbt_set_blockcount(ep, temp); 4952 xfs_iext_update_extent(ip, state, icur, &got);
5215 if (delay) {
5216 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
5217 da_old);
5218 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
5219 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5220 da_new = temp;
5221 break;
5222 }
5223 xfs_bmbt_set_startblock(ep, del_endblock);
5224 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5225 if (!cur) { 4953 if (!cur) {
5226 flags |= xfs_ilog_fext(whichfork); 4954 flags |= xfs_ilog_fext(whichfork);
5227 break; 4955 break;
5228 } 4956 }
5229 if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock, 4957 error = xfs_bmbt_update(cur, &got);
5230 got.br_blockcount - del->br_blockcount, 4958 if (error)
5231 got.br_state)))
5232 goto done; 4959 goto done;
5233 break; 4960 break;
5234 4961 case BMAP_RIGHT_FILLING:
5235 case 1:
5236 /* 4962 /*
5237 * Deleting the last part of the extent. 4963 * Deleting the last part of the extent.
5238 */ 4964 */
5239 temp = got.br_blockcount - del->br_blockcount; 4965 got.br_blockcount -= del->br_blockcount;
5240 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 4966 xfs_iext_update_extent(ip, state, icur, &got);
5241 xfs_bmbt_set_blockcount(ep, temp);
5242 if (delay) {
5243 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
5244 da_old);
5245 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
5246 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5247 da_new = temp;
5248 break;
5249 }
5250 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5251 if (!cur) { 4967 if (!cur) {
5252 flags |= xfs_ilog_fext(whichfork); 4968 flags |= xfs_ilog_fext(whichfork);
5253 break; 4969 break;
5254 } 4970 }
5255 if ((error = xfs_bmbt_update(cur, got.br_startoff, 4971 error = xfs_bmbt_update(cur, &got);
5256 got.br_startblock, 4972 if (error)
5257 got.br_blockcount - del->br_blockcount,
5258 got.br_state)))
5259 goto done; 4973 goto done;
5260 break; 4974 break;
5261
5262 case 0: 4975 case 0:
5263 /* 4976 /*
5264 * Deleting the middle of the extent. 4977 * Deleting the middle of the extent.
5265 */ 4978 */
5266 temp = del->br_startoff - got.br_startoff; 4979 old = got;
5267 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); 4980
5268 xfs_bmbt_set_blockcount(ep, temp); 4981 got.br_blockcount = del->br_startoff - got.br_startoff;
4982 xfs_iext_update_extent(ip, state, icur, &got);
4983
5269 new.br_startoff = del_endoff; 4984 new.br_startoff = del_endoff;
5270 temp2 = got_endoff - del_endoff; 4985 new.br_blockcount = got_endoff - del_endoff;
5271 new.br_blockcount = temp2;
5272 new.br_state = got.br_state; 4986 new.br_state = got.br_state;
5273 if (!delay) { 4987 new.br_startblock = del_endblock;
5274 new.br_startblock = del_endblock; 4988
5275 flags |= XFS_ILOG_CORE; 4989 flags |= XFS_ILOG_CORE;
5276 if (cur) { 4990 if (cur) {
5277 if ((error = xfs_bmbt_update(cur, 4991 error = xfs_bmbt_update(cur, &got);
5278 got.br_startoff, 4992 if (error)
5279 got.br_startblock, temp, 4993 goto done;
5280 got.br_state))) 4994 error = xfs_btree_increment(cur, 0, &i);
5281 goto done; 4995 if (error)
5282 if ((error = xfs_btree_increment(cur, 0, &i))) 4996 goto done;
5283 goto done; 4997 cur->bc_rec.b = new;
5284 cur->bc_rec.b = new; 4998 error = xfs_btree_insert(cur, &i);
5285 error = xfs_btree_insert(cur, &i); 4999 if (error && error != -ENOSPC)
5286 if (error && error != -ENOSPC) 5000 goto done;
5287 goto done; 5001 /*
5002 * If get no-space back from btree insert, it tried a
5003 * split, and we have a zero block reservation. Fix up
5004 * our state and return the error.
5005 */
5006 if (error == -ENOSPC) {
5288 /* 5007 /*
5289 * If get no-space back from btree insert, 5008 * Reset the cursor, don't trust it after any
5290 * it tried a split, and we have a zero 5009 * insert operation.
5291 * block reservation.
5292 * Fix up our state and return the error.
5293 */ 5010 */
5294 if (error == -ENOSPC) { 5011 error = xfs_bmbt_lookup_eq(cur, &got, &i);
5295 /* 5012 if (error)
5296 * Reset the cursor, don't trust
5297 * it after any insert operation.
5298 */
5299 if ((error = xfs_bmbt_lookup_eq(cur,
5300 got.br_startoff,
5301 got.br_startblock,
5302 temp, &i)))
5303 goto done;
5304 XFS_WANT_CORRUPTED_GOTO(mp,
5305 i == 1, done);
5306 /*
5307 * Update the btree record back
5308 * to the original value.
5309 */
5310 if ((error = xfs_bmbt_update(cur,
5311 got.br_startoff,
5312 got.br_startblock,
5313 got.br_blockcount,
5314 got.br_state)))
5315 goto done;
5316 /*
5317 * Reset the extent record back
5318 * to the original value.
5319 */
5320 xfs_bmbt_set_blockcount(ep,
5321 got.br_blockcount);
5322 flags = 0;
5323 error = -ENOSPC;
5324 goto done; 5013 goto done;
5325 }
5326 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 5014 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
5327 } else 5015 /*
5328 flags |= xfs_ilog_fext(whichfork); 5016 * Update the btree record back
5329 XFS_IFORK_NEXT_SET(ip, whichfork, 5017 * to the original value.
5330 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 5018 */
5331 } else { 5019 error = xfs_bmbt_update(cur, &old);
5332 xfs_filblks_t stolen; 5020 if (error)
5333 ASSERT(whichfork == XFS_DATA_FORK); 5021 goto done;
5334 5022 /*
5335 /* 5023 * Reset the extent record back
5336 * Distribute the original indlen reservation across the 5024 * to the original value.
5337 * two new extents. Steal blocks from the deleted extent 5025 */
5338 * if necessary. Stealing blocks simply fudges the 5026 xfs_iext_update_extent(ip, state, icur, &old);
5339 * fdblocks accounting in xfs_bunmapi(). 5027 flags = 0;
5340 */ 5028 error = -ENOSPC;
5341 temp = xfs_bmap_worst_indlen(ip, got.br_blockcount); 5029 goto done;
5342 temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount); 5030 }
5343 stolen = xfs_bmap_split_indlen(da_old, &temp, &temp2, 5031 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
5344 del->br_blockcount); 5032 } else
5345 da_new = temp + temp2 - stolen; 5033 flags |= xfs_ilog_fext(whichfork);
5346 del->br_blockcount -= stolen; 5034 XFS_IFORK_NEXT_SET(ip, whichfork,
5347 5035 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
5348 /* 5036 xfs_iext_next(ifp, icur);
5349 * Set the reservation for each extent. Warn if either 5037 xfs_iext_insert(ip, icur, &new, state);
5350 * is zero as this can lead to delalloc problems.
5351 */
5352 WARN_ON_ONCE(!temp || !temp2);
5353 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
5354 new.br_startblock = nullstartblock((int)temp2);
5355 }
5356 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5357 xfs_iext_insert(ip, *idx + 1, 1, &new, state);
5358 ++*idx;
5359 break; 5038 break;
5360 } 5039 }
5361 5040
5362 /* remove reverse mapping */ 5041 /* remove reverse mapping */
5363 if (!delay) { 5042 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del);
5364 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del); 5043 if (error)
5365 if (error) 5044 goto done;
5366 goto done;
5367 }
5368 5045
5369 /* 5046 /*
5370 * If we need to, add to list of extents to delete. 5047 * If we need to, add to list of extents to delete.
@@ -5390,13 +5067,6 @@ xfs_bmap_del_extent(
5390 if (qfield && !(bflags & XFS_BMAPI_REMAP)) 5067 if (qfield && !(bflags & XFS_BMAPI_REMAP))
5391 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); 5068 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5392 5069
5393 /*
5394 * Account for change in delayed indirect blocks.
5395 * Nothing to do for disk quota accounting here.
5396 */
5397 ASSERT(da_old >= da_new);
5398 if (da_old > da_new)
5399 xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
5400done: 5070done:
5401 *logflagsp = flags; 5071 *logflagsp = flags;
5402 return error; 5072 return error;
@@ -5412,7 +5082,7 @@ int /* error */
5412__xfs_bunmapi( 5082__xfs_bunmapi(
5413 xfs_trans_t *tp, /* transaction pointer */ 5083 xfs_trans_t *tp, /* transaction pointer */
5414 struct xfs_inode *ip, /* incore inode */ 5084 struct xfs_inode *ip, /* incore inode */
5415 xfs_fileoff_t bno, /* starting offset to unmap */ 5085 xfs_fileoff_t start, /* first file offset deleted */
5416 xfs_filblks_t *rlen, /* i/o: amount remaining */ 5086 xfs_filblks_t *rlen, /* i/o: amount remaining */
5417 int flags, /* misc flags */ 5087 int flags, /* misc flags */
5418 xfs_extnum_t nexts, /* number of extents max */ 5088 xfs_extnum_t nexts, /* number of extents max */
@@ -5427,11 +5097,9 @@ __xfs_bunmapi(
5427 xfs_bmbt_irec_t got; /* current extent record */ 5097 xfs_bmbt_irec_t got; /* current extent record */
5428 xfs_ifork_t *ifp; /* inode fork pointer */ 5098 xfs_ifork_t *ifp; /* inode fork pointer */
5429 int isrt; /* freeing in rt area */ 5099 int isrt; /* freeing in rt area */
5430 xfs_extnum_t lastx; /* last extent index used */
5431 int logflags; /* transaction logging flags */ 5100 int logflags; /* transaction logging flags */
5432 xfs_extlen_t mod; /* rt extent offset */ 5101 xfs_extlen_t mod; /* rt extent offset */
5433 xfs_mount_t *mp; /* mount structure */ 5102 xfs_mount_t *mp; /* mount structure */
5434 xfs_fileoff_t start; /* first file offset deleted */
5435 int tmp_logflags; /* partial logging flags */ 5103 int tmp_logflags; /* partial logging flags */
5436 int wasdel; /* was a delayed alloc extent */ 5104 int wasdel; /* was a delayed alloc extent */
5437 int whichfork; /* data or attribute fork */ 5105 int whichfork; /* data or attribute fork */
@@ -5439,8 +5107,11 @@ __xfs_bunmapi(
5439 xfs_filblks_t len = *rlen; /* length to unmap in file */ 5107 xfs_filblks_t len = *rlen; /* length to unmap in file */
5440 xfs_fileoff_t max_len; 5108 xfs_fileoff_t max_len;
5441 xfs_agnumber_t prev_agno = NULLAGNUMBER, agno; 5109 xfs_agnumber_t prev_agno = NULLAGNUMBER, agno;
5110 xfs_fileoff_t end;
5111 struct xfs_iext_cursor icur;
5112 bool done = false;
5442 5113
5443 trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); 5114 trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
5444 5115
5445 whichfork = xfs_bmapi_whichfork(flags); 5116 whichfork = xfs_bmapi_whichfork(flags);
5446 ASSERT(whichfork != XFS_COW_FORK); 5117 ASSERT(whichfork != XFS_COW_FORK);
@@ -5479,18 +5150,13 @@ __xfs_bunmapi(
5479 } 5150 }
5480 XFS_STATS_INC(mp, xs_blk_unmap); 5151 XFS_STATS_INC(mp, xs_blk_unmap);
5481 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); 5152 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5482 start = bno; 5153 end = start + len;
5483 bno = start + len - 1;
5484 5154
5485 /* 5155 if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
5486 * Check to see if the given block number is past the end of the 5156 *rlen = 0;
5487 * file, back up to the last block if so... 5157 return 0;
5488 */
5489 if (!xfs_iext_lookup_extent(ip, ifp, bno, &lastx, &got)) {
5490 ASSERT(lastx > 0);
5491 xfs_iext_get_extent(ifp, --lastx, &got);
5492 bno = got.br_startoff + got.br_blockcount - 1;
5493 } 5158 }
5159 end--;
5494 5160
5495 logflags = 0; 5161 logflags = 0;
5496 if (ifp->if_flags & XFS_IFBROOT) { 5162 if (ifp->if_flags & XFS_IFBROOT) {
@@ -5513,24 +5179,24 @@ __xfs_bunmapi(
5513 } 5179 }
5514 5180
5515 extno = 0; 5181 extno = 0;
5516 while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 && 5182 while (end != (xfs_fileoff_t)-1 && end >= start &&
5517 (nexts == 0 || extno < nexts) && max_len > 0) { 5183 (nexts == 0 || extno < nexts) && max_len > 0) {
5518 /* 5184 /*
5519 * Is the found extent after a hole in which bno lives? 5185 * Is the found extent after a hole in which end lives?
5520 * Just back up to the previous extent, if so. 5186 * Just back up to the previous extent, if so.
5521 */ 5187 */
5522 if (got.br_startoff > bno) { 5188 if (got.br_startoff > end &&
5523 if (--lastx < 0) 5189 !xfs_iext_prev_extent(ifp, &icur, &got)) {
5524 break; 5190 done = true;
5525 xfs_iext_get_extent(ifp, lastx, &got); 5191 break;
5526 } 5192 }
5527 /* 5193 /*
5528 * Is the last block of this extent before the range 5194 * Is the last block of this extent before the range
5529 * we're supposed to delete? If so, we're done. 5195 * we're supposed to delete? If so, we're done.
5530 */ 5196 */
5531 bno = XFS_FILEOFF_MIN(bno, 5197 end = XFS_FILEOFF_MIN(end,
5532 got.br_startoff + got.br_blockcount - 1); 5198 got.br_startoff + got.br_blockcount - 1);
5533 if (bno < start) 5199 if (end < start)
5534 break; 5200 break;
5535 /* 5201 /*
5536 * Then deal with the (possibly delayed) allocated space 5202 * Then deal with the (possibly delayed) allocated space
@@ -5555,8 +5221,8 @@ __xfs_bunmapi(
5555 if (!wasdel) 5221 if (!wasdel)
5556 del.br_startblock += start - got.br_startoff; 5222 del.br_startblock += start - got.br_startoff;
5557 } 5223 }
5558 if (del.br_startoff + del.br_blockcount > bno + 1) 5224 if (del.br_startoff + del.br_blockcount > end + 1)
5559 del.br_blockcount = bno + 1 - del.br_startoff; 5225 del.br_blockcount = end + 1 - del.br_startoff;
5560 5226
5561 /* How much can we safely unmap? */ 5227 /* How much can we safely unmap? */
5562 if (max_len < del.br_blockcount) { 5228 if (max_len < del.br_blockcount) {
@@ -5582,13 +5248,13 @@ __xfs_bunmapi(
5582 * This piece is unwritten, or we're not 5248 * This piece is unwritten, or we're not
5583 * using unwritten extents. Skip over it. 5249 * using unwritten extents. Skip over it.
5584 */ 5250 */
5585 ASSERT(bno >= mod); 5251 ASSERT(end >= mod);
5586 bno -= mod > del.br_blockcount ? 5252 end -= mod > del.br_blockcount ?
5587 del.br_blockcount : mod; 5253 del.br_blockcount : mod;
5588 if (bno < got.br_startoff) { 5254 if (end < got.br_startoff &&
5589 if (--lastx >= 0) 5255 !xfs_iext_prev_extent(ifp, &icur, &got)) {
5590 xfs_bmbt_get_all(xfs_iext_get_ext( 5256 done = true;
5591 ifp, lastx), &got); 5257 break;
5592 } 5258 }
5593 continue; 5259 continue;
5594 } 5260 }
@@ -5609,7 +5275,7 @@ __xfs_bunmapi(
5609 } 5275 }
5610 del.br_state = XFS_EXT_UNWRITTEN; 5276 del.br_state = XFS_EXT_UNWRITTEN;
5611 error = xfs_bmap_add_extent_unwritten_real(tp, ip, 5277 error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5612 whichfork, &lastx, &cur, &del, 5278 whichfork, &icur, &cur, &del,
5613 firstblock, dfops, &logflags); 5279 firstblock, dfops, &logflags);
5614 if (error) 5280 if (error)
5615 goto error0; 5281 goto error0;
@@ -5634,10 +5300,13 @@ __xfs_bunmapi(
5634 * Can't make it unwritten. There isn't 5300 * Can't make it unwritten. There isn't
5635 * a full extent here so just skip it. 5301 * a full extent here so just skip it.
5636 */ 5302 */
5637 ASSERT(bno >= del.br_blockcount); 5303 ASSERT(end >= del.br_blockcount);
5638 bno -= del.br_blockcount; 5304 end -= del.br_blockcount;
5639 if (got.br_startoff > bno && --lastx >= 0) 5305 if (got.br_startoff > end &&
5640 xfs_iext_get_extent(ifp, lastx, &got); 5306 !xfs_iext_prev_extent(ifp, &icur, &got)) {
5307 done = true;
5308 break;
5309 }
5641 continue; 5310 continue;
5642 } else if (del.br_state == XFS_EXT_UNWRITTEN) { 5311 } else if (del.br_state == XFS_EXT_UNWRITTEN) {
5643 struct xfs_bmbt_irec prev; 5312 struct xfs_bmbt_irec prev;
@@ -5648,8 +5317,8 @@ __xfs_bunmapi(
5648 * Unwrite the killed part of that one and 5317 * Unwrite the killed part of that one and
5649 * try again. 5318 * try again.
5650 */ 5319 */
5651 ASSERT(lastx > 0); 5320 if (!xfs_iext_prev_extent(ifp, &icur, &prev))
5652 xfs_iext_get_extent(ifp, lastx - 1, &prev); 5321 ASSERT(0);
5653 ASSERT(prev.br_state == XFS_EXT_NORM); 5322 ASSERT(prev.br_state == XFS_EXT_NORM);
5654 ASSERT(!isnullstartblock(prev.br_startblock)); 5323 ASSERT(!isnullstartblock(prev.br_startblock));
5655 ASSERT(del.br_startblock == 5324 ASSERT(del.br_startblock ==
@@ -5661,9 +5330,8 @@ __xfs_bunmapi(
5661 prev.br_startoff = start; 5330 prev.br_startoff = start;
5662 } 5331 }
5663 prev.br_state = XFS_EXT_UNWRITTEN; 5332 prev.br_state = XFS_EXT_UNWRITTEN;
5664 lastx--;
5665 error = xfs_bmap_add_extent_unwritten_real(tp, 5333 error = xfs_bmap_add_extent_unwritten_real(tp,
5666 ip, whichfork, &lastx, &cur, 5334 ip, whichfork, &icur, &cur,
5667 &prev, firstblock, dfops, 5335 &prev, firstblock, dfops,
5668 &logflags); 5336 &logflags);
5669 if (error) 5337 if (error)
@@ -5673,7 +5341,7 @@ __xfs_bunmapi(
5673 ASSERT(del.br_state == XFS_EXT_NORM); 5341 ASSERT(del.br_state == XFS_EXT_NORM);
5674 del.br_state = XFS_EXT_UNWRITTEN; 5342 del.br_state = XFS_EXT_UNWRITTEN;
5675 error = xfs_bmap_add_extent_unwritten_real(tp, 5343 error = xfs_bmap_add_extent_unwritten_real(tp,
5676 ip, whichfork, &lastx, &cur, 5344 ip, whichfork, &icur, &cur,
5677 &del, firstblock, dfops, 5345 &del, firstblock, dfops,
5678 &logflags); 5346 &logflags);
5679 if (error) 5347 if (error)
@@ -5682,85 +5350,39 @@ __xfs_bunmapi(
5682 } 5350 }
5683 } 5351 }
5684 5352
5685 /* 5353 if (wasdel) {
5686 * If it's the case where the directory code is running 5354 error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
5687 * with no block reservation, and the deleted block is in 5355 &got, &del);
5688 * the middle of its extent, and the resulting insert 5356 } else {
5689 * of an extent would cause transformation to btree format, 5357 error = xfs_bmap_del_extent_real(ip, tp, &icur, dfops,
5690 * then reject it. The calling code will then swap 5358 cur, &del, &tmp_logflags, whichfork,
5691 * blocks around instead. 5359 flags);
5692 * We have to do this now, rather than waiting for the 5360 logflags |= tmp_logflags;
5693 * conversion to btree format, since the transaction
5694 * will be dirty.
5695 */
5696 if (!wasdel && tp->t_blk_res == 0 &&
5697 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
5698 XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
5699 XFS_IFORK_MAXEXT(ip, whichfork) &&
5700 del.br_startoff > got.br_startoff &&
5701 del.br_startoff + del.br_blockcount <
5702 got.br_startoff + got.br_blockcount) {
5703 error = -ENOSPC;
5704 goto error0;
5705 } 5361 }
5706 5362
5707 /*
5708 * Unreserve quota and update realtime free space, if
5709 * appropriate. If delayed allocation, update the inode delalloc
5710 * counter now and wait to update the sb counters as
5711 * xfs_bmap_del_extent() might need to borrow some blocks.
5712 */
5713 if (wasdel) {
5714 ASSERT(startblockval(del.br_startblock) > 0);
5715 if (isrt) {
5716 xfs_filblks_t rtexts;
5717
5718 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
5719 do_div(rtexts, mp->m_sb.sb_rextsize);
5720 xfs_mod_frextents(mp, (int64_t)rtexts);
5721 (void)xfs_trans_reserve_quota_nblks(NULL,
5722 ip, -((long)del.br_blockcount), 0,
5723 XFS_QMOPT_RES_RTBLKS);
5724 } else {
5725 (void)xfs_trans_reserve_quota_nblks(NULL,
5726 ip, -((long)del.br_blockcount), 0,
5727 XFS_QMOPT_RES_REGBLKS);
5728 }
5729 ip->i_delayed_blks -= del.br_blockcount;
5730 if (cur)
5731 cur->bc_private.b.flags |=
5732 XFS_BTCUR_BPRV_WASDEL;
5733 } else if (cur)
5734 cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
5735
5736 error = xfs_bmap_del_extent(ip, tp, &lastx, dfops, cur, &del,
5737 &tmp_logflags, whichfork, flags);
5738 logflags |= tmp_logflags;
5739 if (error) 5363 if (error)
5740 goto error0; 5364 goto error0;
5741 5365
5742 if (!isrt && wasdel)
5743 xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);
5744
5745 max_len -= del.br_blockcount; 5366 max_len -= del.br_blockcount;
5746 bno = del.br_startoff - 1; 5367 end = del.br_startoff - 1;
5747nodelete: 5368nodelete:
5748 /* 5369 /*
5749 * If not done go on to the next (previous) record. 5370 * If not done go on to the next (previous) record.
5750 */ 5371 */
5751 if (bno != (xfs_fileoff_t)-1 && bno >= start) { 5372 if (end != (xfs_fileoff_t)-1 && end >= start) {
5752 if (lastx >= 0) { 5373 if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5753 xfs_iext_get_extent(ifp, lastx, &got); 5374 (got.br_startoff > end &&
5754 if (got.br_startoff > bno && --lastx >= 0) 5375 !xfs_iext_prev_extent(ifp, &icur, &got))) {
5755 xfs_iext_get_extent(ifp, lastx, &got); 5376 done = true;
5377 break;
5756 } 5378 }
5757 extno++; 5379 extno++;
5758 } 5380 }
5759 } 5381 }
5760 if (bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0) 5382 if (done || end == (xfs_fileoff_t)-1 || end < start)
5761 *rlen = 0; 5383 *rlen = 0;
5762 else 5384 else
5763 *rlen = bno - start + 1; 5385 *rlen = end - start + 1;
5764 5386
5765 /* 5387 /*
5766 * Convert to a btree if necessary. 5388 * Convert to a btree if necessary.
@@ -5878,14 +5500,13 @@ xfs_bmse_merge(
5878 struct xfs_inode *ip, 5500 struct xfs_inode *ip,
5879 int whichfork, 5501 int whichfork,
5880 xfs_fileoff_t shift, /* shift fsb */ 5502 xfs_fileoff_t shift, /* shift fsb */
5881 int current_ext, /* idx of gotp */ 5503 struct xfs_iext_cursor *icur,
5882 struct xfs_bmbt_irec *got, /* extent to shift */ 5504 struct xfs_bmbt_irec *got, /* extent to shift */
5883 struct xfs_bmbt_irec *left, /* preceding extent */ 5505 struct xfs_bmbt_irec *left, /* preceding extent */
5884 struct xfs_btree_cur *cur, 5506 struct xfs_btree_cur *cur,
5885 int *logflags, /* output */ 5507 int *logflags, /* output */
5886 struct xfs_defer_ops *dfops) 5508 struct xfs_defer_ops *dfops)
5887{ 5509{
5888 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
5889 struct xfs_bmbt_irec new; 5510 struct xfs_bmbt_irec new;
5890 xfs_filblks_t blockcount; 5511 xfs_filblks_t blockcount;
5891 int error, i; 5512 int error, i;
@@ -5913,8 +5534,7 @@ xfs_bmse_merge(
5913 } 5534 }
5914 5535
5915 /* lookup and remove the extent to merge */ 5536 /* lookup and remove the extent to merge */
5916 error = xfs_bmbt_lookup_eq(cur, got->br_startoff, got->br_startblock, 5537 error = xfs_bmbt_lookup_eq(cur, got, &i);
5917 got->br_blockcount, &i);
5918 if (error) 5538 if (error)
5919 return error; 5539 return error;
5920 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 5540 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
@@ -5925,20 +5545,20 @@ xfs_bmse_merge(
5925 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 5545 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5926 5546
5927 /* lookup and update size of the previous extent */ 5547 /* lookup and update size of the previous extent */
5928 error = xfs_bmbt_lookup_eq(cur, left->br_startoff, left->br_startblock, 5548 error = xfs_bmbt_lookup_eq(cur, left, &i);
5929 left->br_blockcount, &i);
5930 if (error) 5549 if (error)
5931 return error; 5550 return error;
5932 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 5551 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5933 5552
5934 error = xfs_bmbt_update(cur, new.br_startoff, new.br_startblock, 5553 error = xfs_bmbt_update(cur, &new);
5935 new.br_blockcount, new.br_state);
5936 if (error) 5554 if (error)
5937 return error; 5555 return error;
5938 5556
5939done: 5557done:
5940 xfs_iext_update_extent(ifp, current_ext - 1, &new); 5558 xfs_iext_remove(ip, icur, 0);
5941 xfs_iext_remove(ip, current_ext, 1, 0); 5559 xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur);
5560 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5561 &new);
5942 5562
5943 /* update reverse mapping. rmap functions merge the rmaps for us */ 5563 /* update reverse mapping. rmap functions merge the rmaps for us */
5944 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got); 5564 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got);
@@ -5949,183 +5569,83 @@ done:
5949 return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new); 5569 return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new);
5950} 5570}
5951 5571
5952/* 5572static int
5953 * Shift a single extent. 5573xfs_bmap_shift_update_extent(
5954 */ 5574 struct xfs_inode *ip,
5955STATIC int 5575 int whichfork,
5956xfs_bmse_shift_one( 5576 struct xfs_iext_cursor *icur,
5957 struct xfs_inode *ip, 5577 struct xfs_bmbt_irec *got,
5958 int whichfork, 5578 struct xfs_btree_cur *cur,
5959 xfs_fileoff_t offset_shift_fsb, 5579 int *logflags,
5960 int *current_ext, 5580 struct xfs_defer_ops *dfops,
5961 struct xfs_bmbt_irec *got, 5581 xfs_fileoff_t startoff)
5962 struct xfs_btree_cur *cur,
5963 int *logflags,
5964 enum shift_direction direction,
5965 struct xfs_defer_ops *dfops)
5966{ 5582{
5967 struct xfs_ifork *ifp; 5583 struct xfs_mount *mp = ip->i_mount;
5968 struct xfs_mount *mp; 5584 struct xfs_bmbt_irec prev = *got;
5969 xfs_fileoff_t startoff; 5585 int error, i;
5970 struct xfs_bmbt_irec adj_irec, new;
5971 int error;
5972 int i;
5973 int total_extents;
5974
5975 mp = ip->i_mount;
5976 ifp = XFS_IFORK_PTR(ip, whichfork);
5977 total_extents = xfs_iext_count(ifp);
5978
5979 /* delalloc extents should be prevented by caller */
5980 XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got->br_startblock));
5981
5982 if (direction == SHIFT_LEFT) {
5983 startoff = got->br_startoff - offset_shift_fsb;
5984
5985 /*
5986 * Check for merge if we've got an extent to the left,
5987 * otherwise make sure there's enough room at the start
5988 * of the file for the shift.
5989 */
5990 if (!*current_ext) {
5991 if (got->br_startoff < offset_shift_fsb)
5992 return -EINVAL;
5993 goto update_current_ext;
5994 }
5995
5996 /*
5997 * grab the left extent and check for a large enough hole.
5998 */
5999 xfs_iext_get_extent(ifp, *current_ext - 1, &adj_irec);
6000 if (startoff < adj_irec.br_startoff + adj_irec.br_blockcount)
6001 return -EINVAL;
6002
6003 /* check whether to merge the extent or shift it down */
6004 if (xfs_bmse_can_merge(&adj_irec, got, offset_shift_fsb)) {
6005 return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
6006 *current_ext, got, &adj_irec,
6007 cur, logflags, dfops);
6008 }
6009 } else {
6010 startoff = got->br_startoff + offset_shift_fsb;
6011 /* nothing to move if this is the last extent */
6012 if (*current_ext >= (total_extents - 1))
6013 goto update_current_ext;
6014
6015 /*
6016 * If this is not the last extent in the file, make sure there
6017 * is enough room between current extent and next extent for
6018 * accommodating the shift.
6019 */
6020 xfs_iext_get_extent(ifp, *current_ext + 1, &adj_irec);
6021 if (startoff + got->br_blockcount > adj_irec.br_startoff)
6022 return -EINVAL;
6023
6024 /*
6025 * Unlike a left shift (which involves a hole punch),
6026 * a right shift does not modify extent neighbors
6027 * in any way. We should never find mergeable extents
6028 * in this scenario. Check anyways and warn if we
6029 * encounter two extents that could be one.
6030 */
6031 if (xfs_bmse_can_merge(got, &adj_irec, offset_shift_fsb))
6032 WARN_ON_ONCE(1);
6033 }
6034 5586
6035 /*
6036 * Increment the extent index for the next iteration, update the start
6037 * offset of the in-core extent and update the btree if applicable.
6038 */
6039update_current_ext:
6040 *logflags |= XFS_ILOG_CORE; 5587 *logflags |= XFS_ILOG_CORE;
6041 5588
6042 new = *got; 5589 got->br_startoff = startoff;
6043 new.br_startoff = startoff;
6044 5590
6045 if (cur) { 5591 if (cur) {
6046 error = xfs_bmbt_lookup_eq(cur, got->br_startoff, 5592 error = xfs_bmbt_lookup_eq(cur, &prev, &i);
6047 got->br_startblock, got->br_blockcount, &i);
6048 if (error) 5593 if (error)
6049 return error; 5594 return error;
6050 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 5595 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
6051 5596
6052 error = xfs_bmbt_update(cur, new.br_startoff, 5597 error = xfs_bmbt_update(cur, got);
6053 new.br_startblock, new.br_blockcount,
6054 new.br_state);
6055 if (error) 5598 if (error)
6056 return error; 5599 return error;
6057 } else { 5600 } else {
6058 *logflags |= XFS_ILOG_DEXT; 5601 *logflags |= XFS_ILOG_DEXT;
6059 } 5602 }
6060 5603
6061 xfs_iext_update_extent(ifp, *current_ext, &new); 5604 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
6062 5605 got);
6063 if (direction == SHIFT_LEFT)
6064 (*current_ext)++;
6065 else
6066 (*current_ext)--;
6067 5606
6068 /* update reverse mapping */ 5607 /* update reverse mapping */
6069 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got); 5608 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &prev);
6070 if (error) 5609 if (error)
6071 return error; 5610 return error;
6072 return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new); 5611 return xfs_rmap_map_extent(mp, dfops, ip, whichfork, got);
6073} 5612}
6074 5613
6075/*
6076 * Shift extent records to the left/right to cover/create a hole.
6077 *
6078 * The maximum number of extents to be shifted in a single operation is
6079 * @num_exts. @stop_fsb specifies the file offset at which to stop shift and the
6080 * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
6081 * is the length by which each extent is shifted. If there is no hole to shift
6082 * the extents into, this will be considered invalid operation and we abort
6083 * immediately.
6084 */
6085int 5614int
6086xfs_bmap_shift_extents( 5615xfs_bmap_collapse_extents(
6087 struct xfs_trans *tp, 5616 struct xfs_trans *tp,
6088 struct xfs_inode *ip, 5617 struct xfs_inode *ip,
6089 xfs_fileoff_t *next_fsb, 5618 xfs_fileoff_t *next_fsb,
6090 xfs_fileoff_t offset_shift_fsb, 5619 xfs_fileoff_t offset_shift_fsb,
6091 int *done, 5620 bool *done,
6092 xfs_fileoff_t stop_fsb, 5621 xfs_fileoff_t stop_fsb,
6093 xfs_fsblock_t *firstblock, 5622 xfs_fsblock_t *firstblock,
6094 struct xfs_defer_ops *dfops, 5623 struct xfs_defer_ops *dfops)
6095 enum shift_direction direction,
6096 int num_exts)
6097{ 5624{
6098 struct xfs_btree_cur *cur = NULL; 5625 int whichfork = XFS_DATA_FORK;
6099 struct xfs_bmbt_irec got; 5626 struct xfs_mount *mp = ip->i_mount;
6100 struct xfs_mount *mp = ip->i_mount; 5627 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
6101 struct xfs_ifork *ifp; 5628 struct xfs_btree_cur *cur = NULL;
6102 xfs_extnum_t nexts = 0; 5629 struct xfs_bmbt_irec got, prev;
6103 xfs_extnum_t current_ext; 5630 struct xfs_iext_cursor icur;
6104 xfs_extnum_t total_extents; 5631 xfs_fileoff_t new_startoff;
6105 xfs_extnum_t stop_extent; 5632 int error = 0;
6106 int error = 0; 5633 int logflags = 0;
6107 int whichfork = XFS_DATA_FORK;
6108 int logflags = 0;
6109 5634
6110 if (unlikely(XFS_TEST_ERROR( 5635 if (unlikely(XFS_TEST_ERROR(
6111 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 5636 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
6112 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), 5637 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
6113 mp, XFS_ERRTAG_BMAPIFORMAT))) { 5638 mp, XFS_ERRTAG_BMAPIFORMAT))) {
6114 XFS_ERROR_REPORT("xfs_bmap_shift_extents", 5639 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
6115 XFS_ERRLEVEL_LOW, mp);
6116 return -EFSCORRUPTED; 5640 return -EFSCORRUPTED;
6117 } 5641 }
6118 5642
6119 if (XFS_FORCED_SHUTDOWN(mp)) 5643 if (XFS_FORCED_SHUTDOWN(mp))
6120 return -EIO; 5644 return -EIO;
6121 5645
6122 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 5646 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
6123 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
6124 ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
6125 5647
6126 ifp = XFS_IFORK_PTR(ip, whichfork);
6127 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 5648 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
6128 /* Read in all the extents */
6129 error = xfs_iread_extents(tp, ip, whichfork); 5649 error = xfs_iread_extents(tp, ip, whichfork);
6130 if (error) 5650 if (error)
6131 return error; 5651 return error;
@@ -6138,107 +5658,165 @@ xfs_bmap_shift_extents(
6138 cur->bc_private.b.flags = 0; 5658 cur->bc_private.b.flags = 0;
6139 } 5659 }
6140 5660
6141 /* 5661 if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
6142 * There may be delalloc extents in the data fork before the range we 5662 *done = true;
6143 * are collapsing out, so we cannot use the count of real extents here.
6144 * Instead we have to calculate it from the incore fork.
6145 */
6146 total_extents = xfs_iext_count(ifp);
6147 if (total_extents == 0) {
6148 *done = 1;
6149 goto del_cursor; 5663 goto del_cursor;
6150 } 5664 }
5665 XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
6151 5666
6152 /* 5667 new_startoff = got.br_startoff - offset_shift_fsb;
6153 * In case of first right shift, we need to initialize next_fsb 5668 if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
6154 */ 5669 if (new_startoff < prev.br_startoff + prev.br_blockcount) {
6155 if (*next_fsb == NULLFSBLOCK) { 5670 error = -EINVAL;
6156 ASSERT(direction == SHIFT_RIGHT);
6157
6158 current_ext = total_extents - 1;
6159 xfs_iext_get_extent(ifp, current_ext, &got);
6160 if (stop_fsb > got.br_startoff) {
6161 *done = 1;
6162 goto del_cursor; 5671 goto del_cursor;
6163 } 5672 }
6164 *next_fsb = got.br_startoff; 5673
5674 if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
5675 error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
5676 &icur, &got, &prev, cur, &logflags,
5677 dfops);
5678 if (error)
5679 goto del_cursor;
5680 goto done;
5681 }
6165 } else { 5682 } else {
6166 /* 5683 if (got.br_startoff < offset_shift_fsb) {
6167 * Look up the extent index for the fsb where we start shifting. We can 5684 error = -EINVAL;
6168 * henceforth iterate with current_ext as extent list changes are locked
6169 * out via ilock.
6170 *
6171 * If next_fsb lies in a hole beyond which there are no extents we are
6172 * done.
6173 */
6174 if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &current_ext,
6175 &got)) {
6176 *done = 1;
6177 goto del_cursor; 5685 goto del_cursor;
6178 } 5686 }
6179 } 5687 }
6180 5688
6181 /* Lookup the extent index at which we have to stop */ 5689 error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
6182 if (direction == SHIFT_RIGHT) { 5690 &logflags, dfops, new_startoff);
6183 struct xfs_bmbt_irec s; 5691 if (error)
5692 goto del_cursor;
5693
5694done:
5695 if (!xfs_iext_next_extent(ifp, &icur, &got)) {
5696 *done = true;
5697 goto del_cursor;
5698 }
5699
5700 *next_fsb = got.br_startoff;
5701del_cursor:
5702 if (cur)
5703 xfs_btree_del_cursor(cur,
5704 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5705 if (logflags)
5706 xfs_trans_log_inode(tp, ip, logflags);
5707 return error;
5708}
5709
5710int
5711xfs_bmap_insert_extents(
5712 struct xfs_trans *tp,
5713 struct xfs_inode *ip,
5714 xfs_fileoff_t *next_fsb,
5715 xfs_fileoff_t offset_shift_fsb,
5716 bool *done,
5717 xfs_fileoff_t stop_fsb,
5718 xfs_fsblock_t *firstblock,
5719 struct xfs_defer_ops *dfops)
5720{
5721 int whichfork = XFS_DATA_FORK;
5722 struct xfs_mount *mp = ip->i_mount;
5723 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
5724 struct xfs_btree_cur *cur = NULL;
5725 struct xfs_bmbt_irec got, next;
5726 struct xfs_iext_cursor icur;
5727 xfs_fileoff_t new_startoff;
5728 int error = 0;
5729 int logflags = 0;
5730
5731 if (unlikely(XFS_TEST_ERROR(
5732 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5733 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5734 mp, XFS_ERRTAG_BMAPIFORMAT))) {
5735 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
5736 return -EFSCORRUPTED;
5737 }
5738
5739 if (XFS_FORCED_SHUTDOWN(mp))
5740 return -EIO;
5741
5742 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5743
5744 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5745 error = xfs_iread_extents(tp, ip, whichfork);
5746 if (error)
5747 return error;
5748 }
5749
5750 if (ifp->if_flags & XFS_IFBROOT) {
5751 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5752 cur->bc_private.b.firstblock = *firstblock;
5753 cur->bc_private.b.dfops = dfops;
5754 cur->bc_private.b.flags = 0;
5755 }
6184 5756
6185 xfs_iext_lookup_extent(ip, ifp, stop_fsb, &stop_extent, &s); 5757 if (*next_fsb == NULLFSBLOCK) {
6186 /* Make stop_extent exclusive of shift range */ 5758 xfs_iext_last(ifp, &icur);
6187 stop_extent--; 5759 if (!xfs_iext_get_extent(ifp, &icur, &got) ||
6188 if (current_ext <= stop_extent) { 5760 stop_fsb > got.br_startoff) {
6189 error = -EIO; 5761 *done = true;
6190 goto del_cursor; 5762 goto del_cursor;
6191 } 5763 }
6192 } else { 5764 } else {
6193 stop_extent = total_extents; 5765 if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
6194 if (current_ext >= stop_extent) { 5766 *done = true;
6195 error = -EIO;
6196 goto del_cursor; 5767 goto del_cursor;
6197 } 5768 }
6198 } 5769 }
5770 XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
6199 5771
6200 while (nexts++ < num_exts) { 5772 if (stop_fsb >= got.br_startoff + got.br_blockcount) {
6201 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, 5773 error = -EIO;
6202 &current_ext, &got, cur, &logflags, 5774 goto del_cursor;
6203 direction, dfops); 5775 }
6204 if (error) 5776
5777 new_startoff = got.br_startoff + offset_shift_fsb;
5778 if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
5779 if (new_startoff + got.br_blockcount > next.br_startoff) {
5780 error = -EINVAL;
6205 goto del_cursor; 5781 goto del_cursor;
6206 /*
6207 * If there was an extent merge during the shift, the extent
6208 * count can change. Update the total and grade the next record.
6209 */
6210 if (direction == SHIFT_LEFT) {
6211 total_extents = xfs_iext_count(ifp);
6212 stop_extent = total_extents;
6213 } 5782 }
6214 5783
6215 if (current_ext == stop_extent) { 5784 /*
6216 *done = 1; 5785 * Unlike a left shift (which involves a hole punch), a right
6217 *next_fsb = NULLFSBLOCK; 5786 * shift does not modify extent neighbors in any way. We should
6218 break; 5787 * never find mergeable extents in this scenario. Check anyways
6219 } 5788 * and warn if we encounter two extents that could be one.
6220 xfs_iext_get_extent(ifp, current_ext, &got); 5789 */
5790 if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
5791 WARN_ON_ONCE(1);
6221 } 5792 }
6222 5793
6223 if (!*done) 5794 error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
6224 *next_fsb = got.br_startoff; 5795 &logflags, dfops, new_startoff);
5796 if (error)
5797 goto del_cursor;
5798
5799 if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
5800 stop_fsb >= got.br_startoff + got.br_blockcount) {
5801 *done = true;
5802 goto del_cursor;
5803 }
6225 5804
5805 *next_fsb = got.br_startoff;
6226del_cursor: 5806del_cursor:
6227 if (cur) 5807 if (cur)
6228 xfs_btree_del_cursor(cur, 5808 xfs_btree_del_cursor(cur,
6229 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 5809 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
6230
6231 if (logflags) 5810 if (logflags)
6232 xfs_trans_log_inode(tp, ip, logflags); 5811 xfs_trans_log_inode(tp, ip, logflags);
6233
6234 return error; 5812 return error;
6235} 5813}
6236 5814
6237/* 5815/*
6238 * Splits an extent into two extents at split_fsb block such that it is 5816 * Splits an extent into two extents at split_fsb block such that it is the
6239 * the first block of the current_ext. @current_ext is a target extent 5817 * first block of the current_ext. @ext is a target extent to be split.
6240 * to be split. @split_fsb is a block where the extents is split. 5818 * @split_fsb is a block where the extents is split. If split_fsb lies in a
6241 * If split_fsb lies in a hole or the first block of extents, just return 0. 5819 * hole or the first block of extents, just return 0.
6242 */ 5820 */
6243STATIC int 5821STATIC int
6244xfs_bmap_split_extent_at( 5822xfs_bmap_split_extent_at(
@@ -6255,7 +5833,7 @@ xfs_bmap_split_extent_at(
6255 struct xfs_mount *mp = ip->i_mount; 5833 struct xfs_mount *mp = ip->i_mount;
6256 struct xfs_ifork *ifp; 5834 struct xfs_ifork *ifp;
6257 xfs_fsblock_t gotblkcnt; /* new block count for got */ 5835 xfs_fsblock_t gotblkcnt; /* new block count for got */
6258 xfs_extnum_t current_ext; 5836 struct xfs_iext_cursor icur;
6259 int error = 0; 5837 int error = 0;
6260 int logflags = 0; 5838 int logflags = 0;
6261 int i = 0; 5839 int i = 0;
@@ -6283,7 +5861,7 @@ xfs_bmap_split_extent_at(
6283 /* 5861 /*
6284 * If there are not extents, or split_fsb lies in a hole we are done. 5862 * If there are not extents, or split_fsb lies in a hole we are done.
6285 */ 5863 */
6286 if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &current_ext, &got) || 5864 if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
6287 got.br_startoff >= split_fsb) 5865 got.br_startoff >= split_fsb)
6288 return 0; 5866 return 0;
6289 5867
@@ -6298,44 +5876,35 @@ xfs_bmap_split_extent_at(
6298 cur->bc_private.b.firstblock = *firstfsb; 5876 cur->bc_private.b.firstblock = *firstfsb;
6299 cur->bc_private.b.dfops = dfops; 5877 cur->bc_private.b.dfops = dfops;
6300 cur->bc_private.b.flags = 0; 5878 cur->bc_private.b.flags = 0;
6301 error = xfs_bmbt_lookup_eq(cur, got.br_startoff, 5879 error = xfs_bmbt_lookup_eq(cur, &got, &i);
6302 got.br_startblock,
6303 got.br_blockcount,
6304 &i);
6305 if (error) 5880 if (error)
6306 goto del_cursor; 5881 goto del_cursor;
6307 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor); 5882 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
6308 } 5883 }
6309 5884
6310 got.br_blockcount = gotblkcnt; 5885 got.br_blockcount = gotblkcnt;
6311 xfs_iext_update_extent(ifp, current_ext, &got); 5886 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
5887 &got);
6312 5888
6313 logflags = XFS_ILOG_CORE; 5889 logflags = XFS_ILOG_CORE;
6314 if (cur) { 5890 if (cur) {
6315 error = xfs_bmbt_update(cur, got.br_startoff, 5891 error = xfs_bmbt_update(cur, &got);
6316 got.br_startblock,
6317 got.br_blockcount,
6318 got.br_state);
6319 if (error) 5892 if (error)
6320 goto del_cursor; 5893 goto del_cursor;
6321 } else 5894 } else
6322 logflags |= XFS_ILOG_DEXT; 5895 logflags |= XFS_ILOG_DEXT;
6323 5896
6324 /* Add new extent */ 5897 /* Add new extent */
6325 current_ext++; 5898 xfs_iext_next(ifp, &icur);
6326 xfs_iext_insert(ip, current_ext, 1, &new, 0); 5899 xfs_iext_insert(ip, &icur, &new, 0);
6327 XFS_IFORK_NEXT_SET(ip, whichfork, 5900 XFS_IFORK_NEXT_SET(ip, whichfork,
6328 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 5901 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
6329 5902
6330 if (cur) { 5903 if (cur) {
6331 error = xfs_bmbt_lookup_eq(cur, new.br_startoff, 5904 error = xfs_bmbt_lookup_eq(cur, &new, &i);
6332 new.br_startblock, new.br_blockcount,
6333 &i);
6334 if (error) 5905 if (error)
6335 goto del_cursor; 5906 goto del_cursor;
6336 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor); 5907 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
6337 cur->bc_rec.b.br_state = new.br_state;
6338
6339 error = xfs_btree_insert(cur, &i); 5908 error = xfs_btree_insert(cur, &i);
6340 if (error) 5909 if (error)
6341 goto del_cursor; 5910 goto del_cursor;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 502e0d8fb4ff..e36d75799cd5 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -43,7 +43,7 @@ struct xfs_bmalloca {
43 xfs_fsblock_t blkno; /* starting block of new extent */ 43 xfs_fsblock_t blkno; /* starting block of new extent */
44 44
45 struct xfs_btree_cur *cur; /* btree cursor */ 45 struct xfs_btree_cur *cur; /* btree cursor */
46 xfs_extnum_t idx; /* current extent index */ 46 struct xfs_iext_cursor icur; /* incore extent cursor */
47 int nallocs;/* number of extents alloc'd */ 47 int nallocs;/* number of extents alloc'd */
48 int logflags;/* flags for transaction logging */ 48 int logflags;/* flags for transaction logging */
49 49
@@ -113,6 +113,9 @@ struct xfs_extent_free_item
113/* Only convert delalloc space, don't allocate entirely new extents */ 113/* Only convert delalloc space, don't allocate entirely new extents */
114#define XFS_BMAPI_DELALLOC 0x400 114#define XFS_BMAPI_DELALLOC 0x400
115 115
116/* Only convert unwritten extents, don't allocate new blocks */
117#define XFS_BMAPI_CONVERT_ONLY 0x800
118
116#define XFS_BMAPI_FLAGS \ 119#define XFS_BMAPI_FLAGS \
117 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 120 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
118 { XFS_BMAPI_METADATA, "METADATA" }, \ 121 { XFS_BMAPI_METADATA, "METADATA" }, \
@@ -124,7 +127,8 @@ struct xfs_extent_free_item
124 { XFS_BMAPI_ZERO, "ZERO" }, \ 127 { XFS_BMAPI_ZERO, "ZERO" }, \
125 { XFS_BMAPI_REMAP, "REMAP" }, \ 128 { XFS_BMAPI_REMAP, "REMAP" }, \
126 { XFS_BMAPI_COWFORK, "COWFORK" }, \ 129 { XFS_BMAPI_COWFORK, "COWFORK" }, \
127 { XFS_BMAPI_DELALLOC, "DELALLOC" } 130 { XFS_BMAPI_DELALLOC, "DELALLOC" }, \
131 { XFS_BMAPI_CONVERT_ONLY, "CONVERT_ONLY" }
128 132
129 133
130static inline int xfs_bmapi_aflag(int w) 134static inline int xfs_bmapi_aflag(int w)
@@ -183,29 +187,6 @@ static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec)
183 !isnullstartblock(irec->br_startblock); 187 !isnullstartblock(irec->br_startblock);
184} 188}
185 189
186/*
187 * This macro is used to determine how many extents will be shifted
188 * in one write transaction. We could require two splits,
189 * an extent move on the first and an extent merge on the second,
190 * So it is proper that one extent is shifted inside write transaction
191 * at a time.
192 */
193#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
194
195enum shift_direction {
196 SHIFT_LEFT = 0,
197 SHIFT_RIGHT,
198};
199
200#ifdef DEBUG
201void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
202 int whichfork, unsigned long caller_ip);
203#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
204 xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_)
205#else
206#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
207#endif
208
209void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, 190void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
210 xfs_filblks_t len); 191 xfs_filblks_t len);
211void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *); 192void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
@@ -222,8 +203,6 @@ int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip,
222int xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused, 203int xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused,
223 int whichfork); 204 int whichfork);
224int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork); 205int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork);
225int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
226 int whichfork);
227int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno, 206int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
228 xfs_filblks_t len, struct xfs_bmbt_irec *mval, 207 xfs_filblks_t len, struct xfs_bmbt_irec *mval,
229 int *nmap, int flags); 208 int *nmap, int flags);
@@ -241,20 +220,25 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
241 xfs_extnum_t nexts, xfs_fsblock_t *firstblock, 220 xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
242 struct xfs_defer_ops *dfops, int *done); 221 struct xfs_defer_ops *dfops, int *done);
243int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork, 222int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
244 xfs_extnum_t *idx, struct xfs_bmbt_irec *got, 223 struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got,
224 struct xfs_bmbt_irec *del);
225void xfs_bmap_del_extent_cow(struct xfs_inode *ip,
226 struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got,
245 struct xfs_bmbt_irec *del); 227 struct xfs_bmbt_irec *del);
246void xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx,
247 struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del);
248uint xfs_default_attroffset(struct xfs_inode *ip); 228uint xfs_default_attroffset(struct xfs_inode *ip);
249int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, 229int xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip,
230 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
231 bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
232 struct xfs_defer_ops *dfops);
233int xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip,
250 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, 234 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
251 int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, 235 bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
252 struct xfs_defer_ops *dfops, enum shift_direction direction, 236 struct xfs_defer_ops *dfops);
253 int num_exts);
254int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); 237int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
255int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, 238int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
256 xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc, 239 xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
257 struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof); 240 struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
241 int eof);
258 242
259enum xfs_bmap_intent_type { 243enum xfs_bmap_intent_type {
260 XFS_BMAP_MAP = 1, 244 XFS_BMAP_MAP = 1,
@@ -278,4 +262,16 @@ int xfs_bmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
278int xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, 262int xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
279 struct xfs_inode *ip, struct xfs_bmbt_irec *imap); 263 struct xfs_inode *ip, struct xfs_bmbt_irec *imap);
280 264
265static inline int xfs_bmap_fork_to_state(int whichfork)
266{
267 switch (whichfork) {
268 case XFS_ATTR_FORK:
269 return BMAP_ATTRFORK;
270 case XFS_COW_FORK:
271 return BMAP_COWFORK;
272 default:
273 return 0;
274 }
275}
276
281#endif /* __XFS_BMAP_H__ */ 277#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index a6331ffa51e3..c10aecaaae44 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -38,22 +38,6 @@
38#include "xfs_rmap.h" 38#include "xfs_rmap.h"
39 39
40/* 40/*
41 * Determine the extent state.
42 */
43/* ARGSUSED */
44STATIC xfs_exntst_t
45xfs_extent_state(
46 xfs_filblks_t blks,
47 int extent_flag)
48{
49 if (extent_flag) {
50 ASSERT(blks != 0); /* saved for DMIG */
51 return XFS_EXT_UNWRITTEN;
52 }
53 return XFS_EXT_NORM;
54}
55
56/*
57 * Convert on-disk form of btree root to in-memory form. 41 * Convert on-disk form of btree root to in-memory form.
58 */ 42 */
59void 43void
@@ -87,84 +71,21 @@ xfs_bmdr_to_bmbt(
87 memcpy(tpp, fpp, sizeof(*fpp) * dmxr); 71 memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
88} 72}
89 73
90/*
91 * Convert a compressed bmap extent record to an uncompressed form.
92 * This code must be in sync with the routines xfs_bmbt_get_startoff,
93 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
94 */
95STATIC void
96__xfs_bmbt_get_all(
97 uint64_t l0,
98 uint64_t l1,
99 xfs_bmbt_irec_t *s)
100{
101 int ext_flag;
102 xfs_exntst_t st;
103
104 ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN));
105 s->br_startoff = ((xfs_fileoff_t)l0 &
106 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
107 s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) |
108 (((xfs_fsblock_t)l1) >> 21);
109 s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21));
110 /* This is xfs_extent_state() in-line */
111 if (ext_flag) {
112 ASSERT(s->br_blockcount != 0); /* saved for DMIG */
113 st = XFS_EXT_UNWRITTEN;
114 } else
115 st = XFS_EXT_NORM;
116 s->br_state = st;
117}
118
119void 74void
120xfs_bmbt_get_all( 75xfs_bmbt_disk_get_all(
121 xfs_bmbt_rec_host_t *r, 76 struct xfs_bmbt_rec *rec,
122 xfs_bmbt_irec_t *s) 77 struct xfs_bmbt_irec *irec)
123{ 78{
124 __xfs_bmbt_get_all(r->l0, r->l1, s); 79 uint64_t l0 = get_unaligned_be64(&rec->l0);
125} 80 uint64_t l1 = get_unaligned_be64(&rec->l1);
126 81
127/* 82 irec->br_startoff = (l0 & xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
128 * Extract the blockcount field from an in memory bmap extent record. 83 irec->br_startblock = ((l0 & xfs_mask64lo(9)) << 43) | (l1 >> 21);
129 */ 84 irec->br_blockcount = l1 & xfs_mask64lo(21);
130xfs_filblks_t 85 if (l0 >> (64 - BMBT_EXNTFLAG_BITLEN))
131xfs_bmbt_get_blockcount( 86 irec->br_state = XFS_EXT_UNWRITTEN;
132 xfs_bmbt_rec_host_t *r) 87 else
133{ 88 irec->br_state = XFS_EXT_NORM;
134 return (xfs_filblks_t)(r->l1 & xfs_mask64lo(21));
135}
136
137/*
138 * Extract the startblock field from an in memory bmap extent record.
139 */
140xfs_fsblock_t
141xfs_bmbt_get_startblock(
142 xfs_bmbt_rec_host_t *r)
143{
144 return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) |
145 (((xfs_fsblock_t)r->l1) >> 21);
146}
147
148/*
149 * Extract the startoff field from an in memory bmap extent record.
150 */
151xfs_fileoff_t
152xfs_bmbt_get_startoff(
153 xfs_bmbt_rec_host_t *r)
154{
155 return ((xfs_fileoff_t)r->l0 &
156 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
157}
158
159xfs_exntst_t
160xfs_bmbt_get_state(
161 xfs_bmbt_rec_host_t *r)
162{
163 int ext_flag;
164
165 ext_flag = (int)((r->l0) >> (64 - BMBT_EXNTFLAG_BITLEN));
166 return xfs_extent_state(xfs_bmbt_get_blockcount(r),
167 ext_flag);
168} 89}
169 90
170/* 91/*
@@ -188,142 +109,29 @@ xfs_bmbt_disk_get_startoff(
188 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; 109 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
189} 110}
190 111
191
192/*
193 * Set all the fields in a bmap extent record from the arguments.
194 */
195void
196xfs_bmbt_set_allf(
197 xfs_bmbt_rec_host_t *r,
198 xfs_fileoff_t startoff,
199 xfs_fsblock_t startblock,
200 xfs_filblks_t blockcount,
201 xfs_exntst_t state)
202{
203 int extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
204
205 ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
206 ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
207 ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
208
209 ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
210
211 r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
212 ((xfs_bmbt_rec_base_t)startoff << 9) |
213 ((xfs_bmbt_rec_base_t)startblock >> 43);
214 r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
215 ((xfs_bmbt_rec_base_t)blockcount &
216 (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
217}
218
219/* 112/*
220 * Set all the fields in a bmap extent record from the uncompressed form. 113 * Set all the fields in a bmap extent record from the uncompressed form.
221 */ 114 */
222void 115void
223xfs_bmbt_set_all(
224 xfs_bmbt_rec_host_t *r,
225 xfs_bmbt_irec_t *s)
226{
227 xfs_bmbt_set_allf(r, s->br_startoff, s->br_startblock,
228 s->br_blockcount, s->br_state);
229}
230
231
232/*
233 * Set all the fields in a disk format bmap extent record from the arguments.
234 */
235void
236xfs_bmbt_disk_set_allf(
237 xfs_bmbt_rec_t *r,
238 xfs_fileoff_t startoff,
239 xfs_fsblock_t startblock,
240 xfs_filblks_t blockcount,
241 xfs_exntst_t state)
242{
243 int extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
244
245 ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
246 ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
247 ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
248 ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
249
250 r->l0 = cpu_to_be64(
251 ((xfs_bmbt_rec_base_t)extent_flag << 63) |
252 ((xfs_bmbt_rec_base_t)startoff << 9) |
253 ((xfs_bmbt_rec_base_t)startblock >> 43));
254 r->l1 = cpu_to_be64(
255 ((xfs_bmbt_rec_base_t)startblock << 21) |
256 ((xfs_bmbt_rec_base_t)blockcount &
257 (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
258}
259
260/*
261 * Set all the fields in a bmap extent record from the uncompressed form.
262 */
263STATIC void
264xfs_bmbt_disk_set_all( 116xfs_bmbt_disk_set_all(
265 xfs_bmbt_rec_t *r, 117 struct xfs_bmbt_rec *r,
266 xfs_bmbt_irec_t *s) 118 struct xfs_bmbt_irec *s)
267{
268 xfs_bmbt_disk_set_allf(r, s->br_startoff, s->br_startblock,
269 s->br_blockcount, s->br_state);
270}
271
272/*
273 * Set the blockcount field in a bmap extent record.
274 */
275void
276xfs_bmbt_set_blockcount(
277 xfs_bmbt_rec_host_t *r,
278 xfs_filblks_t v)
279{ 119{
280 ASSERT((v & xfs_mask64hi(43)) == 0); 120 int extent_flag = (s->br_state != XFS_EXT_NORM);
281 r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64hi(43)) |
282 (xfs_bmbt_rec_base_t)(v & xfs_mask64lo(21));
283}
284
285/*
286 * Set the startblock field in a bmap extent record.
287 */
288void
289xfs_bmbt_set_startblock(
290 xfs_bmbt_rec_host_t *r,
291 xfs_fsblock_t v)
292{
293 ASSERT((v & xfs_mask64hi(12)) == 0);
294 r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) |
295 (xfs_bmbt_rec_base_t)(v >> 43);
296 r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) |
297 (xfs_bmbt_rec_base_t)(v << 21);
298}
299 121
300/* 122 ASSERT(s->br_state == XFS_EXT_NORM || s->br_state == XFS_EXT_UNWRITTEN);
301 * Set the startoff field in a bmap extent record. 123 ASSERT(!(s->br_startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)));
302 */ 124 ASSERT(!(s->br_blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)));
303void 125 ASSERT(!(s->br_startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)));
304xfs_bmbt_set_startoff(
305 xfs_bmbt_rec_host_t *r,
306 xfs_fileoff_t v)
307{
308 ASSERT((v & xfs_mask64hi(9)) == 0);
309 r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) xfs_mask64hi(1)) |
310 ((xfs_bmbt_rec_base_t)v << 9) |
311 (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64lo(9));
312}
313 126
314/* 127 put_unaligned_be64(
315 * Set the extent state field in a bmap extent record. 128 ((xfs_bmbt_rec_base_t)extent_flag << 63) |
316 */ 129 ((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
317void 130 ((xfs_bmbt_rec_base_t)s->br_startblock >> 43), &r->l0);
318xfs_bmbt_set_state( 131 put_unaligned_be64(
319 xfs_bmbt_rec_host_t *r, 132 ((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
320 xfs_exntst_t v) 133 ((xfs_bmbt_rec_base_t)s->br_blockcount &
321{ 134 (xfs_bmbt_rec_base_t)xfs_mask64lo(21)), &r->l1);
322 ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN);
323 if (v == XFS_EXT_NORM)
324 r->l0 &= xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN);
325 else
326 r->l0 |= xfs_mask64hi(BMBT_EXNTFLAG_BITLEN);
327} 135}
328 136
329/* 137/*
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index 9da5a8d4f184..135b8c56d23e 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
@@ -98,25 +98,11 @@ struct xfs_trans;
98 */ 98 */
99extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int, 99extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int,
100 struct xfs_btree_block *, int); 100 struct xfs_btree_block *, int);
101extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
102extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r);
103extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r);
104extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r);
105extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r);
106 101
102void xfs_bmbt_disk_set_all(struct xfs_bmbt_rec *r, struct xfs_bmbt_irec *s);
107extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); 103extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
108extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); 104extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
109 105extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
110extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
111extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o,
112 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
113extern void xfs_bmbt_set_blockcount(xfs_bmbt_rec_host_t *r, xfs_filblks_t v);
114extern void xfs_bmbt_set_startblock(xfs_bmbt_rec_host_t *r, xfs_fsblock_t v);
115extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v);
116extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v);
117
118extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o,
119 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
120 106
121extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int, 107extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
122 xfs_bmdr_block_t *, int); 108 xfs_bmdr_block_t *, int);
@@ -136,9 +122,9 @@ extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
136 * Check that the extent does not contain an invalid unwritten extent flag. 122 * Check that the extent does not contain an invalid unwritten extent flag.
137 */ 123 */
138static inline bool xfs_bmbt_validate_extent(struct xfs_mount *mp, int whichfork, 124static inline bool xfs_bmbt_validate_extent(struct xfs_mount *mp, int whichfork,
139 struct xfs_bmbt_rec_host *ep) 125 struct xfs_bmbt_irec *irec)
140{ 126{
141 if (ep->l0 >> (64 - BMBT_EXNTFLAG_BITLEN) == 0) 127 if (irec->br_state == XFS_EXT_NORM)
142 return true; 128 return true;
143 if (whichfork == XFS_DATA_FORK && 129 if (whichfork == XFS_DATA_FORK &&
144 xfs_sb_version_hasextflgbit(&mp->m_sb)) 130 xfs_sb_version_hasextflgbit(&mp->m_sb))
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 5bfb88261c7e..5f33adf8eecb 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -29,6 +29,7 @@
29#include "xfs_inode_item.h" 29#include "xfs_inode_item.h"
30#include "xfs_buf_item.h" 30#include "xfs_buf_item.h"
31#include "xfs_btree.h" 31#include "xfs_btree.h"
32#include "xfs_errortag.h"
32#include "xfs_error.h" 33#include "xfs_error.h"
33#include "xfs_trace.h" 34#include "xfs_trace.h"
34#include "xfs_cksum.h" 35#include "xfs_cksum.h"
@@ -63,44 +64,63 @@ xfs_btree_magic(
63 return magic; 64 return magic;
64} 65}
65 66
66STATIC int /* error (0 or EFSCORRUPTED) */ 67/*
67xfs_btree_check_lblock( 68 * Check a long btree block header. Return the address of the failing check,
68 struct xfs_btree_cur *cur, /* btree cursor */ 69 * or NULL if everything is ok.
69 struct xfs_btree_block *block, /* btree long form block pointer */ 70 */
70 int level, /* level of the btree block */ 71xfs_failaddr_t
71 struct xfs_buf *bp) /* buffer for block, if any */ 72__xfs_btree_check_lblock(
73 struct xfs_btree_cur *cur,
74 struct xfs_btree_block *block,
75 int level,
76 struct xfs_buf *bp)
72{ 77{
73 int lblock_ok = 1; /* block passes checks */ 78 struct xfs_mount *mp = cur->bc_mp;
74 struct xfs_mount *mp; /* file system mount point */
75 xfs_btnum_t btnum = cur->bc_btnum; 79 xfs_btnum_t btnum = cur->bc_btnum;
76 int crc; 80 int crc = xfs_sb_version_hascrc(&mp->m_sb);
77
78 mp = cur->bc_mp;
79 crc = xfs_sb_version_hascrc(&mp->m_sb);
80 81
81 if (crc) { 82 if (crc) {
82 lblock_ok = lblock_ok && 83 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
83 uuid_equal(&block->bb_u.l.bb_uuid, 84 return __this_address;
84 &mp->m_sb.sb_meta_uuid) && 85 if (block->bb_u.l.bb_blkno !=
85 block->bb_u.l.bb_blkno == cpu_to_be64( 86 cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
86 bp ? bp->b_bn : XFS_BUF_DADDR_NULL); 87 return __this_address;
88 if (block->bb_u.l.bb_pad != cpu_to_be32(0))
89 return __this_address;
87 } 90 }
88 91
89 lblock_ok = lblock_ok && 92 if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
90 be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) && 93 return __this_address;
91 be16_to_cpu(block->bb_level) == level && 94 if (be16_to_cpu(block->bb_level) != level)
92 be16_to_cpu(block->bb_numrecs) <= 95 return __this_address;
93 cur->bc_ops->get_maxrecs(cur, level) && 96 if (be16_to_cpu(block->bb_numrecs) >
94 block->bb_u.l.bb_leftsib && 97 cur->bc_ops->get_maxrecs(cur, level))
95 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) || 98 return __this_address;
96 XFS_FSB_SANITY_CHECK(mp, 99 if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
97 be64_to_cpu(block->bb_u.l.bb_leftsib))) && 100 !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_leftsib),
98 block->bb_u.l.bb_rightsib && 101 level + 1))
99 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) || 102 return __this_address;
100 XFS_FSB_SANITY_CHECK(mp, 103 if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
101 be64_to_cpu(block->bb_u.l.bb_rightsib))); 104 !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_rightsib),
102 105 level + 1))
103 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, 106 return __this_address;
107
108 return NULL;
109}
110
111/* Check a long btree block header. */
112static int
113xfs_btree_check_lblock(
114 struct xfs_btree_cur *cur,
115 struct xfs_btree_block *block,
116 int level,
117 struct xfs_buf *bp)
118{
119 struct xfs_mount *mp = cur->bc_mp;
120 xfs_failaddr_t fa;
121
122 fa = __xfs_btree_check_lblock(cur, block, level, bp);
123 if (unlikely(XFS_TEST_ERROR(fa != NULL, mp,
104 XFS_ERRTAG_BTREE_CHECK_LBLOCK))) { 124 XFS_ERRTAG_BTREE_CHECK_LBLOCK))) {
105 if (bp) 125 if (bp)
106 trace_xfs_btree_corrupt(bp, _RET_IP_); 126 trace_xfs_btree_corrupt(bp, _RET_IP_);
@@ -110,48 +130,61 @@ xfs_btree_check_lblock(
110 return 0; 130 return 0;
111} 131}
112 132
113STATIC int /* error (0 or EFSCORRUPTED) */ 133/*
114xfs_btree_check_sblock( 134 * Check a short btree block header. Return the address of the failing check,
115 struct xfs_btree_cur *cur, /* btree cursor */ 135 * or NULL if everything is ok.
116 struct xfs_btree_block *block, /* btree short form block pointer */ 136 */
117 int level, /* level of the btree block */ 137xfs_failaddr_t
118 struct xfs_buf *bp) /* buffer containing block */ 138__xfs_btree_check_sblock(
139 struct xfs_btree_cur *cur,
140 struct xfs_btree_block *block,
141 int level,
142 struct xfs_buf *bp)
119{ 143{
120 struct xfs_mount *mp; /* file system mount point */ 144 struct xfs_mount *mp = cur->bc_mp;
121 struct xfs_buf *agbp; /* buffer for ag. freespace struct */
122 struct xfs_agf *agf; /* ag. freespace structure */
123 xfs_agblock_t agflen; /* native ag. freespace length */
124 int sblock_ok = 1; /* block passes checks */
125 xfs_btnum_t btnum = cur->bc_btnum; 145 xfs_btnum_t btnum = cur->bc_btnum;
126 int crc; 146 int crc = xfs_sb_version_hascrc(&mp->m_sb);
127
128 mp = cur->bc_mp;
129 crc = xfs_sb_version_hascrc(&mp->m_sb);
130 agbp = cur->bc_private.a.agbp;
131 agf = XFS_BUF_TO_AGF(agbp);
132 agflen = be32_to_cpu(agf->agf_length);
133 147
134 if (crc) { 148 if (crc) {
135 sblock_ok = sblock_ok && 149 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
136 uuid_equal(&block->bb_u.s.bb_uuid, 150 return __this_address;
137 &mp->m_sb.sb_meta_uuid) && 151 if (block->bb_u.s.bb_blkno !=
138 block->bb_u.s.bb_blkno == cpu_to_be64( 152 cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
139 bp ? bp->b_bn : XFS_BUF_DADDR_NULL); 153 return __this_address;
140 } 154 }
141 155
142 sblock_ok = sblock_ok && 156 if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
143 be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) && 157 return __this_address;
144 be16_to_cpu(block->bb_level) == level && 158 if (be16_to_cpu(block->bb_level) != level)
145 be16_to_cpu(block->bb_numrecs) <= 159 return __this_address;
146 cur->bc_ops->get_maxrecs(cur, level) && 160 if (be16_to_cpu(block->bb_numrecs) >
147 (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || 161 cur->bc_ops->get_maxrecs(cur, level))
148 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && 162 return __this_address;
149 block->bb_u.s.bb_leftsib && 163 if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
150 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || 164 !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_leftsib),
151 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && 165 level + 1))
152 block->bb_u.s.bb_rightsib; 166 return __this_address;
153 167 if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
154 if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp, 168 !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_rightsib),
169 level + 1))
170 return __this_address;
171
172 return NULL;
173}
174
175/* Check a short btree block header. */
176STATIC int
177xfs_btree_check_sblock(
178 struct xfs_btree_cur *cur,
179 struct xfs_btree_block *block,
180 int level,
181 struct xfs_buf *bp)
182{
183 struct xfs_mount *mp = cur->bc_mp;
184 xfs_failaddr_t fa;
185
186 fa = __xfs_btree_check_sblock(cur, block, level, bp);
187 if (unlikely(XFS_TEST_ERROR(fa != NULL, mp,
155 XFS_ERRTAG_BTREE_CHECK_SBLOCK))) { 188 XFS_ERRTAG_BTREE_CHECK_SBLOCK))) {
156 if (bp) 189 if (bp)
157 trace_xfs_btree_corrupt(bp, _RET_IP_); 190 trace_xfs_btree_corrupt(bp, _RET_IP_);
@@ -177,59 +210,53 @@ xfs_btree_check_block(
177 return xfs_btree_check_sblock(cur, block, level, bp); 210 return xfs_btree_check_sblock(cur, block, level, bp);
178} 211}
179 212
180/* 213/* Check that this long pointer is valid and points within the fs. */
181 * Check that (long) pointer is ok. 214bool
182 */
183int /* error (0 or EFSCORRUPTED) */
184xfs_btree_check_lptr( 215xfs_btree_check_lptr(
185 struct xfs_btree_cur *cur, /* btree cursor */ 216 struct xfs_btree_cur *cur,
186 xfs_fsblock_t bno, /* btree block disk address */ 217 xfs_fsblock_t fsbno,
187 int level) /* btree block level */ 218 int level)
188{ 219{
189 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, 220 if (level <= 0)
190 level > 0 && 221 return false;
191 bno != NULLFSBLOCK && 222 return xfs_verify_fsbno(cur->bc_mp, fsbno);
192 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
193 return 0;
194} 223}
195 224
196#ifdef DEBUG 225/* Check that this short pointer is valid and points within the AG. */
197/* 226bool
198 * Check that (short) pointer is ok.
199 */
200STATIC int /* error (0 or EFSCORRUPTED) */
201xfs_btree_check_sptr( 227xfs_btree_check_sptr(
202 struct xfs_btree_cur *cur, /* btree cursor */ 228 struct xfs_btree_cur *cur,
203 xfs_agblock_t bno, /* btree block disk address */ 229 xfs_agblock_t agbno,
204 int level) /* btree block level */ 230 int level)
205{ 231{
206 xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks; 232 if (level <= 0)
207 233 return false;
208 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, 234 return xfs_verify_agbno(cur->bc_mp, cur->bc_private.a.agno, agbno);
209 level > 0 &&
210 bno != NULLAGBLOCK &&
211 bno != 0 &&
212 bno < agblocks);
213 return 0;
214} 235}
215 236
237#ifdef DEBUG
216/* 238/*
217 * Check that block ptr is ok. 239 * Check that a given (indexed) btree pointer at a certain level of a
240 * btree is valid and doesn't point past where it should.
218 */ 241 */
219STATIC int /* error (0 or EFSCORRUPTED) */ 242static int
220xfs_btree_check_ptr( 243xfs_btree_check_ptr(
221 struct xfs_btree_cur *cur, /* btree cursor */ 244 struct xfs_btree_cur *cur,
222 union xfs_btree_ptr *ptr, /* btree block disk address */ 245 union xfs_btree_ptr *ptr,
223 int index, /* offset from ptr to check */ 246 int index,
224 int level) /* btree block level */ 247 int level)
225{ 248{
226 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 249 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
227 return xfs_btree_check_lptr(cur, 250 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
228 be64_to_cpu((&ptr->l)[index]), level); 251 xfs_btree_check_lptr(cur,
252 be64_to_cpu((&ptr->l)[index]), level));
229 } else { 253 } else {
230 return xfs_btree_check_sptr(cur, 254 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
231 be32_to_cpu((&ptr->s)[index]), level); 255 xfs_btree_check_sptr(cur,
256 be32_to_cpu((&ptr->s)[index]), level));
232 } 257 }
258
259 return 0;
233} 260}
234#endif 261#endif
235 262
@@ -1027,7 +1054,7 @@ xfs_btree_setbuf(
1027 } 1054 }
1028} 1055}
1029 1056
1030STATIC int 1057bool
1031xfs_btree_ptr_is_null( 1058xfs_btree_ptr_is_null(
1032 struct xfs_btree_cur *cur, 1059 struct xfs_btree_cur *cur,
1033 union xfs_btree_ptr *ptr) 1060 union xfs_btree_ptr *ptr)
@@ -1052,7 +1079,7 @@ xfs_btree_set_ptr_null(
1052/* 1079/*
1053 * Get/set/init sibling pointers 1080 * Get/set/init sibling pointers
1054 */ 1081 */
1055STATIC void 1082void
1056xfs_btree_get_sibling( 1083xfs_btree_get_sibling(
1057 struct xfs_btree_cur *cur, 1084 struct xfs_btree_cur *cur,
1058 struct xfs_btree_block *block, 1085 struct xfs_btree_block *block,
@@ -2001,7 +2028,7 @@ error0:
2001} 2028}
2002 2029
2003/* Find the high key storage area from a regular key. */ 2030/* Find the high key storage area from a regular key. */
2004STATIC union xfs_btree_key * 2031union xfs_btree_key *
2005xfs_btree_high_key_from_key( 2032xfs_btree_high_key_from_key(
2006 struct xfs_btree_cur *cur, 2033 struct xfs_btree_cur *cur,
2007 union xfs_btree_key *key) 2034 union xfs_btree_key *key)
@@ -2075,7 +2102,7 @@ xfs_btree_get_node_keys(
2075} 2102}
2076 2103
2077/* Derive the keys for any btree block. */ 2104/* Derive the keys for any btree block. */
2078STATIC void 2105void
2079xfs_btree_get_keys( 2106xfs_btree_get_keys(
2080 struct xfs_btree_cur *cur, 2107 struct xfs_btree_cur *cur,
2081 struct xfs_btree_block *block, 2108 struct xfs_btree_block *block,
@@ -4914,3 +4941,15 @@ xfs_btree_count_blocks(
4914 return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper, 4941 return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
4915 blocks); 4942 blocks);
4916} 4943}
4944
4945/* Compare two btree pointers. */
4946int64_t
4947xfs_btree_diff_two_ptrs(
4948 struct xfs_btree_cur *cur,
4949 const union xfs_btree_ptr *a,
4950 const union xfs_btree_ptr *b)
4951{
4952 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
4953 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
4954 return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
4955}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index f2a88c3b1159..b57501c6f71d 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -255,6 +255,14 @@ typedef struct xfs_btree_cur
255 */ 255 */
256#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)((bp)->b_addr)) 256#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)((bp)->b_addr))
257 257
258/*
259 * Internal long and short btree block checks. They return NULL if the
260 * block is ok or the address of the failed check otherwise.
261 */
262xfs_failaddr_t __xfs_btree_check_lblock(struct xfs_btree_cur *cur,
263 struct xfs_btree_block *block, int level, struct xfs_buf *bp);
264xfs_failaddr_t __xfs_btree_check_sblock(struct xfs_btree_cur *cur,
265 struct xfs_btree_block *block, int level, struct xfs_buf *bp);
258 266
259/* 267/*
260 * Check that block header is ok. 268 * Check that block header is ok.
@@ -269,10 +277,19 @@ xfs_btree_check_block(
269/* 277/*
270 * Check that (long) pointer is ok. 278 * Check that (long) pointer is ok.
271 */ 279 */
272int /* error (0 or EFSCORRUPTED) */ 280bool /* error (0 or EFSCORRUPTED) */
273xfs_btree_check_lptr( 281xfs_btree_check_lptr(
274 struct xfs_btree_cur *cur, /* btree cursor */ 282 struct xfs_btree_cur *cur, /* btree cursor */
275 xfs_fsblock_t ptr, /* btree block disk address */ 283 xfs_fsblock_t fsbno, /* btree block disk address */
284 int level); /* btree block level */
285
286/*
287 * Check that (short) pointer is ok.
288 */
289bool /* error (0 or EFSCORRUPTED) */
290xfs_btree_check_sptr(
291 struct xfs_btree_cur *cur, /* btree cursor */
292 xfs_agblock_t agbno, /* btree block disk address */
276 int level); /* btree block level */ 293 int level); /* btree block level */
277 294
278/* 295/*
@@ -517,5 +534,16 @@ int xfs_btree_lookup_get_block(struct xfs_btree_cur *cur, int level,
517 union xfs_btree_ptr *pp, struct xfs_btree_block **blkp); 534 union xfs_btree_ptr *pp, struct xfs_btree_block **blkp);
518struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur, 535struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur,
519 int level, struct xfs_buf **bpp); 536 int level, struct xfs_buf **bpp);
537bool xfs_btree_ptr_is_null(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr);
538int64_t xfs_btree_diff_two_ptrs(struct xfs_btree_cur *cur,
539 const union xfs_btree_ptr *a,
540 const union xfs_btree_ptr *b);
541void xfs_btree_get_sibling(struct xfs_btree_cur *cur,
542 struct xfs_btree_block *block,
543 union xfs_btree_ptr *ptr, int lr);
544void xfs_btree_get_keys(struct xfs_btree_cur *cur,
545 struct xfs_btree_block *block, union xfs_btree_key *key);
546union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur,
547 union xfs_btree_key *key);
520 548
521#endif /* __XFS_BTREE_H__ */ 549#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 6d4335815c3f..651611530d2f 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -1466,6 +1466,7 @@ xfs_da3_node_lookup_int(
1466 int max; 1466 int max;
1467 int error; 1467 int error;
1468 int retval; 1468 int retval;
1469 unsigned int expected_level = 0;
1469 struct xfs_inode *dp = state->args->dp; 1470 struct xfs_inode *dp = state->args->dp;
1470 1471
1471 args = state->args; 1472 args = state->args;
@@ -1474,7 +1475,7 @@ xfs_da3_node_lookup_int(
1474 * Descend thru the B-tree searching each level for the right 1475 * Descend thru the B-tree searching each level for the right
1475 * node to use, until the right hashval is found. 1476 * node to use, until the right hashval is found.
1476 */ 1477 */
1477 blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0; 1478 blkno = args->geo->leafblk;
1478 for (blk = &state->path.blk[0], state->path.active = 1; 1479 for (blk = &state->path.blk[0], state->path.active = 1;
1479 state->path.active <= XFS_DA_NODE_MAXDEPTH; 1480 state->path.active <= XFS_DA_NODE_MAXDEPTH;
1480 blk++, state->path.active++) { 1481 blk++, state->path.active++) {
@@ -1517,6 +1518,18 @@ xfs_da3_node_lookup_int(
1517 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 1518 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
1518 btree = dp->d_ops->node_tree_p(node); 1519 btree = dp->d_ops->node_tree_p(node);
1519 1520
1521 /* Tree taller than we can handle; bail out! */
1522 if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH)
1523 return -EFSCORRUPTED;
1524
1525 /* Check the level from the root. */
1526 if (blkno == args->geo->leafblk)
1527 expected_level = nodehdr.level - 1;
1528 else if (expected_level != nodehdr.level)
1529 return -EFSCORRUPTED;
1530 else
1531 expected_level--;
1532
1520 max = nodehdr.count; 1533 max = nodehdr.count;
1521 blk->hashval = be32_to_cpu(btree[max - 1].hashval); 1534 blk->hashval = be32_to_cpu(btree[max - 1].hashval);
1522 1535
@@ -1562,8 +1575,15 @@ xfs_da3_node_lookup_int(
1562 blk->index = probe; 1575 blk->index = probe;
1563 blkno = be32_to_cpu(btree[probe].before); 1576 blkno = be32_to_cpu(btree[probe].before);
1564 } 1577 }
1578
1579 /* We can't point back to the root. */
1580 if (blkno == args->geo->leafblk)
1581 return -EFSCORRUPTED;
1565 } 1582 }
1566 1583
1584 if (expected_level != 0)
1585 return -EFSCORRUPTED;
1586
1567 /* 1587 /*
1568 * A leaf block that ends in the hashval that we are interested in 1588 * A leaf block that ends in the hashval that we are interested in
1569 * (final hashval == search hashval) means that the next block may 1589 * (final hashval == search hashval) means that the next block may
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index ccf9783fd3f0..e10778c102ea 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -30,6 +30,8 @@
30#include "xfs_bmap.h" 30#include "xfs_bmap.h"
31#include "xfs_dir2.h" 31#include "xfs_dir2.h"
32#include "xfs_dir2_priv.h" 32#include "xfs_dir2_priv.h"
33#include "xfs_ialloc.h"
34#include "xfs_errortag.h"
33#include "xfs_error.h" 35#include "xfs_error.h"
34#include "xfs_trace.h" 36#include "xfs_trace.h"
35 37
@@ -38,7 +40,9 @@ struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
38/* 40/*
39 * Convert inode mode to directory entry filetype 41 * Convert inode mode to directory entry filetype
40 */ 42 */
41unsigned char xfs_mode_to_ftype(int mode) 43unsigned char
44xfs_mode_to_ftype(
45 int mode)
42{ 46{
43 switch (mode & S_IFMT) { 47 switch (mode & S_IFMT) {
44 case S_IFREG: 48 case S_IFREG:
@@ -202,22 +206,8 @@ xfs_dir_ino_validate(
202 xfs_mount_t *mp, 206 xfs_mount_t *mp,
203 xfs_ino_t ino) 207 xfs_ino_t ino)
204{ 208{
205 xfs_agblock_t agblkno; 209 bool ino_ok = xfs_verify_dir_ino(mp, ino);
206 xfs_agino_t agino; 210
207 xfs_agnumber_t agno;
208 int ino_ok;
209 int ioff;
210
211 agno = XFS_INO_TO_AGNO(mp, ino);
212 agblkno = XFS_INO_TO_AGBNO(mp, ino);
213 ioff = XFS_INO_TO_OFFSET(mp, ino);
214 agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff);
215 ino_ok =
216 agno < mp->m_sb.sb_agcount &&
217 agblkno < mp->m_sb.sb_agblocks &&
218 agblkno != 0 &&
219 ioff < (1 << mp->m_sb.sb_inopblog) &&
220 XFS_AGINO_TO_INO(mp, agno, agino) == ino;
221 if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE))) { 211 if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE))) {
222 xfs_warn(mp, "Invalid inode number 0x%Lx", 212 xfs_warn(mp, "Invalid inode number 0x%Lx",
223 (unsigned long long) ino); 213 (unsigned long long) ino);
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index 21c8f8bf94d5..1a8f2cf977ca 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -324,4 +324,21 @@ xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp)
324 sizeof(struct xfs_dir2_leaf_tail)); 324 sizeof(struct xfs_dir2_leaf_tail));
325} 325}
326 326
327/*
328 * The Linux API doesn't pass down the total size of the buffer
329 * we read into down to the filesystem. With the filldir concept
330 * it's not needed for correct information, but the XFS dir2 leaf
331 * code wants an estimate of the buffer size to calculate it's
332 * readahead window and size the buffers used for mapping to
333 * physical blocks.
334 *
335 * Try to give it an estimate that's good enough, maybe at some
336 * point we can change the ->readdir prototype to include the
337 * buffer size. For now we use the current glibc buffer size.
338 * musl libc hardcodes 2k and dietlibc uses PAGE_SIZE.
339 */
340#define XFS_READDIR_BUFSIZE (32768)
341
342unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype);
343
327#endif /* __XFS_DIR2_H__ */ 344#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h
new file mode 100644
index 000000000000..bc1789d95152
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_errortag.h
@@ -0,0 +1,106 @@
1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * Copyright (C) 2017 Oracle.
4 * All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#ifndef __XFS_ERRORTAG_H_
21#define __XFS_ERRORTAG_H_
22
23/*
24 * error injection tags - the labels can be anything you want
25 * but each tag should have its own unique number
26 */
27
28#define XFS_ERRTAG_NOERROR 0
29#define XFS_ERRTAG_IFLUSH_1 1
30#define XFS_ERRTAG_IFLUSH_2 2
31#define XFS_ERRTAG_IFLUSH_3 3
32#define XFS_ERRTAG_IFLUSH_4 4
33#define XFS_ERRTAG_IFLUSH_5 5
34#define XFS_ERRTAG_IFLUSH_6 6
35#define XFS_ERRTAG_DA_READ_BUF 7
36#define XFS_ERRTAG_BTREE_CHECK_LBLOCK 8
37#define XFS_ERRTAG_BTREE_CHECK_SBLOCK 9
38#define XFS_ERRTAG_ALLOC_READ_AGF 10
39#define XFS_ERRTAG_IALLOC_READ_AGI 11
40#define XFS_ERRTAG_ITOBP_INOTOBP 12
41#define XFS_ERRTAG_IUNLINK 13
42#define XFS_ERRTAG_IUNLINK_REMOVE 14
43#define XFS_ERRTAG_DIR_INO_VALIDATE 15
44#define XFS_ERRTAG_BULKSTAT_READ_CHUNK 16
45#define XFS_ERRTAG_IODONE_IOERR 17
46#define XFS_ERRTAG_STRATREAD_IOERR 18
47#define XFS_ERRTAG_STRATCMPL_IOERR 19
48#define XFS_ERRTAG_DIOWRITE_IOERR 20
49#define XFS_ERRTAG_BMAPIFORMAT 21
50#define XFS_ERRTAG_FREE_EXTENT 22
51#define XFS_ERRTAG_RMAP_FINISH_ONE 23
52#define XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE 24
53#define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25
54#define XFS_ERRTAG_BMAP_FINISH_ONE 26
55#define XFS_ERRTAG_AG_RESV_CRITICAL 27
56/*
57 * DEBUG mode instrumentation to test and/or trigger delayed allocation
58 * block killing in the event of failed writes. When enabled, all
59 * buffered writes are silenty dropped and handled as if they failed.
60 * All delalloc blocks in the range of the write (including pre-existing
61 * delalloc blocks!) are tossed as part of the write failure error
62 * handling sequence.
63 */
64#define XFS_ERRTAG_DROP_WRITES 28
65#define XFS_ERRTAG_LOG_BAD_CRC 29
66#define XFS_ERRTAG_LOG_ITEM_PIN 30
67#define XFS_ERRTAG_BUF_LRU_REF 31
68#define XFS_ERRTAG_MAX 32
69
70/*
71 * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
72 */
73#define XFS_RANDOM_DEFAULT 100
74#define XFS_RANDOM_IFLUSH_1 XFS_RANDOM_DEFAULT
75#define XFS_RANDOM_IFLUSH_2 XFS_RANDOM_DEFAULT
76#define XFS_RANDOM_IFLUSH_3 XFS_RANDOM_DEFAULT
77#define XFS_RANDOM_IFLUSH_4 XFS_RANDOM_DEFAULT
78#define XFS_RANDOM_IFLUSH_5 XFS_RANDOM_DEFAULT
79#define XFS_RANDOM_IFLUSH_6 XFS_RANDOM_DEFAULT
80#define XFS_RANDOM_DA_READ_BUF XFS_RANDOM_DEFAULT
81#define XFS_RANDOM_BTREE_CHECK_LBLOCK (XFS_RANDOM_DEFAULT/4)
82#define XFS_RANDOM_BTREE_CHECK_SBLOCK XFS_RANDOM_DEFAULT
83#define XFS_RANDOM_ALLOC_READ_AGF XFS_RANDOM_DEFAULT
84#define XFS_RANDOM_IALLOC_READ_AGI XFS_RANDOM_DEFAULT
85#define XFS_RANDOM_ITOBP_INOTOBP XFS_RANDOM_DEFAULT
86#define XFS_RANDOM_IUNLINK XFS_RANDOM_DEFAULT
87#define XFS_RANDOM_IUNLINK_REMOVE XFS_RANDOM_DEFAULT
88#define XFS_RANDOM_DIR_INO_VALIDATE XFS_RANDOM_DEFAULT
89#define XFS_RANDOM_BULKSTAT_READ_CHUNK XFS_RANDOM_DEFAULT
90#define XFS_RANDOM_IODONE_IOERR (XFS_RANDOM_DEFAULT/10)
91#define XFS_RANDOM_STRATREAD_IOERR (XFS_RANDOM_DEFAULT/10)
92#define XFS_RANDOM_STRATCMPL_IOERR (XFS_RANDOM_DEFAULT/10)
93#define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10)
94#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT
95#define XFS_RANDOM_FREE_EXTENT 1
96#define XFS_RANDOM_RMAP_FINISH_ONE 1
97#define XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE 1
98#define XFS_RANDOM_REFCOUNT_FINISH_ONE 1
99#define XFS_RANDOM_BMAP_FINISH_ONE 1
100#define XFS_RANDOM_AG_RESV_CRITICAL 4
101#define XFS_RANDOM_DROP_WRITES 1
102#define XFS_RANDOM_LOG_BAD_CRC 1
103#define XFS_RANDOM_LOG_ITEM_PIN 1
104#define XFS_RANDOM_BUF_LRU_REF 2
105
106#endif /* __XFS_ERRORTAG_H_ */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 23229f0c5b15..1acb584fc5f7 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -315,6 +315,11 @@ static inline bool xfs_sb_good_version(struct xfs_sb *sbp)
315 return false; 315 return false;
316} 316}
317 317
318static inline bool xfs_sb_version_hasrealtime(struct xfs_sb *sbp)
319{
320 return sbp->sb_rblocks > 0;
321}
322
318/* 323/*
319 * Detect a mismatched features2 field. Older kernels read/wrote 324 * Detect a mismatched features2 field. Older kernels read/wrote
320 * this into the wrong slot, so to be safe we keep them in sync. 325 * this into the wrong slot, so to be safe we keep them in sync.
@@ -500,12 +505,12 @@ xfs_sb_has_incompat_log_feature(
500/* 505/*
501 * V5 superblock specific feature checks 506 * V5 superblock specific feature checks
502 */ 507 */
503static inline int xfs_sb_version_hascrc(struct xfs_sb *sbp) 508static inline bool xfs_sb_version_hascrc(struct xfs_sb *sbp)
504{ 509{
505 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; 510 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
506} 511}
507 512
508static inline int xfs_sb_version_has_pquotino(struct xfs_sb *sbp) 513static inline bool xfs_sb_version_has_pquotino(struct xfs_sb *sbp)
509{ 514{
510 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; 515 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
511} 516}
@@ -518,7 +523,7 @@ static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp)
518 (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)); 523 (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
519} 524}
520 525
521static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp) 526static inline bool xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
522{ 527{
523 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && 528 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
524 (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT); 529 (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
@@ -941,7 +946,7 @@ typedef enum xfs_dinode_fmt {
941 XFS_DINODE_FMT_LOCAL, /* bulk data */ 946 XFS_DINODE_FMT_LOCAL, /* bulk data */
942 XFS_DINODE_FMT_EXTENTS, /* struct xfs_bmbt_rec */ 947 XFS_DINODE_FMT_EXTENTS, /* struct xfs_bmbt_rec */
943 XFS_DINODE_FMT_BTREE, /* struct xfs_bmdr_block */ 948 XFS_DINODE_FMT_BTREE, /* struct xfs_bmdr_block */
944 XFS_DINODE_FMT_UUID /* uuid_t */ 949 XFS_DINODE_FMT_UUID /* added long ago, but never used */
945} xfs_dinode_fmt_t; 950} xfs_dinode_fmt_t;
946 951
947/* 952/*
@@ -1142,7 +1147,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
1142 * Dquot and dquot block format definitions 1147 * Dquot and dquot block format definitions
1143 */ 1148 */
1144#define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */ 1149#define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */
1145#define XFS_DQUOT_VERSION (u_int8_t)0x01 /* latest version number */ 1150#define XFS_DQUOT_VERSION (uint8_t)0x01 /* latest version number */
1146 1151
1147/* 1152/*
1148 * This is the main portion of the on-disk representation of quota 1153 * This is the main portion of the on-disk representation of quota
@@ -1548,10 +1553,6 @@ typedef struct xfs_bmbt_rec {
1548typedef uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ 1553typedef uint64_t xfs_bmbt_rec_base_t; /* use this for casts */
1549typedef xfs_bmbt_rec_t xfs_bmdr_rec_t; 1554typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
1550 1555
1551typedef struct xfs_bmbt_rec_host {
1552 uint64_t l0, l1;
1553} xfs_bmbt_rec_host_t;
1554
1555/* 1556/*
1556 * Values and macros for delayed-allocation startblock fields. 1557 * Values and macros for delayed-allocation startblock fields.
1557 */ 1558 */
@@ -1577,24 +1578,6 @@ static inline xfs_filblks_t startblockval(xfs_fsblock_t x)
1577} 1578}
1578 1579
1579/* 1580/*
1580 * Possible extent states.
1581 */
1582typedef enum {
1583 XFS_EXT_NORM, XFS_EXT_UNWRITTEN,
1584} xfs_exntst_t;
1585
1586/*
1587 * Incore version of above.
1588 */
1589typedef struct xfs_bmbt_irec
1590{
1591 xfs_fileoff_t br_startoff; /* starting file offset */
1592 xfs_fsblock_t br_startblock; /* starting block number */
1593 xfs_filblks_t br_blockcount; /* number of blocks */
1594 xfs_exntst_t br_state; /* extent state */
1595} xfs_bmbt_irec_t;
1596
1597/*
1598 * Key structure for non-leaf levels of the tree. 1581 * Key structure for non-leaf levels of the tree.
1599 */ 1582 */
1600typedef struct xfs_bmbt_key { 1583typedef struct xfs_bmbt_key {
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 8c61f21535d4..b90924104596 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -468,6 +468,82 @@ typedef struct xfs_swapext
468#define XFS_FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ 468#define XFS_FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
469#define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ 469#define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
470 470
471/* metadata scrubbing */
472struct xfs_scrub_metadata {
473 __u32 sm_type; /* What to check? */
474 __u32 sm_flags; /* flags; see below. */
475 __u64 sm_ino; /* inode number. */
476 __u32 sm_gen; /* inode generation. */
477 __u32 sm_agno; /* ag number. */
478 __u64 sm_reserved[5]; /* pad to 64 bytes */
479};
480
481/*
482 * Metadata types and flags for scrub operation.
483 */
484
485/* Scrub subcommands. */
486#define XFS_SCRUB_TYPE_PROBE 0 /* presence test ioctl */
487#define XFS_SCRUB_TYPE_SB 1 /* superblock */
488#define XFS_SCRUB_TYPE_AGF 2 /* AG free header */
489#define XFS_SCRUB_TYPE_AGFL 3 /* AG free list */
490#define XFS_SCRUB_TYPE_AGI 4 /* AG inode header */
491#define XFS_SCRUB_TYPE_BNOBT 5 /* freesp by block btree */
492#define XFS_SCRUB_TYPE_CNTBT 6 /* freesp by length btree */
493#define XFS_SCRUB_TYPE_INOBT 7 /* inode btree */
494#define XFS_SCRUB_TYPE_FINOBT 8 /* free inode btree */
495#define XFS_SCRUB_TYPE_RMAPBT 9 /* reverse mapping btree */
496#define XFS_SCRUB_TYPE_REFCNTBT 10 /* reference count btree */
497#define XFS_SCRUB_TYPE_INODE 11 /* inode record */
498#define XFS_SCRUB_TYPE_BMBTD 12 /* data fork block mapping */
499#define XFS_SCRUB_TYPE_BMBTA 13 /* attr fork block mapping */
500#define XFS_SCRUB_TYPE_BMBTC 14 /* CoW fork block mapping */
501#define XFS_SCRUB_TYPE_DIR 15 /* directory */
502#define XFS_SCRUB_TYPE_XATTR 16 /* extended attribute */
503#define XFS_SCRUB_TYPE_SYMLINK 17 /* symbolic link */
504#define XFS_SCRUB_TYPE_PARENT 18 /* parent pointers */
505#define XFS_SCRUB_TYPE_RTBITMAP 19 /* realtime bitmap */
506#define XFS_SCRUB_TYPE_RTSUM 20 /* realtime summary */
507#define XFS_SCRUB_TYPE_UQUOTA 21 /* user quotas */
508#define XFS_SCRUB_TYPE_GQUOTA 22 /* group quotas */
509#define XFS_SCRUB_TYPE_PQUOTA 23 /* project quotas */
510
511/* Number of scrub subcommands. */
512#define XFS_SCRUB_TYPE_NR 24
513
514/* i: Repair this metadata. */
515#define XFS_SCRUB_IFLAG_REPAIR (1 << 0)
516
517/* o: Metadata object needs repair. */
518#define XFS_SCRUB_OFLAG_CORRUPT (1 << 1)
519
520/*
521 * o: Metadata object could be optimized. It's not corrupt, but
522 * we could improve on it somehow.
523 */
524#define XFS_SCRUB_OFLAG_PREEN (1 << 2)
525
526/* o: Cross-referencing failed. */
527#define XFS_SCRUB_OFLAG_XFAIL (1 << 3)
528
529/* o: Metadata object disagrees with cross-referenced metadata. */
530#define XFS_SCRUB_OFLAG_XCORRUPT (1 << 4)
531
532/* o: Scan was not complete. */
533#define XFS_SCRUB_OFLAG_INCOMPLETE (1 << 5)
534
535/* o: Metadata object looked funny but isn't corrupt. */
536#define XFS_SCRUB_OFLAG_WARNING (1 << 6)
537
538#define XFS_SCRUB_FLAGS_IN (XFS_SCRUB_IFLAG_REPAIR)
539#define XFS_SCRUB_FLAGS_OUT (XFS_SCRUB_OFLAG_CORRUPT | \
540 XFS_SCRUB_OFLAG_PREEN | \
541 XFS_SCRUB_OFLAG_XFAIL | \
542 XFS_SCRUB_OFLAG_XCORRUPT | \
543 XFS_SCRUB_OFLAG_INCOMPLETE | \
544 XFS_SCRUB_OFLAG_WARNING)
545#define XFS_SCRUB_FLAGS_ALL (XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)
546
471/* 547/*
472 * ioctl limits 548 * ioctl limits
473 */ 549 */
@@ -511,6 +587,7 @@ typedef struct xfs_swapext
511#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) 587#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
512#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks) 588#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks)
513/* XFS_IOC_GETFSMAP ------ hoisted 59 */ 589/* XFS_IOC_GETFSMAP ------ hoisted 59 */
590#define XFS_IOC_SCRUB_METADATA _IOWR('X', 60, struct xfs_scrub_metadata)
514 591
515/* 592/*
516 * ioctl commands that replace IRIX syssgi()'s 593 * ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index dfd643909f85..de3f04a98656 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -31,6 +31,7 @@
31#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
32#include "xfs_alloc.h" 32#include "xfs_alloc.h"
33#include "xfs_rtalloc.h" 33#include "xfs_rtalloc.h"
34#include "xfs_errortag.h"
34#include "xfs_error.h" 35#include "xfs_error.h"
35#include "xfs_bmap.h" 36#include "xfs_bmap.h"
36#include "xfs_cksum.h" 37#include "xfs_cksum.h"
@@ -2664,3 +2665,93 @@ xfs_ialloc_pagi_init(
2664 xfs_trans_brelse(tp, bp); 2665 xfs_trans_brelse(tp, bp);
2665 return 0; 2666 return 0;
2666} 2667}
2668
2669/* Calculate the first and last possible inode number in an AG. */
2670void
2671xfs_ialloc_agino_range(
2672 struct xfs_mount *mp,
2673 xfs_agnumber_t agno,
2674 xfs_agino_t *first,
2675 xfs_agino_t *last)
2676{
2677 xfs_agblock_t bno;
2678 xfs_agblock_t eoag;
2679
2680 eoag = xfs_ag_block_count(mp, agno);
2681
2682 /*
2683 * Calculate the first inode, which will be in the first
2684 * cluster-aligned block after the AGFL.
2685 */
2686 bno = round_up(XFS_AGFL_BLOCK(mp) + 1,
2687 xfs_ialloc_cluster_alignment(mp));
2688 *first = XFS_OFFBNO_TO_AGINO(mp, bno, 0);
2689
2690 /*
2691 * Calculate the last inode, which will be at the end of the
2692 * last (aligned) cluster that can be allocated in the AG.
2693 */
2694 bno = round_down(eoag, xfs_ialloc_cluster_alignment(mp));
2695 *last = XFS_OFFBNO_TO_AGINO(mp, bno, 0) - 1;
2696}
2697
2698/*
2699 * Verify that an AG inode number pointer neither points outside the AG
2700 * nor points at static metadata.
2701 */
2702bool
2703xfs_verify_agino(
2704 struct xfs_mount *mp,
2705 xfs_agnumber_t agno,
2706 xfs_agino_t agino)
2707{
2708 xfs_agino_t first;
2709 xfs_agino_t last;
2710
2711 xfs_ialloc_agino_range(mp, agno, &first, &last);
2712 return agino >= first && agino <= last;
2713}
2714
2715/*
2716 * Verify that an FS inode number pointer neither points outside the
2717 * filesystem nor points at static AG metadata.
2718 */
2719bool
2720xfs_verify_ino(
2721 struct xfs_mount *mp,
2722 xfs_ino_t ino)
2723{
2724 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ino);
2725 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
2726
2727 if (agno >= mp->m_sb.sb_agcount)
2728 return false;
2729 if (XFS_AGINO_TO_INO(mp, agno, agino) != ino)
2730 return false;
2731 return xfs_verify_agino(mp, agno, agino);
2732}
2733
2734/* Is this an internal inode number? */
2735bool
2736xfs_internal_inum(
2737 struct xfs_mount *mp,
2738 xfs_ino_t ino)
2739{
2740 return ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
2741 (xfs_sb_version_hasquota(&mp->m_sb) &&
2742 xfs_is_quota_inode(&mp->m_sb, ino));
2743}
2744
2745/*
2746 * Verify that a directory entry's inode number doesn't point at an internal
2747 * inode, empty space, or static AG metadata.
2748 */
2749bool
2750xfs_verify_dir_ino(
2751 struct xfs_mount *mp,
2752 xfs_ino_t ino)
2753{
2754 if (xfs_internal_inum(mp, ino))
2755 return false;
2756 return xfs_verify_ino(mp, ino);
2757}
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index b32cfb5aeb5b..d2bdcd5e7312 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -173,5 +173,12 @@ void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec,
173 struct xfs_inobt_rec_incore *irec); 173 struct xfs_inobt_rec_incore *irec);
174 174
175int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); 175int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
176void xfs_ialloc_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
177 xfs_agino_t *first, xfs_agino_t *last);
178bool xfs_verify_agino(struct xfs_mount *mp, xfs_agnumber_t agno,
179 xfs_agino_t agino);
180bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino);
181bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
182bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino);
176 183
177#endif /* __XFS_IALLOC_H__ */ 184#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
new file mode 100644
index 000000000000..343a94246f5b
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -0,0 +1,1043 @@
1/*
2 * Copyright (c) 2017 Christoph Hellwig.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <linux/cache.h>
15#include <linux/kernel.h>
16#include <linux/slab.h>
17#include "xfs.h"
18#include "xfs_format.h"
19#include "xfs_bit.h"
20#include "xfs_log_format.h"
21#include "xfs_inode.h"
22#include "xfs_inode_fork.h"
23#include "xfs_trans_resv.h"
24#include "xfs_mount.h"
25#include "xfs_trace.h"
26
27/*
28 * In-core extent record layout:
29 *
30 * +-------+----------------------------+
31 * | 00:53 | all 54 bits of startoff |
32 * | 54:63 | low 10 bits of startblock |
33 * +-------+----------------------------+
34 * | 00:20 | all 21 bits of length |
35 * | 21 | unwritten extent bit |
36 * | 22:63 | high 42 bits of startblock |
37 * +-------+----------------------------+
38 */
39#define XFS_IEXT_STARTOFF_MASK xfs_mask64lo(BMBT_STARTOFF_BITLEN)
40#define XFS_IEXT_LENGTH_MASK xfs_mask64lo(BMBT_BLOCKCOUNT_BITLEN)
41#define XFS_IEXT_STARTBLOCK_MASK xfs_mask64lo(BMBT_STARTBLOCK_BITLEN)
42
/* Packed in-core extent record; bit layout is documented above. */
struct xfs_iext_rec {
	uint64_t		lo;
	uint64_t		hi;
};

/*
 * Given that the length can't be a zero, only an empty hi value indicates an
 * unused record.
 */
static bool xfs_iext_rec_is_empty(struct xfs_iext_rec *rec)
{
	return rec->hi == 0;
}

/* Mark a record unused by zeroing both words. */
static inline void xfs_iext_rec_clear(struct xfs_iext_rec *rec)
{
	rec->lo = 0;
	rec->hi = 0;
}
62
/*
 * Pack an expanded bmap extent (@irec) into the two 64-bit words of an
 * in-core record, following the bit layout described at the top of this
 * file.  The asserts catch values too wide for their packed fields.
 */
static void
xfs_iext_set(
	struct xfs_iext_rec	*rec,
	struct xfs_bmbt_irec	*irec)
{
	ASSERT((irec->br_startoff & ~XFS_IEXT_STARTOFF_MASK) == 0);
	ASSERT((irec->br_blockcount & ~XFS_IEXT_LENGTH_MASK) == 0);
	ASSERT((irec->br_startblock & ~XFS_IEXT_STARTBLOCK_MASK) == 0);

	rec->lo = irec->br_startoff & XFS_IEXT_STARTOFF_MASK;
	rec->hi = irec->br_blockcount & XFS_IEXT_LENGTH_MASK;

	/* low 10 bits of startblock -> lo[54:63] */
	rec->lo |= (irec->br_startblock << 54);
	/* remaining startblock bits -> hi[22:63] */
	rec->hi |= ((irec->br_startblock & ~xfs_mask64lo(10)) << (22 - 10));

	/* hi bit 21 is the unwritten-extent flag */
	if (irec->br_state == XFS_EXT_UNWRITTEN)
		rec->hi |= (1 << 21);
}

/* Unpack an in-core record into an expanded bmap extent (@irec). */
static void
xfs_iext_get(
	struct xfs_bmbt_irec	*irec,
	struct xfs_iext_rec	*rec)
{
	irec->br_startoff = rec->lo & XFS_IEXT_STARTOFF_MASK;
	irec->br_blockcount = rec->hi & XFS_IEXT_LENGTH_MASK;

	/* reassemble startblock from lo[54:63] and hi[22:63] */
	irec->br_startblock = rec->lo >> 54;
	irec->br_startblock |= (rec->hi & xfs_mask64hi(42)) >> (22 - 10);

	if (rec->hi & (1 << 21))
		irec->br_state = XFS_EXT_UNWRITTEN;
	else
		irec->br_state = XFS_EXT_NORM;
}
98
/* On 64-bit these work out to 16 keys per node and 15 records per leaf. */
enum {
	NODE_SIZE	= 256,
	KEYS_PER_NODE	= NODE_SIZE / (sizeof(uint64_t) + sizeof(void *)),
	RECS_PER_LEAF	= (NODE_SIZE - (2 * sizeof(struct xfs_iext_leaf *))) /
				sizeof(struct xfs_iext_rec),
};

/*
 * In-core extent btree block layout:
 *
 * There are two types of blocks in the btree: leaf and inner (non-leaf) blocks.
 *
 * The leaf blocks are made up by %RECS_PER_LEAF extent records, which each
 * contain the startoffset, blockcount, startblock and unwritten extent flag.
 * See above for the exact format, followed by pointers to the previous and next
 * leaf blocks (if there are any).
 *
 * The inner (non-leaf) blocks first contain KEYS_PER_NODE lookup keys, followed
 * by an equal number of pointers to the btree blocks at the next lower level.
 *
 *		+-------+-------+-------+-------+-------+----------+----------+
 * Leaf:	| rec 1 | rec 2 | rec 3 | rec 4 | rec N | prev-ptr | next-ptr |
 *		+-------+-------+-------+-------+-------+----------+----------+
 *
 *		+-------+-------+-------+-------+-------+-------+------+-------+
 * Inner:	| key 1 | key 2 | key 3 | key N | ptr 1 | ptr 2 | ptr3 | ptr N |
 *		+-------+-------+-------+-------+-------+-------+------+-------+
 */
struct xfs_iext_node {
	uint64_t		keys[KEYS_PER_NODE];
#define XFS_IEXT_KEY_INVALID	(1ULL << 63)	/* marks an unused key slot */
	void			*ptrs[KEYS_PER_NODE];
};

struct xfs_iext_leaf {
	struct xfs_iext_rec	recs[RECS_PER_LEAF];
	struct xfs_iext_leaf	*prev;	/* for cheap in-order iteration */
	struct xfs_iext_leaf	*next;
};
138
/* Number of extent records currently stored in this fork. */
inline xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp)
{
	return ifp->if_bytes / sizeof(struct xfs_iext_rec);
}

/*
 * Capacity of the current leaf: a height-1 tree keeps all records in a
 * single (possibly short) root allocation, otherwise leaves are full-sized.
 */
static inline int xfs_iext_max_recs(struct xfs_ifork *ifp)
{
	if (ifp->if_height == 1)
		return xfs_iext_count(ifp);
	return RECS_PER_LEAF;
}

/* Record the cursor currently points at. */
static inline struct xfs_iext_rec *cur_rec(struct xfs_iext_cursor *cur)
{
	return &cur->leaf->recs[cur->pos];
}

/* Does the cursor point at an in-bounds, non-empty record? */
static inline bool xfs_iext_valid(struct xfs_ifork *ifp,
		struct xfs_iext_cursor *cur)
{
	if (!cur->leaf)
		return false;
	if (cur->pos < 0 || cur->pos >= xfs_iext_max_recs(ifp))
		return false;
	if (xfs_iext_rec_is_empty(cur_rec(cur)))
		return false;
	return true;
}
167
/*
 * Walk the leftmost pointers down to the leaf level.  Returns NULL for an
 * empty tree (if_height == 0).
 */
static void *
xfs_iext_find_first_leaf(
	struct xfs_ifork	*ifp)
{
	struct xfs_iext_node	*node = ifp->if_u1.if_root;
	int			height;

	if (!ifp->if_height)
		return NULL;

	for (height = ifp->if_height; height > 1; height--) {
		node = node->ptrs[0];
		ASSERT(node);
	}

	return node;
}

/*
 * Walk the rightmost populated pointer at each level down to the leaf
 * level.  Returns NULL for an empty tree.
 */
static void *
xfs_iext_find_last_leaf(
	struct xfs_ifork	*ifp)
{
	struct xfs_iext_node	*node = ifp->if_u1.if_root;
	int			height, i;

	if (!ifp->if_height)
		return NULL;

	for (height = ifp->if_height; height > 1; height--) {
		/* find the first unused slot; the last used one precedes it */
		for (i = 1; i < KEYS_PER_NODE; i++)
			if (!node->ptrs[i])
				break;
		node = node->ptrs[i - 1];
		ASSERT(node);
	}

	return node;
}
206
/* Position the cursor at the first extent in the fork (if any). */
void
xfs_iext_first(
	struct xfs_ifork	*ifp,
	struct xfs_iext_cursor	*cur)
{
	cur->pos = 0;
	cur->leaf = xfs_iext_find_first_leaf(ifp);
}

/* Position the cursor at the last extent in the fork (if any). */
void
xfs_iext_last(
	struct xfs_ifork	*ifp,
	struct xfs_iext_cursor	*cur)
{
	int			i;

	cur->leaf = xfs_iext_find_last_leaf(ifp);
	if (!cur->leaf) {
		cur->pos = 0;
		return;
	}

	/* the last used record is the one before the first empty slot */
	for (i = 1; i < xfs_iext_max_recs(ifp); i++) {
		if (xfs_iext_rec_is_empty(&cur->leaf->recs[i]))
			break;
	}
	cur->pos = i - 1;
}
235
/*
 * Advance the cursor to the next record, crossing into the next leaf if
 * the current one is exhausted.  A cursor with no leaf restarts from the
 * first extent.
 */
void
xfs_iext_next(
	struct xfs_ifork	*ifp,
	struct xfs_iext_cursor	*cur)
{
	if (!cur->leaf) {
		ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
		xfs_iext_first(ifp, cur);
		return;
	}

	ASSERT(cur->pos >= 0);
	ASSERT(cur->pos < xfs_iext_max_recs(ifp));

	cur->pos++;
	if (ifp->if_height > 1 && !xfs_iext_valid(ifp, cur) &&
	    cur->leaf->next) {
		cur->leaf = cur->leaf->next;
		cur->pos = 0;
	}
}

/*
 * Step the cursor back to the previous record, walking into the previous
 * leaf when the start of the current one is passed.  A cursor with no leaf
 * restarts from the last extent.
 */
void
xfs_iext_prev(
	struct xfs_ifork	*ifp,
	struct xfs_iext_cursor	*cur)
{
	if (!cur->leaf) {
		ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
		xfs_iext_last(ifp, cur);
		return;
	}

	ASSERT(cur->pos >= 0);
	ASSERT(cur->pos <= RECS_PER_LEAF);

recurse:
	do {
		cur->pos--;
		if (xfs_iext_valid(ifp, cur))
			return;
	} while (cur->pos > 0);

	/* off the front of this leaf; retry from the end of the previous */
	if (ifp->if_height > 1 && cur->leaf->prev) {
		cur->leaf = cur->leaf->prev;
		cur->pos = RECS_PER_LEAF;
		goto recurse;
	}
}
285
286static inline int
287xfs_iext_key_cmp(
288 struct xfs_iext_node *node,
289 int n,
290 xfs_fileoff_t offset)
291{
292 if (node->keys[n] > offset)
293 return 1;
294 if (node->keys[n] < offset)
295 return -1;
296 return 0;
297}
298
299static inline int
300xfs_iext_rec_cmp(
301 struct xfs_iext_rec *rec,
302 xfs_fileoff_t offset)
303{
304 uint64_t rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK;
305 u32 rec_len = rec->hi & XFS_IEXT_LENGTH_MASK;
306
307 if (rec_offset > offset)
308 return 1;
309 if (rec_offset + rec_len <= offset)
310 return -1;
311 return 0;
312}
313
/*
 * Descend from the root to the block at @level whose key range covers
 * @offset.  Returns NULL for an empty tree or a missing child pointer.
 */
static void *
xfs_iext_find_level(
	struct xfs_ifork	*ifp,
	xfs_fileoff_t		offset,
	int			level)
{
	struct xfs_iext_node	*node = ifp->if_u1.if_root;
	int			height, i;

	if (!ifp->if_height)
		return NULL;

	for (height = ifp->if_height; height > level; height--) {
		/* first key strictly greater than offset bounds the child */
		for (i = 1; i < KEYS_PER_NODE; i++)
			if (xfs_iext_key_cmp(node, i, offset) > 0)
				break;

		node = node->ptrs[i - 1];
		if (!node)
			break;
	}

	return node;
}

/* Slot within @node whose key range covers @offset. */
static int
xfs_iext_node_pos(
	struct xfs_iext_node	*node,
	xfs_fileoff_t		offset)
{
	int			i;

	for (i = 1; i < KEYS_PER_NODE; i++) {
		if (xfs_iext_key_cmp(node, i, offset) > 0)
			break;
	}

	return i - 1;
}

/*
 * Slot at which a new key @offset would be inserted in @node, i.e. the
 * first slot whose key exceeds it (KEYS_PER_NODE if none does).
 */
static int
xfs_iext_node_insert_pos(
	struct xfs_iext_node	*node,
	xfs_fileoff_t		offset)
{
	int			i;

	for (i = 0; i < KEYS_PER_NODE; i++) {
		if (xfs_iext_key_cmp(node, i, offset) > 0)
			return i;
	}

	return KEYS_PER_NODE;
}
368
/*
 * Count used key slots in @node from @start onward; keys are packed so the
 * first invalid key terminates the scan.
 */
static int
xfs_iext_node_nr_entries(
	struct xfs_iext_node	*node,
	int			start)
{
	int			i;

	for (i = start; i < KEYS_PER_NODE; i++) {
		if (node->keys[i] == XFS_IEXT_KEY_INVALID)
			break;
	}

	return i;
}

/* Count used records in @leaf from @start onward (records are packed). */
static int
xfs_iext_leaf_nr_entries(
	struct xfs_ifork	*ifp,
	struct xfs_iext_leaf	*leaf,
	int			start)
{
	int			i;

	for (i = start; i < xfs_iext_max_recs(ifp); i++) {
		if (xfs_iext_rec_is_empty(&leaf->recs[i]))
			break;
	}

	return i;
}

/* Startoff of record @n in @leaf, used as its key in parent nodes. */
static inline uint64_t
xfs_iext_leaf_key(
	struct xfs_iext_leaf	*leaf,
	int			n)
{
	return leaf->recs[n].lo & XFS_IEXT_STARTOFF_MASK;
}
407
/*
 * Grow the tree by one level: allocate a new root whose single child is
 * the old root (leaf or inner node).
 */
static void
xfs_iext_grow(
	struct xfs_ifork	*ifp)
{
	struct xfs_iext_node	*node = kmem_zalloc(NODE_SIZE, KM_NOFS);
	int			i;

	if (ifp->if_height == 1) {
		struct xfs_iext_leaf *prev = ifp->if_u1.if_root;

		node->keys[0] = xfs_iext_leaf_key(prev, 0);
		node->ptrs[0] = prev;
	} else  {
		struct xfs_iext_node *prev = ifp->if_u1.if_root;

		ASSERT(ifp->if_height > 1);

		node->keys[0] = prev->keys[0];
		node->ptrs[0] = prev;
	}

	/* all other slots start out unused */
	for (i = 1; i < KEYS_PER_NODE; i++)
		node->keys[i] = XFS_IEXT_KEY_INVALID;

	ifp->if_u1.if_root = node;
	ifp->if_height++;
}

/*
 * Rewrite @old_offset keys to @new_offset on the path from the root down
 * to (but not including) @level; @ptr is the expected block at that level
 * and is only used for a sanity check.
 */
static void
xfs_iext_update_node(
	struct xfs_ifork	*ifp,
	xfs_fileoff_t		old_offset,
	xfs_fileoff_t		new_offset,
	int			level,
	void			*ptr)
{
	struct xfs_iext_node	*node = ifp->if_u1.if_root;
	int			height, i;

	for (height = ifp->if_height; height > level; height--) {
		for (i = 0; i < KEYS_PER_NODE; i++) {
			if (i > 0 && xfs_iext_key_cmp(node, i, old_offset) > 0)
				break;
			if (node->keys[i] == old_offset)
				node->keys[i] = new_offset;
		}
		node = node->ptrs[i - 1];
		ASSERT(node);
	}

	ASSERT(node == ptr);
}
460
/*
 * Split a full inner node, moving roughly half its entries into a freshly
 * allocated node.  On return *nodep/*pos address the slot where the caller
 * should insert, and *nr_entries is the entry count of that node.  Returns
 * the new (right-hand) node, which the caller must link into the parent.
 */
static struct xfs_iext_node *
xfs_iext_split_node(
	struct xfs_iext_node	**nodep,
	int			*pos,
	int			*nr_entries)
{
	struct xfs_iext_node	*node = *nodep;
	struct xfs_iext_node	*new = kmem_zalloc(NODE_SIZE, KM_NOFS);
	const int		nr_move = KEYS_PER_NODE / 2;
	int			nr_keep = nr_move + (KEYS_PER_NODE & 1);
	int			i = 0;

	/* for sequential append operations just spill over into the new node */
	if (*pos == KEYS_PER_NODE) {
		*nodep = new;
		*pos = 0;
		*nr_entries = 0;
		goto done;
	}


	for (i = 0; i < nr_move; i++) {
		new->keys[i] = node->keys[nr_keep + i];
		new->ptrs[i] = node->ptrs[nr_keep + i];

		node->keys[nr_keep + i] = XFS_IEXT_KEY_INVALID;
		node->ptrs[nr_keep + i] = NULL;
	}

	/* direct the caller to whichever half now owns the insert position */
	if (*pos >= nr_keep) {
		*nodep = new;
		*pos -= nr_keep;
		*nr_entries = nr_move;
	} else {
		*nr_entries = nr_keep;
	}
done:
	/* invalidate the untouched tail of the new node */
	for (; i < KEYS_PER_NODE; i++)
		new->keys[i] = XFS_IEXT_KEY_INVALID;
	return new;
}

/*
 * Insert (@offset, @ptr) into the inner node covering @offset at @level,
 * splitting on the way and recursing upward (growing the tree if needed)
 * until the new separator key has a home.
 */
static void
xfs_iext_insert_node(
	struct xfs_ifork	*ifp,
	uint64_t		offset,
	void			*ptr,
	int			level)
{
	struct xfs_iext_node	*node, *new;
	int			i, pos, nr_entries;

again:
	if (ifp->if_height < level)
		xfs_iext_grow(ifp);

	new = NULL;
	node = xfs_iext_find_level(ifp, offset, level);
	pos = xfs_iext_node_insert_pos(node, offset);
	nr_entries = xfs_iext_node_nr_entries(node, pos);

	ASSERT(pos >= nr_entries || xfs_iext_key_cmp(node, pos, offset) != 0);
	ASSERT(nr_entries <= KEYS_PER_NODE);

	if (nr_entries == KEYS_PER_NODE)
		new = xfs_iext_split_node(&node, &pos, &nr_entries);

	/*
	 * Update the pointers in higher levels if the first entry changes
	 * in an existing node.
	 */
	if (node != new && pos == 0 && nr_entries > 0)
		xfs_iext_update_node(ifp, node->keys[0], offset, level, node);

	/* shift entries right to open the insert slot */
	for (i = nr_entries; i > pos; i--) {
		node->keys[i] = node->keys[i - 1];
		node->ptrs[i] = node->ptrs[i - 1];
	}
	node->keys[pos] = offset;
	node->ptrs[pos] = ptr;

	/* a split requires linking the new node into the next level up */
	if (new) {
		offset = new->keys[0];
		ptr = new;
		level++;
		goto again;
	}
}
549
/*
 * Split a full leaf, moving roughly half its records into a new leaf and
 * stitching it into the prev/next chain.  On return the cursor addresses
 * the slot where the caller should insert and *nr_entries is that leaf's
 * record count.  Returns the new leaf for linking into the parent.
 */
static struct xfs_iext_leaf *
xfs_iext_split_leaf(
	struct xfs_iext_cursor	*cur,
	int			*nr_entries)
{
	struct xfs_iext_leaf	*leaf = cur->leaf;
	struct xfs_iext_leaf	*new = kmem_zalloc(NODE_SIZE, KM_NOFS);
	const int		nr_move = RECS_PER_LEAF / 2;
	int			nr_keep = nr_move + (RECS_PER_LEAF & 1);
	int			i;

	/* for sequential append operations just spill over into the new node */
	if (cur->pos == RECS_PER_LEAF) {
		cur->leaf = new;
		cur->pos = 0;
		*nr_entries = 0;
		goto done;
	}

	for (i = 0; i < nr_move; i++) {
		new->recs[i] = leaf->recs[nr_keep + i];
		xfs_iext_rec_clear(&leaf->recs[nr_keep + i]);
	}

	/* point the cursor at whichever half now owns the insert position */
	if (cur->pos >= nr_keep) {
		cur->leaf = new;
		cur->pos -= nr_keep;
		*nr_entries = nr_move;
	} else {
		*nr_entries = nr_keep;
	}
done:
	/* splice the new leaf into the doubly linked leaf list */
	if (leaf->next)
		leaf->next->prev = new;
	new->next = leaf->next;
	new->prev = leaf;
	leaf->next = new;
	return new;
}

/*
 * First insertion into an empty fork: allocate a minimal one-record root
 * leaf and point the cursor at it.
 */
static void
xfs_iext_alloc_root(
	struct xfs_ifork	*ifp,
	struct xfs_iext_cursor	*cur)
{
	ASSERT(ifp->if_bytes == 0);

	ifp->if_u1.if_root = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS);
	ifp->if_height = 1;

	/* now that we have a node step into it */
	cur->leaf = ifp->if_u1.if_root;
	cur->pos = 0;
}

/*
 * Grow a height-1 root leaf by one record.  Once it would reach
 * RECS_PER_LEAF records, size it as a full NODE_SIZE leaf so the
 * prev/next pointers have room.
 */
static void
xfs_iext_realloc_root(
	struct xfs_ifork	*ifp,
	struct xfs_iext_cursor	*cur)
{
	size_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec);
	void *new;

	/* account for the prev/next pointers */
	if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF)
		new_size = NODE_SIZE;

	new = kmem_realloc(ifp->if_u1.if_root, new_size, KM_NOFS);
	memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes);
	ifp->if_u1.if_root = new;
	cur->leaf = new;
}
622
/*
 * Insert @irec at the cursor position in the fork selected by @state,
 * allocating/growing the root and splitting leaves as required.
 */
void
xfs_iext_insert(
	struct xfs_inode	*ip,
	struct xfs_iext_cursor	*cur,
	struct xfs_bmbt_irec	*irec,
	int			state)
{
	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
	xfs_fileoff_t		offset = irec->br_startoff;
	struct xfs_iext_leaf	*new = NULL;
	int			nr_entries, i;

	trace_xfs_iext_insert(ip, cur, state, _RET_IP_);

	if (ifp->if_height == 0)
		xfs_iext_alloc_root(ifp, cur);
	else if (ifp->if_height == 1)
		xfs_iext_realloc_root(ifp, cur);

	nr_entries = xfs_iext_leaf_nr_entries(ifp, cur->leaf, cur->pos);
	ASSERT(nr_entries <= RECS_PER_LEAF);
	/* must not insert on top of an extent covering the same offset */
	ASSERT(cur->pos >= nr_entries ||
	       xfs_iext_rec_cmp(cur_rec(cur), irec->br_startoff) != 0);

	if (nr_entries == RECS_PER_LEAF)
		new = xfs_iext_split_leaf(cur, &nr_entries);

	/*
	 * Update the pointers in higher levels if the first entry changes
	 * in an existing node.
	 */
	if (cur->leaf != new && cur->pos == 0 && nr_entries > 0) {
		xfs_iext_update_node(ifp, xfs_iext_leaf_key(cur->leaf, 0),
				offset, 1, cur->leaf);
	}

	/* shift records right to open the insert slot */
	for (i = nr_entries; i > cur->pos; i--)
		cur->leaf->recs[i] = cur->leaf->recs[i - 1];
	xfs_iext_set(cur_rec(cur), irec);
	ifp->if_bytes += sizeof(struct xfs_iext_rec);

	/* link a freshly split leaf into the level-2 parent */
	if (new)
		xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
}
667
/*
 * Try to merge an under-filled inner node into a sibling under the same
 * @parent.  Returns the node that became redundant and should be freed by
 * the caller (possibly @node itself when it is empty), or NULL if no merge
 * was possible.  *pos is advanced when the right-hand sibling is absorbed.
 */
static struct xfs_iext_node *
xfs_iext_rebalance_node(
	struct xfs_iext_node	*parent,
	int			*pos,
	struct xfs_iext_node	*node,
	int			nr_entries)
{
	/*
	 * If the neighbouring nodes are completely full, or have different
	 * parents, we might never be able to merge our node, and will only
	 * delete it once the number of entries hits zero.
	 */
	if (nr_entries == 0)
		return node;

	if (*pos > 0) {
		struct xfs_iext_node *prev = parent->ptrs[*pos - 1];
		int nr_prev = xfs_iext_node_nr_entries(prev, 0), i;

		if (nr_prev + nr_entries <= KEYS_PER_NODE) {
			for (i = 0; i < nr_entries; i++) {
				prev->keys[nr_prev + i] = node->keys[i];
				prev->ptrs[nr_prev + i] = node->ptrs[i];
			}
			return node;
		}
	}

	if (*pos + 1 < xfs_iext_node_nr_entries(parent, *pos)) {
		struct xfs_iext_node *next = parent->ptrs[*pos + 1];
		int nr_next = xfs_iext_node_nr_entries(next, 0), i;

		if (nr_entries + nr_next <= KEYS_PER_NODE) {
			/*
			 * Merge the next node into this node so that we don't
			 * have to do an additional update of the keys in the
			 * higher levels.
			 */
			for (i = 0; i < nr_next; i++) {
				node->keys[nr_entries + i] = next->keys[i];
				node->ptrs[nr_entries + i] = next->ptrs[i];
			}

			++*pos;
			return next;
		}
	}

	return NULL;
}

/*
 * Remove the child @victim (keyed by @offset) from the level-2 node above
 * it, freeing it and rebalancing/shrinking inner levels upward as nodes
 * fall below half full.
 */
static void
xfs_iext_remove_node(
	struct xfs_ifork	*ifp,
	xfs_fileoff_t		offset,
	void			*victim)
{
	struct xfs_iext_node	*node, *parent;
	int			level = 2, pos, nr_entries, i;

	ASSERT(level <= ifp->if_height);
	node = xfs_iext_find_level(ifp, offset, level);
	pos = xfs_iext_node_pos(node, offset);
again:
	ASSERT(node->ptrs[pos]);
	ASSERT(node->ptrs[pos] == victim);
	kmem_free(victim);

	/* close the gap left by the removed entry */
	nr_entries = xfs_iext_node_nr_entries(node, pos) - 1;
	offset = node->keys[0];
	for (i = pos; i < nr_entries; i++) {
		node->keys[i] = node->keys[i + 1];
		node->ptrs[i] = node->ptrs[i + 1];
	}
	node->keys[nr_entries] = XFS_IEXT_KEY_INVALID;
	node->ptrs[nr_entries] = NULL;

	/* first key changed: propagate it to the levels above */
	if (pos == 0 && nr_entries > 0) {
		xfs_iext_update_node(ifp, offset, node->keys[0], level, node);
		offset = node->keys[0];
	}

	if (nr_entries >= KEYS_PER_NODE / 2)
		return;

	if (level < ifp->if_height) {
		/*
		 * If we aren't at the root yet try to find a neighbour node to
		 * merge with (or delete the node if it is empty), and then
		 * recurse up to the next level.
		 */
		level++;
		parent = xfs_iext_find_level(ifp, offset, level);
		pos = xfs_iext_node_pos(parent, offset);

		ASSERT(pos != KEYS_PER_NODE);
		ASSERT(parent->ptrs[pos] == node);

		node = xfs_iext_rebalance_node(parent, &pos, node, nr_entries);
		if (node) {
			victim = node;
			node = parent;
			goto again;
		}
	} else if (nr_entries == 1) {
		/*
		 * If we are at the root and only one entry is left we can just
		 * free this node and update the root pointer.
		 */
		ASSERT(node == ifp->if_u1.if_root);
		ifp->if_u1.if_root = node->ptrs[0];
		ifp->if_height--;
		kmem_free(node);
	}
}
783
/*
 * Try to merge an under-filled @leaf into a neighbouring leaf, fixing the
 * cursor when its records move, then unlink and remove whichever leaf
 * became redundant from the tree.
 */
static void
xfs_iext_rebalance_leaf(
	struct xfs_ifork	*ifp,
	struct xfs_iext_cursor	*cur,
	struct xfs_iext_leaf	*leaf,
	xfs_fileoff_t		offset,
	int			nr_entries)
{
	/*
	 * If the neighbouring nodes are completely full we might never be able
	 * to merge our node, and will only delete it once the number of
	 * entries hits zero.
	 */
	if (nr_entries == 0)
		goto remove_node;

	if (leaf->prev) {
		int nr_prev = xfs_iext_leaf_nr_entries(ifp, leaf->prev, 0), i;

		if (nr_prev + nr_entries <= RECS_PER_LEAF) {
			for (i = 0; i < nr_entries; i++)
				leaf->prev->recs[nr_prev + i] = leaf->recs[i];

			/* cursor records moved into the previous leaf */
			if (cur->leaf == leaf) {
				cur->leaf = leaf->prev;
				cur->pos += nr_prev;
			}
			goto remove_node;
		}
	}

	if (leaf->next) {
		int nr_next = xfs_iext_leaf_nr_entries(ifp, leaf->next, 0), i;

		if (nr_entries + nr_next <= RECS_PER_LEAF) {
			/*
			 * Merge the next node into this node so that we don't
			 * have to do an additional update of the keys in the
			 * higher levels.
			 */
			for (i = 0; i < nr_next; i++) {
				leaf->recs[nr_entries + i] =
					leaf->next->recs[i];
			}

			if (cur->leaf == leaf->next) {
				cur->leaf = leaf;
				cur->pos += nr_entries;
			}

			/* the next leaf is now the one to delete */
			offset = xfs_iext_leaf_key(leaf->next, 0);
			leaf = leaf->next;
			goto remove_node;
		}
	}

	return;
remove_node:
	/* unlink from the leaf list, then drop it from the inner levels */
	if (leaf->prev)
		leaf->prev->next = leaf->next;
	if (leaf->next)
		leaf->next->prev = leaf->prev;
	xfs_iext_remove_node(ifp, offset, leaf);
}
848
849static void
850xfs_iext_free_last_leaf(
851 struct xfs_ifork *ifp)
852{
853 ifp->if_u1.if_root = NULL;
854 ifp->if_height--;
855 kmem_free(ifp->if_u1.if_root);
856}
857
/*
 * Remove the record at the cursor from the fork selected by @state,
 * rebalancing leaves and inner nodes (or freeing the last leaf) when the
 * leaf drops below half full.  The cursor is left on the following record,
 * or invalidated when the removed record was the last one.
 */
void
xfs_iext_remove(
	struct xfs_inode	*ip,
	struct xfs_iext_cursor	*cur,
	int			state)
{
	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
	struct xfs_iext_leaf	*leaf = cur->leaf;
	xfs_fileoff_t		offset = xfs_iext_leaf_key(leaf, 0);
	int			i, nr_entries;

	trace_xfs_iext_remove(ip, cur, state, _RET_IP_);

	ASSERT(ifp->if_height > 0);
	ASSERT(ifp->if_u1.if_root != NULL);
	ASSERT(xfs_iext_valid(ifp, cur));

	/* close the gap left by the removed record */
	nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1;
	for (i = cur->pos; i < nr_entries; i++)
		leaf->recs[i] = leaf->recs[i + 1];
	xfs_iext_rec_clear(&leaf->recs[nr_entries]);
	ifp->if_bytes -= sizeof(struct xfs_iext_rec);

	/* first record changed: propagate the new key upward */
	if (cur->pos == 0 && nr_entries > 0) {
		xfs_iext_update_node(ifp, offset, xfs_iext_leaf_key(leaf, 0), 1,
				leaf);
		offset = xfs_iext_leaf_key(leaf, 0);
	} else if (cur->pos == nr_entries) {
		/* removed the last record in this leaf: step to the next */
		if (ifp->if_height > 1 && leaf->next)
			cur->leaf = leaf->next;
		else
			cur->leaf = NULL;
		cur->pos = 0;
	}

	if (nr_entries >= RECS_PER_LEAF / 2)
		return;

	if (ifp->if_height > 1)
		xfs_iext_rebalance_leaf(ifp, cur, leaf, offset, nr_entries);
	else if (nr_entries == 0)
		xfs_iext_free_last_leaf(ifp);
}
901
/*
 * Lookup the extent covering bno.
 *
 * If there is an extent covering bno return true, store the expanded
 * extent structure in *gotp, and the extent cursor in *cur.
 * If there is no extent covering bno, but there is an extent after it (e.g.
 * it lies in a hole) return that extent in *gotp and its cursor in *cur
 * instead.
 * If bno is beyond the last extent return false, and return an invalid
 * cursor value.
 */
bool
xfs_iext_lookup_extent(
	struct xfs_inode	*ip,
	struct xfs_ifork	*ifp,
	xfs_fileoff_t		offset,
	struct xfs_iext_cursor	*cur,
	struct xfs_bmbt_irec	*gotp)
{
	XFS_STATS_INC(ip->i_mount, xs_look_exlist);

	cur->leaf = xfs_iext_find_level(ifp, offset, 1);
	if (!cur->leaf) {
		cur->pos = 0;
		return false;
	}

	/* scan the leaf for the first record covering or after offset */
	for (cur->pos = 0; cur->pos < xfs_iext_max_recs(ifp); cur->pos++) {
		struct xfs_iext_rec *rec = cur_rec(cur);

		if (xfs_iext_rec_is_empty(rec))
			break;
		if (xfs_iext_rec_cmp(rec, offset) >= 0)
			goto found;
	}

	/* Try looking in the next node for an entry > offset */
	if (ifp->if_height == 1 || !cur->leaf->next)
		return false;
	cur->leaf = cur->leaf->next;
	cur->pos = 0;
	if (!xfs_iext_valid(ifp, cur))
		return false;
found:
	xfs_iext_get(gotp, cur_rec(cur));
	return true;
}
949
/*
 * Returns the last extent before end, and if this extent doesn't cover
 * end, update end to the end of the extent.
 */
bool
xfs_iext_lookup_extent_before(
	struct xfs_inode	*ip,
	struct xfs_ifork	*ifp,
	xfs_fileoff_t		*end,
	struct xfs_iext_cursor	*cur,
	struct xfs_bmbt_irec	*gotp)
{
	/* could be optimized to not even look up the next on a match.. */
	if (xfs_iext_lookup_extent(ip, ifp, *end - 1, cur, gotp) &&
	    gotp->br_startoff <= *end - 1)
		return true;
	/* the lookup landed past end - 1; step back to the prior extent */
	if (!xfs_iext_prev_extent(ifp, cur, gotp))
		return false;
	*end = gotp->br_startoff + gotp->br_blockcount;
	return true;
}
971
/*
 * Overwrite the record at the cursor with @new.  When the first record of
 * a leaf changes its start offset, the keys in the inner levels are
 * updated to match before the record is rewritten.
 */
void
xfs_iext_update_extent(
	struct xfs_inode	*ip,
	int			state,
	struct xfs_iext_cursor	*cur,
	struct xfs_bmbt_irec	*new)
{
	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);

	if (cur->pos == 0) {
		struct xfs_bmbt_irec	old;

		xfs_iext_get(&old, cur_rec(cur));
		if (new->br_startoff != old.br_startoff) {
			xfs_iext_update_node(ifp, old.br_startoff,
					new->br_startoff, 1, cur->leaf);
		}
	}

	trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_);
	xfs_iext_set(cur_rec(cur), new);
	trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_);
}
995
996/*
997 * Return true if the cursor points at an extent and return the extent structure
998 * in gotp. Else return false.
999 */
1000bool
1001xfs_iext_get_extent(
1002 struct xfs_ifork *ifp,
1003 struct xfs_iext_cursor *cur,
1004 struct xfs_bmbt_irec *gotp)
1005{
1006 if (!xfs_iext_valid(ifp, cur))
1007 return false;
1008 xfs_iext_get(gotp, cur_rec(cur));
1009 return true;
1010}
1011
/*
 * This is a recursive function, because of that we need to be extremely
 * careful with stack usage.
 */
static void
xfs_iext_destroy_node(
	struct xfs_iext_node	*node,
	int			level)
{
	int			i;

	/* free children depth-first; level 1 is a leaf with no child ptrs */
	if (level > 1) {
		for (i = 0; i < KEYS_PER_NODE; i++) {
			if (node->keys[i] == XFS_IEXT_KEY_INVALID)
				break;
			xfs_iext_destroy_node(node->ptrs[i], level - 1);
		}
	}

	kmem_free(node);
}

/* Free the entire in-core extent tree and reset the fork to empty. */
void
xfs_iext_destroy(
	struct xfs_ifork	*ifp)
{
	xfs_iext_destroy_node(ifp->if_u1.if_root, ifp->if_height);

	ifp->if_bytes = 0;
	ifp->if_height = 0;
	ifp->if_u1.if_root = NULL;
}
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 378f8fbc91a7..6b7989038d75 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -24,6 +24,7 @@
24#include "xfs_mount.h" 24#include "xfs_mount.h"
25#include "xfs_defer.h" 25#include "xfs_defer.h"
26#include "xfs_inode.h" 26#include "xfs_inode.h"
27#include "xfs_errortag.h"
27#include "xfs_error.h" 28#include "xfs_error.h"
28#include "xfs_cksum.h" 29#include "xfs_cksum.h"
29#include "xfs_icache.h" 30#include "xfs_icache.h"
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 31840ca24018..1c90ec41e9df 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -42,21 +42,27 @@ STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
42STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); 42STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
43STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); 43STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
44 44
45static inline dev_t xfs_to_linux_dev_t(xfs_dev_t dev)
46{
47 return MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
48}
49
45/* 50/*
46 * Move inode type and inode format specific information from the 51 * Copy inode type and data and attr format specific information from the
47 * on-disk inode to the in-core inode. For fifos, devs, and sockets 52 * on-disk inode to the in-core inode and fork structures. For fifos, devices,
48 * this means set if_rdev to the proper value. For files, directories, 53 * and sockets this means set i_rdev to the proper value. For files,
49 * and symlinks this means to bring in the in-line data or extent 54 * directories, and symlinks this means to bring in the in-line data or extent
50 * pointers. For a file in B-tree format, only the root is immediately 55 * pointers as well as the attribute fork. For a fork in B-tree format, only
51 * brought in-core. The rest will be in-lined in if_extents when it 56 * the root is immediately brought in-core. The rest will be read in later when
52 * is first referenced (see xfs_iread_extents()). 57 * first referenced (see xfs_iread_extents()).
53 */ 58 */
54int 59int
55xfs_iformat_fork( 60xfs_iformat_fork(
56 xfs_inode_t *ip, 61 struct xfs_inode *ip,
57 xfs_dinode_t *dip) 62 struct xfs_dinode *dip)
58{ 63{
59 xfs_attr_shortform_t *atp; 64 struct inode *inode = VFS_I(ip);
65 struct xfs_attr_shortform *atp;
60 int size; 66 int size;
61 int error = 0; 67 int error = 0;
62 xfs_fsize_t di_size; 68 xfs_fsize_t di_size;
@@ -95,8 +101,7 @@ xfs_iformat_fork(
95 return -EFSCORRUPTED; 101 return -EFSCORRUPTED;
96 } 102 }
97 103
98 if (unlikely(xfs_is_reflink_inode(ip) && 104 if (unlikely(xfs_is_reflink_inode(ip) && !S_ISREG(inode->i_mode))) {
99 (VFS_I(ip)->i_mode & S_IFMT) != S_IFREG)) {
100 xfs_warn(ip->i_mount, 105 xfs_warn(ip->i_mount,
101 "corrupt dinode %llu, wrong file type for reflink.", 106 "corrupt dinode %llu, wrong file type for reflink.",
102 ip->i_ino); 107 ip->i_ino);
@@ -115,7 +120,7 @@ xfs_iformat_fork(
115 return -EFSCORRUPTED; 120 return -EFSCORRUPTED;
116 } 121 }
117 122
118 switch (VFS_I(ip)->i_mode & S_IFMT) { 123 switch (inode->i_mode & S_IFMT) {
119 case S_IFIFO: 124 case S_IFIFO:
120 case S_IFCHR: 125 case S_IFCHR:
121 case S_IFBLK: 126 case S_IFBLK:
@@ -126,7 +131,7 @@ xfs_iformat_fork(
126 return -EFSCORRUPTED; 131 return -EFSCORRUPTED;
127 } 132 }
128 ip->i_d.di_size = 0; 133 ip->i_d.di_size = 0;
129 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); 134 inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip));
130 break; 135 break;
131 136
132 case S_IFREG: 137 case S_IFREG:
@@ -184,8 +189,7 @@ xfs_iformat_fork(
184 return error; 189 return error;
185 190
186 /* Check inline dir contents. */ 191 /* Check inline dir contents. */
187 if (S_ISDIR(VFS_I(ip)->i_mode) && 192 if (S_ISDIR(inode->i_mode) && dip->di_format == XFS_DINODE_FMT_LOCAL) {
188 dip->di_format == XFS_DINODE_FMT_LOCAL) {
189 error = xfs_dir2_sf_verify(ip); 193 error = xfs_dir2_sf_verify(ip);
190 if (error) { 194 if (error) {
191 xfs_idestroy_fork(ip, XFS_DATA_FORK); 195 xfs_idestroy_fork(ip, XFS_DATA_FORK);
@@ -265,19 +269,14 @@ xfs_init_local_fork(
265 if (zero_terminate) 269 if (zero_terminate)
266 mem_size++; 270 mem_size++;
267 271
268 if (size == 0) 272 if (size) {
269 ifp->if_u1.if_data = NULL;
270 else if (mem_size <= sizeof(ifp->if_u2.if_inline_data))
271 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
272 else {
273 real_size = roundup(mem_size, 4); 273 real_size = roundup(mem_size, 4);
274 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); 274 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
275 }
276
277 if (size) {
278 memcpy(ifp->if_u1.if_data, data, size); 275 memcpy(ifp->if_u1.if_data, data, size);
279 if (zero_terminate) 276 if (zero_terminate)
280 ifp->if_u1.if_data[size] = '\0'; 277 ifp->if_u1.if_data[size] = '\0';
278 } else {
279 ifp->if_u1.if_data = NULL;
281 } 280 }
282 281
283 ifp->if_bytes = size; 282 ifp->if_bytes = size;
@@ -288,13 +287,6 @@ xfs_init_local_fork(
288 287
289/* 288/*
290 * The file is in-lined in the on-disk inode. 289 * The file is in-lined in the on-disk inode.
291 * If it fits into if_inline_data, then copy
292 * it there, otherwise allocate a buffer for it
293 * and copy the data there. Either way, set
294 * if_data to point at the data.
295 * If we allocate a buffer for the data, make
296 * sure that its size is a multiple of 4 and
297 * record the real size in i_real_bytes.
298 */ 290 */
299STATIC int 291STATIC int
300xfs_iformat_local( 292xfs_iformat_local(
@@ -324,9 +316,7 @@ xfs_iformat_local(
324 316
325/* 317/*
326 * The file consists of a set of extents all of which fit into the on-disk 318 * The file consists of a set of extents all of which fit into the on-disk
327 * inode. If there are few enough extents to fit into the if_inline_ext, then 319 * inode.
328 * copy them there. Otherwise allocate a buffer for them and copy them into it.
329 * Either way, set if_extents to point at the extents.
330 */ 320 */
331STATIC int 321STATIC int
332xfs_iformat_extents( 322xfs_iformat_extents(
@@ -336,9 +326,12 @@ xfs_iformat_extents(
336{ 326{
337 struct xfs_mount *mp = ip->i_mount; 327 struct xfs_mount *mp = ip->i_mount;
338 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 328 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
329 int state = xfs_bmap_fork_to_state(whichfork);
339 int nex = XFS_DFORK_NEXTENTS(dip, whichfork); 330 int nex = XFS_DFORK_NEXTENTS(dip, whichfork);
340 int size = nex * sizeof(xfs_bmbt_rec_t); 331 int size = nex * sizeof(xfs_bmbt_rec_t);
332 struct xfs_iext_cursor icur;
341 struct xfs_bmbt_rec *dp; 333 struct xfs_bmbt_rec *dp;
334 struct xfs_bmbt_irec new;
342 int i; 335 int i;
343 336
344 /* 337 /*
@@ -354,27 +347,25 @@ xfs_iformat_extents(
354 } 347 }
355 348
356 ifp->if_real_bytes = 0; 349 ifp->if_real_bytes = 0;
357 if (nex == 0) 350 ifp->if_bytes = 0;
358 ifp->if_u1.if_extents = NULL; 351 ifp->if_u1.if_root = NULL;
359 else if (nex <= XFS_INLINE_EXTS) 352 ifp->if_height = 0;
360 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
361 else
362 xfs_iext_add(ifp, 0, nex);
363
364 ifp->if_bytes = size;
365 if (size) { 353 if (size) {
366 dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); 354 dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
355
356 xfs_iext_first(ifp, &icur);
367 for (i = 0; i < nex; i++, dp++) { 357 for (i = 0; i < nex; i++, dp++) {
368 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 358 xfs_bmbt_disk_get_all(dp, &new);
369 ep->l0 = get_unaligned_be64(&dp->l0); 359 if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) {
370 ep->l1 = get_unaligned_be64(&dp->l1);
371 if (!xfs_bmbt_validate_extent(mp, whichfork, ep)) {
372 XFS_ERROR_REPORT("xfs_iformat_extents(2)", 360 XFS_ERROR_REPORT("xfs_iformat_extents(2)",
373 XFS_ERRLEVEL_LOW, mp); 361 XFS_ERRLEVEL_LOW, mp);
374 return -EFSCORRUPTED; 362 return -EFSCORRUPTED;
375 } 363 }
364
365 xfs_iext_insert(ip, &icur, &new, state);
366 trace_xfs_read_extent(ip, &icur, state, _THIS_IP_);
367 xfs_iext_next(ifp, &icur);
376 } 368 }
377 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
378 } 369 }
379 ifp->if_flags |= XFS_IFEXTENTS; 370 ifp->if_flags |= XFS_IFEXTENTS;
380 return 0; 371 return 0;
@@ -440,47 +431,14 @@ xfs_iformat_btree(
440 ifp->if_flags &= ~XFS_IFEXTENTS; 431 ifp->if_flags &= ~XFS_IFEXTENTS;
441 ifp->if_flags |= XFS_IFBROOT; 432 ifp->if_flags |= XFS_IFBROOT;
442 433
434 ifp->if_real_bytes = 0;
435 ifp->if_bytes = 0;
436 ifp->if_u1.if_root = NULL;
437 ifp->if_height = 0;
443 return 0; 438 return 0;
444} 439}
445 440
446/* 441/*
447 * Read in extents from a btree-format inode.
448 * Allocate and fill in if_extents. Real work is done in xfs_bmap.c.
449 */
450int
451xfs_iread_extents(
452 xfs_trans_t *tp,
453 xfs_inode_t *ip,
454 int whichfork)
455{
456 int error;
457 xfs_ifork_t *ifp;
458 xfs_extnum_t nextents;
459
460 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
461
462 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
463 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
464 ip->i_mount);
465 return -EFSCORRUPTED;
466 }
467 nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
468 ifp = XFS_IFORK_PTR(ip, whichfork);
469
470 /*
471 * We know that the size is valid (it's checked in iformat_btree)
472 */
473 ifp->if_bytes = ifp->if_real_bytes = 0;
474 xfs_iext_add(ifp, 0, nextents);
475 error = xfs_bmap_read_extents(tp, ip, whichfork);
476 if (error) {
477 xfs_iext_destroy(ifp);
478 return error;
479 }
480 ifp->if_flags |= XFS_IFEXTENTS;
481 return 0;
482}
483/*
484 * Reallocate the space for if_broot based on the number of records 442 * Reallocate the space for if_broot based on the number of records
485 * being added or deleted as indicated in rec_diff. Move the records 443 * being added or deleted as indicated in rec_diff. Move the records
486 * and pointers in if_broot to fit the new size. When shrinking this 444 * and pointers in if_broot to fit the new size. When shrinking this
@@ -644,26 +602,9 @@ xfs_idata_realloc(
644 ASSERT(new_size >= 0); 602 ASSERT(new_size >= 0);
645 603
646 if (new_size == 0) { 604 if (new_size == 0) {
647 if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 605 kmem_free(ifp->if_u1.if_data);
648 kmem_free(ifp->if_u1.if_data);
649 }
650 ifp->if_u1.if_data = NULL; 606 ifp->if_u1.if_data = NULL;
651 real_size = 0; 607 real_size = 0;
652 } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
653 /*
654 * If the valid extents/data can fit in if_inline_ext/data,
655 * copy them from the malloc'd vector and free it.
656 */
657 if (ifp->if_u1.if_data == NULL) {
658 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
659 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
660 ASSERT(ifp->if_real_bytes != 0);
661 memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
662 new_size);
663 kmem_free(ifp->if_u1.if_data);
664 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
665 }
666 real_size = 0;
667 } else { 608 } else {
668 /* 609 /*
669 * Stuck with malloc/realloc. 610 * Stuck with malloc/realloc.
@@ -677,7 +618,7 @@ xfs_idata_realloc(
677 ASSERT(ifp->if_real_bytes == 0); 618 ASSERT(ifp->if_real_bytes == 0);
678 ifp->if_u1.if_data = kmem_alloc(real_size, 619 ifp->if_u1.if_data = kmem_alloc(real_size,
679 KM_SLEEP | KM_NOFS); 620 KM_SLEEP | KM_NOFS);
680 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 621 } else {
681 /* 622 /*
682 * Only do the realloc if the underlying size 623 * Only do the realloc if the underlying size
683 * is really changing. 624 * is really changing.
@@ -688,12 +629,6 @@ xfs_idata_realloc(
688 real_size, 629 real_size,
689 KM_SLEEP | KM_NOFS); 630 KM_SLEEP | KM_NOFS);
690 } 631 }
691 } else {
692 ASSERT(ifp->if_real_bytes == 0);
693 ifp->if_u1.if_data = kmem_alloc(real_size,
694 KM_SLEEP | KM_NOFS);
695 memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
696 ifp->if_bytes);
697 } 632 }
698 } 633 }
699 ifp->if_real_bytes = real_size; 634 ifp->if_real_bytes = real_size;
@@ -721,23 +656,18 @@ xfs_idestroy_fork(
721 * so check and free it up if we do. 656 * so check and free it up if we do.
722 */ 657 */
723 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 658 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
724 if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && 659 if (ifp->if_u1.if_data != NULL) {
725 (ifp->if_u1.if_data != NULL)) {
726 ASSERT(ifp->if_real_bytes != 0); 660 ASSERT(ifp->if_real_bytes != 0);
727 kmem_free(ifp->if_u1.if_data); 661 kmem_free(ifp->if_u1.if_data);
728 ifp->if_u1.if_data = NULL; 662 ifp->if_u1.if_data = NULL;
729 ifp->if_real_bytes = 0; 663 ifp->if_real_bytes = 0;
730 } 664 }
731 } else if ((ifp->if_flags & XFS_IFEXTENTS) && 665 } else if ((ifp->if_flags & XFS_IFEXTENTS) && ifp->if_height) {
732 ((ifp->if_flags & XFS_IFEXTIREC) ||
733 ((ifp->if_u1.if_extents != NULL) &&
734 (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
735 ASSERT(ifp->if_real_bytes != 0);
736 xfs_iext_destroy(ifp); 666 xfs_iext_destroy(ifp);
737 } 667 }
738 ASSERT(ifp->if_u1.if_extents == NULL || 668
739 ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
740 ASSERT(ifp->if_real_bytes == 0); 669 ASSERT(ifp->if_real_bytes == 0);
670
741 if (whichfork == XFS_ATTR_FORK) { 671 if (whichfork == XFS_ATTR_FORK) {
742 kmem_zone_free(xfs_ifork_zone, ip->i_afp); 672 kmem_zone_free(xfs_ifork_zone, ip->i_afp);
743 ip->i_afp = NULL; 673 ip->i_afp = NULL;
@@ -747,19 +677,9 @@ xfs_idestroy_fork(
747 } 677 }
748} 678}
749 679
750/* Count number of incore extents based on if_bytes */
751xfs_extnum_t
752xfs_iext_count(struct xfs_ifork *ifp)
753{
754 return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
755}
756
757/* 680/*
758 * Convert in-core extents to on-disk form 681 * Convert in-core extents to on-disk form
759 * 682 *
760 * For either the data or attr fork in extent format, we need to endian convert
761 * the in-core extent as we place them into the on-disk inode.
762 *
763 * In the case of the data fork, the in-core and on-disk fork sizes can be 683 * In the case of the data fork, the in-core and on-disk fork sizes can be
764 * different due to delayed allocation extents. We only copy on-disk extents 684 * different due to delayed allocation extents. We only copy on-disk extents
765 * here, so callers must always use the physical fork size to determine the 685 * here, so callers must always use the physical fork size to determine the
@@ -768,53 +688,32 @@ xfs_iext_count(struct xfs_ifork *ifp)
768 */ 688 */
769int 689int
770xfs_iextents_copy( 690xfs_iextents_copy(
771 xfs_inode_t *ip, 691 struct xfs_inode *ip,
772 xfs_bmbt_rec_t *dp, 692 struct xfs_bmbt_rec *dp,
773 int whichfork) 693 int whichfork)
774{ 694{
775 int copied; 695 int state = xfs_bmap_fork_to_state(whichfork);
776 int i; 696 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
777 xfs_ifork_t *ifp; 697 struct xfs_iext_cursor icur;
778 int nrecs; 698 struct xfs_bmbt_irec rec;
779 xfs_fsblock_t start_block; 699 int copied = 0;
780 700
781 ifp = XFS_IFORK_PTR(ip, whichfork); 701 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
782 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
783 ASSERT(ifp->if_bytes > 0); 702 ASSERT(ifp->if_bytes > 0);
784 703
785 nrecs = xfs_iext_count(ifp); 704 for_each_xfs_iext(ifp, &icur, &rec) {
786 XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); 705 if (isnullstartblock(rec.br_startblock))
787 ASSERT(nrecs > 0);
788
789 /*
790 * There are some delayed allocation extents in the
791 * inode, so copy the extents one at a time and skip
792 * the delayed ones. There must be at least one
793 * non-delayed extent.
794 */
795 copied = 0;
796 for (i = 0; i < nrecs; i++) {
797 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
798
799 ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, ep));
800
801 start_block = xfs_bmbt_get_startblock(ep);
802 if (isnullstartblock(start_block)) {
803 /*
804 * It's a delayed allocation extent, so skip it.
805 */
806 continue; 706 continue;
807 } 707 ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, &rec));
808 708 xfs_bmbt_disk_set_all(dp, &rec);
809 /* Translate to on disk format */ 709 trace_xfs_write_extent(ip, &icur, state, _RET_IP_);
810 put_unaligned_be64(ep->l0, &dp->l0); 710 copied += sizeof(struct xfs_bmbt_rec);
811 put_unaligned_be64(ep->l1, &dp->l1);
812 dp++; 711 dp++;
813 copied++;
814 } 712 }
815 ASSERT(copied != 0);
816 713
817 return (copied * (uint)sizeof(xfs_bmbt_rec_t)); 714 ASSERT(copied > 0);
715 ASSERT(copied <= ifp->if_bytes);
716 return copied;
818} 717}
819 718
820/* 719/*
@@ -872,7 +771,6 @@ xfs_iflush_fork(
872 !(iip->ili_fields & extflag[whichfork])); 771 !(iip->ili_fields & extflag[whichfork]));
873 if ((iip->ili_fields & extflag[whichfork]) && 772 if ((iip->ili_fields & extflag[whichfork]) &&
874 (ifp->if_bytes > 0)) { 773 (ifp->if_bytes > 0)) {
875 ASSERT(xfs_iext_get_ext(ifp, 0));
876 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 774 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
877 (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, 775 (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
878 whichfork); 776 whichfork);
@@ -894,16 +792,7 @@ xfs_iflush_fork(
894 case XFS_DINODE_FMT_DEV: 792 case XFS_DINODE_FMT_DEV:
895 if (iip->ili_fields & XFS_ILOG_DEV) { 793 if (iip->ili_fields & XFS_ILOG_DEV) {
896 ASSERT(whichfork == XFS_DATA_FORK); 794 ASSERT(whichfork == XFS_DATA_FORK);
897 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); 795 xfs_dinode_put_rdev(dip, sysv_encode_dev(VFS_I(ip)->i_rdev));
898 }
899 break;
900
901 case XFS_DINODE_FMT_UUID:
902 if (iip->ili_fields & XFS_ILOG_UUID) {
903 ASSERT(whichfork == XFS_DATA_FORK);
904 memcpy(XFS_DFORK_DPTR(dip),
905 &ip->i_df.if_u2.if_uuid,
906 sizeof(uuid_t));
907 } 796 }
908 break; 797 break;
909 798
@@ -913,33 +802,6 @@ xfs_iflush_fork(
913 } 802 }
914} 803}
915 804
916/*
917 * Return a pointer to the extent record at file index idx.
918 */
919xfs_bmbt_rec_host_t *
920xfs_iext_get_ext(
921 xfs_ifork_t *ifp, /* inode fork pointer */
922 xfs_extnum_t idx) /* index of target extent */
923{
924 ASSERT(idx >= 0);
925 ASSERT(idx < xfs_iext_count(ifp));
926
927 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
928 return ifp->if_u1.if_ext_irec->er_extbuf;
929 } else if (ifp->if_flags & XFS_IFEXTIREC) {
930 xfs_ext_irec_t *erp; /* irec pointer */
931 int erp_idx = 0; /* irec index */
932 xfs_extnum_t page_idx = idx; /* ext index in target list */
933
934 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
935 return &erp->er_extbuf[page_idx];
936 } else if (ifp->if_bytes) {
937 return &ifp->if_u1.if_extents[idx];
938 } else {
939 return NULL;
940 }
941}
942
943/* Convert bmap state flags to an inode fork. */ 805/* Convert bmap state flags to an inode fork. */
944struct xfs_ifork * 806struct xfs_ifork *
945xfs_iext_state_to_fork( 807xfs_iext_state_to_fork(
@@ -954,1011 +816,6 @@ xfs_iext_state_to_fork(
954} 816}
955 817
956/* 818/*
957 * Insert new item(s) into the extent records for incore inode
958 * fork 'ifp'. 'count' new items are inserted at index 'idx'.
959 */
960void
961xfs_iext_insert(
962 xfs_inode_t *ip, /* incore inode pointer */
963 xfs_extnum_t idx, /* starting index of new items */
964 xfs_extnum_t count, /* number of inserted items */
965 xfs_bmbt_irec_t *new, /* items to insert */
966 int state) /* type of extent conversion */
967{
968 xfs_ifork_t *ifp = xfs_iext_state_to_fork(ip, state);
969 xfs_extnum_t i; /* extent record index */
970
971 trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
972
973 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
974 xfs_iext_add(ifp, idx, count);
975 for (i = idx; i < idx + count; i++, new++)
976 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
977}
978
979/*
980 * This is called when the amount of space required for incore file
981 * extents needs to be increased. The ext_diff parameter stores the
982 * number of new extents being added and the idx parameter contains
983 * the extent index where the new extents will be added. If the new
984 * extents are being appended, then we just need to (re)allocate and
985 * initialize the space. Otherwise, if the new extents are being
986 * inserted into the middle of the existing entries, a bit more work
987 * is required to make room for the new extents to be inserted. The
988 * caller is responsible for filling in the new extent entries upon
989 * return.
990 */
991void
992xfs_iext_add(
993 xfs_ifork_t *ifp, /* inode fork pointer */
994 xfs_extnum_t idx, /* index to begin adding exts */
995 int ext_diff) /* number of extents to add */
996{
997 int byte_diff; /* new bytes being added */
998 int new_size; /* size of extents after adding */
999 xfs_extnum_t nextents; /* number of extents in file */
1000
1001 nextents = xfs_iext_count(ifp);
1002 ASSERT((idx >= 0) && (idx <= nextents));
1003 byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
1004 new_size = ifp->if_bytes + byte_diff;
1005 /*
1006 * If the new number of extents (nextents + ext_diff)
1007 * fits inside the inode, then continue to use the inline
1008 * extent buffer.
1009 */
1010 if (nextents + ext_diff <= XFS_INLINE_EXTS) {
1011 if (idx < nextents) {
1012 memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
1013 &ifp->if_u2.if_inline_ext[idx],
1014 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
1015 memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
1016 }
1017 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
1018 ifp->if_real_bytes = 0;
1019 }
1020 /*
1021 * Otherwise use a linear (direct) extent list.
1022 * If the extents are currently inside the inode,
1023 * xfs_iext_realloc_direct will switch us from
1024 * inline to direct extent allocation mode.
1025 */
1026 else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
1027 xfs_iext_realloc_direct(ifp, new_size);
1028 if (idx < nextents) {
1029 memmove(&ifp->if_u1.if_extents[idx + ext_diff],
1030 &ifp->if_u1.if_extents[idx],
1031 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
1032 memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
1033 }
1034 }
1035 /* Indirection array */
1036 else {
1037 xfs_ext_irec_t *erp;
1038 int erp_idx = 0;
1039 int page_idx = idx;
1040
1041 ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
1042 if (ifp->if_flags & XFS_IFEXTIREC) {
1043 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
1044 } else {
1045 xfs_iext_irec_init(ifp);
1046 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1047 erp = ifp->if_u1.if_ext_irec;
1048 }
1049 /* Extents fit in target extent page */
1050 if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
1051 if (page_idx < erp->er_extcount) {
1052 memmove(&erp->er_extbuf[page_idx + ext_diff],
1053 &erp->er_extbuf[page_idx],
1054 (erp->er_extcount - page_idx) *
1055 sizeof(xfs_bmbt_rec_t));
1056 memset(&erp->er_extbuf[page_idx], 0, byte_diff);
1057 }
1058 erp->er_extcount += ext_diff;
1059 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1060 }
1061 /* Insert a new extent page */
1062 else if (erp) {
1063 xfs_iext_add_indirect_multi(ifp,
1064 erp_idx, page_idx, ext_diff);
1065 }
1066 /*
1067 * If extent(s) are being appended to the last page in
1068 * the indirection array and the new extent(s) don't fit
1069 * in the page, then erp is NULL and erp_idx is set to
1070 * the next index needed in the indirection array.
1071 */
1072 else {
1073 uint count = ext_diff;
1074
1075 while (count) {
1076 erp = xfs_iext_irec_new(ifp, erp_idx);
1077 erp->er_extcount = min(count, XFS_LINEAR_EXTS);
1078 count -= erp->er_extcount;
1079 if (count)
1080 erp_idx++;
1081 }
1082 }
1083 }
1084 ifp->if_bytes = new_size;
1085}
1086
1087/*
1088 * This is called when incore extents are being added to the indirection
1089 * array and the new extents do not fit in the target extent list. The
1090 * erp_idx parameter contains the irec index for the target extent list
1091 * in the indirection array, and the idx parameter contains the extent
1092 * index within the list. The number of extents being added is stored
1093 * in the count parameter.
1094 *
1095 * |-------| |-------|
1096 * | | | | idx - number of extents before idx
1097 * | idx | | count |
1098 * | | | | count - number of extents being inserted at idx
1099 * |-------| |-------|
1100 * | count | | nex2 | nex2 - number of extents after idx + count
1101 * |-------| |-------|
1102 */
1103void
1104xfs_iext_add_indirect_multi(
1105 xfs_ifork_t *ifp, /* inode fork pointer */
1106 int erp_idx, /* target extent irec index */
1107 xfs_extnum_t idx, /* index within target list */
1108 int count) /* new extents being added */
1109{
1110 int byte_diff; /* new bytes being added */
1111 xfs_ext_irec_t *erp; /* pointer to irec entry */
1112 xfs_extnum_t ext_diff; /* number of extents to add */
1113 xfs_extnum_t ext_cnt; /* new extents still needed */
1114 xfs_extnum_t nex2; /* extents after idx + count */
1115 xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */
1116 int nlists; /* number of irec's (lists) */
1117
1118 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1119 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1120 nex2 = erp->er_extcount - idx;
1121 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1122
1123 /*
1124 * Save second part of target extent list
1125 * (all extents past */
1126 if (nex2) {
1127 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
1128 nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
1129 memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
1130 erp->er_extcount -= nex2;
1131 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
1132 memset(&erp->er_extbuf[idx], 0, byte_diff);
1133 }
1134
1135 /*
1136 * Add the new extents to the end of the target
1137 * list, then allocate new irec record(s) and
1138 * extent buffer(s) as needed to store the rest
1139 * of the new extents.
1140 */
1141 ext_cnt = count;
1142 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
1143 if (ext_diff) {
1144 erp->er_extcount += ext_diff;
1145 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1146 ext_cnt -= ext_diff;
1147 }
1148 while (ext_cnt) {
1149 erp_idx++;
1150 erp = xfs_iext_irec_new(ifp, erp_idx);
1151 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
1152 erp->er_extcount = ext_diff;
1153 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1154 ext_cnt -= ext_diff;
1155 }
1156
1157 /* Add nex2 extents back to indirection array */
1158 if (nex2) {
1159 xfs_extnum_t ext_avail;
1160 int i;
1161
1162 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
1163 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
1164 i = 0;
1165 /*
1166 * If nex2 extents fit in the current page, append
1167 * nex2_ep after the new extents.
1168 */
1169 if (nex2 <= ext_avail) {
1170 i = erp->er_extcount;
1171 }
1172 /*
1173 * Otherwise, check if space is available in the
1174 * next page.
1175 */
1176 else if ((erp_idx < nlists - 1) &&
1177 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
1178 ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
1179 erp_idx++;
1180 erp++;
1181 /* Create a hole for nex2 extents */
1182 memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
1183 erp->er_extcount * sizeof(xfs_bmbt_rec_t));
1184 }
1185 /*
1186 * Final choice, create a new extent page for
1187 * nex2 extents.
1188 */
1189 else {
1190 erp_idx++;
1191 erp = xfs_iext_irec_new(ifp, erp_idx);
1192 }
1193 memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
1194 kmem_free(nex2_ep);
1195 erp->er_extcount += nex2;
1196 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
1197 }
1198}
1199
1200/*
1201 * This is called when the amount of space required for incore file
1202 * extents needs to be decreased. The ext_diff parameter stores the
1203 * number of extents to be removed and the idx parameter contains
1204 * the extent index where the extents will be removed from.
1205 *
1206 * If the amount of space needed has decreased below the linear
1207 * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
1208 * extent array. Otherwise, use kmem_realloc() to adjust the
1209 * size to what is needed.
1210 */
1211void
1212xfs_iext_remove(
1213 xfs_inode_t *ip, /* incore inode pointer */
1214 xfs_extnum_t idx, /* index to begin removing exts */
1215 int ext_diff, /* number of extents to remove */
1216 int state) /* type of extent conversion */
1217{
1218 xfs_ifork_t *ifp = xfs_iext_state_to_fork(ip, state);
1219 xfs_extnum_t nextents; /* number of extents in file */
1220 int new_size; /* size of extents after removal */
1221
1222 trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
1223
1224 ASSERT(ext_diff > 0);
1225 nextents = xfs_iext_count(ifp);
1226 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
1227
1228 if (new_size == 0) {
1229 xfs_iext_destroy(ifp);
1230 } else if (ifp->if_flags & XFS_IFEXTIREC) {
1231 xfs_iext_remove_indirect(ifp, idx, ext_diff);
1232 } else if (ifp->if_real_bytes) {
1233 xfs_iext_remove_direct(ifp, idx, ext_diff);
1234 } else {
1235 xfs_iext_remove_inline(ifp, idx, ext_diff);
1236 }
1237 ifp->if_bytes = new_size;
1238}
1239
1240/*
1241 * This removes ext_diff extents from the inline buffer, beginning
1242 * at extent index idx.
1243 */
1244void
1245xfs_iext_remove_inline(
1246 xfs_ifork_t *ifp, /* inode fork pointer */
1247 xfs_extnum_t idx, /* index to begin removing exts */
1248 int ext_diff) /* number of extents to remove */
1249{
1250 int nextents; /* number of extents in file */
1251
1252 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1253 ASSERT(idx < XFS_INLINE_EXTS);
1254 nextents = xfs_iext_count(ifp);
1255 ASSERT(((nextents - ext_diff) > 0) &&
1256 (nextents - ext_diff) < XFS_INLINE_EXTS);
1257
1258 if (idx + ext_diff < nextents) {
1259 memmove(&ifp->if_u2.if_inline_ext[idx],
1260 &ifp->if_u2.if_inline_ext[idx + ext_diff],
1261 (nextents - (idx + ext_diff)) *
1262 sizeof(xfs_bmbt_rec_t));
1263 memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
1264 0, ext_diff * sizeof(xfs_bmbt_rec_t));
1265 } else {
1266 memset(&ifp->if_u2.if_inline_ext[idx], 0,
1267 ext_diff * sizeof(xfs_bmbt_rec_t));
1268 }
1269}
1270
1271/*
1272 * This removes ext_diff extents from a linear (direct) extent list,
1273 * beginning at extent index idx. If the extents are being removed
1274 * from the end of the list (ie. truncate) then we just need to re-
1275 * allocate the list to remove the extra space. Otherwise, if the
1276 * extents are being removed from the middle of the existing extent
1277 * entries, then we first need to move the extent records beginning
1278 * at idx + ext_diff up in the list to overwrite the records being
1279 * removed, then remove the extra space via kmem_realloc.
1280 */
1281void
1282xfs_iext_remove_direct(
1283 xfs_ifork_t *ifp, /* inode fork pointer */
1284 xfs_extnum_t idx, /* index to begin removing exts */
1285 int ext_diff) /* number of extents to remove */
1286{
1287 xfs_extnum_t nextents; /* number of extents in file */
1288 int new_size; /* size of extents after removal */
1289
1290 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1291 new_size = ifp->if_bytes -
1292 (ext_diff * sizeof(xfs_bmbt_rec_t));
1293 nextents = xfs_iext_count(ifp);
1294
1295 if (new_size == 0) {
1296 xfs_iext_destroy(ifp);
1297 return;
1298 }
1299 /* Move extents up in the list (if needed) */
1300 if (idx + ext_diff < nextents) {
1301 memmove(&ifp->if_u1.if_extents[idx],
1302 &ifp->if_u1.if_extents[idx + ext_diff],
1303 (nextents - (idx + ext_diff)) *
1304 sizeof(xfs_bmbt_rec_t));
1305 }
1306 memset(&ifp->if_u1.if_extents[nextents - ext_diff],
1307 0, ext_diff * sizeof(xfs_bmbt_rec_t));
1308 /*
1309 * Reallocate the direct extent list. If the extents
1310 * will fit inside the inode then xfs_iext_realloc_direct
1311 * will switch from direct to inline extent allocation
1312 * mode for us.
1313 */
1314 xfs_iext_realloc_direct(ifp, new_size);
1315 ifp->if_bytes = new_size;
1316}
1317
/*
 * This is called when incore extents are being removed from the
 * indirection array and the extents being removed span multiple extent
 * buffers. The idx parameter contains the file extent index where we
 * want to begin removing extents, and the count parameter contains
 * how many extents need to be removed.
 *
 *    |-------|   |-------|
 *    | nex1  |   |       |    nex1 - number of extents before idx
 *    |-------|   | count |
 *    |       |   |       |    count - number of extents being removed at idx
 *    | count |   |-------|
 *    |       |   | nex2  |    nex2 - number of extents after idx + count
 *    |-------|   |-------|
 */
void
xfs_iext_remove_indirect(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	idx,		/* index to begin removing extents */
	int		count)		/* number of extents to remove */
{
	xfs_ext_irec_t	*erp;		/* indirection array pointer */
	int		erp_idx = 0;	/* indirection array index */
	xfs_extnum_t	ext_cnt;	/* extents left to remove */
	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
	xfs_extnum_t	nex1;		/* number of extents before idx */
	xfs_extnum_t	nex2;		/* extents after idx + count */
	int		page_idx = idx;	/* index in target extent list */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	/* Translate the file extent index into an irec + offset within it. */
	erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
	ASSERT(erp != NULL);
	nex1 = page_idx;
	ext_cnt = count;
	/* Walk forward through the irecs, trimming each in turn. */
	while (ext_cnt) {
		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
		/*
		 * Check for deletion of entire list;
		 * xfs_iext_irec_remove() updates extent offsets.
		 */
		if (ext_diff == erp->er_extcount) {
			xfs_iext_irec_remove(ifp, erp_idx);
			ext_cnt -= ext_diff;
			nex1 = 0;
			if (ext_cnt) {
				/*
				 * The array shifted down over erp_idx, so the
				 * same index now names the next buffer.
				 */
				ASSERT(erp_idx < ifp->if_real_bytes /
					XFS_IEXT_BUFSZ);
				erp = &ifp->if_u1.if_ext_irec[erp_idx];
				nex1 = 0;
				continue;
			} else {
				break;
			}
		}
		/* Move extents up (if needed) */
		if (nex2) {
			memmove(&erp->er_extbuf[nex1],
				&erp->er_extbuf[nex1 + ext_diff],
				nex2 * sizeof(xfs_bmbt_rec_t));
		}
		/* Zero out rest of page */
		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
		/* Update remaining counters */
		erp->er_extcount -= ext_diff;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
		ext_cnt -= ext_diff;
		nex1 = 0;
		erp_idx++;
		erp++;
	}
	/* All removals done: shrink if_bytes and compact sparse buffers. */
	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
	xfs_iext_irec_compact(ifp);
}
1393
/*
 * Create, destroy, or resize a linear (direct) block of extents.
 * The real allocation is rounded up to a power of two (rnew_size) to
 * amortize repeated reallocations; if_bytes tracks the requested size
 * while if_real_bytes tracks what was actually allocated.
 */
void
xfs_iext_realloc_direct(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	int		new_size)	/* new size of extents after adding */
{
	int		rnew_size;	/* real new size of extents */

	rnew_size = new_size;

	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
		 (new_size != ifp->if_real_bytes)));

	/* Free extent records */
	if (new_size == 0) {
		xfs_iext_destroy(ifp);
	}
	/* Resize direct extent list and zero any new bytes */
	else if (ifp->if_real_bytes) {
		/* Check if extents will fit inside the inode */
		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
			xfs_iext_direct_to_inline(ifp, new_size /
				(uint)sizeof(xfs_bmbt_rec_t));
			ifp->if_bytes = new_size;
			return;
		}
		if (!is_power_of_2(new_size)){
			rnew_size = roundup_pow_of_two(new_size);
		}
		if (rnew_size != ifp->if_real_bytes) {
			ifp->if_u1.if_extents =
				kmem_realloc(ifp->if_u1.if_extents,
						rnew_size, KM_NOFS);
		}
		/* Zero the region the realloc grew beyond the old contents. */
		if (rnew_size > ifp->if_real_bytes) {
			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
				(uint)sizeof(xfs_bmbt_rec_t)], 0,
				rnew_size - ifp->if_real_bytes);
		}
	}
	/* Switch from the inline extent buffer to a direct extent list */
	else {
		if (!is_power_of_2(new_size)) {
			rnew_size = roundup_pow_of_two(new_size);
		}
		xfs_iext_inline_to_direct(ifp, rnew_size);
	}
	ifp->if_real_bytes = rnew_size;
	ifp->if_bytes = new_size;
}
1447
1448/*
1449 * Switch from linear (direct) extent records to inline buffer.
1450 */
1451void
1452xfs_iext_direct_to_inline(
1453 xfs_ifork_t *ifp, /* inode fork pointer */
1454 xfs_extnum_t nextents) /* number of extents in file */
1455{
1456 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1457 ASSERT(nextents <= XFS_INLINE_EXTS);
1458 /*
1459 * The inline buffer was zeroed when we switched
1460 * from inline to direct extent allocation mode,
1461 * so we don't need to clear it here.
1462 */
1463 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
1464 nextents * sizeof(xfs_bmbt_rec_t));
1465 kmem_free(ifp->if_u1.if_extents);
1466 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
1467 ifp->if_real_bytes = 0;
1468}
1469
1470/*
1471 * Switch from inline buffer to linear (direct) extent records.
1472 * new_size should already be rounded up to the next power of 2
1473 * by the caller (when appropriate), so use new_size as it is.
1474 * However, since new_size may be rounded up, we can't update
1475 * if_bytes here. It is the caller's responsibility to update
1476 * if_bytes upon return.
1477 */
1478void
1479xfs_iext_inline_to_direct(
1480 xfs_ifork_t *ifp, /* inode fork pointer */
1481 int new_size) /* number of extents in file */
1482{
1483 ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
1484 memset(ifp->if_u1.if_extents, 0, new_size);
1485 if (ifp->if_bytes) {
1486 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
1487 ifp->if_bytes);
1488 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1489 sizeof(xfs_bmbt_rec_t));
1490 }
1491 ifp->if_real_bytes = new_size;
1492}
1493
1494/*
1495 * Resize an extent indirection array to new_size bytes.
1496 */
1497STATIC void
1498xfs_iext_realloc_indirect(
1499 xfs_ifork_t *ifp, /* inode fork pointer */
1500 int new_size) /* new indirection array size */
1501{
1502 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1503 ASSERT(ifp->if_real_bytes);
1504 ASSERT((new_size >= 0) &&
1505 (new_size != ((ifp->if_real_bytes / XFS_IEXT_BUFSZ) *
1506 sizeof(xfs_ext_irec_t))));
1507 if (new_size == 0) {
1508 xfs_iext_destroy(ifp);
1509 } else {
1510 ifp->if_u1.if_ext_irec =
1511 kmem_realloc(ifp->if_u1.if_ext_irec, new_size, KM_NOFS);
1512 }
1513}
1514
/*
 * Switch from indirection array to linear (direct) extent allocations.
 */
STATIC void
xfs_iext_indirect_to_direct(
	 xfs_ifork_t	*ifp)		/* inode fork pointer */
{
	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
	xfs_extnum_t	nextents;	/* number of extents in file */
	int		size;		/* size of file extents */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nextents = xfs_iext_count(ifp);
	ASSERT(nextents <= XFS_LINEAR_EXTS);
	size = nextents * sizeof(xfs_bmbt_rec_t);

	/* Merge all records into the first (single) extent buffer. */
	xfs_iext_irec_compact_pages(ifp);
	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);

	/* Steal that buffer as the direct list and drop the irec array. */
	ep = ifp->if_u1.if_ext_irec->er_extbuf;
	kmem_free(ifp->if_u1.if_ext_irec);
	ifp->if_flags &= ~XFS_IFEXTIREC;
	ifp->if_u1.if_extents = ep;
	ifp->if_bytes = size;
	/* Trim the buffer down if it holds fewer than a full page of exts. */
	if (nextents < XFS_LINEAR_EXTS) {
		xfs_iext_realloc_direct(ifp, size);
	}
}
1543
1544/*
1545 * Remove all records from the indirection array.
1546 */
1547STATIC void
1548xfs_iext_irec_remove_all(
1549 struct xfs_ifork *ifp)
1550{
1551 int nlists;
1552 int i;
1553
1554 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1555 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1556 for (i = 0; i < nlists; i++)
1557 kmem_free(ifp->if_u1.if_ext_irec[i].er_extbuf);
1558 kmem_free(ifp->if_u1.if_ext_irec);
1559 ifp->if_flags &= ~XFS_IFEXTIREC;
1560}
1561
1562/*
1563 * Free incore file extents.
1564 */
1565void
1566xfs_iext_destroy(
1567 xfs_ifork_t *ifp) /* inode fork pointer */
1568{
1569 if (ifp->if_flags & XFS_IFEXTIREC) {
1570 xfs_iext_irec_remove_all(ifp);
1571 } else if (ifp->if_real_bytes) {
1572 kmem_free(ifp->if_u1.if_extents);
1573 } else if (ifp->if_bytes) {
1574 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1575 sizeof(xfs_bmbt_rec_t));
1576 }
1577 ifp->if_u1.if_extents = NULL;
1578 ifp->if_real_bytes = 0;
1579 ifp->if_bytes = 0;
1580}
1581
/*
 * Return a pointer to the extent record for file system block bno.
 * If bno falls in a hole, return the extent after the hole (or NULL
 * when bno is beyond the last extent); *idxp always receives the
 * file-based index of the returned (or next) extent.
 */
xfs_bmbt_rec_host_t *			/* pointer to found extent record */
xfs_iext_bno_to_ext(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fileoff_t	bno,		/* block number to search for */
	xfs_extnum_t	*idxp)		/* index of target extent */
{
	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
	int		high;		/* upper boundary in search */
	xfs_extnum_t	idx = 0;	/* index of target extent */
	int		low;		/* lower boundary in search */
	xfs_extnum_t	nextents;	/* number of file extents */
	xfs_fileoff_t	startoff = 0;	/* start offset of extent */

	nextents = xfs_iext_count(ifp);
	if (nextents == 0) {
		*idxp = 0;
		return NULL;
	}
	low = 0;
	if (ifp->if_flags & XFS_IFEXTIREC) {
		/* Find target extent list */
		int	erp_idx = 0;
		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
		base = erp->er_extbuf;
		high = erp->er_extcount - 1;
	} else {
		base = ifp->if_u1.if_extents;
		high = nextents - 1;
	}
	/* Binary search extent records */
	while (low <= high) {
		idx = (low + high) >> 1;
		ep = base + idx;
		startoff = xfs_bmbt_get_startoff(ep);
		blockcount = xfs_bmbt_get_blockcount(ep);
		if (bno < startoff) {
			high = idx - 1;
		} else if (bno >= startoff + blockcount) {
			low = idx + 1;
		} else {
			/* Convert back to file-based extent index */
			if (ifp->if_flags & XFS_IFEXTIREC) {
				idx += erp->er_extoff;
			}
			*idxp = idx;
			return ep;
		}
	}
	/* Convert back to file-based extent index */
	if (ifp->if_flags & XFS_IFEXTIREC) {
		idx += erp->er_extoff;
	}
	/*
	 * bno lies in a hole: if the last probed extent ends at or
	 * before bno, step to the next extent (NULL when past EOF).
	 */
	if (bno >= startoff + blockcount) {
		if (++idx == nextents) {
			ep = NULL;
		} else {
			ep = xfs_iext_get_ext(ifp, idx);
		}
	}
	*idxp = idx;
	return ep;
}
1650
/*
 * Return a pointer to the indirection array entry containing the
 * extent record for filesystem block bno. Store the index of the
 * target irec in *erp_idxp.
 */
xfs_ext_irec_t *			/* pointer to found extent record */
xfs_iext_bno_to_irec(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fileoff_t	bno,		/* block number to search for */
	int		*erp_idxp)	/* irec index of target ext list */
{
	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
	int		erp_idx;	/* indirection array index */
	int		nlists;		/* number of extent irec's (lists) */
	int		high;		/* binary search upper limit */
	int		low;		/* binary search lower limit */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp_idx = 0;
	low = 0;
	high = nlists - 1;
	/*
	 * Binary search on the startoff of each irec's first record:
	 * land on the last irec whose first extent starts at or before
	 * bno (i.e. the next irec, if any, starts beyond bno).
	 */
	while (low <= high) {
		erp_idx = (low + high) >> 1;
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
			high = erp_idx - 1;
		} else if (erp_next && bno >=
			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
			low = erp_idx + 1;
		} else {
			break;
		}
	}
	*erp_idxp = erp_idx;
	return erp;
}
1690
/*
 * Return a pointer to the indirection array entry containing the
 * extent record at file extent index *idxp. Store the index of the
 * target irec in *erp_idxp and store the page index of the target
 * extent record in *idxp.
 *
 * When realloc is set the caller is about to insert, so boundary
 * indices prefer whichever neighboring irec has room for a new record.
 */
xfs_ext_irec_t *
xfs_iext_idx_to_irec(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
	int		*erp_idxp,	/* pointer to target irec */
	int		realloc)	/* new bytes were just added */
{
	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
	int		erp_idx;	/* indirection array index */
	int		nlists;		/* number of irec's (ex lists) */
	int		high;		/* binary search upper limit */
	int		low;		/* binary search lower limit */
	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	ASSERT(page_idx >= 0);
	ASSERT(page_idx <= xfs_iext_count(ifp));
	/* Index one past the end is only legal when inserting. */
	ASSERT(page_idx < xfs_iext_count(ifp) || realloc);

	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp_idx = 0;
	low = 0;
	high = nlists - 1;

	/* Binary search extent irec's */
	while (low <= high) {
		erp_idx = (low + high) >> 1;
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		prev = erp_idx > 0 ? erp - 1 : NULL;
		/*
		 * On an exact front boundary during insert, prefer the
		 * previous irec if it still has room for the new record.
		 */
		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
		    realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
			high = erp_idx - 1;
		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
			   (page_idx == erp->er_extoff + erp->er_extcount &&
			    !realloc)) {
			low = erp_idx + 1;
		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
			   erp->er_extcount == XFS_LINEAR_EXTS) {
			/*
			 * Inserting just past a full irec: the record must
			 * go at the front of the next irec (NULL if a new
			 * one has to be created by the caller).
			 */
			ASSERT(realloc);
			page_idx = 0;
			erp_idx++;
			erp = erp_idx < nlists ? erp + 1 : NULL;
			break;
		} else {
			page_idx -= erp->er_extoff;
			break;
		}
	}
	*idxp = page_idx;
	*erp_idxp = erp_idx;
	return erp;
}
1750
1751/*
1752 * Allocate and initialize an indirection array once the space needed
1753 * for incore extents increases above XFS_IEXT_BUFSZ.
1754 */
1755void
1756xfs_iext_irec_init(
1757 xfs_ifork_t *ifp) /* inode fork pointer */
1758{
1759 xfs_ext_irec_t *erp; /* indirection array pointer */
1760 xfs_extnum_t nextents; /* number of extents in file */
1761
1762 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1763 nextents = xfs_iext_count(ifp);
1764 ASSERT(nextents <= XFS_LINEAR_EXTS);
1765
1766 erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
1767
1768 if (nextents == 0) {
1769 ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1770 } else if (!ifp->if_real_bytes) {
1771 xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
1772 } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
1773 xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
1774 }
1775 erp->er_extbuf = ifp->if_u1.if_extents;
1776 erp->er_extcount = nextents;
1777 erp->er_extoff = 0;
1778
1779 ifp->if_flags |= XFS_IFEXTIREC;
1780 ifp->if_real_bytes = XFS_IEXT_BUFSZ;
1781 ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
1782 ifp->if_u1.if_ext_irec = erp;
1783
1784 return;
1785}
1786
/*
 * Allocate and initialize a new entry in the indirection array.
 * Existing entries at or after erp_idx are shifted down one slot,
 * and the new entry gets a fresh, zeroed extent buffer.
 */
xfs_ext_irec_t *
xfs_iext_irec_new(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	int		erp_idx)	/* index for new irec */
{
	xfs_ext_irec_t	*erp;		/* indirection array pointer */
	int		i;		/* loop counter */
	int		nlists;		/* number of irec's (ex lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;

	/* Resize indirection array */
	xfs_iext_realloc_indirect(ifp, ++nlists *
				  sizeof(xfs_ext_irec_t));
	/*
	 * Move records down in the array so the
	 * new page can use erp_idx.
	 */
	erp = ifp->if_u1.if_ext_irec;
	for (i = nlists - 1; i > erp_idx; i--) {
		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
	}
	ASSERT(i == erp_idx);

	/* Initialize new extent record */
	erp = ifp->if_u1.if_ext_irec;
	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
	erp[erp_idx].er_extcount = 0;
	/* New irec starts where its predecessor's extent range ends. */
	erp[erp_idx].er_extoff = erp_idx > 0 ?
		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
	return (&erp[erp_idx]);
}
1825
/*
 * Remove a record from the indirection array: free its extent buffer
 * (if any), pull the later entries up over it, and shrink the array.
 */
void
xfs_iext_irec_remove(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	int		erp_idx)	/* irec index to remove */
{
	xfs_ext_irec_t	*erp;		/* indirection array pointer */
	int		i;		/* loop counter */
	int		nlists;		/* number of irec's (ex lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp = &ifp->if_u1.if_ext_irec[erp_idx];
	/*
	 * er_extbuf may be NULL when the caller already freed it (see
	 * xfs_iext_irec_compact_pages); only then skip the extoff fixup.
	 */
	if (erp->er_extbuf) {
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
			-erp->er_extcount);
		kmem_free(erp->er_extbuf);
	}
	/* Compact extent records */
	erp = ifp->if_u1.if_ext_irec;
	for (i = erp_idx; i < nlists - 1; i++) {
		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
	}
	/*
	 * Manually free the last extent record from the indirection
	 * array.  A call to xfs_iext_realloc_indirect() with a size
	 * of zero would result in a call to xfs_iext_destroy() which
	 * would in turn call this function again, creating a nasty
	 * infinite loop.
	 */
	if (--nlists) {
		xfs_iext_realloc_indirect(ifp,
			nlists * sizeof(xfs_ext_irec_t));
	} else {
		kmem_free(ifp->if_u1.if_ext_irec);
	}
	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
}
1866
1867/*
1868 * This is called to clean up large amounts of unused memory allocated
1869 * by the indirection array. Before compacting anything though, verify
1870 * that the indirection array is still needed and switch back to the
1871 * linear extent list (or even the inline buffer) if possible. The
1872 * compaction policy is as follows:
1873 *
1874 * Full Compaction: Extents fit into a single page (or inline buffer)
1875 * Partial Compaction: Extents occupy less than 50% of allocated space
1876 * No Compaction: Extents occupy at least 50% of allocated space
1877 */
1878void
1879xfs_iext_irec_compact(
1880 xfs_ifork_t *ifp) /* inode fork pointer */
1881{
1882 xfs_extnum_t nextents; /* number of extents in file */
1883 int nlists; /* number of irec's (ex lists) */
1884
1885 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1886 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1887 nextents = xfs_iext_count(ifp);
1888
1889 if (nextents == 0) {
1890 xfs_iext_destroy(ifp);
1891 } else if (nextents <= XFS_INLINE_EXTS) {
1892 xfs_iext_indirect_to_direct(ifp);
1893 xfs_iext_direct_to_inline(ifp, nextents);
1894 } else if (nextents <= XFS_LINEAR_EXTS) {
1895 xfs_iext_indirect_to_direct(ifp);
1896 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
1897 xfs_iext_irec_compact_pages(ifp);
1898 }
1899}
1900
/*
 * Combine extents from neighboring extent pages.  Adjacent buffers are
 * merged whenever the next buffer's records fit into the free space of
 * the current one, and the emptied buffer is removed from the array.
 */
void
xfs_iext_irec_compact_pages(
	xfs_ifork_t	*ifp)		/* inode fork pointer */
{
	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
	int		erp_idx = 0;	/* indirection array index */
	int		nlists;		/* number of irec's (ex lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	while (erp_idx < nlists - 1) {
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		erp_next = erp + 1;
		if (erp_next->er_extcount <=
		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
			memcpy(&erp->er_extbuf[erp->er_extcount],
				erp_next->er_extbuf, erp_next->er_extcount *
				sizeof(xfs_bmbt_rec_t));
			erp->er_extcount += erp_next->er_extcount;
			/*
			 * Free page before removing extent record
			 * so er_extoffs don't get modified in
			 * xfs_iext_irec_remove.
			 */
			kmem_free(erp_next->er_extbuf);
			erp_next->er_extbuf = NULL;
			xfs_iext_irec_remove(ifp, erp_idx + 1);
			/* The array shrank; recompute the list count. */
			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
		} else {
			/* Next buffer won't fit; try the following pair. */
			erp_idx++;
		}
	}
}
1937
1938/*
1939 * This is called to update the er_extoff field in the indirection
1940 * array when extents have been added or removed from one of the
1941 * extent lists. erp_idx contains the irec index to begin updating
1942 * at and ext_diff contains the number of extents that were added
1943 * or removed.
1944 */
1945void
1946xfs_iext_irec_update_extoffs(
1947 xfs_ifork_t *ifp, /* inode fork pointer */
1948 int erp_idx, /* irec index to update */
1949 int ext_diff) /* number of new extents */
1950{
1951 int i; /* loop counter */
1952 int nlists; /* number of irec's (ex lists */
1953
1954 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1955 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1956 for (i = erp_idx; i < nlists; i++) {
1957 ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
1958 }
1959}
1960
1961/*
1962 * Initialize an inode's copy-on-write fork. 819 * Initialize an inode's copy-on-write fork.
1963 */ 820 */
1964void 821void
@@ -1974,61 +831,3 @@ xfs_ifork_init_cow(
1974 ip->i_cformat = XFS_DINODE_FMT_EXTENTS; 831 ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
1975 ip->i_cnextents = 0; 832 ip->i_cnextents = 0;
1976} 833}
1977
1978/*
1979 * Lookup the extent covering bno.
1980 *
1981 * If there is an extent covering bno return the extent index, and store the
1982 * expanded extent structure in *gotp, and the extent index in *idx.
1983 * If there is no extent covering bno, but there is an extent after it (e.g.
1984 * it lies in a hole) return that extent in *gotp and its index in *idx
1985 * instead.
1986 * If bno is beyond the last extent return false, and return the index after
1987 * the last valid index in *idxp.
1988 */
1989bool
1990xfs_iext_lookup_extent(
1991 struct xfs_inode *ip,
1992 struct xfs_ifork *ifp,
1993 xfs_fileoff_t bno,
1994 xfs_extnum_t *idxp,
1995 struct xfs_bmbt_irec *gotp)
1996{
1997 struct xfs_bmbt_rec_host *ep;
1998
1999 XFS_STATS_INC(ip->i_mount, xs_look_exlist);
2000
2001 ep = xfs_iext_bno_to_ext(ifp, bno, idxp);
2002 if (!ep)
2003 return false;
2004 xfs_bmbt_get_all(ep, gotp);
2005 return true;
2006}
2007
2008/*
2009 * Return true if there is an extent at index idx, and return the expanded
2010 * extent structure at idx in that case. Else return false.
2011 */
2012bool
2013xfs_iext_get_extent(
2014 struct xfs_ifork *ifp,
2015 xfs_extnum_t idx,
2016 struct xfs_bmbt_irec *gotp)
2017{
2018 if (idx < 0 || idx >= xfs_iext_count(ifp))
2019 return false;
2020 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp);
2021 return true;
2022}
2023
2024void
2025xfs_iext_update_extent(
2026 struct xfs_ifork *ifp,
2027 xfs_extnum_t idx,
2028 struct xfs_bmbt_irec *gotp)
2029{
2030 ASSERT(idx >= 0);
2031 ASSERT(idx < xfs_iext_count(ifp));
2032
2033 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, idx), gotp);
2034}
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 11af705219f6..b9f0098e33b8 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -22,56 +22,19 @@ struct xfs_inode_log_item;
22struct xfs_dinode; 22struct xfs_dinode;
23 23
24/* 24/*
25 * The following xfs_ext_irec_t struct introduces a second (top) level
26 * to the in-core extent allocation scheme. These structs are allocated
27 * in a contiguous block, creating an indirection array where each entry
28 * (irec) contains a pointer to a buffer of in-core extent records which
29 * it manages. Each extent buffer is 4k in size, since 4k is the system
30 * page size on Linux i386 and systems with larger page sizes don't seem
31 * to gain much, if anything, by using their native page size as the
32 * extent buffer size. Also, using 4k extent buffers everywhere provides
33 * a consistent interface for CXFS across different platforms.
34 *
35 * There is currently no limit on the number of irec's (extent lists)
36 * allowed, so heavily fragmented files may require an indirection array
37 * which spans multiple system pages of memory. The number of extents
38 * which would require this amount of contiguous memory is very large
39 * and should not cause problems in the foreseeable future. However,
40 * if the memory needed for the contiguous array ever becomes a problem,
41 * it is possible that a third level of indirection may be required.
42 */
43typedef struct xfs_ext_irec {
44 xfs_bmbt_rec_host_t *er_extbuf; /* block of extent records */
45 xfs_extnum_t er_extoff; /* extent offset in file */
46 xfs_extnum_t er_extcount; /* number of extents in page/block */
47} xfs_ext_irec_t;
48
49/*
50 * File incore extent information, present for each of data & attr forks. 25 * File incore extent information, present for each of data & attr forks.
51 */ 26 */
52#define XFS_IEXT_BUFSZ 4096
53#define XFS_LINEAR_EXTS (XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t))
54#define XFS_INLINE_EXTS 2
55#define XFS_INLINE_DATA 32
56typedef struct xfs_ifork { 27typedef struct xfs_ifork {
57 int if_bytes; /* bytes in if_u1 */ 28 int if_bytes; /* bytes in if_u1 */
58 int if_real_bytes; /* bytes allocated in if_u1 */ 29 int if_real_bytes; /* bytes allocated in if_u1 */
59 struct xfs_btree_block *if_broot; /* file's incore btree root */ 30 struct xfs_btree_block *if_broot; /* file's incore btree root */
60 short if_broot_bytes; /* bytes allocated for root */ 31 short if_broot_bytes; /* bytes allocated for root */
61 unsigned char if_flags; /* per-fork flags */ 32 unsigned char if_flags; /* per-fork flags */
33 int if_height; /* height of the extent tree */
62 union { 34 union {
63 xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ 35 void *if_root; /* extent tree root */
64 xfs_ext_irec_t *if_ext_irec; /* irec map file exts */
65 char *if_data; /* inline file data */ 36 char *if_data; /* inline file data */
66 } if_u1; 37 } if_u1;
67 union {
68 xfs_bmbt_rec_host_t if_inline_ext[XFS_INLINE_EXTS];
69 /* very small file extents */
70 char if_inline_data[XFS_INLINE_DATA];
71 /* very small file data */
72 xfs_dev_t if_rdev; /* dev number if special */
73 uuid_t if_uuid; /* mount point value */
74 } if_u2;
75} xfs_ifork_t; 38} xfs_ifork_t;
76 39
77/* 40/*
@@ -80,7 +43,6 @@ typedef struct xfs_ifork {
80#define XFS_IFINLINE 0x01 /* Inline data is read in */ 43#define XFS_IFINLINE 0x01 /* Inline data is read in */
81#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */ 44#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */
82#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */ 45#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */
83#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */
84 46
85/* 47/*
86 * Fork handling. 48 * Fork handling.
@@ -150,45 +112,75 @@ int xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *,
150 int); 112 int);
151void xfs_init_local_fork(struct xfs_inode *, int, const void *, int); 113void xfs_init_local_fork(struct xfs_inode *, int, const void *, int);
152 114
153struct xfs_bmbt_rec_host * 115xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp);
154 xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t); 116void xfs_iext_insert(struct xfs_inode *, struct xfs_iext_cursor *cur,
155xfs_extnum_t xfs_iext_count(struct xfs_ifork *); 117 struct xfs_bmbt_irec *, int);
156void xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t, 118void xfs_iext_remove(struct xfs_inode *, struct xfs_iext_cursor *,
157 struct xfs_bmbt_irec *, int); 119 int);
158void xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int);
159void xfs_iext_add_indirect_multi(struct xfs_ifork *, int,
160 xfs_extnum_t, int);
161void xfs_iext_remove(struct xfs_inode *, xfs_extnum_t, int, int);
162void xfs_iext_remove_inline(struct xfs_ifork *, xfs_extnum_t, int);
163void xfs_iext_remove_direct(struct xfs_ifork *, xfs_extnum_t, int);
164void xfs_iext_remove_indirect(struct xfs_ifork *, xfs_extnum_t, int);
165void xfs_iext_realloc_direct(struct xfs_ifork *, int);
166void xfs_iext_direct_to_inline(struct xfs_ifork *, xfs_extnum_t);
167void xfs_iext_inline_to_direct(struct xfs_ifork *, int);
168void xfs_iext_destroy(struct xfs_ifork *); 120void xfs_iext_destroy(struct xfs_ifork *);
169struct xfs_bmbt_rec_host *
170 xfs_iext_bno_to_ext(struct xfs_ifork *, xfs_fileoff_t, int *);
171struct xfs_ext_irec *
172 xfs_iext_bno_to_irec(struct xfs_ifork *, xfs_fileoff_t, int *);
173struct xfs_ext_irec *
174 xfs_iext_idx_to_irec(struct xfs_ifork *, xfs_extnum_t *, int *,
175 int);
176void xfs_iext_irec_init(struct xfs_ifork *);
177struct xfs_ext_irec *
178 xfs_iext_irec_new(struct xfs_ifork *, int);
179void xfs_iext_irec_remove(struct xfs_ifork *, int);
180void xfs_iext_irec_compact(struct xfs_ifork *);
181void xfs_iext_irec_compact_pages(struct xfs_ifork *);
182void xfs_iext_irec_compact_full(struct xfs_ifork *);
183void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int);
184 121
185bool xfs_iext_lookup_extent(struct xfs_inode *ip, 122bool xfs_iext_lookup_extent(struct xfs_inode *ip,
186 struct xfs_ifork *ifp, xfs_fileoff_t bno, 123 struct xfs_ifork *ifp, xfs_fileoff_t bno,
187 xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp); 124 struct xfs_iext_cursor *cur,
188bool xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx,
189 struct xfs_bmbt_irec *gotp); 125 struct xfs_bmbt_irec *gotp);
190void xfs_iext_update_extent(struct xfs_ifork *ifp, xfs_extnum_t idx, 126bool xfs_iext_lookup_extent_before(struct xfs_inode *ip,
127 struct xfs_ifork *ifp, xfs_fileoff_t *end,
128 struct xfs_iext_cursor *cur,
191 struct xfs_bmbt_irec *gotp); 129 struct xfs_bmbt_irec *gotp);
130bool xfs_iext_get_extent(struct xfs_ifork *ifp,
131 struct xfs_iext_cursor *cur,
132 struct xfs_bmbt_irec *gotp);
133void xfs_iext_update_extent(struct xfs_inode *ip, int state,
134 struct xfs_iext_cursor *cur,
135 struct xfs_bmbt_irec *gotp);
136
137void xfs_iext_first(struct xfs_ifork *, struct xfs_iext_cursor *);
138void xfs_iext_last(struct xfs_ifork *, struct xfs_iext_cursor *);
139void xfs_iext_next(struct xfs_ifork *, struct xfs_iext_cursor *);
140void xfs_iext_prev(struct xfs_ifork *, struct xfs_iext_cursor *);
141
142static inline bool xfs_iext_next_extent(struct xfs_ifork *ifp,
143 struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp)
144{
145 xfs_iext_next(ifp, cur);
146 return xfs_iext_get_extent(ifp, cur, gotp);
147}
148
149static inline bool xfs_iext_prev_extent(struct xfs_ifork *ifp,
150 struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp)
151{
152 xfs_iext_prev(ifp, cur);
153 return xfs_iext_get_extent(ifp, cur, gotp);
154}
155
156/*
157 * Return the extent after cur in gotp without updating the cursor.
158 */
159static inline bool xfs_iext_peek_next_extent(struct xfs_ifork *ifp,
160 struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp)
161{
162 struct xfs_iext_cursor ncur = *cur;
163
164 xfs_iext_next(ifp, &ncur);
165 return xfs_iext_get_extent(ifp, &ncur, gotp);
166}
167
168/*
169 * Return the extent before cur in gotp without updating the cursor.
170 */
171static inline bool xfs_iext_peek_prev_extent(struct xfs_ifork *ifp,
172 struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp)
173{
174 struct xfs_iext_cursor ncur = *cur;
175
176 xfs_iext_prev(ifp, &ncur);
177 return xfs_iext_get_extent(ifp, &ncur, gotp);
178}
179
180#define for_each_xfs_iext(ifp, ext, got) \
181 for (xfs_iext_first((ifp), (ext)); \
182 xfs_iext_get_extent((ifp), (ext), (got)); \
183 xfs_iext_next((ifp), (ext)))
192 184
193extern struct kmem_zone *xfs_ifork_zone; 185extern struct kmem_zone *xfs_ifork_zone;
194 186
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 71de185735e0..996f035ee205 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -264,7 +264,7 @@ typedef struct xfs_trans_header {
264 * (if any) is indicated in the ilf_dsize field. Changes to this structure 264 * (if any) is indicated in the ilf_dsize field. Changes to this structure
265 * must be added on to the end. 265 * must be added on to the end.
266 */ 266 */
267typedef struct xfs_inode_log_format { 267struct xfs_inode_log_format {
268 uint16_t ilf_type; /* inode log item type */ 268 uint16_t ilf_type; /* inode log item type */
269 uint16_t ilf_size; /* size of this item */ 269 uint16_t ilf_size; /* size of this item */
270 uint32_t ilf_fields; /* flags for fields logged */ 270 uint32_t ilf_fields; /* flags for fields logged */
@@ -274,12 +274,12 @@ typedef struct xfs_inode_log_format {
274 uint64_t ilf_ino; /* inode number */ 274 uint64_t ilf_ino; /* inode number */
275 union { 275 union {
276 uint32_t ilfu_rdev; /* rdev value for dev inode*/ 276 uint32_t ilfu_rdev; /* rdev value for dev inode*/
277 uuid_t ilfu_uuid; /* mount point value */ 277 u8 __pad[16]; /* unused */
278 } ilf_u; 278 } ilf_u;
279 int64_t ilf_blkno; /* blkno of inode buffer */ 279 int64_t ilf_blkno; /* blkno of inode buffer */
280 int32_t ilf_len; /* len of inode buffer */ 280 int32_t ilf_len; /* len of inode buffer */
281 int32_t ilf_boffset; /* off of inode in buffer */ 281 int32_t ilf_boffset; /* off of inode in buffer */
282} xfs_inode_log_format_t; 282};
283 283
284/* 284/*
285 * Old 32 bit systems will log in this format without the 64 bit 285 * Old 32 bit systems will log in this format without the 64 bit
@@ -295,7 +295,7 @@ struct xfs_inode_log_format_32 {
295 uint64_t ilf_ino; /* inode number */ 295 uint64_t ilf_ino; /* inode number */
296 union { 296 union {
297 uint32_t ilfu_rdev; /* rdev value for dev inode*/ 297 uint32_t ilfu_rdev; /* rdev value for dev inode*/
298 uuid_t ilfu_uuid; /* mount point value */ 298 u8 __pad[16]; /* unused */
299 } ilf_u; 299 } ilf_u;
300 int64_t ilf_blkno; /* blkno of inode buffer */ 300 int64_t ilf_blkno; /* blkno of inode buffer */
301 int32_t ilf_len; /* len of inode buffer */ 301 int32_t ilf_len; /* len of inode buffer */
@@ -311,7 +311,7 @@ struct xfs_inode_log_format_32 {
311#define XFS_ILOG_DEXT 0x004 /* log i_df.if_extents */ 311#define XFS_ILOG_DEXT 0x004 /* log i_df.if_extents */
312#define XFS_ILOG_DBROOT 0x008 /* log i_df.i_broot */ 312#define XFS_ILOG_DBROOT 0x008 /* log i_df.i_broot */
313#define XFS_ILOG_DEV 0x010 /* log the dev field */ 313#define XFS_ILOG_DEV 0x010 /* log the dev field */
314#define XFS_ILOG_UUID 0x020 /* log the uuid field */ 314#define XFS_ILOG_UUID 0x020 /* added long ago, but never used */
315#define XFS_ILOG_ADATA 0x040 /* log i_af.if_data */ 315#define XFS_ILOG_ADATA 0x040 /* log i_af.if_data */
316#define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */ 316#define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */
317#define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */ 317#define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */
@@ -329,9 +329,9 @@ struct xfs_inode_log_format_32 {
329 329
330#define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ 330#define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
331 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ 331 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
332 XFS_ILOG_UUID | XFS_ILOG_ADATA | \ 332 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
333 XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \ 333 XFS_ILOG_ABROOT | XFS_ILOG_DOWNER | \
334 XFS_ILOG_DOWNER | XFS_ILOG_AOWNER) 334 XFS_ILOG_AOWNER)
335 335
336#define XFS_ILOG_DFORK (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ 336#define XFS_ILOG_DFORK (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
337 XFS_ILOG_DBROOT) 337 XFS_ILOG_DBROOT)
@@ -341,10 +341,10 @@ struct xfs_inode_log_format_32 {
341 341
342#define XFS_ILOG_ALL (XFS_ILOG_CORE | XFS_ILOG_DDATA | \ 342#define XFS_ILOG_ALL (XFS_ILOG_CORE | XFS_ILOG_DDATA | \
343 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \ 343 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
344 XFS_ILOG_DEV | XFS_ILOG_UUID | \ 344 XFS_ILOG_DEV | XFS_ILOG_ADATA | \
345 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ 345 XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \
346 XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP | \ 346 XFS_ILOG_TIMESTAMP | XFS_ILOG_DOWNER | \
347 XFS_ILOG_DOWNER | XFS_ILOG_AOWNER) 347 XFS_ILOG_AOWNER)
348 348
349static inline int xfs_ilog_fbroot(int w) 349static inline int xfs_ilog_fbroot(int w)
350{ 350{
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 9d5406b4f663..585b35d34142 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -30,6 +30,7 @@
30#include "xfs_bmap.h" 30#include "xfs_bmap.h"
31#include "xfs_refcount_btree.h" 31#include "xfs_refcount_btree.h"
32#include "xfs_alloc.h" 32#include "xfs_alloc.h"
33#include "xfs_errortag.h"
33#include "xfs_error.h" 34#include "xfs_error.h"
34#include "xfs_trace.h" 35#include "xfs_trace.h"
35#include "xfs_cksum.h" 36#include "xfs_cksum.h"
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 55c88a732690..dd019cee1b3b 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -34,6 +34,7 @@
34#include "xfs_rmap_btree.h" 34#include "xfs_rmap_btree.h"
35#include "xfs_trans_space.h" 35#include "xfs_trans_space.h"
36#include "xfs_trace.h" 36#include "xfs_trace.h"
37#include "xfs_errortag.h"
37#include "xfs_error.h" 38#include "xfs_error.h"
38#include "xfs_extent_busy.h" 39#include "xfs_extent_busy.h"
39#include "xfs_bmap.h" 40#include "xfs_bmap.h"
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 5d4e43ef4eea..3fb29a5ea915 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -672,7 +672,6 @@ xfs_rtmodify_range(
672 /* 672 /*
673 * Compute a mask of relevant bits. 673 * Compute a mask of relevant bits.
674 */ 674 */
675 bit = 0;
676 mask = ((xfs_rtword_t)1 << lastbit) - 1; 675 mask = ((xfs_rtword_t)1 << lastbit) - 1;
677 /* 676 /*
678 * Set/clear the active bits. 677 * Set/clear the active bits.
@@ -1086,3 +1085,15 @@ xfs_rtalloc_query_all(
1086 1085
1087 return xfs_rtalloc_query_range(tp, &keys[0], &keys[1], fn, priv); 1086 return xfs_rtalloc_query_range(tp, &keys[0], &keys[1], fn, priv);
1088} 1087}
1088
1089/*
1090 * Verify that an realtime block number pointer doesn't point off the
1091 * end of the realtime device.
1092 */
1093bool
1094xfs_verify_rtbno(
1095 struct xfs_mount *mp,
1096 xfs_rtblock_t rtbno)
1097{
1098 return rtbno < mp->m_sb.sb_rblocks;
1099}
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 0220159bd463..3c560695c546 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -48,6 +48,12 @@ typedef int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
48typedef int64_t xfs_sfiloff_t; /* signed block number in a file */ 48typedef int64_t xfs_sfiloff_t; /* signed block number in a file */
49 49
50/* 50/*
51 * New verifiers will return the instruction address of the failing check.
52 * NULL means everything is ok.
53 */
54typedef void * xfs_failaddr_t;
55
56/*
51 * Null values for the types. 57 * Null values for the types.
52 */ 58 */
53#define NULLFSBLOCK ((xfs_fsblock_t)-1) 59#define NULLFSBLOCK ((xfs_fsblock_t)-1)
@@ -136,5 +142,21 @@ typedef uint32_t xfs_dqid_t;
136#define XFS_NBWORD (1 << XFS_NBWORDLOG) 142#define XFS_NBWORD (1 << XFS_NBWORDLOG)
137#define XFS_WORDMASK ((1 << XFS_WORDLOG) - 1) 143#define XFS_WORDMASK ((1 << XFS_WORDLOG) - 1)
138 144
145struct xfs_iext_cursor {
146 struct xfs_iext_leaf *leaf;
147 int pos;
148};
149
150typedef enum {
151 XFS_EXT_NORM, XFS_EXT_UNWRITTEN,
152} xfs_exntst_t;
153
154typedef struct xfs_bmbt_irec
155{
156 xfs_fileoff_t br_startoff; /* starting file offset */
157 xfs_fsblock_t br_startblock; /* starting block number */
158 xfs_filblks_t br_blockcount; /* number of blocks */
159 xfs_exntst_t br_state; /* extent state */
160} xfs_bmbt_irec_t;
139 161
140#endif /* __XFS_TYPES_H__ */ 162#endif /* __XFS_TYPES_H__ */
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
new file mode 100644
index 000000000000..2a9b4f9e93c6
--- /dev/null
+++ b/fs/xfs/scrub/agheader.c
@@ -0,0 +1,658 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_alloc.h"
34#include "xfs_ialloc.h"
35#include "scrub/xfs_scrub.h"
36#include "scrub/scrub.h"
37#include "scrub/common.h"
38#include "scrub/trace.h"
39
40/*
41 * Set up scrub to check all the static metadata in each AG.
42 * This means the SB, AGF, AGI, and AGFL headers.
43 */
44int
45xfs_scrub_setup_ag_header(
46 struct xfs_scrub_context *sc,
47 struct xfs_inode *ip)
48{
49 struct xfs_mount *mp = sc->mp;
50
51 if (sc->sm->sm_agno >= mp->m_sb.sb_agcount ||
52 sc->sm->sm_ino || sc->sm->sm_gen)
53 return -EINVAL;
54 return xfs_scrub_setup_fs(sc, ip);
55}
56
57/* Walk all the blocks in the AGFL. */
58int
59xfs_scrub_walk_agfl(
60 struct xfs_scrub_context *sc,
61 int (*fn)(struct xfs_scrub_context *,
62 xfs_agblock_t bno, void *),
63 void *priv)
64{
65 struct xfs_agf *agf;
66 __be32 *agfl_bno;
67 struct xfs_mount *mp = sc->mp;
68 unsigned int flfirst;
69 unsigned int fllast;
70 int i;
71 int error;
72
73 agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
74 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, sc->sa.agfl_bp);
75 flfirst = be32_to_cpu(agf->agf_flfirst);
76 fllast = be32_to_cpu(agf->agf_fllast);
77
78 /* Nothing to walk in an empty AGFL. */
79 if (agf->agf_flcount == cpu_to_be32(0))
80 return 0;
81
82 /* first to last is a consecutive list. */
83 if (fllast >= flfirst) {
84 for (i = flfirst; i <= fllast; i++) {
85 error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
86 if (error)
87 return error;
88 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
89 return error;
90 }
91
92 return 0;
93 }
94
95 /* first to the end */
96 for (i = flfirst; i < XFS_AGFL_SIZE(mp); i++) {
97 error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
98 if (error)
99 return error;
100 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
101 return error;
102 }
103
104 /* the start to last. */
105 for (i = 0; i <= fllast; i++) {
106 error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
107 if (error)
108 return error;
109 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
110 return error;
111 }
112
113 return 0;
114}
115
116/* Superblock */
117
118/*
119 * Scrub the filesystem superblock.
120 *
121 * Note: We do /not/ attempt to check AG 0's superblock. Mount is
122 * responsible for validating all the geometry information in sb 0, so
123 * if the filesystem is capable of initiating online scrub, then clearly
124 * sb 0 is ok and we can use its information to check everything else.
125 */
int
xfs_scrub_superblock(
	struct xfs_scrub_context	*sc)
{
	struct xfs_mount		*mp = sc->mp;
	struct xfs_buf			*bp;
	struct xfs_dsb			*sb;
	xfs_agnumber_t			agno;
	uint32_t			v2_ok;
	__be32				features_mask;
	int				error;
	__be16				vernum_mask;

	/* AG 0's superblock was validated at mount time (see comment above). */
	agno = sc->sm->sm_agno;
	if (agno == 0)
		return 0;

	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
		  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
	  XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops);
	if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error))
		return error;

	sb = XFS_BUF_TO_SBP(bp);

	/*
	 * Verify the geometries match.  Fields that are permanently
	 * set by mkfs are checked; fields that can be updated later
	 * (and are not propagated to backup superblocks) are preen
	 * checked.
	 */
	if (sb->sb_blocksize != cpu_to_be32(mp->m_sb.sb_blocksize))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_dblocks != cpu_to_be64(mp->m_sb.sb_dblocks))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_rblocks != cpu_to_be64(mp->m_sb.sb_rblocks))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_rextents != cpu_to_be64(mp->m_sb.sb_rextents))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (!uuid_equal(&sb->sb_uuid, &mp->m_sb.sb_uuid))
		xfs_scrub_block_set_preen(sc, bp);

	if (sb->sb_logstart != cpu_to_be64(mp->m_sb.sb_logstart))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_rootino != cpu_to_be64(mp->m_sb.sb_rootino))
		xfs_scrub_block_set_preen(sc, bp);

	if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino))
		xfs_scrub_block_set_preen(sc, bp);

	if (sb->sb_rsumino != cpu_to_be64(mp->m_sb.sb_rsumino))
		xfs_scrub_block_set_preen(sc, bp);

	if (sb->sb_rextsize != cpu_to_be32(mp->m_sb.sb_rextsize))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_agblocks != cpu_to_be32(mp->m_sb.sb_agblocks))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_agcount != cpu_to_be32(mp->m_sb.sb_agcount))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_rbmblocks != cpu_to_be32(mp->m_sb.sb_rbmblocks))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_logblocks != cpu_to_be32(mp->m_sb.sb_logblocks))
		xfs_scrub_block_set_corrupt(sc, bp);

	/*
	 * Check sb_versionnum bits that are set at mkfs time.
	 * Note: ~ binds only to XFS_SB_VERSION_OKBITS here; the other
	 * named bits are OR'd in unmodified.
	 */
	vernum_mask = cpu_to_be16(~XFS_SB_VERSION_OKBITS |
				  XFS_SB_VERSION_NUMBITS |
				  XFS_SB_VERSION_ALIGNBIT |
				  XFS_SB_VERSION_DALIGNBIT |
				  XFS_SB_VERSION_SHAREDBIT |
				  XFS_SB_VERSION_LOGV2BIT |
				  XFS_SB_VERSION_SECTORBIT |
				  XFS_SB_VERSION_EXTFLGBIT |
				  XFS_SB_VERSION_DIRV2BIT);
	if ((sb->sb_versionnum & vernum_mask) !=
	    (cpu_to_be16(mp->m_sb.sb_versionnum) & vernum_mask))
		xfs_scrub_block_set_corrupt(sc, bp);

	/* Check sb_versionnum bits that can be set after mkfs time. */
	vernum_mask = cpu_to_be16(XFS_SB_VERSION_ATTRBIT |
				  XFS_SB_VERSION_NLINKBIT |
				  XFS_SB_VERSION_QUOTABIT);
	if ((sb->sb_versionnum & vernum_mask) !=
	    (cpu_to_be16(mp->m_sb.sb_versionnum) & vernum_mask))
		xfs_scrub_block_set_preen(sc, bp);

	if (sb->sb_sectsize != cpu_to_be16(mp->m_sb.sb_sectsize))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_inodesize != cpu_to_be16(mp->m_sb.sb_inodesize))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_inopblock != cpu_to_be16(mp->m_sb.sb_inopblock))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (memcmp(sb->sb_fname, mp->m_sb.sb_fname, sizeof(sb->sb_fname)))
		xfs_scrub_block_set_preen(sc, bp);

	/* The log2 geometry fields below are one byte, no endian swap. */
	if (sb->sb_blocklog != mp->m_sb.sb_blocklog)
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_sectlog != mp->m_sb.sb_sectlog)
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_inodelog != mp->m_sb.sb_inodelog)
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_inopblog != mp->m_sb.sb_inopblog)
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_agblklog != mp->m_sb.sb_agblklog)
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_rextslog != mp->m_sb.sb_rextslog)
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_imax_pct != mp->m_sb.sb_imax_pct)
		xfs_scrub_block_set_preen(sc, bp);

	/*
	 * Skip the summary counters since we track them in memory anyway.
	 * sb_icount, sb_ifree, sb_fdblocks, sb_frexents
	 */

	if (sb->sb_uquotino != cpu_to_be64(mp->m_sb.sb_uquotino))
		xfs_scrub_block_set_preen(sc, bp);

	if (sb->sb_gquotino != cpu_to_be64(mp->m_sb.sb_gquotino))
		xfs_scrub_block_set_preen(sc, bp);

	/*
	 * Skip the quota flags since repair will force quotacheck.
	 * sb_qflags
	 */

	if (sb->sb_flags != mp->m_sb.sb_flags)
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_shared_vn != mp->m_sb.sb_shared_vn)
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_inoalignmt != cpu_to_be32(mp->m_sb.sb_inoalignmt))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_unit != cpu_to_be32(mp->m_sb.sb_unit))
		xfs_scrub_block_set_preen(sc, bp);

	if (sb->sb_width != cpu_to_be32(mp->m_sb.sb_width))
		xfs_scrub_block_set_preen(sc, bp);

	if (sb->sb_dirblklog != mp->m_sb.sb_dirblklog)
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_logsectlog != mp->m_sb.sb_logsectlog)
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_logsectsize != cpu_to_be16(mp->m_sb.sb_logsectsize))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (sb->sb_logsunit != cpu_to_be32(mp->m_sb.sb_logsunit))
		xfs_scrub_block_set_corrupt(sc, bp);

	/* Do we see any invalid bits in sb_features2? */
	if (!xfs_sb_version_hasmorebits(&mp->m_sb)) {
		if (sb->sb_features2 != 0)
			xfs_scrub_block_set_corrupt(sc, bp);
	} else {
		v2_ok = XFS_SB_VERSION2_OKBITS;
		if (XFS_SB_VERSION_NUM(&mp->m_sb) >= XFS_SB_VERSION_5)
			v2_ok |= XFS_SB_VERSION2_CRCBIT;

		/* Any bit outside the OK set is corruption. */
		if (!!(sb->sb_features2 & cpu_to_be32(~v2_ok)))
			xfs_scrub_block_set_corrupt(sc, bp);

		/* The two copies of features2 should stay in sync. */
		if (sb->sb_features2 != sb->sb_bad_features2)
			xfs_scrub_block_set_preen(sc, bp);
	}

	/* Check sb_features2 flags that are set at mkfs time. */
	features_mask = cpu_to_be32(XFS_SB_VERSION2_LAZYSBCOUNTBIT |
				    XFS_SB_VERSION2_PROJID32BIT |
				    XFS_SB_VERSION2_CRCBIT |
				    XFS_SB_VERSION2_FTYPE);
	if ((sb->sb_features2 & features_mask) !=
	    (cpu_to_be32(mp->m_sb.sb_features2) & features_mask))
		xfs_scrub_block_set_corrupt(sc, bp);

	/* Check sb_features2 flags that can be set after mkfs time. */
	features_mask = cpu_to_be32(XFS_SB_VERSION2_ATTR2BIT);
	if ((sb->sb_features2 & features_mask) !=
	    (cpu_to_be32(mp->m_sb.sb_features2) & features_mask))
		xfs_scrub_block_set_corrupt(sc, bp);

	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
		/* all v5 fields must be zero */
		if (memchr_inv(&sb->sb_features_compat, 0,
				sizeof(struct xfs_dsb) -
				offsetof(struct xfs_dsb, sb_features_compat)))
			xfs_scrub_block_set_corrupt(sc, bp);
	} else {
		/* Check compat flags; all are set at mkfs time. */
		features_mask = cpu_to_be32(XFS_SB_FEAT_COMPAT_UNKNOWN);
		if ((sb->sb_features_compat & features_mask) !=
		    (cpu_to_be32(mp->m_sb.sb_features_compat) & features_mask))
			xfs_scrub_block_set_corrupt(sc, bp);

		/* Check ro compat flags; all are set at mkfs time. */
		features_mask = cpu_to_be32(XFS_SB_FEAT_RO_COMPAT_UNKNOWN |
					    XFS_SB_FEAT_RO_COMPAT_FINOBT |
					    XFS_SB_FEAT_RO_COMPAT_RMAPBT |
					    XFS_SB_FEAT_RO_COMPAT_REFLINK);
		if ((sb->sb_features_ro_compat & features_mask) !=
		    (cpu_to_be32(mp->m_sb.sb_features_ro_compat) &
		     features_mask))
			xfs_scrub_block_set_corrupt(sc, bp);

		/* Check incompat flags; all are set at mkfs time. */
		features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_UNKNOWN |
					    XFS_SB_FEAT_INCOMPAT_FTYPE |
					    XFS_SB_FEAT_INCOMPAT_SPINODES |
					    XFS_SB_FEAT_INCOMPAT_META_UUID);
		if ((sb->sb_features_incompat & features_mask) !=
		    (cpu_to_be32(mp->m_sb.sb_features_incompat) &
		     features_mask))
			xfs_scrub_block_set_corrupt(sc, bp);

		/* Check log incompat flags; all are set at mkfs time. */
		features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN);
		if ((sb->sb_features_log_incompat & features_mask) !=
		    (cpu_to_be32(mp->m_sb.sb_features_log_incompat) &
		     features_mask))
			xfs_scrub_block_set_corrupt(sc, bp);

		/* Don't care about sb_crc */

		if (sb->sb_spino_align != cpu_to_be32(mp->m_sb.sb_spino_align))
			xfs_scrub_block_set_corrupt(sc, bp);

		if (sb->sb_pquotino != cpu_to_be64(mp->m_sb.sb_pquotino))
			xfs_scrub_block_set_preen(sc, bp);

		/* Don't care about sb_lsn */
	}

	if (xfs_sb_version_hasmetauuid(&mp->m_sb)) {
		/* The metadata UUID must be the same for all supers */
		if (!uuid_equal(&sb->sb_meta_uuid, &mp->m_sb.sb_meta_uuid))
			xfs_scrub_block_set_corrupt(sc, bp);
	}

	/* Everything else must be zero. */
	if (memchr_inv(sb + 1, 0,
			BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
		xfs_scrub_block_set_corrupt(sc, bp);

	/*
	 * NOTE(review): error should be zero on this path since
	 * xfs_scrub_process_error() accepted the buffer read above --
	 * confirm against that helper's contract.
	 */
	return error;
}
392
393/* AGF */
394
/* Scrub the AGF. */
int
xfs_scrub_agf(
	struct xfs_scrub_context	*sc)
{
	struct xfs_mount		*mp = sc->mp;
	struct xfs_agf			*agf;
	xfs_agnumber_t			agno;
	xfs_agblock_t			agbno;
	xfs_agblock_t			eoag;
	xfs_agblock_t			agfl_first;
	xfs_agblock_t			agfl_last;
	xfs_agblock_t			agfl_count;
	xfs_agblock_t			fl_count;
	int				level;
	int				error = 0;

	agno = sc->sa.agno = sc->sm->sm_agno;
	error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp,
			&sc->sa.agf_bp, &sc->sa.agfl_bp);
	if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error))
		goto out;

	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);

	/* Check the AG length */
	eoag = be32_to_cpu(agf->agf_length);
	if (eoag != xfs_ag_block_count(mp, agno))
		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);

	/* Check the AGF btree roots and levels */
	agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
	if (!xfs_verify_agbno(mp, agno, agbno))
		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);

	agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
	if (!xfs_verify_agbno(mp, agno, agbno))
		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);

	level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);

	level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);

	/* rmap btree fields only exist when the feature is enabled. */
	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
		agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]);
		if (!xfs_verify_agbno(mp, agno, agbno))
			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);

		level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
	}

	/* refcount btree fields only exist when reflink is enabled. */
	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
		agbno = be32_to_cpu(agf->agf_refcount_root);
		if (!xfs_verify_agbno(mp, agno, agbno))
			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);

		level = be32_to_cpu(agf->agf_refcount_level);
		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
	}

	/*
	 * Check the AGFL counters: the active list runs from agf_flfirst
	 * to agf_fllast and may wrap around the array end.
	 * NOTE(review): when agfl_last == agfl_first the else branch
	 * computes XFS_AGFL_SIZE(mp) + 1 rather than 1, which would flag
	 * a one-entry AGFL as corrupt -- confirm the intended
	 * flfirst/fllast convention.
	 */
	agfl_first = be32_to_cpu(agf->agf_flfirst);
	agfl_last = be32_to_cpu(agf->agf_fllast);
	agfl_count = be32_to_cpu(agf->agf_flcount);
	if (agfl_last > agfl_first)
		fl_count = agfl_last - agfl_first + 1;
	else
		fl_count = XFS_AGFL_SIZE(mp) - agfl_first + agfl_last + 1;
	if (agfl_count != 0 && fl_count != agfl_count)
		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);

out:
	return error;
}
476
477/* AGFL */
478
/* Scratch state for checking AGFL entries for uniqueness. */
struct xfs_scrub_agfl_info {
	unsigned int		sz_entries;	/* capacity of @entries */
	unsigned int		nr_entries;	/* entries recorded so far */
	xfs_agblock_t		*entries;	/* AGFL block numbers seen */
};
484
485/* Scrub an AGFL block. */
486STATIC int
487xfs_scrub_agfl_block(
488 struct xfs_scrub_context *sc,
489 xfs_agblock_t agbno,
490 void *priv)
491{
492 struct xfs_mount *mp = sc->mp;
493 struct xfs_scrub_agfl_info *sai = priv;
494 xfs_agnumber_t agno = sc->sa.agno;
495
496 if (xfs_verify_agbno(mp, agno, agbno) &&
497 sai->nr_entries < sai->sz_entries)
498 sai->entries[sai->nr_entries++] = agbno;
499 else
500 xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp);
501
502 return 0;
503}
504
505static int
506xfs_scrub_agblock_cmp(
507 const void *pa,
508 const void *pb)
509{
510 const xfs_agblock_t *a = pa;
511 const xfs_agblock_t *b = pb;
512
513 return (int)*a - (int)*b;
514}
515
/*
 * Scrub the AGFL: every entry must be a valid block in this AG, the
 * number of entries walked must match agf_flcount, and no block may
 * appear more than once.
 */
int
xfs_scrub_agfl(
	struct xfs_scrub_context	*sc)
{
	struct xfs_scrub_agfl_info	sai = { 0 };
	struct xfs_agf			*agf;
	xfs_agnumber_t			agno;
	unsigned int			agflcount;
	unsigned int			i;
	int				error;

	agno = sc->sa.agno = sc->sm->sm_agno;
	error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp,
			&sc->sa.agf_bp, &sc->sa.agfl_bp);
	if (!xfs_scrub_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error))
		goto out;
	/* We can't check the AGFL without a believable AGF. */
	if (!sc->sa.agf_bp)
		return -EFSCORRUPTED;

	/* Allocate buffer to ensure uniqueness of AGFL entries. */
	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
	agflcount = be32_to_cpu(agf->agf_flcount);
	/* Bounding agflcount here also keeps the allocation size sane. */
	if (agflcount > XFS_AGFL_SIZE(sc->mp)) {
		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
		goto out;
	}
	sai.sz_entries = agflcount;
	sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount, KM_NOFS);
	if (!sai.entries) {
		error = -ENOMEM;
		goto out;
	}

	/* Check the blocks in the AGFL. */
	error = xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sai);
	if (error)
		goto out_free;

	/* Walked fewer entries than the AGF advertised? */
	if (agflcount != sai.nr_entries) {
		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
		goto out_free;
	}

	/* Sort entries, check for duplicates. */
	sort(sai.entries, sai.nr_entries, sizeof(sai.entries[0]),
			xfs_scrub_agblock_cmp, NULL);
	for (i = 1; i < sai.nr_entries; i++) {
		if (sai.entries[i] == sai.entries[i - 1]) {
			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
			break;
		}
	}

out_free:
	kmem_free(sai.entries);
out:
	return error;
}
575
576/* AGI */
577
/* Scrub the AGI. */
int
xfs_scrub_agi(
	struct xfs_scrub_context	*sc)
{
	struct xfs_mount		*mp = sc->mp;
	struct xfs_agi			*agi;
	xfs_agnumber_t			agno;
	xfs_agblock_t			agbno;
	xfs_agblock_t			eoag;
	xfs_agino_t			agino;
	xfs_agino_t			first_agino;
	xfs_agino_t			last_agino;
	xfs_agino_t			icount;
	int				i;
	int				level;
	int				error = 0;

	agno = sc->sa.agno = sc->sm->sm_agno;
	error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp,
			&sc->sa.agf_bp, &sc->sa.agfl_bp);
	if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error))
		goto out;

	agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);

	/* Check the AG length */
	eoag = be32_to_cpu(agi->agi_length);
	if (eoag != xfs_ag_block_count(mp, agno))
		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);

	/* Check btree roots and levels */
	agbno = be32_to_cpu(agi->agi_root);
	if (!xfs_verify_agbno(mp, agno, agbno))
		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);

	level = be32_to_cpu(agi->agi_level);
	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);

	/* finobt fields only exist when the feature is enabled. */
	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
		agbno = be32_to_cpu(agi->agi_free_root);
		if (!xfs_verify_agbno(mp, agno, agbno))
			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);

		level = be32_to_cpu(agi->agi_free_level);
		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
	}

	/*
	 * Check inode counters: agi_count can't exceed the number of
	 * inode slots in the AG, nor be less than the free count.
	 */
	xfs_ialloc_agino_range(mp, agno, &first_agino, &last_agino);
	icount = be32_to_cpu(agi->agi_count);
	if (icount > last_agino - first_agino + 1 ||
	    icount < be32_to_cpu(agi->agi_freecount))
		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);

	/* Check inode pointers */
	agino = be32_to_cpu(agi->agi_newino);
	if (agino != NULLAGINO && !xfs_verify_agino(mp, agno, agino))
		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);

	agino = be32_to_cpu(agi->agi_dirino);
	if (agino != NULLAGINO && !xfs_verify_agino(mp, agno, agino))
		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);

	/* Check unlinked inode buckets */
	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
		agino = be32_to_cpu(agi->agi_unlinked[i]);
		if (agino == NULLAGINO)
			continue;
		if (!xfs_verify_agino(mp, agno, agino))
			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
	}

	/* Padding must remain zeroed. */
	if (agi->agi_pad32 != cpu_to_be32(0))
		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);

out:
	return error;
}
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
new file mode 100644
index 000000000000..059663e13414
--- /dev/null
+++ b/fs/xfs/scrub/alloc.c
@@ -0,0 +1,102 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_alloc.h"
33#include "xfs_rmap.h"
34#include "scrub/xfs_scrub.h"
35#include "scrub/scrub.h"
36#include "scrub/common.h"
37#include "scrub/btree.h"
38#include "scrub/trace.h"
39
/*
 * Set us up to scrub free space btrees.
 */
int
xfs_scrub_setup_ag_allocbt(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	/*
	 * NOTE(review): the third argument toggles optional setup work in
	 * xfs_scrub_setup_ag_btree(); its exact meaning is not visible in
	 * this file -- confirm against that helper before changing it.
	 */
	return xfs_scrub_setup_ag_btree(sc, ip, false);
}
50
51/* Free space btree scrubber. */
52
53/* Scrub a bnobt/cntbt record. */
54STATIC int
55xfs_scrub_allocbt_rec(
56 struct xfs_scrub_btree *bs,
57 union xfs_btree_rec *rec)
58{
59 struct xfs_mount *mp = bs->cur->bc_mp;
60 xfs_agnumber_t agno = bs->cur->bc_private.a.agno;
61 xfs_agblock_t bno;
62 xfs_extlen_t len;
63 int error = 0;
64
65 bno = be32_to_cpu(rec->alloc.ar_startblock);
66 len = be32_to_cpu(rec->alloc.ar_blockcount);
67
68 if (bno + len <= bno ||
69 !xfs_verify_agbno(mp, agno, bno) ||
70 !xfs_verify_agbno(mp, agno, bno + len - 1))
71 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
72
73 return error;
74}
75
76/* Scrub the freespace btrees for some AG. */
77STATIC int
78xfs_scrub_allocbt(
79 struct xfs_scrub_context *sc,
80 xfs_btnum_t which)
81{
82 struct xfs_owner_info oinfo;
83 struct xfs_btree_cur *cur;
84
85 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
86 cur = which == XFS_BTNUM_BNO ? sc->sa.bno_cur : sc->sa.cnt_cur;
87 return xfs_scrub_btree(sc, cur, xfs_scrub_allocbt_rec, &oinfo, NULL);
88}
89
/* Scrub the bnobt for some AG. */
int
xfs_scrub_bnobt(
	struct xfs_scrub_context	*sc)
{
	return xfs_scrub_allocbt(sc, XFS_BTNUM_BNO);
}
96
/* Scrub the cntbt for some AG. */
int
xfs_scrub_cntbt(
	struct xfs_scrub_context	*sc)
{
	return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT);
}
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
new file mode 100644
index 000000000000..4ed80474f545
--- /dev/null
+++ b/fs/xfs/scrub/attr.c
@@ -0,0 +1,471 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_da_format.h"
34#include "xfs_da_btree.h"
35#include "xfs_dir2.h"
36#include "xfs_attr.h"
37#include "xfs_attr_leaf.h"
38#include "scrub/xfs_scrub.h"
39#include "scrub/scrub.h"
40#include "scrub/common.h"
41#include "scrub/dabtree.h"
42#include "scrub/trace.h"
43
44#include <linux/posix_acl_xattr.h>
45#include <linux/xattr.h>
46
47/* Set us up to scrub an inode's extended attributes. */
48int
49xfs_scrub_setup_xattr(
50 struct xfs_scrub_context *sc,
51 struct xfs_inode *ip)
52{
53 size_t sz;
54
55 /*
56 * Allocate the buffer without the inode lock held. We need enough
57 * space to read every xattr value in the file or enough space to
58 * hold three copies of the xattr free space bitmap. (Not both at
59 * the same time.)
60 */
61 sz = max_t(size_t, XATTR_SIZE_MAX, 3 * sizeof(long) *
62 BITS_TO_LONGS(sc->mp->m_attr_geo->blksize));
63 sc->buf = kmem_zalloc_large(sz, KM_SLEEP);
64 if (!sc->buf)
65 return -ENOMEM;
66
67 return xfs_scrub_setup_inode_contents(sc, ip, 0);
68}
69
70/* Extended Attributes */
71
struct xfs_scrub_xattr {
	/* attr list walk state; the listent callback recovers the
	 * enclosing struct from this field via container_of. */
	struct xfs_attr_list_context	context;
	/* scrub context that owns this xattr walk */
	struct xfs_scrub_context	*sc;
};
76
/*
 * Check that an extended attribute key can be looked up by hash.
 *
 * We use the XFS attribute list iterator (i.e. xfs_attr_list_int_ilocked)
 * to call this function for every attribute key in an inode.  Once
 * we're here, we load the attribute value to see if any errors happen,
 * or if we get more or less data than we expected.
 */
static void
xfs_scrub_xattr_listent(
	struct xfs_attr_list_context	*context,
	int				flags,
	unsigned char			*name,
	int				namelen,
	int				valuelen)
{
	struct xfs_scrub_xattr		*sx;
	struct xfs_da_args		args = { NULL };
	int				error = 0;

	/* Recover our scrub state from the embedded iterator context. */
	sx = container_of(context, struct xfs_scrub_xattr, context);

	if (flags & XFS_ATTR_INCOMPLETE) {
		/* Incomplete attr key, just mark the inode for preening. */
		xfs_scrub_ino_set_preen(sx->sc, context->dp->i_ino, NULL);
		return;
	}

	/* Build a lookup keyed by this attr's name and namespace flags. */
	args.flags = ATTR_KERNOTIME;
	if (flags & XFS_ATTR_ROOT)
		args.flags |= ATTR_ROOT;
	else if (flags & XFS_ATTR_SECURE)
		args.flags |= ATTR_SECURE;
	args.geo = context->dp->i_mount->m_attr_geo;
	args.whichfork = XFS_ATTR_FORK;
	args.dp = context->dp;
	args.name = name;
	args.namelen = namelen;
	args.hashval = xfs_da_hashname(args.name, args.namelen);
	args.trans = context->tp;
	/* Read the value into the buffer preallocated at setup time. */
	args.value = sx->sc->buf;
	args.valuelen = XATTR_SIZE_MAX;

	error = xfs_attr_get_ilocked(context->dp, &args);
	/* NOTE(review): -EEXIST appears to mean "attr found" here, so it
	 * is squashed to success — confirm against xfs_attr_get_ilocked. */
	if (error == -EEXIST)
		error = 0;
	if (!xfs_scrub_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno,
			&error))
		goto fail_xref;
	/* The value we read back must be the size the entry advertised. */
	if (args.valuelen != valuelen)
		xfs_scrub_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK,
					     args.blkno);

fail_xref:
	return;
}
133
134/*
135 * Mark a range [start, start+len) in this map. Returns true if the
136 * region was free, and false if there's a conflict or a problem.
137 *
138 * Within a char, the lowest bit of the char represents the byte with
139 * the smallest address
140 */
141STATIC bool
142xfs_scrub_xattr_set_map(
143 struct xfs_scrub_context *sc,
144 unsigned long *map,
145 unsigned int start,
146 unsigned int len)
147{
148 unsigned int mapsize = sc->mp->m_attr_geo->blksize;
149 bool ret = true;
150
151 if (start >= mapsize)
152 return false;
153 if (start + len > mapsize) {
154 len = mapsize - start;
155 ret = false;
156 }
157
158 if (find_next_bit(map, mapsize, start) < start + len)
159 ret = false;
160 bitmap_set(map, start, len);
161
162 return ret;
163}
164
/*
 * Check the leaf freemap from the usage bitmap.  Returns false if the
 * attr freemap has problems or points to used space.
 */
STATIC bool
xfs_scrub_xattr_check_freemap(
	struct xfs_scrub_context	*sc,
	unsigned long			*map,
	struct xfs_attr3_icleaf_hdr	*leafhdr)
{
	unsigned long			*freemap;
	unsigned long			*dstmap;
	unsigned int			mapsize = sc->mp->m_attr_geo->blksize;
	int				i;

	/*
	 * Construct bitmap of freemap contents.  The scrub buffer holds
	 * three bitmaps back to back: usage (passed in as @map), then
	 * freemap, then a scratch destination for the overlap test.
	 */
	freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize);
	bitmap_zero(freemap, mapsize);
	for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
		/* An overlap within the freemap itself is also corruption. */
		if (!xfs_scrub_xattr_set_map(sc, freemap,
				leafhdr->freemap[i].base,
				leafhdr->freemap[i].size))
			return false;
	}

	/* Look for bits that are set in freemap and are marked in use. */
	dstmap = freemap + BITS_TO_LONGS(mapsize);
	return bitmap_and(dstmap, freemap, map, mapsize) == 0;
}
194
/*
 * Check this leaf entry's relations to everything else.
 * Adds the number of bytes used for the name/value data to *usedbytes,
 * and tracks the running hash order in *last_hashval.
 */
STATIC void
xfs_scrub_xattr_entry(
	struct xfs_scrub_da_btree	*ds,
	int				level,
	char				*buf_end,
	struct xfs_attr_leafblock	*leaf,
	struct xfs_attr3_icleaf_hdr	*leafhdr,
	unsigned long			*usedmap,
	struct xfs_attr_leaf_entry	*ent,
	int				idx,
	unsigned int			*usedbytes,
	__u32				*last_hashval)
{
	struct xfs_mount		*mp = ds->state->mp;
	char				*name_end;
	struct xfs_attr_leaf_name_local	*lentry;
	struct xfs_attr_leaf_name_remote *rentry;
	unsigned int			nameidx;
	unsigned int			namesize;

	/* Padding must be zeroed on disk. */
	if (ent->pad2 != 0)
		xfs_scrub_da_set_corrupt(ds, level);

	/* Hash values in order? */
	if (be32_to_cpu(ent->hashval) < *last_hashval)
		xfs_scrub_da_set_corrupt(ds, level);
	*last_hashval = be32_to_cpu(ent->hashval);

	/* The name offset must point into the nameval area of the block. */
	nameidx = be16_to_cpu(ent->nameidx);
	if (nameidx < leafhdr->firstused ||
	    nameidx >= mp->m_attr_geo->blksize) {
		xfs_scrub_da_set_corrupt(ds, level);
		return;
	}

	/* Check the name information. */
	if (ent->flags & XFS_ATTR_LOCAL) {
		/* Value stored inline in the leaf block. */
		lentry = xfs_attr3_leaf_name_local(leaf, idx);
		namesize = xfs_attr_leaf_entsize_local(lentry->namelen,
				be16_to_cpu(lentry->valuelen));
		name_end = (char *)lentry + namesize;
		if (lentry->namelen == 0)
			xfs_scrub_da_set_corrupt(ds, level);
	} else {
		/* Value stored in remote blocks; valueblk must be set. */
		rentry = xfs_attr3_leaf_name_remote(leaf, idx);
		namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
		name_end = (char *)rentry + namesize;
		if (rentry->namelen == 0 || rentry->valueblk == 0)
			xfs_scrub_da_set_corrupt(ds, level);
	}
	/* The nameval data must not run off the end of the block. */
	if (name_end > buf_end)
		xfs_scrub_da_set_corrupt(ds, level);

	/* Mark the nameval bytes used; overlaps are corruption. */
	if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, nameidx, namesize))
		xfs_scrub_da_set_corrupt(ds, level);
	/* Only accumulate usage if nothing above flagged corruption. */
	if (!(ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
		*usedbytes += namesize;
}
257
/* Scrub an attribute leaf. */
STATIC int
xfs_scrub_xattr_block(
	struct xfs_scrub_da_btree	*ds,
	int				level)
{
	struct xfs_attr3_icleaf_hdr	leafhdr;
	struct xfs_mount		*mp = ds->state->mp;
	struct xfs_da_state_blk		*blk = &ds->state->path.blk[level];
	struct xfs_buf			*bp = blk->bp;
	xfs_dablk_t			*last_checked = ds->private;
	struct xfs_attr_leafblock	*leaf = bp->b_addr;
	struct xfs_attr_leaf_entry	*ent;
	struct xfs_attr_leaf_entry	*entries;
	unsigned long			*usedmap = ds->sc->buf;
	char				*buf_end;
	size_t				off;
	__u32				last_hashval = 0;
	unsigned int			usedbytes = 0;
	unsigned int			hdrsize;
	int				i;

	/*
	 * This callback fires once per entry; only check the whole block
	 * the first time we see it.
	 */
	if (*last_checked == blk->blkno)
		return 0;
	*last_checked = blk->blkno;
	bitmap_zero(usedmap, mp->m_attr_geo->blksize);

	/* Check all the padding. */
	if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb)) {
		/* v5 (CRC-enabled) leaves use the larger v3 header. */
		struct xfs_attr3_leafblock	*leaf = bp->b_addr;

		if (leaf->hdr.pad1 != 0 || leaf->hdr.pad2 != 0 ||
		    leaf->hdr.info.hdr.pad != 0)
			xfs_scrub_da_set_corrupt(ds, level);
	} else {
		if (leaf->hdr.pad1 != 0 || leaf->hdr.info.pad != 0)
			xfs_scrub_da_set_corrupt(ds, level);
	}

	/* Check the leaf header */
	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
	hdrsize = xfs_attr3_leaf_hdr_size(leaf);

	/* The header's size fields must all fit inside the block. */
	if (leafhdr.usedbytes > mp->m_attr_geo->blksize)
		xfs_scrub_da_set_corrupt(ds, level);
	if (leafhdr.firstused > mp->m_attr_geo->blksize)
		xfs_scrub_da_set_corrupt(ds, level);
	if (leafhdr.firstused < hdrsize)
		xfs_scrub_da_set_corrupt(ds, level);
	/* Mark the header bytes as in use. */
	if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, 0, hdrsize))
		xfs_scrub_da_set_corrupt(ds, level);

	if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	/* The entry array must end before the nameval data begins. */
	entries = xfs_attr3_leaf_entryp(leaf);
	if ((char *)&entries[leafhdr.count] > (char *)leaf + leafhdr.firstused)
		xfs_scrub_da_set_corrupt(ds, level);

	buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
	for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
		/* Mark the leaf entry itself. */
		off = (char *)ent - (char *)leaf;
		if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, off,
				sizeof(xfs_attr_leaf_entry_t))) {
			xfs_scrub_da_set_corrupt(ds, level);
			goto out;
		}

		/* Check the entry and nameval. */
		xfs_scrub_xattr_entry(ds, level, buf_end, leaf, &leafhdr,
				usedmap, ent, i, &usedbytes, &last_hashval);

		if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
			goto out;
	}

	/* The freemap must not overlap anything we marked in use. */
	if (!xfs_scrub_xattr_check_freemap(ds->sc, usedmap, &leafhdr))
		xfs_scrub_da_set_corrupt(ds, level);

	/* The header's usage accounting must match what we counted. */
	if (leafhdr.usedbytes != usedbytes)
		xfs_scrub_da_set_corrupt(ds, level);

out:
	return 0;
}
344
/* Scrub an attribute btree record. */
STATIC int
xfs_scrub_xattr_rec(
	struct xfs_scrub_da_btree	*ds,
	int				level,
	void				*rec)
{
	struct xfs_mount		*mp = ds->state->mp;
	struct xfs_attr_leaf_entry	*ent = rec;
	struct xfs_da_state_blk		*blk;
	struct xfs_attr_leaf_name_local	*lentry;
	struct xfs_attr_leaf_name_remote	*rentry;
	struct xfs_buf			*bp;
	xfs_dahash_t			calc_hash;
	xfs_dahash_t			hash;
	int				nameidx;
	int				hdrsize;
	unsigned int			badflags;
	int				error;

	blk = &ds->state->path.blk[level];

	/* Check the whole block, if necessary. */
	error = xfs_scrub_xattr_block(ds, level);
	if (error)
		goto out;
	if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	/* Check the hash of the entry. */
	error = xfs_scrub_da_btree_hash(ds, level, &ent->hashval);
	if (error)
		goto out;

	/* Find the attr entry's location. */
	bp = blk->bp;
	hdrsize = xfs_attr3_leaf_hdr_size(bp->b_addr);
	nameidx = be16_to_cpu(ent->nameidx);
	if (nameidx < hdrsize || nameidx >= mp->m_attr_geo->blksize) {
		xfs_scrub_da_set_corrupt(ds, level);
		goto out;
	}

	/* Retrieve the entry and check it. */
	hash = be32_to_cpu(ent->hashval);
	/* Any flag outside the known namespace/storage set is corruption. */
	badflags = ~(XFS_ATTR_LOCAL | XFS_ATTR_ROOT | XFS_ATTR_SECURE |
			XFS_ATTR_INCOMPLETE);
	if ((ent->flags & badflags) != 0)
		xfs_scrub_da_set_corrupt(ds, level);
	if (ent->flags & XFS_ATTR_LOCAL) {
		lentry = (struct xfs_attr_leaf_name_local *)
				(((char *)bp->b_addr) + nameidx);
		if (lentry->namelen <= 0) {
			xfs_scrub_da_set_corrupt(ds, level);
			goto out;
		}
		calc_hash = xfs_da_hashname(lentry->nameval, lentry->namelen);
	} else {
		rentry = (struct xfs_attr_leaf_name_remote *)
				(((char *)bp->b_addr) + nameidx);
		if (rentry->namelen <= 0) {
			xfs_scrub_da_set_corrupt(ds, level);
			goto out;
		}
		calc_hash = xfs_da_hashname(rentry->name, rentry->namelen);
	}
	/* The stored hash must match the hash of the stored name. */
	if (calc_hash != hash)
		xfs_scrub_da_set_corrupt(ds, level);

out:
	return error;
}
417
/* Scrub the extended attribute metadata. */
int
xfs_scrub_xattr(
	struct xfs_scrub_context	*sc)
{
	struct xfs_scrub_xattr		sx;
	struct attrlist_cursor_kern	cursor = { 0 };
	/* -1U: sentinel so the first leaf block is always checked. */
	xfs_dablk_t			last_checked = -1U;
	int				error = 0;

	/* No attr fork means nothing to scrub. */
	if (!xfs_inode_hasattr(sc->ip))
		return -ENOENT;

	memset(&sx, 0, sizeof(sx));
	/* Check attribute tree structure */
	error = xfs_scrub_da_btree(sc, XFS_ATTR_FORK, xfs_scrub_xattr_rec,
			&last_checked);
	if (error)
		goto out;

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	/* Check that every attr key can also be looked up by hash. */
	sx.context.dp = sc->ip;
	sx.context.cursor = &cursor;
	sx.context.resynch = 1;
	sx.context.put_listent = xfs_scrub_xattr_listent;
	sx.context.tp = sc->tp;
	sx.context.flags = ATTR_INCOMPLETE;
	sx.sc = sc;

	/*
	 * Look up every xattr in this file by name.
	 *
	 * Use the backend implementation of xfs_attr_list to call
	 * xfs_scrub_xattr_listent on every attribute key in this inode.
	 * In other words, we use the same iterator/callback mechanism
	 * that listattr uses to scrub extended attributes, though in our
	 * _listent function, we check the value of the attribute.
	 *
	 * The VFS only locks i_rwsem when modifying attrs, so keep all
	 * three locks held because that's the only way to ensure we're
	 * the only thread poking into the da btree.  We traverse the da
	 * btree while holding a leaf buffer locked for the xattr name
	 * iteration, which doesn't really follow the usual buffer
	 * locking order.
	 */
	error = xfs_attr_list_int_ilocked(&sx.context);
	if (!xfs_scrub_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error))
		goto out;
out:
	return error;
}
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
new file mode 100644
index 000000000000..42fec0bcd9e1
--- /dev/null
+++ b/fs/xfs/scrub/bmap.c
@@ -0,0 +1,363 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_inode_fork.h"
34#include "xfs_alloc.h"
35#include "xfs_rtalloc.h"
36#include "xfs_bmap.h"
37#include "xfs_bmap_util.h"
38#include "xfs_bmap_btree.h"
39#include "xfs_rmap.h"
40#include "scrub/xfs_scrub.h"
41#include "scrub/scrub.h"
42#include "scrub/common.h"
43#include "scrub/btree.h"
44#include "scrub/trace.h"
45
/* Set us up with an inode's bmap. */
int
xfs_scrub_setup_inode_bmap(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	struct xfs_mount		*mp = sc->mp;
	int				error;

	error = xfs_scrub_get_inode(sc, ip);
	if (error)
		goto out;

	/* Take the IO/mmap locks first; ILOCK comes after the flush. */
	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	xfs_ilock(sc->ip, sc->ilock_flags);

	/*
	 * We don't want any ephemeral data fork updates sitting around
	 * while we inspect block mappings, so wait for directio to finish
	 * and flush dirty data if we have delalloc reservations.
	 */
	if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
	    sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) {
		inode_dio_wait(VFS_I(sc->ip));
		error = filemap_write_and_wait(VFS_I(sc->ip)->i_mapping);
		if (error)
			goto out;
	}

	/* Got the inode, lock it and we're ready to go. */
	error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
	if (error)
		goto out;
	sc->ilock_flags |= XFS_ILOCK_EXCL;
	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);

out:
	/* scrub teardown will unlock and release the inode */
	return error;
}
86
87/*
88 * Inode fork block mapping (BMBT) scrubber.
89 * More complex than the others because we have to scrub
90 * all the extents regardless of whether or not the fork
91 * is in btree format.
92 */
93
struct xfs_scrub_bmap_info {
	/* scrub context that owns this bmap walk */
	struct xfs_scrub_context	*sc;
	/* end offset of the previous extent, for ordering checks */
	xfs_fileoff_t			lastoff;
	/* extents live on the realtime device */
	bool				is_rt;
	/* data fork of a reflinked file */
	bool				is_shared;
	/* which fork (data/attr/CoW) we are scrubbing */
	int				whichfork;
};
101
/* Scrub a single extent record. */
STATIC int
xfs_scrub_bmap_extent(
	struct xfs_inode		*ip,
	struct xfs_btree_cur		*cur,
	struct xfs_scrub_bmap_info	*info,
	struct xfs_bmbt_irec		*irec)
{
	struct xfs_mount		*mp = info->sc->mp;
	struct xfs_buf			*bp = NULL;
	int				error = 0;

	/* NOTE(review): bp is fetched but not used below; presumably kept
	 * for future cross-reference checks — confirm. */
	if (cur)
		xfs_btree_get_block(cur, 0, &bp);

	/*
	 * Check for out-of-order extents.  This record could have come
	 * from the incore list, for which there is no ordering check.
	 */
	if (irec->br_startoff < info->lastoff)
		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* There should never be a "hole" extent in either extent list. */
	if (irec->br_startblock == HOLESTARTBLOCK)
		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/*
	 * Check for delalloc extents.  We never iterate the ones in the
	 * in-core extent scan, and we should never see these in the bmbt.
	 */
	if (isnullstartblock(irec->br_startblock))
		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Make sure the extent points to a valid place. */
	if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock)
		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	/* Realtime extents are checked against the rt device's space... */
	if (info->is_rt &&
	    (!xfs_verify_rtbno(mp, irec->br_startblock) ||
	     !xfs_verify_rtbno(mp, irec->br_startblock +
				irec->br_blockcount - 1)))
		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	/* ...and regular extents against the filesystem's block space. */
	if (!info->is_rt &&
	    (!xfs_verify_fsbno(mp, irec->br_startblock) ||
	     !xfs_verify_fsbno(mp, irec->br_startblock +
				irec->br_blockcount - 1)))
		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* We don't allow unwritten extents on attr forks. */
	if (irec->br_state == XFS_EXT_UNWRITTEN &&
	    info->whichfork == XFS_ATTR_FORK)
		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Remember where this extent ended for the ordering check. */
	info->lastoff = irec->br_startoff + irec->br_blockcount;
	return error;
}
164
/* Scrub a bmbt record. */
STATIC int
xfs_scrub_bmapbt_rec(
	struct xfs_scrub_btree		*bs,
	union xfs_btree_rec		*rec)
{
	struct xfs_bmbt_irec		irec;
	struct xfs_scrub_bmap_info	*info = bs->private;
	struct xfs_inode		*ip = bs->cur->bc_private.b.ip;
	struct xfs_buf			*bp = NULL;
	struct xfs_btree_block		*block;
	uint64_t			owner;
	int				i;

	/*
	 * Check the owners of the btree blocks up to the level below
	 * the root since the verifiers don't do that.  Only done for the
	 * first record of each leaf (bc_ptrs[0] == 1) so each block's
	 * path is checked once, and only on v5 (CRC) filesystems, which
	 * store an owner in each long-format btree block.
	 */
	if (xfs_sb_version_hascrc(&bs->cur->bc_mp->m_sb) &&
	    bs->cur->bc_ptrs[0] == 1) {
		for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
			block = xfs_btree_get_block(bs->cur, i, &bp);
			owner = be64_to_cpu(block->bb_u.l.bb_owner);
			/* Every block in this inode's bmbt must be owned
			 * by this inode. */
			if (owner != ip->i_ino)
				xfs_scrub_fblock_set_corrupt(bs->sc,
						info->whichfork, 0);
		}
	}

	/* Set up the in-core record and scrub it. */
	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
	return xfs_scrub_bmap_extent(ip, bs->cur, info, &irec);
}
198
199/* Scan the btree records. */
200STATIC int
201xfs_scrub_bmap_btree(
202 struct xfs_scrub_context *sc,
203 int whichfork,
204 struct xfs_scrub_bmap_info *info)
205{
206 struct xfs_owner_info oinfo;
207 struct xfs_mount *mp = sc->mp;
208 struct xfs_inode *ip = sc->ip;
209 struct xfs_btree_cur *cur;
210 int error;
211
212 cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
213 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
214 error = xfs_scrub_btree(sc, cur, xfs_scrub_bmapbt_rec, &oinfo, info);
215 xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR :
216 XFS_BTREE_NOERROR);
217 return error;
218}
219
/*
 * Scrub an inode fork's block mappings.
 *
 * First we scan every record in every btree block, if applicable.
 * Then we unconditionally scan the incore extent cache.
 */
STATIC int
xfs_scrub_bmap(
	struct xfs_scrub_context	*sc,
	int				whichfork)
{
	struct xfs_bmbt_irec		irec;
	struct xfs_scrub_bmap_info	info = { NULL };
	struct xfs_mount		*mp = sc->mp;
	struct xfs_inode		*ip = sc->ip;
	struct xfs_ifork		*ifp;
	xfs_fileoff_t			endoff;
	struct xfs_iext_cursor		icur;
	bool				found;
	int				error = 0;

	ifp = XFS_IFORK_PTR(ip, whichfork);

	/* Only the data fork of a realtime file lives on the rt device. */
	info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
	info.whichfork = whichfork;
	info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
	info.sc = sc;

	/* Fork-specific sanity checks before we look at the mappings. */
	switch (whichfork) {
	case XFS_COW_FORK:
		/* Non-existent CoW forks are ignorable. */
		if (!ifp)
			goto out;
		/* No CoW forks on non-reflink inodes/filesystems. */
		if (!xfs_is_reflink_inode(ip)) {
			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
			goto out;
		}
		break;
	case XFS_ATTR_FORK:
		if (!ifp)
			goto out;
		/* An attr fork requires one of the attr feature bits. */
		if (!xfs_sb_version_hasattr(&mp->m_sb) &&
		    !xfs_sb_version_hasattr2(&mp->m_sb))
			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
		break;
	default:
		ASSERT(whichfork == XFS_DATA_FORK);
		break;
	}

	/* Check the fork values */
	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
	case XFS_DINODE_FMT_UUID:
	case XFS_DINODE_FMT_DEV:
	case XFS_DINODE_FMT_LOCAL:
		/* No mappings to check. */
		goto out;
	case XFS_DINODE_FMT_EXTENTS:
		/* Extent-format forks must have their extents in core. */
		if (!(ifp->if_flags & XFS_IFEXTENTS)) {
			xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
			goto out;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		/* CoW forks are never in btree format. */
		if (whichfork == XFS_COW_FORK) {
			xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
			goto out;
		}

		error = xfs_scrub_bmap_btree(sc, whichfork, &info);
		if (error)
			goto out;
		break;
	default:
		xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
		goto out;
	}

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	/* Now try to scrub the in-memory extent list. */
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(sc->tp, ip, whichfork);
		if (!xfs_scrub_fblock_process_error(sc, whichfork, 0, &error))
			goto out;
	}

	/* Find the offset of the last extent in the mapping. */
	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
	if (!xfs_scrub_fblock_process_error(sc, whichfork, 0, &error))
		goto out;

	/* Scrub extent records. */
	info.lastoff = 0;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	for (found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &irec);
	     found != 0;
	     found = xfs_iext_next_extent(ifp, &icur, &irec)) {
		if (xfs_scrub_should_terminate(sc, &error))
			break;
		/* Delalloc reservations are checked elsewhere; skip. */
		if (isnullstartblock(irec.br_startblock))
			continue;
		/* No extent may start at or beyond the last offset. */
		if (irec.br_startoff >= endoff) {
			xfs_scrub_fblock_set_corrupt(sc, whichfork,
					irec.br_startoff);
			goto out;
		}
		error = xfs_scrub_bmap_extent(ip, NULL, &info, &irec);
		if (error)
			goto out;
	}

out:
	return error;
}
337
338/* Scrub an inode's data fork. */
339int
340xfs_scrub_bmap_data(
341 struct xfs_scrub_context *sc)
342{
343 return xfs_scrub_bmap(sc, XFS_DATA_FORK);
344}
345
346/* Scrub an inode's attr fork. */
347int
348xfs_scrub_bmap_attr(
349 struct xfs_scrub_context *sc)
350{
351 return xfs_scrub_bmap(sc, XFS_ATTR_FORK);
352}
353
354/* Scrub an inode's CoW fork. */
355int
356xfs_scrub_bmap_cow(
357 struct xfs_scrub_context *sc)
358{
359 if (!xfs_is_reflink_inode(sc->ip))
360 return -ENOENT;
361
362 return xfs_scrub_bmap(sc, XFS_COW_FORK);
363}
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
new file mode 100644
index 000000000000..df0766132ace
--- /dev/null
+++ b/fs/xfs/scrub/btree.c
@@ -0,0 +1,516 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_alloc.h"
34#include "scrub/scrub.h"
35#include "scrub/common.h"
36#include "scrub/btree.h"
37#include "scrub/trace.h"
38
39/* btree scrubbing */
40
/*
 * Check for btree operation errors.  See the section about handling
 * operational errors in common.c.  Returns true if *error was zero
 * (keep going); otherwise logs/classifies the error and returns false.
 */
bool
xfs_scrub_btree_process_error(
	struct xfs_scrub_context	*sc,
	struct xfs_btree_cur		*cur,
	int				level,
	int				*error)
{
	if (*error == 0)
		return true;

	switch (*error) {
	case -EDEADLOCK:
		/* Used to restart an op with deadlock avoidance. */
		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
		*error = 0;
		/* fall through */
	default:
		/* Trace against the right owner for inode-rooted btrees. */
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			trace_xfs_scrub_ifork_btree_op_error(sc, cur, level,
					*error, __return_address);
		else
			trace_xfs_scrub_btree_op_error(sc, cur, level,
					*error, __return_address);
		break;
	}
	return false;
}
77
78/* Record btree block corruption. */
79void
80xfs_scrub_btree_set_corrupt(
81 struct xfs_scrub_context *sc,
82 struct xfs_btree_cur *cur,
83 int level)
84{
85 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
86
87 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
88 trace_xfs_scrub_ifork_btree_error(sc, cur, level,
89 __return_address);
90 else
91 trace_xfs_scrub_btree_error(sc, cur, level,
92 __return_address);
93}
94
/*
 * Make sure this record is in order and doesn't stray outside of the parent
 * keys.
 */
STATIC void
xfs_scrub_btree_rec(
	struct xfs_scrub_btree	*bs)
{
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_rec	*rec;
	union xfs_btree_key	key;
	union xfs_btree_key	hkey;
	union xfs_btree_key	*keyp;
	struct xfs_btree_block	*block;
	struct xfs_btree_block	*keyblock;
	struct xfs_buf		*bp;

	block = xfs_btree_get_block(cur, 0, &bp);
	rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);

	trace_xfs_scrub_btree_rec(bs->sc, cur, 0);

	/* If this isn't the first record, are they in order? */
	if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
		xfs_scrub_btree_set_corrupt(bs->sc, cur, 0);
	bs->firstrec = false;
	/* Remember this record for the next ordering comparison. */
	memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);

	/* A single-level btree has no parent keys to compare against. */
	if (cur->bc_nlevels == 1)
		return;

	/* Is this at least as large as the parent low key? */
	cur->bc_ops->init_key_from_rec(&key, rec);
	keyblock = xfs_btree_get_block(cur, 1, &bp);
	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock);
	if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);

	/* Only overlapping btrees carry high keys. */
	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
		return;

	/* Is this no larger than the parent high key? */
	cur->bc_ops->init_high_key_from_rec(&hkey, rec);
	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock);
	if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
}
142
/*
 * Make sure this key is in order and doesn't stray outside of the parent
 * keys.
 */
STATIC void
xfs_scrub_btree_key(
	struct xfs_scrub_btree	*bs,
	int			level)
{
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_key	*key;
	union xfs_btree_key	*keyp;
	struct xfs_btree_block	*block;
	struct xfs_btree_block	*keyblock;
	struct xfs_buf		*bp;

	block = xfs_btree_get_block(cur, level, &bp);
	key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);

	trace_xfs_scrub_btree_key(bs->sc, cur, level);

	/* If this isn't the first key, are they in order? */
	if (!bs->firstkey[level] &&
	    !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key))
		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
	bs->firstkey[level] = false;
	/* Remember this key for the next per-level ordering comparison. */
	memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len);

	/* The root level has no parent keys to compare against. */
	if (level + 1 >= cur->bc_nlevels)
		return;

	/* Is this at least as large as the parent low key? */
	keyblock = xfs_btree_get_block(cur, level + 1, &bp);
	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
	if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);

	/* Only overlapping btrees carry high keys. */
	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
		return;

	/* Is this no larger than the parent high key? */
	key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
	if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
}
189
190/*
191 * Check a btree pointer. Returns true if it's ok to use this pointer.
192 * Callers do not need to set the corrupt flag.
193 */
194static bool
195xfs_scrub_btree_ptr_ok(
196 struct xfs_scrub_btree *bs,
197 int level,
198 union xfs_btree_ptr *ptr)
199{
200 bool res;
201
202 /* A btree rooted in an inode has no block pointer to the root. */
203 if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
204 level == bs->cur->bc_nlevels)
205 return true;
206
207 /* Otherwise, check the pointers. */
208 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
209 res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
210 else
211 res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
212 if (!res)
213 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
214
215 return res;
216}
217
/* Check that a btree block's sibling matches what we expect it. */
STATIC int
xfs_scrub_btree_block_check_sibling(
	struct xfs_scrub_btree		*bs,
	int				level,
	int				direction,
	union xfs_btree_ptr		*sibling)
{
	struct xfs_btree_cur		*cur = bs->cur;
	struct xfs_btree_block		*pblock;
	struct xfs_buf			*pbp;
	struct xfs_btree_cur		*ncur = NULL;
	union xfs_btree_ptr		*pp;
	int				success;
	int				error;

	/* Use a duplicate cursor so we don't disturb the main traversal. */
	error = xfs_btree_dup_cursor(cur, &ncur);
	if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error) ||
	    !ncur)
		return error;

	/*
	 * If the pointer is null, we shouldn't be able to move the upper
	 * level pointer anywhere.
	 */
	if (xfs_btree_ptr_is_null(cur, sibling)) {
		if (direction > 0)
			error = xfs_btree_increment(ncur, level + 1, &success);
		else
			error = xfs_btree_decrement(ncur, level + 1, &success);
		/* Movement succeeded where it shouldn't have: corrupt. */
		if (error == 0 && success)
			xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
		error = 0;
		goto out;
	}

	/* Increment upper level pointer. */
	if (direction > 0)
		error = xfs_btree_increment(ncur, level + 1, &success);
	else
		error = xfs_btree_decrement(ncur, level + 1, &success);
	if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error))
		goto out;
	if (!success) {
		/* Sibling exists but the parent couldn't step to it. */
		xfs_scrub_btree_set_corrupt(bs->sc, cur, level + 1);
		goto out;
	}

	/* Compare upper level pointer to sibling pointer. */
	pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
	pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
	if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp))
		goto out;

	if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
out:
	xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
	return error;
}
278
279/* Check the siblings of a btree block. */
280STATIC int
281xfs_scrub_btree_block_check_siblings(
282 struct xfs_scrub_btree *bs,
283 struct xfs_btree_block *block)
284{
285 struct xfs_btree_cur *cur = bs->cur;
286 union xfs_btree_ptr leftsib;
287 union xfs_btree_ptr rightsib;
288 int level;
289 int error = 0;
290
291 xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
292 xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
293 level = xfs_btree_get_level(block);
294
295 /* Root block should never have siblings. */
296 if (level == cur->bc_nlevels - 1) {
297 if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
298 !xfs_btree_ptr_is_null(cur, &rightsib))
299 xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
300 goto out;
301 }
302
303 /*
304 * Does the left & right sibling pointers match the adjacent
305 * parent level pointers?
306 * (These function absorbs error codes for us.)
307 */
308 error = xfs_scrub_btree_block_check_sibling(bs, level, -1, &leftsib);
309 if (error)
310 return error;
311 error = xfs_scrub_btree_block_check_sibling(bs, level, 1, &rightsib);
312 if (error)
313 return error;
314out:
315 return error;
316}
317
/*
 * Grab and scrub a btree block given a btree pointer.  Returns block
 * and buffer pointers (if applicable) if they're ok to use.
 */
STATIC int
xfs_scrub_btree_get_block(
	struct xfs_scrub_btree		*bs,
	int				level,
	union xfs_btree_ptr		*pp,		/* pointer to the block to read */
	struct xfs_btree_block		**pblock,	/* out: btree block */
	struct xfs_buf			**pbp)		/* out: backing buffer, if any */
{
	void				*failed_at;
	int				error;

	*pblock = NULL;
	*pbp = NULL;

	/* Read the block; verifier errors are absorbed into sm_flags. */
	error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
	if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, level, &error) ||
	    !*pblock)
		return error;

	/*
	 * Run the block checker by hand; a non-NULL return marks a failed
	 * check and the block must not be trusted.
	 */
	xfs_btree_get_block(bs->cur, level, pbp);
	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
		failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
				level, *pbp);
	else
		failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
				level, *pbp);
	if (failed_at) {
		/* Record the corruption but keep the scrub going. */
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
		return 0;
	}

	/*
	 * Check the block's siblings; this function absorbs error codes
	 * for us.
	 */
	return xfs_scrub_btree_block_check_siblings(bs, *pblock);
}
359
/*
 * Check that the low and high keys of this block match the keys stored
 * in the parent block.
 */
STATIC void
xfs_scrub_btree_block_keys(
	struct xfs_scrub_btree	*bs,
	int			level,
	struct xfs_btree_block	*block)
{
	union xfs_btree_key	block_keys;
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_key	*high_bk;
	union xfs_btree_key	*parent_keys;
	union xfs_btree_key	*high_pk;
	struct xfs_btree_block	*parent_block;
	struct xfs_buf		*bp;

	/* The root has no parent to compare against. */
	if (level >= cur->bc_nlevels - 1)
		return;

	/* Calculate the keys for this block. */
	xfs_btree_get_keys(cur, block, &block_keys);

	/* Obtain the parent's copy of the keys for this block. */
	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
	parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1],
			parent_block);

	/*
	 * NOTE(review): corruption is reported at level 1 rather than
	 * "level" here and below -- confirm that is intentional.
	 */
	if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);

	/* Only overlapping-interval btrees carry high keys. */
	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
		return;

	/* Get high keys */
	high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
	high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1],
			parent_block);

	if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
}
403
/*
 * Visit all nodes and leaves of a btree.  Check that all pointers and
 * records are in order, that the keys reflect the records, and use a callback
 * so that the caller can verify individual records.
 */
int
xfs_scrub_btree(
	struct xfs_scrub_context	*sc,
	struct xfs_btree_cur		*cur,
	xfs_scrub_btree_rec_fn		scrub_fn,	/* per-record callback */
	struct xfs_owner_info		*oinfo,
	void				*private)	/* opaque data for scrub_fn */
{
	struct xfs_scrub_btree		bs = { NULL };
	union xfs_btree_ptr		ptr;
	union xfs_btree_ptr		*pp;
	union xfs_btree_rec		*recp;
	struct xfs_btree_block		*block;
	int				level;
	struct xfs_buf			*bp;
	int				i;
	int				error = 0;

	/* Initialize scrub state */
	bs.cur = cur;
	bs.scrub_rec = scrub_fn;
	bs.oinfo = oinfo;
	bs.firstrec = true;
	bs.private = private;
	bs.sc = sc;
	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
		bs.firstkey[i] = true;
	INIT_LIST_HEAD(&bs.to_check);

	/* Don't try to check a tree with a height we can't handle. */
	if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
		xfs_scrub_btree_set_corrupt(sc, cur, 0);
		goto out;
	}

	/*
	 * Load the root of the btree.  The helper function absorbs
	 * error codes for us.
	 */
	level = cur->bc_nlevels - 1;
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	if (!xfs_scrub_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
		goto out;
	error = xfs_scrub_btree_get_block(&bs, level, &ptr, &block, &bp);
	if (error || !block)
		goto out;

	cur->bc_ptrs[level] = 1;

	/*
	 * Iterative depth-first walk of the tree.  bc_ptrs[level] holds
	 * the 1-based index of the next entry to visit at each level;
	 * running past bb_numrecs pops us back up one level.
	 */
	while (level < cur->bc_nlevels) {
		block = xfs_btree_get_block(cur, level, &bp);

		if (level == 0) {
			/* End of leaf, pop back towards the root. */
			if (cur->bc_ptrs[level] >
			    be16_to_cpu(block->bb_numrecs)) {
				xfs_scrub_btree_block_keys(&bs, level, block);
				if (level < cur->bc_nlevels - 1)
					cur->bc_ptrs[level + 1]++;
				level++;
				continue;
			}

			/* Records in order for scrub? */
			xfs_scrub_btree_rec(&bs);

			/* Call out to the record checker. */
			recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
			error = bs.scrub_rec(&bs, recp);
			if (error)
				break;
			/* Bail on a fatal signal or once corruption is found. */
			if (xfs_scrub_should_terminate(sc, &error) ||
			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
				break;

			cur->bc_ptrs[level]++;
			continue;
		}

		/* End of node, pop back towards the root. */
		if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
			xfs_scrub_btree_block_keys(&bs, level, block);
			if (level < cur->bc_nlevels - 1)
				cur->bc_ptrs[level + 1]++;
			level++;
			continue;
		}

		/* Keys in order for scrub? */
		xfs_scrub_btree_key(&bs, level);

		/* Drill another level deeper. */
		pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
		if (!xfs_scrub_btree_ptr_ok(&bs, level, pp)) {
			/* Bad pointer; skip this subtree entirely. */
			cur->bc_ptrs[level]++;
			continue;
		}
		level--;
		error = xfs_scrub_btree_get_block(&bs, level, pp, &block, &bp);
		if (error || !block)
			goto out;

		cur->bc_ptrs[level] = 1;
	}

out:
	return error;
}
diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h
new file mode 100644
index 000000000000..4de825a626d1
--- /dev/null
+++ b/fs/xfs/scrub/btree.h
@@ -0,0 +1,57 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#ifndef __XFS_SCRUB_BTREE_H__
21#define __XFS_SCRUB_BTREE_H__
22
23/* btree scrub */
24
25/* Check for btree operation errors. */
26bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc,
27 struct xfs_btree_cur *cur, int level, int *error);
28
29/* Check for btree corruption. */
30void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc,
31 struct xfs_btree_cur *cur, int level);
32
33struct xfs_scrub_btree;
34typedef int (*xfs_scrub_btree_rec_fn)(
35 struct xfs_scrub_btree *bs,
36 union xfs_btree_rec *rec);
37
struct xfs_scrub_btree {
	/* caller-provided scrub state */
	struct xfs_scrub_context	*sc;		/* overall scrub context */
	struct xfs_btree_cur		*cur;		/* cursor for the btree under scrub */
	xfs_scrub_btree_rec_fn		scrub_rec;	/* per-record callback */
	struct xfs_owner_info		*oinfo;		/* btree block owner info */
	void				*private;	/* opaque data for scrub_rec */

	/* internal scrub state */
	union xfs_btree_rec		lastrec;	/* previous record, for ordering checks */
	bool				firstrec;	/* true until the first record is seen */
	union xfs_btree_key		lastkey[XFS_BTREE_MAXLEVELS];	/* previous key per level */
	bool				firstkey[XFS_BTREE_MAXLEVELS];	/* true until first key per level */
	struct list_head		to_check;	/* deferred-check list */
};
53int xfs_scrub_btree(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
54 xfs_scrub_btree_rec_fn scrub_fn,
55 struct xfs_owner_info *oinfo, void *private);
56
57#endif /* __XFS_SCRUB_BTREE_H__ */
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
new file mode 100644
index 000000000000..ac95fe911d96
--- /dev/null
+++ b/fs/xfs/scrub/common.c
@@ -0,0 +1,574 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_icache.h"
34#include "xfs_itable.h"
35#include "xfs_alloc.h"
36#include "xfs_alloc_btree.h"
37#include "xfs_bmap.h"
38#include "xfs_bmap_btree.h"
39#include "xfs_ialloc.h"
40#include "xfs_ialloc_btree.h"
41#include "xfs_refcount.h"
42#include "xfs_refcount_btree.h"
43#include "xfs_rmap.h"
44#include "xfs_rmap_btree.h"
45#include "xfs_log.h"
46#include "xfs_trans_priv.h"
47#include "scrub/xfs_scrub.h"
48#include "scrub/scrub.h"
49#include "scrub/common.h"
50#include "scrub/trace.h"
51#include "scrub/btree.h"
52
53/* Common code for the metadata scrubbers. */
54
55/*
56 * Handling operational errors.
57 *
58 * The *_process_error() family of functions are used to process error return
59 * codes from functions called as part of a scrub operation.
60 *
61 * If there's no error, we return true to tell the caller that it's ok
62 * to move on to the next check in its list.
63 *
64 * For non-verifier errors (e.g. ENOMEM) we return false to tell the
65 * caller that something bad happened, and we preserve *error so that
66 * the caller can return the *error up the stack to userspace.
67 *
68 * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
69 * OFLAG_CORRUPT in sm_flags and the *error is cleared. In other words,
70 * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
71 * not via return codes. We return false to tell the caller that
72 * something bad happened. Since the error has been cleared, the caller
73 * will (presumably) return that zero and scrubbing will move on to
74 * whatever's next.
75 *
76 * ftrace can be used to record the precise metadata location and the
77 * approximate code location of the failed operation.
78 */
79
80/* Check for operational errors. */
81bool
82xfs_scrub_process_error(
83 struct xfs_scrub_context *sc,
84 xfs_agnumber_t agno,
85 xfs_agblock_t bno,
86 int *error)
87{
88 switch (*error) {
89 case 0:
90 return true;
91 case -EDEADLOCK:
92 /* Used to restart an op with deadlock avoidance. */
93 trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
94 break;
95 case -EFSBADCRC:
96 case -EFSCORRUPTED:
97 /* Note the badness but don't abort. */
98 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
99 *error = 0;
100 /* fall through */
101 default:
102 trace_xfs_scrub_op_error(sc, agno, bno, *error,
103 __return_address);
104 break;
105 }
106 return false;
107}
108
109/* Check for operational errors for a file offset. */
110bool
111xfs_scrub_fblock_process_error(
112 struct xfs_scrub_context *sc,
113 int whichfork,
114 xfs_fileoff_t offset,
115 int *error)
116{
117 switch (*error) {
118 case 0:
119 return true;
120 case -EDEADLOCK:
121 /* Used to restart an op with deadlock avoidance. */
122 trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
123 break;
124 case -EFSBADCRC:
125 case -EFSCORRUPTED:
126 /* Note the badness but don't abort. */
127 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
128 *error = 0;
129 /* fall through */
130 default:
131 trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error,
132 __return_address);
133 break;
134 }
135 return false;
136}
137
138/*
139 * Handling scrub corruption/optimization/warning checks.
140 *
141 * The *_set_{corrupt,preen,warning}() family of functions are used to
142 * record the presence of metadata that is incorrect (corrupt), could be
143 * optimized somehow (preen), or should be flagged for administrative
144 * review but is not incorrect (warn).
145 *
146 * ftrace can be used to record the precise metadata location and
147 * approximate code location of the failed check.
148 */
149
/* Record a block which could be optimized ("preened"). */
void
xfs_scrub_block_set_preen(
	struct xfs_scrub_context	*sc,
	struct xfs_buf			*bp)	/* buffer of the preenable block */
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xfs_scrub_block_preen(sc, bp->b_bn, __return_address);
}
159
/*
 * Record an inode which could be optimized.  The trace data will
 * include the block given by bp if bp is given; otherwise it will use
 * block 0 as a placeholder.
 */
void
xfs_scrub_ino_set_preen(
	struct xfs_scrub_context	*sc,
	xfs_ino_t			ino,
	struct xfs_buf			*bp)	/* may be NULL */
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xfs_scrub_ino_preen(sc, ino, bp ? bp->b_bn : 0,
			__return_address);
}
175
/* Record a corrupt block; the trace captures the caller's address. */
void
xfs_scrub_block_set_corrupt(
	struct xfs_scrub_context	*sc,
	struct xfs_buf			*bp)	/* buffer of the corrupt block */
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
}
185
/*
 * Record a corrupt inode.  The trace data will include the block given
 * by bp if bp is given; otherwise it will use block 0 as a placeholder.
 */
void
xfs_scrub_ino_set_corrupt(
	struct xfs_scrub_context	*sc,
	xfs_ino_t			ino,
	struct xfs_buf			*bp)	/* may be NULL */
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
}
200
/* Record corruption in a block indexed by a file fork. */
void
xfs_scrub_fblock_set_corrupt(
	struct xfs_scrub_context	*sc,
	int				whichfork,	/* data/attr/cow fork */
	xfs_fileoff_t			offset)		/* file offset of the bad block */
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
}
211
/*
 * Warn about inodes that need administrative review but are not
 * incorrect.
 */
void
xfs_scrub_ino_set_warning(
	struct xfs_scrub_context	*sc,
	xfs_ino_t			ino,
	struct xfs_buf			*bp)	/* may be NULL */
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	trace_xfs_scrub_ino_warning(sc, ino, bp ? bp->b_bn : 0,
			__return_address);
}
226
/* Warn about a block indexed by a file fork that needs review. */
void
xfs_scrub_fblock_set_warning(
	struct xfs_scrub_context	*sc,
	int				whichfork,	/* data/attr/cow fork */
	xfs_fileoff_t			offset)		/* file offset of the block */
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	trace_xfs_scrub_fblock_warning(sc, whichfork, offset, __return_address);
}
237
/* Signal an incomplete scrub; the check could not be finished. */
void
xfs_scrub_set_incomplete(
	struct xfs_scrub_context	*sc)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
	trace_xfs_scrub_incomplete(sc, __return_address);
}
246
247/*
248 * AG scrubbing
249 *
250 * These helpers facilitate locking an allocation group's header
251 * buffers, setting up cursors for all btrees that are present, and
252 * cleaning everything up once we're through.
253 */
254
255/* Decide if we want to return an AG header read failure. */
256static inline bool
257want_ag_read_header_failure(
258 struct xfs_scrub_context *sc,
259 unsigned int type)
260{
261 /* Return all AG header read failures when scanning btrees. */
262 if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
263 sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
264 sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
265 return true;
266 /*
267 * If we're scanning a given type of AG header, we only want to
268 * see read failures from that specific header. We'd like the
269 * other headers to cross-check them, but this isn't required.
270 */
271 if (sc->sm->sm_type == type)
272 return true;
273 return false;
274}
275
/*
 * Grab all the headers for an AG.
 *
 * The headers should be released by xfs_scrub_ag_free, but as a fail
 * safe we attach all the buffers we grab to the scrub transaction so
 * they'll all be freed when we cancel it.
 */
int
xfs_scrub_ag_read_headers(
	struct xfs_scrub_context	*sc,
	xfs_agnumber_t			agno,
	struct xfs_buf			**agi,	/* out: AGI buffer */
	struct xfs_buf			**agf,	/* out: AGF buffer */
	struct xfs_buf			**agfl)	/* out: AGFL buffer */
{
	struct xfs_mount		*mp = sc->mp;
	int				error;

	/*
	 * Read AGI -> AGF -> AGFL; a read failure only stops the
	 * sequence early if the caller cares about that header.
	 */
	error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
		goto out;

	error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
		goto out;

	error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
		goto out;
	/*
	 * NOTE(review): an AGFL read error falls through to "out" either
	 * way, so it is returned even when the caller didn't want it --
	 * confirm whether that is intended.
	 */

out:
	return error;
}
309
310/* Release all the AG btree cursors. */
311void
312xfs_scrub_ag_btcur_free(
313 struct xfs_scrub_ag *sa)
314{
315 if (sa->refc_cur)
316 xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
317 if (sa->rmap_cur)
318 xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
319 if (sa->fino_cur)
320 xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
321 if (sa->ino_cur)
322 xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
323 if (sa->cnt_cur)
324 xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
325 if (sa->bno_cur)
326 xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);
327
328 sa->refc_cur = NULL;
329 sa->rmap_cur = NULL;
330 sa->fino_cur = NULL;
331 sa->ino_cur = NULL;
332 sa->bno_cur = NULL;
333 sa->cnt_cur = NULL;
334}
335
/*
 * Initialize all the btree cursors for an AG.  Returns -ENOMEM if any
 * cursor could not be created; the caller cleans up partially-built
 * cursors via xfs_scrub_ag_btcur_free.
 */
int
xfs_scrub_ag_btcur_init(
	struct xfs_scrub_context	*sc,
	struct xfs_scrub_ag		*sa)	/* agf_bp/agi_bp should be populated */
{
	struct xfs_mount		*mp = sc->mp;
	xfs_agnumber_t			agno = sa->agno;

	if (sa->agf_bp) {
		/* Set up a bnobt cursor for cross-referencing. */
		sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				agno, XFS_BTNUM_BNO);
		if (!sa->bno_cur)
			goto err;

		/* Set up a cntbt cursor for cross-referencing. */
		sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				agno, XFS_BTNUM_CNT);
		if (!sa->cnt_cur)
			goto err;
	}

	/* Set up an inobt cursor for cross-referencing. */
	if (sa->agi_bp) {
		sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
				agno, XFS_BTNUM_INO);
		if (!sa->ino_cur)
			goto err;
	}

	/* Set up a finobt cursor, but only if the feature is enabled. */
	if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb)) {
		sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
				agno, XFS_BTNUM_FINO);
		if (!sa->fino_cur)
			goto err;
	}

	/* Set up a rmapbt cursor, but only if the feature is enabled. */
	if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb)) {
		sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
				agno);
		if (!sa->rmap_cur)
			goto err;
	}

	/* Set up a refcountbt cursor, but only if reflink is enabled. */
	if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) {
		sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
				sa->agf_bp, agno, NULL);
		if (!sa->refc_cur)
			goto err;
	}

	return 0;
err:
	return -ENOMEM;
}
395
396/* Release the AG header context and btree cursors. */
397void
398xfs_scrub_ag_free(
399 struct xfs_scrub_context *sc,
400 struct xfs_scrub_ag *sa)
401{
402 xfs_scrub_ag_btcur_free(sa);
403 if (sa->agfl_bp) {
404 xfs_trans_brelse(sc->tp, sa->agfl_bp);
405 sa->agfl_bp = NULL;
406 }
407 if (sa->agf_bp) {
408 xfs_trans_brelse(sc->tp, sa->agf_bp);
409 sa->agf_bp = NULL;
410 }
411 if (sa->agi_bp) {
412 xfs_trans_brelse(sc->tp, sa->agi_bp);
413 sa->agi_bp = NULL;
414 }
415 sa->agno = NULLAGNUMBER;
416}
417
/*
 * For scrub, grab the AGI and the AGF headers, in that order.  Locking
 * order requires us to get the AGI before the AGF.  We use the
 * transaction to avoid deadlocking on crosslinked metadata buffers;
 * either the caller passes one in (bmap scrub) or we have to create a
 * transaction ourselves.
 */
int
xfs_scrub_ag_init(
	struct xfs_scrub_context	*sc,
	xfs_agnumber_t			agno,
	struct xfs_scrub_ag		*sa)	/* out: header buffers + cursors */
{
	int				error;

	sa->agno = agno;
	error = xfs_scrub_ag_read_headers(sc, agno, &sa->agi_bp,
			&sa->agf_bp, &sa->agfl_bp);
	if (error)
		return error;

	/* Headers in hand; build the btree cursors. */
	return xfs_scrub_ag_btcur_init(sc, sa);
}
441
442/* Per-scrubber setup functions */
443
/* Set us up with a transaction and an empty context. */
int
xfs_scrub_setup_fs(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)	/* unused; common setup-fn signature */
{
	return xfs_scrub_trans_alloc(sc->sm, sc->mp, &sc->tp);
}
452
/* Set us up with AG headers and btree cursors. */
int
xfs_scrub_setup_ag_btree(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip,
	bool				force_log)	/* checkpoint the log first? */
{
	struct xfs_mount		*mp = sc->mp;
	int				error;

	/*
	 * If the caller asks us to checkpoint the log, do so.  This
	 * expensive operation should be performed infrequently and only
	 * as a last resort.  Any caller that sets force_log should
	 * document why they need to do so.
	 */
	if (force_log) {
		error = xfs_scrub_checkpoint_log(mp);
		if (error)
			return error;
	}

	error = xfs_scrub_setup_ag_header(sc, ip);
	if (error)
		return error;

	/* Grab the AG headers and build the btree cursors. */
	return xfs_scrub_ag_init(sc, sc->sm->sm_agno, &sc->sa);
}
481
/* Push everything out of the log onto disk. */
int
xfs_scrub_checkpoint_log(
	struct xfs_mount	*mp)
{
	int			error;

	/* Force the log synchronously... */
	error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
	if (error)
		return error;
	/* ...then wait for the AIL to drain to disk. */
	xfs_ail_push_all_sync(mp->m_ail);
	return 0;
}
495
/*
 * Given an inode and the scrub control structure, grab either the
 * inode referenced in the control structure or the inode passed in.
 * The inode is not locked.  On success, sc->ip is set.
 */
int
xfs_scrub_get_inode(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip_in)	/* inode the caller already opened */
{
	struct xfs_mount		*mp = sc->mp;
	struct xfs_inode		*ip = NULL;
	int				error;

	/*
	 * If userspace passed us an AG number or a generation number
	 * without an inode number, they haven't got a clue so bail out
	 * immediately.
	 */
	if (sc->sm->sm_agno || (sc->sm->sm_gen && !sc->sm->sm_ino))
		return -EINVAL;

	/* We want to scan the inode we already had opened. */
	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
		sc->ip = ip_in;
		return 0;
	}

	/* Look up the inode, see if the generation number matches. */
	if (xfs_internal_inum(mp, sc->sm->sm_ino))
		return -ENOENT;
	/* UNTRUSTED: the inode number came straight from userspace. */
	error = xfs_iget(mp, NULL, sc->sm->sm_ino,
			XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
	if (error == -ENOENT || error == -EINVAL) {
		/* inode doesn't exist... */
		return -ENOENT;
	} else if (error) {
		trace_xfs_scrub_op_error(sc,
				XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
				XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
				error, __return_address);
		return error;
	}
	/* Stale generation number?  Drop the reference and bail out. */
	if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
		iput(VFS_I(ip));
		return -ENOENT;
	}

	sc->ip = ip;
	return 0;
}
547
/* Set us up to scrub a file's contents. */
int
xfs_scrub_setup_inode_contents(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip,
	unsigned int			resblks)	/* NOTE(review): currently unused */
{
	struct xfs_mount		*mp = sc->mp;
	int				error;

	error = xfs_scrub_get_inode(sc, ip);
	if (error)
		return error;

	/* Got the inode, lock it and we're ready to go. */
	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	xfs_ilock(sc->ip, sc->ilock_flags);
	error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
	if (error)
		goto out;
	/* Take the ILOCK only after the transaction has been allocated. */
	sc->ilock_flags |= XFS_ILOCK_EXCL;
	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);

out:
	/* scrub teardown will unlock and release the inode for us */
	return error;
}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
new file mode 100644
index 000000000000..5c043855570e
--- /dev/null
+++ b/fs/xfs/scrub/common.h
@@ -0,0 +1,144 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#ifndef __XFS_SCRUB_COMMON_H__
21#define __XFS_SCRUB_COMMON_H__
22
23/*
24 * We /could/ terminate a scrub/repair operation early. If we're not
25 * in a good place to continue (fatal signal, etc.) then bail out.
26 * Note that we're careful not to make any judgements about *error.
27 */
28static inline bool
29xfs_scrub_should_terminate(
30 struct xfs_scrub_context *sc,
31 int *error)
32{
33 if (fatal_signal_pending(current)) {
34 if (*error == 0)
35 *error = -EAGAIN;
36 return true;
37 }
38 return false;
39}
40
/*
 * Grab an empty transaction so that we can re-grab locked buffers if
 * one of our btrees turns out to be cyclic.
 */
static inline int
xfs_scrub_trans_alloc(
	struct xfs_scrub_metadata	*sm,	/* NOTE(review): unused here */
	struct xfs_mount		*mp,
	struct xfs_trans		**tpp)	/* out: empty transaction */
{
	return xfs_trans_alloc_empty(mp, tpp);
}
53
54bool xfs_scrub_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
55 xfs_agblock_t bno, int *error);
56bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork,
57 xfs_fileoff_t offset, int *error);
58
59void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc,
60 struct xfs_buf *bp);
61void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino,
62 struct xfs_buf *bp);
63
64void xfs_scrub_block_set_corrupt(struct xfs_scrub_context *sc,
65 struct xfs_buf *bp);
66void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
67 struct xfs_buf *bp);
68void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork,
69 xfs_fileoff_t offset);
70
71void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino,
72 struct xfs_buf *bp);
73void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
74 xfs_fileoff_t offset);
75
76void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc);
77int xfs_scrub_checkpoint_log(struct xfs_mount *mp);
78
79/* Setup functions */
80int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip);
81int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc,
82 struct xfs_inode *ip);
83int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc,
84 struct xfs_inode *ip);
85int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc,
86 struct xfs_inode *ip);
87int xfs_scrub_setup_ag_rmapbt(struct xfs_scrub_context *sc,
88 struct xfs_inode *ip);
89int xfs_scrub_setup_ag_refcountbt(struct xfs_scrub_context *sc,
90 struct xfs_inode *ip);
91int xfs_scrub_setup_inode(struct xfs_scrub_context *sc,
92 struct xfs_inode *ip);
93int xfs_scrub_setup_inode_bmap(struct xfs_scrub_context *sc,
94 struct xfs_inode *ip);
95int xfs_scrub_setup_inode_bmap_data(struct xfs_scrub_context *sc,
96 struct xfs_inode *ip);
97int xfs_scrub_setup_directory(struct xfs_scrub_context *sc,
98 struct xfs_inode *ip);
99int xfs_scrub_setup_xattr(struct xfs_scrub_context *sc,
100 struct xfs_inode *ip);
101int xfs_scrub_setup_symlink(struct xfs_scrub_context *sc,
102 struct xfs_inode *ip);
103int xfs_scrub_setup_parent(struct xfs_scrub_context *sc,
104 struct xfs_inode *ip);
105#ifdef CONFIG_XFS_RT
106int xfs_scrub_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip);
107#else
108static inline int
109xfs_scrub_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip)
110{
111 return -ENOENT;
112}
113#endif
114#ifdef CONFIG_XFS_QUOTA
115int xfs_scrub_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip);
116#else
117static inline int
118xfs_scrub_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip)
119{
120 return -ENOENT;
121}
122#endif
123
124void xfs_scrub_ag_free(struct xfs_scrub_context *sc, struct xfs_scrub_ag *sa);
125int xfs_scrub_ag_init(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
126 struct xfs_scrub_ag *sa);
127int xfs_scrub_ag_read_headers(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
128 struct xfs_buf **agi, struct xfs_buf **agf,
129 struct xfs_buf **agfl);
130void xfs_scrub_ag_btcur_free(struct xfs_scrub_ag *sa);
131int xfs_scrub_ag_btcur_init(struct xfs_scrub_context *sc,
132 struct xfs_scrub_ag *sa);
133int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc,
134 int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno,
135 void *),
136 void *priv);
137
138int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc,
139 struct xfs_inode *ip, bool force_log);
140int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in);
141int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc,
142 struct xfs_inode *ip, unsigned int resblks);
143
144#endif /* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
new file mode 100644
index 000000000000..d94edd93cba8
--- /dev/null
+++ b/fs/xfs/scrub/dabtree.c
@@ -0,0 +1,591 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_inode_fork.h"
34#include "xfs_da_format.h"
35#include "xfs_da_btree.h"
36#include "xfs_dir2.h"
37#include "xfs_dir2_priv.h"
38#include "xfs_attr_leaf.h"
39#include "scrub/xfs_scrub.h"
40#include "scrub/scrub.h"
41#include "scrub/common.h"
42#include "scrub/trace.h"
43#include "scrub/dabtree.h"
44
45/* Directory/Attribute Btree */
46
47/*
48 * Check for da btree operation errors. See the section about handling
49 * operational errors in common.c.
50 */
51bool
52xfs_scrub_da_process_error(
53 struct xfs_scrub_da_btree *ds,
54 int level,
55 int *error)
56{
57 struct xfs_scrub_context *sc = ds->sc;
58
59 if (*error == 0)
60 return true;
61
62 switch (*error) {
63 case -EDEADLOCK:
64 /* Used to restart an op with deadlock avoidance. */
65 trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
66 break;
67 case -EFSBADCRC:
68 case -EFSCORRUPTED:
69 /* Note the badness but don't abort. */
70 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
71 *error = 0;
72 /* fall through */
73 default:
74 trace_xfs_scrub_file_op_error(sc, ds->dargs.whichfork,
75 xfs_dir2_da_to_db(ds->dargs.geo,
76 ds->state->path.blk[level].blkno),
77 *error, __return_address);
78 break;
79 }
80 return false;
81}
82
83/*
84 * Check for da btree corruption. See the section about handling
85 * operational errors in common.c.
86 */
87void
88xfs_scrub_da_set_corrupt(
89 struct xfs_scrub_da_btree *ds,
90 int level)
91{
92 struct xfs_scrub_context *sc = ds->sc;
93
94 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
95
96 trace_xfs_scrub_fblock_error(sc, ds->dargs.whichfork,
97 xfs_dir2_da_to_db(ds->dargs.geo,
98 ds->state->path.blk[level].blkno),
99 __return_address);
100}
101
102/* Find an entry at a certain level in a da btree. */
103STATIC void *
104xfs_scrub_da_btree_entry(
105 struct xfs_scrub_da_btree *ds,
106 int level,
107 int rec)
108{
109 char *ents;
110 struct xfs_da_state_blk *blk;
111 void *baddr;
112
113 /* Dispatch the entry finding function. */
114 blk = &ds->state->path.blk[level];
115 baddr = blk->bp->b_addr;
116 switch (blk->magic) {
117 case XFS_ATTR_LEAF_MAGIC:
118 case XFS_ATTR3_LEAF_MAGIC:
119 ents = (char *)xfs_attr3_leaf_entryp(baddr);
120 return ents + (rec * sizeof(struct xfs_attr_leaf_entry));
121 case XFS_DIR2_LEAFN_MAGIC:
122 case XFS_DIR3_LEAFN_MAGIC:
123 ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
124 return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
125 case XFS_DIR2_LEAF1_MAGIC:
126 case XFS_DIR3_LEAF1_MAGIC:
127 ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
128 return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
129 case XFS_DA_NODE_MAGIC:
130 case XFS_DA3_NODE_MAGIC:
131 ents = (char *)ds->dargs.dp->d_ops->node_tree_p(baddr);
132 return ents + (rec * sizeof(struct xfs_da_node_entry));
133 }
134
135 return NULL;
136}
137
138/* Scrub a da btree hash (key). */
139int
140xfs_scrub_da_btree_hash(
141 struct xfs_scrub_da_btree *ds,
142 int level,
143 __be32 *hashp)
144{
145 struct xfs_da_state_blk *blks;
146 struct xfs_da_node_entry *entry;
147 xfs_dahash_t hash;
148 xfs_dahash_t parent_hash;
149
150 /* Is this hash in order? */
151 hash = be32_to_cpu(*hashp);
152 if (hash < ds->hashes[level])
153 xfs_scrub_da_set_corrupt(ds, level);
154 ds->hashes[level] = hash;
155
156 if (level == 0)
157 return 0;
158
159 /* Is this hash no larger than the parent hash? */
160 blks = ds->state->path.blk;
161 entry = xfs_scrub_da_btree_entry(ds, level - 1, blks[level - 1].index);
162 parent_hash = be32_to_cpu(entry->hashval);
163 if (parent_hash < hash)
164 xfs_scrub_da_set_corrupt(ds, level);
165
166 return 0;
167}
168
169/*
170 * Check a da btree pointer. Returns true if it's ok to use this
171 * pointer.
172 */
173STATIC bool
174xfs_scrub_da_btree_ptr_ok(
175 struct xfs_scrub_da_btree *ds,
176 int level,
177 xfs_dablk_t blkno)
178{
179 if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
180 xfs_scrub_da_set_corrupt(ds, level);
181 return false;
182 }
183
184 return true;
185}
186
187/*
188 * The da btree scrubber can handle leaf1 blocks as a degenerate
189 * form of leafn blocks. Since the regular da code doesn't handle
190 * leaf1, we must multiplex the verifiers.
191 */
192static void
193xfs_scrub_da_btree_read_verify(
194 struct xfs_buf *bp)
195{
196 struct xfs_da_blkinfo *info = bp->b_addr;
197
198 switch (be16_to_cpu(info->magic)) {
199 case XFS_DIR2_LEAF1_MAGIC:
200 case XFS_DIR3_LEAF1_MAGIC:
201 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
202 bp->b_ops->verify_read(bp);
203 return;
204 default:
205 /*
206 * xfs_da3_node_buf_ops already know how to handle
207 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
208 */
209 bp->b_ops = &xfs_da3_node_buf_ops;
210 bp->b_ops->verify_read(bp);
211 return;
212 }
213}
214static void
215xfs_scrub_da_btree_write_verify(
216 struct xfs_buf *bp)
217{
218 struct xfs_da_blkinfo *info = bp->b_addr;
219
220 switch (be16_to_cpu(info->magic)) {
221 case XFS_DIR2_LEAF1_MAGIC:
222 case XFS_DIR3_LEAF1_MAGIC:
223 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
224 bp->b_ops->verify_write(bp);
225 return;
226 default:
227 /*
228 * xfs_da3_node_buf_ops already know how to handle
229 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
230 */
231 bp->b_ops = &xfs_da3_node_buf_ops;
232 bp->b_ops->verify_write(bp);
233 return;
234 }
235}
236
/* Buffer ops that multiplex between the leaf1 and da node verifiers. */
static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = {
	.name = "xfs_scrub_da_btree",
	.verify_read = xfs_scrub_da_btree_read_verify,
	.verify_write = xfs_scrub_da_btree_write_verify,
};
242
/*
 * Check a block's sibling.
 *
 * Clones the main cursor into altpath, shifts the clone one block in
 * @direction (0 = back/left, 1 = forw/right), and checks that where it
 * lands agrees with the on-disk @sibling pointer.  A null @sibling
 * means the shift is expected to fail.
 */
STATIC int
xfs_scrub_da_btree_block_check_sibling(
	struct xfs_scrub_da_btree	*ds,
	int				level,
	int				direction,
	xfs_dablk_t			sibling)
{
	int				retval;
	int				error;

	/* Clone the main cursor so we can probe sideways without losing it. */
	memcpy(&ds->state->altpath, &ds->state->path,
			sizeof(ds->state->altpath));

	/*
	 * If the pointer is null, we shouldn't be able to move the upper
	 * level pointer anywhere.
	 */
	if (sibling == 0) {
		error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
				direction, false, &retval);
		/* A successful shift (retval == 0) means a sibling exists. */
		if (error == 0 && retval == 0)
			xfs_scrub_da_set_corrupt(ds, level);
		error = 0;
		goto out;
	}

	/* Move the alternate cursor one block in the direction given. */
	error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
			direction, false, &retval);
	if (!xfs_scrub_da_process_error(ds, level, &error))
		return error;
	/* Nonzero retval means the cursor could not move that way. */
	if (retval) {
		xfs_scrub_da_set_corrupt(ds, level);
		return error;
	}

	/* Compare upper level pointer to sibling pointer. */
	if (ds->state->altpath.blk[level].blkno != sibling)
		xfs_scrub_da_set_corrupt(ds, level);
	xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp);
out:
	return error;
}
287
288/* Check a block's sibling pointers. */
289STATIC int
290xfs_scrub_da_btree_block_check_siblings(
291 struct xfs_scrub_da_btree *ds,
292 int level,
293 struct xfs_da_blkinfo *hdr)
294{
295 xfs_dablk_t forw;
296 xfs_dablk_t back;
297 int error = 0;
298
299 forw = be32_to_cpu(hdr->forw);
300 back = be32_to_cpu(hdr->back);
301
302 /* Top level blocks should not have sibling pointers. */
303 if (level == 0) {
304 if (forw != 0 || back != 0)
305 xfs_scrub_da_set_corrupt(ds, level);
306 return 0;
307 }
308
309 /*
310 * Check back (left) and forw (right) pointers. These functions
311 * absorb error codes for us.
312 */
313 error = xfs_scrub_da_btree_block_check_sibling(ds, level, 0, back);
314 if (error)
315 goto out;
316 error = xfs_scrub_da_btree_block_check_sibling(ds, level, 1, forw);
317
318out:
319 memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
320 return error;
321}
322
323/* Load a dir/attribute block from a btree. */
324STATIC int
325xfs_scrub_da_btree_block(
326 struct xfs_scrub_da_btree *ds,
327 int level,
328 xfs_dablk_t blkno)
329{
330 struct xfs_da_state_blk *blk;
331 struct xfs_da_intnode *node;
332 struct xfs_da_node_entry *btree;
333 struct xfs_da3_blkinfo *hdr3;
334 struct xfs_da_args *dargs = &ds->dargs;
335 struct xfs_inode *ip = ds->dargs.dp;
336 xfs_ino_t owner;
337 int *pmaxrecs;
338 struct xfs_da3_icnode_hdr nodehdr;
339 int error = 0;
340
341 blk = &ds->state->path.blk[level];
342 ds->state->path.active = level + 1;
343
344 /* Release old block. */
345 if (blk->bp) {
346 xfs_trans_brelse(dargs->trans, blk->bp);
347 blk->bp = NULL;
348 }
349
350 /* Check the pointer. */
351 blk->blkno = blkno;
352 if (!xfs_scrub_da_btree_ptr_ok(ds, level, blkno))
353 goto out_nobuf;
354
355 /* Read the buffer. */
356 error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2,
357 &blk->bp, dargs->whichfork,
358 &xfs_scrub_da_btree_buf_ops);
359 if (!xfs_scrub_da_process_error(ds, level, &error))
360 goto out_nobuf;
361
362 /*
363 * We didn't find a dir btree root block, which means that
364 * there's no LEAF1/LEAFN tree (at least not where it's supposed
365 * to be), so jump out now.
366 */
367 if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
368 blk->bp == NULL)
369 goto out_nobuf;
370
371 /* It's /not/ ok for attr trees not to have a da btree. */
372 if (blk->bp == NULL) {
373 xfs_scrub_da_set_corrupt(ds, level);
374 goto out_nobuf;
375 }
376
377 hdr3 = blk->bp->b_addr;
378 blk->magic = be16_to_cpu(hdr3->hdr.magic);
379 pmaxrecs = &ds->maxrecs[level];
380
381 /* We only started zeroing the header on v5 filesystems. */
382 if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad)
383 xfs_scrub_da_set_corrupt(ds, level);
384
385 /* Check the owner. */
386 if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) {
387 owner = be64_to_cpu(hdr3->owner);
388 if (owner != ip->i_ino)
389 xfs_scrub_da_set_corrupt(ds, level);
390 }
391
392 /* Check the siblings. */
393 error = xfs_scrub_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
394 if (error)
395 goto out;
396
397 /* Interpret the buffer. */
398 switch (blk->magic) {
399 case XFS_ATTR_LEAF_MAGIC:
400 case XFS_ATTR3_LEAF_MAGIC:
401 xfs_trans_buf_set_type(dargs->trans, blk->bp,
402 XFS_BLFT_ATTR_LEAF_BUF);
403 blk->magic = XFS_ATTR_LEAF_MAGIC;
404 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
405 if (ds->tree_level != 0)
406 xfs_scrub_da_set_corrupt(ds, level);
407 break;
408 case XFS_DIR2_LEAFN_MAGIC:
409 case XFS_DIR3_LEAFN_MAGIC:
410 xfs_trans_buf_set_type(dargs->trans, blk->bp,
411 XFS_BLFT_DIR_LEAFN_BUF);
412 blk->magic = XFS_DIR2_LEAFN_MAGIC;
413 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
414 if (ds->tree_level != 0)
415 xfs_scrub_da_set_corrupt(ds, level);
416 break;
417 case XFS_DIR2_LEAF1_MAGIC:
418 case XFS_DIR3_LEAF1_MAGIC:
419 xfs_trans_buf_set_type(dargs->trans, blk->bp,
420 XFS_BLFT_DIR_LEAF1_BUF);
421 blk->magic = XFS_DIR2_LEAF1_MAGIC;
422 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
423 if (ds->tree_level != 0)
424 xfs_scrub_da_set_corrupt(ds, level);
425 break;
426 case XFS_DA_NODE_MAGIC:
427 case XFS_DA3_NODE_MAGIC:
428 xfs_trans_buf_set_type(dargs->trans, blk->bp,
429 XFS_BLFT_DA_NODE_BUF);
430 blk->magic = XFS_DA_NODE_MAGIC;
431 node = blk->bp->b_addr;
432 ip->d_ops->node_hdr_from_disk(&nodehdr, node);
433 btree = ip->d_ops->node_tree_p(node);
434 *pmaxrecs = nodehdr.count;
435 blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
436 if (level == 0) {
437 if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
438 xfs_scrub_da_set_corrupt(ds, level);
439 goto out_freebp;
440 }
441 ds->tree_level = nodehdr.level;
442 } else {
443 if (ds->tree_level != nodehdr.level) {
444 xfs_scrub_da_set_corrupt(ds, level);
445 goto out_freebp;
446 }
447 }
448
449 /* XXX: Check hdr3.pad32 once we know how to fix it. */
450 break;
451 default:
452 xfs_scrub_da_set_corrupt(ds, level);
453 goto out_freebp;
454 }
455
456out:
457 return error;
458out_freebp:
459 xfs_trans_brelse(dargs->trans, blk->bp);
460 blk->bp = NULL;
461out_nobuf:
462 blk->blkno = 0;
463 return error;
464}
465
/*
 * Visit all nodes and leaves of a da btree.
 *
 * Performs an iterative depth-first walk using the da state path as
 * the cursor.  Every leaf record is handed to @scrub_fn; every node
 * key is checked for hash ordering via xfs_scrub_da_btree_hash.
 * @private is stashed in the walk context for @scrub_fn's use.
 */
int
xfs_scrub_da_btree(
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_scrub_da_btree_rec_fn	scrub_fn,
	void				*private)
{
	struct xfs_scrub_da_btree	ds = {};
	struct xfs_mount		*mp = sc->mp;
	struct xfs_da_state_blk		*blks;
	struct xfs_da_node_entry	*key;
	void				*rec;
	xfs_dablk_t			blkno;
	int				level;
	int				error;

	/* Skip short format data structures; no btree to scan. */
	if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE)
		return 0;

	/* Set up initial da state. */
	ds.dargs.dp = sc->ip;
	ds.dargs.whichfork = whichfork;
	ds.dargs.trans = sc->tp;
	ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
	ds.state = xfs_da_state_alloc();
	ds.state->args = &ds.dargs;
	ds.state->mp = mp;
	ds.sc = sc;
	ds.private = private;
	if (whichfork == XFS_ATTR_FORK) {
		/* Attr fork blocks have no address bounds. */
		ds.dargs.geo = mp->m_attr_geo;
		ds.lowest = 0;
		ds.highest = 0;
	} else {
		/* Dir da blocks must lie between the leaf and free offsets. */
		ds.dargs.geo = mp->m_dir_geo;
		ds.lowest = ds.dargs.geo->leafblk;
		ds.highest = ds.dargs.geo->freeblk;
	}
	blkno = ds.lowest;
	level = 0;

	/* Find the root of the da tree, if present. */
	blks = ds.state->path.blk;
	error = xfs_scrub_da_btree_block(&ds, level, blkno);
	if (error)
		goto out_state;
	/*
	 * We didn't find a block at ds.lowest, which means that there's
	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
	 * so jump out now.
	 */
	if (blks[level].bp == NULL)
		goto out_state;

	blks[level].index = 0;
	while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
		/* Handle leaf block. */
		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
			/* End of leaf, pop back towards the root. */
			if (blks[level].index >= ds.maxrecs[level]) {
				if (level > 0)
					blks[level - 1].index++;
				ds.tree_level++;
				level--;
				continue;
			}

			/* Dispatch record scrubbing. */
			rec = xfs_scrub_da_btree_entry(&ds, level,
					blks[level].index);
			error = scrub_fn(&ds, level, rec);
			if (error)
				break;
			/* Stop on user abort or once corruption is found. */
			if (xfs_scrub_should_terminate(sc, &error) ||
			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
				break;

			blks[level].index++;
			continue;
		}


		/* End of node, pop back towards the root. */
		if (blks[level].index >= ds.maxrecs[level]) {
			if (level > 0)
				blks[level - 1].index++;
			ds.tree_level++;
			level--;
			continue;
		}

		/* Hashes in order for scrub? */
		key = xfs_scrub_da_btree_entry(&ds, level, blks[level].index);
		error = xfs_scrub_da_btree_hash(&ds, level, &key->hashval);
		if (error)
			goto out;

		/* Drill another level deeper. */
		blkno = be32_to_cpu(key->before);
		level++;
		ds.tree_level--;
		error = xfs_scrub_da_btree_block(&ds, level, blkno);
		if (error)
			goto out;
		if (blks[level].bp == NULL)
			goto out;

		blks[level].index = 0;
	}

out:
	/* Release all the buffers we're tracking. */
	for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
		if (blks[level].bp == NULL)
			continue;
		xfs_trans_brelse(sc->tp, blks[level].bp);
		blks[level].bp = NULL;
	}

out_state:
	xfs_da_state_free(ds.state);
	return error;
}
diff --git a/fs/xfs/scrub/dabtree.h b/fs/xfs/scrub/dabtree.h
new file mode 100644
index 000000000000..d31468d68cef
--- /dev/null
+++ b/fs/xfs/scrub/dabtree.h
@@ -0,0 +1,59 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#ifndef __XFS_SCRUB_DABTREE_H__
21#define __XFS_SCRUB_DABTREE_H__
22
23/* dir/attr btree */
24
/* Context for one walk of a dir/attr btree. */
struct xfs_scrub_da_btree {
	struct xfs_da_args		dargs;	/* lookup args for this walk */
	xfs_dahash_t			hashes[XFS_DA_NODE_MAXDEPTH];
						/* last hash seen per level */
	int				maxrecs[XFS_DA_NODE_MAXDEPTH];
						/* entry count per level */
	struct xfs_da_state		*state;	/* cursor (path) state */
	struct xfs_scrub_context	*sc;	/* overall scrub context */
	void				*private; /* opaque data for rec_fn */

	/*
	 * Lowest and highest directory block address in which we expect
	 * to find dir/attr btree node blocks. For a directory this
	 * (presumably) means between LEAF_OFFSET and FREE_OFFSET; for
	 * attributes there is no limit.
	 */
	xfs_dablk_t			lowest;
	xfs_dablk_t			highest;

	/* Expected node depth of the cursor's current position. */
	int				tree_level;
};
44
45typedef int (*xfs_scrub_da_btree_rec_fn)(struct xfs_scrub_da_btree *ds,
46 int level, void *rec);
47
48/* Check for da btree operation errors. */
49bool xfs_scrub_da_process_error(struct xfs_scrub_da_btree *ds, int level, int *error);
50
51/* Check for da btree corruption. */
52void xfs_scrub_da_set_corrupt(struct xfs_scrub_da_btree *ds, int level);
53
54int xfs_scrub_da_btree_hash(struct xfs_scrub_da_btree *ds, int level,
55 __be32 *hashp);
56int xfs_scrub_da_btree(struct xfs_scrub_context *sc, int whichfork,
57 xfs_scrub_da_btree_rec_fn scrub_fn, void *private);
58
59#endif /* __XFS_SCRUB_DABTREE_H__ */
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
new file mode 100644
index 000000000000..69e1efdd4019
--- /dev/null
+++ b/fs/xfs/scrub/dir.c
@@ -0,0 +1,816 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_icache.h"
34#include "xfs_itable.h"
35#include "xfs_da_format.h"
36#include "xfs_da_btree.h"
37#include "xfs_dir2.h"
38#include "xfs_dir2_priv.h"
39#include "xfs_ialloc.h"
40#include "scrub/xfs_scrub.h"
41#include "scrub/scrub.h"
42#include "scrub/common.h"
43#include "scrub/trace.h"
44#include "scrub/dabtree.h"
45
/* Set us up to scrub directories. */
int
xfs_scrub_setup_directory(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	/* Reading a directory needs no extra block reservation. */
	return xfs_scrub_setup_inode_contents(sc, ip, 0);
}
54
55/* Directories */
56
57/* Scrub a directory entry. */
58
/* Per-call context threaded through the readdir-based dirent check. */
struct xfs_scrub_dir_ctx {
	/* VFS fill-directory iterator */
	struct dir_context	dir_iter;

	/* Scrub context; recovered in the actor via container_of. */
	struct xfs_scrub_context	*sc;
};
65
/*
 * Check that an inode's mode matches a given DT_ type.
 *
 * @offset is the directory-block offset used for corruption reporting;
 * @inum is the inode the dirent points at; @dtype is the DT_* value
 * the readdir iterator reported for that dirent.
 */
STATIC int
xfs_scrub_dir_check_ftype(
	struct xfs_scrub_dir_ctx	*sdc,
	xfs_fileoff_t			offset,
	xfs_ino_t			inum,
	int				dtype)
{
	struct xfs_mount		*mp = sdc->sc->mp;
	struct xfs_inode		*ip;
	int				ino_dtype;
	int				error = 0;

	if (!xfs_sb_version_hasftype(&mp->m_sb)) {
		/*
		 * Without the ftype feature, dirents store no type, so
		 * readdir may only report DT_UNKNOWN (or DT_DIR for the
		 * synthesized "." and ".." entries).
		 */
		if (dtype != DT_UNKNOWN && dtype != DT_DIR)
			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
					offset);
		goto out;
	}

	/*
	 * Grab the inode pointed to by the dirent. We release the
	 * inode before we cancel the scrub transaction. Since we
	 * don't know a priori that releasing the inode won't trigger
	 * eofblocks cleanup (which allocates what would be a nested
	 * transaction), we can't use DONTCACHE here because DONTCACHE
	 * inodes can trigger immediate inactive cleanup of the inode.
	 */
	error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip);
	if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset,
			&error))
		goto out;

	/* Convert mode to the DT_* values that dir_emit uses. */
	ino_dtype = xfs_dir3_get_dtype(mp,
			xfs_mode_to_ftype(VFS_I(ip)->i_mode));
	if (ino_dtype != dtype)
		xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
	iput(VFS_I(ip));
out:
	return error;
}
108
/*
 * Scrub a single directory entry.
 *
 * We use the VFS directory iterator (i.e. readdir) to call this
 * function for every directory entry in a directory. Once we're here,
 * we check the inode number to make sure it's sane, then we check that
 * we can look up this filename. Finally, we check the ftype.
 */
STATIC int
xfs_scrub_dir_actor(
	struct dir_context	*dir_iter,
	const char		*name,
	int			namelen,
	loff_t			pos,
	u64			ino,
	unsigned		type)
{
	struct xfs_mount	*mp;
	struct xfs_inode	*ip;
	struct xfs_scrub_dir_ctx *sdc;
	struct xfs_name		xname;
	xfs_ino_t		lookup_ino;
	xfs_dablk_t		offset;
	int			error = 0;

	sdc = container_of(dir_iter, struct xfs_scrub_dir_ctx, dir_iter);
	ip = sdc->sc->ip;
	mp = ip->i_mount;
	/* Translate the readdir cookie into a dablk for error reports. */
	offset = xfs_dir2_db_to_da(mp->m_dir_geo,
			xfs_dir2_dataptr_to_db(mp->m_dir_geo, pos));

	/* Does this inode number make sense? */
	if (!xfs_verify_dir_ino(mp, ino)) {
		xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
		goto out;
	}

	if (!strncmp(".", name, namelen)) {
		/* If this is "." then check that the inum matches the dir. */
		if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR)
			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
					offset);
		if (ino != ip->i_ino)
			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
					offset);
	} else if (!strncmp("..", name, namelen)) {
		/*
		 * If this is ".." in the root inode, check that the inum
		 * matches this dir.
		 */
		if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR)
			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
					offset);
		if (ip->i_ino == mp->m_sb.sb_rootino && ino != ip->i_ino)
			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
					offset);
	}

	/* Verify that we can look up this name by hash. */
	xname.name = name;
	xname.len = namelen;
	xname.type = XFS_DIR3_FT_UNKNOWN;

	error = xfs_dir_lookup(sdc->sc->tp, ip, &xname, &lookup_ino, NULL);
	if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset,
			&error))
		goto fail_xref;
	if (lookup_ino != ino) {
		xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
		goto out;
	}

	/* Verify the file type. This function absorbs error codes. */
	error = xfs_scrub_dir_check_ftype(sdc, offset, lookup_ino, type);
	if (error)
		goto out;
out:
	return error;
fail_xref:
	/*
	 * NOTE(review): currently identical to "out"; presumably a
	 * placeholder for future cross-referencing cleanup.
	 */
	return error;
}
190
/*
 * Scrub a directory btree record.
 *
 * Checks a leaf entry's hash ordering, decodes its address into a data
 * block and offset, reads that data block, and verifies that the data
 * entry there has a valid inum, a back-pointing tag, and a name whose
 * hash matches the leaf entry's hash.
 */
STATIC int
xfs_scrub_dir_rec(
	struct xfs_scrub_da_btree	*ds,
	int				level,
	void				*rec)
{
	struct xfs_mount		*mp = ds->state->mp;
	struct xfs_dir2_leaf_entry	*ent = rec;
	struct xfs_inode		*dp = ds->dargs.dp;
	struct xfs_dir2_data_entry	*dent;
	struct xfs_buf			*bp;
	xfs_ino_t			ino;
	xfs_dablk_t			rec_bno;
	xfs_dir2_db_t			db;
	xfs_dir2_data_aoff_t		off;
	xfs_dir2_dataptr_t		ptr;
	xfs_dahash_t			calc_hash;
	xfs_dahash_t			hash;
	unsigned int			tag;
	int				error;

	/* Check the hash of the entry. */
	error = xfs_scrub_da_btree_hash(ds, level, &ent->hashval);
	if (error)
		goto out;

	/* Valid hash pointer? A zero address marks a stale entry. */
	ptr = be32_to_cpu(ent->address);
	if (ptr == 0)
		return 0;

	/* Find the directory entry's location. */
	db = xfs_dir2_dataptr_to_db(mp->m_dir_geo, ptr);
	off = xfs_dir2_dataptr_to_off(mp->m_dir_geo, ptr);
	rec_bno = xfs_dir2_db_to_da(mp->m_dir_geo, db);

	/* Data blocks must live below the leaf region. */
	if (rec_bno >= mp->m_dir_geo->leafblk) {
		xfs_scrub_da_set_corrupt(ds, level);
		goto out;
	}
	error = xfs_dir3_data_read(ds->dargs.trans, dp, rec_bno, -2, &bp);
	if (!xfs_scrub_fblock_process_error(ds->sc, XFS_DATA_FORK, rec_bno,
			&error))
		goto out;
	if (!bp) {
		xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
		goto out;
	}

	/* Retrieve the entry, sanity check it, and compare hashes. */
	dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off);
	ino = be64_to_cpu(dent->inumber);
	hash = be32_to_cpu(ent->hashval);
	/* The entry's tag must point back at its own offset. */
	tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent));
	if (!xfs_verify_dir_ino(mp, ino) || tag != off)
		xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
	if (dent->namelen == 0) {
		xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
		goto out_relse;
	}
	calc_hash = xfs_da_hashname(dent->name, dent->namelen);
	if (calc_hash != hash)
		xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);

out_relse:
	xfs_trans_brelse(ds->dargs.trans, bp);
out:
	return error;
}
261
262/*
263 * Is this unused entry either in the bestfree or smaller than all of
264 * them? We've already checked that the bestfrees are sorted longest to
265 * shortest, and that there aren't any bogus entries.
266 */
267STATIC void
268xfs_scrub_directory_check_free_entry(
269 struct xfs_scrub_context *sc,
270 xfs_dablk_t lblk,
271 struct xfs_dir2_data_free *bf,
272 struct xfs_dir2_data_unused *dup)
273{
274 struct xfs_dir2_data_free *dfp;
275 unsigned int dup_length;
276
277 dup_length = be16_to_cpu(dup->length);
278
279 /* Unused entry is shorter than any of the bestfrees */
280 if (dup_length < be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length))
281 return;
282
283 for (dfp = &bf[XFS_DIR2_DATA_FD_COUNT - 1]; dfp >= bf; dfp--)
284 if (dup_length == be16_to_cpu(dfp->length))
285 return;
286
287 /* Unused entry should be in the bestfrees but wasn't found. */
288 xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
289}
290
/*
 * Check free space info in a directory data block.
 *
 * First verifies that every bestfree record points at a real unused
 * entry of matching length and that the records are sorted largest to
 * smallest; then walks every entry in the block to confirm that each
 * free region is either a bestfree or smaller than all of them, that
 * the entries tile the block exactly, and that there are at least as
 * many free regions as bestfree records.
 */
STATIC int
xfs_scrub_directory_data_bestfree(
	struct xfs_scrub_context	*sc,
	xfs_dablk_t			lblk,
	bool				is_block)
{
	struct xfs_dir2_data_unused	*dup;
	struct xfs_dir2_data_free	*dfp;
	struct xfs_buf			*bp;
	struct xfs_dir2_data_free	*bf;
	struct xfs_mount		*mp = sc->mp;
	const struct xfs_dir_ops	*d_ops;
	char				*ptr;
	char				*endptr;
	u16				tag;
	unsigned int			nr_bestfrees = 0;
	unsigned int			nr_frees = 0;
	unsigned int			smallest_bestfree;
	int				newlen;
	int				offset;
	int				error;

	d_ops = sc->ip->d_ops;

	if (is_block) {
		/* dir block format */
		if (lblk != XFS_B_TO_FSBT(mp, XFS_DIR2_DATA_OFFSET))
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
		error = xfs_dir3_block_read(sc->tp, sc->ip, &bp);
	} else {
		/* dir data format */
		error = xfs_dir3_data_read(sc->tp, sc->ip, lblk, -1, &bp);
	}
	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
		goto out;

	/* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */

	/* Do the bestfrees correspond to actual free space? */
	bf = d_ops->data_bestfree_p(bp->b_addr);
	smallest_bestfree = UINT_MAX;
	for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
		offset = be16_to_cpu(dfp->offset);
		/* A zero offset marks an unused bestfree slot. */
		if (offset == 0)
			continue;
		if (offset >= mp->m_dir_geo->blksize) {
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
			goto out_buf;
		}
		dup = (struct xfs_dir2_data_unused *)(bp->b_addr + offset);
		tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));

		/* bestfree doesn't match the entry it points at? */
		if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG) ||
		    be16_to_cpu(dup->length) != be16_to_cpu(dfp->length) ||
		    tag != ((char *)dup - (char *)bp->b_addr)) {
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
			goto out_buf;
		}

		/* bestfree records should be ordered largest to smallest */
		if (smallest_bestfree < be16_to_cpu(dfp->length)) {
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
			goto out_buf;
		}

		smallest_bestfree = be16_to_cpu(dfp->length);
		nr_bestfrees++;
	}

	/* Make sure the bestfrees are actually the best free spaces. */
	ptr = (char *)d_ops->data_entry_p(bp->b_addr);
	if (is_block) {
		struct xfs_dir2_block_tail	*btp;

		/* Block format keeps a leaf+tail at the end; stop there. */
		btp = xfs_dir2_block_tail_p(mp->m_dir_geo, bp->b_addr);
		endptr = (char *)xfs_dir2_block_leaf_p(btp);
	} else
		endptr = (char *)bp->b_addr + BBTOB(bp->b_length);

	/* Iterate the entries, stopping when we hit or go past the end. */
	while (ptr < endptr) {
		dup = (struct xfs_dir2_data_unused *)ptr;
		/* Skip real entries */
		if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG)) {
			struct xfs_dir2_data_entry	*dep;

			dep = (struct xfs_dir2_data_entry *)ptr;
			newlen = d_ops->data_entsize(dep->namelen);
			/* A non-positive entry size would loop forever. */
			if (newlen <= 0) {
				xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
						lblk);
				goto out_buf;
			}
			ptr += newlen;
			continue;
		}

		/* Spot check this free entry */
		tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
		if (tag != ((char *)dup - (char *)bp->b_addr))
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);

		/*
		 * Either this entry is a bestfree or it's smaller than
		 * any of the bestfrees.
		 */
		xfs_scrub_directory_check_free_entry(sc, lblk, bf, dup);

		/* Move on. */
		newlen = be16_to_cpu(dup->length);
		if (newlen <= 0) {
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
			goto out_buf;
		}
		ptr += newlen;
		/* Only count free regions that end within the block. */
		if (ptr <= endptr)
			nr_frees++;
	}

	/* We're required to fill all the space. */
	if (ptr != endptr)
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);

	/* Did we see at least as many free slots as there are bestfrees? */
	if (nr_frees < nr_bestfrees)
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
out_buf:
	xfs_trans_brelse(sc->tp, bp);
out:
	return error;
}
424
425/*
426 * Does the free space length in the free space index block ($len) match
427 * the longest length in the directory data block's bestfree array?
428 * Assume that we've already checked that the data block's bestfree
429 * array is in order.
430 */
431STATIC void
432xfs_scrub_directory_check_freesp(
433 struct xfs_scrub_context *sc,
434 xfs_dablk_t lblk,
435 struct xfs_buf *dbp,
436 unsigned int len)
437{
438 struct xfs_dir2_data_free *dfp;
439
440 dfp = sc->ip->d_ops->data_bestfree_p(dbp->b_addr);
441
442 if (len != be16_to_cpu(dfp->length))
443 xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
444
445 if (len > 0 && be16_to_cpu(dfp->offset) == 0)
446 xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
447}
448
/*
 * Check free space info in a directory leaf1 block.
 *
 * A leaf1 block holds the sorted (hash, address) entries for a leaf-format
 * directory, plus a tail containing one "best free" length per data block.
 * Verify the hash ordering and stale count, then cross-reference every
 * bestfree value against the matching data block's bestfree header.
 * Returns 0 or a negative errno; corruption is flagged via sc->sm.
 */
STATIC int
xfs_scrub_directory_leaf1_bestfree(
	struct xfs_scrub_context	*sc,
	struct xfs_da_args		*args,
	xfs_dablk_t			lblk)
{
	struct xfs_dir3_icleaf_hdr	leafhdr;
	struct xfs_dir2_leaf_entry	*ents;
	struct xfs_dir2_leaf_tail	*ltp;
	struct xfs_dir2_leaf		*leaf;
	struct xfs_buf			*dbp;
	struct xfs_buf			*bp;
	const struct xfs_dir_ops	*d_ops = sc->ip->d_ops;
	struct xfs_da_geometry		*geo = sc->mp->m_dir_geo;
	__be16				*bestp;
	__u16				best;
	__u32				hash;
	__u32				lasthash = 0;
	__u32				bestcount;
	unsigned int			stale = 0;
	int				i;
	int				error;

	/* Read the free space block. */
	error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp);
	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
		goto out;

	leaf = bp->b_addr;
	d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
	ents = d_ops->leaf_ents_p(leaf);
	ltp = xfs_dir2_leaf_tail_p(geo, leaf);
	bestcount = be32_to_cpu(ltp->bestcount);
	bestp = xfs_dir2_leaf_bests_p(ltp);

	/* v5 filesystems: the header padding must be zeroed. */
	if (xfs_sb_version_hascrc(&sc->mp->m_sb)) {
		struct xfs_dir3_leaf_hdr	*hdr3 = bp->b_addr;

		if (hdr3->pad != cpu_to_be32(0))
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
	}

	/*
	 * There should be as many bestfree slots as there are dir data
	 * blocks that can fit under i_size.
	 */
	if (bestcount != xfs_dir2_byte_to_db(geo, sc->ip->i_d.di_size)) {
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
		goto out;
	}

	/* Is the leaf count even remotely sane? */
	if (leafhdr.count > d_ops->leaf_max_ents(geo)) {
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
		goto out;
	}

	/* Leaves and bests don't overlap in leaf format. */
	if ((char *)&ents[leafhdr.count] > (char *)bestp) {
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
		goto out;
	}

	/* Check hash value order, count stale entries. */
	for (i = 0; i < leafhdr.count; i++) {
		hash = be32_to_cpu(ents[i].hashval);
		/* Hashes must be sorted in ascending order. */
		if (i > 0 && lasthash > hash)
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
		lasthash = hash;
		/* A null data pointer marks a stale (deleted) entry. */
		if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
			stale++;
	}
	/* The header's stale count must match what we just counted. */
	if (leafhdr.stale != stale)
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);

	/*
	 * Check all the bestfree entries.  NULLDATAOFF means there is no
	 * data block at that index, so there's nothing to cross-reference.
	 */
	for (i = 0; i < bestcount; i++, bestp++) {
		best = be16_to_cpu(*bestp);
		if (best == NULLDATAOFF)
			continue;
		error = xfs_dir3_data_read(sc->tp, sc->ip,
				i * args->geo->fsbcount, -1, &dbp);
		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk,
				&error))
			continue;
		xfs_scrub_directory_check_freesp(sc, lblk, dbp, best);
		xfs_trans_brelse(sc->tp, dbp);
	}
out:
	return error;
}
541
/*
 * Check free space info in a directory freespace (freeindex) block.
 *
 * A free block in a node-format directory records the longest free space
 * of each data block it covers.  Cross-reference every valid slot against
 * the corresponding data block's bestfree header, and verify that the
 * used + stale slot counts add up to the number of valid slots.
 */
STATIC int
xfs_scrub_directory_free_bestfree(
	struct xfs_scrub_context	*sc,
	struct xfs_da_args		*args,
	xfs_dablk_t			lblk)
{
	struct xfs_dir3_icfree_hdr	freehdr;
	struct xfs_buf			*dbp;
	struct xfs_buf			*bp;
	__be16				*bestp;
	__u16				best;
	unsigned int			stale = 0;
	int				i;
	int				error;

	/* Read the free space block */
	error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp);
	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
		goto out;

	/* v5 filesystems: the header padding must be zeroed. */
	if (xfs_sb_version_hascrc(&sc->mp->m_sb)) {
		struct xfs_dir3_free_hdr	*hdr3 = bp->b_addr;

		if (hdr3->pad != cpu_to_be32(0))
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
	}

	/*
	 * Check all the entries.  NULLDATAOFF marks a stale slot (no data
	 * block); anything else must match the data block's bestfree info.
	 */
	sc->ip->d_ops->free_hdr_from_disk(&freehdr, bp->b_addr);
	bestp = sc->ip->d_ops->free_bests_p(bp->b_addr);
	for (i = 0; i < freehdr.nvalid; i++, bestp++) {
		best = be16_to_cpu(*bestp);
		if (best == NULLDATAOFF) {
			stale++;
			continue;
		}
		/* firstdb + i maps this slot back to its data block. */
		error = xfs_dir3_data_read(sc->tp, sc->ip,
				(freehdr.firstdb + i) * args->geo->fsbcount,
				-1, &dbp);
		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk,
				&error))
			continue;
		xfs_scrub_directory_check_freesp(sc, lblk, dbp, best);
		xfs_trans_brelse(sc->tp, dbp);
	}

	/* Used and stale slots must account for every valid slot. */
	if (freehdr.nused + stale != freehdr.nvalid)
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
out:
	return error;
}
594
/*
 * Check free space information in directories.
 *
 * Walk the data fork of a non-shortform directory: check each data
 * block's bestfree info, then (for leaf format) the leaf1 block's
 * bestfree array, then (for node format) every freeindex block.
 */
STATIC int
xfs_scrub_directory_blocks(
	struct xfs_scrub_context	*sc)
{
	struct xfs_bmbt_irec		got;
	struct xfs_da_args		args;
	struct xfs_ifork		*ifp;
	struct xfs_mount		*mp = sc->mp;
	xfs_fileoff_t			leaf_lblk;
	xfs_fileoff_t			free_lblk;
	xfs_fileoff_t			lblk;
	struct xfs_iext_cursor		icur;
	xfs_dablk_t			dabno;
	bool				found;
	int				is_block = 0;
	int				error;

	/* Ignore local format directories. */
	if (sc->ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
	    sc->ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
		return 0;

	ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK);
	/* File offsets of the data, leaf, and freeindex dir sections. */
	lblk = XFS_B_TO_FSB(mp, XFS_DIR2_DATA_OFFSET);
	leaf_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_LEAF_OFFSET);
	free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET);

	/* Is this a block dir?  Only dp/geo/trans are needed by _isblock. */
	args.dp = sc->ip;
	args.geo = mp->m_dir_geo;
	args.trans = sc->tp;
	error = xfs_dir2_isblock(&args, &is_block);
	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
		goto out;

	/* Iterate all the data extents in the directory... */
	found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
	while (found) {
		/* Block directories only have a single block at offset 0. */
		if (is_block &&
		    (got.br_startoff > 0 ||
		     got.br_blockcount != args.geo->fsbcount)) {
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
					got.br_startoff);
			break;
		}

		/* No more data blocks... */
		if (got.br_startoff >= leaf_lblk)
			break;

		/*
		 * Check each data block's bestfree data.
		 *
		 * Iterate all the fsbcount-aligned block offsets in
		 * this directory.  The directory block reading code is
		 * smart enough to do its own bmap lookups to handle
		 * discontiguous directory blocks.  When we're done
		 * with the extent record, re-query the bmap at the
		 * next fsbcount-aligned offset to avoid redundant
		 * block checks.
		 */
		for (lblk = roundup((xfs_dablk_t)got.br_startoff,
				args.geo->fsbcount);
		     lblk < got.br_startoff + got.br_blockcount;
		     lblk += args.geo->fsbcount) {
			error = xfs_scrub_directory_data_bestfree(sc, lblk,
					is_block);
			if (error)
				goto out;
		}
		dabno = got.br_startoff + got.br_blockcount;
		lblk = roundup(dabno, args.geo->fsbcount);
		found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
	}

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	/*
	 * Look for a leaf1 block, which has free info.  It must be a single
	 * dir block mapped exactly at leaf_lblk with nothing after it.
	 */
	if (xfs_iext_lookup_extent(sc->ip, ifp, leaf_lblk, &icur, &got) &&
	    got.br_startoff == leaf_lblk &&
	    got.br_blockcount == args.geo->fsbcount &&
	    !xfs_iext_next_extent(ifp, &icur, &got)) {
		/* A block-format dir must not have a leaf block. */
		if (is_block) {
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
			goto out;
		}
		error = xfs_scrub_directory_leaf1_bestfree(sc, &args,
				leaf_lblk);
		if (error)
			goto out;
	}

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	/* Scan for free blocks */
	lblk = free_lblk;
	found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
	while (found) {
		/*
		 * Dirs can't have blocks mapped above 2^32.
		 * Single-block dirs shouldn't even be here.
		 */
		lblk = got.br_startoff;
		if (lblk & ~0xFFFFFFFFULL) {
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
			goto out;
		}
		if (is_block) {
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
			goto out;
		}

		/*
		 * Check each dir free block's bestfree data.
		 *
		 * Iterate all the fsbcount-aligned block offsets in
		 * this directory.  The directory block reading code is
		 * smart enough to do its own bmap lookups to handle
		 * discontiguous directory blocks.  When we're done
		 * with the extent record, re-query the bmap at the
		 * next fsbcount-aligned offset to avoid redundant
		 * block checks.
		 */
		for (lblk = roundup((xfs_dablk_t)got.br_startoff,
				args.geo->fsbcount);
		     lblk < got.br_startoff + got.br_blockcount;
		     lblk += args.geo->fsbcount) {
			error = xfs_scrub_directory_free_bestfree(sc, &args,
					lblk);
			if (error)
				goto out;
		}
		dabno = got.br_startoff + got.br_blockcount;
		lblk = roundup(dabno, args.geo->fsbcount);
		found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
	}
out:
	return error;
}
738
/*
 * Scrub a whole directory.
 *
 * Returns -ENOENT if the inode is not a directory.  Checks the dabtree
 * structure and the freespace metadata, then walks every dirent via
 * xfs_readdir so that xfs_scrub_dir_actor can verify each entry by
 * hash lookup.
 */
int
xfs_scrub_directory(
	struct xfs_scrub_context	*sc)
{
	struct xfs_scrub_dir_ctx	sdc = {
		.dir_iter.actor = xfs_scrub_dir_actor,
		.dir_iter.pos = 0,
		.sc = sc,
	};
	size_t				bufsize;
	loff_t				oldpos;
	int				error = 0;

	if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
		return -ENOENT;

	/* Plausible size?  Even an empty dir has a shortform header. */
	if (sc->ip->i_d.di_size < xfs_dir2_sf_hdr_size(0)) {
		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
		goto out;
	}

	/* Check directory tree structure */
	error = xfs_scrub_da_btree(sc, XFS_DATA_FORK, xfs_scrub_dir_rec, NULL);
	if (error)
		return error;

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return error;

	/* Check the freespace. */
	error = xfs_scrub_directory_blocks(sc);
	if (error)
		return error;

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return error;

	/*
	 * Check that every dirent we see can also be looked up by hash.
	 * Userspace usually asks for a 32k buffer, so we will too.
	 */
	bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE,
			sc->ip->i_d.di_size);

	/*
	 * Look up every name in this directory by hash.
	 *
	 * Use the xfs_readdir function to call xfs_scrub_dir_actor on
	 * every directory entry in this directory.  In _actor, we check
	 * the name, inode number, and ftype (if applicable) of the
	 * entry.  xfs_readdir uses the VFS filldir functions to provide
	 * iteration context.
	 *
	 * The VFS grabs a read or write lock via i_rwsem before it reads
	 * or writes to a directory.  If we've gotten this far we've
	 * already obtained IOLOCK_EXCL, which (since 4.10) is the same as
	 * getting a write lock on i_rwsem.  Therefore, it is safe for us
	 * to drop the ILOCK here in order to reuse the _readdir and
	 * _dir_lookup routines, which do their own ILOCK locking.
	 */
	oldpos = 0;
	sc->ilock_flags &= ~XFS_ILOCK_EXCL;
	xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
	while (true) {
		error = xfs_readdir(sc->tp, sc->ip, &sdc.dir_iter, bufsize);
		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0,
				&error))
			goto out;
		/* Stop once the readdir cursor no longer advances. */
		if (oldpos == sdc.dir_iter.pos)
			break;
		oldpos = sdc.dir_iter.pos;
	}

out:
	return error;
}
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
new file mode 100644
index 000000000000..496d6f2fbb9e
--- /dev/null
+++ b/fs/xfs/scrub/ialloc.c
@@ -0,0 +1,337 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_alloc.h"
34#include "xfs_ialloc.h"
35#include "xfs_ialloc_btree.h"
36#include "xfs_icache.h"
37#include "xfs_rmap.h"
38#include "xfs_log.h"
39#include "xfs_trans_priv.h"
40#include "scrub/xfs_scrub.h"
41#include "scrub/scrub.h"
42#include "scrub/common.h"
43#include "scrub/btree.h"
44#include "scrub/trace.h"
45
46/*
47 * Set us up to scrub inode btrees.
48 * If we detect a discrepancy between the inobt and the inode,
49 * try again after forcing logged inode cores out to disk.
50 */
51int
52xfs_scrub_setup_ag_iallocbt(
53 struct xfs_scrub_context *sc,
54 struct xfs_inode *ip)
55{
56 return xfs_scrub_setup_ag_btree(sc, ip, sc->try_harder);
57}
58
59/* Inode btree scrubber. */
60
61/* Is this chunk worth checking? */
62STATIC bool
63xfs_scrub_iallocbt_chunk(
64 struct xfs_scrub_btree *bs,
65 struct xfs_inobt_rec_incore *irec,
66 xfs_agino_t agino,
67 xfs_extlen_t len)
68{
69 struct xfs_mount *mp = bs->cur->bc_mp;
70 xfs_agnumber_t agno = bs->cur->bc_private.a.agno;
71 xfs_agblock_t bno;
72
73 bno = XFS_AGINO_TO_AGBNO(mp, agino);
74 if (bno + len <= bno ||
75 !xfs_verify_agbno(mp, agno, bno) ||
76 !xfs_verify_agbno(mp, agno, bno + len - 1))
77 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
78
79 return true;
80}
81
/* Count the number of free inodes marked in an inobt record's freemask. */
static unsigned int
xfs_scrub_iallocbt_freecount(
	xfs_inofree_t			freemask)
{
	/* hweight64 requires the freemask to be exactly 64 bits wide. */
	BUILD_BUG_ON(sizeof(freemask) != sizeof(__u64));
	return hweight64(freemask);
}
90
/*
 * Check a particular inode with ir_free.
 *
 * Compare the inobt record's free bit for one inode of a cluster buffer
 * against the in-core inode state (if cached) or the ondisk di_mode
 * (if not).  May return -EDEADLOCK to ask the caller to retry with
 * try_harder set (which forces inode cores to disk first).
 */
STATIC int
xfs_scrub_iallocbt_check_cluster_freemask(
	struct xfs_scrub_btree		*bs,
	xfs_ino_t			fsino,
	xfs_agino_t			chunkino,
	xfs_agino_t			clusterino,
	struct xfs_inobt_rec_incore	*irec,
	struct xfs_buf			*bp)
{
	struct xfs_dinode		*dip;
	struct xfs_mount		*mp = bs->cur->bc_mp;
	bool				inode_is_free = false;
	bool				freemask_ok;
	bool				inuse;
	int				error = 0;

	if (xfs_scrub_should_terminate(bs->sc, &error))
		return error;

	/* Locate this inode within the cluster buffer. */
	dip = xfs_buf_offset(bp, clusterino * mp->m_sb.sb_inodesize);
	/* Bad magic, or a v3 inode whose self-describing inum is wrong? */
	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
	    (dip->di_version >= 3 &&
	     be64_to_cpu(dip->di_ino) != fsino + clusterino)) {
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
		goto out;
	}

	if (irec->ir_free & XFS_INOBT_MASK(chunkino + clusterino))
		inode_is_free = true;
	error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp,
			fsino + clusterino, &inuse);
	if (error == -ENODATA) {
		/* Not cached, just read the disk buffer */
		freemask_ok = inode_is_free ^ !!(dip->di_mode);
		if (!bs->sc->try_harder && !freemask_ok)
			return -EDEADLOCK;
	} else if (error < 0) {
		/*
		 * Inode is only half assembled, or there was an IO error,
		 * or the verifier failed, so don't bother trying to check.
		 * The inode scrubber can deal with this.
		 */
		goto out;
	} else {
		/* Inode is all there. */
		freemask_ok = inode_is_free ^ inuse;
	}
	if (!freemask_ok)
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
out:
	/*
	 * NOTE(review): this deliberately returns 0 rather than error,
	 * swallowing any _inode_is_allocated failure per the comment
	 * above; only the early-exit paths propagate a nonzero error.
	 */
	return 0;
}
144
/*
 * Make sure the free mask is consistent with what the inodes think.
 *
 * Walk each inode cluster covered by this inobt record, read the
 * cluster buffer, and check every inode in it against the record's
 * ir_free bits; sparse holes (ir_holemask) must cover whole clusters.
 */
STATIC int
xfs_scrub_iallocbt_check_freemask(
	struct xfs_scrub_btree		*bs,
	struct xfs_inobt_rec_incore	*irec)
{
	struct xfs_owner_info		oinfo;
	struct xfs_imap			imap;
	struct xfs_mount		*mp = bs->cur->bc_mp;
	struct xfs_dinode		*dip;
	struct xfs_buf			*bp;
	xfs_ino_t			fsino;
	xfs_agino_t			nr_inodes;
	xfs_agino_t			agino;
	xfs_agino_t			chunkino;
	xfs_agino_t			clusterino;
	xfs_agblock_t			agbno;
	int				blks_per_cluster;
	uint16_t			holemask;
	uint16_t			ir_holemask;
	int				error = 0;

	/* Make sure the freemask matches the inode records. */
	blks_per_cluster = xfs_icluster_size_fsb(mp);
	nr_inodes = XFS_OFFBNO_TO_AGINO(mp, blks_per_cluster, 0);
	/* NOTE(review): oinfo is initialized but unused here — confirm. */
	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);

	/* Step through the chunk one inode cluster at a time. */
	for (agino = irec->ir_startino;
	     agino < irec->ir_startino + XFS_INODES_PER_CHUNK;
	     agino += blks_per_cluster * mp->m_sb.sb_inopblock) {
		fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_private.a.agno, agino);
		chunkino = agino - irec->ir_startino;
		agbno = XFS_AGINO_TO_AGBNO(mp, agino);

		/* Compute the holemask mask for this cluster. */
		for (clusterino = 0, holemask = 0; clusterino < nr_inodes;
		     clusterino += XFS_INODES_PER_HOLEMASK_BIT)
			holemask |= XFS_INOBT_MASK((chunkino + clusterino) /
					XFS_INODES_PER_HOLEMASK_BIT);

		/* The whole cluster must be a hole or not a hole. */
		ir_holemask = (irec->ir_holemask & holemask);
		if (ir_holemask != holemask && ir_holemask != 0) {
			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
			continue;
		}

		/* If any part of this is a hole, skip it. */
		if (ir_holemask)
			continue;

		/* Grab the inode cluster buffer. */
		imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno,
				agbno);
		imap.im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
		imap.im_boffset = 0;

		error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap,
				&dip, &bp, 0, 0);
		if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, 0, &error))
			continue;

		/* Which inodes are free? */
		for (clusterino = 0; clusterino < nr_inodes; clusterino++) {
			error = xfs_scrub_iallocbt_check_cluster_freemask(bs,
					fsino, chunkino, clusterino, irec, bp);
			if (error) {
				xfs_trans_brelse(bs->cur->bc_tp, bp);
				return error;
			}
		}

		xfs_trans_brelse(bs->cur->bc_tp, bp);
	}

	return error;
}
222
/*
 * Scrub an inobt/finobt record.
 *
 * Validates the record's counts, alignment, and freemask, then
 * cross-checks the freemask against the inode cluster buffers.
 */
STATIC int
xfs_scrub_iallocbt_rec(
	struct xfs_scrub_btree		*bs,
	union xfs_btree_rec		*rec)
{
	struct xfs_mount		*mp = bs->cur->bc_mp;
	struct xfs_inobt_rec_incore	irec;
	uint64_t			holes;
	xfs_agnumber_t			agno = bs->cur->bc_private.a.agno;
	xfs_agino_t			agino;
	xfs_agblock_t			agbno;
	xfs_extlen_t			len;
	int				holecount;
	int				i;
	int				error = 0;
	unsigned int			real_freecount;
	uint16_t			holemask;

	xfs_inobt_btrec_to_irec(mp, rec, &irec);

	/* Neither count can exceed the size of an inode chunk. */
	if (irec.ir_count > XFS_INODES_PER_CHUNK ||
	    irec.ir_freecount > XFS_INODES_PER_CHUNK)
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	/*
	 * Inodes masked out of ir_count (sparse holes) also show up as
	 * set bits in ir_free, so the popcount must equal freecount
	 * plus the hole inodes.
	 */
	real_freecount = irec.ir_freecount +
			(XFS_INODES_PER_CHUNK - irec.ir_count);
	if (real_freecount != xfs_scrub_iallocbt_freecount(irec.ir_free))
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	agino = irec.ir_startino;
	/* Record has to be properly aligned within the AG. */
	if (!xfs_verify_agino(mp, agno, agino) ||
	    !xfs_verify_agino(mp, agno, agino + XFS_INODES_PER_CHUNK - 1)) {
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
		goto out;
	}

	/* Make sure this record is aligned to cluster and inoalignmnt size. */
	agbno = XFS_AGINO_TO_AGBNO(mp, irec.ir_startino);
	if ((agbno & (xfs_ialloc_cluster_alignment(mp) - 1)) ||
	    (agbno & (xfs_icluster_size_fsb(mp) - 1)))
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	/* Handle non-sparse inodes */
	if (!xfs_inobt_issparse(irec.ir_holemask)) {
		len = XFS_B_TO_FSB(mp,
				XFS_INODES_PER_CHUNK * mp->m_sb.sb_inodesize);
		if (irec.ir_count != XFS_INODES_PER_CHUNK)
			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

		if (!xfs_scrub_iallocbt_chunk(bs, &irec, agino, len))
			goto out;
		goto check_freemask;
	}

	/* Check each chunk of a sparse inode cluster. */
	holemask = irec.ir_holemask;
	holecount = 0;
	len = XFS_B_TO_FSB(mp,
			XFS_INODES_PER_HOLEMASK_BIT * mp->m_sb.sb_inodesize);
	/* Every inode inside a hole must also be marked free. */
	holes = ~xfs_inobt_irec_to_allocmask(&irec);
	if ((holes & irec.ir_free) != holes ||
	    irec.ir_freecount > irec.ir_count)
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	/* Walk the holemask one bit (= one chunk of inodes) at a time. */
	for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; i++) {
		if (holemask & 1)
			holecount += XFS_INODES_PER_HOLEMASK_BIT;
		else if (!xfs_scrub_iallocbt_chunk(bs, &irec, agino, len))
			break;
		holemask >>= 1;
		agino += XFS_INODES_PER_HOLEMASK_BIT;
	}

	/* Holes plus allocated inodes must exactly cover the chunk. */
	if (holecount > XFS_INODES_PER_CHUNK ||
	    holecount + irec.ir_count != XFS_INODES_PER_CHUNK)
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

check_freemask:
	error = xfs_scrub_iallocbt_check_freemask(bs, &irec);
	if (error)
		goto out;

out:
	return error;
}
310
311/* Scrub the inode btrees for some AG. */
312STATIC int
313xfs_scrub_iallocbt(
314 struct xfs_scrub_context *sc,
315 xfs_btnum_t which)
316{
317 struct xfs_btree_cur *cur;
318 struct xfs_owner_info oinfo;
319
320 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
321 cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur;
322 return xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, NULL);
323}
324
/* Scrub the inode btree for some AG. */
int
xfs_scrub_inobt(
	struct xfs_scrub_context	*sc)
{
	return xfs_scrub_iallocbt(sc, XFS_BTNUM_INO);
}
331
/* Scrub the free inode btree for some AG. */
int
xfs_scrub_finobt(
	struct xfs_scrub_context	*sc)
{
	return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO);
}
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
new file mode 100644
index 000000000000..637b7a892313
--- /dev/null
+++ b/fs/xfs/scrub/inode.c
@@ -0,0 +1,611 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_icache.h"
34#include "xfs_inode_buf.h"
35#include "xfs_inode_fork.h"
36#include "xfs_ialloc.h"
37#include "xfs_da_format.h"
38#include "xfs_reflink.h"
39#include "scrub/xfs_scrub.h"
40#include "scrub/scrub.h"
41#include "scrub/common.h"
42#include "scrub/trace.h"
43
/*
 * Grab total control of the inode metadata.  It doesn't matter here if
 * the file data is still changing; exclusive access to the metadata is
 * the goal.
 */
int
xfs_scrub_setup_inode(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	struct xfs_mount		*mp = sc->mp;
	int				error;

	/*
	 * Try to get the inode.  If the verifiers fail, we try again
	 * in raw mode.
	 */
	error = xfs_scrub_get_inode(sc, ip);
	switch (error) {
	case 0:
		break;
	case -EFSCORRUPTED:
	case -EFSBADCRC:
		/* Inode didn't pass verifiers; raw-mode scrub handles it. */
		return 0;
	default:
		return error;
	}

	/* Got the inode, lock it and we're ready to go. */
	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	xfs_ilock(sc->ip, sc->ilock_flags);
	error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
	if (error)
		goto out;
	/* ILOCK is taken only after the transaction is allocated. */
	sc->ilock_flags |= XFS_ILOCK_EXCL;
	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);

out:
	/* scrub teardown will unlock and release the inode for us */
	return error;
}
85
86/* Inode core */
87
/*
 * Validate di_extsize hint.
 *
 * The rules are documented at xfs_ioctl_setattr_check_extsize().
 * These functions must be kept in sync with each other.
 */
STATIC void
xfs_scrub_inode_extsize(
	struct xfs_scrub_context	*sc,
	struct xfs_buf			*bp,
	struct xfs_dinode		*dip,
	xfs_ino_t			ino,
	uint16_t			mode,
	uint16_t			flags)
{
	struct xfs_mount		*mp = sc->mp;
	bool				rt_flag;
	bool				hint_flag;
	bool				inherit_flag;
	uint32_t			extsize;
	uint32_t			extsize_bytes;
	uint32_t			blocksize_bytes;

	rt_flag = (flags & XFS_DIFLAG_REALTIME);
	hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
	inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
	extsize = be32_to_cpu(dip->di_extsize);
	extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize);

	/* Realtime files measure extent hints in rt extent units. */
	if (rt_flag)
		blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
	else
		blocksize_bytes = mp->m_sb.sb_blocksize;

	/* Extent size hints only apply to files and directories. */
	if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
		goto bad;

	/* EXTSIZE is a file-only hint... */
	if (hint_flag && !S_ISREG(mode))
		goto bad;

	/* ...and EXTSZINHERIT is directory-only. */
	if (inherit_flag && !S_ISDIR(mode))
		goto bad;

	/* A hint flag requires a nonzero size, and vice versa. */
	if ((hint_flag || inherit_flag) && extsize == 0)
		goto bad;

	if (!(hint_flag || inherit_flag) && extsize != 0)
		goto bad;

	/* The hint must be a whole multiple of the (rt) block size. */
	if (extsize_bytes % blocksize_bytes)
		goto bad;

	if (extsize > MAXEXTLEN)
		goto bad;

	/* A non-rt hint can't exceed half an AG. */
	if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
		goto bad;

	return;
bad:
	xfs_scrub_ino_set_corrupt(sc, ino, bp);
}
150
151/*
152 * Validate di_cowextsize hint.
153 *
154 * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
155 * These functions must be kept in sync with each other.
156 */
157STATIC void
158xfs_scrub_inode_cowextsize(
159 struct xfs_scrub_context *sc,
160 struct xfs_buf *bp,
161 struct xfs_dinode *dip,
162 xfs_ino_t ino,
163 uint16_t mode,
164 uint16_t flags,
165 uint64_t flags2)
166{
167 struct xfs_mount *mp = sc->mp;
168 bool rt_flag;
169 bool hint_flag;
170 uint32_t extsize;
171 uint32_t extsize_bytes;
172
173 rt_flag = (flags & XFS_DIFLAG_REALTIME);
174 hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
175 extsize = be32_to_cpu(dip->di_cowextsize);
176 extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize);
177
178 if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
179 goto bad;
180
181 if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
182 goto bad;
183
184 if (hint_flag && extsize == 0)
185 goto bad;
186
187 if (!hint_flag && extsize != 0)
188 goto bad;
189
190 if (hint_flag && rt_flag)
191 goto bad;
192
193 if (extsize_bytes % mp->m_sb.sb_blocksize)
194 goto bad;
195
196 if (extsize > MAXEXTLEN)
197 goto bad;
198
199 if (extsize > mp->m_sb.sb_agblocks / 2)
200 goto bad;
201
202 return;
203bad:
204 xfs_scrub_ino_set_corrupt(sc, ino, bp);
205}
206
207/* Make sure the di_flags make sense for the inode. */
208STATIC void
209xfs_scrub_inode_flags(
210 struct xfs_scrub_context *sc,
211 struct xfs_buf *bp,
212 struct xfs_dinode *dip,
213 xfs_ino_t ino,
214 uint16_t mode,
215 uint16_t flags)
216{
217 struct xfs_mount *mp = sc->mp;
218
219 if (flags & ~XFS_DIFLAG_ANY)
220 goto bad;
221
222 /* rt flags require rt device */
223 if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) &&
224 !mp->m_rtdev_targp)
225 goto bad;
226
227 /* new rt bitmap flag only valid for rbmino */
228 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino)
229 goto bad;
230
231 /* directory-only flags */
232 if ((flags & (XFS_DIFLAG_RTINHERIT |
233 XFS_DIFLAG_EXTSZINHERIT |
234 XFS_DIFLAG_PROJINHERIT |
235 XFS_DIFLAG_NOSYMLINKS)) &&
236 !S_ISDIR(mode))
237 goto bad;
238
239 /* file-only flags */
240 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) &&
241 !S_ISREG(mode))
242 goto bad;
243
244 /* filestreams and rt make no sense */
245 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME))
246 goto bad;
247
248 return;
249bad:
250 xfs_scrub_ino_set_corrupt(sc, ino, bp);
251}
252
253/* Make sure the di_flags2 make sense for the inode. */
254STATIC void
255xfs_scrub_inode_flags2(
256 struct xfs_scrub_context *sc,
257 struct xfs_buf *bp,
258 struct xfs_dinode *dip,
259 xfs_ino_t ino,
260 uint16_t mode,
261 uint16_t flags,
262 uint64_t flags2)
263{
264 struct xfs_mount *mp = sc->mp;
265
266 if (flags2 & ~XFS_DIFLAG2_ANY)
267 goto bad;
268
269 /* reflink flag requires reflink feature */
270 if ((flags2 & XFS_DIFLAG2_REFLINK) &&
271 !xfs_sb_version_hasreflink(&mp->m_sb))
272 goto bad;
273
274 /* cowextsize flag is checked w.r.t. mode separately */
275
276 /* file/dir-only flags */
277 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode)))
278 goto bad;
279
280 /* file-only flags */
281 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode))
282 goto bad;
283
284 /* realtime and reflink make no sense, currently */
285 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK))
286 goto bad;
287
288 /* dax and reflink make no sense, currently */
289 if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK))
290 goto bad;
291
292 return;
293bad:
294 xfs_scrub_ino_set_corrupt(sc, ino, bp);
295}
296
297/* Scrub all the ondisk inode fields. */
298STATIC void
299xfs_scrub_dinode(
300 struct xfs_scrub_context *sc,
301 struct xfs_buf *bp,
302 struct xfs_dinode *dip,
303 xfs_ino_t ino)
304{
305 struct xfs_mount *mp = sc->mp;
306 size_t fork_recs;
307 unsigned long long isize;
308 uint64_t flags2;
309 uint32_t nextents;
310 uint16_t flags;
311 uint16_t mode;
312
313 flags = be16_to_cpu(dip->di_flags);
314 if (dip->di_version >= 3)
315 flags2 = be64_to_cpu(dip->di_flags2);
316 else
317 flags2 = 0;
318
319 /* di_mode */
320 mode = be16_to_cpu(dip->di_mode);
321 if (mode & ~(S_IALLUGO | S_IFMT))
322 xfs_scrub_ino_set_corrupt(sc, ino, bp);
323
324 /* v1/v2 fields */
325 switch (dip->di_version) {
326 case 1:
327 /*
328 * We autoconvert v1 inodes into v2 inodes on writeout,
329 * so just mark this inode for preening.
330 */
331 xfs_scrub_ino_set_preen(sc, ino, bp);
332 break;
333 case 2:
334 case 3:
335 if (dip->di_onlink != 0)
336 xfs_scrub_ino_set_corrupt(sc, ino, bp);
337
338 if (dip->di_mode == 0 && sc->ip)
339 xfs_scrub_ino_set_corrupt(sc, ino, bp);
340
341 if (dip->di_projid_hi != 0 &&
342 !xfs_sb_version_hasprojid32bit(&mp->m_sb))
343 xfs_scrub_ino_set_corrupt(sc, ino, bp);
344 break;
345 default:
346 xfs_scrub_ino_set_corrupt(sc, ino, bp);
347 return;
348 }
349
350 /*
351 * di_uid/di_gid -- -1 isn't invalid, but there's no way that
352 * userspace could have created that.
353 */
354 if (dip->di_uid == cpu_to_be32(-1U) ||
355 dip->di_gid == cpu_to_be32(-1U))
356 xfs_scrub_ino_set_warning(sc, ino, bp);
357
358 /* di_format */
359 switch (dip->di_format) {
360 case XFS_DINODE_FMT_DEV:
361 if (!S_ISCHR(mode) && !S_ISBLK(mode) &&
362 !S_ISFIFO(mode) && !S_ISSOCK(mode))
363 xfs_scrub_ino_set_corrupt(sc, ino, bp);
364 break;
365 case XFS_DINODE_FMT_LOCAL:
366 if (!S_ISDIR(mode) && !S_ISLNK(mode))
367 xfs_scrub_ino_set_corrupt(sc, ino, bp);
368 break;
369 case XFS_DINODE_FMT_EXTENTS:
370 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode))
371 xfs_scrub_ino_set_corrupt(sc, ino, bp);
372 break;
373 case XFS_DINODE_FMT_BTREE:
374 if (!S_ISREG(mode) && !S_ISDIR(mode))
375 xfs_scrub_ino_set_corrupt(sc, ino, bp);
376 break;
377 case XFS_DINODE_FMT_UUID:
378 default:
379 xfs_scrub_ino_set_corrupt(sc, ino, bp);
380 break;
381 }
382
383 /*
384 * di_size. xfs_dinode_verify checks for things that screw up
385 * the VFS such as the upper bit being set and zero-length
386 * symlinks/directories, but we can do more here.
387 */
388 isize = be64_to_cpu(dip->di_size);
389 if (isize & (1ULL << 63))
390 xfs_scrub_ino_set_corrupt(sc, ino, bp);
391
392 /* Devices, fifos, and sockets must have zero size */
393 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0)
394 xfs_scrub_ino_set_corrupt(sc, ino, bp);
395
396 /* Directories can't be larger than the data section size (32G) */
397 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE))
398 xfs_scrub_ino_set_corrupt(sc, ino, bp);
399
400 /* Symlinks can't be larger than SYMLINK_MAXLEN */
401 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN))
402 xfs_scrub_ino_set_corrupt(sc, ino, bp);
403
404 /*
405 * Warn if the running kernel can't handle the kinds of offsets
406 * needed to deal with the file size. In other words, if the
407 * pagecache can't cache all the blocks in this file due to
408 * overly large offsets, flag the inode for admin review.
409 */
410 if (isize >= mp->m_super->s_maxbytes)
411 xfs_scrub_ino_set_warning(sc, ino, bp);
412
413 /* di_nblocks */
414 if (flags2 & XFS_DIFLAG2_REFLINK) {
415 ; /* nblocks can exceed dblocks */
416 } else if (flags & XFS_DIFLAG_REALTIME) {
417 /*
418 * nblocks is the sum of data extents (in the rtdev),
419 * attr extents (in the datadev), and both forks' bmbt
420 * blocks (in the datadev). This clumsy check is the
421 * best we can do without cross-referencing with the
422 * inode forks.
423 */
424 if (be64_to_cpu(dip->di_nblocks) >=
425 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks)
426 xfs_scrub_ino_set_corrupt(sc, ino, bp);
427 } else {
428 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks)
429 xfs_scrub_ino_set_corrupt(sc, ino, bp);
430 }
431
432 xfs_scrub_inode_flags(sc, bp, dip, ino, mode, flags);
433
434 xfs_scrub_inode_extsize(sc, bp, dip, ino, mode, flags);
435
436 /* di_nextents */
437 nextents = be32_to_cpu(dip->di_nextents);
438 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
439 switch (dip->di_format) {
440 case XFS_DINODE_FMT_EXTENTS:
441 if (nextents > fork_recs)
442 xfs_scrub_ino_set_corrupt(sc, ino, bp);
443 break;
444 case XFS_DINODE_FMT_BTREE:
445 if (nextents <= fork_recs)
446 xfs_scrub_ino_set_corrupt(sc, ino, bp);
447 break;
448 default:
449 if (nextents != 0)
450 xfs_scrub_ino_set_corrupt(sc, ino, bp);
451 break;
452 }
453
454 /* di_forkoff */
455 if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize)
456 xfs_scrub_ino_set_corrupt(sc, ino, bp);
457 if (dip->di_anextents != 0 && dip->di_forkoff == 0)
458 xfs_scrub_ino_set_corrupt(sc, ino, bp);
459 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS)
460 xfs_scrub_ino_set_corrupt(sc, ino, bp);
461
462 /* di_aformat */
463 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL &&
464 dip->di_aformat != XFS_DINODE_FMT_EXTENTS &&
465 dip->di_aformat != XFS_DINODE_FMT_BTREE)
466 xfs_scrub_ino_set_corrupt(sc, ino, bp);
467
468 /* di_anextents */
469 nextents = be16_to_cpu(dip->di_anextents);
470 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec);
471 switch (dip->di_aformat) {
472 case XFS_DINODE_FMT_EXTENTS:
473 if (nextents > fork_recs)
474 xfs_scrub_ino_set_corrupt(sc, ino, bp);
475 break;
476 case XFS_DINODE_FMT_BTREE:
477 if (nextents <= fork_recs)
478 xfs_scrub_ino_set_corrupt(sc, ino, bp);
479 break;
480 default:
481 if (nextents != 0)
482 xfs_scrub_ino_set_corrupt(sc, ino, bp);
483 }
484
485 if (dip->di_version >= 3) {
486 xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2);
487 xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags,
488 flags2);
489 }
490}
491
/*
 * Map and read a raw (on-disk) inode.
 *
 * On success, sets *bpp to the inode cluster buffer and *dipp to the
 * target inode record inside it; the caller is responsible for
 * releasing the buffer.  Returns -ENOENT if the inode has been freed
 * or its generation number no longer matches the scrub request.
 */
STATIC int
xfs_scrub_inode_map_raw(
	struct xfs_scrub_context	*sc,
	xfs_ino_t			ino,
	struct xfs_buf			**bpp,
	struct xfs_dinode		**dipp)
{
	struct xfs_imap			imap;
	struct xfs_mount		*mp = sc->mp;
	struct xfs_buf			*bp = NULL;
	struct xfs_dinode		*dip;
	int				error;

	/* UNTRUSTED: the inode number came from userspace. */
	error = xfs_imap(mp, sc->tp, ino, &imap, XFS_IGET_UNTRUSTED);
	if (error == -EINVAL) {
		/*
		 * Inode could have gotten deleted out from under us;
		 * just forget about it.
		 */
		error = -ENOENT;
		goto out;
	}
	if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
			XFS_INO_TO_AGBNO(mp, ino), &error))
		goto out;

	/* Read the cluster buffer with verifiers disabled (ops == NULL). */
	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
			imap.im_blkno, imap.im_len, XBF_UNMAPPED, &bp,
			NULL);
	if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
			XFS_INO_TO_AGBNO(mp, ino), &error))
		goto out;

	/*
	 * Is this really an inode?  We disabled verifiers in the above
	 * xfs_trans_read_buf call because the inode buffer verifier
	 * fails on /any/ inode record in the inode cluster with a bad
	 * magic or version number, not just the one that we're
	 * checking.  Therefore, grab the buffer unconditionally, attach
	 * the inode verifiers by hand, and run the inode verifier only
	 * on the one inode we want.
	 */
	bp->b_ops = &xfs_inode_buf_ops;
	dip = xfs_buf_offset(bp, imap.im_boffset);
	if (!xfs_dinode_verify(mp, ino, dip) ||
	    !xfs_dinode_good_version(mp, dip->di_version)) {
		xfs_scrub_ino_set_corrupt(sc, ino, bp);
		goto out_buf;
	}

	/* ...and is it the one we asked for? */
	if (be32_to_cpu(dip->di_gen) != sc->sm->sm_gen) {
		error = -ENOENT;
		goto out_buf;
	}

	/* Success: hand both the buffer and the dinode back to the caller. */
	*dipp = dip;
	*bpp = bp;
out:
	return error;
out_buf:
	xfs_trans_brelse(sc->tp, bp);
	return error;
}
557
/*
 * Scrub an inode.
 *
 * If the inode is in core (sc->ip), scrub a snapshot of its on-disk
 * form; otherwise map and read the raw inode from disk.  Note that in
 * the in-core case bp stays NULL, so the dinode checks must tolerate
 * a NULL buffer.
 */
int
xfs_scrub_inode(
	struct xfs_scrub_context	*sc)
{
	struct xfs_dinode		di;
	struct xfs_mount		*mp = sc->mp;
	struct xfs_buf			*bp = NULL;
	struct xfs_dinode		*dip;
	xfs_ino_t			ino;

	bool				has_shared;
	int				error = 0;

	/* Did we get the in-core inode, or are we doing this manually? */
	if (sc->ip) {
		ino = sc->ip->i_ino;
		/* Flush the in-core inode into a stack dinode snapshot. */
		xfs_inode_to_disk(sc->ip, &di, 0);
		dip = &di;
	} else {
		/* Map & read inode. */
		ino = sc->sm->sm_ino;
		error = xfs_scrub_inode_map_raw(sc, ino, &bp, &dip);
		if (error || !bp)
			goto out;
	}

	xfs_scrub_dinode(sc, bp, dip, ino);
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	/* Now let's do the things that require a live inode. */
	if (!sc->ip)
		goto out;

	/*
	 * Does this inode have the reflink flag set but no shared extents?
	 * Set the preening flag if this is the case.
	 */
	if (xfs_is_reflink_inode(sc->ip)) {
		error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
				&has_shared);
		if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
				XFS_INO_TO_AGBNO(mp, ino), &error))
			goto out;
		if (!has_shared)
			xfs_scrub_ino_set_preen(sc, ino, bp);
	}

out:
	/* bp is only non-NULL when we took the raw-mapping path. */
	if (bp)
		xfs_trans_brelse(sc->tp, bp);
	return error;
}
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
new file mode 100644
index 000000000000..63a25334fc83
--- /dev/null
+++ b/fs/xfs/scrub/parent.c
@@ -0,0 +1,317 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_icache.h"
34#include "xfs_dir2.h"
35#include "xfs_dir2_priv.h"
36#include "xfs_ialloc.h"
37#include "scrub/xfs_scrub.h"
38#include "scrub/scrub.h"
39#include "scrub/common.h"
40#include "scrub/trace.h"
41
/* Set us up to scrub parents. */
int
xfs_scrub_setup_parent(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	/*
	 * 0: presumably no extra block reservation is needed for a
	 * read-only parent check — confirm against
	 * xfs_scrub_setup_inode_contents's parameter meaning.
	 */
	return xfs_scrub_setup_inode_contents(sc, ip, 0);
}
50
51/* Parent pointers */
52
53/* Look for an entry in a parent pointing to this inode. */
54
struct xfs_scrub_parent_ctx {
	struct dir_context	dc;	/* embedded VFS readdir context; must be first for container_of */
	xfs_ino_t		ino;	/* inode number we expect dirents to point at */
	xfs_nlink_t		nlink;	/* how many matching dirents the actor has seen */
};
60
61/* Look for a single entry in a directory pointing to an inode. */
62STATIC int
63xfs_scrub_parent_actor(
64 struct dir_context *dc,
65 const char *name,
66 int namelen,
67 loff_t pos,
68 u64 ino,
69 unsigned type)
70{
71 struct xfs_scrub_parent_ctx *spc;
72
73 spc = container_of(dc, struct xfs_scrub_parent_ctx, dc);
74 if (spc->ino == ino)
75 spc->nlink++;
76 return 0;
77}
78
/*
 * Count the number of dentries in the parent dir that point to this inode.
 *
 * On success, returns 0 and stores the count in *nlink.  The caller
 * must hold the parent's IOLOCK to keep the directory stable.
 */
STATIC int
xfs_scrub_parent_count_parent_dentries(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*parent,
	xfs_nlink_t			*nlink)
{
	struct xfs_scrub_parent_ctx	spc = {
		.dc.actor = xfs_scrub_parent_actor,
		.dc.pos = 0,
		.ino = sc->ip->i_ino,
		.nlink = 0,
	};
	size_t				bufsize;
	loff_t				oldpos;
	uint				lock_mode;
	int				error = 0;

	/*
	 * If there are any blocks, read-ahead block 0 as we're almost
	 * certain to have the next operation be a read there.  This is
	 * how we guarantee that the parent's extent map has been loaded,
	 * if there is one.
	 */
	lock_mode = xfs_ilock_data_map_shared(parent);
	if (parent->i_d.di_nextents > 0)
		error = xfs_dir3_data_readahead(parent, 0, -1);
	xfs_iunlock(parent, lock_mode);
	if (error)
		return error;

	/*
	 * Iterate the parent dir to confirm that there is
	 * exactly one entry pointing back to the inode being
	 * scanned.
	 */
	bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE,
			parent->i_d.di_size);
	oldpos = 0;
	/* Keep calling readdir until the position stops advancing (EOF). */
	while (true) {
		error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize);
		if (error)
			goto out;
		if (oldpos == spc.dc.pos)
			break;
		oldpos = spc.dc.pos;
	}
	*nlink = spc.nlink;
out:
	return error;
}
130
/*
 * Given the inode number of the alleged parent of the inode being
 * scrubbed, try to validate that the parent has exactly one directory
 * entry pointing back to the inode being scrubbed.
 *
 * Sets *try_again if the parent changed underneath us and the caller
 * should retry the whole validation.
 */
STATIC int
xfs_scrub_parent_validate(
	struct xfs_scrub_context	*sc,
	xfs_ino_t			dnum,
	bool				*try_again)
{
	struct xfs_mount		*mp = sc->mp;
	struct xfs_inode		*dp = NULL;
	xfs_nlink_t			expected_nlink;
	xfs_nlink_t			nlink;
	int				error = 0;

	*try_again = false;

	/* '..' must not point to ourselves. */
	if (sc->ip->i_ino == dnum) {
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
		goto out;
	}

	/*
	 * If we're an unlinked directory, the parent /won't/ have a link
	 * to us.  Otherwise, it should have one link.
	 */
	expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;

	/*
	 * Grab this parent inode.  We release the inode before we
	 * cancel the scrub transaction.  Since we don't know a
	 * priori that releasing the inode won't trigger eofblocks
	 * cleanup (which allocates what would be a nested transaction)
	 * if the parent pointer erroneously points to a file, we
	 * can't use DONTCACHE here because DONTCACHE inodes can trigger
	 * immediate inactive cleanup of the inode.
	 */
	error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp);
	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
		goto out;
	if (dp == sc->ip) {
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
		goto out_rele;
	}

	/*
	 * We prefer to keep the inode locked while we lock and search
	 * its alleged parent for a forward reference.  If we can grab
	 * the iolock, validate the pointers and we're done.  We must
	 * use nowait here to avoid an ABBA deadlock on the parent and
	 * the child inodes.
	 */
	if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
		error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0,
				&error))
			goto out_unlock;
		if (nlink != expected_nlink)
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
		goto out_unlock;
	}

	/*
	 * The game changes if we get here.  We failed to lock the parent,
	 * so we're going to try to verify both pointers while only holding
	 * one lock so as to avoid deadlocking with something that's actually
	 * trying to traverse down the directory tree.
	 */
	xfs_iunlock(sc->ip, sc->ilock_flags);
	sc->ilock_flags = 0;
	xfs_ilock(dp, XFS_IOLOCK_SHARED);

	/* Go looking for our dentry. */
	error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
		goto out_unlock;

	/* Drop the parent lock, relock this inode. */
	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
	sc->ilock_flags = XFS_IOLOCK_EXCL;
	xfs_ilock(sc->ip, sc->ilock_flags);

	/*
	 * If we're an unlinked directory, the parent /won't/ have a link
	 * to us.  Otherwise, it should have one link.  We have to re-set
	 * it here because we dropped the lock on sc->ip.
	 */
	expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;

	/* Look up '..' to see if the inode changed. */
	error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
		goto out_rele;

	/* Drat, parent changed.  Try again! */
	if (dnum != dp->i_ino) {
		iput(VFS_I(dp));
		*try_again = true;
		return 0;
	}
	iput(VFS_I(dp));

	/*
	 * '..' didn't change, so check that there was only one entry
	 * for us in the parent.
	 */
	if (nlink != expected_nlink)
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
	return error;

out_unlock:
	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
out_rele:
	iput(VFS_I(dp));
out:
	return error;
}
251
252/* Scrub a parent pointer. */
253int
254xfs_scrub_parent(
255 struct xfs_scrub_context *sc)
256{
257 struct xfs_mount *mp = sc->mp;
258 xfs_ino_t dnum;
259 bool try_again;
260 int tries = 0;
261 int error = 0;
262
263 /*
264 * If we're a directory, check that the '..' link points up to
265 * a directory that has one entry pointing to us.
266 */
267 if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
268 return -ENOENT;
269
270 /* We're not a special inode, are we? */
271 if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) {
272 xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
273 goto out;
274 }
275
276 /*
277 * The VFS grabs a read or write lock via i_rwsem before it reads
278 * or writes to a directory. If we've gotten this far we've
279 * already obtained IOLOCK_EXCL, which (since 4.10) is the same as
280 * getting a write lock on i_rwsem. Therefore, it is safe for us
281 * to drop the ILOCK here in order to do directory lookups.
282 */
283 sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
284 xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
285
286 /* Look up '..' */
287 error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
288 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
289 goto out;
290 if (!xfs_verify_dir_ino(mp, dnum)) {
291 xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
292 goto out;
293 }
294
295 /* Is this the root dir? Then '..' must point to itself. */
296 if (sc->ip == mp->m_rootip) {
297 if (sc->ip->i_ino != mp->m_sb.sb_rootino ||
298 sc->ip->i_ino != dnum)
299 xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
300 goto out;
301 }
302
303 do {
304 error = xfs_scrub_parent_validate(sc, dnum, &try_again);
305 if (error)
306 goto out;
307 } while (try_again && ++tries < 20);
308
309 /*
310 * We gave it our best shot but failed, so mark this scrub
311 * incomplete. Userspace can decide if it wants to try again.
312 */
313 if (try_again && tries == 20)
314 xfs_scrub_set_incomplete(sc);
315out:
316 return error;
317}
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
new file mode 100644
index 000000000000..8e58ba842946
--- /dev/null
+++ b/fs/xfs/scrub/quota.c
@@ -0,0 +1,304 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_inode_fork.h"
34#include "xfs_alloc.h"
35#include "xfs_bmap.h"
36#include "xfs_quota.h"
37#include "xfs_qm.h"
38#include "xfs_dquot.h"
39#include "xfs_dquot_item.h"
40#include "scrub/xfs_scrub.h"
41#include "scrub/scrub.h"
42#include "scrub/common.h"
43#include "scrub/trace.h"
44
45/* Convert a scrub type code to a DQ flag, or return 0 if error. */
46static inline uint
47xfs_scrub_quota_to_dqtype(
48 struct xfs_scrub_context *sc)
49{
50 switch (sc->sm->sm_type) {
51 case XFS_SCRUB_TYPE_UQUOTA:
52 return XFS_DQ_USER;
53 case XFS_SCRUB_TYPE_GQUOTA:
54 return XFS_DQ_GROUP;
55 case XFS_SCRUB_TYPE_PQUOTA:
56 return XFS_DQ_PROJ;
57 default:
58 return 0;
59 }
60}
61
/*
 * Set us up to scrub a quota.
 *
 * Rejects requests carrying AG/inode parameters (-EINVAL), unknown
 * quota types (-EINVAL), and quota types not enabled on this
 * filesystem (-ENOENT).
 */
int
xfs_scrub_setup_quota(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	uint				dqtype;

	/*
	 * If userspace gave us an AG number or inode data, they don't
	 * know what they're doing.  Get out.
	 */
	if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen)
		return -EINVAL;

	dqtype = xfs_scrub_quota_to_dqtype(sc);
	if (dqtype == 0)
		return -EINVAL;
	if (!xfs_this_quota_on(sc->mp, dqtype))
		return -ENOENT;
	return 0;
}
84
85/* Quotas. */
86
/*
 * Scrub the fields in an individual quota item.
 *
 * @id is the id we asked xfs_qm_dqget for; @dq is what came back,
 * which (because of DQNEXT) may have a higher id.  Problems are
 * reported against the quota file offset holding this dquot.
 */
STATIC void
xfs_scrub_quota_item(
	struct xfs_scrub_context	*sc,
	uint				dqtype,
	struct xfs_dquot		*dq,
	xfs_dqid_t			id)
{
	struct xfs_mount		*mp = sc->mp;
	struct xfs_disk_dquot		*d = &dq->q_core;
	struct xfs_quotainfo		*qi = mp->m_quotainfo;
	xfs_fileoff_t			offset;
	unsigned long long		bsoft;
	unsigned long long		isoft;
	unsigned long long		rsoft;
	unsigned long long		bhard;
	unsigned long long		ihard;
	unsigned long long		rhard;
	unsigned long long		bcount;
	unsigned long long		icount;
	unsigned long long		rcount;
	xfs_ino_t			fs_icount;

	/* File offset of this dquot within the quota inode. */
	offset = id * qi->qi_dqperchunk;

	/*
	 * We fed $id and DQNEXT into the xfs_qm_dqget call, which means
	 * that the actual dquot we got must either have the same id or
	 * the next higher id.
	 */
	if (id > be32_to_cpu(d->d_id))
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);

	/* Did we get the dquot type we wanted? */
	if (dqtype != (d->d_flags & XFS_DQ_ALLTYPES))
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);

	/* Padding must be zeroed on disk. */
	if (d->d_pad0 != cpu_to_be32(0) || d->d_pad != cpu_to_be16(0))
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);

	/* Check the limits. */
	bhard = be64_to_cpu(d->d_blk_hardlimit);
	ihard = be64_to_cpu(d->d_ino_hardlimit);
	rhard = be64_to_cpu(d->d_rtb_hardlimit);

	bsoft = be64_to_cpu(d->d_blk_softlimit);
	isoft = be64_to_cpu(d->d_ino_softlimit);
	rsoft = be64_to_cpu(d->d_rtb_softlimit);

	/*
	 * Warn if the hard limits are larger than the fs.
	 * Administrators can do this, though in production this seems
	 * suspect, which is why we flag it for review.
	 *
	 * Complain about corruption if the soft limit is greater than
	 * the hard limit.
	 */
	if (bhard > mp->m_sb.sb_dblocks)
		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
	if (bsoft > bhard)
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);

	if (ihard > mp->m_maxicount)
		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
	if (isoft > ihard)
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);

	if (rhard > mp->m_sb.sb_rblocks)
		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
	if (rsoft > rhard)
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);

	/* Check the resource counts. */
	bcount = be64_to_cpu(d->d_bcount);
	icount = be64_to_cpu(d->d_icount);
	rcount = be64_to_cpu(d->d_rtbcount);
	fs_icount = percpu_counter_sum(&mp->m_icount);

	/*
	 * Check that usage doesn't exceed physical limits.  However, on
	 * a reflink filesystem we're allowed to exceed physical space
	 * if there are no quota limits.
	 */
	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
		if (mp->m_sb.sb_dblocks < bcount)
			xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK,
					offset);
	} else {
		if (mp->m_sb.sb_dblocks < bcount)
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
					offset);
	}
	if (icount > fs_icount || rcount > mp->m_sb.sb_rblocks)
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);

	/*
	 * We can violate the hard limits if the admin suddenly sets a
	 * lower limit than the actual usage.  However, we flag it for
	 * admin review.  (id 0 is the default-limits record, hence the
	 * exemption.)
	 */
	if (id != 0 && bhard != 0 && bcount > bhard)
		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
	if (id != 0 && ihard != 0 && icount > ihard)
		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
	if (id != 0 && rhard != 0 && rcount > rhard)
		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
}
194
/*
 * Scrub all of a quota type's items.
 *
 * Two passes: first walk the quota file's extent map looking for
 * insane mappings, then iterate every dquot with DQNEXT and check its
 * fields.  Holds qi_quotaofflock throughout so quotaoff can't race us.
 */
int
xfs_scrub_quota(
	struct xfs_scrub_context	*sc)
{
	/* Zeroed irec primes the loop: first bmapi_read starts at offset 0. */
	struct xfs_bmbt_irec		irec = { 0 };
	struct xfs_mount		*mp = sc->mp;
	struct xfs_inode		*ip;
	struct xfs_quotainfo		*qi = mp->m_quotainfo;
	struct xfs_dquot		*dq;
	xfs_fileoff_t			max_dqid_off;
	xfs_fileoff_t			off = 0;
	xfs_dqid_t			id = 0;
	uint				dqtype;
	int				nimaps;
	int				error;

	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
		return -ENOENT;

	mutex_lock(&qi->qi_quotaofflock);
	dqtype = xfs_scrub_quota_to_dqtype(sc);
	if (!xfs_this_quota_on(sc->mp, dqtype)) {
		error = -ENOENT;
		goto out_unlock_quota;
	}

	/* Attach to the quota inode and set sc->ip so that reporting works. */
	ip = xfs_quota_inode(sc->mp, dqtype);
	sc->ip = ip;

	/* Look for problem extents. */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	/* Quota files must live on the data device, never realtime. */
	if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
		goto out_unlock_inode;
	}
	/* Highest file offset that can hold a valid dquot id. */
	max_dqid_off = ((xfs_dqid_t)-1) / qi->qi_dqperchunk;
	while (1) {
		if (xfs_scrub_should_terminate(sc, &error))
			break;

		/* Advance past the extent we mapped last iteration. */
		off = irec.br_startoff + irec.br_blockcount;
		nimaps = 1;
		error = xfs_bmapi_read(ip, off, -1, &irec, &nimaps,
				XFS_BMAPI_ENTIRE);
		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, off,
				&error))
			goto out_unlock_inode;
		if (!nimaps)
			break;
		if (irec.br_startblock == HOLESTARTBLOCK)
			continue;

		/* Check the extent record doesn't point to crap. */
		if (irec.br_startblock + irec.br_blockcount <=
		    irec.br_startblock)
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
					irec.br_startoff);
		if (!xfs_verify_fsbno(mp, irec.br_startblock) ||
		    !xfs_verify_fsbno(mp, irec.br_startblock +
					irec.br_blockcount - 1))
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
					irec.br_startoff);

		/*
		 * Unwritten extents or blocks mapped above the highest
		 * quota id shouldn't happen.
		 */
		if (isnullstartblock(irec.br_startblock) ||
		    irec.br_startoff > max_dqid_off ||
		    irec.br_startoff + irec.br_blockcount > max_dqid_off + 1)
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, off);
	}
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	/* Check all the quota items. */
	while (id < ((xfs_dqid_t)-1ULL)) {
		if (xfs_scrub_should_terminate(sc, &error))
			break;

		/* DQNEXT: get the dquot at or after $id. */
		error = xfs_qm_dqget(mp, NULL, id, dqtype, XFS_QMOPT_DQNEXT,
				&dq);
		if (error == -ENOENT)
			break;
		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK,
				id * qi->qi_dqperchunk, &error))
			break;

		xfs_scrub_quota_item(sc, dqtype, dq, id);

		/* Resume after the dquot we just checked; 0 means we wrapped. */
		id = be32_to_cpu(dq->q_core.d_id) + 1;
		xfs_qm_dqput(dq);
		if (!id)
			break;
	}

out:
	/* We set sc->ip earlier, so make sure we clear it now. */
	sc->ip = NULL;
out_unlock_quota:
	mutex_unlock(&qi->qi_quotaofflock);
	return error;

out_unlock_inode:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	goto out;
}
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
new file mode 100644
index 000000000000..2f88a8d44bd0
--- /dev/null
+++ b/fs/xfs/scrub/refcount.c
@@ -0,0 +1,99 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_alloc.h"
33#include "xfs_rmap.h"
34#include "scrub/xfs_scrub.h"
35#include "scrub/scrub.h"
36#include "scrub/common.h"
37#include "scrub/btree.h"
38#include "scrub/trace.h"
39
/*
 * Set us up to scrub reference count btrees.
 */
int
xfs_scrub_setup_ag_refcountbt(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	/*
	 * NOTE(review): the meaning of the boolean argument isn't
	 * visible here — confirm against xfs_scrub_setup_ag_btree.
	 */
	return xfs_scrub_setup_ag_btree(sc, ip, false);
}
50
51/* Reference count btree scrubber. */
52
53/* Scrub a refcountbt record. */
54STATIC int
55xfs_scrub_refcountbt_rec(
56 struct xfs_scrub_btree *bs,
57 union xfs_btree_rec *rec)
58{
59 struct xfs_mount *mp = bs->cur->bc_mp;
60 xfs_agnumber_t agno = bs->cur->bc_private.a.agno;
61 xfs_agblock_t bno;
62 xfs_extlen_t len;
63 xfs_nlink_t refcount;
64 bool has_cowflag;
65 int error = 0;
66
67 bno = be32_to_cpu(rec->refc.rc_startblock);
68 len = be32_to_cpu(rec->refc.rc_blockcount);
69 refcount = be32_to_cpu(rec->refc.rc_refcount);
70
71 /* Only CoW records can have refcount == 1. */
72 has_cowflag = (bno & XFS_REFC_COW_START);
73 if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
74 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
75
76 /* Check the extent. */
77 bno &= ~XFS_REFC_COW_START;
78 if (bno + len <= bno ||
79 !xfs_verify_agbno(mp, agno, bno) ||
80 !xfs_verify_agbno(mp, agno, bno + len - 1))
81 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
82
83 if (refcount == 0)
84 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
85
86 return error;
87}
88
/* Scrub the refcount btree for some AG. */
int
xfs_scrub_refcountbt(
	struct xfs_scrub_context	*sc)
{
	struct xfs_owner_info		oinfo;

	/* Refcount btree blocks are rmap-owned by XFS_RMAP_OWN_REFC. */
	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
	return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
			&oinfo, NULL);
}
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
new file mode 100644
index 000000000000..97846c424690
--- /dev/null
+++ b/fs/xfs/scrub/rmap.c
@@ -0,0 +1,138 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_alloc.h"
33#include "xfs_ialloc.h"
34#include "xfs_rmap.h"
35#include "scrub/xfs_scrub.h"
36#include "scrub/scrub.h"
37#include "scrub/common.h"
38#include "scrub/btree.h"
39#include "scrub/trace.h"
40
/*
 * Set us up to scrub reverse mapping btrees.
 */
int
xfs_scrub_setup_ag_rmapbt(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	/*
	 * NOTE(review): the meaning of the boolean argument isn't
	 * visible here — confirm against xfs_scrub_setup_ag_btree.
	 */
	return xfs_scrub_setup_ag_btree(sc, ip, false);
}
51
52/* Reverse-mapping scrubber. */
53
/*
 * Scrub an rmapbt record.
 *
 * Decodes the on-disk record and checks the extent bounds, the flag
 * combinations, and the owner value against the legal ranges.
 */
STATIC int
xfs_scrub_rmapbt_rec(
	struct xfs_scrub_btree	*bs,
	union xfs_btree_rec	*rec)
{
	struct xfs_mount	*mp = bs->cur->bc_mp;
	struct xfs_rmap_irec	irec;
	xfs_agnumber_t		agno = bs->cur->bc_private.a.agno;
	bool			non_inode;
	bool			is_unwritten;
	bool			is_bmbt;
	bool			is_attr;
	int			error;

	error = xfs_rmap_btrec_to_irec(rec, &irec);
	if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, 0, &error))
		goto out;

	/* Check extent: the block range must not wrap around. */
	if (irec.rm_startblock + irec.rm_blockcount <= irec.rm_startblock)
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	if (irec.rm_owner == XFS_RMAP_OWN_FS) {
		/*
		 * xfs_verify_agbno returns false for static fs metadata.
		 * Since that only exists at the start of the AG, validate
		 * that by hand.
		 */
		if (irec.rm_startblock != 0 ||
		    irec.rm_blockcount != XFS_AGFL_BLOCK(mp) + 1)
			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
	} else {
		/*
		 * Otherwise we must point somewhere past the static metadata
		 * but before the end of the FS.  Run the regular check.
		 */
		if (!xfs_verify_agbno(mp, agno, irec.rm_startblock) ||
		    !xfs_verify_agbno(mp, agno, irec.rm_startblock +
				irec.rm_blockcount - 1))
			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
	}

	/* Check flags. */
	non_inode = XFS_RMAP_NON_INODE_OWNER(irec.rm_owner);
	is_bmbt = irec.rm_flags & XFS_RMAP_BMBT_BLOCK;
	is_attr = irec.rm_flags & XFS_RMAP_ATTR_FORK;
	is_unwritten = irec.rm_flags & XFS_RMAP_UNWRITTEN;

	/* bmbt-block records carry no file offset. */
	if (is_bmbt && irec.rm_offset != 0)
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	/* Non-inode owners carry no file offset either. */
	if (non_inode && irec.rm_offset != 0)
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	/* Unwritten only makes sense for regular inode data extents. */
	if (is_unwritten && (is_bmbt || non_inode || is_attr))
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	/* Non-inode owners take none of the inode-fork flags. */
	if (non_inode && (is_bmbt || is_unwritten || is_attr))
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	if (!non_inode) {
		/* Inode owners must be valid inode numbers. */
		if (!xfs_verify_ino(mp, irec.rm_owner))
			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
	} else {
		/* Non-inode owner within the magic values? */
		if (irec.rm_owner <= XFS_RMAP_OWN_MIN ||
		    irec.rm_owner > XFS_RMAP_OWN_FS)
			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
	}
out:
	return error;
}
127
/* Scrub the rmap btree for some AG. */
int
xfs_scrub_rmapbt(
	struct xfs_scrub_context	*sc)
{
	struct xfs_owner_info		oinfo;

	/* Rmap btree blocks are rmap-owned by XFS_RMAP_OWN_AG. */
	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
	return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec,
			&oinfo, NULL);
}
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
new file mode 100644
index 000000000000..c6fedb698008
--- /dev/null
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -0,0 +1,108 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_alloc.h"
33#include "xfs_rtalloc.h"
34#include "xfs_inode.h"
35#include "scrub/xfs_scrub.h"
36#include "scrub/scrub.h"
37#include "scrub/common.h"
38#include "scrub/trace.h"
39
/* Set us up with the realtime metadata locked. */
int
xfs_scrub_setup_rt(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	struct xfs_mount		*mp = sc->mp;
	int				error = 0;

	/*
	 * If userspace gave us an AG number or inode data, they don't
	 * know what they're doing.  Get out.  (Realtime metadata is
	 * per-filesystem, not per-AG or per-inode.)
	 */
	if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen)
		return -EINVAL;

	/* Common filesystem-wide scrub setup (see scrub/common.c). */
	error = xfs_scrub_setup_fs(sc, ip);
	if (error)
		return error;

	/*
	 * Point sc->ip at the rt bitmap inode and lock it.  Record the
	 * lock flags in the context so that xfs_scrub_teardown can undo
	 * exactly what we took here.
	 */
	sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP;
	sc->ip = mp->m_rbmip;
	xfs_ilock(sc->ip, sc->ilock_flags);

	return 0;
}
66
67/* Realtime bitmap. */
68
/* Scrub a free extent record from the realtime bitmap. */
STATIC int
xfs_scrub_rtbitmap_rec(
	struct xfs_trans		*tp,
	struct xfs_rtalloc_rec		*rec,
	void				*priv)
{
	struct xfs_scrub_context	*sc = priv;

	/*
	 * The extent must not wrap around -- the first comparison catches
	 * both arithmetic overflow and a zero-length record -- and both
	 * endpoints must be valid rt block numbers.  Anything else marks
	 * the rt bitmap (the data fork of sc->ip) corrupt.
	 */
	if (rec->ar_startblock + rec->ar_blockcount <= rec->ar_startblock ||
	    !xfs_verify_rtbno(sc->mp, rec->ar_startblock) ||
	    !xfs_verify_rtbno(sc->mp, rec->ar_startblock +
			rec->ar_blockcount - 1))
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
	return 0;
}
85
86/* Scrub the realtime bitmap. */
87int
88xfs_scrub_rtbitmap(
89 struct xfs_scrub_context *sc)
90{
91 int error;
92
93 error = xfs_rtalloc_query_all(sc->tp, xfs_scrub_rtbitmap_rec, sc);
94 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
95 goto out;
96
97out:
98 return error;
99}
100
/* Scrub the realtime summary. */
int
xfs_scrub_rtsummary(
	struct xfs_scrub_context	*sc)
{
	/*
	 * XXX: implement this some day.  -ENOENT tells userspace that
	 * this scrubber is not (yet) available, matching the dispatch
	 * code's "unknown metadata type" convention.
	 */
	return -ENOENT;
}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
new file mode 100644
index 000000000000..9c42c4efd01e
--- /dev/null
+++ b/fs/xfs/scrub/scrub.c
@@ -0,0 +1,392 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_icache.h"
34#include "xfs_itable.h"
35#include "xfs_alloc.h"
36#include "xfs_alloc_btree.h"
37#include "xfs_bmap.h"
38#include "xfs_bmap_btree.h"
39#include "xfs_ialloc.h"
40#include "xfs_ialloc_btree.h"
41#include "xfs_refcount.h"
42#include "xfs_refcount_btree.h"
43#include "xfs_rmap.h"
44#include "xfs_rmap_btree.h"
45#include "scrub/xfs_scrub.h"
46#include "scrub/scrub.h"
47#include "scrub/common.h"
48#include "scrub/trace.h"
49#include "scrub/scrub.h"
50#include "scrub/btree.h"
51
52/*
53 * Online Scrub and Repair
54 *
55 * Traditionally, XFS (the kernel driver) did not know how to check or
56 * repair on-disk data structures. That task was left to the xfs_check
57 * and xfs_repair tools, both of which require taking the filesystem
58 * offline for a thorough but time consuming examination. Online
59 * scrub & repair, on the other hand, enables us to check the metadata
60 * for obvious errors while carefully stepping around the filesystem's
61 * ongoing operations, locking rules, etc.
62 *
63 * Given that most XFS metadata consist of records stored in a btree,
64 * most of the checking functions iterate the btree blocks themselves
65 * looking for irregularities. When a record block is encountered, each
66 * record can be checked for obviously bad values. Record values can
67 * also be cross-referenced against other btrees to look for potential
68 * misunderstandings between pieces of metadata.
69 *
70 * It is expected that the checkers responsible for per-AG metadata
71 * structures will lock the AG headers (AGI, AGF, AGFL), iterate the
72 * metadata structure, and perform any relevant cross-referencing before
73 * unlocking the AG and returning the results to userspace. These
74 * scrubbers must not keep an AG locked for too long to avoid tying up
75 * the block and inode allocators.
76 *
77 * Block maps and b-trees rooted in an inode present a special challenge
78 * because they can involve extents from any AG. The general scrubber
79 * structure of lock -> check -> xref -> unlock still holds, but AG
80 * locking order rules /must/ be obeyed to avoid deadlocks. The
81 * ordering rule, of course, is that we must lock in increasing AG
82 * order. Helper functions are provided to track which AG headers we've
83 * already locked. If we detect an imminent locking order violation, we
84 * can signal a potential deadlock, in which case the scrubber can jump
85 * out to the top level, lock all the AGs in order, and retry the scrub.
86 *
87 * For file data (directories, extended attributes, symlinks) scrub, we
88 * can simply lock the inode and walk the data. For btree data
89 * (directories and attributes) we follow the same btree-scrubbing
90 * strategy outlined previously to check the records.
91 *
92 * We use a bit of trickery with transactions to avoid buffer deadlocks
93 * if there is a cycle in the metadata. The basic problem is that
94 * travelling down a btree involves locking the current buffer at each
95 * tree level. If a pointer should somehow point back to a buffer that
96 * we've already examined, we will deadlock due to the second buffer
97 * locking attempt. Note however that grabbing a buffer in transaction
98 * context links the locked buffer to the transaction. If we try to
99 * re-grab the buffer in the context of the same transaction, we avoid
100 * the second lock attempt and continue. Between the verifier and the
101 * scrubber, something will notice that something is amiss and report
102 * the corruption. Therefore, each scrubber will allocate an empty
103 * transaction, attach buffers to it, and cancel the transaction at the
104 * end of the scrub run. Cancelling a non-dirty transaction simply
105 * unlocks the buffers.
106 *
107 * There are four pieces of data that scrub can communicate to
108 * userspace. The first is the error code (errno), which can be used to
109 * communicate operational errors in performing the scrub. There are
110 * also three flags that can be set in the scrub context. If the data
111 * structure itself is corrupt, the CORRUPT flag will be set. If
112 * the metadata is correct but otherwise suboptimal, the PREEN flag
113 * will be set.
114 */
115
116/*
117 * Scrub probe -- userspace uses this to probe if we're willing to scrub
118 * or repair a given mountpoint. This will be used by xfs_scrub to
119 * probe the kernel's abilities to scrub (and repair) the metadata. We
120 * do this by validating the ioctl inputs from userspace, preparing the
121 * filesystem for a scrub (or a repair) operation, and immediately
122 * returning to userspace. Userspace can use the returned errno and
123 * structure state to decide (in broad terms) if scrub/repair are
124 * supported by the running kernel.
125 */
126static int
127xfs_scrub_probe(
128 struct xfs_scrub_context *sc)
129{
130 int error = 0;
131
132 if (sc->sm->sm_ino || sc->sm->sm_agno)
133 return -EINVAL;
134 if (xfs_scrub_should_terminate(sc, &error))
135 return error;
136
137 return 0;
138}
139
140/* Scrub setup and teardown */
141
/* Free all the resources and finish the transactions. */
STATIC int
xfs_scrub_teardown(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip_in,
	int				error)
{
	/* Release any AG headers and btree cursors we grabbed. */
	xfs_scrub_ag_free(sc, &sc->sa);
	/*
	 * Cancelling the (never-dirtied) scrub transaction unlocks all
	 * the buffers that were attached to it during the scan.
	 */
	if (sc->tp) {
		xfs_trans_cancel(sc->tp);
		sc->tp = NULL;
	}
	if (sc->ip) {
		xfs_iunlock(sc->ip, sc->ilock_flags);
		/*
		 * Drop our inode reference -- unless the caller passed
		 * this inode in (the caller owns that reference) or it's
		 * one of the filesystem's internal metadata inodes.
		 */
		if (sc->ip != ip_in &&
		    !xfs_internal_inum(sc->mp, sc->ip->i_ino))
			iput(VFS_I(sc->ip));
		sc->ip = NULL;
	}
	/* Scratch buffer allocated by a setup function, if any. */
	if (sc->buf) {
		kmem_free(sc->buf);
		sc->buf = NULL;
	}
	/* Pass the caller's error code straight through. */
	return error;
}
167
168/* Scrubbing dispatch. */
169
/*
 * Scrub dispatch table.  xfs_scrub_metadata indexes this array directly
 * with sm->sm_type, so the entry order here must match the
 * XFS_SCRUB_TYPE_* numbering exactly.  A NULL ->scrub means "type not
 * implemented" (-ENOENT); ->has, when set, gates the scrubber on a
 * superblock feature predicate.
 */
static const struct xfs_scrub_meta_ops meta_scrub_ops[] = {
	{ /* ioctl presence test */
		.setup	= xfs_scrub_setup_fs,
		.scrub	= xfs_scrub_probe,
	},
	{ /* superblock */
		.setup	= xfs_scrub_setup_ag_header,
		.scrub	= xfs_scrub_superblock,
	},
	{ /* agf */
		.setup	= xfs_scrub_setup_ag_header,
		.scrub	= xfs_scrub_agf,
	},
	{ /* agfl */
		.setup	= xfs_scrub_setup_ag_header,
		.scrub	= xfs_scrub_agfl,
	},
	{ /* agi */
		.setup	= xfs_scrub_setup_ag_header,
		.scrub	= xfs_scrub_agi,
	},
	{ /* bnobt */
		.setup	= xfs_scrub_setup_ag_allocbt,
		.scrub	= xfs_scrub_bnobt,
	},
	{ /* cntbt */
		.setup	= xfs_scrub_setup_ag_allocbt,
		.scrub	= xfs_scrub_cntbt,
	},
	{ /* inobt */
		.setup	= xfs_scrub_setup_ag_iallocbt,
		.scrub	= xfs_scrub_inobt,
	},
	{ /* finobt */
		.setup	= xfs_scrub_setup_ag_iallocbt,
		.scrub	= xfs_scrub_finobt,
		.has	= xfs_sb_version_hasfinobt,
	},
	{ /* rmapbt */
		.setup	= xfs_scrub_setup_ag_rmapbt,
		.scrub	= xfs_scrub_rmapbt,
		.has	= xfs_sb_version_hasrmapbt,
	},
	{ /* refcountbt */
		.setup	= xfs_scrub_setup_ag_refcountbt,
		.scrub	= xfs_scrub_refcountbt,
		.has	= xfs_sb_version_hasreflink,
	},
	{ /* inode record */
		.setup	= xfs_scrub_setup_inode,
		.scrub	= xfs_scrub_inode,
	},
	{ /* inode data fork */
		.setup	= xfs_scrub_setup_inode_bmap,
		.scrub	= xfs_scrub_bmap_data,
	},
	{ /* inode attr fork */
		.setup	= xfs_scrub_setup_inode_bmap,
		.scrub	= xfs_scrub_bmap_attr,
	},
	{ /* inode CoW fork */
		.setup	= xfs_scrub_setup_inode_bmap,
		.scrub	= xfs_scrub_bmap_cow,
	},
	{ /* directory */
		.setup	= xfs_scrub_setup_directory,
		.scrub	= xfs_scrub_directory,
	},
	{ /* extended attributes */
		.setup	= xfs_scrub_setup_xattr,
		.scrub	= xfs_scrub_xattr,
	},
	{ /* symbolic link */
		.setup	= xfs_scrub_setup_symlink,
		.scrub	= xfs_scrub_symlink,
	},
	{ /* parent pointers */
		.setup	= xfs_scrub_setup_parent,
		.scrub	= xfs_scrub_parent,
	},
	{ /* realtime bitmap */
		.setup	= xfs_scrub_setup_rt,
		.scrub	= xfs_scrub_rtbitmap,
		.has	= xfs_sb_version_hasrealtime,
	},
	{ /* realtime summary */
		.setup	= xfs_scrub_setup_rt,
		.scrub	= xfs_scrub_rtsummary,
		.has	= xfs_sb_version_hasrealtime,
	},
	{ /* user quota */
		.setup	= xfs_scrub_setup_quota,
		.scrub	= xfs_scrub_quota,
	},
	{ /* group quota */
		.setup	= xfs_scrub_setup_quota,
		.scrub	= xfs_scrub_quota,
	},
	{ /* project quota */
		.setup	= xfs_scrub_setup_quota,
		.scrub	= xfs_scrub_quota,
	},
};
273
/* This isn't a stable feature, warn once per day. */
static inline void
xfs_scrub_experimental_warning(
	struct xfs_mount	*mp)
{
	/* Interval of 86400 * HZ jiffies == one day, burst of 1 message. */
	static struct ratelimit_state scrub_warning = RATELIMIT_STATE_INIT(
			"xfs_scrub_warning", 86400 * HZ, 1);
	ratelimit_set_flags(&scrub_warning, RATELIMIT_MSG_ON_RELEASE);

	if (__ratelimit(&scrub_warning))
		xfs_alert(mp,
"EXPERIMENTAL online scrub feature in use. Use at your own risk!");
}
287
/* Dispatch metadata scrubbing. */
int
xfs_scrub_metadata(
	struct xfs_inode		*ip,
	struct xfs_scrub_metadata	*sm)
{
	struct xfs_scrub_context	sc;
	struct xfs_mount		*mp = ip->i_mount;
	const struct xfs_scrub_meta_ops	*ops;
	bool				try_harder = false;
	int				error = 0;

	trace_xfs_scrub_start(ip, sm, error);

	/* Forbidden if we are shut down or mounted norecovery. */
	error = -ESHUTDOWN;
	if (XFS_FORCED_SHUTDOWN(mp))
		goto out;
	error = -ENOTRECOVERABLE;
	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
		goto out;

	/* Check our inputs. */
	error = -EINVAL;
	sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
	if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
		goto out;
	/* Reserved area must be all zeroes. */
	if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
		goto out;

	/* Do we know about this type of metadata? */
	error = -ENOENT;
	if (sm->sm_type >= XFS_SCRUB_TYPE_NR)
		goto out;
	ops = &meta_scrub_ops[sm->sm_type];
	if (ops->scrub == NULL)
		goto out;

	/*
	 * We won't scrub any filesystem that doesn't have the ability
	 * to record unwritten extents.  The option was made default in
	 * 2003, removed from mkfs in 2007, and cannot be disabled in
	 * v5, so if we find a filesystem without this flag it's either
	 * really old or totally unsupported.  Avoid it either way.
	 * We also don't support v1-v3 filesystems, which aren't
	 * mountable.
	 */
	error = -EOPNOTSUPP;
	if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
		goto out;

	/* Does this fs even support this type of metadata? */
	error = -ENOENT;
	if (ops->has && !ops->has(&mp->m_sb))
		goto out;

	/* We don't know how to repair anything yet. */
	error = -EOPNOTSUPP;
	if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
		goto out;

	xfs_scrub_experimental_warning(mp);

retry_op:
	/* Set up for the operation. */
	memset(&sc, 0, sizeof(sc));
	sc.mp = ip->i_mount;
	sc.sm = sm;
	sc.ops = ops;
	sc.try_harder = try_harder;
	sc.sa.agno = NULLAGNUMBER;
	error = sc.ops->setup(&sc, ip);
	if (error)
		goto out_teardown;

	/* Scrub for errors. */
	error = sc.ops->scrub(&sc);
	if (!try_harder && error == -EDEADLOCK) {
		/*
		 * Scrubbers return -EDEADLOCK to mean 'try harder'.
		 * Tear down everything we hold, then set up again with
		 * preparation for worst-case scenarios.  try_harder
		 * guards the loop, so we retry at most once.
		 */
		error = xfs_scrub_teardown(&sc, ip, 0);
		if (error)
			goto out;
		try_harder = true;
		goto retry_op;
	} else if (error)
		goto out_teardown;

	if (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
			       XFS_SCRUB_OFLAG_XCORRUPT))
		xfs_alert_ratelimited(mp, "Corruption detected during scrub.");

out_teardown:
	error = xfs_scrub_teardown(&sc, ip, error);
out:
	trace_xfs_scrub_done(ip, sm, error);
	/*
	 * Media/metadata corruption errors are reported to userspace via
	 * the CORRUPT output flag rather than as an operational error.
	 */
	if (error == -EFSCORRUPTED || error == -EFSBADCRC) {
		sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
		error = 0;
	}
	return error;
}
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
new file mode 100644
index 000000000000..e9ec041cf713
--- /dev/null
+++ b/fs/xfs/scrub/scrub.h
@@ -0,0 +1,115 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
#ifndef __XFS_SCRUB_SCRUB_H__
#define __XFS_SCRUB_SCRUB_H__

struct xfs_scrub_context;

/*
 * Per-type scrub operations; scrub.c's dispatch table has one of these
 * for each XFS_SCRUB_TYPE_* value.
 */
struct xfs_scrub_meta_ops {
	/* Acquire whatever resources are needed for the operation. */
	int		(*setup)(struct xfs_scrub_context *,
				 struct xfs_inode *);

	/* Examine metadata for errors. */
	int		(*scrub)(struct xfs_scrub_context *);

	/* Decide if we even have this piece of metadata. */
	bool		(*has)(struct xfs_sb *);
};

/* Buffer pointers and btree cursors for an entire AG. */
struct xfs_scrub_ag {
	xfs_agnumber_t			agno;

	/* AG btree roots */
	struct xfs_buf			*agf_bp;
	struct xfs_buf			*agfl_bp;
	struct xfs_buf			*agi_bp;

	/* AG btrees */
	struct xfs_btree_cur		*bno_cur;
	struct xfs_btree_cur		*cnt_cur;
	struct xfs_btree_cur		*ino_cur;
	struct xfs_btree_cur		*fino_cur;
	struct xfs_btree_cur		*rmap_cur;
	struct xfs_btree_cur		*refc_cur;
};

struct xfs_scrub_context {
	/* General scrub state. */
	struct xfs_mount		*mp;
	struct xfs_scrub_metadata	*sm;	/* userspace request/reply */
	const struct xfs_scrub_meta_ops	*ops;
	struct xfs_trans		*tp;	/* empty scrub transaction */
	struct xfs_inode		*ip;	/* inode being scrubbed */
	void				*buf;	/* setup-allocated scratch */
	uint				ilock_flags;	/* locks held on ip */
	bool				try_harder;	/* EDEADLOCK retry */

	/* State tracking for single-AG operations. */
	struct xfs_scrub_ag		sa;
};

/* Metadata scrubbers */
int xfs_scrub_tester(struct xfs_scrub_context *sc);
int xfs_scrub_superblock(struct xfs_scrub_context *sc);
int xfs_scrub_agf(struct xfs_scrub_context *sc);
int xfs_scrub_agfl(struct xfs_scrub_context *sc);
int xfs_scrub_agi(struct xfs_scrub_context *sc);
int xfs_scrub_bnobt(struct xfs_scrub_context *sc);
int xfs_scrub_cntbt(struct xfs_scrub_context *sc);
int xfs_scrub_inobt(struct xfs_scrub_context *sc);
int xfs_scrub_finobt(struct xfs_scrub_context *sc);
int xfs_scrub_rmapbt(struct xfs_scrub_context *sc);
int xfs_scrub_refcountbt(struct xfs_scrub_context *sc);
int xfs_scrub_inode(struct xfs_scrub_context *sc);
int xfs_scrub_bmap_data(struct xfs_scrub_context *sc);
int xfs_scrub_bmap_attr(struct xfs_scrub_context *sc);
int xfs_scrub_bmap_cow(struct xfs_scrub_context *sc);
int xfs_scrub_directory(struct xfs_scrub_context *sc);
int xfs_scrub_xattr(struct xfs_scrub_context *sc);
int xfs_scrub_symlink(struct xfs_scrub_context *sc);
int xfs_scrub_parent(struct xfs_scrub_context *sc);
/*
 * Stubs return -ENOENT ("unknown metadata type") when the corresponding
 * feature is compiled out of the kernel.
 */
#ifdef CONFIG_XFS_RT
int xfs_scrub_rtbitmap(struct xfs_scrub_context *sc);
int xfs_scrub_rtsummary(struct xfs_scrub_context *sc);
#else
static inline int
xfs_scrub_rtbitmap(struct xfs_scrub_context *sc)
{
	return -ENOENT;
}
static inline int
xfs_scrub_rtsummary(struct xfs_scrub_context *sc)
{
	return -ENOENT;
}
#endif
#ifdef CONFIG_XFS_QUOTA
int xfs_scrub_quota(struct xfs_scrub_context *sc);
#else
static inline int
xfs_scrub_quota(struct xfs_scrub_context *sc)
{
	return -ENOENT;
}
#endif

#endif	/* __XFS_SCRUB_SCRUB_H__ */
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c
new file mode 100644
index 000000000000..3aa3d60f7c16
--- /dev/null
+++ b/fs/xfs/scrub/symlink.c
@@ -0,0 +1,92 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_defer.h"
27#include "xfs_btree.h"
28#include "xfs_bit.h"
29#include "xfs_log_format.h"
30#include "xfs_trans.h"
31#include "xfs_sb.h"
32#include "xfs_inode.h"
33#include "xfs_inode_fork.h"
34#include "xfs_symlink.h"
35#include "scrub/xfs_scrub.h"
36#include "scrub/scrub.h"
37#include "scrub/common.h"
38#include "scrub/trace.h"
39
/* Set us up to scrub a symbolic link. */
int
xfs_scrub_setup_symlink(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	/*
	 * Allocate the buffer without the inode lock held.  Room for the
	 * longest possible target plus a NUL terminator; xfs_scrub_teardown
	 * frees sc->buf on every exit path, so no cleanup is needed here.
	 */
	sc->buf = kmem_zalloc_large(XFS_SYMLINK_MAXLEN + 1, KM_SLEEP);
	if (!sc->buf)
		return -ENOMEM;

	return xfs_scrub_setup_inode_contents(sc, ip, 0);
}
53
/* Symbolic links. */

int
xfs_scrub_symlink(
	struct xfs_scrub_context	*sc)
{
	struct xfs_inode		*ip = sc->ip;
	struct xfs_ifork		*ifp;
	loff_t				len;
	int				error = 0;

	if (!S_ISLNK(VFS_I(ip)->i_mode))
		return -ENOENT;
	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	len = ip->i_d.di_size;	/* on-disk symlink target length */

	/* Plausible size? */
	if (len > XFS_SYMLINK_MAXLEN || len <= 0) {
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
		goto out;
	}

	/* Inline symlink? */
	if (ifp->if_flags & XFS_IFINLINE) {
		/*
		 * Target must fit in the inline fork area and must not
		 * contain a NUL byte before di_size bytes (strnlen < len
		 * means an early terminator => corrupt).
		 */
		if (len > XFS_IFORK_DSIZE(ip) ||
		    len > strnlen(ifp->if_u1.if_data, XFS_IFORK_DSIZE(ip)))
			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
		goto out;
	}

	/* Remote symlink; must read the contents. */
	error = xfs_readlink_bmap_ilocked(sc->ip, sc->buf);
	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
		goto out;
	/* The target we read back must be at least di_size bytes long. */
	if (strnlen(sc->buf, XFS_SYMLINK_MAXLEN) < len)
		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
out:
	return error;
}
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
new file mode 100644
index 000000000000..472080e75788
--- /dev/null
+++ b/fs/xfs/scrub/trace.c
@@ -0,0 +1,59 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_log_format.h"
25#include "xfs_trans_resv.h"
26#include "xfs_mount.h"
27#include "xfs_defer.h"
28#include "xfs_da_format.h"
29#include "xfs_defer.h"
30#include "xfs_inode.h"
31#include "xfs_btree.h"
32#include "xfs_trans.h"
33#include "xfs_bit.h"
34#include "scrub/xfs_scrub.h"
35#include "scrub/scrub.h"
36#include "scrub/common.h"
37
/* Figure out which block the btree cursor was pointing to. */
static inline xfs_fsblock_t
xfs_scrub_btree_cur_fsbno(
	struct xfs_btree_cur	*cur,
	int			level)
{
	/* A buffer is attached at this level: report its disk address. */
	if (level < cur->bc_nlevels && cur->bc_bufs[level])
		return XFS_DADDR_TO_FSB(cur->bc_mp, cur->bc_bufs[level]->b_bn);
	/* Root of a long-pointer btree: report the owning inode's block. */
	else if (level == cur->bc_nlevels - 1 &&
		 cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return XFS_INO_TO_FSB(cur->bc_mp, cur->bc_private.b.ip->i_ino);
	/* Short-pointer (per-AG) btree: fall back to AG block zero. */
	else if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS))
		return XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, 0);
	return NULLFSBLOCK;
}
53
54/*
55 * We include this last to have the helpers above available for the trace
56 * event implementations.
57 */
58#define CREATE_TRACE_POINTS
59#include "scrub/trace.h"
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
new file mode 100644
index 000000000000..c4ebfb5c1ee8
--- /dev/null
+++ b/fs/xfs/scrub/trace.h
@@ -0,0 +1,499 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#undef TRACE_SYSTEM
21#define TRACE_SYSTEM xfs_scrub
22
23#if !defined(_TRACE_XFS_SCRUB_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
24#define _TRACE_XFS_SCRUB_TRACE_H
25
26#include <linux/tracepoint.h>
27#include "xfs_bit.h"
28
/*
 * Scrub invocation events (start/done/deadlock_retry): capture the
 * userspace request fields (type, agno, inum, gen, flags) plus the
 * errno at the trace point.
 */
DECLARE_EVENT_CLASS(xfs_scrub_class,
	TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm,
		 int error),
	TP_ARGS(ip, sm, error),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(unsigned int, type)
		__field(xfs_agnumber_t, agno)
		__field(xfs_ino_t, inum)
		__field(unsigned int, gen)
		__field(unsigned int, flags)
		__field(int, error)
	),
	TP_fast_assign(
		__entry->dev = ip->i_mount->m_super->s_dev;
		__entry->ino = ip->i_ino;
		__entry->type = sm->sm_type;
		__entry->agno = sm->sm_agno;
		__entry->inum = sm->sm_ino;
		__entry->gen = sm->sm_gen;
		__entry->flags = sm->sm_flags;
		__entry->error = error;
	),
	TP_printk("dev %d:%d ino %llu type %u agno %u inum %llu gen %u flags 0x%x error %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->type,
		  __entry->agno,
		  __entry->inum,
		  __entry->gen,
		  __entry->flags,
		  __entry->error)
)
#define DEFINE_SCRUB_EVENT(name) \
DEFINE_EVENT(xfs_scrub_class, name, \
	TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm, \
		 int error), \
	TP_ARGS(ip, sm, error))

DEFINE_SCRUB_EVENT(xfs_scrub_start);
DEFINE_SCRUB_EVENT(xfs_scrub_done);
DEFINE_SCRUB_EVENT(xfs_scrub_deadlock_retry);
72
/* Operational (non-corruption) error while scrubbing an AG block. */
TRACE_EVENT(xfs_scrub_op_error,
	TP_PROTO(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
		 xfs_agblock_t bno, int error, void *ret_ip),
	TP_ARGS(sc, agno, bno, error, ret_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned int, type)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, bno)
		__field(int, error)
		__field(void *, ret_ip)
	),
	TP_fast_assign(
		__entry->dev = sc->mp->m_super->s_dev;
		__entry->type = sc->sm->sm_type;
		__entry->agno = agno;
		__entry->bno = bno;
		__entry->error = error;
		__entry->ret_ip = ret_ip;
	),
	TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pF",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->type,
		  __entry->agno,
		  __entry->bno,
		  __entry->error,
		  __entry->ret_ip)
);
101
/* Operational error while scrubbing a file fork at the given offset. */
TRACE_EVENT(xfs_scrub_file_op_error,
	TP_PROTO(struct xfs_scrub_context *sc, int whichfork,
		 xfs_fileoff_t offset, int error, void *ret_ip),
	TP_ARGS(sc, whichfork, offset, error, ret_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(int, whichfork)
		__field(unsigned int, type)
		__field(xfs_fileoff_t, offset)
		__field(int, error)
		__field(void *, ret_ip)
	),
	TP_fast_assign(
		__entry->dev = sc->ip->i_mount->m_super->s_dev;
		__entry->ino = sc->ip->i_ino;
		__entry->whichfork = whichfork;
		__entry->type = sc->sm->sm_type;
		__entry->offset = offset;
		__entry->error = error;
		__entry->ret_ip = ret_ip;
	),
	TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu error %d ret_ip %pF",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->whichfork,
		  __entry->type,
		  __entry->offset,
		  __entry->error,
		  __entry->ret_ip)
);
133
/*
 * Corruption/preen findings located by raw disk address; the daddr is
 * converted to an (agno, agbno) pair for the trace record.
 */
DECLARE_EVENT_CLASS(xfs_scrub_block_error_class,
	TP_PROTO(struct xfs_scrub_context *sc, xfs_daddr_t daddr, void *ret_ip),
	TP_ARGS(sc, daddr, ret_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned int, type)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, bno)
		__field(void *, ret_ip)
	),
	TP_fast_assign(
		xfs_fsblock_t	fsbno;
		xfs_agnumber_t	agno;
		xfs_agblock_t	bno;

		fsbno = XFS_DADDR_TO_FSB(sc->mp, daddr);
		agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
		bno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);

		__entry->dev = sc->mp->m_super->s_dev;
		__entry->type = sc->sm->sm_type;
		__entry->agno = agno;
		__entry->bno = bno;
		__entry->ret_ip = ret_ip;
	),
	TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pF",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->type,
		  __entry->agno,
		  __entry->bno,
		  __entry->ret_ip)
)

#define DEFINE_SCRUB_BLOCK_ERROR_EVENT(name) \
DEFINE_EVENT(xfs_scrub_block_error_class, name, \
	TP_PROTO(struct xfs_scrub_context *sc, xfs_daddr_t daddr, \
		 void *ret_ip), \
	TP_ARGS(sc, daddr, ret_ip))

DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_error);
DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_preen);
175
/*
 * Inode-related findings.  If a disk address was supplied it names the
 * bad block; a zero daddr falls back to the location of the inode
 * itself.
 */
DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class,
	TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, xfs_daddr_t daddr,
		 void *ret_ip),
	TP_ARGS(sc, ino, daddr, ret_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(unsigned int, type)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, bno)
		__field(void *, ret_ip)
	),
	TP_fast_assign(
		xfs_fsblock_t	fsbno;
		xfs_agnumber_t	agno;
		xfs_agblock_t	bno;

		if (daddr) {
			fsbno = XFS_DADDR_TO_FSB(sc->mp, daddr);
			agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
			bno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
		} else {
			agno = XFS_INO_TO_AGNO(sc->mp, ino);
			bno = XFS_AGINO_TO_AGBNO(sc->mp,
					XFS_INO_TO_AGINO(sc->mp, ino));
		}

		__entry->dev = sc->mp->m_super->s_dev;
		__entry->ino = ino;
		__entry->type = sc->sm->sm_type;
		__entry->agno = agno;
		__entry->bno = bno;
		__entry->ret_ip = ret_ip;
	),
	TP_printk("dev %d:%d ino %llu type %u agno %u agbno %u ret_ip %pF",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->type,
		  __entry->agno,
		  __entry->bno,
		  __entry->ret_ip)
)

#define DEFINE_SCRUB_INO_ERROR_EVENT(name) \
DEFINE_EVENT(xfs_scrub_ino_error_class, name, \
	TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, \
		 xfs_daddr_t daddr, void *ret_ip), \
	TP_ARGS(sc, ino, daddr, ret_ip))

DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_error);
DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_preen);
DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_warning);
228
229DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class,
230 TP_PROTO(struct xfs_scrub_context *sc, int whichfork,
231 xfs_fileoff_t offset, void *ret_ip),
232 TP_ARGS(sc, whichfork, offset, ret_ip),
233 TP_STRUCT__entry(
234 __field(dev_t, dev)
235 __field(xfs_ino_t, ino)
236 __field(int, whichfork)
237 __field(unsigned int, type)
238 __field(xfs_fileoff_t, offset)
239 __field(void *, ret_ip)
240 ),
241 TP_fast_assign(
242 __entry->dev = sc->ip->i_mount->m_super->s_dev;
243 __entry->ino = sc->ip->i_ino;
244 __entry->whichfork = whichfork;
245 __entry->type = sc->sm->sm_type;
246 __entry->offset = offset;
247 __entry->ret_ip = ret_ip;
248 ),
249 TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu ret_ip %pF",
250 MAJOR(__entry->dev), MINOR(__entry->dev),
251 __entry->ino,
252 __entry->whichfork,
253 __entry->type,
254 __entry->offset,
255 __entry->ret_ip)
256);
257
258#define DEFINE_SCRUB_FBLOCK_ERROR_EVENT(name) \
259DEFINE_EVENT(xfs_scrub_fblock_error_class, name, \
260 TP_PROTO(struct xfs_scrub_context *sc, int whichfork, \
261 xfs_fileoff_t offset, void *ret_ip), \
262 TP_ARGS(sc, whichfork, offset, ret_ip))
263
264DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xfs_scrub_fblock_error);
265DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xfs_scrub_fblock_warning);
266
267TRACE_EVENT(xfs_scrub_incomplete,
268 TP_PROTO(struct xfs_scrub_context *sc, void *ret_ip),
269 TP_ARGS(sc, ret_ip),
270 TP_STRUCT__entry(
271 __field(dev_t, dev)
272 __field(unsigned int, type)
273 __field(void *, ret_ip)
274 ),
275 TP_fast_assign(
276 __entry->dev = sc->mp->m_super->s_dev;
277 __entry->type = sc->sm->sm_type;
278 __entry->ret_ip = ret_ip;
279 ),
280 TP_printk("dev %d:%d type %u ret_ip %pF",
281 MAJOR(__entry->dev), MINOR(__entry->dev),
282 __entry->type,
283 __entry->ret_ip)
284);
285
286TRACE_EVENT(xfs_scrub_btree_op_error,
287 TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
288 int level, int error, void *ret_ip),
289 TP_ARGS(sc, cur, level, error, ret_ip),
290 TP_STRUCT__entry(
291 __field(dev_t, dev)
292 __field(unsigned int, type)
293 __field(xfs_btnum_t, btnum)
294 __field(int, level)
295 __field(xfs_agnumber_t, agno)
296 __field(xfs_agblock_t, bno)
297 __field(int, ptr);
298 __field(int, error)
299 __field(void *, ret_ip)
300 ),
301 TP_fast_assign(
302 xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
303
304 __entry->dev = sc->mp->m_super->s_dev;
305 __entry->type = sc->sm->sm_type;
306 __entry->btnum = cur->bc_btnum;
307 __entry->level = level;
308 __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno);
309 __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
310 __entry->ptr = cur->bc_ptrs[level];
311 __entry->error = error;
312 __entry->ret_ip = ret_ip;
313 ),
314 TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF",
315 MAJOR(__entry->dev), MINOR(__entry->dev),
316 __entry->type,
317 __entry->btnum,
318 __entry->level,
319 __entry->ptr,
320 __entry->agno,
321 __entry->bno,
322 __entry->error,
323 __entry->ret_ip)
324);
325
326TRACE_EVENT(xfs_scrub_ifork_btree_op_error,
327 TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
328 int level, int error, void *ret_ip),
329 TP_ARGS(sc, cur, level, error, ret_ip),
330 TP_STRUCT__entry(
331 __field(dev_t, dev)
332 __field(xfs_ino_t, ino)
333 __field(int, whichfork)
334 __field(unsigned int, type)
335 __field(xfs_btnum_t, btnum)
336 __field(int, level)
337 __field(int, ptr)
338 __field(xfs_agnumber_t, agno)
339 __field(xfs_agblock_t, bno)
340 __field(int, error)
341 __field(void *, ret_ip)
342 ),
343 TP_fast_assign(
344 xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
345 __entry->dev = sc->mp->m_super->s_dev;
346 __entry->ino = sc->ip->i_ino;
347 __entry->whichfork = cur->bc_private.b.whichfork;
348 __entry->type = sc->sm->sm_type;
349 __entry->btnum = cur->bc_btnum;
350 __entry->level = level;
351 __entry->ptr = cur->bc_ptrs[level];
352 __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno);
353 __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
354 __entry->error = error;
355 __entry->ret_ip = ret_ip;
356 ),
357 TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF",
358 MAJOR(__entry->dev), MINOR(__entry->dev),
359 __entry->ino,
360 __entry->whichfork,
361 __entry->type,
362 __entry->btnum,
363 __entry->level,
364 __entry->ptr,
365 __entry->agno,
366 __entry->bno,
367 __entry->error,
368 __entry->ret_ip)
369);
370
371TRACE_EVENT(xfs_scrub_btree_error,
372 TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
373 int level, void *ret_ip),
374 TP_ARGS(sc, cur, level, ret_ip),
375 TP_STRUCT__entry(
376 __field(dev_t, dev)
377 __field(unsigned int, type)
378 __field(xfs_btnum_t, btnum)
379 __field(int, level)
380 __field(xfs_agnumber_t, agno)
381 __field(xfs_agblock_t, bno)
382 __field(int, ptr);
383 __field(void *, ret_ip)
384 ),
385 TP_fast_assign(
386 xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
387 __entry->dev = sc->mp->m_super->s_dev;
388 __entry->type = sc->sm->sm_type;
389 __entry->btnum = cur->bc_btnum;
390 __entry->level = level;
391 __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno);
392 __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
393 __entry->ptr = cur->bc_ptrs[level];
394 __entry->ret_ip = ret_ip;
395 ),
396 TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF",
397 MAJOR(__entry->dev), MINOR(__entry->dev),
398 __entry->type,
399 __entry->btnum,
400 __entry->level,
401 __entry->ptr,
402 __entry->agno,
403 __entry->bno,
404 __entry->ret_ip)
405);
406
407TRACE_EVENT(xfs_scrub_ifork_btree_error,
408 TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
409 int level, void *ret_ip),
410 TP_ARGS(sc, cur, level, ret_ip),
411 TP_STRUCT__entry(
412 __field(dev_t, dev)
413 __field(xfs_ino_t, ino)
414 __field(int, whichfork)
415 __field(unsigned int, type)
416 __field(xfs_btnum_t, btnum)
417 __field(int, level)
418 __field(xfs_agnumber_t, agno)
419 __field(xfs_agblock_t, bno)
420 __field(int, ptr);
421 __field(void *, ret_ip)
422 ),
423 TP_fast_assign(
424 xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
425 __entry->dev = sc->mp->m_super->s_dev;
426 __entry->ino = sc->ip->i_ino;
427 __entry->whichfork = cur->bc_private.b.whichfork;
428 __entry->type = sc->sm->sm_type;
429 __entry->btnum = cur->bc_btnum;
430 __entry->level = level;
431 __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno);
432 __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
433 __entry->ptr = cur->bc_ptrs[level];
434 __entry->ret_ip = ret_ip;
435 ),
436 TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF",
437 MAJOR(__entry->dev), MINOR(__entry->dev),
438 __entry->ino,
439 __entry->whichfork,
440 __entry->type,
441 __entry->btnum,
442 __entry->level,
443 __entry->ptr,
444 __entry->agno,
445 __entry->bno,
446 __entry->ret_ip)
447);
448
449DECLARE_EVENT_CLASS(xfs_scrub_sbtree_class,
450 TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
451 int level),
452 TP_ARGS(sc, cur, level),
453 TP_STRUCT__entry(
454 __field(dev_t, dev)
455 __field(int, type)
456 __field(xfs_btnum_t, btnum)
457 __field(xfs_agnumber_t, agno)
458 __field(xfs_agblock_t, bno)
459 __field(int, level)
460 __field(int, nlevels)
461 __field(int, ptr)
462 ),
463 TP_fast_assign(
464 xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
465
466 __entry->dev = sc->mp->m_super->s_dev;
467 __entry->type = sc->sm->sm_type;
468 __entry->btnum = cur->bc_btnum;
469 __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno);
470 __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
471 __entry->level = level;
472 __entry->nlevels = cur->bc_nlevels;
473 __entry->ptr = cur->bc_ptrs[level];
474 ),
475 TP_printk("dev %d:%d type %u btnum %d agno %u agbno %u level %d nlevels %d ptr %d",
476 MAJOR(__entry->dev), MINOR(__entry->dev),
477 __entry->type,
478 __entry->btnum,
479 __entry->agno,
480 __entry->bno,
481 __entry->level,
482 __entry->nlevels,
483 __entry->ptr)
484)
485#define DEFINE_SCRUB_SBTREE_EVENT(name) \
486DEFINE_EVENT(xfs_scrub_sbtree_class, name, \
487 TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, \
488 int level), \
489 TP_ARGS(sc, cur, level))
490
491DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec);
492DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key);
493
494#endif /* _TRACE_XFS_SCRUB_TRACE_H */
495
496#undef TRACE_INCLUDE_PATH
497#define TRACE_INCLUDE_PATH .
498#define TRACE_INCLUDE_FILE scrub/trace
499#include <trace/define_trace.h>
diff --git a/fs/xfs/scrub/xfs_scrub.h b/fs/xfs/scrub/xfs_scrub.h
new file mode 100644
index 000000000000..e00e0eadac6a
--- /dev/null
+++ b/fs/xfs/scrub/xfs_scrub.h
@@ -0,0 +1,29 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#ifndef __XFS_SCRUB_H__
21#define __XFS_SCRUB_H__
22
23#ifndef CONFIG_XFS_ONLINE_SCRUB
24# define xfs_scrub_metadata(ip, sm) (-ENOTTY)
25#else
26int xfs_scrub_metadata(struct xfs_inode *ip, struct xfs_scrub_metadata *sm);
27#endif /* CONFIG_XFS_ONLINE_SCRUB */
28
29#endif /* __XFS_SCRUB_H__ */
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 80cd0fd86783..5ff7f228d616 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -19,7 +19,6 @@
19#define __XFS_H__ 19#define __XFS_H__
20 20
21#ifdef CONFIG_XFS_DEBUG 21#ifdef CONFIG_XFS_DEBUG
22#define STATIC
23#define DEBUG 1 22#define DEBUG 1
24#define XFS_BUF_LOCK_TRACKING 1 23#define XFS_BUF_LOCK_TRACKING 1
25#endif 24#endif
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 5d5a5e277f35..d07bf27451c9 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -48,6 +48,8 @@ struct xfs_attr_list_context;
48#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */ 48#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */
49#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ 49#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */
50 50
51#define ATTR_INCOMPLETE 0x4000 /* [kernel] return INCOMPLETE attr keys */
52
51#define XFS_ATTR_FLAGS \ 53#define XFS_ATTR_FLAGS \
52 { ATTR_DONTFOLLOW, "DONTFOLLOW" }, \ 54 { ATTR_DONTFOLLOW, "DONTFOLLOW" }, \
53 { ATTR_ROOT, "ROOT" }, \ 55 { ATTR_ROOT, "ROOT" }, \
@@ -56,7 +58,8 @@ struct xfs_attr_list_context;
56 { ATTR_CREATE, "CREATE" }, \ 58 { ATTR_CREATE, "CREATE" }, \
57 { ATTR_REPLACE, "REPLACE" }, \ 59 { ATTR_REPLACE, "REPLACE" }, \
58 { ATTR_KERNOTIME, "KERNOTIME" }, \ 60 { ATTR_KERNOTIME, "KERNOTIME" }, \
59 { ATTR_KERNOVAL, "KERNOVAL" } 61 { ATTR_KERNOVAL, "KERNOVAL" }, \
62 { ATTR_INCOMPLETE, "INCOMPLETE" }
60 63
61/* 64/*
62 * The maximum size (into the kernel or returned from the kernel) of an 65 * The maximum size (into the kernel or returned from the kernel) of an
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index e3a950ed35a8..52818ea2eb50 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -251,47 +251,44 @@ xfs_attr3_node_inactive(
251 * traversal of the tree so we may deal with many blocks 251 * traversal of the tree so we may deal with many blocks
252 * before we come back to this one. 252 * before we come back to this one.
253 */ 253 */
254 error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp, 254 error = xfs_da3_node_read(*trans, dp, child_fsb, -1, &child_bp,
255 XFS_ATTR_FORK); 255 XFS_ATTR_FORK);
256 if (error) 256 if (error)
257 return error; 257 return error;
258 if (child_bp) {
259 /* save for re-read later */
260 child_blkno = XFS_BUF_ADDR(child_bp);
261 258
262 /* 259 /* save for re-read later */
263 * Invalidate the subtree, however we have to. 260 child_blkno = XFS_BUF_ADDR(child_bp);
264 */
265 info = child_bp->b_addr;
266 switch (info->magic) {
267 case cpu_to_be16(XFS_DA_NODE_MAGIC):
268 case cpu_to_be16(XFS_DA3_NODE_MAGIC):
269 error = xfs_attr3_node_inactive(trans, dp,
270 child_bp, level + 1);
271 break;
272 case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
273 case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
274 error = xfs_attr3_leaf_inactive(trans, dp,
275 child_bp);
276 break;
277 default:
278 error = -EIO;
279 xfs_trans_brelse(*trans, child_bp);
280 break;
281 }
282 if (error)
283 return error;
284 261
285 /* 262 /*
286 * Remove the subsidiary block from the cache 263 * Invalidate the subtree, however we have to.
287 * and from the log. 264 */
288 */ 265 info = child_bp->b_addr;
289 error = xfs_da_get_buf(*trans, dp, 0, child_blkno, 266 switch (info->magic) {
290 &child_bp, XFS_ATTR_FORK); 267 case cpu_to_be16(XFS_DA_NODE_MAGIC):
291 if (error) 268 case cpu_to_be16(XFS_DA3_NODE_MAGIC):
292 return error; 269 error = xfs_attr3_node_inactive(trans, dp, child_bp,
293 xfs_trans_binval(*trans, child_bp); 270 level + 1);
271 break;
272 case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
273 case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
274 error = xfs_attr3_leaf_inactive(trans, dp, child_bp);
275 break;
276 default:
277 error = -EIO;
278 xfs_trans_brelse(*trans, child_bp);
279 break;
294 } 280 }
281 if (error)
282 return error;
283
284 /*
285 * Remove the subsidiary block from the cache and from the log.
286 */
287 error = xfs_da_get_buf(*trans, dp, 0, child_blkno, &child_bp,
288 XFS_ATTR_FORK);
289 if (error)
290 return error;
291 xfs_trans_binval(*trans, child_bp);
295 292
296 /* 293 /*
297 * If we're not done, re-read the parent to get the next 294 * If we're not done, re-read the parent to get the next
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 7740c8a5e736..3e59a348ea71 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -204,19 +204,103 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
204 return 0; 204 return 0;
205} 205}
206 206
207/*
208 * We didn't find the block & hash mentioned in the cursor state, so
209 * walk down the attr btree looking for the hash.
210 */
207STATIC int 211STATIC int
208xfs_attr_node_list(xfs_attr_list_context_t *context) 212xfs_attr_node_list_lookup(
213 struct xfs_attr_list_context *context,
214 struct attrlist_cursor_kern *cursor,
215 struct xfs_buf **pbp)
209{ 216{
210 attrlist_cursor_kern_t *cursor; 217 struct xfs_da3_icnode_hdr nodehdr;
211 xfs_attr_leafblock_t *leaf; 218 struct xfs_da_intnode *node;
212 xfs_da_intnode_t *node; 219 struct xfs_da_node_entry *btree;
213 struct xfs_attr3_icleaf_hdr leafhdr; 220 struct xfs_inode *dp = context->dp;
214 struct xfs_da3_icnode_hdr nodehdr; 221 struct xfs_mount *mp = dp->i_mount;
215 struct xfs_da_node_entry *btree; 222 struct xfs_trans *tp = context->tp;
216 int error, i; 223 struct xfs_buf *bp;
217 struct xfs_buf *bp; 224 int i;
218 struct xfs_inode *dp = context->dp; 225 int error = 0;
219 struct xfs_mount *mp = dp->i_mount; 226 unsigned int expected_level = 0;
227 uint16_t magic;
228
229 ASSERT(*pbp == NULL);
230 cursor->blkno = 0;
231 for (;;) {
232 error = xfs_da3_node_read(tp, dp, cursor->blkno, -1, &bp,
233 XFS_ATTR_FORK);
234 if (error)
235 return error;
236 node = bp->b_addr;
237 magic = be16_to_cpu(node->hdr.info.magic);
238 if (magic == XFS_ATTR_LEAF_MAGIC ||
239 magic == XFS_ATTR3_LEAF_MAGIC)
240 break;
241 if (magic != XFS_DA_NODE_MAGIC &&
242 magic != XFS_DA3_NODE_MAGIC) {
243 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
244 node);
245 goto out_corruptbuf;
246 }
247
248 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
249
250 /* Tree taller than we can handle; bail out! */
251 if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH)
252 goto out_corruptbuf;
253
254 /* Check the level from the root node. */
255 if (cursor->blkno == 0)
256 expected_level = nodehdr.level - 1;
257 else if (expected_level != nodehdr.level)
258 goto out_corruptbuf;
259 else
260 expected_level--;
261
262 btree = dp->d_ops->node_tree_p(node);
263 for (i = 0; i < nodehdr.count; btree++, i++) {
264 if (cursor->hashval <= be32_to_cpu(btree->hashval)) {
265 cursor->blkno = be32_to_cpu(btree->before);
266 trace_xfs_attr_list_node_descend(context,
267 btree);
268 break;
269 }
270 }
271 xfs_trans_brelse(tp, bp);
272
273 if (i == nodehdr.count)
274 return 0;
275
276 /* We can't point back to the root. */
277 if (cursor->blkno == 0)
278 return -EFSCORRUPTED;
279 }
280
281 if (expected_level != 0)
282 goto out_corruptbuf;
283
284 *pbp = bp;
285 return 0;
286
287out_corruptbuf:
288 xfs_trans_brelse(tp, bp);
289 return -EFSCORRUPTED;
290}
291
292STATIC int
293xfs_attr_node_list(
294 struct xfs_attr_list_context *context)
295{
296 struct xfs_attr3_icleaf_hdr leafhdr;
297 struct attrlist_cursor_kern *cursor;
298 struct xfs_attr_leafblock *leaf;
299 struct xfs_da_intnode *node;
300 struct xfs_buf *bp;
301 struct xfs_inode *dp = context->dp;
302 struct xfs_mount *mp = dp->i_mount;
303 int error;
220 304
221 trace_xfs_attr_node_list(context); 305 trace_xfs_attr_node_list(context);
222 306
@@ -277,47 +361,9 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
277 * Note that start of node block is same as start of leaf block. 361 * Note that start of node block is same as start of leaf block.
278 */ 362 */
279 if (bp == NULL) { 363 if (bp == NULL) {
280 cursor->blkno = 0; 364 error = xfs_attr_node_list_lookup(context, cursor, &bp);
281 for (;;) { 365 if (error || !bp)
282 uint16_t magic; 366 return error;
283
284 error = xfs_da3_node_read(context->tp, dp,
285 cursor->blkno, -1, &bp,
286 XFS_ATTR_FORK);
287 if (error)
288 return error;
289 node = bp->b_addr;
290 magic = be16_to_cpu(node->hdr.info.magic);
291 if (magic == XFS_ATTR_LEAF_MAGIC ||
292 magic == XFS_ATTR3_LEAF_MAGIC)
293 break;
294 if (magic != XFS_DA_NODE_MAGIC &&
295 magic != XFS_DA3_NODE_MAGIC) {
296 XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
297 XFS_ERRLEVEL_LOW,
298 context->dp->i_mount,
299 node);
300 xfs_trans_brelse(context->tp, bp);
301 return -EFSCORRUPTED;
302 }
303
304 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
305 btree = dp->d_ops->node_tree_p(node);
306 for (i = 0; i < nodehdr.count; btree++, i++) {
307 if (cursor->hashval
308 <= be32_to_cpu(btree->hashval)) {
309 cursor->blkno = be32_to_cpu(btree->before);
310 trace_xfs_attr_list_node_descend(context,
311 btree);
312 break;
313 }
314 }
315 if (i == nodehdr.count) {
316 xfs_trans_brelse(context->tp, bp);
317 return 0;
318 }
319 xfs_trans_brelse(context->tp, bp);
320 }
321 } 367 }
322 ASSERT(bp != NULL); 368 ASSERT(bp != NULL);
323 369
@@ -407,7 +453,8 @@ xfs_attr3_leaf_list_int(
407 cursor->offset = 0; 453 cursor->offset = 0;
408 } 454 }
409 455
410 if (entry->flags & XFS_ATTR_INCOMPLETE) 456 if ((entry->flags & XFS_ATTR_INCOMPLETE) &&
457 !(context->flags & ATTR_INCOMPLETE))
411 continue; /* skip incomplete entries */ 458 continue; /* skip incomplete entries */
412 459
413 if (entry->flags & XFS_ATTR_LOCAL) { 460 if (entry->flags & XFS_ATTR_LOCAL) {
@@ -499,8 +546,8 @@ xfs_attr_list_int(
499#define ATTR_ENTBASESIZE /* minimum bytes used by an attr */ \ 546#define ATTR_ENTBASESIZE /* minimum bytes used by an attr */ \
500 (((struct attrlist_ent *) 0)->a_name - (char *) 0) 547 (((struct attrlist_ent *) 0)->a_name - (char *) 0)
501#define ATTR_ENTSIZE(namelen) /* actual bytes used by an attr */ \ 548#define ATTR_ENTSIZE(namelen) /* actual bytes used by an attr */ \
502 ((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \ 549 ((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(uint32_t)-1) \
503 & ~(sizeof(u_int32_t)-1)) 550 & ~(sizeof(uint32_t)-1))
504 551
505/* 552/*
506 * Format an attribute and copy it out to the user's buffer. 553 * Format an attribute and copy it out to the user's buffer.
@@ -583,6 +630,10 @@ xfs_attr_list(
583 (cursor->hashval || cursor->blkno || cursor->offset)) 630 (cursor->hashval || cursor->blkno || cursor->offset))
584 return -EINVAL; 631 return -EINVAL;
585 632
633 /* Only internal consumers can retrieve incomplete attrs. */
634 if (flags & ATTR_INCOMPLETE)
635 return -EINVAL;
636
586 /* 637 /*
587 * Check for a properly aligned buffer. 638 * Check for a properly aligned buffer.
588 */ 639 */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 6503cfa44262..6d37ab43195f 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -229,15 +229,17 @@ xfs_bmap_count_leaves(
229 struct xfs_ifork *ifp, 229 struct xfs_ifork *ifp,
230 xfs_filblks_t *count) 230 xfs_filblks_t *count)
231{ 231{
232 struct xfs_iext_cursor icur;
232 struct xfs_bmbt_irec got; 233 struct xfs_bmbt_irec got;
233 xfs_extnum_t numrecs = 0, i = 0; 234 xfs_extnum_t numrecs = 0;
234 235
235 while (xfs_iext_get_extent(ifp, i++, &got)) { 236 for_each_xfs_iext(ifp, &icur, &got) {
236 if (!isnullstartblock(got.br_startblock)) { 237 if (!isnullstartblock(got.br_startblock)) {
237 *count += got.br_blockcount; 238 *count += got.br_blockcount;
238 numrecs++; 239 numrecs++;
239 } 240 }
240 } 241 }
242
241 return numrecs; 243 return numrecs;
242} 244}
243 245
@@ -405,125 +407,103 @@ xfs_bmap_count_blocks(
405 return 0; 407 return 0;
406} 408}
407 409
408/* 410static int
409 * returns 1 for success, 0 if we failed to map the extent. 411xfs_getbmap_report_one(
410 */ 412 struct xfs_inode *ip,
411STATIC int 413 struct getbmapx *bmv,
412xfs_getbmapx_fix_eof_hole( 414 struct kgetbmap *out,
413 xfs_inode_t *ip, /* xfs incore inode pointer */ 415 int64_t bmv_end,
414 int whichfork, 416 struct xfs_bmbt_irec *got)
415 struct getbmapx *out, /* output structure */
416 int prealloced, /* this is a file with
417 * preallocated data space */
418 int64_t end, /* last block requested */
419 xfs_fsblock_t startblock,
420 bool moretocome)
421{ 417{
422 int64_t fixlen; 418 struct kgetbmap *p = out + bmv->bmv_entries;
423 xfs_mount_t *mp; /* file system mount point */ 419 bool shared = false, trimmed = false;
424 xfs_ifork_t *ifp; /* inode fork pointer */ 420 int error;
425 xfs_extnum_t lastx; /* last extent pointer */ 421
426 xfs_fileoff_t fileblock; 422 error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
427 423 if (error)
428 if (startblock == HOLESTARTBLOCK) { 424 return error;
429 mp = ip->i_mount; 425
430 out->bmv_block = -1; 426 if (isnullstartblock(got->br_startblock) ||
431 fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip))); 427 got->br_startblock == DELAYSTARTBLOCK) {
432 fixlen -= out->bmv_offset; 428 /*
433 if (prealloced && out->bmv_offset + out->bmv_length == end) { 429 * Delalloc extents that start beyond EOF can occur due to
434 /* Came to hole at EOF. Trim it. */ 430 * speculative EOF allocation when the delalloc extent is larger
435 if (fixlen <= 0) 431 * than the largest freespace extent at conversion time. These
436 return 0; 432 * extents cannot be converted by data writeback, so can exist
437 out->bmv_length = fixlen; 433 * here even if we are not supposed to be finding delalloc
438 } 434 * extents.
435 */
436 if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
437 ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
438
439 p->bmv_oflags |= BMV_OF_DELALLOC;
440 p->bmv_block = -2;
439 } else { 441 } else {
440 if (startblock == DELAYSTARTBLOCK) 442 p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
441 out->bmv_block = -2;
442 else
443 out->bmv_block = xfs_fsb_to_db(ip, startblock);
444 fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
445 ifp = XFS_IFORK_PTR(ip, whichfork);
446 if (!moretocome &&
447 xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
448 (lastx == xfs_iext_count(ifp) - 1))
449 out->bmv_oflags |= BMV_OF_LAST;
450 } 443 }
451 444
452 return 1; 445 if (got->br_state == XFS_EXT_UNWRITTEN &&
446 (bmv->bmv_iflags & BMV_IF_PREALLOC))
447 p->bmv_oflags |= BMV_OF_PREALLOC;
448
449 if (shared)
450 p->bmv_oflags |= BMV_OF_SHARED;
451
452 p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
453 p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);
454
455 bmv->bmv_offset = p->bmv_offset + p->bmv_length;
456 bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
457 bmv->bmv_entries++;
458 return 0;
453} 459}
454 460
455/* Adjust the reported bmap around shared/unshared extent transitions. */ 461static void
456STATIC int 462xfs_getbmap_report_hole(
457xfs_getbmap_adjust_shared( 463 struct xfs_inode *ip,
458 struct xfs_inode *ip, 464 struct getbmapx *bmv,
459 int whichfork, 465 struct kgetbmap *out,
460 struct xfs_bmbt_irec *map, 466 int64_t bmv_end,
461 struct getbmapx *out, 467 xfs_fileoff_t bno,
462 struct xfs_bmbt_irec *next_map) 468 xfs_fileoff_t end)
463{ 469{
464 struct xfs_mount *mp = ip->i_mount; 470 struct kgetbmap *p = out + bmv->bmv_entries;
465 xfs_agnumber_t agno;
466 xfs_agblock_t agbno;
467 xfs_agblock_t ebno;
468 xfs_extlen_t elen;
469 xfs_extlen_t nlen;
470 int error;
471 471
472 next_map->br_startblock = NULLFSBLOCK; 472 if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
473 next_map->br_startoff = NULLFILEOFF; 473 return;
474 next_map->br_blockcount = 0;
475 474
476 /* Only written data blocks can be shared. */ 475 p->bmv_block = -1;
477 if (!xfs_is_reflink_inode(ip) || 476 p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
478 whichfork != XFS_DATA_FORK || 477 p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);
479 !xfs_bmap_is_real_extent(map))
480 return 0;
481 478
482 agno = XFS_FSB_TO_AGNO(mp, map->br_startblock); 479 bmv->bmv_offset = p->bmv_offset + p->bmv_length;
483 agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock); 480 bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
484 error = xfs_reflink_find_shared(mp, NULL, agno, agbno, 481 bmv->bmv_entries++;
485 map->br_blockcount, &ebno, &elen, true); 482}
486 if (error)
487 return error;
488 483
489 if (ebno == NULLAGBLOCK) { 484static inline bool
490 /* No shared blocks at all. */ 485xfs_getbmap_full(
491 return 0; 486 struct getbmapx *bmv)
492 } else if (agbno == ebno) { 487{
493 /* 488 return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
494 * Shared extent at (agbno, elen). Shrink the reported 489}
495 * extent length and prepare to move the start of map[i]
496 * to agbno+elen, with the aim of (re)formatting the new
497 * map[i] the next time through the inner loop.
498 */
499 out->bmv_length = XFS_FSB_TO_BB(mp, elen);
500 out->bmv_oflags |= BMV_OF_SHARED;
501 if (elen != map->br_blockcount) {
502 *next_map = *map;
503 next_map->br_startblock += elen;
504 next_map->br_startoff += elen;
505 next_map->br_blockcount -= elen;
506 }
507 map->br_blockcount -= elen;
508 } else {
509 /*
510 * There's an unshared extent (agbno, ebno - agbno)
511 * followed by shared extent at (ebno, elen). Shrink
512 * the reported extent length to cover only the unshared
513 * extent and prepare to move up the start of map[i] to
514 * ebno, with the aim of (re)formatting the new map[i]
515 * the next time through the inner loop.
516 */
517 *next_map = *map;
518 nlen = ebno - agbno;
519 out->bmv_length = XFS_FSB_TO_BB(mp, nlen);
520 next_map->br_startblock += nlen;
521 next_map->br_startoff += nlen;
522 next_map->br_blockcount -= nlen;
523 map->br_blockcount -= nlen;
524 }
525 490
526 return 0; 491static bool
492xfs_getbmap_next_rec(
493 struct xfs_bmbt_irec *rec,
494 xfs_fileoff_t total_end)
495{
496 xfs_fileoff_t end = rec->br_startoff + rec->br_blockcount;
497
498 if (end == total_end)
499 return false;
500
501 rec->br_startoff += rec->br_blockcount;
502 if (!isnullstartblock(rec->br_startblock) &&
503 rec->br_startblock != DELAYSTARTBLOCK)
504 rec->br_startblock += rec->br_blockcount;
505 rec->br_blockcount = total_end - end;
506 return true;
527} 507}
528 508
529/* 509/*
@@ -535,33 +515,22 @@ xfs_getbmap_adjust_shared(
535 */ 515 */
536int /* error code */ 516int /* error code */
537xfs_getbmap( 517xfs_getbmap(
538 xfs_inode_t *ip, 518 struct xfs_inode *ip,
539 struct getbmapx *bmv, /* user bmap structure */ 519 struct getbmapx *bmv, /* user bmap structure */
540 xfs_bmap_format_t formatter, /* format to user */ 520 struct kgetbmap *out)
541 void *arg) /* formatter arg */
542{ 521{
543 int64_t bmvend; /* last block requested */ 522 struct xfs_mount *mp = ip->i_mount;
544 int error = 0; /* return value */ 523 int iflags = bmv->bmv_iflags;
545 int64_t fixlen; /* length for -1 case */ 524 int whichfork, lock, error = 0;
546 int i; /* extent number */ 525 int64_t bmv_end, max_len;
547 int lock; /* lock state */ 526 xfs_fileoff_t bno, first_bno;
548 xfs_bmbt_irec_t *map; /* buffer for user's data */ 527 struct xfs_ifork *ifp;
549 xfs_mount_t *mp; /* file system mount point */ 528 struct xfs_bmbt_irec got, rec;
550 int nex; /* # of user extents can do */ 529 xfs_filblks_t len;
551 int subnex; /* # of bmapi's can do */ 530 struct xfs_iext_cursor icur;
552 int nmap; /* number of map entries */ 531
553 struct getbmapx *out; /* output structure */ 532 if (bmv->bmv_iflags & ~BMV_IF_VALID)
554 int whichfork; /* data or attr fork */ 533 return -EINVAL;
555 int prealloced; /* this is a file with
556 * preallocated data space */
557 int iflags; /* interface flags */
558 int bmapi_flags; /* flags for xfs_bmapi */
559 int cur_ext = 0;
560 struct xfs_bmbt_irec inject_map;
561
562 mp = ip->i_mount;
563 iflags = bmv->bmv_iflags;
564
565#ifndef DEBUG 534#ifndef DEBUG
566 /* Only allow CoW fork queries if we're debugging. */ 535 /* Only allow CoW fork queries if we're debugging. */
567 if (iflags & BMV_IF_COWFORK) 536 if (iflags & BMV_IF_COWFORK)
@@ -570,89 +539,42 @@ xfs_getbmap(
570 if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK)) 539 if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
571 return -EINVAL; 540 return -EINVAL;
572 541
542 if (bmv->bmv_length < -1)
543 return -EINVAL;
544 bmv->bmv_entries = 0;
545 if (bmv->bmv_length == 0)
546 return 0;
547
573 if (iflags & BMV_IF_ATTRFORK) 548 if (iflags & BMV_IF_ATTRFORK)
574 whichfork = XFS_ATTR_FORK; 549 whichfork = XFS_ATTR_FORK;
575 else if (iflags & BMV_IF_COWFORK) 550 else if (iflags & BMV_IF_COWFORK)
576 whichfork = XFS_COW_FORK; 551 whichfork = XFS_COW_FORK;
577 else 552 else
578 whichfork = XFS_DATA_FORK; 553 whichfork = XFS_DATA_FORK;
554 ifp = XFS_IFORK_PTR(ip, whichfork);
579 555
556 xfs_ilock(ip, XFS_IOLOCK_SHARED);
580 switch (whichfork) { 557 switch (whichfork) {
581 case XFS_ATTR_FORK: 558 case XFS_ATTR_FORK:
582 if (XFS_IFORK_Q(ip)) { 559 if (!XFS_IFORK_Q(ip))
583 if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && 560 goto out_unlock_iolock;
584 ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
585 ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
586 return -EINVAL;
587 } else if (unlikely(
588 ip->i_d.di_aformat != 0 &&
589 ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
590 XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
591 ip->i_mount);
592 return -EFSCORRUPTED;
593 }
594 561
595 prealloced = 0; 562 max_len = 1LL << 32;
596 fixlen = 1LL << 32; 563 lock = xfs_ilock_attr_map_shared(ip);
597 break; 564 break;
598 case XFS_COW_FORK: 565 case XFS_COW_FORK:
599 if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS) 566 /* No CoW fork? Just return */
600 return -EINVAL; 567 if (!ifp)
568 goto out_unlock_iolock;
601 569
602 if (xfs_get_cowextsz_hint(ip)) { 570 if (xfs_get_cowextsz_hint(ip))
603 prealloced = 1; 571 max_len = mp->m_super->s_maxbytes;
604 fixlen = mp->m_super->s_maxbytes; 572 else
605 } else { 573 max_len = XFS_ISIZE(ip);
606 prealloced = 0;
607 fixlen = XFS_ISIZE(ip);
608 }
609 break;
610 default:
611 /* Local format data forks report no extents. */
612 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
613 bmv->bmv_entries = 0;
614 return 0;
615 }
616 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
617 ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
618 return -EINVAL;
619 574
620 if (xfs_get_extsz_hint(ip) || 575 lock = XFS_ILOCK_SHARED;
621 ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ 576 xfs_ilock(ip, lock);
622 prealloced = 1;
623 fixlen = mp->m_super->s_maxbytes;
624 } else {
625 prealloced = 0;
626 fixlen = XFS_ISIZE(ip);
627 }
628 break; 577 break;
629 }
630
631 if (bmv->bmv_length == -1) {
632 fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
633 bmv->bmv_length =
634 max_t(int64_t, fixlen - bmv->bmv_offset, 0);
635 } else if (bmv->bmv_length == 0) {
636 bmv->bmv_entries = 0;
637 return 0;
638 } else if (bmv->bmv_length < 0) {
639 return -EINVAL;
640 }
641
642 nex = bmv->bmv_count - 1;
643 if (nex <= 0)
644 return -EINVAL;
645 bmvend = bmv->bmv_offset + bmv->bmv_length;
646
647
648 if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
649 return -ENOMEM;
650 out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
651 if (!out)
652 return -ENOMEM;
653
654 xfs_ilock(ip, XFS_IOLOCK_SHARED);
655 switch (whichfork) {
656 case XFS_DATA_FORK: 578 case XFS_DATA_FORK:
657 if (!(iflags & BMV_IF_DELALLOC) && 579 if (!(iflags & BMV_IF_DELALLOC) &&
658 (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { 580 (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
@@ -670,154 +592,105 @@ xfs_getbmap(
670 */ 592 */
671 } 593 }
672 594
595 if (xfs_get_extsz_hint(ip) ||
596 (ip->i_d.di_flags &
597 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
598 max_len = mp->m_super->s_maxbytes;
599 else
600 max_len = XFS_ISIZE(ip);
601
673 lock = xfs_ilock_data_map_shared(ip); 602 lock = xfs_ilock_data_map_shared(ip);
674 break; 603 break;
675 case XFS_COW_FORK:
676 lock = XFS_ILOCK_SHARED;
677 xfs_ilock(ip, lock);
678 break;
679 case XFS_ATTR_FORK:
680 lock = xfs_ilock_attr_map_shared(ip);
681 break;
682 } 604 }
683 605
684 /* 606 switch (XFS_IFORK_FORMAT(ip, whichfork)) {
685 * Don't let nex be bigger than the number of extents 607 case XFS_DINODE_FMT_EXTENTS:
686 * we can have assuming alternating holes and real extents. 608 case XFS_DINODE_FMT_BTREE:
687 */ 609 break;
688 if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) 610 case XFS_DINODE_FMT_LOCAL:
689 nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; 611 /* Local format inode forks report no extents. */
690
691 bmapi_flags = xfs_bmapi_aflag(whichfork);
692 if (!(iflags & BMV_IF_PREALLOC))
693 bmapi_flags |= XFS_BMAPI_IGSTATE;
694
695 /*
696 * Allocate enough space to handle "subnex" maps at a time.
697 */
698 error = -ENOMEM;
699 subnex = 16;
700 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
701 if (!map)
702 goto out_unlock_ilock; 612 goto out_unlock_ilock;
613 default:
614 error = -EINVAL;
615 goto out_unlock_ilock;
616 }
703 617
704 bmv->bmv_entries = 0; 618 if (bmv->bmv_length == -1) {
705 619 max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
706 if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && 620 bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
707 (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
708 error = 0;
709 goto out_free_map;
710 } 621 }
711 622
712 do { 623 bmv_end = bmv->bmv_offset + bmv->bmv_length;
713 nmap = (nex> subnex) ? subnex : nex;
714 error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
715 XFS_BB_TO_FSB(mp, bmv->bmv_length),
716 map, &nmap, bmapi_flags);
717 if (error)
718 goto out_free_map;
719 ASSERT(nmap <= subnex);
720
721 for (i = 0; i < nmap && bmv->bmv_length &&
722 cur_ext < bmv->bmv_count - 1; i++) {
723 out[cur_ext].bmv_oflags = 0;
724 if (map[i].br_state == XFS_EXT_UNWRITTEN)
725 out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
726 else if (map[i].br_startblock == DELAYSTARTBLOCK)
727 out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
728 out[cur_ext].bmv_offset =
729 XFS_FSB_TO_BB(mp, map[i].br_startoff);
730 out[cur_ext].bmv_length =
731 XFS_FSB_TO_BB(mp, map[i].br_blockcount);
732 out[cur_ext].bmv_unused1 = 0;
733 out[cur_ext].bmv_unused2 = 0;
734 624
735 /* 625 first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
736 * delayed allocation extents that start beyond EOF can 626 len = XFS_BB_TO_FSB(mp, bmv->bmv_length);
737 * occur due to speculative EOF allocation when the
738 * delalloc extent is larger than the largest freespace
739 * extent at conversion time. These extents cannot be
740 * converted by data writeback, so can exist here even
741 * if we are not supposed to be finding delalloc
742 * extents.
743 */
744 if (map[i].br_startblock == DELAYSTARTBLOCK &&
745 map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
746 ASSERT((iflags & BMV_IF_DELALLOC) != 0);
747
748 if (map[i].br_startblock == HOLESTARTBLOCK &&
749 whichfork == XFS_ATTR_FORK) {
750 /* came to the end of attribute fork */
751 out[cur_ext].bmv_oflags |= BMV_OF_LAST;
752 goto out_free_map;
753 }
754 627
755 /* Is this a shared block? */ 628 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
756 error = xfs_getbmap_adjust_shared(ip, whichfork, 629 error = xfs_iread_extents(NULL, ip, whichfork);
757 &map[i], &out[cur_ext], &inject_map); 630 if (error)
758 if (error) 631 goto out_unlock_ilock;
759 goto out_free_map; 632 }
760 633
761 if (!xfs_getbmapx_fix_eof_hole(ip, whichfork, 634 if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
762 &out[cur_ext], prealloced, bmvend, 635 /*
763 map[i].br_startblock, 636 * Report a whole-file hole if the delalloc flag is set to
764 inject_map.br_startblock != NULLFSBLOCK)) 637 * stay compatible with the old implementation.
765 goto out_free_map; 638 */
639 if (iflags & BMV_IF_DELALLOC)
640 xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
641 XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
642 goto out_unlock_ilock;
643 }
766 644
767 bmv->bmv_offset = 645 while (!xfs_getbmap_full(bmv)) {
768 out[cur_ext].bmv_offset + 646 xfs_trim_extent(&got, first_bno, len);
769 out[cur_ext].bmv_length;
770 bmv->bmv_length =
771 max_t(int64_t, 0, bmvend - bmv->bmv_offset);
772 647
773 /* 648 /*
774 * In case we don't want to return the hole, 649 * Report an entry for a hole if this extent doesn't directly
775 * don't increase cur_ext so that we can reuse 650 * follow the previous one.
776 * it in the next loop. 651 */
777 */ 652 if (got.br_startoff > bno) {
778 if ((iflags & BMV_IF_NO_HOLES) && 653 xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
779 map[i].br_startblock == HOLESTARTBLOCK) { 654 got.br_startoff);
780 memset(&out[cur_ext], 0, sizeof(out[cur_ext])); 655 if (xfs_getbmap_full(bmv))
781 continue; 656 break;
782 } 657 }
783 658
784 /* 659 /*
785 * In order to report shared extents accurately, 660 * In order to report shared extents accurately, we report each
786 * we report each distinct shared/unshared part 661 * distinct shared / unshared part of a single bmbt record with
787 * of a single bmbt record using multiple bmap 662 * an individual getbmapx record.
788 * extents. To make that happen, we iterate the 663 */
789 * same map array item multiple times, each 664 bno = got.br_startoff + got.br_blockcount;
790 * time trimming out the subextent that we just 665 rec = got;
791 * reported. 666 do {
792 * 667 error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
793 * Because of this, we must check the out array 668 &rec);
794 * index (cur_ext) directly against bmv_count-1 669 if (error || xfs_getbmap_full(bmv))
795 * to avoid overflows. 670 goto out_unlock_ilock;
796 */ 671 } while (xfs_getbmap_next_rec(&rec, bno));
797 if (inject_map.br_startblock != NULLFSBLOCK) { 672
798 map[i] = inject_map; 673 if (!xfs_iext_next_extent(ifp, &icur, &got)) {
799 i--; 674 xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
675
676 out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;
677
678 if (whichfork != XFS_ATTR_FORK && bno < end &&
679 !xfs_getbmap_full(bmv)) {
680 xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
681 bno, end);
800 } 682 }
801 bmv->bmv_entries++; 683 break;
802 cur_ext++;
803 } 684 }
804 } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
805 685
806 out_free_map: 686 if (bno >= first_bno + len)
807 kmem_free(map);
808 out_unlock_ilock:
809 xfs_iunlock(ip, lock);
810 out_unlock_iolock:
811 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
812
813 for (i = 0; i < cur_ext; i++) {
814 /* format results & advance arg */
815 error = formatter(&arg, &out[i]);
816 if (error)
817 break; 687 break;
818 } 688 }
819 689
820 kmem_free(out); 690out_unlock_ilock:
691 xfs_iunlock(ip, lock);
692out_unlock_iolock:
693 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
821 return error; 694 return error;
822} 695}
823 696
@@ -1389,53 +1262,12 @@ out:
1389 1262
1390} 1263}
1391 1264
1392/*
1393 * @next_fsb will keep track of the extent currently undergoing shift.
1394 * @stop_fsb will keep track of the extent at which we have to stop.
1395 * If we are shifting left, we will start with block (offset + len) and
1396 * shift each extent till last extent.
1397 * If we are shifting right, we will start with last extent inside file space
1398 * and continue until we reach the block corresponding to offset.
1399 */
1400static int 1265static int
1401xfs_shift_file_space( 1266xfs_prepare_shift(
1402 struct xfs_inode *ip, 1267 struct xfs_inode *ip,
1403 xfs_off_t offset, 1268 loff_t offset)
1404 xfs_off_t len,
1405 enum shift_direction direction)
1406{ 1269{
1407 int done = 0;
1408 struct xfs_mount *mp = ip->i_mount;
1409 struct xfs_trans *tp;
1410 int error; 1270 int error;
1411 struct xfs_defer_ops dfops;
1412 xfs_fsblock_t first_block;
1413 xfs_fileoff_t stop_fsb;
1414 xfs_fileoff_t next_fsb;
1415 xfs_fileoff_t shift_fsb;
1416 uint resblks;
1417
1418 ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
1419
1420 if (direction == SHIFT_LEFT) {
1421 /*
1422 * Reserve blocks to cover potential extent merges after left
1423 * shift operations.
1424 */
1425 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
1426 next_fsb = XFS_B_TO_FSB(mp, offset + len);
1427 stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
1428 } else {
1429 /*
1430 * If right shift, delegate the work of initialization of
1431 * next_fsb to xfs_bmap_shift_extent as it has ilock held.
1432 */
1433 resblks = 0;
1434 next_fsb = NULLFSBLOCK;
1435 stop_fsb = XFS_B_TO_FSB(mp, offset);
1436 }
1437
1438 shift_fsb = XFS_B_TO_FSB(mp, len);
1439 1271
1440 /* 1272 /*
1441 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation 1273 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
@@ -1451,8 +1283,7 @@ xfs_shift_file_space(
1451 * Writeback and invalidate cache for the remainder of the file as we're 1283 * Writeback and invalidate cache for the remainder of the file as we're
1452 * about to shift down every extent from offset to EOF. 1284 * about to shift down every extent from offset to EOF.
1453 */ 1285 */
1454 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 1286 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, offset, -1);
1455 offset, -1);
1456 if (error) 1287 if (error)
1457 return error; 1288 return error;
1458 error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, 1289 error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
@@ -1472,16 +1303,50 @@ xfs_shift_file_space(
1472 return error; 1303 return error;
1473 } 1304 }
1474 1305
1475 /* 1306 return 0;
1476 * The extent shifting code works on extent granularity. So, if 1307}
1477 * stop_fsb is not the starting block of extent, we need to split 1308
1478 * the extent at stop_fsb. 1309/*
1479 */ 1310 * xfs_collapse_file_space()
1480 if (direction == SHIFT_RIGHT) { 1311 * This routine frees disk space and shift extent for the given file.
1481 error = xfs_bmap_split_extent(ip, stop_fsb); 1312 * The first thing we do is to free data blocks in the specified range
1482 if (error) 1313 * by calling xfs_free_file_space(). It would also sync dirty data
1483 return error; 1314 * and invalidate page cache over the region on which collapse range
1484 } 1315 * is working. And Shift extent records to the left to cover a hole.
1316 * RETURNS:
1317 * 0 on success
1318 * errno on error
1319 *
1320 */
1321int
1322xfs_collapse_file_space(
1323 struct xfs_inode *ip,
1324 xfs_off_t offset,
1325 xfs_off_t len)
1326{
1327 struct xfs_mount *mp = ip->i_mount;
1328 struct xfs_trans *tp;
1329 int error;
1330 struct xfs_defer_ops dfops;
1331 xfs_fsblock_t first_block;
1332 xfs_fileoff_t stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
1333 xfs_fileoff_t next_fsb = XFS_B_TO_FSB(mp, offset + len);
1334 xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len);
1335 uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
1336 bool done = false;
1337
1338 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1339 ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
1340
1341 trace_xfs_collapse_file_space(ip);
1342
1343 error = xfs_free_file_space(ip, offset, len);
1344 if (error)
1345 return error;
1346
1347 error = xfs_prepare_shift(ip, offset);
1348 if (error)
1349 return error;
1485 1350
1486 while (!error && !done) { 1351 while (!error && !done) {
1487 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, 1352 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
@@ -1495,25 +1360,17 @@ xfs_shift_file_space(
1495 XFS_QMOPT_RES_REGBLKS); 1360 XFS_QMOPT_RES_REGBLKS);
1496 if (error) 1361 if (error)
1497 goto out_trans_cancel; 1362 goto out_trans_cancel;
1498
1499 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1363 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1500 1364
1501 xfs_defer_init(&dfops, &first_block); 1365 xfs_defer_init(&dfops, &first_block);
1502 1366 error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
1503 /* 1367 &done, stop_fsb, &first_block, &dfops);
1504 * We are using the write transaction in which max 2 bmbt
1505 * updates are allowed
1506 */
1507 error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
1508 &done, stop_fsb, &first_block, &dfops,
1509 direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
1510 if (error) 1368 if (error)
1511 goto out_bmap_cancel; 1369 goto out_bmap_cancel;
1512 1370
1513 error = xfs_defer_finish(&tp, &dfops); 1371 error = xfs_defer_finish(&tp, &dfops);
1514 if (error) 1372 if (error)
1515 goto out_bmap_cancel; 1373 goto out_bmap_cancel;
1516
1517 error = xfs_trans_commit(tp); 1374 error = xfs_trans_commit(tp);
1518 } 1375 }
1519 1376
@@ -1527,36 +1384,6 @@ out_trans_cancel:
1527} 1384}
1528 1385
1529/* 1386/*
1530 * xfs_collapse_file_space()
1531 * This routine frees disk space and shift extent for the given file.
1532 * The first thing we do is to free data blocks in the specified range
1533 * by calling xfs_free_file_space(). It would also sync dirty data
1534 * and invalidate page cache over the region on which collapse range
1535 * is working. And Shift extent records to the left to cover a hole.
1536 * RETURNS:
1537 * 0 on success
1538 * errno on error
1539 *
1540 */
1541int
1542xfs_collapse_file_space(
1543 struct xfs_inode *ip,
1544 xfs_off_t offset,
1545 xfs_off_t len)
1546{
1547 int error;
1548
1549 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1550 trace_xfs_collapse_file_space(ip);
1551
1552 error = xfs_free_file_space(ip, offset, len);
1553 if (error)
1554 return error;
1555
1556 return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT);
1557}
1558
1559/*
1560 * xfs_insert_file_space() 1387 * xfs_insert_file_space()
1561 * This routine create hole space by shifting extents for the given file. 1388 * This routine create hole space by shifting extents for the given file.
1562 * The first thing we do is to sync dirty data and invalidate page cache 1389 * The first thing we do is to sync dirty data and invalidate page cache
@@ -1574,10 +1401,60 @@ xfs_insert_file_space(
1574 loff_t offset, 1401 loff_t offset,
1575 loff_t len) 1402 loff_t len)
1576{ 1403{
1404 struct xfs_mount *mp = ip->i_mount;
1405 struct xfs_trans *tp;
1406 int error;
1407 struct xfs_defer_ops dfops;
1408 xfs_fsblock_t first_block;
1409 xfs_fileoff_t stop_fsb = XFS_B_TO_FSB(mp, offset);
1410 xfs_fileoff_t next_fsb = NULLFSBLOCK;
1411 xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len);
1412 bool done = false;
1413
1577 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1414 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1415 ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
1416
1578 trace_xfs_insert_file_space(ip); 1417 trace_xfs_insert_file_space(ip);
1579 1418
1580 return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT); 1419 error = xfs_prepare_shift(ip, offset);
1420 if (error)
1421 return error;
1422
1423 /*
1424 * The extent shifting code works on extent granularity. So, if stop_fsb
1425 * is not the starting block of extent, we need to split the extent at
1426 * stop_fsb.
1427 */
1428 error = xfs_bmap_split_extent(ip, stop_fsb);
1429 if (error)
1430 return error;
1431
1432 while (!error && !done) {
1433 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0,
1434 &tp);
1435 if (error)
1436 break;
1437
1438 xfs_ilock(ip, XFS_ILOCK_EXCL);
1439 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1440 xfs_defer_init(&dfops, &first_block);
1441 error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
1442 &done, stop_fsb, &first_block, &dfops);
1443 if (error)
1444 goto out_bmap_cancel;
1445
1446 error = xfs_defer_finish(&tp, &dfops);
1447 if (error)
1448 goto out_bmap_cancel;
1449 error = xfs_trans_commit(tp);
1450 }
1451
1452 return error;
1453
1454out_bmap_cancel:
1455 xfs_defer_cancel(&dfops);
1456 xfs_trans_cancel(tp);
1457 return error;
1581} 1458}
1582 1459
1583/* 1460/*
@@ -1832,7 +1709,6 @@ xfs_swap_extent_forks(
1832 xfs_filblks_t aforkblks = 0; 1709 xfs_filblks_t aforkblks = 0;
1833 xfs_filblks_t taforkblks = 0; 1710 xfs_filblks_t taforkblks = 0;
1834 xfs_extnum_t junk; 1711 xfs_extnum_t junk;
1835 xfs_extnum_t nextents;
1836 uint64_t tmp; 1712 uint64_t tmp;
1837 int error; 1713 int error;
1838 1714
@@ -1907,13 +1783,6 @@ xfs_swap_extent_forks(
1907 1783
1908 switch (ip->i_d.di_format) { 1784 switch (ip->i_d.di_format) {
1909 case XFS_DINODE_FMT_EXTENTS: 1785 case XFS_DINODE_FMT_EXTENTS:
1910 /*
1911 * If the extents fit in the inode, fix the pointer. Otherwise
1912 * it's already NULL or pointing to the extent.
1913 */
1914 nextents = xfs_iext_count(&ip->i_df);
1915 if (nextents <= XFS_INLINE_EXTS)
1916 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
1917 (*src_log_flags) |= XFS_ILOG_DEXT; 1786 (*src_log_flags) |= XFS_ILOG_DEXT;
1918 break; 1787 break;
1919 case XFS_DINODE_FMT_BTREE: 1788 case XFS_DINODE_FMT_BTREE:
@@ -1925,13 +1794,6 @@ xfs_swap_extent_forks(
1925 1794
1926 switch (tip->i_d.di_format) { 1795 switch (tip->i_d.di_format) {
1927 case XFS_DINODE_FMT_EXTENTS: 1796 case XFS_DINODE_FMT_EXTENTS:
1928 /*
1929 * If the extents fit in the inode, fix the pointer. Otherwise
1930 * it's already NULL or pointing to the extent.
1931 */
1932 nextents = xfs_iext_count(&tip->i_df);
1933 if (nextents <= XFS_INLINE_EXTS)
1934 tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext;
1935 (*target_log_flags) |= XFS_ILOG_DEXT; 1797 (*target_log_flags) |= XFS_ILOG_DEXT;
1936 break; 1798 break;
1937 case XFS_DINODE_FMT_BTREE: 1799 case XFS_DINODE_FMT_BTREE:
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 7d330b3c77c3..4d4ae48bd4f6 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -47,10 +47,14 @@ int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
47int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, 47int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
48 xfs_fileoff_t start_fsb, xfs_fileoff_t length); 48 xfs_fileoff_t start_fsb, xfs_fileoff_t length);
49 49
50/* bmap to userspace formatter - copy to user & advance pointer */ 50struct kgetbmap {
51typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *); 51 __s64 bmv_offset; /* file offset of segment in blocks */
52 __s64 bmv_block; /* starting block (64-bit daddr_t) */
53 __s64 bmv_length; /* length of segment, blocks */
54 __s32 bmv_oflags; /* output flags */
55};
52int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv, 56int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
53 xfs_bmap_format_t formatter, void *arg); 57 struct kgetbmap *out);
54 58
55/* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */ 59/* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */
56int xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp, 60int xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp,
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 2f97c12ca75e..4db6e8d780f6 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -42,6 +42,8 @@
42#include "xfs_mount.h" 42#include "xfs_mount.h"
43#include "xfs_trace.h" 43#include "xfs_trace.h"
44#include "xfs_log.h" 44#include "xfs_log.h"
45#include "xfs_errortag.h"
46#include "xfs_error.h"
45 47
46static kmem_zone_t *xfs_buf_zone; 48static kmem_zone_t *xfs_buf_zone;
47 49
@@ -2129,3 +2131,17 @@ xfs_buf_terminate(void)
2129{ 2131{
2130 kmem_zone_destroy(xfs_buf_zone); 2132 kmem_zone_destroy(xfs_buf_zone);
2131} 2133}
2134
2135void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
2136{
2137 /*
2138 * Set the lru reference count to 0 based on the error injection tag.
2139 * This allows userspace to disrupt buffer caching for debug/testing
2140 * purposes.
2141 */
2142 if (XFS_TEST_ERROR(false, bp->b_target->bt_mount,
2143 XFS_ERRTAG_BUF_LRU_REF))
2144 lru_ref = 0;
2145
2146 atomic_set(&bp->b_lru_ref, lru_ref);
2147}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index bf71507ddb16..f873bb786824 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -352,10 +352,7 @@ extern void xfs_buf_terminate(void);
352#define XFS_BUF_ADDR(bp) ((bp)->b_maps[0].bm_bn) 352#define XFS_BUF_ADDR(bp) ((bp)->b_maps[0].bm_bn)
353#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_maps[0].bm_bn = (xfs_daddr_t)(bno)) 353#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_maps[0].bm_bn = (xfs_daddr_t)(bno))
354 354
355static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) 355void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref);
356{
357 atomic_set(&bp->b_lru_ref, lru_ref);
358}
359 356
360static inline int xfs_buf_ispinned(struct xfs_buf *bp) 357static inline int xfs_buf_ispinned(struct xfs_buf *bp)
361{ 358{
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index ba2638d37031..0c58918bc0ad 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -41,7 +41,7 @@ static unsigned char xfs_dir3_filetype_table[] = {
41 DT_FIFO, DT_SOCK, DT_LNK, DT_WHT, 41 DT_FIFO, DT_SOCK, DT_LNK, DT_WHT,
42}; 42};
43 43
44static unsigned char 44unsigned char
45xfs_dir3_get_dtype( 45xfs_dir3_get_dtype(
46 struct xfs_mount *mp, 46 struct xfs_mount *mp,
47 uint8_t filetype) 47 uint8_t filetype)
@@ -266,7 +266,7 @@ xfs_dir2_leaf_readbuf(
266 xfs_dablk_t next_ra; 266 xfs_dablk_t next_ra;
267 xfs_dablk_t map_off; 267 xfs_dablk_t map_off;
268 xfs_dablk_t last_da; 268 xfs_dablk_t last_da;
269 xfs_extnum_t idx; 269 struct xfs_iext_cursor icur;
270 int ra_want; 270 int ra_want;
271 int error = 0; 271 int error = 0;
272 272
@@ -283,7 +283,7 @@ xfs_dir2_leaf_readbuf(
283 */ 283 */
284 last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET); 284 last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
285 map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *cur_off)); 285 map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *cur_off));
286 if (!xfs_iext_lookup_extent(dp, ifp, map_off, &idx, &map)) 286 if (!xfs_iext_lookup_extent(dp, ifp, map_off, &icur, &map))
287 goto out; 287 goto out;
288 if (map.br_startoff >= last_da) 288 if (map.br_startoff >= last_da)
289 goto out; 289 goto out;
@@ -311,7 +311,7 @@ xfs_dir2_leaf_readbuf(
311 if (next_ra >= last_da) 311 if (next_ra >= last_da)
312 goto out_no_ra; 312 goto out_no_ra;
313 if (map.br_blockcount < geo->fsbcount && 313 if (map.br_blockcount < geo->fsbcount &&
314 !xfs_iext_get_extent(ifp, ++idx, &map)) 314 !xfs_iext_next_extent(ifp, &icur, &map))
315 goto out_no_ra; 315 goto out_no_ra;
316 if (map.br_startoff >= last_da) 316 if (map.br_startoff >= last_da)
317 goto out_no_ra; 317 goto out_no_ra;
@@ -334,7 +334,7 @@ xfs_dir2_leaf_readbuf(
334 ra_want -= geo->fsbcount; 334 ra_want -= geo->fsbcount;
335 next_ra += geo->fsbcount; 335 next_ra += geo->fsbcount;
336 } 336 }
337 if (!xfs_iext_get_extent(ifp, ++idx, &map)) { 337 if (!xfs_iext_next_extent(ifp, &icur, &map)) {
338 *ra_blk = last_da; 338 *ra_blk = last_da;
339 break; 339 break;
340 } 340 }
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index cd82429d8df7..d57c2db64e59 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -53,13 +53,6 @@
53 * otherwise by the lowest id first, see xfs_dqlock2. 53 * otherwise by the lowest id first, see xfs_dqlock2.
54 */ 54 */
55 55
56#ifdef DEBUG
57xfs_buftarg_t *xfs_dqerror_target;
58int xfs_do_dqerror;
59int xfs_dqreq_num;
60int xfs_dqerror_mod = 33;
61#endif
62
63struct kmem_zone *xfs_qm_dqtrxzone; 56struct kmem_zone *xfs_qm_dqtrxzone;
64static struct kmem_zone *xfs_qm_dqzone; 57static struct kmem_zone *xfs_qm_dqzone;
65 58
@@ -703,7 +696,7 @@ xfs_dq_get_next_id(
703 xfs_dqid_t next_id = *id + 1; /* simple advance */ 696 xfs_dqid_t next_id = *id + 1; /* simple advance */
704 uint lock_flags; 697 uint lock_flags;
705 struct xfs_bmbt_irec got; 698 struct xfs_bmbt_irec got;
706 xfs_extnum_t idx; 699 struct xfs_iext_cursor cur;
707 xfs_fsblock_t start; 700 xfs_fsblock_t start;
708 int error = 0; 701 int error = 0;
709 702
@@ -727,7 +720,7 @@ xfs_dq_get_next_id(
727 return error; 720 return error;
728 } 721 }
729 722
730 if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &idx, &got)) { 723 if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &cur, &got)) {
731 /* contiguous chunk, bump startoff for the id calculation */ 724 /* contiguous chunk, bump startoff for the id calculation */
732 if (got.br_startoff < start) 725 if (got.br_startoff < start)
733 got.br_startoff = start; 726 got.br_startoff = start;
@@ -770,15 +763,6 @@ xfs_qm_dqget(
770 return -ESRCH; 763 return -ESRCH;
771 } 764 }
772 765
773#ifdef DEBUG
774 if (xfs_do_dqerror) {
775 if ((xfs_dqerror_target == mp->m_ddev_targp) &&
776 (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
777 xfs_debug(mp, "Returning error in dqget");
778 return -EIO;
779 }
780 }
781
782 ASSERT(type == XFS_DQ_USER || 766 ASSERT(type == XFS_DQ_USER ||
783 type == XFS_DQ_PROJ || 767 type == XFS_DQ_PROJ ||
784 type == XFS_DQ_GROUP); 768 type == XFS_DQ_GROUP);
@@ -786,7 +770,6 @@ xfs_qm_dqget(
786 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 770 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
787 ASSERT(xfs_inode_dquot(ip, type) == NULL); 771 ASSERT(xfs_inode_dquot(ip, type) == NULL);
788 } 772 }
789#endif
790 773
791restart: 774restart:
792 mutex_lock(&qi->qi_tree_lock); 775 mutex_lock(&qi->qi_tree_lock);
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index eaf86f55b7f2..4c9f35d983b2 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -21,6 +21,7 @@
21#include "xfs_log_format.h" 21#include "xfs_log_format.h"
22#include "xfs_trans_resv.h" 22#include "xfs_trans_resv.h"
23#include "xfs_mount.h" 23#include "xfs_mount.h"
24#include "xfs_errortag.h"
24#include "xfs_error.h" 25#include "xfs_error.h"
25#include "xfs_sysfs.h" 26#include "xfs_sysfs.h"
26 27
@@ -58,6 +59,7 @@ static unsigned int xfs_errortag_random_default[] = {
58 XFS_RANDOM_DROP_WRITES, 59 XFS_RANDOM_DROP_WRITES,
59 XFS_RANDOM_LOG_BAD_CRC, 60 XFS_RANDOM_LOG_BAD_CRC,
60 XFS_RANDOM_LOG_ITEM_PIN, 61 XFS_RANDOM_LOG_ITEM_PIN,
62 XFS_RANDOM_BUF_LRU_REF,
61}; 63};
62 64
63struct xfs_errortag_attr { 65struct xfs_errortag_attr {
@@ -163,6 +165,7 @@ XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL);
163XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES); 165XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES);
164XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC); 166XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC);
165XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN); 167XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN);
168XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF);
166 169
167static struct attribute *xfs_errortag_attrs[] = { 170static struct attribute *xfs_errortag_attrs[] = {
168 XFS_ERRORTAG_ATTR_LIST(noerror), 171 XFS_ERRORTAG_ATTR_LIST(noerror),
@@ -196,10 +199,11 @@ static struct attribute *xfs_errortag_attrs[] = {
196 XFS_ERRORTAG_ATTR_LIST(drop_writes), 199 XFS_ERRORTAG_ATTR_LIST(drop_writes),
197 XFS_ERRORTAG_ATTR_LIST(log_bad_crc), 200 XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
198 XFS_ERRORTAG_ATTR_LIST(log_item_pin), 201 XFS_ERRORTAG_ATTR_LIST(log_item_pin),
202 XFS_ERRORTAG_ATTR_LIST(buf_lru_ref),
199 NULL, 203 NULL,
200}; 204};
201 205
202struct kobj_type xfs_errortag_ktype = { 206static struct kobj_type xfs_errortag_ktype = {
203 .release = xfs_sysfs_release, 207 .release = xfs_sysfs_release,
204 .sysfs_ops = &xfs_errortag_sysfs_ops, 208 .sysfs_ops = &xfs_errortag_sysfs_ops,
205 .default_attrs = xfs_errortag_attrs, 209 .default_attrs = xfs_errortag_attrs,
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 7c4bef3bddb7..ea816c1bf8db 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -63,87 +63,6 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
63 } \ 63 } \
64 } 64 }
65 65
66/*
67 * error injection tags - the labels can be anything you want
68 * but each tag should have its own unique number
69 */
70
71#define XFS_ERRTAG_NOERROR 0
72#define XFS_ERRTAG_IFLUSH_1 1
73#define XFS_ERRTAG_IFLUSH_2 2
74#define XFS_ERRTAG_IFLUSH_3 3
75#define XFS_ERRTAG_IFLUSH_4 4
76#define XFS_ERRTAG_IFLUSH_5 5
77#define XFS_ERRTAG_IFLUSH_6 6
78#define XFS_ERRTAG_DA_READ_BUF 7
79#define XFS_ERRTAG_BTREE_CHECK_LBLOCK 8
80#define XFS_ERRTAG_BTREE_CHECK_SBLOCK 9
81#define XFS_ERRTAG_ALLOC_READ_AGF 10
82#define XFS_ERRTAG_IALLOC_READ_AGI 11
83#define XFS_ERRTAG_ITOBP_INOTOBP 12
84#define XFS_ERRTAG_IUNLINK 13
85#define XFS_ERRTAG_IUNLINK_REMOVE 14
86#define XFS_ERRTAG_DIR_INO_VALIDATE 15
87#define XFS_ERRTAG_BULKSTAT_READ_CHUNK 16
88#define XFS_ERRTAG_IODONE_IOERR 17
89#define XFS_ERRTAG_STRATREAD_IOERR 18
90#define XFS_ERRTAG_STRATCMPL_IOERR 19
91#define XFS_ERRTAG_DIOWRITE_IOERR 20
92#define XFS_ERRTAG_BMAPIFORMAT 21
93#define XFS_ERRTAG_FREE_EXTENT 22
94#define XFS_ERRTAG_RMAP_FINISH_ONE 23
95#define XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE 24
96#define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25
97#define XFS_ERRTAG_BMAP_FINISH_ONE 26
98#define XFS_ERRTAG_AG_RESV_CRITICAL 27
99/*
100 * DEBUG mode instrumentation to test and/or trigger delayed allocation
101 * block killing in the event of failed writes. When enabled, all
102 * buffered writes are silenty dropped and handled as if they failed.
103 * All delalloc blocks in the range of the write (including pre-existing
104 * delalloc blocks!) are tossed as part of the write failure error
105 * handling sequence.
106 */
107#define XFS_ERRTAG_DROP_WRITES 28
108#define XFS_ERRTAG_LOG_BAD_CRC 29
109#define XFS_ERRTAG_LOG_ITEM_PIN 30
110#define XFS_ERRTAG_MAX 31
111
112/*
113 * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
114 */
115#define XFS_RANDOM_DEFAULT 100
116#define XFS_RANDOM_IFLUSH_1 XFS_RANDOM_DEFAULT
117#define XFS_RANDOM_IFLUSH_2 XFS_RANDOM_DEFAULT
118#define XFS_RANDOM_IFLUSH_3 XFS_RANDOM_DEFAULT
119#define XFS_RANDOM_IFLUSH_4 XFS_RANDOM_DEFAULT
120#define XFS_RANDOM_IFLUSH_5 XFS_RANDOM_DEFAULT
121#define XFS_RANDOM_IFLUSH_6 XFS_RANDOM_DEFAULT
122#define XFS_RANDOM_DA_READ_BUF XFS_RANDOM_DEFAULT
123#define XFS_RANDOM_BTREE_CHECK_LBLOCK (XFS_RANDOM_DEFAULT/4)
124#define XFS_RANDOM_BTREE_CHECK_SBLOCK XFS_RANDOM_DEFAULT
125#define XFS_RANDOM_ALLOC_READ_AGF XFS_RANDOM_DEFAULT
126#define XFS_RANDOM_IALLOC_READ_AGI XFS_RANDOM_DEFAULT
127#define XFS_RANDOM_ITOBP_INOTOBP XFS_RANDOM_DEFAULT
128#define XFS_RANDOM_IUNLINK XFS_RANDOM_DEFAULT
129#define XFS_RANDOM_IUNLINK_REMOVE XFS_RANDOM_DEFAULT
130#define XFS_RANDOM_DIR_INO_VALIDATE XFS_RANDOM_DEFAULT
131#define XFS_RANDOM_BULKSTAT_READ_CHUNK XFS_RANDOM_DEFAULT
132#define XFS_RANDOM_IODONE_IOERR (XFS_RANDOM_DEFAULT/10)
133#define XFS_RANDOM_STRATREAD_IOERR (XFS_RANDOM_DEFAULT/10)
134#define XFS_RANDOM_STRATCMPL_IOERR (XFS_RANDOM_DEFAULT/10)
135#define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10)
136#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT
137#define XFS_RANDOM_FREE_EXTENT 1
138#define XFS_RANDOM_RMAP_FINISH_ONE 1
139#define XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE 1
140#define XFS_RANDOM_REFCOUNT_FINISH_ONE 1
141#define XFS_RANDOM_BMAP_FINISH_ONE 1
142#define XFS_RANDOM_AG_RESV_CRITICAL 4
143#define XFS_RANDOM_DROP_WRITES 1
144#define XFS_RANDOM_LOG_BAD_CRC 1
145#define XFS_RANDOM_LOG_ITEM_PIN 1
146
147#ifdef DEBUG 66#ifdef DEBUG
148extern int xfs_errortag_init(struct xfs_mount *mp); 67extern int xfs_errortag_init(struct xfs_mount *mp);
149extern void xfs_errortag_del(struct xfs_mount *mp); 68extern void xfs_errortag_del(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 6526ef0e2a23..18146873a8b3 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -984,7 +984,7 @@ xfs_file_readdir(
984 * point we can change the ->readdir prototype to include the 984 * point we can change the ->readdir prototype to include the
985 * buffer size. For now we use the current glibc buffer size. 985 * buffer size. For now we use the current glibc buffer size.
986 */ 986 */
987 bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); 987 bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, ip->i_d.di_size);
988 988
989 return xfs_readdir(NULL, ip, ctx, bufsize); 989 return xfs_readdir(NULL, ip, ctx, bufsize);
990} 990}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 34227115a5d6..43005fbe8b1e 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -610,7 +610,7 @@ again:
610 } else { 610 } else {
611 rcu_read_unlock(); 611 rcu_read_unlock();
612 if (flags & XFS_IGET_INCORE) { 612 if (flags & XFS_IGET_INCORE) {
613 error = -ENOENT; 613 error = -ENODATA;
614 goto out_error_or_again; 614 goto out_error_or_again;
615 } 615 }
616 XFS_STATS_INC(mp, xs_ig_missed); 616 XFS_STATS_INC(mp, xs_ig_missed);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 4ec5b7f45401..d8226f7a5dde 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -39,6 +39,7 @@
39#include "xfs_ialloc.h" 39#include "xfs_ialloc.h"
40#include "xfs_bmap.h" 40#include "xfs_bmap.h"
41#include "xfs_bmap_util.h" 41#include "xfs_bmap_util.h"
42#include "xfs_errortag.h"
42#include "xfs_error.h" 43#include "xfs_error.h"
43#include "xfs_quota.h" 44#include "xfs_quota.h"
44#include "xfs_filestream.h" 45#include "xfs_filestream.h"
@@ -384,14 +385,6 @@ xfs_isilocked(
384} 385}
385#endif 386#endif
386 387
387#ifdef DEBUG
388int xfs_locked_n;
389int xfs_small_retries;
390int xfs_middle_retries;
391int xfs_lots_retries;
392int xfs_lock_delays;
393#endif
394
395/* 388/*
396 * xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when 389 * xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when
397 * DEBUG or XFS_WARN is set. And MAX_LOCKDEP_SUBCLASSES is then only defined 390 * DEBUG or XFS_WARN is set. And MAX_LOCKDEP_SUBCLASSES is then only defined
@@ -544,24 +537,11 @@ again:
544 537
545 if ((attempts % 5) == 0) { 538 if ((attempts % 5) == 0) {
546 delay(1); /* Don't just spin the CPU */ 539 delay(1); /* Don't just spin the CPU */
547#ifdef DEBUG
548 xfs_lock_delays++;
549#endif
550 } 540 }
551 i = 0; 541 i = 0;
552 try_lock = 0; 542 try_lock = 0;
553 goto again; 543 goto again;
554 } 544 }
555
556#ifdef DEBUG
557 if (attempts) {
558 if (attempts < 5) xfs_small_retries++;
559 else if (attempts < 100) xfs_middle_retries++;
560 else xfs_lots_retries++;
561 } else {
562 xfs_locked_n++;
563 }
564#endif
565} 545}
566 546
567/* 547/*
@@ -767,7 +747,7 @@ xfs_ialloc(
767 xfs_inode_t *pip, 747 xfs_inode_t *pip,
768 umode_t mode, 748 umode_t mode,
769 xfs_nlink_t nlink, 749 xfs_nlink_t nlink,
770 xfs_dev_t rdev, 750 dev_t rdev,
771 prid_t prid, 751 prid_t prid,
772 int okalloc, 752 int okalloc,
773 xfs_buf_t **ialloc_context, 753 xfs_buf_t **ialloc_context,
@@ -819,6 +799,7 @@ xfs_ialloc(
819 set_nlink(inode, nlink); 799 set_nlink(inode, nlink);
820 ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid()); 800 ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
821 ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid()); 801 ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
802 inode->i_rdev = rdev;
822 xfs_set_projid(ip, prid); 803 xfs_set_projid(ip, prid);
823 804
824 if (pip && XFS_INHERIT_GID(pip)) { 805 if (pip && XFS_INHERIT_GID(pip)) {
@@ -867,7 +848,6 @@ xfs_ialloc(
867 case S_IFBLK: 848 case S_IFBLK:
868 case S_IFSOCK: 849 case S_IFSOCK:
869 ip->i_d.di_format = XFS_DINODE_FMT_DEV; 850 ip->i_d.di_format = XFS_DINODE_FMT_DEV;
870 ip->i_df.if_u2.if_rdev = rdev;
871 ip->i_df.if_flags = 0; 851 ip->i_df.if_flags = 0;
872 flags |= XFS_ILOG_DEV; 852 flags |= XFS_ILOG_DEV;
873 break; 853 break;
@@ -933,7 +913,7 @@ xfs_ialloc(
933 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 913 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
934 ip->i_df.if_flags = XFS_IFEXTENTS; 914 ip->i_df.if_flags = XFS_IFEXTENTS;
935 ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; 915 ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
936 ip->i_df.if_u1.if_extents = NULL; 916 ip->i_df.if_u1.if_root = NULL;
937 break; 917 break;
938 default: 918 default:
939 ASSERT(0); 919 ASSERT(0);
@@ -975,7 +955,7 @@ xfs_dir_ialloc(
975 the inode. */ 955 the inode. */
976 umode_t mode, 956 umode_t mode,
977 xfs_nlink_t nlink, 957 xfs_nlink_t nlink,
978 xfs_dev_t rdev, 958 dev_t rdev,
979 prid_t prid, /* project id */ 959 prid_t prid, /* project id */
980 int okalloc, /* ok to allocate new space */ 960 int okalloc, /* ok to allocate new space */
981 xfs_inode_t **ipp, /* pointer to inode; it will be 961 xfs_inode_t **ipp, /* pointer to inode; it will be
@@ -1147,7 +1127,7 @@ xfs_create(
1147 xfs_inode_t *dp, 1127 xfs_inode_t *dp,
1148 struct xfs_name *name, 1128 struct xfs_name *name,
1149 umode_t mode, 1129 umode_t mode,
1150 xfs_dev_t rdev, 1130 dev_t rdev,
1151 xfs_inode_t **ipp) 1131 xfs_inode_t **ipp)
1152{ 1132{
1153 int is_dir = S_ISDIR(mode); 1133 int is_dir = S_ISDIR(mode);
@@ -1183,7 +1163,6 @@ xfs_create(
1183 return error; 1163 return error;
1184 1164
1185 if (is_dir) { 1165 if (is_dir) {
1186 rdev = 0;
1187 resblks = XFS_MKDIR_SPACE_RES(mp, name->len); 1166 resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
1188 tres = &M_RES(mp)->tr_mkdir; 1167 tres = &M_RES(mp)->tr_mkdir;
1189 } else { 1168 } else {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0ee453de239a..cc13c3763721 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -391,7 +391,7 @@ void xfs_inactive(struct xfs_inode *ip);
391int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, 391int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
392 struct xfs_inode **ipp, struct xfs_name *ci_name); 392 struct xfs_inode **ipp, struct xfs_name *ci_name);
393int xfs_create(struct xfs_inode *dp, struct xfs_name *name, 393int xfs_create(struct xfs_inode *dp, struct xfs_name *name,
394 umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); 394 umode_t mode, dev_t rdev, struct xfs_inode **ipp);
395int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry, 395int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
396 umode_t mode, struct xfs_inode **ipp); 396 umode_t mode, struct xfs_inode **ipp);
397int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 397int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
@@ -428,7 +428,7 @@ xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
428xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); 428xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip);
429 429
430int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, 430int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
431 xfs_nlink_t, xfs_dev_t, prid_t, int, 431 xfs_nlink_t, dev_t, prid_t, int,
432 struct xfs_inode **, int *); 432 struct xfs_inode **, int *);
433 433
434/* from xfs_file.c */ 434/* from xfs_file.c */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 9bbc2d7cc8cb..6ee5c3bf19ad 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -72,7 +72,6 @@ xfs_inode_item_data_fork_size(
72 break; 72 break;
73 73
74 case XFS_DINODE_FMT_DEV: 74 case XFS_DINODE_FMT_DEV:
75 case XFS_DINODE_FMT_UUID:
76 break; 75 break;
77 default: 76 default:
78 ASSERT(0); 77 ASSERT(0);
@@ -156,15 +155,13 @@ xfs_inode_item_format_data_fork(
156 switch (ip->i_d.di_format) { 155 switch (ip->i_d.di_format) {
157 case XFS_DINODE_FMT_EXTENTS: 156 case XFS_DINODE_FMT_EXTENTS:
158 iip->ili_fields &= 157 iip->ili_fields &=
159 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 158 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
160 XFS_ILOG_DEV | XFS_ILOG_UUID);
161 159
162 if ((iip->ili_fields & XFS_ILOG_DEXT) && 160 if ((iip->ili_fields & XFS_ILOG_DEXT) &&
163 ip->i_d.di_nextents > 0 && 161 ip->i_d.di_nextents > 0 &&
164 ip->i_df.if_bytes > 0) { 162 ip->i_df.if_bytes > 0) {
165 struct xfs_bmbt_rec *p; 163 struct xfs_bmbt_rec *p;
166 164
167 ASSERT(ip->i_df.if_u1.if_extents != NULL);
168 ASSERT(xfs_iext_count(&ip->i_df) > 0); 165 ASSERT(xfs_iext_count(&ip->i_df) > 0);
169 166
170 p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT); 167 p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
@@ -181,8 +178,7 @@ xfs_inode_item_format_data_fork(
181 break; 178 break;
182 case XFS_DINODE_FMT_BTREE: 179 case XFS_DINODE_FMT_BTREE:
183 iip->ili_fields &= 180 iip->ili_fields &=
184 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | 181 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV);
185 XFS_ILOG_DEV | XFS_ILOG_UUID);
186 182
187 if ((iip->ili_fields & XFS_ILOG_DBROOT) && 183 if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
188 ip->i_df.if_broot_bytes > 0) { 184 ip->i_df.if_broot_bytes > 0) {
@@ -200,8 +196,7 @@ xfs_inode_item_format_data_fork(
200 break; 196 break;
201 case XFS_DINODE_FMT_LOCAL: 197 case XFS_DINODE_FMT_LOCAL:
202 iip->ili_fields &= 198 iip->ili_fields &=
203 ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | 199 ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
204 XFS_ILOG_DEV | XFS_ILOG_UUID);
205 if ((iip->ili_fields & XFS_ILOG_DDATA) && 200 if ((iip->ili_fields & XFS_ILOG_DDATA) &&
206 ip->i_df.if_bytes > 0) { 201 ip->i_df.if_bytes > 0) {
207 /* 202 /*
@@ -224,17 +219,9 @@ xfs_inode_item_format_data_fork(
224 break; 219 break;
225 case XFS_DINODE_FMT_DEV: 220 case XFS_DINODE_FMT_DEV:
226 iip->ili_fields &= 221 iip->ili_fields &=
227 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 222 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT);
228 XFS_ILOG_DEXT | XFS_ILOG_UUID);
229 if (iip->ili_fields & XFS_ILOG_DEV) 223 if (iip->ili_fields & XFS_ILOG_DEV)
230 ilf->ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev; 224 ilf->ilf_u.ilfu_rdev = sysv_encode_dev(VFS_I(ip)->i_rdev);
231 break;
232 case XFS_DINODE_FMT_UUID:
233 iip->ili_fields &=
234 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
235 XFS_ILOG_DEXT | XFS_ILOG_DEV);
236 if (iip->ili_fields & XFS_ILOG_UUID)
237 ilf->ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid;
238 break; 225 break;
239 default: 226 default:
240 ASSERT(0); 227 ASSERT(0);
@@ -264,7 +251,6 @@ xfs_inode_item_format_attr_fork(
264 251
265 ASSERT(xfs_iext_count(ip->i_afp) == 252 ASSERT(xfs_iext_count(ip->i_afp) ==
266 ip->i_d.di_anextents); 253 ip->i_d.di_anextents);
267 ASSERT(ip->i_afp->if_u1.if_extents != NULL);
268 254
269 p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT); 255 p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT);
270 data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK); 256 data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK);
@@ -441,7 +427,7 @@ xfs_inode_item_format(
441 ilf->ilf_dsize = 0; 427 ilf->ilf_dsize = 0;
442 ilf->ilf_asize = 0; 428 ilf->ilf_asize = 0;
443 ilf->ilf_pad = 0; 429 ilf->ilf_pad = 0;
444 uuid_copy(&ilf->ilf_u.ilfu_uuid, &uuid_null); 430 memset(&ilf->ilf_u, 0, sizeof(ilf->ilf_u));
445 431
446 xlog_finish_iovec(lv, vecp, sizeof(*ilf)); 432 xlog_finish_iovec(lv, vecp, sizeof(*ilf));
447 433
@@ -892,8 +878,7 @@ xfs_inode_item_format_convert(
892 in_f->ilf_asize = in_f32->ilf_asize; 878 in_f->ilf_asize = in_f32->ilf_asize;
893 in_f->ilf_dsize = in_f32->ilf_dsize; 879 in_f->ilf_dsize = in_f32->ilf_dsize;
894 in_f->ilf_ino = in_f32->ilf_ino; 880 in_f->ilf_ino = in_f32->ilf_ino;
895 /* copy biggest field of ilf_u */ 881 memcpy(&in_f->ilf_u, &in_f32->ilf_u, sizeof(in_f->ilf_u));
896 uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid);
897 in_f->ilf_blkno = in_f32->ilf_blkno; 882 in_f->ilf_blkno = in_f32->ilf_blkno;
898 in_f->ilf_len = in_f32->ilf_len; 883 in_f->ilf_len = in_f32->ilf_len;
899 in_f->ilf_boffset = in_f32->ilf_boffset; 884 in_f->ilf_boffset = in_f32->ilf_boffset;
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 4c7722e325b3..b72373a33cd9 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -48,7 +48,7 @@ extern void xfs_iflush_done(struct xfs_buf *, struct xfs_log_item *);
48extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *); 48extern void xfs_istale_done(struct xfs_buf *, struct xfs_log_item *);
49extern void xfs_iflush_abort(struct xfs_inode *, bool); 49extern void xfs_iflush_abort(struct xfs_inode *, bool);
50extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, 50extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
51 xfs_inode_log_format_t *); 51 struct xfs_inode_log_format *);
52 52
53extern struct kmem_zone *xfs_ili_zone; 53extern struct kmem_zone *xfs_ili_zone;
54 54
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index aa75389be8cf..20dc65fef6a4 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -44,6 +44,7 @@
44#include "xfs_btree.h" 44#include "xfs_btree.h"
45#include <linux/fsmap.h> 45#include <linux/fsmap.h>
46#include "xfs_fsmap.h" 46#include "xfs_fsmap.h"
47#include "scrub/xfs_scrub.h"
47 48
48#include <linux/capability.h> 49#include <linux/capability.h>
49#include <linux/cred.h> 50#include <linux/cred.h>
@@ -310,8 +311,8 @@ xfs_readlink_by_handle(
310int 311int
311xfs_set_dmattrs( 312xfs_set_dmattrs(
312 xfs_inode_t *ip, 313 xfs_inode_t *ip,
313 u_int evmask, 314 uint evmask,
314 u_int16_t state) 315 uint16_t state)
315{ 316{
316 xfs_mount_t *mp = ip->i_mount; 317 xfs_mount_t *mp = ip->i_mount;
317 xfs_trans_t *tp; 318 xfs_trans_t *tp;
@@ -1201,6 +1202,8 @@ out_unlock:
1201 * 8. for non-realtime files, the extent size hint must be limited 1202 * 8. for non-realtime files, the extent size hint must be limited
1202 * to half the AG size to avoid alignment extending the extent beyond the 1203 * to half the AG size to avoid alignment extending the extent beyond the
1203 * limits of the AG. 1204 * limits of the AG.
1205 *
1206 * Please keep this function in sync with xfs_scrub_inode_extsize.
1204 */ 1207 */
1205static int 1208static int
1206xfs_ioctl_setattr_check_extsize( 1209xfs_ioctl_setattr_check_extsize(
@@ -1257,6 +1260,8 @@ xfs_ioctl_setattr_check_extsize(
1257 * 5. Extent size must be a multiple of the appropriate block size. 1260 * 5. Extent size must be a multiple of the appropriate block size.
1258 * 6. The extent size hint must be limited to half the AG size to avoid 1261 * 6. The extent size hint must be limited to half the AG size to avoid
1259 * alignment extending the extent beyond the limits of the AG. 1262 * alignment extending the extent beyond the limits of the AG.
1263 *
1264 * Please keep this function in sync with xfs_scrub_inode_cowextsize.
1260 */ 1265 */
1261static int 1266static int
1262xfs_ioctl_setattr_check_cowextsize( 1267xfs_ioctl_setattr_check_cowextsize(
@@ -1540,17 +1545,26 @@ out_drop_write:
1540 return error; 1545 return error;
1541} 1546}
1542 1547
1543STATIC int 1548static bool
1544xfs_getbmap_format(void **ap, struct getbmapx *bmv) 1549xfs_getbmap_format(
1550 struct kgetbmap *p,
1551 struct getbmapx __user *u,
1552 size_t recsize)
1545{ 1553{
1546 struct getbmap __user *base = (struct getbmap __user *)*ap; 1554 if (put_user(p->bmv_offset, &u->bmv_offset) ||
1547 1555 put_user(p->bmv_block, &u->bmv_block) ||
1548 /* copy only getbmap portion (not getbmapx) */ 1556 put_user(p->bmv_length, &u->bmv_length) ||
1549 if (copy_to_user(base, bmv, sizeof(struct getbmap))) 1557 put_user(0, &u->bmv_count) ||
1550 return -EFAULT; 1558 put_user(0, &u->bmv_entries))
1551 1559 return false;
1552 *ap += sizeof(struct getbmap); 1560 if (recsize < sizeof(struct getbmapx))
1553 return 0; 1561 return true;
1562 if (put_user(0, &u->bmv_iflags) ||
1563 put_user(p->bmv_oflags, &u->bmv_oflags) ||
1564 put_user(0, &u->bmv_unused1) ||
1565 put_user(0, &u->bmv_unused2))
1566 return false;
1567 return true;
1554} 1568}
1555 1569
1556STATIC int 1570STATIC int
@@ -1560,68 +1574,57 @@ xfs_ioc_getbmap(
1560 void __user *arg) 1574 void __user *arg)
1561{ 1575{
1562 struct getbmapx bmx = { 0 }; 1576 struct getbmapx bmx = { 0 };
1563 int error; 1577 struct kgetbmap *buf;
1564 1578 size_t recsize;
1565 /* struct getbmap is a strict subset of struct getbmapx. */ 1579 int error, i;
1566 if (copy_from_user(&bmx, arg, offsetof(struct getbmapx, bmv_iflags)))
1567 return -EFAULT;
1568 1580
1569 if (bmx.bmv_count < 2) 1581 switch (cmd) {
1582 case XFS_IOC_GETBMAPA:
1583 bmx.bmv_iflags = BMV_IF_ATTRFORK;
1584 /*FALLTHRU*/
1585 case XFS_IOC_GETBMAP:
1586 if (file->f_mode & FMODE_NOCMTIME)
1587 bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
1588 /* struct getbmap is a strict subset of struct getbmapx. */
1589 recsize = sizeof(struct getbmap);
1590 break;
1591 case XFS_IOC_GETBMAPX:
1592 recsize = sizeof(struct getbmapx);
1593 break;
1594 default:
1570 return -EINVAL; 1595 return -EINVAL;
1596 }
1571 1597
1572 bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); 1598 if (copy_from_user(&bmx, arg, recsize))
1573 if (file->f_mode & FMODE_NOCMTIME)
1574 bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
1575
1576 error = xfs_getbmap(XFS_I(file_inode(file)), &bmx, xfs_getbmap_format,
1577 (__force struct getbmap *)arg+1);
1578 if (error)
1579 return error;
1580
1581 /* copy back header - only size of getbmap */
1582 if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
1583 return -EFAULT;
1584 return 0;
1585}
1586
1587STATIC int
1588xfs_getbmapx_format(void **ap, struct getbmapx *bmv)
1589{
1590 struct getbmapx __user *base = (struct getbmapx __user *)*ap;
1591
1592 if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
1593 return -EFAULT;
1594
1595 *ap += sizeof(struct getbmapx);
1596 return 0;
1597}
1598
1599STATIC int
1600xfs_ioc_getbmapx(
1601 struct xfs_inode *ip,
1602 void __user *arg)
1603{
1604 struct getbmapx bmx;
1605 int error;
1606
1607 if (copy_from_user(&bmx, arg, sizeof(bmx)))
1608 return -EFAULT; 1599 return -EFAULT;
1609 1600
1610 if (bmx.bmv_count < 2) 1601 if (bmx.bmv_count < 2)
1611 return -EINVAL; 1602 return -EINVAL;
1603 if (bmx.bmv_count > ULONG_MAX / recsize)
1604 return -ENOMEM;
1612 1605
1613 if (bmx.bmv_iflags & (~BMV_IF_VALID)) 1606 buf = kmem_zalloc_large(bmx.bmv_count * sizeof(*buf), 0);
1614 return -EINVAL; 1607 if (!buf)
1608 return -ENOMEM;
1615 1609
1616 error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, 1610 error = xfs_getbmap(XFS_I(file_inode(file)), &bmx, buf);
1617 (__force struct getbmapx *)arg+1);
1618 if (error) 1611 if (error)
1619 return error; 1612 goto out_free_buf;
1620 1613
1621 /* copy back header */ 1614 error = -EFAULT;
1622 if (copy_to_user(arg, &bmx, sizeof(struct getbmapx))) 1615 if (copy_to_user(arg, &bmx, recsize))
1623 return -EFAULT; 1616 goto out_free_buf;
1617 arg += recsize;
1618
1619 for (i = 0; i < bmx.bmv_entries; i++) {
1620 if (!xfs_getbmap_format(buf + i, arg, recsize))
1621 goto out_free_buf;
1622 arg += recsize;
1623 }
1624 1624
1625 error = 0;
1626out_free_buf:
1627 kmem_free(buf);
1625 return 0; 1628 return 0;
1626} 1629}
1627 1630
@@ -1703,6 +1706,30 @@ xfs_ioc_getfsmap(
1703 return 0; 1706 return 0;
1704} 1707}
1705 1708
1709STATIC int
1710xfs_ioc_scrub_metadata(
1711 struct xfs_inode *ip,
1712 void __user *arg)
1713{
1714 struct xfs_scrub_metadata scrub;
1715 int error;
1716
1717 if (!capable(CAP_SYS_ADMIN))
1718 return -EPERM;
1719
1720 if (copy_from_user(&scrub, arg, sizeof(scrub)))
1721 return -EFAULT;
1722
1723 error = xfs_scrub_metadata(ip, &scrub);
1724 if (error)
1725 return error;
1726
1727 if (copy_to_user(arg, &scrub, sizeof(scrub)))
1728 return -EFAULT;
1729
1730 return 0;
1731}
1732
1706int 1733int
1707xfs_ioc_swapext( 1734xfs_ioc_swapext(
1708 xfs_swapext_t *sxp) 1735 xfs_swapext_t *sxp)
@@ -1878,14 +1905,15 @@ xfs_file_ioctl(
1878 1905
1879 case XFS_IOC_GETBMAP: 1906 case XFS_IOC_GETBMAP:
1880 case XFS_IOC_GETBMAPA: 1907 case XFS_IOC_GETBMAPA:
1881 return xfs_ioc_getbmap(filp, cmd, arg);
1882
1883 case XFS_IOC_GETBMAPX: 1908 case XFS_IOC_GETBMAPX:
1884 return xfs_ioc_getbmapx(ip, arg); 1909 return xfs_ioc_getbmap(filp, cmd, arg);
1885 1910
1886 case FS_IOC_GETFSMAP: 1911 case FS_IOC_GETFSMAP:
1887 return xfs_ioc_getfsmap(ip, arg); 1912 return xfs_ioc_getfsmap(ip, arg);
1888 1913
1914 case XFS_IOC_SCRUB_METADATA:
1915 return xfs_ioc_scrub_metadata(ip, arg);
1916
1889 case XFS_IOC_FD_TO_HANDLE: 1917 case XFS_IOC_FD_TO_HANDLE:
1890 case XFS_IOC_PATH_TO_HANDLE: 1918 case XFS_IOC_PATH_TO_HANDLE:
1891 case XFS_IOC_PATH_TO_FSHANDLE: { 1919 case XFS_IOC_PATH_TO_FSHANDLE: {
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index e86c3ea137d2..8de879f0c7d5 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -86,7 +86,7 @@ xfs_file_compat_ioctl(
86extern int 86extern int
87xfs_set_dmattrs( 87xfs_set_dmattrs(
88 struct xfs_inode *ip, 88 struct xfs_inode *ip,
89 u_int evmask, 89 uint evmask,
90 u_int16_t state); 90 uint16_t state);
91 91
92#endif 92#endif
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index fa0bc4d46065..35c79e246fde 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -556,6 +556,7 @@ xfs_file_compat_ioctl(
556 case XFS_IOC_ERROR_INJECTION: 556 case XFS_IOC_ERROR_INJECTION:
557 case XFS_IOC_ERROR_CLEARALL: 557 case XFS_IOC_ERROR_CLEARALL:
558 case FS_IOC_GETFSMAP: 558 case FS_IOC_GETFSMAP:
559 case XFS_IOC_SCRUB_METADATA:
559 return xfs_file_ioctl(filp, cmd, p); 560 return xfs_file_ioctl(filp, cmd, p);
560#ifndef BROKEN_X86_ALIGNMENT 561#ifndef BROKEN_X86_ALIGNMENT
561 /* These are handled fine if no alignment issues */ 562 /* These are handled fine if no alignment issues */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 9744b4819e0d..18077e2189a9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -30,6 +30,7 @@
30#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
31#include "xfs_bmap.h" 31#include "xfs_bmap.h"
32#include "xfs_bmap_util.h" 32#include "xfs_bmap_util.h"
33#include "xfs_errortag.h"
33#include "xfs_error.h" 34#include "xfs_error.h"
34#include "xfs_trans.h" 35#include "xfs_trans.h"
35#include "xfs_trans_space.h" 36#include "xfs_trans_space.h"
@@ -389,7 +390,7 @@ xfs_iomap_prealloc_size(
389 struct xfs_inode *ip, 390 struct xfs_inode *ip,
390 loff_t offset, 391 loff_t offset,
391 loff_t count, 392 loff_t count,
392 xfs_extnum_t idx) 393 struct xfs_iext_cursor *icur)
393{ 394{
394 struct xfs_mount *mp = ip->i_mount; 395 struct xfs_mount *mp = ip->i_mount;
395 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 396 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
@@ -414,7 +415,7 @@ xfs_iomap_prealloc_size(
414 */ 415 */
415 if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) || 416 if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ||
416 XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) || 417 XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
417 !xfs_iext_get_extent(ifp, idx - 1, &prev) || 418 !xfs_iext_peek_prev_extent(ifp, icur, &prev) ||
418 prev.br_startoff + prev.br_blockcount < offset_fsb) 419 prev.br_startoff + prev.br_blockcount < offset_fsb)
419 return mp->m_writeio_blocks; 420 return mp->m_writeio_blocks;
420 421
@@ -532,7 +533,7 @@ xfs_file_iomap_begin_delay(
532 xfs_fileoff_t end_fsb; 533 xfs_fileoff_t end_fsb;
533 int error = 0, eof = 0; 534 int error = 0, eof = 0;
534 struct xfs_bmbt_irec got; 535 struct xfs_bmbt_irec got;
535 xfs_extnum_t idx; 536 struct xfs_iext_cursor icur;
536 xfs_fsblock_t prealloc_blocks = 0; 537 xfs_fsblock_t prealloc_blocks = 0;
537 538
538 ASSERT(!XFS_IS_REALTIME_INODE(ip)); 539 ASSERT(!XFS_IS_REALTIME_INODE(ip));
@@ -557,7 +558,7 @@ xfs_file_iomap_begin_delay(
557 goto out_unlock; 558 goto out_unlock;
558 } 559 }
559 560
560 eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); 561 eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got);
561 if (!eof && got.br_startoff <= offset_fsb) { 562 if (!eof && got.br_startoff <= offset_fsb) {
562 if (xfs_is_reflink_inode(ip)) { 563 if (xfs_is_reflink_inode(ip)) {
563 bool shared; 564 bool shared;
@@ -591,7 +592,8 @@ xfs_file_iomap_begin_delay(
591 end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); 592 end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
592 593
593 if (eof) { 594 if (eof) {
594 prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx); 595 prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count,
596 &icur);
595 if (prealloc_blocks) { 597 if (prealloc_blocks) {
596 xfs_extlen_t align; 598 xfs_extlen_t align;
597 xfs_off_t end_offset; 599 xfs_off_t end_offset;
@@ -613,7 +615,8 @@ xfs_file_iomap_begin_delay(
613 615
614retry: 616retry:
615 error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, 617 error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
616 end_fsb - offset_fsb, prealloc_blocks, &got, &idx, eof); 618 end_fsb - offset_fsb, prealloc_blocks, &got, &icur,
619 eof);
617 switch (error) { 620 switch (error) {
618 case 0: 621 case 0:
619 break; 622 break;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 17081c77ef86..56475fcd76f2 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -160,7 +160,6 @@ xfs_generic_create(
160 if (S_ISCHR(mode) || S_ISBLK(mode)) { 160 if (S_ISCHR(mode) || S_ISBLK(mode)) {
161 if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) 161 if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
162 return -EINVAL; 162 return -EINVAL;
163 rdev = sysv_encode_dev(rdev);
164 } else { 163 } else {
165 rdev = 0; 164 rdev = 0;
166 } 165 }
@@ -535,8 +534,7 @@ xfs_vn_getattr(
535 case S_IFBLK: 534 case S_IFBLK:
536 case S_IFCHR: 535 case S_IFCHR:
537 stat->blksize = BLKDEV_IOSIZE; 536 stat->blksize = BLKDEV_IOSIZE;
538 stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, 537 stat->rdev = inode->i_rdev;
539 sysv_minor(ip->i_df.if_u2.if_rdev));
540 break; 538 break;
541 default: 539 default:
542 if (XFS_IS_REALTIME_INODE(ip)) { 540 if (XFS_IS_REALTIME_INODE(ip)) {
@@ -886,22 +884,6 @@ xfs_setattr_size(
886 return error; 884 return error;
887 885
888 /* 886 /*
889 * We are going to log the inode size change in this transaction so
890 * any previous writes that are beyond the on disk EOF and the new
891 * EOF that have not been written out need to be written here. If we
892 * do not write the data out, we expose ourselves to the null files
893 * problem. Note that this includes any block zeroing we did above;
894 * otherwise those blocks may not be zeroed after a crash.
895 */
896 if (did_zeroing ||
897 (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
898 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
899 ip->i_d.di_size, newsize);
900 if (error)
901 return error;
902 }
903
904 /*
905 * We've already locked out new page faults, so now we can safely remove 887 * We've already locked out new page faults, so now we can safely remove
906 * pages from the page cache knowing they won't get refaulted until we 888 * pages from the page cache knowing they won't get refaulted until we
907 * drop the XFS_MMAP_EXCL lock after the extent manipulations are 889 * drop the XFS_MMAP_EXCL lock after the extent manipulations are
@@ -917,9 +899,29 @@ xfs_setattr_size(
917 * user visible changes). There's not much we can do about this, except 899 * user visible changes). There's not much we can do about this, except
918 * to hope that the caller sees ENOMEM and retries the truncate 900 * to hope that the caller sees ENOMEM and retries the truncate
919 * operation. 901 * operation.
902 *
903 * And we update in-core i_size and truncate page cache beyond newsize
904 * before writeback the [di_size, newsize] range, so we're guaranteed
905 * not to write stale data past the new EOF on truncate down.
920 */ 906 */
921 truncate_setsize(inode, newsize); 907 truncate_setsize(inode, newsize);
922 908
909 /*
910 * We are going to log the inode size change in this transaction so
911 * any previous writes that are beyond the on disk EOF and the new
912 * EOF that have not been written out need to be written here. If we
913 * do not write the data out, we expose ourselves to the null files
914 * problem. Note that this includes any block zeroing we did above;
915 * otherwise those blocks may not be zeroed after a crash.
916 */
917 if (did_zeroing ||
918 (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
919 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
920 ip->i_d.di_size, newsize - 1);
921 if (error)
922 return error;
923 }
924
923 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); 925 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
924 if (error) 926 if (error)
925 return error; 927 return error;
@@ -1231,18 +1233,6 @@ xfs_setup_inode(
1231 inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid); 1233 inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid);
1232 inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid); 1234 inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid);
1233 1235
1234 switch (inode->i_mode & S_IFMT) {
1235 case S_IFBLK:
1236 case S_IFCHR:
1237 inode->i_rdev =
1238 MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
1239 sysv_minor(ip->i_df.if_u2.if_rdev));
1240 break;
1241 default:
1242 inode->i_rdev = 0;
1243 break;
1244 }
1245
1246 i_size_write(inode, ip->i_d.di_size); 1236 i_size_write(inode, ip->i_d.di_size);
1247 xfs_diflags_to_iflags(inode, ip); 1237 xfs_diflags_to_iflags(inode, ip);
1248 1238
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index c393a2f6d8c3..d58310514423 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -31,16 +31,6 @@
31#include "xfs_trace.h" 31#include "xfs_trace.h"
32#include "xfs_icache.h" 32#include "xfs_icache.h"
33 33
34int
35xfs_internal_inum(
36 xfs_mount_t *mp,
37 xfs_ino_t ino)
38{
39 return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
40 (xfs_sb_version_hasquota(&mp->m_sb) &&
41 xfs_is_quota_inode(&mp->m_sb, ino)));
42}
43
44/* 34/*
45 * Return stat information for one inode. 35 * Return stat information for one inode.
46 * Return 0 if ok, else errno. 36 * Return 0 if ok, else errno.
@@ -119,12 +109,11 @@ xfs_bulkstat_one_int(
119 109
120 switch (dic->di_format) { 110 switch (dic->di_format) {
121 case XFS_DINODE_FMT_DEV: 111 case XFS_DINODE_FMT_DEV:
122 buf->bs_rdev = ip->i_df.if_u2.if_rdev; 112 buf->bs_rdev = sysv_encode_dev(inode->i_rdev);
123 buf->bs_blksize = BLKDEV_IOSIZE; 113 buf->bs_blksize = BLKDEV_IOSIZE;
124 buf->bs_blocks = 0; 114 buf->bs_blocks = 0;
125 break; 115 break;
126 case XFS_DINODE_FMT_LOCAL: 116 case XFS_DINODE_FMT_LOCAL:
127 case XFS_DINODE_FMT_UUID:
128 buf->bs_rdev = 0; 117 buf->bs_rdev = 0;
129 buf->bs_blksize = mp->m_sb.sb_blocksize; 118 buf->bs_blksize = mp->m_sb.sb_blocksize;
130 buf->bs_blocks = 0; 119 buf->bs_blocks = 0;
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 17e86e0541af..6ea8b3912fa4 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -96,6 +96,4 @@ xfs_inumbers(
96 void __user *buffer, /* buffer with inode info */ 96 void __user *buffer, /* buffer with inode info */
97 inumbers_fmt_pf formatter); 97 inumbers_fmt_pf formatter);
98 98
99int xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
100
101#endif /* __XFS_ITABLE_H__ */ 99#endif /* __XFS_ITABLE_H__ */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index dcd1292664b3..6282bfc1afa9 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -142,6 +142,13 @@ typedef __u32 xfs_nlink_t;
142#define SYNCHRONIZE() barrier() 142#define SYNCHRONIZE() barrier()
143#define __return_address __builtin_return_address(0) 143#define __return_address __builtin_return_address(0)
144 144
145/*
146 * Return the address of a label. Use barrier() so that the optimizer
147 * won't reorder code to refactor the error jumpouts into a single
148 * return, which throws off the reported address.
149 */
150#define __this_address ({ __label__ __here; __here: barrier(); &&__here; })
151
145#define XFS_PROJID_DEFAULT 0 152#define XFS_PROJID_DEFAULT 0
146 153
147#define MIN(a,b) (min(a,b)) 154#define MIN(a,b) (min(a,b))
@@ -243,10 +250,6 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
243#define ASSERT(expr) \ 250#define ASSERT(expr) \
244 (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) 251 (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
245 252
246#ifndef STATIC
247# define STATIC noinline
248#endif
249
250#else /* !DEBUG */ 253#else /* !DEBUG */
251 254
252#ifdef XFS_WARN 255#ifdef XFS_WARN
@@ -254,21 +257,15 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
254#define ASSERT(expr) \ 257#define ASSERT(expr) \
255 (likely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__)) 258 (likely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__))
256 259
257#ifndef STATIC
258# define STATIC static noinline
259#endif
260
261#else /* !DEBUG && !XFS_WARN */ 260#else /* !DEBUG && !XFS_WARN */
262 261
263#define ASSERT(expr) ((void)0) 262#define ASSERT(expr) ((void)0)
264 263
265#ifndef STATIC
266# define STATIC static noinline
267#endif
268
269#endif /* XFS_WARN */ 264#endif /* XFS_WARN */
270#endif /* DEBUG */ 265#endif /* DEBUG */
271 266
267#define STATIC static noinline
268
272#ifdef CONFIG_XFS_RT 269#ifdef CONFIG_XFS_RT
273 270
274/* 271/*
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index dc95a49d62e7..38d4227895ae 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -22,6 +22,7 @@
22#include "xfs_log_format.h" 22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_mount.h" 24#include "xfs_mount.h"
25#include "xfs_errortag.h"
25#include "xfs_error.h" 26#include "xfs_error.h"
26#include "xfs_trans.h" 27#include "xfs_trans.h"
27#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
@@ -608,6 +609,7 @@ xfs_log_mount(
608 xfs_daddr_t blk_offset, 609 xfs_daddr_t blk_offset,
609 int num_bblks) 610 int num_bblks)
610{ 611{
612 bool fatal = xfs_sb_version_hascrc(&mp->m_sb);
611 int error = 0; 613 int error = 0;
612 int min_logfsbs; 614 int min_logfsbs;
613 615
@@ -659,9 +661,20 @@ xfs_log_mount(
659 XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks), 661 XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
660 XFS_MAX_LOG_BYTES); 662 XFS_MAX_LOG_BYTES);
661 error = -EINVAL; 663 error = -EINVAL;
664 } else if (mp->m_sb.sb_logsunit > 1 &&
665 mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) {
666 xfs_warn(mp,
667 "log stripe unit %u bytes must be a multiple of block size",
668 mp->m_sb.sb_logsunit);
669 error = -EINVAL;
670 fatal = true;
662 } 671 }
663 if (error) { 672 if (error) {
664 if (xfs_sb_version_hascrc(&mp->m_sb)) { 673 /*
674 * Log check errors are always fatal on v5; or whenever bad
675 * metadata leads to a crash.
676 */
677 if (fatal) {
665 xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!"); 678 xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
666 ASSERT(0); 679 ASSERT(0);
667 goto out_free_log; 680 goto out_free_log;
@@ -744,6 +757,7 @@ xfs_log_mount_finish(
744{ 757{
745 int error = 0; 758 int error = 0;
746 bool readonly = (mp->m_flags & XFS_MOUNT_RDONLY); 759 bool readonly = (mp->m_flags & XFS_MOUNT_RDONLY);
760 bool recovered = mp->m_log->l_flags & XLOG_RECOVERY_NEEDED;
747 761
748 if (mp->m_flags & XFS_MOUNT_NORECOVERY) { 762 if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
749 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); 763 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
@@ -780,6 +794,21 @@ xfs_log_mount_finish(
780 mp->m_super->s_flags &= ~MS_ACTIVE; 794 mp->m_super->s_flags &= ~MS_ACTIVE;
781 evict_inodes(mp->m_super); 795 evict_inodes(mp->m_super);
782 796
797 /*
798 * Drain the buffer LRU after log recovery. This is required for v4
799 * filesystems to avoid leaving around buffers with NULL verifier ops,
800 * but we do it unconditionally to make sure we're always in a clean
801 * cache state after mount.
802 *
803 * Don't push in the error case because the AIL may have pending intents
804 * that aren't removed until recovery is cancelled.
805 */
806 if (!error && recovered) {
807 xfs_log_force(mp, XFS_LOG_SYNC);
808 xfs_ail_push_all_sync(mp->m_ail);
809 }
810 xfs_wait_buftarg(mp->m_ddev_targp);
811
783 if (readonly) 812 if (readonly)
784 mp->m_flags |= XFS_MOUNT_RDONLY; 813 mp->m_flags |= XFS_MOUNT_RDONLY;
785 814
@@ -3734,7 +3763,7 @@ xlog_ticket_alloc(
3734 * one of the iclogs. This uses backup pointers stored in a different 3763 * one of the iclogs. This uses backup pointers stored in a different
3735 * part of the log in case we trash the log structure. 3764 * part of the log in case we trash the log structure.
3736 */ 3765 */
3737void 3766STATIC void
3738xlog_verify_dest_ptr( 3767xlog_verify_dest_ptr(
3739 struct xlog *log, 3768 struct xlog *log,
3740 void *ptr) 3769 void *ptr)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index ee34899396b2..87b1c331f9eb 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -85,17 +85,21 @@ struct xfs_buf_cancel {
85 */ 85 */
86 86
87/* 87/*
88 * Verify the given count of basic blocks is valid number of blocks 88 * Verify the log-relative block number and length in basic blocks are valid for
89 * to specify for an operation involving the given XFS log buffer. 89 * an operation involving the given XFS log buffer. Returns true if the fields
90 * Returns nonzero if the count is valid, 0 otherwise. 90 * are valid, false otherwise.
91 */ 91 */
92 92static inline bool
93static inline int 93xlog_verify_bp(
94xlog_buf_bbcount_valid(
95 struct xlog *log, 94 struct xlog *log,
95 xfs_daddr_t blk_no,
96 int bbcount) 96 int bbcount)
97{ 97{
98 return bbcount > 0 && bbcount <= log->l_logBBsize; 98 if (blk_no < 0 || blk_no >= log->l_logBBsize)
99 return false;
100 if (bbcount <= 0 || (blk_no + bbcount) > log->l_logBBsize)
101 return false;
102 return true;
99} 103}
100 104
101/* 105/*
@@ -110,7 +114,11 @@ xlog_get_bp(
110{ 114{
111 struct xfs_buf *bp; 115 struct xfs_buf *bp;
112 116
113 if (!xlog_buf_bbcount_valid(log, nbblks)) { 117 /*
118 * Pass log block 0 since we don't have an addr yet, buffer will be
119 * verified on read.
120 */
121 if (!xlog_verify_bp(log, 0, nbblks)) {
114 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", 122 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
115 nbblks); 123 nbblks);
116 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 124 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
@@ -180,9 +188,10 @@ xlog_bread_noalign(
180{ 188{
181 int error; 189 int error;
182 190
183 if (!xlog_buf_bbcount_valid(log, nbblks)) { 191 if (!xlog_verify_bp(log, blk_no, nbblks)) {
184 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", 192 xfs_warn(log->l_mp,
185 nbblks); 193 "Invalid log block/length (0x%llx, 0x%x) for buffer",
194 blk_no, nbblks);
186 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 195 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
187 return -EFSCORRUPTED; 196 return -EFSCORRUPTED;
188 } 197 }
@@ -265,9 +274,10 @@ xlog_bwrite(
265{ 274{
266 int error; 275 int error;
267 276
268 if (!xlog_buf_bbcount_valid(log, nbblks)) { 277 if (!xlog_verify_bp(log, blk_no, nbblks)) {
269 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", 278 xfs_warn(log->l_mp,
270 nbblks); 279 "Invalid log block/length (0x%llx, 0x%x) for buffer",
280 blk_no, nbblks);
271 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 281 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
272 return -EFSCORRUPTED; 282 return -EFSCORRUPTED;
273 } 283 }
@@ -753,7 +763,7 @@ xlog_find_head(
753 * in the in-core log. The following number can be made tighter if 763 * in the in-core log. The following number can be made tighter if
754 * we actually look at the block size of the filesystem. 764 * we actually look at the block size of the filesystem.
755 */ 765 */
756 num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); 766 num_scan_bblks = min_t(int, log_bbnum, XLOG_TOTAL_REC_SHIFT(log));
757 if (head_blk >= num_scan_bblks) { 767 if (head_blk >= num_scan_bblks) {
758 /* 768 /*
759 * We are guaranteed that the entire check can be performed 769 * We are guaranteed that the entire check can be performed
@@ -2975,7 +2985,7 @@ xlog_recover_inode_pass2(
2975 struct xlog_recover_item *item, 2985 struct xlog_recover_item *item,
2976 xfs_lsn_t current_lsn) 2986 xfs_lsn_t current_lsn)
2977{ 2987{
2978 xfs_inode_log_format_t *in_f; 2988 struct xfs_inode_log_format *in_f;
2979 xfs_mount_t *mp = log->l_mp; 2989 xfs_mount_t *mp = log->l_mp;
2980 xfs_buf_t *bp; 2990 xfs_buf_t *bp;
2981 xfs_dinode_t *dip; 2991 xfs_dinode_t *dip;
@@ -2989,10 +2999,10 @@ xlog_recover_inode_pass2(
2989 uint isize; 2999 uint isize;
2990 int need_free = 0; 3000 int need_free = 0;
2991 3001
2992 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { 3002 if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
2993 in_f = item->ri_buf[0].i_addr; 3003 in_f = item->ri_buf[0].i_addr;
2994 } else { 3004 } else {
2995 in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP); 3005 in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), KM_SLEEP);
2996 need_free = 1; 3006 need_free = 1;
2997 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); 3007 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
2998 if (error) 3008 if (error)
@@ -3163,16 +3173,8 @@ xlog_recover_inode_pass2(
3163 } 3173 }
3164 3174
3165 fields = in_f->ilf_fields; 3175 fields = in_f->ilf_fields;
3166 switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) { 3176 if (fields & XFS_ILOG_DEV)
3167 case XFS_ILOG_DEV:
3168 xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev); 3177 xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
3169 break;
3170 case XFS_ILOG_UUID:
3171 memcpy(XFS_DFORK_DPTR(dip),
3172 &in_f->ilf_u.ilfu_uuid,
3173 sizeof(uuid_t));
3174 break;
3175 }
3176 3178
3177 if (in_f->ilf_size == 2) 3179 if (in_f->ilf_size == 2)
3178 goto out_owner_change; 3180 goto out_owner_change;
@@ -4297,7 +4299,7 @@ xlog_recover_add_to_trans(
4297 char *dp, 4299 char *dp,
4298 int len) 4300 int len)
4299{ 4301{
4300 xfs_inode_log_format_t *in_f; /* any will do */ 4302 struct xfs_inode_log_format *in_f; /* any will do */
4301 xlog_recover_item_t *item; 4303 xlog_recover_item_t *item;
4302 char *ptr; 4304 char *ptr;
4303 4305
@@ -4331,7 +4333,7 @@ xlog_recover_add_to_trans(
4331 4333
4332 ptr = kmem_alloc(len, KM_SLEEP); 4334 ptr = kmem_alloc(len, KM_SLEEP);
4333 memcpy(ptr, dp, len); 4335 memcpy(ptr, dp, len);
4334 in_f = (xfs_inode_log_format_t *)ptr; 4336 in_f = (struct xfs_inode_log_format *)ptr;
4335 4337
4336 /* take the tail entry */ 4338 /* take the tail entry */
4337 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); 4339 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
@@ -5823,7 +5825,7 @@ xlog_recover_cancel(
5823 * Read all of the agf and agi counters and check that they 5825 * Read all of the agf and agi counters and check that they
5824 * are consistent with the superblock counters. 5826 * are consistent with the superblock counters.
5825 */ 5827 */
5826void 5828STATIC void
5827xlog_recover_check_summary( 5829xlog_recover_check_summary(
5828 struct xlog *log) 5830 struct xlog *log)
5829{ 5831{
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index e9727d0a541a..c879b517cc94 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1022,10 +1022,21 @@ xfs_mountfs(
1022 xfs_rtunmount_inodes(mp); 1022 xfs_rtunmount_inodes(mp);
1023 out_rele_rip: 1023 out_rele_rip:
1024 IRELE(rip); 1024 IRELE(rip);
1025 cancel_delayed_work_sync(&mp->m_reclaim_work);
1026 xfs_reclaim_inodes(mp, SYNC_WAIT);
1027 /* Clean out dquots that might be in memory after quotacheck. */ 1025 /* Clean out dquots that might be in memory after quotacheck. */
1028 xfs_qm_unmount(mp); 1026 xfs_qm_unmount(mp);
1027 /*
1028 * Cancel all delayed reclaim work and reclaim the inodes directly.
1029 * We have to do this /after/ rtunmount and qm_unmount because those
1030 * two will have scheduled delayed reclaim for the rt/quota inodes.
1031 *
1032 * This is slightly different from the unmountfs call sequence
1033 * because we could be tearing down a partially set up mount. In
1034 * particular, if log_mount_finish fails we bail out without calling
1035 * qm_unmount_quotas and therefore rely on qm_unmount to release the
1036 * quota inodes.
1037 */
1038 cancel_delayed_work_sync(&mp->m_reclaim_work);
1039 xfs_reclaim_inodes(mp, SYNC_WAIT);
1029 out_log_dealloc: 1040 out_log_dealloc:
1030 mp->m_flags |= XFS_MOUNT_UNMOUNTING; 1041 mp->m_flags |= XFS_MOUNT_UNMOUNTING;
1031 xfs_log_mount_cancel(mp); 1042 xfs_log_mount_cancel(mp);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 37e603bf1591..cc041a29eb70 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -273,7 +273,7 @@ xfs_reflink_reserve_cow(
273 struct xfs_bmbt_irec got; 273 struct xfs_bmbt_irec got;
274 int error = 0; 274 int error = 0;
275 bool eof = false, trimmed; 275 bool eof = false, trimmed;
276 xfs_extnum_t idx; 276 struct xfs_iext_cursor icur;
277 277
278 /* 278 /*
279 * Search the COW fork extent list first. This serves two purposes: 279 * Search the COW fork extent list first. This serves two purposes:
@@ -284,7 +284,7 @@ xfs_reflink_reserve_cow(
284 * tree. 284 * tree.
285 */ 285 */
286 286
287 if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got)) 287 if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &icur, &got))
288 eof = true; 288 eof = true;
289 if (!eof && got.br_startoff <= imap->br_startoff) { 289 if (!eof && got.br_startoff <= imap->br_startoff) {
290 trace_xfs_reflink_cow_found(ip, imap); 290 trace_xfs_reflink_cow_found(ip, imap);
@@ -312,7 +312,7 @@ xfs_reflink_reserve_cow(
312 return error; 312 return error;
313 313
314 error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, 314 error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
315 imap->br_blockcount, 0, &got, &idx, eof); 315 imap->br_blockcount, 0, &got, &icur, eof);
316 if (error == -ENOSPC || error == -EDQUOT) 316 if (error == -ENOSPC || error == -EDQUOT)
317 trace_xfs_reflink_cow_enospc(ip, imap); 317 trace_xfs_reflink_cow_enospc(ip, imap);
318 if (error) 318 if (error)
@@ -353,29 +353,22 @@ xfs_reflink_convert_cow(
353 xfs_off_t offset, 353 xfs_off_t offset,
354 xfs_off_t count) 354 xfs_off_t count)
355{ 355{
356 struct xfs_bmbt_irec got;
357 struct xfs_defer_ops dfops;
358 struct xfs_mount *mp = ip->i_mount; 356 struct xfs_mount *mp = ip->i_mount;
359 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
360 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); 357 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
361 xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); 358 xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
362 xfs_extnum_t idx; 359 xfs_filblks_t count_fsb = end_fsb - offset_fsb;
363 bool found; 360 struct xfs_bmbt_irec imap;
364 int error = 0; 361 struct xfs_defer_ops dfops;
362 xfs_fsblock_t first_block = NULLFSBLOCK;
363 int nimaps = 1, error = 0;
365 364
366 xfs_ilock(ip, XFS_ILOCK_EXCL); 365 ASSERT(count != 0);
367 366
368 /* Convert all the extents to real from unwritten. */ 367 xfs_ilock(ip, XFS_ILOCK_EXCL);
369 for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); 368 error = xfs_bmapi_write(NULL, ip, offset_fsb, count_fsb,
370 found && got.br_startoff < end_fsb; 369 XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT |
371 found = xfs_iext_get_extent(ifp, ++idx, &got)) { 370 XFS_BMAPI_CONVERT_ONLY, &first_block, 0, &imap, &nimaps,
372 error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb, 371 &dfops);
373 end_fsb - offset_fsb, &dfops);
374 if (error)
375 break;
376 }
377
378 /* Finish up. */
379 xfs_iunlock(ip, XFS_ILOCK_EXCL); 372 xfs_iunlock(ip, XFS_ILOCK_EXCL);
380 return error; 373 return error;
381} 374}
@@ -399,7 +392,7 @@ xfs_reflink_allocate_cow(
399 bool trimmed; 392 bool trimmed;
400 xfs_filblks_t resaligned; 393 xfs_filblks_t resaligned;
401 xfs_extlen_t resblks = 0; 394 xfs_extlen_t resblks = 0;
402 xfs_extnum_t idx; 395 struct xfs_iext_cursor icur;
403 396
404retry: 397retry:
405 ASSERT(xfs_is_reflink_inode(ip)); 398 ASSERT(xfs_is_reflink_inode(ip));
@@ -409,7 +402,7 @@ retry:
409 * Even if the extent is not shared we might have a preallocation for 402 * Even if the extent is not shared we might have a preallocation for
410 * it in the COW fork. If so use it. 403 * it in the COW fork. If so use it.
411 */ 404 */
412 if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) && 405 if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) &&
413 got.br_startoff <= offset_fsb) { 406 got.br_startoff <= offset_fsb) {
414 *shared = true; 407 *shared = true;
415 408
@@ -496,13 +489,13 @@ xfs_reflink_find_cow_mapping(
496 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 489 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
497 xfs_fileoff_t offset_fsb; 490 xfs_fileoff_t offset_fsb;
498 struct xfs_bmbt_irec got; 491 struct xfs_bmbt_irec got;
499 xfs_extnum_t idx; 492 struct xfs_iext_cursor icur;
500 493
501 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); 494 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
502 ASSERT(xfs_is_reflink_inode(ip)); 495 ASSERT(xfs_is_reflink_inode(ip));
503 496
504 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 497 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
505 if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got)) 498 if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
506 return false; 499 return false;
507 if (got.br_startoff > offset_fsb) 500 if (got.br_startoff > offset_fsb)
508 return false; 501 return false;
@@ -524,18 +517,18 @@ xfs_reflink_trim_irec_to_next_cow(
524{ 517{
525 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 518 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
526 struct xfs_bmbt_irec got; 519 struct xfs_bmbt_irec got;
527 xfs_extnum_t idx; 520 struct xfs_iext_cursor icur;
528 521
529 if (!xfs_is_reflink_inode(ip)) 522 if (!xfs_is_reflink_inode(ip))
530 return; 523 return;
531 524
532 /* Find the extent in the CoW fork. */ 525 /* Find the extent in the CoW fork. */
533 if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got)) 526 if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
534 return; 527 return;
535 528
536 /* This is the extent before; try sliding up one. */ 529 /* This is the extent before; try sliding up one. */
537 if (got.br_startoff < offset_fsb) { 530 if (got.br_startoff < offset_fsb) {
538 if (!xfs_iext_get_extent(ifp, idx + 1, &got)) 531 if (!xfs_iext_next_extent(ifp, &icur, &got))
539 return; 532 return;
540 } 533 }
541 534
@@ -562,24 +555,32 @@ xfs_reflink_cancel_cow_blocks(
562{ 555{
563 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 556 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
564 struct xfs_bmbt_irec got, del; 557 struct xfs_bmbt_irec got, del;
565 xfs_extnum_t idx; 558 struct xfs_iext_cursor icur;
566 xfs_fsblock_t firstfsb; 559 xfs_fsblock_t firstfsb;
567 struct xfs_defer_ops dfops; 560 struct xfs_defer_ops dfops;
568 int error = 0; 561 int error = 0;
569 562
570 if (!xfs_is_reflink_inode(ip)) 563 if (!xfs_is_reflink_inode(ip))
571 return 0; 564 return 0;
572 if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got)) 565 if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
573 return 0; 566 return 0;
574 567
575 while (got.br_startoff < end_fsb) { 568 /* Walk backwards until we're out of the I/O range... */
569 while (got.br_startoff + got.br_blockcount > offset_fsb) {
576 del = got; 570 del = got;
577 xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); 571 xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
572
573 /* Extent delete may have bumped ext forward */
574 if (!del.br_blockcount) {
575 xfs_iext_prev(ifp, &icur);
576 goto next_extent;
577 }
578
578 trace_xfs_reflink_cancel_cow(ip, &del); 579 trace_xfs_reflink_cancel_cow(ip, &del);
579 580
580 if (isnullstartblock(del.br_startblock)) { 581 if (isnullstartblock(del.br_startblock)) {
581 error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK, 582 error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
582 &idx, &got, &del); 583 &icur, &got, &del);
583 if (error) 584 if (error)
584 break; 585 break;
585 } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { 586 } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
@@ -610,10 +611,10 @@ xfs_reflink_cancel_cow_blocks(
610 } 611 }
611 612
612 /* Remove the mapping from the CoW fork. */ 613 /* Remove the mapping from the CoW fork. */
613 xfs_bmap_del_extent_cow(ip, &idx, &got, &del); 614 xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
614 } 615 }
615 616next_extent:
616 if (!xfs_iext_get_extent(ifp, ++idx, &got)) 617 if (!xfs_iext_get_extent(ifp, &icur, &got))
617 break; 618 break;
618 } 619 }
619 620
@@ -698,7 +699,7 @@ xfs_reflink_end_cow(
698 int error; 699 int error;
699 unsigned int resblks; 700 unsigned int resblks;
700 xfs_filblks_t rlen; 701 xfs_filblks_t rlen;
701 xfs_extnum_t idx; 702 struct xfs_iext_cursor icur;
702 703
703 trace_xfs_reflink_end_cow(ip, offset, count); 704 trace_xfs_reflink_end_cow(ip, offset, count);
704 705
@@ -733,27 +734,22 @@ xfs_reflink_end_cow(
733 xfs_ilock(ip, XFS_ILOCK_EXCL); 734 xfs_ilock(ip, XFS_ILOCK_EXCL);
734 xfs_trans_ijoin(tp, ip, 0); 735 xfs_trans_ijoin(tp, ip, 0);
735 736
736 /* If there is a hole at end_fsb - 1 go to the previous extent */ 737 /*
737 if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) || 738 * In case of racing, overlapping AIO writes no COW extents might be
738 got.br_startoff > end_fsb) { 739 * left by the time I/O completes for the loser of the race. In that
739 /* 740 * case we are done.
740 * In case of racing, overlapping AIO writes no COW extents 741 */
741 * might be left by the time I/O completes for the loser of 742 if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
742 * the race. In that case we are done. 743 goto out_cancel;
743 */
744 if (idx <= 0)
745 goto out_cancel;
746 xfs_iext_get_extent(ifp, --idx, &got);
747 }
748 744
749 /* Walk backwards until we're out of the I/O range... */ 745 /* Walk backwards until we're out of the I/O range... */
750 while (got.br_startoff + got.br_blockcount > offset_fsb) { 746 while (got.br_startoff + got.br_blockcount > offset_fsb) {
751 del = got; 747 del = got;
752 xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); 748 xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
753 749
754 /* Extent delete may have bumped idx forward */ 750 /* Extent delete may have bumped ext forward */
755 if (!del.br_blockcount) { 751 if (!del.br_blockcount) {
756 idx--; 752 xfs_iext_prev(ifp, &icur);
757 goto next_extent; 753 goto next_extent;
758 } 754 }
759 755
@@ -765,7 +761,7 @@ xfs_reflink_end_cow(
765 * allocated but have not yet been involved in a write. 761 * allocated but have not yet been involved in a write.
766 */ 762 */
767 if (got.br_state == XFS_EXT_UNWRITTEN) { 763 if (got.br_state == XFS_EXT_UNWRITTEN) {
768 idx--; 764 xfs_iext_prev(ifp, &icur);
769 goto next_extent; 765 goto next_extent;
770 } 766 }
771 767
@@ -796,14 +792,14 @@ xfs_reflink_end_cow(
796 goto out_defer; 792 goto out_defer;
797 793
798 /* Remove the mapping from the CoW fork. */ 794 /* Remove the mapping from the CoW fork. */
799 xfs_bmap_del_extent_cow(ip, &idx, &got, &del); 795 xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
800 796
801 xfs_defer_ijoin(&dfops, ip); 797 xfs_defer_ijoin(&dfops, ip);
802 error = xfs_defer_finish(&tp, &dfops); 798 error = xfs_defer_finish(&tp, &dfops);
803 if (error) 799 if (error)
804 goto out_defer; 800 goto out_defer;
805next_extent: 801next_extent:
806 if (!xfs_iext_get_extent(ifp, idx, &got)) 802 if (!xfs_iext_get_extent(ifp, &icur, &got))
807 break; 803 break;
808 } 804 }
809 805
@@ -1433,7 +1429,7 @@ xfs_reflink_inode_has_shared_extents(
1433 xfs_extlen_t aglen; 1429 xfs_extlen_t aglen;
1434 xfs_agblock_t rbno; 1430 xfs_agblock_t rbno;
1435 xfs_extlen_t rlen; 1431 xfs_extlen_t rlen;
1436 xfs_extnum_t idx; 1432 struct xfs_iext_cursor icur;
1437 bool found; 1433 bool found;
1438 int error; 1434 int error;
1439 1435
@@ -1445,7 +1441,7 @@ xfs_reflink_inode_has_shared_extents(
1445 } 1441 }
1446 1442
1447 *has_shared = false; 1443 *has_shared = false;
1448 found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &got); 1444 found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got);
1449 while (found) { 1445 while (found) {
1450 if (isnullstartblock(got.br_startblock) || 1446 if (isnullstartblock(got.br_startblock) ||
1451 got.br_state != XFS_EXT_NORM) 1447 got.br_state != XFS_EXT_NORM)
@@ -1464,7 +1460,7 @@ xfs_reflink_inode_has_shared_extents(
1464 return 0; 1460 return 0;
1465 } 1461 }
1466next: 1462next:
1467 found = xfs_iext_get_extent(ifp, ++idx, &got); 1463 found = xfs_iext_next_extent(ifp, &icur, &got);
1468 } 1464 }
1469 1465
1470 return 0; 1466 return 0;
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 79defa722bf1..3f30f846d7f2 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -138,6 +138,7 @@ int xfs_rtalloc_query_range(struct xfs_trans *tp,
138int xfs_rtalloc_query_all(struct xfs_trans *tp, 138int xfs_rtalloc_query_all(struct xfs_trans *tp,
139 xfs_rtalloc_query_range_fn fn, 139 xfs_rtalloc_query_range_fn fn,
140 void *priv); 140 void *priv);
141bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
141#else 142#else
142# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) 143# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS)
143# define xfs_rtfree_extent(t,b,l) (ENOSYS) 144# define xfs_rtfree_extent(t,b,l) (ENOSYS)
@@ -146,6 +147,7 @@ int xfs_rtalloc_query_all(struct xfs_trans *tp,
146# define xfs_rtalloc_query_range(t,l,h,f,p) (ENOSYS) 147# define xfs_rtalloc_query_range(t,l,h,f,p) (ENOSYS)
147# define xfs_rtalloc_query_all(t,f,p) (ENOSYS) 148# define xfs_rtalloc_query_all(t,f,p) (ENOSYS)
148# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS) 149# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS)
150# define xfs_verify_rtbno(m, r) (false)
149static inline int /* error */ 151static inline int /* error */
150xfs_rtmount_init( 152xfs_rtmount_init(
151 xfs_mount_t *mp) /* file system mount structure */ 153 xfs_mount_t *mp) /* file system mount structure */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index bb5514688d47..515ba042d75c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -218,53 +218,15 @@ TRACE_EVENT(xfs_attr_list_node_descend,
218 __entry->bt_before) 218 __entry->bt_before)
219); 219);
220 220
221TRACE_EVENT(xfs_iext_insert,
222 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
223 struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
224 TP_ARGS(ip, idx, r, state, caller_ip),
225 TP_STRUCT__entry(
226 __field(dev_t, dev)
227 __field(xfs_ino_t, ino)
228 __field(xfs_extnum_t, idx)
229 __field(xfs_fileoff_t, startoff)
230 __field(xfs_fsblock_t, startblock)
231 __field(xfs_filblks_t, blockcount)
232 __field(xfs_exntst_t, state)
233 __field(int, bmap_state)
234 __field(unsigned long, caller_ip)
235 ),
236 TP_fast_assign(
237 __entry->dev = VFS_I(ip)->i_sb->s_dev;
238 __entry->ino = ip->i_ino;
239 __entry->idx = idx;
240 __entry->startoff = r->br_startoff;
241 __entry->startblock = r->br_startblock;
242 __entry->blockcount = r->br_blockcount;
243 __entry->state = r->br_state;
244 __entry->bmap_state = state;
245 __entry->caller_ip = caller_ip;
246 ),
247 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
248 "offset %lld block %lld count %lld flag %d caller %ps",
249 MAJOR(__entry->dev), MINOR(__entry->dev),
250 __entry->ino,
251 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
252 (long)__entry->idx,
253 __entry->startoff,
254 (int64_t)__entry->startblock,
255 __entry->blockcount,
256 __entry->state,
257 (char *)__entry->caller_ip)
258);
259
260DECLARE_EVENT_CLASS(xfs_bmap_class, 221DECLARE_EVENT_CLASS(xfs_bmap_class,
261 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, 222 TP_PROTO(struct xfs_inode *ip, struct xfs_iext_cursor *cur, int state,
262 unsigned long caller_ip), 223 unsigned long caller_ip),
263 TP_ARGS(ip, idx, state, caller_ip), 224 TP_ARGS(ip, cur, state, caller_ip),
264 TP_STRUCT__entry( 225 TP_STRUCT__entry(
265 __field(dev_t, dev) 226 __field(dev_t, dev)
266 __field(xfs_ino_t, ino) 227 __field(xfs_ino_t, ino)
267 __field(xfs_extnum_t, idx) 228 __field(void *, leaf);
229 __field(int, pos);
268 __field(xfs_fileoff_t, startoff) 230 __field(xfs_fileoff_t, startoff)
269 __field(xfs_fsblock_t, startblock) 231 __field(xfs_fsblock_t, startblock)
270 __field(xfs_filblks_t, blockcount) 232 __field(xfs_filblks_t, blockcount)
@@ -277,10 +239,11 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
277 struct xfs_bmbt_irec r; 239 struct xfs_bmbt_irec r;
278 240
279 ifp = xfs_iext_state_to_fork(ip, state); 241 ifp = xfs_iext_state_to_fork(ip, state);
280 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r); 242 xfs_iext_get_extent(ifp, cur, &r);
281 __entry->dev = VFS_I(ip)->i_sb->s_dev; 243 __entry->dev = VFS_I(ip)->i_sb->s_dev;
282 __entry->ino = ip->i_ino; 244 __entry->ino = ip->i_ino;
283 __entry->idx = idx; 245 __entry->leaf = cur->leaf;
246 __entry->pos = cur->pos;
284 __entry->startoff = r.br_startoff; 247 __entry->startoff = r.br_startoff;
285 __entry->startblock = r.br_startblock; 248 __entry->startblock = r.br_startblock;
286 __entry->blockcount = r.br_blockcount; 249 __entry->blockcount = r.br_blockcount;
@@ -288,12 +251,13 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
288 __entry->bmap_state = state; 251 __entry->bmap_state = state;
289 __entry->caller_ip = caller_ip; 252 __entry->caller_ip = caller_ip;
290 ), 253 ),
291 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " 254 TP_printk("dev %d:%d ino 0x%llx state %s cur 0x%p/%d "
292 "offset %lld block %lld count %lld flag %d caller %ps", 255 "offset %lld block %lld count %lld flag %d caller %ps",
293 MAJOR(__entry->dev), MINOR(__entry->dev), 256 MAJOR(__entry->dev), MINOR(__entry->dev),
294 __entry->ino, 257 __entry->ino,
295 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), 258 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
296 (long)__entry->idx, 259 __entry->leaf,
260 __entry->pos,
297 __entry->startoff, 261 __entry->startoff,
298 (int64_t)__entry->startblock, 262 (int64_t)__entry->startblock,
299 __entry->blockcount, 263 __entry->blockcount,
@@ -303,13 +267,15 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
303 267
304#define DEFINE_BMAP_EVENT(name) \ 268#define DEFINE_BMAP_EVENT(name) \
305DEFINE_EVENT(xfs_bmap_class, name, \ 269DEFINE_EVENT(xfs_bmap_class, name, \
306 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \ 270 TP_PROTO(struct xfs_inode *ip, struct xfs_iext_cursor *cur, int state, \
307 unsigned long caller_ip), \ 271 unsigned long caller_ip), \
308 TP_ARGS(ip, idx, state, caller_ip)) 272 TP_ARGS(ip, cur, state, caller_ip))
273DEFINE_BMAP_EVENT(xfs_iext_insert);
309DEFINE_BMAP_EVENT(xfs_iext_remove); 274DEFINE_BMAP_EVENT(xfs_iext_remove);
310DEFINE_BMAP_EVENT(xfs_bmap_pre_update); 275DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
311DEFINE_BMAP_EVENT(xfs_bmap_post_update); 276DEFINE_BMAP_EVENT(xfs_bmap_post_update);
312DEFINE_BMAP_EVENT(xfs_extlist); 277DEFINE_BMAP_EVENT(xfs_read_extent);
278DEFINE_BMAP_EVENT(xfs_write_extent);
313 279
314DECLARE_EVENT_CLASS(xfs_buf_class, 280DECLARE_EVENT_CLASS(xfs_buf_class,
315 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), 281 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 354368a906e5..cef89f7127d3 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -25,6 +25,7 @@
25#include "xfs_trans.h" 25#include "xfs_trans.h"
26#include "xfs_trans_priv.h" 26#include "xfs_trans_priv.h"
27#include "xfs_trace.h" 27#include "xfs_trace.h"
28#include "xfs_errortag.h"
28#include "xfs_error.h" 29#include "xfs_error.h"
29#include "xfs_log.h" 30#include "xfs_log.h"
30 31
@@ -514,11 +515,26 @@ xfsaild(
514 current->flags |= PF_MEMALLOC; 515 current->flags |= PF_MEMALLOC;
515 set_freezable(); 516 set_freezable();
516 517
517 while (!kthread_should_stop()) { 518 while (1) {
518 if (tout && tout <= 20) 519 if (tout && tout <= 20)
519 __set_current_state(TASK_KILLABLE); 520 set_current_state(TASK_KILLABLE);
520 else 521 else
521 __set_current_state(TASK_INTERRUPTIBLE); 522 set_current_state(TASK_INTERRUPTIBLE);
523
524 /*
525 * Check kthread_should_stop() after we set the task state
526 * to guarantee that we either see the stop bit and exit or
527 * the task state is reset to runnable such that it's not
528 * scheduled out indefinitely and detects the stop bit at
529 * next iteration.
530 *
531 * A memory barrier is included in above task state set to
532 * serialize again kthread_stop().
533 */
534 if (kthread_should_stop()) {
535 __set_current_state(TASK_RUNNING);
536 break;
537 }
522 538
523 spin_lock(&ailp->xa_lock); 539 spin_lock(&ailp->xa_lock);
524 540