aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-05-02 17:49:33 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-05-02 17:49:33 -0400
commitc8d8566952fda026966784a62f324c8352f77430 (patch)
tree89f25a47ce7662768bc6780bb7daeb85070784f1
parenta9586d9be812be4a0046ad4d312b013e587607cb (diff)
parentcab09a81fbefcb21db5213a84461d421946f6eb8 (diff)
Merge tag 'for-linus-v3.10-rc1' of git://oss.sgi.com/xfs/xfs
Pull xfs update from Ben Myers: "For 3.10-rc1 we have a number of bug fixes and cleanups and a currently experimental feature from David Chinner, CRCs protection for metadata. CRCs are enabled by using mkfs.xfs to create a filesystem with the feature bits set. - numerous fixes for speculative preallocation - don't verify buffers on IO errors - rename of random32 to prandom32 - refactoring/rearrangement in xfs_bmap.c - removal of unused m_inode_shrink in struct xfs_mount - fix error handling of xfs_bufs and readahead - quota driven preallocation throttling - fix WARN_ON in xfs_vm_releasepage - add ratelimited printk for different alert levels - fix spurious forced shutdowns due to freed Extent Free Intents - remove some obsolete XLOG_CIL_HARD_SPACE_LIMIT() macros - remove some obsoleted comments - (experimental) CRC support for metadata" * tag 'for-linus-v3.10-rc1' of git://oss.sgi.com/xfs/xfs: (46 commits) xfs: fix da node magic number mismatches xfs: Remote attr validation fixes and optimisations xfs: Teach dquot recovery about CONFIG_XFS_QUOTA xfs: add metadata CRC documentation xfs: implement extended feature masks xfs: add CRC checks to the superblock xfs: buffer type overruns blf_flags field xfs: add buffer types to directory and attribute buffers xfs: add CRC protection to remote attributes xfs: split remote attribute code out xfs: add CRCs to attr leaf blocks xfs: add CRCs to dir2/da node blocks xfs: shortform directory offsets change for dir3 format xfs: add CRC checking to dir2 leaf blocks xfs: add CRC checking to dir2 data blocks xfs: add CRC checking to dir2 free blocks xfs: add CRC checks to block format directory blocks xfs: add CRC checks to remote symlinks xfs: split out symlink code into it's own file. xfs: add version 3 inode format with CRCs ...
-rw-r--r--Documentation/filesystems/xfs-self-describing-metadata.txt350
-rw-r--r--fs/xfs/Makefile6
-rw-r--r--fs/xfs/xfs_ag.h56
-rw-r--r--fs/xfs/xfs_alloc.c201
-rw-r--r--fs/xfs/xfs_alloc_btree.c105
-rw-r--r--fs/xfs/xfs_alloc_btree.h12
-rw-r--r--fs/xfs/xfs_aops.c12
-rw-r--r--fs/xfs/xfs_attr.c454
-rw-r--r--fs/xfs/xfs_attr.h1
-rw-r--r--fs/xfs/xfs_attr_leaf.c1781
-rw-r--r--fs/xfs/xfs_attr_leaf.h122
-rw-r--r--fs/xfs/xfs_attr_remote.c541
-rw-r--r--fs/xfs/xfs_attr_remote.h46
-rw-r--r--fs/xfs/xfs_bmap.c4314
-rw-r--r--fs/xfs/xfs_bmap_btree.c110
-rw-r--r--fs/xfs/xfs_bmap_btree.h19
-rw-r--r--fs/xfs/xfs_btree.c256
-rw-r--r--fs/xfs/xfs_btree.h64
-rw-r--r--fs/xfs/xfs_buf.c4
-rw-r--r--fs/xfs/xfs_buf_item.h64
-rw-r--r--fs/xfs/xfs_da_btree.c1501
-rw-r--r--fs/xfs/xfs_da_btree.h130
-rw-r--r--fs/xfs/xfs_dinode.h43
-rw-r--r--fs/xfs/xfs_dir2_block.c179
-rw-r--r--fs/xfs/xfs_dir2_data.c266
-rw-r--r--fs/xfs/xfs_dir2_format.h278
-rw-r--r--fs/xfs/xfs_dir2_leaf.c898
-rw-r--r--fs/xfs/xfs_dir2_node.c1007
-rw-r--r--fs/xfs/xfs_dir2_priv.h50
-rw-r--r--fs/xfs/xfs_dir2_sf.c12
-rw-r--r--fs/xfs/xfs_dquot.c160
-rw-r--r--fs/xfs/xfs_dquot.h16
-rw-r--r--fs/xfs/xfs_error.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c27
-rw-r--r--fs/xfs/xfs_extfree_item.h14
-rw-r--r--fs/xfs/xfs_file.c2
-rw-r--r--fs/xfs/xfs_fsops.c34
-rw-r--r--fs/xfs/xfs_ialloc.c109
-rw-r--r--fs/xfs/xfs_ialloc_btree.c87
-rw-r--r--fs/xfs/xfs_ialloc_btree.h9
-rw-r--r--fs/xfs/xfs_inode.c212
-rw-r--r--fs/xfs/xfs_inode.h31
-rw-r--r--fs/xfs/xfs_inode_item.c2
-rw-r--r--fs/xfs/xfs_iomap.c163
-rw-r--r--fs/xfs/xfs_linux.h1
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_log_cil.c4
-rw-r--r--fs/xfs/xfs_log_priv.h1
-rw-r--r--fs/xfs/xfs_log_recover.c246
-rw-r--r--fs/xfs/xfs_message.h26
-rw-r--r--fs/xfs/xfs_mount.c146
-rw-r--r--fs/xfs/xfs_mount.h2
-rw-r--r--fs/xfs/xfs_qm.c25
-rw-r--r--fs/xfs/xfs_qm.h4
-rw-r--r--fs/xfs/xfs_qm_syscalls.c9
-rw-r--r--fs/xfs/xfs_quota.h11
-rw-r--r--fs/xfs/xfs_sb.h166
-rw-r--r--fs/xfs/xfs_symlink.c730
-rw-r--r--fs/xfs/xfs_symlink.h66
-rw-r--r--fs/xfs/xfs_trace.c2
-rw-r--r--fs/xfs/xfs_trace.h24
-rw-r--r--fs/xfs/xfs_trans_buf.c63
-rw-r--r--fs/xfs/xfs_trans_dquot.c10
-rw-r--r--fs/xfs/xfs_vnodeops.c478
64 files changed, 9922 insertions, 5846 deletions
diff --git a/Documentation/filesystems/xfs-self-describing-metadata.txt b/Documentation/filesystems/xfs-self-describing-metadata.txt
new file mode 100644
index 000000000000..05aa455163e3
--- /dev/null
+++ b/Documentation/filesystems/xfs-self-describing-metadata.txt
@@ -0,0 +1,350 @@
1XFS Self Describing Metadata
2----------------------------
3
4Introduction
5------------
6
7The largest scalability problem facing XFS is not one of algorithmic
8scalability, but of verification of the filesystem structure. Scalability of the
9structures and indexes on disk and the algorithms for iterating them are
10adequate for supporting PB scale filesystems with billions of inodes, however it
11is this very scalability that causes the verification problem.
12
13Almost all metadata on XFS is dynamically allocated. The only fixed location
14metadata is the allocation group headers (SB, AGF, AGFL and AGI), while all
15other metadata structures need to be discovered by walking the filesystem
16structure in different ways. While this is already done by userspace tools for
17validating and repairing the structure, there are limits to what they can
18verify, and this in turn limits the supportable size of an XFS filesystem.
19
20For example, it is entirely possible to manually use xfs_db and a bit of
21scripting to analyse the structure of a 100TB filesystem when trying to
22determine the root cause of a corruption problem, but it is still mainly a
23manual task of verifying that things like single bit errors or misplaced writes
24weren't the ultimate cause of a corruption event. It may take a few hours to a
25few days to perform such forensic analysis, so at this scale root cause
26analysis is entirely possible.
27
28However, if we scale the filesystem up to 1PB, we now have 10x as much metadata
29to analyse and so that analysis blows out towards weeks/months of forensic work.
30Most of the analysis work is slow and tedious, so as the amount of analysis goes
31up, the more likely that the cause will be lost in the noise. Hence the primary
32concern for supporting PB scale filesystems is minimising the time and effort
33required for basic forensic analysis of the filesystem structure.
34
35
36Self Describing Metadata
37------------------------
38
39One of the problems with the current metadata format is that apart from the
40magic number in the metadata block, we have no other way of identifying what it
41is supposed to be. We can't even identify if it is the right place. Put simply,
42you can't look at a single metadata block in isolation and say "yes, it is
43supposed to be there and the contents are valid".
44
45Hence most of the time spent on forensic analysis is spent doing basic
46verification of metadata values, looking for values that are in range (and hence
47not detected by automated verification checks) but are not correct. Finding and
48understanding how things like cross linked block lists (e.g. sibling
49pointers in a btree end up with loops in them) are the key to understanding what
50went wrong, but it is impossible to tell what order the blocks were linked into
51each other or written to disk after the fact.
52
53Hence we need to record more information into the metadata to allow us to
54quickly determine if the metadata is intact and can be ignored for the purpose
55of analysis. We can't protect against every possible type of error, but we can
56ensure that common types of errors are easily detectable. Hence the concept of
57self describing metadata.
58
59The first, fundamental requirement of self describing metadata is that the
60metadata object contains some form of unique identifier in a well known
61location. This allows us to identify the expected contents of the block and
62hence parse and verify the metadata object. If we can't independently identify
63the type of metadata in the object, then the metadata doesn't describe itself
64very well at all!
65
66Luckily, almost all XFS metadata has magic numbers embedded already - only the
67AGFL, remote symlinks and remote attribute blocks do not contain identifying
68magic numbers. Hence we can change the on-disk format of all these objects to
69add more identifying information and detect this simply by changing the magic
70numbers in the metadata objects. That is, if it has the current magic number,
71the metadata isn't self identifying. If it contains a new magic number, it is
72self identifying and we can do much more expansive automated verification of the
73metadata object at runtime, during forensic analysis or repair.
74
75As a primary concern, self describing metadata needs some form of overall
76integrity checking. We cannot trust the metadata if we cannot verify that it has
77not been changed as a result of external influences. Hence we need some form of
78integrity check, and this is done by adding CRC32c validation to the metadata
79block. If we can verify the block contains the metadata it was intended to
80contain, a large amount of the manual verification work can be skipped.
81
82CRC32c was selected as metadata cannot be more than 64k in length in XFS and
83hence a 32 bit CRC is more than sufficient to detect multi-bit errors in
84metadata blocks. CRC32c is also now hardware accelerated on common CPUs so it is
85fast. So while CRC32c is not the strongest of possible integrity checks that
86could be used, it is more than sufficient for our needs and has relatively
87little overhead. Adding support for larger integrity fields and/or algorithms
88does not really provide any extra value over CRC32c, but it does add a lot of
89complexity and so there is no provision for changing the integrity checking
90mechanism.
91
92Self describing metadata needs to contain enough information so that the
93metadata block can be verified as being in the correct place without needing to
94look at any other metadata. This means it needs to contain location information.
95Just adding a block number to the metadata is not sufficient to protect against
96mis-directed writes - a write might be misdirected to the wrong LUN and so be
97written to the "correct block" of the wrong filesystem. Hence location
98information must contain a filesystem identifier as well as a block number.
99
100Another key information point in forensic analysis is knowing who the metadata
101block belongs to. We already know the type, the location, that it is valid
102and/or corrupted, and how long ago that it was last modified. Knowing the owner
103of the block is important as it allows us to find other related metadata to
104determine the scope of the corruption. For example, if we have an extent btree
105object, we don't know what inode it belongs to and hence have to walk the entire
106filesystem to find the owner of the block. Worse, the corruption could mean that
107no owner can be found (i.e. it's an orphan block), and so without an owner field
108in the metadata we have no idea of the scope of the corruption. If we have an
109owner field in the metadata object, we can immediately do top down validation to
110determine the scope of the problem.
111
112Different types of metadata have different owner identifiers. For example,
113directory, attribute and extent tree blocks are all owned by an inode, whilst
114freespace btree blocks are owned by an allocation group. Hence the size and
115contents of the owner field are determined by the type of metadata object we are
116looking at. The owner information can also identify misplaced writes (e.g.
117freespace btree block written to the wrong AG).
118
119Self describing metadata also needs to contain some indication of when it was
120written to the filesystem. One of the key information points when doing forensic
121analysis is how recently the block was modified. Correlation of a set of corrupted
122metadata blocks based on modification times is important as it can indicate
123whether the corruptions are related, whether there's been multiple corruption
124events that lead to the eventual failure, and even whether there are corruptions
125present that the run-time verification is not detecting.
126
127For example, we can determine whether a metadata object is supposed to be free
128space or still allocated if it is still referenced by its owner by looking at
129when the free space btree block that contains the block was last written
130compared to when the metadata object itself was last written. If the free space
131block is more recent than the object and the object's owner, then there is a
132very good chance that the block should have been removed from the owner.
133
134To provide this "written timestamp", each metadata block gets the Log Sequence
135Number (LSN) of the most recent transaction it was modified on written into it.
136This number will always increase over the life of the filesystem, and the only
137thing that resets it is running xfs_repair on the filesystem. Further, by use of
138the LSN we can tell if the corrupted metadata all belonged to the same log
139checkpoint and hence have some idea of how much modification occurred between
140the first and last instance of corrupt metadata on disk and, further, how much
141modification occurred between the corruption being written and when it was
142detected.
143
144Runtime Validation
145------------------
146
147Validation of self-describing metadata takes place at runtime in two places:
148
149 - immediately after a successful read from disk
150 - immediately prior to write IO submission
151
152The verification is completely stateless - it is done independently of the
153modification process, and seeks only to check that the metadata is what it says
154it is and that the metadata fields are within bounds and internally consistent.
155As such, we cannot catch all types of corruption that can occur within a block
156as there may be certain limitations that operational state enforces on the
157metadata, or there may be corruption of interblock relationships (e.g. corrupted
158sibling pointer lists). Hence we still need stateful checking in the main code
159body, but in general most of the per-field validation is handled by the
160verifiers.
161
162For read verification, the caller needs to specify the expected type of metadata
163that it should see, and the IO completion process verifies that the metadata
164object matches what was expected. If the verification process fails, then it
165marks the object being read as EFSCORRUPTED. The caller needs to catch this
166error (same as for IO errors), and if it needs to take special action due to a
167verification error it can do so by catching the EFSCORRUPTED error value. If we
168need more discrimination of error type at higher levels, we can define new
169error numbers for different errors as necessary.
170
171The first step in read verification is checking the magic number and determining
172whether CRC validating is necessary. If it is, the CRC32c is calculated and
173compared against the value stored in the object itself. Once this is validated,
174further checks are made against the location information, followed by extensive
175object specific metadata validation. If any of these checks fail, then the
176buffer is considered corrupt and the EFSCORRUPTED error is set appropriately.
177
178Write verification is the opposite of the read verification - first the object
179is extensively verified and if it is OK we then update the LSN from the last
180modification made to the object. After this, we calculate the CRC and insert it
181into the object. Once this is done the write IO is allowed to continue. If any
182error occurs during this process, the buffer is again marked with a EFSCORRUPTED
183error for the higher layers to catch.
184
185Structures
186----------
187
188A typical on-disk structure needs to contain the following information:
189
190struct xfs_ondisk_hdr {
191 __be32 magic; /* magic number */
192 __be32 crc; /* CRC, not logged */
193 uuid_t uuid; /* filesystem identifier */
194 __be64 owner; /* parent object */
195 __be64 blkno; /* location on disk */
196 __be64 lsn; /* last modification in log, not logged */
197};
198
199Depending on the metadata, this information may be part of a header structure
200separate to the metadata contents, or may be distributed through an existing
201structure. The latter occurs with metadata that already contains some of this
202information, such as the superblock and AG headers.
203
204Other metadata may have different formats for the information, but the same
205level of information is generally provided. For example:
206
207 - short btree blocks have a 32 bit owner (ag number) and a 32 bit block
208 number for location. The two of these combined provide the same
209 information as @owner and @blkno in the above structure, but using 8
210 bytes less space on disk.
211
212 - directory/attribute node blocks have a 16 bit magic number, and the
213 header that contains the magic number has other information in it as
214 well. Hence the additional metadata headers change the overall format
215 of the metadata.
216
217A typical buffer read verifier is structured as follows:
218
219#define XFS_FOO_CRC_OFF offsetof(struct xfs_ondisk_hdr, crc)
220
221static void
222xfs_foo_read_verify(
223 struct xfs_buf *bp)
224{
225 struct xfs_mount *mp = bp->b_target->bt_mount;
226
227 if ((xfs_sb_version_hascrc(&mp->m_sb) &&
228 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
229 XFS_FOO_CRC_OFF)) ||
230 !xfs_foo_verify(bp)) {
231 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
232 xfs_buf_ioerror(bp, EFSCORRUPTED);
233 }
234}
235
236The code ensures that the CRC is only checked if the filesystem has CRCs enabled
237by checking the superblock for the feature bit, and then if the CRC verifies OK
238(or is not needed) it verifies the actual contents of the block.
239
240The verifier function will take a couple of different forms, depending on
241whether the magic number can be used to determine the format of the block. In
242the case it can't, the code is structured as follows:
243
244static bool
245xfs_foo_verify(
246 struct xfs_buf *bp)
247{
248 struct xfs_mount *mp = bp->b_target->bt_mount;
249 struct xfs_ondisk_hdr *hdr = bp->b_addr;
250
251 if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
252 return false;
253
254 if (!xfs_sb_version_hascrc(&mp->m_sb)) {
255 if (!uuid_equal(&hdr->uuid, &mp->m_sb.sb_uuid))
256 return false;
257 if (bp->b_bn != be64_to_cpu(hdr->blkno))
258 return false;
259 if (hdr->owner == 0)
260 return false;
261 }
262
263 /* object specific verification checks here */
264
265 return true;
266}
267
268If there are different magic numbers for the different formats, the verifier
269will look like:
270
271static bool
272xfs_foo_verify(
273 struct xfs_buf *bp)
274{
275 struct xfs_mount *mp = bp->b_target->bt_mount;
276 struct xfs_ondisk_hdr *hdr = bp->b_addr;
277
278 if (hdr->magic == cpu_to_be32(XFS_FOO_CRC_MAGIC)) {
279 if (!uuid_equal(&hdr->uuid, &mp->m_sb.sb_uuid))
280 return false;
281 if (bp->b_bn != be64_to_cpu(hdr->blkno))
282 return false;
283 if (hdr->owner == 0)
284 return false;
285 } else if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
286 return false;
287
288 /* object specific verification checks here */
289
290 return true;
291}
292
293Write verifiers are very similar to the read verifiers, they just do things in
294the opposite order to the read verifiers. A typical write verifier:
295
296static void
297xfs_foo_write_verify(
298 struct xfs_buf *bp)
299{
300 struct xfs_mount *mp = bp->b_target->bt_mount;
301 struct xfs_buf_log_item *bip = bp->b_fspriv;
302
303 if (!xfs_foo_verify(bp)) {
304 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
305 xfs_buf_ioerror(bp, EFSCORRUPTED);
306 return;
307 }
308
309 if (!xfs_sb_version_hascrc(&mp->m_sb))
310 return;
311
312
313 if (bip) {
314 struct xfs_ondisk_hdr *hdr = bp->b_addr;
315 hdr->lsn = cpu_to_be64(bip->bli_item.li_lsn);
316 }
317 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_FOO_CRC_OFF);
318}
319
320This will verify the internal structure of the metadata before we go any
321further, detecting corruptions that have occurred as the metadata has been
322modified in memory. If the metadata verifies OK, and CRCs are enabled, we then
323update the LSN field (when it was last modified) and calculate the CRC on the
324metadata. Once this is done, we can issue the IO.
325
326Inodes and Dquots
327-----------------
328
329Inodes and dquots are special snowflakes. They have per-object CRC and
330self-identifiers, but they are packed so that there are multiple objects per
331buffer. Hence we do not use per-buffer verifiers to do the work of per-object
332verification and CRC calculations. The per-buffer verifiers simply perform basic
333identification of the buffer - that they contain inodes or dquots, and that
334there are magic numbers in all the expected spots. All further CRC and
335verification checks are done when each inode is read from or written back to the
336buffer.
337
338The structure of the verifiers and the identifiers checks is very similar to the
339buffer code described above. The only difference is where they are called. For
340example, inode read verification is done in xfs_iread() when the inode is first
341read out of the buffer and the struct xfs_inode is instantiated. The inode is
342already extensively verified during writeback in xfs_iflush_int, so the only
343addition here is to add the LSN and CRC to the inode as it is copied back into
344the buffer.
345
346XXX: inode unlinked list modification doesn't recalculate the inode CRC! None of
347the unlinked list modifications check or update CRCs, neither during unlink nor
348log recovery. So, it's gone unnoticed until now. This won't matter immediately -
349repair will probably complain about it - but it needs to be fixed.
350
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index d02201df855b..6313b69b6644 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -45,11 +45,11 @@ xfs-y += xfs_aops.o \
45 xfs_itable.o \ 45 xfs_itable.o \
46 xfs_message.o \ 46 xfs_message.o \
47 xfs_mru_cache.o \ 47 xfs_mru_cache.o \
48 xfs_super.o \
49 xfs_xattr.o \
50 xfs_rename.o \ 48 xfs_rename.o \
49 xfs_super.o \
51 xfs_utils.o \ 50 xfs_utils.o \
52 xfs_vnodeops.o \ 51 xfs_vnodeops.o \
52 xfs_xattr.o \
53 kmem.o \ 53 kmem.o \
54 uuid.o 54 uuid.o
55 55
@@ -58,6 +58,7 @@ xfs-y += xfs_alloc.o \
58 xfs_alloc_btree.o \ 58 xfs_alloc_btree.o \
59 xfs_attr.o \ 59 xfs_attr.o \
60 xfs_attr_leaf.o \ 60 xfs_attr_leaf.o \
61 xfs_attr_remote.o \
61 xfs_bmap.o \ 62 xfs_bmap.o \
62 xfs_bmap_btree.o \ 63 xfs_bmap_btree.o \
63 xfs_btree.o \ 64 xfs_btree.o \
@@ -73,6 +74,7 @@ xfs-y += xfs_alloc.o \
73 xfs_inode.o \ 74 xfs_inode.o \
74 xfs_log_recover.o \ 75 xfs_log_recover.o \
75 xfs_mount.o \ 76 xfs_mount.o \
77 xfs_symlink.o \
76 xfs_trans.o 78 xfs_trans.o
77 79
78# low-level transaction/log code 80# low-level transaction/log code
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index f2aeedb6a579..317aa86d96ea 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -30,6 +30,7 @@ struct xfs_trans;
30 30
31#define XFS_AGF_MAGIC 0x58414746 /* 'XAGF' */ 31#define XFS_AGF_MAGIC 0x58414746 /* 'XAGF' */
32#define XFS_AGI_MAGIC 0x58414749 /* 'XAGI' */ 32#define XFS_AGI_MAGIC 0x58414749 /* 'XAGI' */
33#define XFS_AGFL_MAGIC 0x5841464c /* 'XAFL' */
33#define XFS_AGF_VERSION 1 34#define XFS_AGF_VERSION 1
34#define XFS_AGI_VERSION 1 35#define XFS_AGI_VERSION 1
35 36
@@ -63,12 +64,29 @@ typedef struct xfs_agf {
63 __be32 agf_spare0; /* spare field */ 64 __be32 agf_spare0; /* spare field */
64 __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */ 65 __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */
65 __be32 agf_spare1; /* spare field */ 66 __be32 agf_spare1; /* spare field */
67
66 __be32 agf_flfirst; /* first freelist block's index */ 68 __be32 agf_flfirst; /* first freelist block's index */
67 __be32 agf_fllast; /* last freelist block's index */ 69 __be32 agf_fllast; /* last freelist block's index */
68 __be32 agf_flcount; /* count of blocks in freelist */ 70 __be32 agf_flcount; /* count of blocks in freelist */
69 __be32 agf_freeblks; /* total free blocks */ 71 __be32 agf_freeblks; /* total free blocks */
72
70 __be32 agf_longest; /* longest free space */ 73 __be32 agf_longest; /* longest free space */
71 __be32 agf_btreeblks; /* # of blocks held in AGF btrees */ 74 __be32 agf_btreeblks; /* # of blocks held in AGF btrees */
75 uuid_t agf_uuid; /* uuid of filesystem */
76
77 /*
78 * reserve some contiguous space for future logged fields before we add
79 * the unlogged fields. This makes the range logging via flags and
80 * structure offsets much simpler.
81 */
82 __be64 agf_spare64[16];
83
84 /* unlogged fields, written during buffer writeback. */
85 __be64 agf_lsn; /* last write sequence */
86 __be32 agf_crc; /* crc of agf sector */
87 __be32 agf_spare2;
88
89 /* structure must be padded to 64 bit alignment */
72} xfs_agf_t; 90} xfs_agf_t;
73 91
74#define XFS_AGF_MAGICNUM 0x00000001 92#define XFS_AGF_MAGICNUM 0x00000001
@@ -83,7 +101,8 @@ typedef struct xfs_agf {
83#define XFS_AGF_FREEBLKS 0x00000200 101#define XFS_AGF_FREEBLKS 0x00000200
84#define XFS_AGF_LONGEST 0x00000400 102#define XFS_AGF_LONGEST 0x00000400
85#define XFS_AGF_BTREEBLKS 0x00000800 103#define XFS_AGF_BTREEBLKS 0x00000800
86#define XFS_AGF_NUM_BITS 12 104#define XFS_AGF_UUID 0x00001000
105#define XFS_AGF_NUM_BITS 13
87#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) 106#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1)
88 107
89#define XFS_AGF_FLAGS \ 108#define XFS_AGF_FLAGS \
@@ -98,7 +117,8 @@ typedef struct xfs_agf {
98 { XFS_AGF_FLCOUNT, "FLCOUNT" }, \ 117 { XFS_AGF_FLCOUNT, "FLCOUNT" }, \
99 { XFS_AGF_FREEBLKS, "FREEBLKS" }, \ 118 { XFS_AGF_FREEBLKS, "FREEBLKS" }, \
100 { XFS_AGF_LONGEST, "LONGEST" }, \ 119 { XFS_AGF_LONGEST, "LONGEST" }, \
101 { XFS_AGF_BTREEBLKS, "BTREEBLKS" } 120 { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \
121 { XFS_AGF_UUID, "UUID" }
102 122
103/* disk block (xfs_daddr_t) in the AG */ 123/* disk block (xfs_daddr_t) in the AG */
104#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) 124#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
@@ -132,6 +152,7 @@ typedef struct xfs_agi {
132 __be32 agi_root; /* root of inode btree */ 152 __be32 agi_root; /* root of inode btree */
133 __be32 agi_level; /* levels in inode btree */ 153 __be32 agi_level; /* levels in inode btree */
134 __be32 agi_freecount; /* number of free inodes */ 154 __be32 agi_freecount; /* number of free inodes */
155
135 __be32 agi_newino; /* new inode just allocated */ 156 __be32 agi_newino; /* new inode just allocated */
136 __be32 agi_dirino; /* last directory inode chunk */ 157 __be32 agi_dirino; /* last directory inode chunk */
137 /* 158 /*
@@ -139,6 +160,13 @@ typedef struct xfs_agi {
139 * still being referenced. 160 * still being referenced.
140 */ 161 */
141 __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; 162 __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
163
164 uuid_t agi_uuid; /* uuid of filesystem */
165 __be32 agi_crc; /* crc of agi sector */
166 __be32 agi_pad32;
167 __be64 agi_lsn; /* last write sequence */
168
169 /* structure must be padded to 64 bit alignment */
142} xfs_agi_t; 170} xfs_agi_t;
143 171
144#define XFS_AGI_MAGICNUM 0x00000001 172#define XFS_AGI_MAGICNUM 0x00000001
@@ -171,11 +199,31 @@ extern const struct xfs_buf_ops xfs_agi_buf_ops;
171 */ 199 */
172#define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) 200#define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log))
173#define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp)) 201#define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp))
174#define XFS_AGFL_SIZE(mp) ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t))
175#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr)) 202#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr))
176 203
204#define XFS_BUF_TO_AGFL_BNO(mp, bp) \
205 (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
206 &(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \
207 (__be32 *)(bp)->b_addr)
208
209/*
210 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of
211 * slots in the beginning of the block for a proper header with the
212 * location information and CRC.
213 */
214#define XFS_AGFL_SIZE(mp) \
215 (((mp)->m_sb.sb_sectsize - \
216 (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
217 sizeof(struct xfs_agfl) : 0)) / \
218 sizeof(xfs_agblock_t))
219
177typedef struct xfs_agfl { 220typedef struct xfs_agfl {
178 __be32 agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */ 221 __be32 agfl_magicnum;
222 __be32 agfl_seqno;
223 uuid_t agfl_uuid;
224 __be64 agfl_lsn;
225 __be32 agfl_crc;
226 __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
179} xfs_agfl_t; 227} xfs_agfl_t;
180 228
181/* 229/*
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 0ad23253e8b1..5673bcfda2f0 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -33,7 +33,9 @@
33#include "xfs_alloc.h" 33#include "xfs_alloc.h"
34#include "xfs_extent_busy.h" 34#include "xfs_extent_busy.h"
35#include "xfs_error.h" 35#include "xfs_error.h"
36#include "xfs_cksum.h"
36#include "xfs_trace.h" 37#include "xfs_trace.h"
38#include "xfs_buf_item.h"
37 39
38struct workqueue_struct *xfs_alloc_wq; 40struct workqueue_struct *xfs_alloc_wq;
39 41
@@ -430,53 +432,84 @@ xfs_alloc_fixup_trees(
430 return 0; 432 return 0;
431} 433}
432 434
433static void 435static bool
434xfs_agfl_verify( 436xfs_agfl_verify(
435 struct xfs_buf *bp) 437 struct xfs_buf *bp)
436{ 438{
437#ifdef WHEN_CRCS_COME_ALONG
438 /*
439 * we cannot actually do any verification of the AGFL because mkfs does
440 * not initialise the AGFL to zero or NULL. Hence the only valid part of
441 * the AGFL is what the AGF says is active. We can't get to the AGF, so
442 * we can't verify just those entries are valid.
443 *
444 * This problem goes away when the CRC format change comes along as that
445 * requires the AGFL to be initialised by mkfs. At that point, we can
446 * verify the blocks in the agfl -active or not- lie within the bounds
447 * of the AG. Until then, just leave this check ifdef'd out.
448 */
449 struct xfs_mount *mp = bp->b_target->bt_mount; 439 struct xfs_mount *mp = bp->b_target->bt_mount;
450 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); 440 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
451 int agfl_ok = 1;
452
453 int i; 441 int i;
454 442
443 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid))
444 return false;
445 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
446 return false;
447 /*
448 * during growfs operations, the perag is not fully initialised,
449 * so we can't use it for any useful checking. growfs ensures we can't
450 * use it by using uncached buffers that don't have the perag attached
451 * so we can detect and avoid this problem.
452 */
453 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
454 return false;
455
455 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { 456 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
456 if (be32_to_cpu(agfl->agfl_bno[i]) == NULLAGBLOCK || 457 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
457 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) 458 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
458 agfl_ok = 0; 459 return false;
459 } 460 }
461 return true;
462}
463
464static void
465xfs_agfl_read_verify(
466 struct xfs_buf *bp)
467{
468 struct xfs_mount *mp = bp->b_target->bt_mount;
469 int agfl_ok = 1;
470
471 /*
472 * There is no verification of non-crc AGFLs because mkfs does not
473 * initialise the AGFL to zero or NULL. Hence the only valid part of the
474 * AGFL is what the AGF says is active. We can't get to the AGF, so we
475 * can't verify just those entries are valid.
476 */
477 if (!xfs_sb_version_hascrc(&mp->m_sb))
478 return;
479
480 agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
481 offsetof(struct xfs_agfl, agfl_crc));
482
483 agfl_ok = agfl_ok && xfs_agfl_verify(bp);
460 484
461 if (!agfl_ok) { 485 if (!agfl_ok) {
462 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agfl); 486 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
463 xfs_buf_ioerror(bp, EFSCORRUPTED); 487 xfs_buf_ioerror(bp, EFSCORRUPTED);
464 } 488 }
465#endif
466} 489}
467 490
468static void 491static void
469xfs_agfl_write_verify( 492xfs_agfl_write_verify(
470 struct xfs_buf *bp) 493 struct xfs_buf *bp)
471{ 494{
472 xfs_agfl_verify(bp); 495 struct xfs_mount *mp = bp->b_target->bt_mount;
473} 496 struct xfs_buf_log_item *bip = bp->b_fspriv;
474 497
475static void 498 /* no verification of non-crc AGFLs */
476xfs_agfl_read_verify( 499 if (!xfs_sb_version_hascrc(&mp->m_sb))
477 struct xfs_buf *bp) 500 return;
478{ 501
479 xfs_agfl_verify(bp); 502 if (!xfs_agfl_verify(bp)) {
503 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
504 xfs_buf_ioerror(bp, EFSCORRUPTED);
505 return;
506 }
507
508 if (bip)
509 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
510
511 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
512 offsetof(struct xfs_agfl, agfl_crc));
480} 513}
481 514
482const struct xfs_buf_ops xfs_agfl_buf_ops = { 515const struct xfs_buf_ops xfs_agfl_buf_ops = {
@@ -842,7 +875,7 @@ xfs_alloc_ag_vextent_near(
842 */ 875 */
843 int dofirst; /* set to do first algorithm */ 876 int dofirst; /* set to do first algorithm */
844 877
845 dofirst = random32() & 1; 878 dofirst = prandom_u32() & 1;
846#endif 879#endif
847 880
848restart: 881restart:
@@ -1982,18 +2015,18 @@ xfs_alloc_get_freelist(
1982 int btreeblk) /* destination is a AGF btree */ 2015 int btreeblk) /* destination is a AGF btree */
1983{ 2016{
1984 xfs_agf_t *agf; /* a.g. freespace structure */ 2017 xfs_agf_t *agf; /* a.g. freespace structure */
1985 xfs_agfl_t *agfl; /* a.g. freelist structure */
1986 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ 2018 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
1987 xfs_agblock_t bno; /* block number returned */ 2019 xfs_agblock_t bno; /* block number returned */
2020 __be32 *agfl_bno;
1988 int error; 2021 int error;
1989 int logflags; 2022 int logflags;
1990 xfs_mount_t *mp; /* mount structure */ 2023 xfs_mount_t *mp = tp->t_mountp;
1991 xfs_perag_t *pag; /* per allocation group data */ 2024 xfs_perag_t *pag; /* per allocation group data */
1992 2025
1993 agf = XFS_BUF_TO_AGF(agbp);
1994 /* 2026 /*
1995 * Freelist is empty, give up. 2027 * Freelist is empty, give up.
1996 */ 2028 */
2029 agf = XFS_BUF_TO_AGF(agbp);
1997 if (!agf->agf_flcount) { 2030 if (!agf->agf_flcount) {
1998 *bnop = NULLAGBLOCK; 2031 *bnop = NULLAGBLOCK;
1999 return 0; 2032 return 0;
@@ -2001,15 +2034,17 @@ xfs_alloc_get_freelist(
2001 /* 2034 /*
2002 * Read the array of free blocks. 2035 * Read the array of free blocks.
2003 */ 2036 */
2004 mp = tp->t_mountp; 2037 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2005 if ((error = xfs_alloc_read_agfl(mp, tp, 2038 &agflbp);
2006 be32_to_cpu(agf->agf_seqno), &agflbp))) 2039 if (error)
2007 return error; 2040 return error;
2008 agfl = XFS_BUF_TO_AGFL(agflbp); 2041
2042
2009 /* 2043 /*
2010 * Get the block number and update the data structures. 2044 * Get the block number and update the data structures.
2011 */ 2045 */
2012 bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]); 2046 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2047 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2013 be32_add_cpu(&agf->agf_flfirst, 1); 2048 be32_add_cpu(&agf->agf_flfirst, 1);
2014 xfs_trans_brelse(tp, agflbp); 2049 xfs_trans_brelse(tp, agflbp);
2015 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) 2050 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
@@ -2058,11 +2093,14 @@ xfs_alloc_log_agf(
2058 offsetof(xfs_agf_t, agf_freeblks), 2093 offsetof(xfs_agf_t, agf_freeblks),
2059 offsetof(xfs_agf_t, agf_longest), 2094 offsetof(xfs_agf_t, agf_longest),
2060 offsetof(xfs_agf_t, agf_btreeblks), 2095 offsetof(xfs_agf_t, agf_btreeblks),
2096 offsetof(xfs_agf_t, agf_uuid),
2061 sizeof(xfs_agf_t) 2097 sizeof(xfs_agf_t)
2062 }; 2098 };
2063 2099
2064 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_); 2100 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2065 2101
2102 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2103
2066 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); 2104 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2067 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); 2105 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2068} 2106}
@@ -2099,12 +2137,13 @@ xfs_alloc_put_freelist(
2099 int btreeblk) /* block came from a AGF btree */ 2137 int btreeblk) /* block came from a AGF btree */
2100{ 2138{
2101 xfs_agf_t *agf; /* a.g. freespace structure */ 2139 xfs_agf_t *agf; /* a.g. freespace structure */
2102 xfs_agfl_t *agfl; /* a.g. free block array */
2103 __be32 *blockp;/* pointer to array entry */ 2140 __be32 *blockp;/* pointer to array entry */
2104 int error; 2141 int error;
2105 int logflags; 2142 int logflags;
2106 xfs_mount_t *mp; /* mount structure */ 2143 xfs_mount_t *mp; /* mount structure */
2107 xfs_perag_t *pag; /* per allocation group data */ 2144 xfs_perag_t *pag; /* per allocation group data */
2145 __be32 *agfl_bno;
2146 int startoff;
2108 2147
2109 agf = XFS_BUF_TO_AGF(agbp); 2148 agf = XFS_BUF_TO_AGF(agbp);
2110 mp = tp->t_mountp; 2149 mp = tp->t_mountp;
@@ -2112,7 +2151,6 @@ xfs_alloc_put_freelist(
2112 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, 2151 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2113 be32_to_cpu(agf->agf_seqno), &agflbp))) 2152 be32_to_cpu(agf->agf_seqno), &agflbp)))
2114 return error; 2153 return error;
2115 agfl = XFS_BUF_TO_AGFL(agflbp);
2116 be32_add_cpu(&agf->agf_fllast, 1); 2154 be32_add_cpu(&agf->agf_fllast, 1);
2117 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) 2155 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2118 agf->agf_fllast = 0; 2156 agf->agf_fllast = 0;
@@ -2133,32 +2171,38 @@ xfs_alloc_put_freelist(
2133 xfs_alloc_log_agf(tp, agbp, logflags); 2171 xfs_alloc_log_agf(tp, agbp, logflags);
2134 2172
2135 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); 2173 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2136 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)]; 2174
2175 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2176 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
2137 *blockp = cpu_to_be32(bno); 2177 *blockp = cpu_to_be32(bno);
2178 startoff = (char *)blockp - (char *)agflbp->b_addr;
2179
2138 xfs_alloc_log_agf(tp, agbp, logflags); 2180 xfs_alloc_log_agf(tp, agbp, logflags);
2139 xfs_trans_log_buf(tp, agflbp, 2181
2140 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl), 2182 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
2141 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl + 2183 xfs_trans_log_buf(tp, agflbp, startoff,
2142 sizeof(xfs_agblock_t) - 1)); 2184 startoff + sizeof(xfs_agblock_t) - 1);
2143 return 0; 2185 return 0;
2144} 2186}
2145 2187
2146static void 2188static bool
2147xfs_agf_verify( 2189xfs_agf_verify(
2190 struct xfs_mount *mp,
2148 struct xfs_buf *bp) 2191 struct xfs_buf *bp)
2149 { 2192 {
2150 struct xfs_mount *mp = bp->b_target->bt_mount; 2193 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
2151 struct xfs_agf *agf;
2152 int agf_ok;
2153 2194
2154 agf = XFS_BUF_TO_AGF(bp); 2195 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2196 !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
2197 return false;
2155 2198
2156 agf_ok = agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && 2199 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2157 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && 2200 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2158 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && 2201 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2159 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && 2202 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2160 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && 2203 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2161 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp); 2204 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2205 return false;
2162 2206
2163 /* 2207 /*
2164 * during growfs operations, the perag is not fully initialised, 2208 * during growfs operations, the perag is not fully initialised,
@@ -2166,33 +2210,58 @@ xfs_agf_verify(
2166 * use it by using uncached buffers that don't have the perag attached 2210 * use it by using uncached buffers that don't have the perag attached
2167 * so we can detect and avoid this problem. 2211 * so we can detect and avoid this problem.
2168 */ 2212 */
2169 if (bp->b_pag) 2213 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2170 agf_ok = agf_ok && be32_to_cpu(agf->agf_seqno) == 2214 return false;
2171 bp->b_pag->pag_agno;
2172 2215
2173 if (xfs_sb_version_haslazysbcount(&mp->m_sb)) 2216 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2174 agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <= 2217 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2175 be32_to_cpu(agf->agf_length); 2218 return false;
2219
2220 return true;;
2176 2221
2177 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
2178 XFS_RANDOM_ALLOC_READ_AGF))) {
2179 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agf);
2180 xfs_buf_ioerror(bp, EFSCORRUPTED);
2181 }
2182} 2222}
2183 2223
2184static void 2224static void
2185xfs_agf_read_verify( 2225xfs_agf_read_verify(
2186 struct xfs_buf *bp) 2226 struct xfs_buf *bp)
2187{ 2227{
2188 xfs_agf_verify(bp); 2228 struct xfs_mount *mp = bp->b_target->bt_mount;
2229 int agf_ok = 1;
2230
2231 if (xfs_sb_version_hascrc(&mp->m_sb))
2232 agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
2233 offsetof(struct xfs_agf, agf_crc));
2234
2235 agf_ok = agf_ok && xfs_agf_verify(mp, bp);
2236
2237 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
2238 XFS_RANDOM_ALLOC_READ_AGF))) {
2239 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
2240 xfs_buf_ioerror(bp, EFSCORRUPTED);
2241 }
2189} 2242}
2190 2243
2191static void 2244static void
2192xfs_agf_write_verify( 2245xfs_agf_write_verify(
2193 struct xfs_buf *bp) 2246 struct xfs_buf *bp)
2194{ 2247{
2195 xfs_agf_verify(bp); 2248 struct xfs_mount *mp = bp->b_target->bt_mount;
2249 struct xfs_buf_log_item *bip = bp->b_fspriv;
2250
2251 if (!xfs_agf_verify(mp, bp)) {
2252 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
2253 xfs_buf_ioerror(bp, EFSCORRUPTED);
2254 return;
2255 }
2256
2257 if (!xfs_sb_version_hascrc(&mp->m_sb))
2258 return;
2259
2260 if (bip)
2261 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2262
2263 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
2264 offsetof(struct xfs_agf, agf_crc));
2196} 2265}
2197 2266
2198const struct xfs_buf_ops xfs_agf_buf_ops = { 2267const struct xfs_buf_ops xfs_agf_buf_ops = {
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index b1ddef6b2689..30c4c1434faf 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -33,6 +33,7 @@
33#include "xfs_extent_busy.h" 33#include "xfs_extent_busy.h"
34#include "xfs_error.h" 34#include "xfs_error.h"
35#include "xfs_trace.h" 35#include "xfs_trace.h"
36#include "xfs_cksum.h"
36 37
37 38
38STATIC struct xfs_btree_cur * 39STATIC struct xfs_btree_cur *
@@ -272,7 +273,7 @@ xfs_allocbt_key_diff(
272 return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; 273 return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
273} 274}
274 275
275static void 276static bool
276xfs_allocbt_verify( 277xfs_allocbt_verify(
277 struct xfs_buf *bp) 278 struct xfs_buf *bp)
278{ 279{
@@ -280,66 +281,103 @@ xfs_allocbt_verify(
280 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 281 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
281 struct xfs_perag *pag = bp->b_pag; 282 struct xfs_perag *pag = bp->b_pag;
282 unsigned int level; 283 unsigned int level;
283 int sblock_ok; /* block passes checks */
284 284
285 /* 285 /*
286 * magic number and level verification 286 * magic number and level verification
287 * 287 *
288 * During growfs operations, we can't verify the exact level as the 288 * During growfs operations, we can't verify the exact level or owner as
289 * perag is not fully initialised and hence not attached to the buffer. 289 * the perag is not fully initialised and hence not attached to the
290 * In this case, check against the maximum tree depth. 290 * buffer. In this case, check against the maximum tree depth.
291 *
292 * Similarly, during log recovery we will have a perag structure
293 * attached, but the agf information will not yet have been initialised
294 * from the on disk AGF. Again, we can only check against maximum limits
295 * in this case.
291 */ 296 */
292 level = be16_to_cpu(block->bb_level); 297 level = be16_to_cpu(block->bb_level);
293 switch (block->bb_magic) { 298 switch (block->bb_magic) {
299 case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
300 if (!xfs_sb_version_hascrc(&mp->m_sb))
301 return false;
302 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
303 return false;
304 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
305 return false;
306 if (pag &&
307 be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
308 return false;
309 /* fall through */
294 case cpu_to_be32(XFS_ABTB_MAGIC): 310 case cpu_to_be32(XFS_ABTB_MAGIC):
295 if (pag) 311 if (pag && pag->pagf_init) {
296 sblock_ok = level < pag->pagf_levels[XFS_BTNUM_BNOi]; 312 if (level >= pag->pagf_levels[XFS_BTNUM_BNOi])
297 else 313 return false;
298 sblock_ok = level < mp->m_ag_maxlevels; 314 } else if (level >= mp->m_ag_maxlevels)
315 return false;
299 break; 316 break;
317 case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
318 if (!xfs_sb_version_hascrc(&mp->m_sb))
319 return false;
320 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
321 return false;
322 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
323 return false;
324 if (pag &&
325 be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
326 return false;
327 /* fall through */
300 case cpu_to_be32(XFS_ABTC_MAGIC): 328 case cpu_to_be32(XFS_ABTC_MAGIC):
301 if (pag) 329 if (pag && pag->pagf_init) {
302 sblock_ok = level < pag->pagf_levels[XFS_BTNUM_CNTi]; 330 if (level >= pag->pagf_levels[XFS_BTNUM_CNTi])
303 else 331 return false;
304 sblock_ok = level < mp->m_ag_maxlevels; 332 } else if (level >= mp->m_ag_maxlevels)
333 return false;
305 break; 334 break;
306 default: 335 default:
307 sblock_ok = 0; 336 return false;
308 break;
309 } 337 }
310 338
311 /* numrecs verification */ 339 /* numrecs verification */
312 sblock_ok = sblock_ok && 340 if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
313 be16_to_cpu(block->bb_numrecs) <= mp->m_alloc_mxr[level != 0]; 341 return false;
314 342
315 /* sibling pointer verification */ 343 /* sibling pointer verification */
316 sblock_ok = sblock_ok && 344 if (!block->bb_u.s.bb_leftsib ||
317 (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || 345 (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
318 be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) && 346 block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
319 block->bb_u.s.bb_leftsib && 347 return false;
320 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || 348 if (!block->bb_u.s.bb_rightsib ||
321 be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) && 349 (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
322 block->bb_u.s.bb_rightsib; 350 block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
323 351 return false;
324 if (!sblock_ok) { 352
325 trace_xfs_btree_corrupt(bp, _RET_IP_); 353 return true;
326 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
327 xfs_buf_ioerror(bp, EFSCORRUPTED);
328 }
329} 354}
330 355
331static void 356static void
332xfs_allocbt_read_verify( 357xfs_allocbt_read_verify(
333 struct xfs_buf *bp) 358 struct xfs_buf *bp)
334{ 359{
335 xfs_allocbt_verify(bp); 360 if (!(xfs_btree_sblock_verify_crc(bp) &&
361 xfs_allocbt_verify(bp))) {
362 trace_xfs_btree_corrupt(bp, _RET_IP_);
363 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
364 bp->b_target->bt_mount, bp->b_addr);
365 xfs_buf_ioerror(bp, EFSCORRUPTED);
366 }
336} 367}
337 368
338static void 369static void
339xfs_allocbt_write_verify( 370xfs_allocbt_write_verify(
340 struct xfs_buf *bp) 371 struct xfs_buf *bp)
341{ 372{
342 xfs_allocbt_verify(bp); 373 if (!xfs_allocbt_verify(bp)) {
374 trace_xfs_btree_corrupt(bp, _RET_IP_);
375 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
376 bp->b_target->bt_mount, bp->b_addr);
377 xfs_buf_ioerror(bp, EFSCORRUPTED);
378 }
379 xfs_btree_sblock_calc_crc(bp);
380
343} 381}
344 382
345const struct xfs_buf_ops xfs_allocbt_buf_ops = { 383const struct xfs_buf_ops xfs_allocbt_buf_ops = {
@@ -444,6 +482,9 @@ xfs_allocbt_init_cursor(
444 cur->bc_private.a.agbp = agbp; 482 cur->bc_private.a.agbp = agbp;
445 cur->bc_private.a.agno = agno; 483 cur->bc_private.a.agno = agno;
446 484
485 if (xfs_sb_version_hascrc(&mp->m_sb))
486 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
487
447 return cur; 488 return cur;
448} 489}
449 490
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index 7e89a2b429dd..e3a3f7424192 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -31,8 +31,10 @@ struct xfs_mount;
31 * by blockcount and blockno. All blocks look the same to make the code 31 * by blockcount and blockno. All blocks look the same to make the code
32 * simpler; if we have time later, we'll make the optimizations. 32 * simpler; if we have time later, we'll make the optimizations.
33 */ 33 */
34#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */ 34#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */
35#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */ 35#define XFS_ABTB_CRC_MAGIC 0x41423342 /* 'AB3B' */
36#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */
37#define XFS_ABTC_CRC_MAGIC 0x41423343 /* 'AB3C' */
36 38
37/* 39/*
38 * Data record/key structure 40 * Data record/key structure
@@ -59,10 +61,10 @@ typedef __be32 xfs_alloc_ptr_t;
59 61
60/* 62/*
61 * Btree block header size depends on a superblock flag. 63 * Btree block header size depends on a superblock flag.
62 *
63 * (not quite yet, but soon)
64 */ 64 */
65#define XFS_ALLOC_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN 65#define XFS_ALLOC_BLOCK_LEN(mp) \
66 (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
67 XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
66 68
67/* 69/*
68 * Record, key, and pointer address macros for btree blocks. 70 * Record, key, and pointer address macros for btree blocks.
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 5f707e537171..3244c988d379 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -953,13 +953,13 @@ xfs_vm_writepage(
953 unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1); 953 unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
954 954
955 /* 955 /*
956 * Just skip the page if it is fully outside i_size, e.g. due 956 * Skip the page if it is fully outside i_size, e.g. due to a
957 * to a truncate operation that is in progress. 957 * truncate operation that is in progress. We must redirty the
958 * page so that reclaim stops reclaiming it. Otherwise
959 * xfs_vm_releasepage() is called on it and gets confused.
958 */ 960 */
959 if (page->index >= end_index + 1 || offset_into_page == 0) { 961 if (page->index >= end_index + 1 || offset_into_page == 0)
960 unlock_page(page); 962 goto redirty;
961 return 0;
962 }
963 963
964 /* 964 /*
965 * The page straddles i_size. It must be zeroed out on each 965 * The page straddles i_size. It must be zeroed out on each
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 888683844d98..20fe3fe9d341 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -15,7 +15,6 @@
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18
19#include "xfs.h" 18#include "xfs.h"
20#include "xfs_fs.h" 19#include "xfs_fs.h"
21#include "xfs_types.h" 20#include "xfs_types.h"
@@ -35,6 +34,7 @@
35#include "xfs_bmap.h" 34#include "xfs_bmap.h"
36#include "xfs_attr.h" 35#include "xfs_attr.h"
37#include "xfs_attr_leaf.h" 36#include "xfs_attr_leaf.h"
37#include "xfs_attr_remote.h"
38#include "xfs_error.h" 38#include "xfs_error.h"
39#include "xfs_quota.h" 39#include "xfs_quota.h"
40#include "xfs_trans_space.h" 40#include "xfs_trans_space.h"
@@ -74,13 +74,6 @@ STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context);
74STATIC int xfs_attr_fillstate(xfs_da_state_t *state); 74STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
75STATIC int xfs_attr_refillstate(xfs_da_state_t *state); 75STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
76 76
77/*
78 * Routines to manipulate out-of-line attribute values.
79 */
80STATIC int xfs_attr_rmtval_set(xfs_da_args_t *args);
81STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
82
83#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
84 77
85STATIC int 78STATIC int
86xfs_attr_name_to_xname( 79xfs_attr_name_to_xname(
@@ -820,7 +813,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
820 error = 0; 813 error = 0;
821 goto out; 814 goto out;
822 } 815 }
823 error = xfs_attr_root_inactive(&trans, dp); 816 error = xfs_attr3_root_inactive(&trans, dp);
824 if (error) 817 if (error)
825 goto out; 818 goto out;
826 819
@@ -906,7 +899,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
906 */ 899 */
907 dp = args->dp; 900 dp = args->dp;
908 args->blkno = 0; 901 args->blkno = 0;
909 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); 902 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
910 if (error) 903 if (error)
911 return error; 904 return error;
912 905
@@ -914,14 +907,14 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
914 * Look up the given attribute in the leaf block. Figure out if 907 * Look up the given attribute in the leaf block. Figure out if
915 * the given flags produce an error or call for an atomic rename. 908 * the given flags produce an error or call for an atomic rename.
916 */ 909 */
917 retval = xfs_attr_leaf_lookup_int(bp, args); 910 retval = xfs_attr3_leaf_lookup_int(bp, args);
918 if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { 911 if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
919 xfs_trans_brelse(args->trans, bp); 912 xfs_trans_brelse(args->trans, bp);
920 return(retval); 913 return retval;
921 } else if (retval == EEXIST) { 914 } else if (retval == EEXIST) {
922 if (args->flags & ATTR_CREATE) { /* pure create op */ 915 if (args->flags & ATTR_CREATE) { /* pure create op */
923 xfs_trans_brelse(args->trans, bp); 916 xfs_trans_brelse(args->trans, bp);
924 return(retval); 917 return retval;
925 } 918 }
926 919
927 trace_xfs_attr_leaf_replace(args); 920 trace_xfs_attr_leaf_replace(args);
@@ -937,7 +930,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
937 * Add the attribute to the leaf block, transitioning to a Btree 930 * Add the attribute to the leaf block, transitioning to a Btree
938 * if required. 931 * if required.
939 */ 932 */
940 retval = xfs_attr_leaf_add(bp, args); 933 retval = xfs_attr3_leaf_add(bp, args);
941 if (retval == ENOSPC) { 934 if (retval == ENOSPC) {
942 /* 935 /*
943 * Promote the attribute list to the Btree format, then 936 * Promote the attribute list to the Btree format, then
@@ -945,7 +938,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
945 * can manage its own transactions. 938 * can manage its own transactions.
946 */ 939 */
947 xfs_bmap_init(args->flist, args->firstblock); 940 xfs_bmap_init(args->flist, args->firstblock);
948 error = xfs_attr_leaf_to_node(args); 941 error = xfs_attr3_leaf_to_node(args);
949 if (!error) { 942 if (!error) {
950 error = xfs_bmap_finish(&args->trans, args->flist, 943 error = xfs_bmap_finish(&args->trans, args->flist,
951 &committed); 944 &committed);
@@ -1010,7 +1003,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1010 * In a separate transaction, set the incomplete flag on the 1003 * In a separate transaction, set the incomplete flag on the
1011 * "old" attr and clear the incomplete flag on the "new" attr. 1004 * "old" attr and clear the incomplete flag on the "new" attr.
1012 */ 1005 */
1013 error = xfs_attr_leaf_flipflags(args); 1006 error = xfs_attr3_leaf_flipflags(args);
1014 if (error) 1007 if (error)
1015 return(error); 1008 return(error);
1016 1009
@@ -1032,19 +1025,19 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1032 * Read in the block containing the "old" attr, then 1025 * Read in the block containing the "old" attr, then
1033 * remove the "old" attr from that block (neat, huh!) 1026 * remove the "old" attr from that block (neat, huh!)
1034 */ 1027 */
1035 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, 1028 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
1036 -1, &bp); 1029 -1, &bp);
1037 if (error) 1030 if (error)
1038 return error; 1031 return error;
1039 1032
1040 xfs_attr_leaf_remove(bp, args); 1033 xfs_attr3_leaf_remove(bp, args);
1041 1034
1042 /* 1035 /*
1043 * If the result is small enough, shrink it all into the inode. 1036 * If the result is small enough, shrink it all into the inode.
1044 */ 1037 */
1045 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { 1038 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1046 xfs_bmap_init(args->flist, args->firstblock); 1039 xfs_bmap_init(args->flist, args->firstblock);
1047 error = xfs_attr_leaf_to_shortform(bp, args, forkoff); 1040 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1048 /* bp is gone due to xfs_da_shrink_inode */ 1041 /* bp is gone due to xfs_da_shrink_inode */
1049 if (!error) { 1042 if (!error) {
1050 error = xfs_bmap_finish(&args->trans, 1043 error = xfs_bmap_finish(&args->trans,
@@ -1076,9 +1069,9 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1076 /* 1069 /*
1077 * Added a "remote" value, just clear the incomplete flag. 1070 * Added a "remote" value, just clear the incomplete flag.
1078 */ 1071 */
1079 error = xfs_attr_leaf_clearflag(args); 1072 error = xfs_attr3_leaf_clearflag(args);
1080 } 1073 }
1081 return(error); 1074 return error;
1082} 1075}
1083 1076
1084/* 1077/*
@@ -1101,24 +1094,24 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
1101 */ 1094 */
1102 dp = args->dp; 1095 dp = args->dp;
1103 args->blkno = 0; 1096 args->blkno = 0;
1104 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); 1097 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
1105 if (error) 1098 if (error)
1106 return error; 1099 return error;
1107 1100
1108 error = xfs_attr_leaf_lookup_int(bp, args); 1101 error = xfs_attr3_leaf_lookup_int(bp, args);
1109 if (error == ENOATTR) { 1102 if (error == ENOATTR) {
1110 xfs_trans_brelse(args->trans, bp); 1103 xfs_trans_brelse(args->trans, bp);
1111 return(error); 1104 return error;
1112 } 1105 }
1113 1106
1114 xfs_attr_leaf_remove(bp, args); 1107 xfs_attr3_leaf_remove(bp, args);
1115 1108
1116 /* 1109 /*
1117 * If the result is small enough, shrink it all into the inode. 1110 * If the result is small enough, shrink it all into the inode.
1118 */ 1111 */
1119 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { 1112 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1120 xfs_bmap_init(args->flist, args->firstblock); 1113 xfs_bmap_init(args->flist, args->firstblock);
1121 error = xfs_attr_leaf_to_shortform(bp, args, forkoff); 1114 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1122 /* bp is gone due to xfs_da_shrink_inode */ 1115 /* bp is gone due to xfs_da_shrink_inode */
1123 if (!error) { 1116 if (!error) {
1124 error = xfs_bmap_finish(&args->trans, args->flist, 1117 error = xfs_bmap_finish(&args->trans, args->flist,
@@ -1128,7 +1121,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
1128 ASSERT(committed); 1121 ASSERT(committed);
1129 args->trans = NULL; 1122 args->trans = NULL;
1130 xfs_bmap_cancel(args->flist); 1123 xfs_bmap_cancel(args->flist);
1131 return(error); 1124 return error;
1132 } 1125 }
1133 1126
1134 /* 1127 /*
@@ -1138,7 +1131,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
1138 if (committed) 1131 if (committed)
1139 xfs_trans_ijoin(args->trans, dp, 0); 1132 xfs_trans_ijoin(args->trans, dp, 0);
1140 } 1133 }
1141 return(0); 1134 return 0;
1142} 1135}
1143 1136
1144/* 1137/*
@@ -1156,21 +1149,21 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
1156 trace_xfs_attr_leaf_get(args); 1149 trace_xfs_attr_leaf_get(args);
1157 1150
1158 args->blkno = 0; 1151 args->blkno = 0;
1159 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); 1152 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
1160 if (error) 1153 if (error)
1161 return error; 1154 return error;
1162 1155
1163 error = xfs_attr_leaf_lookup_int(bp, args); 1156 error = xfs_attr3_leaf_lookup_int(bp, args);
1164 if (error != EEXIST) { 1157 if (error != EEXIST) {
1165 xfs_trans_brelse(args->trans, bp); 1158 xfs_trans_brelse(args->trans, bp);
1166 return(error); 1159 return error;
1167 } 1160 }
1168 error = xfs_attr_leaf_getvalue(bp, args); 1161 error = xfs_attr3_leaf_getvalue(bp, args);
1169 xfs_trans_brelse(args->trans, bp); 1162 xfs_trans_brelse(args->trans, bp);
1170 if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) { 1163 if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
1171 error = xfs_attr_rmtval_get(args); 1164 error = xfs_attr_rmtval_get(args);
1172 } 1165 }
1173 return(error); 1166 return error;
1174} 1167}
1175 1168
1176/* 1169/*
@@ -1185,11 +1178,11 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
1185 trace_xfs_attr_leaf_list(context); 1178 trace_xfs_attr_leaf_list(context);
1186 1179
1187 context->cursor->blkno = 0; 1180 context->cursor->blkno = 0;
1188 error = xfs_attr_leaf_read(NULL, context->dp, 0, -1, &bp); 1181 error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp);
1189 if (error) 1182 if (error)
1190 return XFS_ERROR(error); 1183 return XFS_ERROR(error);
1191 1184
1192 error = xfs_attr_leaf_list_int(bp, context); 1185 error = xfs_attr3_leaf_list_int(bp, context);
1193 xfs_trans_brelse(NULL, bp); 1186 xfs_trans_brelse(NULL, bp);
1194 return XFS_ERROR(error); 1187 return XFS_ERROR(error);
1195} 1188}
@@ -1236,7 +1229,7 @@ restart:
1236 * Search to see if name already exists, and get back a pointer 1229 * Search to see if name already exists, and get back a pointer
1237 * to where it should go. 1230 * to where it should go.
1238 */ 1231 */
1239 error = xfs_da_node_lookup_int(state, &retval); 1232 error = xfs_da3_node_lookup_int(state, &retval);
1240 if (error) 1233 if (error)
1241 goto out; 1234 goto out;
1242 blk = &state->path.blk[ state->path.active-1 ]; 1235 blk = &state->path.blk[ state->path.active-1 ];
@@ -1258,7 +1251,7 @@ restart:
1258 args->rmtblkcnt = 0; 1251 args->rmtblkcnt = 0;
1259 } 1252 }
1260 1253
1261 retval = xfs_attr_leaf_add(blk->bp, state->args); 1254 retval = xfs_attr3_leaf_add(blk->bp, state->args);
1262 if (retval == ENOSPC) { 1255 if (retval == ENOSPC) {
1263 if (state->path.active == 1) { 1256 if (state->path.active == 1) {
1264 /* 1257 /*
@@ -1268,7 +1261,7 @@ restart:
1268 */ 1261 */
1269 xfs_da_state_free(state); 1262 xfs_da_state_free(state);
1270 xfs_bmap_init(args->flist, args->firstblock); 1263 xfs_bmap_init(args->flist, args->firstblock);
1271 error = xfs_attr_leaf_to_node(args); 1264 error = xfs_attr3_leaf_to_node(args);
1272 if (!error) { 1265 if (!error) {
1273 error = xfs_bmap_finish(&args->trans, 1266 error = xfs_bmap_finish(&args->trans,
1274 args->flist, 1267 args->flist,
@@ -1307,7 +1300,7 @@ restart:
1307 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields. 1300 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
1308 */ 1301 */
1309 xfs_bmap_init(args->flist, args->firstblock); 1302 xfs_bmap_init(args->flist, args->firstblock);
1310 error = xfs_da_split(state); 1303 error = xfs_da3_split(state);
1311 if (!error) { 1304 if (!error) {
1312 error = xfs_bmap_finish(&args->trans, args->flist, 1305 error = xfs_bmap_finish(&args->trans, args->flist,
1313 &committed); 1306 &committed);
@@ -1329,7 +1322,7 @@ restart:
1329 /* 1322 /*
1330 * Addition succeeded, update Btree hashvals. 1323 * Addition succeeded, update Btree hashvals.
1331 */ 1324 */
1332 xfs_da_fixhashpath(state, &state->path); 1325 xfs_da3_fixhashpath(state, &state->path);
1333 } 1326 }
1334 1327
1335 /* 1328 /*
@@ -1370,7 +1363,7 @@ restart:
1370 * In a separate transaction, set the incomplete flag on the 1363 * In a separate transaction, set the incomplete flag on the
1371 * "old" attr and clear the incomplete flag on the "new" attr. 1364 * "old" attr and clear the incomplete flag on the "new" attr.
1372 */ 1365 */
1373 error = xfs_attr_leaf_flipflags(args); 1366 error = xfs_attr3_leaf_flipflags(args);
1374 if (error) 1367 if (error)
1375 goto out; 1368 goto out;
1376 1369
@@ -1400,7 +1393,7 @@ restart:
1400 state->blocksize = state->mp->m_sb.sb_blocksize; 1393 state->blocksize = state->mp->m_sb.sb_blocksize;
1401 state->node_ents = state->mp->m_attr_node_ents; 1394 state->node_ents = state->mp->m_attr_node_ents;
1402 state->inleaf = 0; 1395 state->inleaf = 0;
1403 error = xfs_da_node_lookup_int(state, &retval); 1396 error = xfs_da3_node_lookup_int(state, &retval);
1404 if (error) 1397 if (error)
1405 goto out; 1398 goto out;
1406 1399
@@ -1409,15 +1402,15 @@ restart:
1409 */ 1402 */
1410 blk = &state->path.blk[ state->path.active-1 ]; 1403 blk = &state->path.blk[ state->path.active-1 ];
1411 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); 1404 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1412 error = xfs_attr_leaf_remove(blk->bp, args); 1405 error = xfs_attr3_leaf_remove(blk->bp, args);
1413 xfs_da_fixhashpath(state, &state->path); 1406 xfs_da3_fixhashpath(state, &state->path);
1414 1407
1415 /* 1408 /*
1416 * Check to see if the tree needs to be collapsed. 1409 * Check to see if the tree needs to be collapsed.
1417 */ 1410 */
1418 if (retval && (state->path.active > 1)) { 1411 if (retval && (state->path.active > 1)) {
1419 xfs_bmap_init(args->flist, args->firstblock); 1412 xfs_bmap_init(args->flist, args->firstblock);
1420 error = xfs_da_join(state); 1413 error = xfs_da3_join(state);
1421 if (!error) { 1414 if (!error) {
1422 error = xfs_bmap_finish(&args->trans, 1415 error = xfs_bmap_finish(&args->trans,
1423 args->flist, 1416 args->flist,
@@ -1450,7 +1443,7 @@ restart:
1450 /* 1443 /*
1451 * Added a "remote" value, just clear the incomplete flag. 1444 * Added a "remote" value, just clear the incomplete flag.
1452 */ 1445 */
1453 error = xfs_attr_leaf_clearflag(args); 1446 error = xfs_attr3_leaf_clearflag(args);
1454 if (error) 1447 if (error)
1455 goto out; 1448 goto out;
1456 } 1449 }
@@ -1495,7 +1488,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1495 /* 1488 /*
1496 * Search to see if name exists, and get back a pointer to it. 1489 * Search to see if name exists, and get back a pointer to it.
1497 */ 1490 */
1498 error = xfs_da_node_lookup_int(state, &retval); 1491 error = xfs_da3_node_lookup_int(state, &retval);
1499 if (error || (retval != EEXIST)) { 1492 if (error || (retval != EEXIST)) {
1500 if (error == 0) 1493 if (error == 0)
1501 error = retval; 1494 error = retval;
@@ -1524,7 +1517,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1524 * Mark the attribute as INCOMPLETE, then bunmapi() the 1517 * Mark the attribute as INCOMPLETE, then bunmapi() the
1525 * remote value. 1518 * remote value.
1526 */ 1519 */
1527 error = xfs_attr_leaf_setflag(args); 1520 error = xfs_attr3_leaf_setflag(args);
1528 if (error) 1521 if (error)
1529 goto out; 1522 goto out;
1530 error = xfs_attr_rmtval_remove(args); 1523 error = xfs_attr_rmtval_remove(args);
@@ -1545,15 +1538,15 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1545 */ 1538 */
1546 blk = &state->path.blk[ state->path.active-1 ]; 1539 blk = &state->path.blk[ state->path.active-1 ];
1547 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); 1540 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1548 retval = xfs_attr_leaf_remove(blk->bp, args); 1541 retval = xfs_attr3_leaf_remove(blk->bp, args);
1549 xfs_da_fixhashpath(state, &state->path); 1542 xfs_da3_fixhashpath(state, &state->path);
1550 1543
1551 /* 1544 /*
1552 * Check to see if the tree needs to be collapsed. 1545 * Check to see if the tree needs to be collapsed.
1553 */ 1546 */
1554 if (retval && (state->path.active > 1)) { 1547 if (retval && (state->path.active > 1)) {
1555 xfs_bmap_init(args->flist, args->firstblock); 1548 xfs_bmap_init(args->flist, args->firstblock);
1556 error = xfs_da_join(state); 1549 error = xfs_da3_join(state);
1557 if (!error) { 1550 if (!error) {
1558 error = xfs_bmap_finish(&args->trans, args->flist, 1551 error = xfs_bmap_finish(&args->trans, args->flist,
1559 &committed); 1552 &committed);
@@ -1591,13 +1584,13 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1591 ASSERT(state->path.blk[0].bp); 1584 ASSERT(state->path.blk[0].bp);
1592 state->path.blk[0].bp = NULL; 1585 state->path.blk[0].bp = NULL;
1593 1586
1594 error = xfs_attr_leaf_read(args->trans, args->dp, 0, -1, &bp); 1587 error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp);
1595 if (error) 1588 if (error)
1596 goto out; 1589 goto out;
1597 1590
1598 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { 1591 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1599 xfs_bmap_init(args->flist, args->firstblock); 1592 xfs_bmap_init(args->flist, args->firstblock);
1600 error = xfs_attr_leaf_to_shortform(bp, args, forkoff); 1593 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1601 /* bp is gone due to xfs_da_shrink_inode */ 1594 /* bp is gone due to xfs_da_shrink_inode */
1602 if (!error) { 1595 if (!error) {
1603 error = xfs_bmap_finish(&args->trans, 1596 error = xfs_bmap_finish(&args->trans,
@@ -1699,7 +1692,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
1699 ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); 1692 ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1700 for (blk = path->blk, level = 0; level < path->active; blk++, level++) { 1693 for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1701 if (blk->disk_blkno) { 1694 if (blk->disk_blkno) {
1702 error = xfs_da_node_read(state->args->trans, 1695 error = xfs_da3_node_read(state->args->trans,
1703 state->args->dp, 1696 state->args->dp,
1704 blk->blkno, blk->disk_blkno, 1697 blk->blkno, blk->disk_blkno,
1705 &blk->bp, XFS_ATTR_FORK); 1698 &blk->bp, XFS_ATTR_FORK);
@@ -1718,7 +1711,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
1718 ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); 1711 ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1719 for (blk = path->blk, level = 0; level < path->active; blk++, level++) { 1712 for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1720 if (blk->disk_blkno) { 1713 if (blk->disk_blkno) {
1721 error = xfs_da_node_read(state->args->trans, 1714 error = xfs_da3_node_read(state->args->trans,
1722 state->args->dp, 1715 state->args->dp,
1723 blk->blkno, blk->disk_blkno, 1716 blk->blkno, blk->disk_blkno,
1724 &blk->bp, XFS_ATTR_FORK); 1717 &blk->bp, XFS_ATTR_FORK);
@@ -1758,7 +1751,7 @@ xfs_attr_node_get(xfs_da_args_t *args)
1758 /* 1751 /*
1759 * Search to see if name exists, and get back a pointer to it. 1752 * Search to see if name exists, and get back a pointer to it.
1760 */ 1753 */
1761 error = xfs_da_node_lookup_int(state, &retval); 1754 error = xfs_da3_node_lookup_int(state, &retval);
1762 if (error) { 1755 if (error) {
1763 retval = error; 1756 retval = error;
1764 } else if (retval == EEXIST) { 1757 } else if (retval == EEXIST) {
@@ -1769,7 +1762,7 @@ xfs_attr_node_get(xfs_da_args_t *args)
1769 /* 1762 /*
1770 * Get the value, local or "remote" 1763 * Get the value, local or "remote"
1771 */ 1764 */
1772 retval = xfs_attr_leaf_getvalue(blk->bp, args); 1765 retval = xfs_attr3_leaf_getvalue(blk->bp, args);
1773 if (!retval && (args->rmtblkno > 0) 1766 if (!retval && (args->rmtblkno > 0)
1774 && !(args->flags & ATTR_KERNOVAL)) { 1767 && !(args->flags & ATTR_KERNOVAL)) {
1775 retval = xfs_attr_rmtval_get(args); 1768 retval = xfs_attr_rmtval_get(args);
@@ -1794,7 +1787,9 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1794 attrlist_cursor_kern_t *cursor; 1787 attrlist_cursor_kern_t *cursor;
1795 xfs_attr_leafblock_t *leaf; 1788 xfs_attr_leafblock_t *leaf;
1796 xfs_da_intnode_t *node; 1789 xfs_da_intnode_t *node;
1797 xfs_da_node_entry_t *btree; 1790 struct xfs_attr3_icleaf_hdr leafhdr;
1791 struct xfs_da3_icnode_hdr nodehdr;
1792 struct xfs_da_node_entry *btree;
1798 int error, i; 1793 int error, i;
1799 struct xfs_buf *bp; 1794 struct xfs_buf *bp;
1800 1795
@@ -1810,27 +1805,33 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1810 */ 1805 */
1811 bp = NULL; 1806 bp = NULL;
1812 if (cursor->blkno > 0) { 1807 if (cursor->blkno > 0) {
1813 error = xfs_da_node_read(NULL, context->dp, cursor->blkno, -1, 1808 error = xfs_da3_node_read(NULL, context->dp, cursor->blkno, -1,
1814 &bp, XFS_ATTR_FORK); 1809 &bp, XFS_ATTR_FORK);
1815 if ((error != 0) && (error != EFSCORRUPTED)) 1810 if ((error != 0) && (error != EFSCORRUPTED))
1816 return(error); 1811 return(error);
1817 if (bp) { 1812 if (bp) {
1813 struct xfs_attr_leaf_entry *entries;
1814
1818 node = bp->b_addr; 1815 node = bp->b_addr;
1819 switch (be16_to_cpu(node->hdr.info.magic)) { 1816 switch (be16_to_cpu(node->hdr.info.magic)) {
1820 case XFS_DA_NODE_MAGIC: 1817 case XFS_DA_NODE_MAGIC:
1818 case XFS_DA3_NODE_MAGIC:
1821 trace_xfs_attr_list_wrong_blk(context); 1819 trace_xfs_attr_list_wrong_blk(context);
1822 xfs_trans_brelse(NULL, bp); 1820 xfs_trans_brelse(NULL, bp);
1823 bp = NULL; 1821 bp = NULL;
1824 break; 1822 break;
1825 case XFS_ATTR_LEAF_MAGIC: 1823 case XFS_ATTR_LEAF_MAGIC:
1824 case XFS_ATTR3_LEAF_MAGIC:
1826 leaf = bp->b_addr; 1825 leaf = bp->b_addr;
1827 if (cursor->hashval > be32_to_cpu(leaf->entries[ 1826 xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
1828 be16_to_cpu(leaf->hdr.count)-1].hashval)) { 1827 entries = xfs_attr3_leaf_entryp(leaf);
1828 if (cursor->hashval > be32_to_cpu(
1829 entries[leafhdr.count - 1].hashval)) {
1829 trace_xfs_attr_list_wrong_blk(context); 1830 trace_xfs_attr_list_wrong_blk(context);
1830 xfs_trans_brelse(NULL, bp); 1831 xfs_trans_brelse(NULL, bp);
1831 bp = NULL; 1832 bp = NULL;
1832 } else if (cursor->hashval <= 1833 } else if (cursor->hashval <= be32_to_cpu(
1833 be32_to_cpu(leaf->entries[0].hashval)) { 1834 entries[0].hashval)) {
1834 trace_xfs_attr_list_wrong_blk(context); 1835 trace_xfs_attr_list_wrong_blk(context);
1835 xfs_trans_brelse(NULL, bp); 1836 xfs_trans_brelse(NULL, bp);
1836 bp = NULL; 1837 bp = NULL;
@@ -1852,27 +1853,31 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1852 if (bp == NULL) { 1853 if (bp == NULL) {
1853 cursor->blkno = 0; 1854 cursor->blkno = 0;
1854 for (;;) { 1855 for (;;) {
1855 error = xfs_da_node_read(NULL, context->dp, 1856 __uint16_t magic;
1857
1858 error = xfs_da3_node_read(NULL, context->dp,
1856 cursor->blkno, -1, &bp, 1859 cursor->blkno, -1, &bp,
1857 XFS_ATTR_FORK); 1860 XFS_ATTR_FORK);
1858 if (error) 1861 if (error)
1859 return(error); 1862 return(error);
1860 node = bp->b_addr; 1863 node = bp->b_addr;
1861 if (node->hdr.info.magic == 1864 magic = be16_to_cpu(node->hdr.info.magic);
1862 cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) 1865 if (magic == XFS_ATTR_LEAF_MAGIC ||
1866 magic == XFS_ATTR3_LEAF_MAGIC)
1863 break; 1867 break;
1864 if (unlikely(node->hdr.info.magic != 1868 if (magic != XFS_DA_NODE_MAGIC &&
1865 cpu_to_be16(XFS_DA_NODE_MAGIC))) { 1869 magic != XFS_DA3_NODE_MAGIC) {
1866 XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)", 1870 XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
1867 XFS_ERRLEVEL_LOW, 1871 XFS_ERRLEVEL_LOW,
1868 context->dp->i_mount, 1872 context->dp->i_mount,
1869 node); 1873 node);
1870 xfs_trans_brelse(NULL, bp); 1874 xfs_trans_brelse(NULL, bp);
1871 return(XFS_ERROR(EFSCORRUPTED)); 1875 return XFS_ERROR(EFSCORRUPTED);
1872 } 1876 }
1873 btree = node->btree; 1877
1874 for (i = 0; i < be16_to_cpu(node->hdr.count); 1878 xfs_da3_node_hdr_from_disk(&nodehdr, node);
1875 btree++, i++) { 1879 btree = xfs_da3_node_tree_p(node);
1880 for (i = 0; i < nodehdr.count; btree++, i++) {
1876 if (cursor->hashval 1881 if (cursor->hashval
1877 <= be32_to_cpu(btree->hashval)) { 1882 <= be32_to_cpu(btree->hashval)) {
1878 cursor->blkno = be32_to_cpu(btree->before); 1883 cursor->blkno = be32_to_cpu(btree->before);
@@ -1881,9 +1886,9 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1881 break; 1886 break;
1882 } 1887 }
1883 } 1888 }
1884 if (i == be16_to_cpu(node->hdr.count)) { 1889 if (i == nodehdr.count) {
1885 xfs_trans_brelse(NULL, bp); 1890 xfs_trans_brelse(NULL, bp);
1886 return(0); 1891 return 0;
1887 } 1892 }
1888 xfs_trans_brelse(NULL, bp); 1893 xfs_trans_brelse(NULL, bp);
1889 } 1894 }
@@ -1897,310 +1902,21 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1897 */ 1902 */
1898 for (;;) { 1903 for (;;) {
1899 leaf = bp->b_addr; 1904 leaf = bp->b_addr;
1900 error = xfs_attr_leaf_list_int(bp, context); 1905 error = xfs_attr3_leaf_list_int(bp, context);
1901 if (error) { 1906 if (error) {
1902 xfs_trans_brelse(NULL, bp); 1907 xfs_trans_brelse(NULL, bp);
1903 return error; 1908 return error;
1904 } 1909 }
1905 if (context->seen_enough || leaf->hdr.info.forw == 0) 1910 xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
1911 if (context->seen_enough || leafhdr.forw == 0)
1906 break; 1912 break;
1907 cursor->blkno = be32_to_cpu(leaf->hdr.info.forw); 1913 cursor->blkno = leafhdr.forw;
1908 xfs_trans_brelse(NULL, bp); 1914 xfs_trans_brelse(NULL, bp);
1909 error = xfs_attr_leaf_read(NULL, context->dp, cursor->blkno, -1, 1915 error = xfs_attr3_leaf_read(NULL, context->dp, cursor->blkno, -1,
1910 &bp); 1916 &bp);
1911 if (error) 1917 if (error)
1912 return error; 1918 return error;
1913 } 1919 }
1914 xfs_trans_brelse(NULL, bp); 1920 xfs_trans_brelse(NULL, bp);
1915 return(0); 1921 return 0;
1916}
1917
1918
1919/*========================================================================
1920 * External routines for manipulating out-of-line attribute values.
1921 *========================================================================*/
1922
1923/*
1924 * Read the value associated with an attribute from the out-of-line buffer
1925 * that we stored it in.
1926 */
1927int
1928xfs_attr_rmtval_get(xfs_da_args_t *args)
1929{
1930 xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE];
1931 xfs_mount_t *mp;
1932 xfs_daddr_t dblkno;
1933 void *dst;
1934 xfs_buf_t *bp;
1935 int nmap, error, tmp, valuelen, blkcnt, i;
1936 xfs_dablk_t lblkno;
1937
1938 trace_xfs_attr_rmtval_get(args);
1939
1940 ASSERT(!(args->flags & ATTR_KERNOVAL));
1941
1942 mp = args->dp->i_mount;
1943 dst = args->value;
1944 valuelen = args->valuelen;
1945 lblkno = args->rmtblkno;
1946 while (valuelen > 0) {
1947 nmap = ATTR_RMTVALUE_MAPSIZE;
1948 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
1949 args->rmtblkcnt, map, &nmap,
1950 XFS_BMAPI_ATTRFORK);
1951 if (error)
1952 return(error);
1953 ASSERT(nmap >= 1);
1954
1955 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
1956 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
1957 (map[i].br_startblock != HOLESTARTBLOCK));
1958 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
1959 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
1960 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1961 dblkno, blkcnt, 0, &bp, NULL);
1962 if (error)
1963 return(error);
1964
1965 tmp = min_t(int, valuelen, BBTOB(bp->b_length));
1966 xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ);
1967 xfs_buf_relse(bp);
1968 dst += tmp;
1969 valuelen -= tmp;
1970
1971 lblkno += map[i].br_blockcount;
1972 }
1973 }
1974 ASSERT(valuelen == 0);
1975 return(0);
1976}
1977
1978/*
1979 * Write the value associated with an attribute into the out-of-line buffer
1980 * that we have defined for it.
1981 */
1982STATIC int
1983xfs_attr_rmtval_set(xfs_da_args_t *args)
1984{
1985 xfs_mount_t *mp;
1986 xfs_fileoff_t lfileoff;
1987 xfs_inode_t *dp;
1988 xfs_bmbt_irec_t map;
1989 xfs_daddr_t dblkno;
1990 void *src;
1991 xfs_buf_t *bp;
1992 xfs_dablk_t lblkno;
1993 int blkcnt, valuelen, nmap, error, tmp, committed;
1994
1995 trace_xfs_attr_rmtval_set(args);
1996
1997 dp = args->dp;
1998 mp = dp->i_mount;
1999 src = args->value;
2000
2001 /*
2002 * Find a "hole" in the attribute address space large enough for
2003 * us to drop the new attribute's value into.
2004 */
2005 blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
2006 lfileoff = 0;
2007 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
2008 XFS_ATTR_FORK);
2009 if (error) {
2010 return(error);
2011 }
2012 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
2013 args->rmtblkcnt = blkcnt;
2014
2015 /*
2016 * Roll through the "value", allocating blocks on disk as required.
2017 */
2018 while (blkcnt > 0) {
2019 /*
2020 * Allocate a single extent, up to the size of the value.
2021 */
2022 xfs_bmap_init(args->flist, args->firstblock);
2023 nmap = 1;
2024 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
2025 blkcnt,
2026 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2027 args->firstblock, args->total, &map, &nmap,
2028 args->flist);
2029 if (!error) {
2030 error = xfs_bmap_finish(&args->trans, args->flist,
2031 &committed);
2032 }
2033 if (error) {
2034 ASSERT(committed);
2035 args->trans = NULL;
2036 xfs_bmap_cancel(args->flist);
2037 return(error);
2038 }
2039
2040 /*
2041 * bmap_finish() may have committed the last trans and started
2042 * a new one. We need the inode to be in all transactions.
2043 */
2044 if (committed)
2045 xfs_trans_ijoin(args->trans, dp, 0);
2046
2047 ASSERT(nmap == 1);
2048 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2049 (map.br_startblock != HOLESTARTBLOCK));
2050 lblkno += map.br_blockcount;
2051 blkcnt -= map.br_blockcount;
2052
2053 /*
2054 * Start the next trans in the chain.
2055 */
2056 error = xfs_trans_roll(&args->trans, dp);
2057 if (error)
2058 return (error);
2059 }
2060
2061 /*
2062 * Roll through the "value", copying the attribute value to the
2063 * already-allocated blocks. Blocks are written synchronously
2064 * so that we can know they are all on disk before we turn off
2065 * the INCOMPLETE flag.
2066 */
2067 lblkno = args->rmtblkno;
2068 valuelen = args->valuelen;
2069 while (valuelen > 0) {
2070 int buflen;
2071
2072 /*
2073 * Try to remember where we decided to put the value.
2074 */
2075 xfs_bmap_init(args->flist, args->firstblock);
2076 nmap = 1;
2077 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
2078 args->rmtblkcnt, &map, &nmap,
2079 XFS_BMAPI_ATTRFORK);
2080 if (error)
2081 return(error);
2082 ASSERT(nmap == 1);
2083 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2084 (map.br_startblock != HOLESTARTBLOCK));
2085
2086 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2087 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2088
2089 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0);
2090 if (!bp)
2091 return ENOMEM;
2092
2093 buflen = BBTOB(bp->b_length);
2094 tmp = min_t(int, valuelen, buflen);
2095 xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
2096 if (tmp < buflen)
2097 xfs_buf_zero(bp, tmp, buflen - tmp);
2098
2099 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
2100 xfs_buf_relse(bp);
2101 if (error)
2102 return error;
2103 src += tmp;
2104 valuelen -= tmp;
2105
2106 lblkno += map.br_blockcount;
2107 }
2108 ASSERT(valuelen == 0);
2109 return(0);
2110}
2111
2112/*
2113 * Remove the value associated with an attribute by deleting the
2114 * out-of-line buffer that it is stored on.
2115 */
2116STATIC int
2117xfs_attr_rmtval_remove(xfs_da_args_t *args)
2118{
2119 xfs_mount_t *mp;
2120 xfs_bmbt_irec_t map;
2121 xfs_buf_t *bp;
2122 xfs_daddr_t dblkno;
2123 xfs_dablk_t lblkno;
2124 int valuelen, blkcnt, nmap, error, done, committed;
2125
2126 trace_xfs_attr_rmtval_remove(args);
2127
2128 mp = args->dp->i_mount;
2129
2130 /*
2131 * Roll through the "value", invalidating the attribute value's
2132 * blocks.
2133 */
2134 lblkno = args->rmtblkno;
2135 valuelen = args->rmtblkcnt;
2136 while (valuelen > 0) {
2137 /*
2138 * Try to remember where we decided to put the value.
2139 */
2140 nmap = 1;
2141 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
2142 args->rmtblkcnt, &map, &nmap,
2143 XFS_BMAPI_ATTRFORK);
2144 if (error)
2145 return(error);
2146 ASSERT(nmap == 1);
2147 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2148 (map.br_startblock != HOLESTARTBLOCK));
2149
2150 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2151 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2152
2153 /*
2154 * If the "remote" value is in the cache, remove it.
2155 */
2156 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
2157 if (bp) {
2158 xfs_buf_stale(bp);
2159 xfs_buf_relse(bp);
2160 bp = NULL;
2161 }
2162
2163 valuelen -= map.br_blockcount;
2164
2165 lblkno += map.br_blockcount;
2166 }
2167
2168 /*
2169 * Keep de-allocating extents until the remote-value region is gone.
2170 */
2171 lblkno = args->rmtblkno;
2172 blkcnt = args->rmtblkcnt;
2173 done = 0;
2174 while (!done) {
2175 xfs_bmap_init(args->flist, args->firstblock);
2176 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
2177 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2178 1, args->firstblock, args->flist,
2179 &done);
2180 if (!error) {
2181 error = xfs_bmap_finish(&args->trans, args->flist,
2182 &committed);
2183 }
2184 if (error) {
2185 ASSERT(committed);
2186 args->trans = NULL;
2187 xfs_bmap_cancel(args->flist);
2188 return(error);
2189 }
2190
2191 /*
2192 * bmap_finish() may have committed the last trans and started
2193 * a new one. We need the inode to be in all transactions.
2194 */
2195 if (committed)
2196 xfs_trans_ijoin(args->trans, args->dp, 0);
2197
2198 /*
2199 * Close out trans and start the next one in the chain.
2200 */
2201 error = xfs_trans_roll(&args->trans, args->dp);
2202 if (error)
2203 return (error);
2204 }
2205 return(0);
2206} 1922}
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index e920d68ef509..de8dd58da46c 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -140,7 +140,6 @@ typedef struct xfs_attr_list_context {
140 * Overall external interface routines. 140 * Overall external interface routines.
141 */ 141 */
142int xfs_attr_inactive(struct xfs_inode *dp); 142int xfs_attr_inactive(struct xfs_inode *dp);
143int xfs_attr_rmtval_get(struct xfs_da_args *args);
144int xfs_attr_list_int(struct xfs_attr_list_context *); 143int xfs_attr_list_int(struct xfs_attr_list_context *);
145 144
146#endif /* __XFS_ATTR_H__ */ 145#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index ee24993c7d12..08d5457c948e 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -31,6 +32,7 @@
31#include "xfs_alloc.h" 32#include "xfs_alloc.h"
32#include "xfs_btree.h" 33#include "xfs_btree.h"
33#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
35#include "xfs_attr_remote.h"
34#include "xfs_dinode.h" 36#include "xfs_dinode.h"
35#include "xfs_inode.h" 37#include "xfs_inode.h"
36#include "xfs_inode_item.h" 38#include "xfs_inode_item.h"
@@ -39,6 +41,9 @@
39#include "xfs_attr_leaf.h" 41#include "xfs_attr_leaf.h"
40#include "xfs_error.h" 42#include "xfs_error.h"
41#include "xfs_trace.h" 43#include "xfs_trace.h"
44#include "xfs_buf_item.h"
45#include "xfs_cksum.h"
46
42 47
43/* 48/*
44 * xfs_attr_leaf.c 49 * xfs_attr_leaf.c
@@ -53,85 +58,226 @@
53/* 58/*
54 * Routines used for growing the Btree. 59 * Routines used for growing the Btree.
55 */ 60 */
56STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t which_block, 61STATIC int xfs_attr3_leaf_create(struct xfs_da_args *args,
57 struct xfs_buf **bpp); 62 xfs_dablk_t which_block, struct xfs_buf **bpp);
58STATIC int xfs_attr_leaf_add_work(struct xfs_buf *leaf_buffer, 63STATIC int xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer,
59 xfs_da_args_t *args, int freemap_index); 64 struct xfs_attr3_icleaf_hdr *ichdr,
60STATIC void xfs_attr_leaf_compact(struct xfs_da_args *args, 65 struct xfs_da_args *args, int freemap_index);
61 struct xfs_buf *leaf_buffer); 66STATIC void xfs_attr3_leaf_compact(struct xfs_da_args *args,
62STATIC void xfs_attr_leaf_rebalance(xfs_da_state_t *state, 67 struct xfs_attr3_icleaf_hdr *ichdr,
68 struct xfs_buf *leaf_buffer);
69STATIC void xfs_attr3_leaf_rebalance(xfs_da_state_t *state,
63 xfs_da_state_blk_t *blk1, 70 xfs_da_state_blk_t *blk1,
64 xfs_da_state_blk_t *blk2); 71 xfs_da_state_blk_t *blk2);
65STATIC int xfs_attr_leaf_figure_balance(xfs_da_state_t *state, 72STATIC int xfs_attr3_leaf_figure_balance(xfs_da_state_t *state,
66 xfs_da_state_blk_t *leaf_blk_1, 73 xfs_da_state_blk_t *leaf_blk_1,
67 xfs_da_state_blk_t *leaf_blk_2, 74 struct xfs_attr3_icleaf_hdr *ichdr1,
68 int *number_entries_in_blk1, 75 xfs_da_state_blk_t *leaf_blk_2,
69 int *number_usedbytes_in_blk1); 76 struct xfs_attr3_icleaf_hdr *ichdr2,
77 int *number_entries_in_blk1,
78 int *number_usedbytes_in_blk1);
70 79
71/* 80/*
72 * Routines used for shrinking the Btree. 81 * Routines used for shrinking the Btree.
73 */ 82 */
74STATIC int xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, 83STATIC int xfs_attr3_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
75 struct xfs_buf *bp, int level); 84 struct xfs_buf *bp, int level);
76STATIC int xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, 85STATIC int xfs_attr3_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
77 struct xfs_buf *bp); 86 struct xfs_buf *bp);
78STATIC int xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, 87STATIC int xfs_attr3_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
79 xfs_dablk_t blkno, int blkcnt); 88 xfs_dablk_t blkno, int blkcnt);
80 89
81/* 90/*
82 * Utility routines. 91 * Utility routines.
83 */ 92 */
84STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf, 93STATIC void xfs_attr3_leaf_moveents(struct xfs_attr_leafblock *src_leaf,
85 int src_start, 94 struct xfs_attr3_icleaf_hdr *src_ichdr, int src_start,
86 xfs_attr_leafblock_t *dst_leaf, 95 struct xfs_attr_leafblock *dst_leaf,
87 int dst_start, int move_count, 96 struct xfs_attr3_icleaf_hdr *dst_ichdr, int dst_start,
88 xfs_mount_t *mp); 97 int move_count, struct xfs_mount *mp);
89STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); 98STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
90 99
91static void 100void
92xfs_attr_leaf_verify( 101xfs_attr3_leaf_hdr_from_disk(
102 struct xfs_attr3_icleaf_hdr *to,
103 struct xfs_attr_leafblock *from)
104{
105 int i;
106
107 ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
108 from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
109
110 if (from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) {
111 struct xfs_attr3_leaf_hdr *hdr3 = (struct xfs_attr3_leaf_hdr *)from;
112
113 to->forw = be32_to_cpu(hdr3->info.hdr.forw);
114 to->back = be32_to_cpu(hdr3->info.hdr.back);
115 to->magic = be16_to_cpu(hdr3->info.hdr.magic);
116 to->count = be16_to_cpu(hdr3->count);
117 to->usedbytes = be16_to_cpu(hdr3->usedbytes);
118 to->firstused = be16_to_cpu(hdr3->firstused);
119 to->holes = hdr3->holes;
120
121 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
122 to->freemap[i].base = be16_to_cpu(hdr3->freemap[i].base);
123 to->freemap[i].size = be16_to_cpu(hdr3->freemap[i].size);
124 }
125 return;
126 }
127 to->forw = be32_to_cpu(from->hdr.info.forw);
128 to->back = be32_to_cpu(from->hdr.info.back);
129 to->magic = be16_to_cpu(from->hdr.info.magic);
130 to->count = be16_to_cpu(from->hdr.count);
131 to->usedbytes = be16_to_cpu(from->hdr.usedbytes);
132 to->firstused = be16_to_cpu(from->hdr.firstused);
133 to->holes = from->hdr.holes;
134
135 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
136 to->freemap[i].base = be16_to_cpu(from->hdr.freemap[i].base);
137 to->freemap[i].size = be16_to_cpu(from->hdr.freemap[i].size);
138 }
139}
140
141void
142xfs_attr3_leaf_hdr_to_disk(
143 struct xfs_attr_leafblock *to,
144 struct xfs_attr3_icleaf_hdr *from)
145{
146 int i;
147
148 ASSERT(from->magic == XFS_ATTR_LEAF_MAGIC ||
149 from->magic == XFS_ATTR3_LEAF_MAGIC);
150
151 if (from->magic == XFS_ATTR3_LEAF_MAGIC) {
152 struct xfs_attr3_leaf_hdr *hdr3 = (struct xfs_attr3_leaf_hdr *)to;
153
154 hdr3->info.hdr.forw = cpu_to_be32(from->forw);
155 hdr3->info.hdr.back = cpu_to_be32(from->back);
156 hdr3->info.hdr.magic = cpu_to_be16(from->magic);
157 hdr3->count = cpu_to_be16(from->count);
158 hdr3->usedbytes = cpu_to_be16(from->usedbytes);
159 hdr3->firstused = cpu_to_be16(from->firstused);
160 hdr3->holes = from->holes;
161 hdr3->pad1 = 0;
162
163 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
164 hdr3->freemap[i].base = cpu_to_be16(from->freemap[i].base);
165 hdr3->freemap[i].size = cpu_to_be16(from->freemap[i].size);
166 }
167 return;
168 }
169 to->hdr.info.forw = cpu_to_be32(from->forw);
170 to->hdr.info.back = cpu_to_be32(from->back);
171 to->hdr.info.magic = cpu_to_be16(from->magic);
172 to->hdr.count = cpu_to_be16(from->count);
173 to->hdr.usedbytes = cpu_to_be16(from->usedbytes);
174 to->hdr.firstused = cpu_to_be16(from->firstused);
175 to->hdr.holes = from->holes;
176 to->hdr.pad1 = 0;
177
178 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
179 to->hdr.freemap[i].base = cpu_to_be16(from->freemap[i].base);
180 to->hdr.freemap[i].size = cpu_to_be16(from->freemap[i].size);
181 }
182}
183
184static bool
185xfs_attr3_leaf_verify(
93 struct xfs_buf *bp) 186 struct xfs_buf *bp)
94{ 187{
95 struct xfs_mount *mp = bp->b_target->bt_mount; 188 struct xfs_mount *mp = bp->b_target->bt_mount;
96 struct xfs_attr_leaf_hdr *hdr = bp->b_addr; 189 struct xfs_attr_leafblock *leaf = bp->b_addr;
97 int block_ok = 0; 190 struct xfs_attr3_icleaf_hdr ichdr;
98 191
99 block_ok = hdr->info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC); 192 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
100 if (!block_ok) { 193
101 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); 194 if (xfs_sb_version_hascrc(&mp->m_sb)) {
102 xfs_buf_ioerror(bp, EFSCORRUPTED); 195 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
196
197 if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC)
198 return false;
199
200 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid))
201 return false;
202 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
203 return false;
204 } else {
205 if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
206 return false;
103 } 207 }
208 if (ichdr.count == 0)
209 return false;
210
211 /* XXX: need to range check rest of attr header values */
212 /* XXX: hash order check? */
213
214 return true;
104} 215}
105 216
106static void 217static void
107xfs_attr_leaf_read_verify( 218xfs_attr3_leaf_write_verify(
108 struct xfs_buf *bp) 219 struct xfs_buf *bp)
109{ 220{
110 xfs_attr_leaf_verify(bp); 221 struct xfs_mount *mp = bp->b_target->bt_mount;
222 struct xfs_buf_log_item *bip = bp->b_fspriv;
223 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
224
225 if (!xfs_attr3_leaf_verify(bp)) {
226 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
227 xfs_buf_ioerror(bp, EFSCORRUPTED);
228 return;
229 }
230
231 if (!xfs_sb_version_hascrc(&mp->m_sb))
232 return;
233
234 if (bip)
235 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
236
237 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF);
111} 238}
112 239
240/*
241 * leaf/node format detection on trees is sketchy, so a node read can be done on
242 * leaf level blocks when detection identifies the tree as a node format tree
243 * incorrectly. In this case, we need to swap the verifier to match the correct
244 * format of the block being read.
245 */
113static void 246static void
114xfs_attr_leaf_write_verify( 247xfs_attr3_leaf_read_verify(
115 struct xfs_buf *bp) 248 struct xfs_buf *bp)
116{ 249{
117 xfs_attr_leaf_verify(bp); 250 struct xfs_mount *mp = bp->b_target->bt_mount;
251
252 if ((xfs_sb_version_hascrc(&mp->m_sb) &&
253 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
254 XFS_ATTR3_LEAF_CRC_OFF)) ||
255 !xfs_attr3_leaf_verify(bp)) {
256 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
257 xfs_buf_ioerror(bp, EFSCORRUPTED);
258 }
118} 259}
119 260
120const struct xfs_buf_ops xfs_attr_leaf_buf_ops = { 261const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
121 .verify_read = xfs_attr_leaf_read_verify, 262 .verify_read = xfs_attr3_leaf_read_verify,
122 .verify_write = xfs_attr_leaf_write_verify, 263 .verify_write = xfs_attr3_leaf_write_verify,
123}; 264};
124 265
125int 266int
126xfs_attr_leaf_read( 267xfs_attr3_leaf_read(
127 struct xfs_trans *tp, 268 struct xfs_trans *tp,
128 struct xfs_inode *dp, 269 struct xfs_inode *dp,
129 xfs_dablk_t bno, 270 xfs_dablk_t bno,
130 xfs_daddr_t mappedbno, 271 xfs_daddr_t mappedbno,
131 struct xfs_buf **bpp) 272 struct xfs_buf **bpp)
132{ 273{
133 return xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, 274 int err;
134 XFS_ATTR_FORK, &xfs_attr_leaf_buf_ops); 275
276 err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
277 XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops);
278 if (!err && tp)
279 xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF);
280 return err;
135} 281}
136 282
137/*======================================================================== 283/*========================================================================
@@ -172,7 +318,8 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
172 int dsize; 318 int dsize;
173 xfs_mount_t *mp = dp->i_mount; 319 xfs_mount_t *mp = dp->i_mount;
174 320
175 offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */ 321 /* rounded down */
322 offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3;
176 323
177 switch (dp->i_d.di_format) { 324 switch (dp->i_d.di_format) {
178 case XFS_DINODE_FMT_DEV: 325 case XFS_DINODE_FMT_DEV:
@@ -231,7 +378,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
231 return 0; 378 return 0;
232 return dp->i_d.di_forkoff; 379 return dp->i_d.di_forkoff;
233 } 380 }
234 dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot); 381 dsize = XFS_BMAP_BROOT_SPACE(mp, dp->i_df.if_broot);
235 break; 382 break;
236 } 383 }
237 384
@@ -243,7 +390,8 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
243 minforkoff = roundup(minforkoff, 8) >> 3; 390 minforkoff = roundup(minforkoff, 8) >> 3;
244 391
245 /* attr fork btree root can have at least this many key/ptr pairs */ 392 /* attr fork btree root can have at least this many key/ptr pairs */
246 maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); 393 maxforkoff = XFS_LITINO(mp, dp->i_d.di_version) -
394 XFS_BMDR_SPACE_CALC(MINABTPTRS);
247 maxforkoff = maxforkoff >> 3; /* rounded down */ 395 maxforkoff = maxforkoff >> 3; /* rounded down */
248 396
249 if (offset >= maxforkoff) 397 if (offset >= maxforkoff)
@@ -557,7 +705,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
557 } 705 }
558 706
559 ASSERT(blkno == 0); 707 ASSERT(blkno == 0);
560 error = xfs_attr_leaf_create(args, blkno, &bp); 708 error = xfs_attr3_leaf_create(args, blkno, &bp);
561 if (error) { 709 if (error) {
562 error = xfs_da_shrink_inode(args, 0, bp); 710 error = xfs_da_shrink_inode(args, 0, bp);
563 bp = NULL; 711 bp = NULL;
@@ -586,9 +734,9 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
586 nargs.hashval = xfs_da_hashname(sfe->nameval, 734 nargs.hashval = xfs_da_hashname(sfe->nameval,
587 sfe->namelen); 735 sfe->namelen);
588 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); 736 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags);
589 error = xfs_attr_leaf_lookup_int(bp, &nargs); /* set a->index */ 737 error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
590 ASSERT(error == ENOATTR); 738 ASSERT(error == ENOATTR);
591 error = xfs_attr_leaf_add(bp, &nargs); 739 error = xfs_attr3_leaf_add(bp, &nargs);
592 ASSERT(error != ENOSPC); 740 ASSERT(error != ENOSPC);
593 if (error) 741 if (error)
594 goto out; 742 goto out;
@@ -801,7 +949,7 @@ xfs_attr_shortform_allfit(
801 continue; /* don't copy partial entries */ 949 continue; /* don't copy partial entries */
802 if (!(entry->flags & XFS_ATTR_LOCAL)) 950 if (!(entry->flags & XFS_ATTR_LOCAL))
803 return(0); 951 return(0);
804 name_loc = xfs_attr_leaf_name_local(leaf, i); 952 name_loc = xfs_attr3_leaf_name_local(leaf, i);
805 if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX) 953 if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX)
806 return(0); 954 return(0);
807 if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX) 955 if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX)
@@ -821,29 +969,34 @@ xfs_attr_shortform_allfit(
821 * Convert a leaf attribute list to shortform attribute list 969 * Convert a leaf attribute list to shortform attribute list
822 */ 970 */
823int 971int
824xfs_attr_leaf_to_shortform( 972xfs_attr3_leaf_to_shortform(
825 struct xfs_buf *bp, 973 struct xfs_buf *bp,
826 xfs_da_args_t *args, 974 struct xfs_da_args *args,
827 int forkoff) 975 int forkoff)
828{ 976{
829 xfs_attr_leafblock_t *leaf; 977 struct xfs_attr_leafblock *leaf;
830 xfs_attr_leaf_entry_t *entry; 978 struct xfs_attr3_icleaf_hdr ichdr;
831 xfs_attr_leaf_name_local_t *name_loc; 979 struct xfs_attr_leaf_entry *entry;
832 xfs_da_args_t nargs; 980 struct xfs_attr_leaf_name_local *name_loc;
833 xfs_inode_t *dp; 981 struct xfs_da_args nargs;
834 char *tmpbuffer; 982 struct xfs_inode *dp = args->dp;
835 int error, i; 983 char *tmpbuffer;
984 int error;
985 int i;
836 986
837 trace_xfs_attr_leaf_to_sf(args); 987 trace_xfs_attr_leaf_to_sf(args);
838 988
839 dp = args->dp;
840 tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP); 989 tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP);
841 ASSERT(tmpbuffer != NULL); 990 if (!tmpbuffer)
991 return ENOMEM;
842 992
843 ASSERT(bp != NULL);
844 memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount)); 993 memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount));
994
845 leaf = (xfs_attr_leafblock_t *)tmpbuffer; 995 leaf = (xfs_attr_leafblock_t *)tmpbuffer;
846 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 996 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
997 entry = xfs_attr3_leaf_entryp(leaf);
998
999 /* XXX (dgc): buffer is about to be marked stale - why zero it? */
847 memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount)); 1000 memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount));
848 1001
849 /* 1002 /*
@@ -873,14 +1026,14 @@ xfs_attr_leaf_to_shortform(
873 nargs.whichfork = XFS_ATTR_FORK; 1026 nargs.whichfork = XFS_ATTR_FORK;
874 nargs.trans = args->trans; 1027 nargs.trans = args->trans;
875 nargs.op_flags = XFS_DA_OP_OKNOENT; 1028 nargs.op_flags = XFS_DA_OP_OKNOENT;
876 entry = &leaf->entries[0]; 1029
877 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { 1030 for (i = 0; i < ichdr.count; entry++, i++) {
878 if (entry->flags & XFS_ATTR_INCOMPLETE) 1031 if (entry->flags & XFS_ATTR_INCOMPLETE)
879 continue; /* don't copy partial entries */ 1032 continue; /* don't copy partial entries */
880 if (!entry->nameidx) 1033 if (!entry->nameidx)
881 continue; 1034 continue;
882 ASSERT(entry->flags & XFS_ATTR_LOCAL); 1035 ASSERT(entry->flags & XFS_ATTR_LOCAL);
883 name_loc = xfs_attr_leaf_name_local(leaf, i); 1036 name_loc = xfs_attr3_leaf_name_local(leaf, i);
884 nargs.name = name_loc->nameval; 1037 nargs.name = name_loc->nameval;
885 nargs.namelen = name_loc->namelen; 1038 nargs.namelen = name_loc->namelen;
886 nargs.value = &name_loc->nameval[nargs.namelen]; 1039 nargs.value = &name_loc->nameval[nargs.namelen];
@@ -893,61 +1046,75 @@ xfs_attr_leaf_to_shortform(
893 1046
894out: 1047out:
895 kmem_free(tmpbuffer); 1048 kmem_free(tmpbuffer);
896 return(error); 1049 return error;
897} 1050}
898 1051
899/* 1052/*
900 * Convert from using a single leaf to a root node and a leaf. 1053 * Convert from using a single leaf to a root node and a leaf.
901 */ 1054 */
902int 1055int
903xfs_attr_leaf_to_node(xfs_da_args_t *args) 1056xfs_attr3_leaf_to_node(
1057 struct xfs_da_args *args)
904{ 1058{
905 xfs_attr_leafblock_t *leaf; 1059 struct xfs_attr_leafblock *leaf;
906 xfs_da_intnode_t *node; 1060 struct xfs_attr3_icleaf_hdr icleafhdr;
907 xfs_inode_t *dp; 1061 struct xfs_attr_leaf_entry *entries;
908 struct xfs_buf *bp1, *bp2; 1062 struct xfs_da_node_entry *btree;
909 xfs_dablk_t blkno; 1063 struct xfs_da3_icnode_hdr icnodehdr;
910 int error; 1064 struct xfs_da_intnode *node;
1065 struct xfs_inode *dp = args->dp;
1066 struct xfs_mount *mp = dp->i_mount;
1067 struct xfs_buf *bp1 = NULL;
1068 struct xfs_buf *bp2 = NULL;
1069 xfs_dablk_t blkno;
1070 int error;
911 1071
912 trace_xfs_attr_leaf_to_node(args); 1072 trace_xfs_attr_leaf_to_node(args);
913 1073
914 dp = args->dp;
915 bp1 = bp2 = NULL;
916 error = xfs_da_grow_inode(args, &blkno); 1074 error = xfs_da_grow_inode(args, &blkno);
917 if (error) 1075 if (error)
918 goto out; 1076 goto out;
919 error = xfs_attr_leaf_read(args->trans, args->dp, 0, -1, &bp1); 1077 error = xfs_attr3_leaf_read(args->trans, dp, 0, -1, &bp1);
920 if (error) 1078 if (error)
921 goto out; 1079 goto out;
922 1080
923 bp2 = NULL; 1081 error = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp2, XFS_ATTR_FORK);
924 error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp2,
925 XFS_ATTR_FORK);
926 if (error) 1082 if (error)
927 goto out; 1083 goto out;
1084
1085 /* copy leaf to new buffer, update identifiers */
1086 xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF);
928 bp2->b_ops = bp1->b_ops; 1087 bp2->b_ops = bp1->b_ops;
929 memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(dp->i_mount)); 1088 memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(mp));
930 bp1 = NULL; 1089 if (xfs_sb_version_hascrc(&mp->m_sb)) {
931 xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1); 1090 struct xfs_da3_blkinfo *hdr3 = bp2->b_addr;
1091 hdr3->blkno = cpu_to_be64(bp2->b_bn);
1092 }
1093 xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(mp) - 1);
932 1094
933 /* 1095 /*
934 * Set up the new root node. 1096 * Set up the new root node.
935 */ 1097 */
936 error = xfs_da_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK); 1098 error = xfs_da3_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK);
937 if (error) 1099 if (error)
938 goto out; 1100 goto out;
939 node = bp1->b_addr; 1101 node = bp1->b_addr;
1102 xfs_da3_node_hdr_from_disk(&icnodehdr, node);
1103 btree = xfs_da3_node_tree_p(node);
1104
940 leaf = bp2->b_addr; 1105 leaf = bp2->b_addr;
941 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1106 xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf);
1107 entries = xfs_attr3_leaf_entryp(leaf);
1108
942 /* both on-disk, don't endian-flip twice */ 1109 /* both on-disk, don't endian-flip twice */
943 node->btree[0].hashval = 1110 btree[0].hashval = entries[icleafhdr.count - 1].hashval;
944 leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval; 1111 btree[0].before = cpu_to_be32(blkno);
945 node->btree[0].before = cpu_to_be32(blkno); 1112 icnodehdr.count = 1;
946 node->hdr.count = cpu_to_be16(1); 1113 xfs_da3_node_hdr_to_disk(node, &icnodehdr);
947 xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1); 1114 xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(mp) - 1);
948 error = 0; 1115 error = 0;
949out: 1116out:
950 return(error); 1117 return error;
951} 1118}
952 1119
953 1120
@@ -960,52 +1127,63 @@ out:
960 * or a leaf in a node attribute list. 1127 * or a leaf in a node attribute list.
961 */ 1128 */
962STATIC int 1129STATIC int
963xfs_attr_leaf_create( 1130xfs_attr3_leaf_create(
964 xfs_da_args_t *args, 1131 struct xfs_da_args *args,
965 xfs_dablk_t blkno, 1132 xfs_dablk_t blkno,
966 struct xfs_buf **bpp) 1133 struct xfs_buf **bpp)
967{ 1134{
968 xfs_attr_leafblock_t *leaf; 1135 struct xfs_attr_leafblock *leaf;
969 xfs_attr_leaf_hdr_t *hdr; 1136 struct xfs_attr3_icleaf_hdr ichdr;
970 xfs_inode_t *dp; 1137 struct xfs_inode *dp = args->dp;
971 struct xfs_buf *bp; 1138 struct xfs_mount *mp = dp->i_mount;
972 int error; 1139 struct xfs_buf *bp;
1140 int error;
973 1141
974 trace_xfs_attr_leaf_create(args); 1142 trace_xfs_attr_leaf_create(args);
975 1143
976 dp = args->dp;
977 ASSERT(dp != NULL);
978 error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp, 1144 error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp,
979 XFS_ATTR_FORK); 1145 XFS_ATTR_FORK);
980 if (error) 1146 if (error)
981 return(error); 1147 return error;
982 bp->b_ops = &xfs_attr_leaf_buf_ops; 1148 bp->b_ops = &xfs_attr3_leaf_buf_ops;
1149 xfs_trans_buf_set_type(args->trans, bp, XFS_BLFT_ATTR_LEAF_BUF);
983 leaf = bp->b_addr; 1150 leaf = bp->b_addr;
984 memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount)); 1151 memset(leaf, 0, XFS_LBSIZE(mp));
985 hdr = &leaf->hdr;
986 hdr->info.magic = cpu_to_be16(XFS_ATTR_LEAF_MAGIC);
987 hdr->firstused = cpu_to_be16(XFS_LBSIZE(dp->i_mount));
988 if (!hdr->firstused) {
989 hdr->firstused = cpu_to_be16(
990 XFS_LBSIZE(dp->i_mount) - XFS_ATTR_LEAF_NAME_ALIGN);
991 }
992 1152
993 hdr->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t)); 1153 memset(&ichdr, 0, sizeof(ichdr));
994 hdr->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr->firstused) - 1154 ichdr.firstused = XFS_LBSIZE(mp);
995 sizeof(xfs_attr_leaf_hdr_t));
996 1155
997 xfs_trans_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1); 1156 if (xfs_sb_version_hascrc(&mp->m_sb)) {
1157 struct xfs_da3_blkinfo *hdr3 = bp->b_addr;
1158
1159 ichdr.magic = XFS_ATTR3_LEAF_MAGIC;
1160
1161 hdr3->blkno = cpu_to_be64(bp->b_bn);
1162 hdr3->owner = cpu_to_be64(dp->i_ino);
1163 uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
1164
1165 ichdr.freemap[0].base = sizeof(struct xfs_attr3_leaf_hdr);
1166 } else {
1167 ichdr.magic = XFS_ATTR_LEAF_MAGIC;
1168 ichdr.freemap[0].base = sizeof(struct xfs_attr_leaf_hdr);
1169 }
1170 ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base;
1171
1172 xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
1173 xfs_trans_log_buf(args->trans, bp, 0, XFS_LBSIZE(mp) - 1);
998 1174
999 *bpp = bp; 1175 *bpp = bp;
1000 return(0); 1176 return 0;
1001} 1177}
1002 1178
1003/* 1179/*
1004 * Split the leaf node, rebalance, then add the new entry. 1180 * Split the leaf node, rebalance, then add the new entry.
1005 */ 1181 */
1006int 1182int
1007xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, 1183xfs_attr3_leaf_split(
1008 xfs_da_state_blk_t *newblk) 1184 struct xfs_da_state *state,
1185 struct xfs_da_state_blk *oldblk,
1186 struct xfs_da_state_blk *newblk)
1009{ 1187{
1010 xfs_dablk_t blkno; 1188 xfs_dablk_t blkno;
1011 int error; 1189 int error;
@@ -1019,7 +1197,7 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
1019 error = xfs_da_grow_inode(state->args, &blkno); 1197 error = xfs_da_grow_inode(state->args, &blkno);
1020 if (error) 1198 if (error)
1021 return(error); 1199 return(error);
1022 error = xfs_attr_leaf_create(state->args, blkno, &newblk->bp); 1200 error = xfs_attr3_leaf_create(state->args, blkno, &newblk->bp);
1023 if (error) 1201 if (error)
1024 return(error); 1202 return(error);
1025 newblk->blkno = blkno; 1203 newblk->blkno = blkno;
@@ -1029,8 +1207,8 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
1029 * Rebalance the entries across the two leaves. 1207 * Rebalance the entries across the two leaves.
1030 * NOTE: rebalance() currently depends on the 2nd block being empty. 1208 * NOTE: rebalance() currently depends on the 2nd block being empty.
1031 */ 1209 */
1032 xfs_attr_leaf_rebalance(state, oldblk, newblk); 1210 xfs_attr3_leaf_rebalance(state, oldblk, newblk);
1033 error = xfs_da_blk_link(state, oldblk, newblk); 1211 error = xfs_da3_blk_link(state, oldblk, newblk);
1034 if (error) 1212 if (error)
1035 return(error); 1213 return(error);
1036 1214
@@ -1043,10 +1221,10 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
1043 */ 1221 */
1044 if (state->inleaf) { 1222 if (state->inleaf) {
1045 trace_xfs_attr_leaf_add_old(state->args); 1223 trace_xfs_attr_leaf_add_old(state->args);
1046 error = xfs_attr_leaf_add(oldblk->bp, state->args); 1224 error = xfs_attr3_leaf_add(oldblk->bp, state->args);
1047 } else { 1225 } else {
1048 trace_xfs_attr_leaf_add_new(state->args); 1226 trace_xfs_attr_leaf_add_new(state->args);
1049 error = xfs_attr_leaf_add(newblk->bp, state->args); 1227 error = xfs_attr3_leaf_add(newblk->bp, state->args);
1050 } 1228 }
1051 1229
1052 /* 1230 /*
@@ -1061,22 +1239,23 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
1061 * Add a name to the leaf attribute list structure. 1239 * Add a name to the leaf attribute list structure.
1062 */ 1240 */
1063int 1241int
1064xfs_attr_leaf_add( 1242xfs_attr3_leaf_add(
1065 struct xfs_buf *bp, 1243 struct xfs_buf *bp,
1066 struct xfs_da_args *args) 1244 struct xfs_da_args *args)
1067{ 1245{
1068 xfs_attr_leafblock_t *leaf; 1246 struct xfs_attr_leafblock *leaf;
1069 xfs_attr_leaf_hdr_t *hdr; 1247 struct xfs_attr3_icleaf_hdr ichdr;
1070 xfs_attr_leaf_map_t *map; 1248 int tablesize;
1071 int tablesize, entsize, sum, tmp, i; 1249 int entsize;
1250 int sum;
1251 int tmp;
1252 int i;
1072 1253
1073 trace_xfs_attr_leaf_add(args); 1254 trace_xfs_attr_leaf_add(args);
1074 1255
1075 leaf = bp->b_addr; 1256 leaf = bp->b_addr;
1076 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1257 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
1077 ASSERT((args->index >= 0) 1258 ASSERT(args->index >= 0 && args->index <= ichdr.count);
1078 && (args->index <= be16_to_cpu(leaf->hdr.count)));
1079 hdr = &leaf->hdr;
1080 entsize = xfs_attr_leaf_newentsize(args->namelen, args->valuelen, 1259 entsize = xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
1081 args->trans->t_mountp->m_sb.sb_blocksize, NULL); 1260 args->trans->t_mountp->m_sb.sb_blocksize, NULL);
1082 1261
@@ -1084,25 +1263,23 @@ xfs_attr_leaf_add(
1084 * Search through freemap for first-fit on new name length. 1263 * Search through freemap for first-fit on new name length.
1085 * (may need to figure in size of entry struct too) 1264 * (may need to figure in size of entry struct too)
1086 */ 1265 */
1087 tablesize = (be16_to_cpu(hdr->count) + 1) 1266 tablesize = (ichdr.count + 1) * sizeof(xfs_attr_leaf_entry_t)
1088 * sizeof(xfs_attr_leaf_entry_t) 1267 + xfs_attr3_leaf_hdr_size(leaf);
1089 + sizeof(xfs_attr_leaf_hdr_t); 1268 for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE - 1; i >= 0; i--) {
1090 map = &hdr->freemap[XFS_ATTR_LEAF_MAPSIZE-1]; 1269 if (tablesize > ichdr.firstused) {
1091 for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE-1; i >= 0; map--, i--) { 1270 sum += ichdr.freemap[i].size;
1092 if (tablesize > be16_to_cpu(hdr->firstused)) {
1093 sum += be16_to_cpu(map->size);
1094 continue; 1271 continue;
1095 } 1272 }
1096 if (!map->size) 1273 if (!ichdr.freemap[i].size)
1097 continue; /* no space in this map */ 1274 continue; /* no space in this map */
1098 tmp = entsize; 1275 tmp = entsize;
1099 if (be16_to_cpu(map->base) < be16_to_cpu(hdr->firstused)) 1276 if (ichdr.freemap[i].base < ichdr.firstused)
1100 tmp += sizeof(xfs_attr_leaf_entry_t); 1277 tmp += sizeof(xfs_attr_leaf_entry_t);
1101 if (be16_to_cpu(map->size) >= tmp) { 1278 if (ichdr.freemap[i].size >= tmp) {
1102 tmp = xfs_attr_leaf_add_work(bp, args, i); 1279 tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, i);
1103 return(tmp); 1280 goto out_log_hdr;
1104 } 1281 }
1105 sum += be16_to_cpu(map->size); 1282 sum += ichdr.freemap[i].size;
1106 } 1283 }
1107 1284
1108 /* 1285 /*
@@ -1110,82 +1287,89 @@ xfs_attr_leaf_add(
1110 * and we don't have enough freespace, then compaction will do us 1287 * and we don't have enough freespace, then compaction will do us
1111 * no good and we should just give up. 1288 * no good and we should just give up.
1112 */ 1289 */
1113 if (!hdr->holes && (sum < entsize)) 1290 if (!ichdr.holes && sum < entsize)
1114 return(XFS_ERROR(ENOSPC)); 1291 return XFS_ERROR(ENOSPC);
1115 1292
1116 /* 1293 /*
1117 * Compact the entries to coalesce free space. 1294 * Compact the entries to coalesce free space.
1118 * This may change the hdr->count via dropping INCOMPLETE entries. 1295 * This may change the hdr->count via dropping INCOMPLETE entries.
1119 */ 1296 */
1120 xfs_attr_leaf_compact(args, bp); 1297 xfs_attr3_leaf_compact(args, &ichdr, bp);
1121 1298
1122 /* 1299 /*
1123 * After compaction, the block is guaranteed to have only one 1300 * After compaction, the block is guaranteed to have only one
1124 * free region, in freemap[0]. If it is not big enough, give up. 1301 * free region, in freemap[0]. If it is not big enough, give up.
1125 */ 1302 */
1126 if (be16_to_cpu(hdr->freemap[0].size) 1303 if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) {
1127 < (entsize + sizeof(xfs_attr_leaf_entry_t))) 1304 tmp = ENOSPC;
1128 return(XFS_ERROR(ENOSPC)); 1305 goto out_log_hdr;
1306 }
1307
1308 tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
1129 1309
1130 return(xfs_attr_leaf_add_work(bp, args, 0)); 1310out_log_hdr:
1311 xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
1312 xfs_trans_log_buf(args->trans, bp,
1313 XFS_DA_LOGRANGE(leaf, &leaf->hdr,
1314 xfs_attr3_leaf_hdr_size(leaf)));
1315 return tmp;
1131} 1316}
1132 1317
1133/* 1318/*
1134 * Add a name to a leaf attribute list structure. 1319 * Add a name to a leaf attribute list structure.
1135 */ 1320 */
1136STATIC int 1321STATIC int
1137xfs_attr_leaf_add_work( 1322xfs_attr3_leaf_add_work(
1138 struct xfs_buf *bp, 1323 struct xfs_buf *bp,
1139 xfs_da_args_t *args, 1324 struct xfs_attr3_icleaf_hdr *ichdr,
1140 int mapindex) 1325 struct xfs_da_args *args,
1326 int mapindex)
1141{ 1327{
1142 xfs_attr_leafblock_t *leaf; 1328 struct xfs_attr_leafblock *leaf;
1143 xfs_attr_leaf_hdr_t *hdr; 1329 struct xfs_attr_leaf_entry *entry;
1144 xfs_attr_leaf_entry_t *entry; 1330 struct xfs_attr_leaf_name_local *name_loc;
1145 xfs_attr_leaf_name_local_t *name_loc; 1331 struct xfs_attr_leaf_name_remote *name_rmt;
1146 xfs_attr_leaf_name_remote_t *name_rmt; 1332 struct xfs_mount *mp;
1147 xfs_attr_leaf_map_t *map; 1333 int tmp;
1148 xfs_mount_t *mp; 1334 int i;
1149 int tmp, i;
1150 1335
1151 trace_xfs_attr_leaf_add_work(args); 1336 trace_xfs_attr_leaf_add_work(args);
1152 1337
1153 leaf = bp->b_addr; 1338 leaf = bp->b_addr;
1154 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1339 ASSERT(mapindex >= 0 && mapindex < XFS_ATTR_LEAF_MAPSIZE);
1155 hdr = &leaf->hdr; 1340 ASSERT(args->index >= 0 && args->index <= ichdr->count);
1156 ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE));
1157 ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count)));
1158 1341
1159 /* 1342 /*
1160 * Force open some space in the entry array and fill it in. 1343 * Force open some space in the entry array and fill it in.
1161 */ 1344 */
1162 entry = &leaf->entries[args->index]; 1345 entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
1163 if (args->index < be16_to_cpu(hdr->count)) { 1346 if (args->index < ichdr->count) {
1164 tmp = be16_to_cpu(hdr->count) - args->index; 1347 tmp = ichdr->count - args->index;
1165 tmp *= sizeof(xfs_attr_leaf_entry_t); 1348 tmp *= sizeof(xfs_attr_leaf_entry_t);
1166 memmove((char *)(entry+1), (char *)entry, tmp); 1349 memmove(entry + 1, entry, tmp);
1167 xfs_trans_log_buf(args->trans, bp, 1350 xfs_trans_log_buf(args->trans, bp,
1168 XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); 1351 XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
1169 } 1352 }
1170 be16_add_cpu(&hdr->count, 1); 1353 ichdr->count++;
1171 1354
1172 /* 1355 /*
1173 * Allocate space for the new string (at the end of the run). 1356 * Allocate space for the new string (at the end of the run).
1174 */ 1357 */
1175 map = &hdr->freemap[mapindex];
1176 mp = args->trans->t_mountp; 1358 mp = args->trans->t_mountp;
1177 ASSERT(be16_to_cpu(map->base) < XFS_LBSIZE(mp)); 1359 ASSERT(ichdr->freemap[mapindex].base < XFS_LBSIZE(mp));
1178 ASSERT((be16_to_cpu(map->base) & 0x3) == 0); 1360 ASSERT((ichdr->freemap[mapindex].base & 0x3) == 0);
1179 ASSERT(be16_to_cpu(map->size) >= 1361 ASSERT(ichdr->freemap[mapindex].size >=
1180 xfs_attr_leaf_newentsize(args->namelen, args->valuelen, 1362 xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
1181 mp->m_sb.sb_blocksize, NULL)); 1363 mp->m_sb.sb_blocksize, NULL));
1182 ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp)); 1364 ASSERT(ichdr->freemap[mapindex].size < XFS_LBSIZE(mp));
1183 ASSERT((be16_to_cpu(map->size) & 0x3) == 0); 1365 ASSERT((ichdr->freemap[mapindex].size & 0x3) == 0);
1184 be16_add_cpu(&map->size, 1366
1185 -xfs_attr_leaf_newentsize(args->namelen, args->valuelen, 1367 ichdr->freemap[mapindex].size -=
1186 mp->m_sb.sb_blocksize, &tmp)); 1368 xfs_attr_leaf_newentsize(args->namelen, args->valuelen,
1187 entry->nameidx = cpu_to_be16(be16_to_cpu(map->base) + 1369 mp->m_sb.sb_blocksize, &tmp);
1188 be16_to_cpu(map->size)); 1370
1371 entry->nameidx = cpu_to_be16(ichdr->freemap[mapindex].base +
1372 ichdr->freemap[mapindex].size);
1189 entry->hashval = cpu_to_be32(args->hashval); 1373 entry->hashval = cpu_to_be32(args->hashval);
1190 entry->flags = tmp ? XFS_ATTR_LOCAL : 0; 1374 entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
1191 entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); 1375 entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
@@ -1200,7 +1384,7 @@ xfs_attr_leaf_add_work(
1200 XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); 1384 XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
1201 ASSERT((args->index == 0) || 1385 ASSERT((args->index == 0) ||
1202 (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval))); 1386 (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval)));
1203 ASSERT((args->index == be16_to_cpu(hdr->count)-1) || 1387 ASSERT((args->index == ichdr->count - 1) ||
1204 (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval))); 1388 (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval)));
1205 1389
1206 /* 1390 /*
@@ -1211,14 +1395,14 @@ xfs_attr_leaf_add_work(
1211 * as part of this transaction (a split operation for example). 1395 * as part of this transaction (a split operation for example).
1212 */ 1396 */
1213 if (entry->flags & XFS_ATTR_LOCAL) { 1397 if (entry->flags & XFS_ATTR_LOCAL) {
1214 name_loc = xfs_attr_leaf_name_local(leaf, args->index); 1398 name_loc = xfs_attr3_leaf_name_local(leaf, args->index);
1215 name_loc->namelen = args->namelen; 1399 name_loc->namelen = args->namelen;
1216 name_loc->valuelen = cpu_to_be16(args->valuelen); 1400 name_loc->valuelen = cpu_to_be16(args->valuelen);
1217 memcpy((char *)name_loc->nameval, args->name, args->namelen); 1401 memcpy((char *)name_loc->nameval, args->name, args->namelen);
1218 memcpy((char *)&name_loc->nameval[args->namelen], args->value, 1402 memcpy((char *)&name_loc->nameval[args->namelen], args->value,
1219 be16_to_cpu(name_loc->valuelen)); 1403 be16_to_cpu(name_loc->valuelen));
1220 } else { 1404 } else {
1221 name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); 1405 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
1222 name_rmt->namelen = args->namelen; 1406 name_rmt->namelen = args->namelen;
1223 memcpy((char *)name_rmt->name, args->name, args->namelen); 1407 memcpy((char *)name_rmt->name, args->name, args->namelen);
1224 entry->flags |= XFS_ATTR_INCOMPLETE; 1408 entry->flags |= XFS_ATTR_INCOMPLETE;
@@ -1229,44 +1413,41 @@ xfs_attr_leaf_add_work(
1229 args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen); 1413 args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen);
1230 } 1414 }
1231 xfs_trans_log_buf(args->trans, bp, 1415 xfs_trans_log_buf(args->trans, bp,
1232 XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index), 1416 XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
1233 xfs_attr_leaf_entsize(leaf, args->index))); 1417 xfs_attr_leaf_entsize(leaf, args->index)));
1234 1418
1235 /* 1419 /*
1236 * Update the control info for this leaf node 1420 * Update the control info for this leaf node
1237 */ 1421 */
1238 if (be16_to_cpu(entry->nameidx) < be16_to_cpu(hdr->firstused)) { 1422 if (be16_to_cpu(entry->nameidx) < ichdr->firstused)
1239 /* both on-disk, don't endian-flip twice */ 1423 ichdr->firstused = be16_to_cpu(entry->nameidx);
1240 hdr->firstused = entry->nameidx; 1424
1241 } 1425 ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t)
1242 ASSERT(be16_to_cpu(hdr->firstused) >= 1426 + xfs_attr3_leaf_hdr_size(leaf));
1243 ((be16_to_cpu(hdr->count) * sizeof(*entry)) + sizeof(*hdr))); 1427 tmp = (ichdr->count - 1) * sizeof(xfs_attr_leaf_entry_t)
1244 tmp = (be16_to_cpu(hdr->count)-1) * sizeof(xfs_attr_leaf_entry_t) 1428 + xfs_attr3_leaf_hdr_size(leaf);
1245 + sizeof(xfs_attr_leaf_hdr_t); 1429
1246 map = &hdr->freemap[0]; 1430 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
1247 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) { 1431 if (ichdr->freemap[i].base == tmp) {
1248 if (be16_to_cpu(map->base) == tmp) { 1432 ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t);
1249 be16_add_cpu(&map->base, sizeof(xfs_attr_leaf_entry_t)); 1433 ichdr->freemap[i].size -= sizeof(xfs_attr_leaf_entry_t);
1250 be16_add_cpu(&map->size,
1251 -((int)sizeof(xfs_attr_leaf_entry_t)));
1252 } 1434 }
1253 } 1435 }
1254 be16_add_cpu(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index)); 1436 ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
1255 xfs_trans_log_buf(args->trans, bp, 1437 return 0;
1256 XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
1257 return(0);
1258} 1438}
1259 1439
1260/* 1440/*
1261 * Garbage collect a leaf attribute list block by copying it to a new buffer. 1441 * Garbage collect a leaf attribute list block by copying it to a new buffer.
1262 */ 1442 */
1263STATIC void 1443STATIC void
1264xfs_attr_leaf_compact( 1444xfs_attr3_leaf_compact(
1265 struct xfs_da_args *args, 1445 struct xfs_da_args *args,
1446 struct xfs_attr3_icleaf_hdr *ichdr_d,
1266 struct xfs_buf *bp) 1447 struct xfs_buf *bp)
1267{ 1448{
1268 xfs_attr_leafblock_t *leaf_s, *leaf_d; 1449 xfs_attr_leafblock_t *leaf_s, *leaf_d;
1269 xfs_attr_leaf_hdr_t *hdr_s, *hdr_d; 1450 struct xfs_attr3_icleaf_hdr ichdr_s;
1270 struct xfs_trans *trans = args->trans; 1451 struct xfs_trans *trans = args->trans;
1271 struct xfs_mount *mp = trans->t_mountp; 1452 struct xfs_mount *mp = trans->t_mountp;
1272 char *tmpbuffer; 1453 char *tmpbuffer;
@@ -1283,34 +1464,69 @@ xfs_attr_leaf_compact(
1283 */ 1464 */
1284 leaf_s = (xfs_attr_leafblock_t *)tmpbuffer; 1465 leaf_s = (xfs_attr_leafblock_t *)tmpbuffer;
1285 leaf_d = bp->b_addr; 1466 leaf_d = bp->b_addr;
1286 hdr_s = &leaf_s->hdr; 1467 ichdr_s = *ichdr_d; /* struct copy */
1287 hdr_d = &leaf_d->hdr; 1468 ichdr_d->firstused = XFS_LBSIZE(mp);
1288 hdr_d->info = hdr_s->info; /* struct copy */ 1469 ichdr_d->usedbytes = 0;
1289 hdr_d->firstused = cpu_to_be16(XFS_LBSIZE(mp)); 1470 ichdr_d->count = 0;
1290 /* handle truncation gracefully */ 1471 ichdr_d->holes = 0;
1291 if (!hdr_d->firstused) { 1472 ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_s);
1292 hdr_d->firstused = cpu_to_be16( 1473 ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base;
1293 XFS_LBSIZE(mp) - XFS_ATTR_LEAF_NAME_ALIGN);
1294 }
1295 hdr_d->usedbytes = 0;
1296 hdr_d->count = 0;
1297 hdr_d->holes = 0;
1298 hdr_d->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t));
1299 hdr_d->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr_d->firstused) -
1300 sizeof(xfs_attr_leaf_hdr_t));
1301 1474
1302 /* 1475 /*
1303 * Copy all entry's in the same (sorted) order, 1476 * Copy all entry's in the same (sorted) order,
1304 * but allocate name/value pairs packed and in sequence. 1477 * but allocate name/value pairs packed and in sequence.
1305 */ 1478 */
1306 xfs_attr_leaf_moveents(leaf_s, 0, leaf_d, 0, 1479 xfs_attr3_leaf_moveents(leaf_s, &ichdr_s, 0, leaf_d, ichdr_d, 0,
1307 be16_to_cpu(hdr_s->count), mp); 1480 ichdr_s.count, mp);
1481 /*
1482 * this logs the entire buffer, but the caller must write the header
1483 * back to the buffer when it is finished modifying it.
1484 */
1308 xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); 1485 xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
1309 1486
1310 kmem_free(tmpbuffer); 1487 kmem_free(tmpbuffer);
1311} 1488}
1312 1489
1313/* 1490/*
1491 * Compare two leaf blocks "order".
1492 * Return 0 unless leaf2 should go before leaf1.
1493 */
1494static int
1495xfs_attr3_leaf_order(
1496 struct xfs_buf *leaf1_bp,
1497 struct xfs_attr3_icleaf_hdr *leaf1hdr,
1498 struct xfs_buf *leaf2_bp,
1499 struct xfs_attr3_icleaf_hdr *leaf2hdr)
1500{
1501 struct xfs_attr_leaf_entry *entries1;
1502 struct xfs_attr_leaf_entry *entries2;
1503
1504 entries1 = xfs_attr3_leaf_entryp(leaf1_bp->b_addr);
1505 entries2 = xfs_attr3_leaf_entryp(leaf2_bp->b_addr);
1506 if (leaf1hdr->count > 0 && leaf2hdr->count > 0 &&
1507 ((be32_to_cpu(entries2[0].hashval) <
1508 be32_to_cpu(entries1[0].hashval)) ||
1509 (be32_to_cpu(entries2[leaf2hdr->count - 1].hashval) <
1510 be32_to_cpu(entries1[leaf1hdr->count - 1].hashval)))) {
1511 return 1;
1512 }
1513 return 0;
1514}
1515
1516int
1517xfs_attr_leaf_order(
1518 struct xfs_buf *leaf1_bp,
1519 struct xfs_buf *leaf2_bp)
1520{
1521 struct xfs_attr3_icleaf_hdr ichdr1;
1522 struct xfs_attr3_icleaf_hdr ichdr2;
1523
1524 xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1_bp->b_addr);
1525 xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2_bp->b_addr);
1526 return xfs_attr3_leaf_order(leaf1_bp, &ichdr1, leaf2_bp, &ichdr2);
1527}
1528
1529/*
1314 * Redistribute the attribute list entries between two leaf nodes, 1530 * Redistribute the attribute list entries between two leaf nodes,
1315 * taking into account the size of the new entry. 1531 * taking into account the size of the new entry.
1316 * 1532 *
@@ -1323,14 +1539,23 @@ xfs_attr_leaf_compact(
1323 * the "new" and "old" values can end up in different blocks. 1539 * the "new" and "old" values can end up in different blocks.
1324 */ 1540 */
1325STATIC void 1541STATIC void
1326xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, 1542xfs_attr3_leaf_rebalance(
1327 xfs_da_state_blk_t *blk2) 1543 struct xfs_da_state *state,
1544 struct xfs_da_state_blk *blk1,
1545 struct xfs_da_state_blk *blk2)
1328{ 1546{
1329 xfs_da_args_t *args; 1547 struct xfs_da_args *args;
1330 xfs_da_state_blk_t *tmp_blk; 1548 struct xfs_attr_leafblock *leaf1;
1331 xfs_attr_leafblock_t *leaf1, *leaf2; 1549 struct xfs_attr_leafblock *leaf2;
1332 xfs_attr_leaf_hdr_t *hdr1, *hdr2; 1550 struct xfs_attr3_icleaf_hdr ichdr1;
1333 int count, totallen, max, space, swap; 1551 struct xfs_attr3_icleaf_hdr ichdr2;
1552 struct xfs_attr_leaf_entry *entries1;
1553 struct xfs_attr_leaf_entry *entries2;
1554 int count;
1555 int totallen;
1556 int max;
1557 int space;
1558 int swap;
1334 1559
1335 /* 1560 /*
1336 * Set up environment. 1561 * Set up environment.
@@ -1339,9 +1564,9 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1339 ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); 1564 ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
1340 leaf1 = blk1->bp->b_addr; 1565 leaf1 = blk1->bp->b_addr;
1341 leaf2 = blk2->bp->b_addr; 1566 leaf2 = blk2->bp->b_addr;
1342 ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1567 xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
1343 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1568 xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
1344 ASSERT(leaf2->hdr.count == 0); 1569 ASSERT(ichdr2.count == 0);
1345 args = state->args; 1570 args = state->args;
1346 1571
1347 trace_xfs_attr_leaf_rebalance(args); 1572 trace_xfs_attr_leaf_rebalance(args);
@@ -1353,16 +1578,23 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1353 * second block, this code should never set "swap". 1578 * second block, this code should never set "swap".
1354 */ 1579 */
1355 swap = 0; 1580 swap = 0;
1356 if (xfs_attr_leaf_order(blk1->bp, blk2->bp)) { 1581 if (xfs_attr3_leaf_order(blk1->bp, &ichdr1, blk2->bp, &ichdr2)) {
1582 struct xfs_da_state_blk *tmp_blk;
1583 struct xfs_attr3_icleaf_hdr tmp_ichdr;
1584
1357 tmp_blk = blk1; 1585 tmp_blk = blk1;
1358 blk1 = blk2; 1586 blk1 = blk2;
1359 blk2 = tmp_blk; 1587 blk2 = tmp_blk;
1588
1589 /* struct copies to swap them rather than reconverting */
1590 tmp_ichdr = ichdr1;
1591 ichdr1 = ichdr2;
1592 ichdr2 = tmp_ichdr;
1593
1360 leaf1 = blk1->bp->b_addr; 1594 leaf1 = blk1->bp->b_addr;
1361 leaf2 = blk2->bp->b_addr; 1595 leaf2 = blk2->bp->b_addr;
1362 swap = 1; 1596 swap = 1;
1363 } 1597 }
1364 hdr1 = &leaf1->hdr;
1365 hdr2 = &leaf2->hdr;
1366 1598
1367 /* 1599 /*
1368 * Examine entries until we reduce the absolute difference in 1600 * Examine entries until we reduce the absolute difference in
@@ -1372,41 +1604,39 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1372 * "inleaf" is true if the new entry should be inserted into blk1. 1604 * "inleaf" is true if the new entry should be inserted into blk1.
1373 * If "swap" is also true, then reverse the sense of "inleaf". 1605 * If "swap" is also true, then reverse the sense of "inleaf".
1374 */ 1606 */
1375 state->inleaf = xfs_attr_leaf_figure_balance(state, blk1, blk2, 1607 state->inleaf = xfs_attr3_leaf_figure_balance(state, blk1, &ichdr1,
1376 &count, &totallen); 1608 blk2, &ichdr2,
1609 &count, &totallen);
1377 if (swap) 1610 if (swap)
1378 state->inleaf = !state->inleaf; 1611 state->inleaf = !state->inleaf;
1379 1612
1380 /* 1613 /*
1381 * Move any entries required from leaf to leaf: 1614 * Move any entries required from leaf to leaf:
1382 */ 1615 */
1383 if (count < be16_to_cpu(hdr1->count)) { 1616 if (count < ichdr1.count) {
1384 /* 1617 /*
1385 * Figure the total bytes to be added to the destination leaf. 1618 * Figure the total bytes to be added to the destination leaf.
1386 */ 1619 */
1387 /* number entries being moved */ 1620 /* number entries being moved */
1388 count = be16_to_cpu(hdr1->count) - count; 1621 count = ichdr1.count - count;
1389 space = be16_to_cpu(hdr1->usedbytes) - totallen; 1622 space = ichdr1.usedbytes - totallen;
1390 space += count * sizeof(xfs_attr_leaf_entry_t); 1623 space += count * sizeof(xfs_attr_leaf_entry_t);
1391 1624
1392 /* 1625 /*
1393 * leaf2 is the destination, compact it if it looks tight. 1626 * leaf2 is the destination, compact it if it looks tight.
1394 */ 1627 */
1395 max = be16_to_cpu(hdr2->firstused) 1628 max = ichdr2.firstused - xfs_attr3_leaf_hdr_size(leaf1);
1396 - sizeof(xfs_attr_leaf_hdr_t); 1629 max -= ichdr2.count * sizeof(xfs_attr_leaf_entry_t);
1397 max -= be16_to_cpu(hdr2->count) * sizeof(xfs_attr_leaf_entry_t);
1398 if (space > max) 1630 if (space > max)
1399 xfs_attr_leaf_compact(args, blk2->bp); 1631 xfs_attr3_leaf_compact(args, &ichdr2, blk2->bp);
1400 1632
1401 /* 1633 /*
1402 * Move high entries from leaf1 to low end of leaf2. 1634 * Move high entries from leaf1 to low end of leaf2.
1403 */ 1635 */
1404 xfs_attr_leaf_moveents(leaf1, be16_to_cpu(hdr1->count) - count, 1636 xfs_attr3_leaf_moveents(leaf1, &ichdr1, ichdr1.count - count,
1405 leaf2, 0, count, state->mp); 1637 leaf2, &ichdr2, 0, count, state->mp);
1406 1638
1407 xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); 1639 } else if (count > ichdr1.count) {
1408 xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
1409 } else if (count > be16_to_cpu(hdr1->count)) {
1410 /* 1640 /*
1411 * I assert that since all callers pass in an empty 1641 * I assert that since all callers pass in an empty
1412 * second buffer, this code should never execute. 1642 * second buffer, this code should never execute.
@@ -1417,36 +1647,37 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1417 * Figure the total bytes to be added to the destination leaf. 1647 * Figure the total bytes to be added to the destination leaf.
1418 */ 1648 */
1419 /* number entries being moved */ 1649 /* number entries being moved */
1420 count -= be16_to_cpu(hdr1->count); 1650 count -= ichdr1.count;
1421 space = totallen - be16_to_cpu(hdr1->usedbytes); 1651 space = totallen - ichdr1.usedbytes;
1422 space += count * sizeof(xfs_attr_leaf_entry_t); 1652 space += count * sizeof(xfs_attr_leaf_entry_t);
1423 1653
1424 /* 1654 /*
1425 * leaf1 is the destination, compact it if it looks tight. 1655 * leaf1 is the destination, compact it if it looks tight.
1426 */ 1656 */
1427 max = be16_to_cpu(hdr1->firstused) 1657 max = ichdr1.firstused - xfs_attr3_leaf_hdr_size(leaf1);
1428 - sizeof(xfs_attr_leaf_hdr_t); 1658 max -= ichdr1.count * sizeof(xfs_attr_leaf_entry_t);
1429 max -= be16_to_cpu(hdr1->count) * sizeof(xfs_attr_leaf_entry_t);
1430 if (space > max) 1659 if (space > max)
1431 xfs_attr_leaf_compact(args, blk1->bp); 1660 xfs_attr3_leaf_compact(args, &ichdr1, blk1->bp);
1432 1661
1433 /* 1662 /*
1434 * Move low entries from leaf2 to high end of leaf1. 1663 * Move low entries from leaf2 to high end of leaf1.
1435 */ 1664 */
1436 xfs_attr_leaf_moveents(leaf2, 0, leaf1, 1665 xfs_attr3_leaf_moveents(leaf2, &ichdr2, 0, leaf1, &ichdr1,
1437 be16_to_cpu(hdr1->count), count, state->mp); 1666 ichdr1.count, count, state->mp);
1438
1439 xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
1440 xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
1441 } 1667 }
1442 1668
1669 xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1);
1670 xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2);
1671 xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
1672 xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
1673
1443 /* 1674 /*
1444 * Copy out last hashval in each block for B-tree code. 1675 * Copy out last hashval in each block for B-tree code.
1445 */ 1676 */
1446 blk1->hashval = be32_to_cpu( 1677 entries1 = xfs_attr3_leaf_entryp(leaf1);
1447 leaf1->entries[be16_to_cpu(leaf1->hdr.count)-1].hashval); 1678 entries2 = xfs_attr3_leaf_entryp(leaf2);
1448 blk2->hashval = be32_to_cpu( 1679 blk1->hashval = be32_to_cpu(entries1[ichdr1.count - 1].hashval);
1449 leaf2->entries[be16_to_cpu(leaf2->hdr.count)-1].hashval); 1680 blk2->hashval = be32_to_cpu(entries2[ichdr2.count - 1].hashval);
1450 1681
1451 /* 1682 /*
1452 * Adjust the expected index for insertion. 1683 * Adjust the expected index for insertion.
@@ -1460,12 +1691,12 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1460 * inserting. The index/blkno fields refer to the "old" entry, 1691 * inserting. The index/blkno fields refer to the "old" entry,
1461 * while the index2/blkno2 fields refer to the "new" entry. 1692 * while the index2/blkno2 fields refer to the "new" entry.
1462 */ 1693 */
1463 if (blk1->index > be16_to_cpu(leaf1->hdr.count)) { 1694 if (blk1->index > ichdr1.count) {
1464 ASSERT(state->inleaf == 0); 1695 ASSERT(state->inleaf == 0);
1465 blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count); 1696 blk2->index = blk1->index - ichdr1.count;
1466 args->index = args->index2 = blk2->index; 1697 args->index = args->index2 = blk2->index;
1467 args->blkno = args->blkno2 = blk2->blkno; 1698 args->blkno = args->blkno2 = blk2->blkno;
1468 } else if (blk1->index == be16_to_cpu(leaf1->hdr.count)) { 1699 } else if (blk1->index == ichdr1.count) {
1469 if (state->inleaf) { 1700 if (state->inleaf) {
1470 args->index = blk1->index; 1701 args->index = blk1->index;
1471 args->blkno = blk1->blkno; 1702 args->blkno = blk1->blkno;
@@ -1477,8 +1708,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1477 * is already stored in blkno2/index2, so don't 1708 * is already stored in blkno2/index2, so don't
1478 * overwrite it overwise we corrupt the tree. 1709 * overwrite it overwise we corrupt the tree.
1479 */ 1710 */
1480 blk2->index = blk1->index 1711 blk2->index = blk1->index - ichdr1.count;
1481 - be16_to_cpu(leaf1->hdr.count);
1482 args->index = blk2->index; 1712 args->index = blk2->index;
1483 args->blkno = blk2->blkno; 1713 args->blkno = blk2->blkno;
1484 if (!state->extravalid) { 1714 if (!state->extravalid) {
@@ -1506,42 +1736,40 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1506 * GROT: Do a double-split for this case? 1736 * GROT: Do a double-split for this case?
1507 */ 1737 */
1508STATIC int 1738STATIC int
1509xfs_attr_leaf_figure_balance(xfs_da_state_t *state, 1739xfs_attr3_leaf_figure_balance(
1510 xfs_da_state_blk_t *blk1, 1740 struct xfs_da_state *state,
1511 xfs_da_state_blk_t *blk2, 1741 struct xfs_da_state_blk *blk1,
1512 int *countarg, int *usedbytesarg) 1742 struct xfs_attr3_icleaf_hdr *ichdr1,
1743 struct xfs_da_state_blk *blk2,
1744 struct xfs_attr3_icleaf_hdr *ichdr2,
1745 int *countarg,
1746 int *usedbytesarg)
1513{ 1747{
1514 xfs_attr_leafblock_t *leaf1, *leaf2; 1748 struct xfs_attr_leafblock *leaf1 = blk1->bp->b_addr;
1515 xfs_attr_leaf_hdr_t *hdr1, *hdr2; 1749 struct xfs_attr_leafblock *leaf2 = blk2->bp->b_addr;
1516 xfs_attr_leaf_entry_t *entry; 1750 struct xfs_attr_leaf_entry *entry;
1517 int count, max, index, totallen, half; 1751 int count;
1518 int lastdelta, foundit, tmp; 1752 int max;
1519 1753 int index;
1520 /* 1754 int totallen = 0;
1521 * Set up environment. 1755 int half;
1522 */ 1756 int lastdelta;
1523 leaf1 = blk1->bp->b_addr; 1757 int foundit = 0;
1524 leaf2 = blk2->bp->b_addr; 1758 int tmp;
1525 hdr1 = &leaf1->hdr;
1526 hdr2 = &leaf2->hdr;
1527 foundit = 0;
1528 totallen = 0;
1529 1759
1530 /* 1760 /*
1531 * Examine entries until we reduce the absolute difference in 1761 * Examine entries until we reduce the absolute difference in
1532 * byte usage between the two blocks to a minimum. 1762 * byte usage between the two blocks to a minimum.
1533 */ 1763 */
1534 max = be16_to_cpu(hdr1->count) + be16_to_cpu(hdr2->count); 1764 max = ichdr1->count + ichdr2->count;
1535 half = (max+1) * sizeof(*entry); 1765 half = (max + 1) * sizeof(*entry);
1536 half += be16_to_cpu(hdr1->usedbytes) + 1766 half += ichdr1->usedbytes + ichdr2->usedbytes +
1537 be16_to_cpu(hdr2->usedbytes) + 1767 xfs_attr_leaf_newentsize(state->args->namelen,
1538 xfs_attr_leaf_newentsize( 1768 state->args->valuelen,
1539 state->args->namelen, 1769 state->blocksize, NULL);
1540 state->args->valuelen,
1541 state->blocksize, NULL);
1542 half /= 2; 1770 half /= 2;
1543 lastdelta = state->blocksize; 1771 lastdelta = state->blocksize;
1544 entry = &leaf1->entries[0]; 1772 entry = xfs_attr3_leaf_entryp(leaf1);
1545 for (count = index = 0; count < max; entry++, index++, count++) { 1773 for (count = index = 0; count < max; entry++, index++, count++) {
1546 1774
1547#define XFS_ATTR_ABS(A) (((A) < 0) ? -(A) : (A)) 1775#define XFS_ATTR_ABS(A) (((A) < 0) ? -(A) : (A))
@@ -1564,9 +1792,9 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
1564 /* 1792 /*
1565 * Wrap around into the second block if necessary. 1793 * Wrap around into the second block if necessary.
1566 */ 1794 */
1567 if (count == be16_to_cpu(hdr1->count)) { 1795 if (count == ichdr1->count) {
1568 leaf1 = leaf2; 1796 leaf1 = leaf2;
1569 entry = &leaf1->entries[0]; 1797 entry = xfs_attr3_leaf_entryp(leaf1);
1570 index = 0; 1798 index = 0;
1571 } 1799 }
1572 1800
@@ -1597,7 +1825,7 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
1597 1825
1598 *countarg = count; 1826 *countarg = count;
1599 *usedbytesarg = totallen; 1827 *usedbytesarg = totallen;
1600 return(foundit); 1828 return foundit;
1601} 1829}
1602 1830
1603/*======================================================================== 1831/*========================================================================
@@ -1616,14 +1844,20 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
1616 * GROT: allow for INCOMPLETE entries in calculation. 1844 * GROT: allow for INCOMPLETE entries in calculation.
1617 */ 1845 */
1618int 1846int
1619xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) 1847xfs_attr3_leaf_toosmall(
1848 struct xfs_da_state *state,
1849 int *action)
1620{ 1850{
1621 xfs_attr_leafblock_t *leaf; 1851 struct xfs_attr_leafblock *leaf;
1622 xfs_da_state_blk_t *blk; 1852 struct xfs_da_state_blk *blk;
1623 xfs_da_blkinfo_t *info; 1853 struct xfs_attr3_icleaf_hdr ichdr;
1624 int count, bytes, forward, error, retval, i; 1854 struct xfs_buf *bp;
1625 xfs_dablk_t blkno; 1855 xfs_dablk_t blkno;
1626 struct xfs_buf *bp; 1856 int bytes;
1857 int forward;
1858 int error;
1859 int retval;
1860 int i;
1627 1861
1628 trace_xfs_attr_leaf_toosmall(state->args); 1862 trace_xfs_attr_leaf_toosmall(state->args);
1629 1863
@@ -1633,13 +1867,11 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1633 * to coalesce with a sibling. 1867 * to coalesce with a sibling.
1634 */ 1868 */
1635 blk = &state->path.blk[ state->path.active-1 ]; 1869 blk = &state->path.blk[ state->path.active-1 ];
1636 info = blk->bp->b_addr; 1870 leaf = blk->bp->b_addr;
1637 ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1871 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
1638 leaf = (xfs_attr_leafblock_t *)info; 1872 bytes = xfs_attr3_leaf_hdr_size(leaf) +
1639 count = be16_to_cpu(leaf->hdr.count); 1873 ichdr.count * sizeof(xfs_attr_leaf_entry_t) +
1640 bytes = sizeof(xfs_attr_leaf_hdr_t) + 1874 ichdr.usedbytes;
1641 count * sizeof(xfs_attr_leaf_entry_t) +
1642 be16_to_cpu(leaf->hdr.usedbytes);
1643 if (bytes > (state->blocksize >> 1)) { 1875 if (bytes > (state->blocksize >> 1)) {
1644 *action = 0; /* blk over 50%, don't try to join */ 1876 *action = 0; /* blk over 50%, don't try to join */
1645 return(0); 1877 return(0);
@@ -1651,14 +1883,14 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1651 * coalesce it with a sibling block. We choose (arbitrarily) 1883 * coalesce it with a sibling block. We choose (arbitrarily)
1652 * to merge with the forward block unless it is NULL. 1884 * to merge with the forward block unless it is NULL.
1653 */ 1885 */
1654 if (count == 0) { 1886 if (ichdr.count == 0) {
1655 /* 1887 /*
1656 * Make altpath point to the block we want to keep and 1888 * Make altpath point to the block we want to keep and
1657 * path point to the block we want to drop (this one). 1889 * path point to the block we want to drop (this one).
1658 */ 1890 */
1659 forward = (info->forw != 0); 1891 forward = (ichdr.forw != 0);
1660 memcpy(&state->altpath, &state->path, sizeof(state->path)); 1892 memcpy(&state->altpath, &state->path, sizeof(state->path));
1661 error = xfs_da_path_shift(state, &state->altpath, forward, 1893 error = xfs_da3_path_shift(state, &state->altpath, forward,
1662 0, &retval); 1894 0, &retval);
1663 if (error) 1895 if (error)
1664 return(error); 1896 return(error);
@@ -1667,7 +1899,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1667 } else { 1899 } else {
1668 *action = 2; 1900 *action = 2;
1669 } 1901 }
1670 return(0); 1902 return 0;
1671 } 1903 }
1672 1904
1673 /* 1905 /*
@@ -1678,28 +1910,28 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1678 * to shrink an attribute list over time. 1910 * to shrink an attribute list over time.
1679 */ 1911 */
1680 /* start with smaller blk num */ 1912 /* start with smaller blk num */
1681 forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back)); 1913 forward = ichdr.forw < ichdr.back;
1682 for (i = 0; i < 2; forward = !forward, i++) { 1914 for (i = 0; i < 2; forward = !forward, i++) {
1915 struct xfs_attr3_icleaf_hdr ichdr2;
1683 if (forward) 1916 if (forward)
1684 blkno = be32_to_cpu(info->forw); 1917 blkno = ichdr.forw;
1685 else 1918 else
1686 blkno = be32_to_cpu(info->back); 1919 blkno = ichdr.back;
1687 if (blkno == 0) 1920 if (blkno == 0)
1688 continue; 1921 continue;
1689 error = xfs_attr_leaf_read(state->args->trans, state->args->dp, 1922 error = xfs_attr3_leaf_read(state->args->trans, state->args->dp,
1690 blkno, -1, &bp); 1923 blkno, -1, &bp);
1691 if (error) 1924 if (error)
1692 return(error); 1925 return(error);
1693 1926
1694 leaf = (xfs_attr_leafblock_t *)info; 1927 xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr);
1695 count = be16_to_cpu(leaf->hdr.count); 1928
1696 bytes = state->blocksize - (state->blocksize>>2); 1929 bytes = state->blocksize - (state->blocksize >> 2) -
1697 bytes -= be16_to_cpu(leaf->hdr.usedbytes); 1930 ichdr.usedbytes - ichdr2.usedbytes -
1698 leaf = bp->b_addr; 1931 ((ichdr.count + ichdr2.count) *
1699 count += be16_to_cpu(leaf->hdr.count); 1932 sizeof(xfs_attr_leaf_entry_t)) -
1700 bytes -= be16_to_cpu(leaf->hdr.usedbytes); 1933 xfs_attr3_leaf_hdr_size(leaf);
1701 bytes -= count * sizeof(xfs_attr_leaf_entry_t); 1934
1702 bytes -= sizeof(xfs_attr_leaf_hdr_t);
1703 xfs_trans_brelse(state->args->trans, bp); 1935 xfs_trans_brelse(state->args->trans, bp);
1704 if (bytes >= 0) 1936 if (bytes >= 0)
1705 break; /* fits with at least 25% to spare */ 1937 break; /* fits with at least 25% to spare */
@@ -1715,10 +1947,10 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1715 */ 1947 */
1716 memcpy(&state->altpath, &state->path, sizeof(state->path)); 1948 memcpy(&state->altpath, &state->path, sizeof(state->path));
1717 if (blkno < blk->blkno) { 1949 if (blkno < blk->blkno) {
1718 error = xfs_da_path_shift(state, &state->altpath, forward, 1950 error = xfs_da3_path_shift(state, &state->altpath, forward,
1719 0, &retval); 1951 0, &retval);
1720 } else { 1952 } else {
1721 error = xfs_da_path_shift(state, &state->path, forward, 1953 error = xfs_da3_path_shift(state, &state->path, forward,
1722 0, &retval); 1954 0, &retval);
1723 } 1955 }
1724 if (error) 1956 if (error)
@@ -1738,32 +1970,35 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1738 * If two leaves are 37% full, when combined they will leave 25% free. 1970 * If two leaves are 37% full, when combined they will leave 25% free.
1739 */ 1971 */
1740int 1972int
1741xfs_attr_leaf_remove( 1973xfs_attr3_leaf_remove(
1742 struct xfs_buf *bp, 1974 struct xfs_buf *bp,
1743 xfs_da_args_t *args) 1975 struct xfs_da_args *args)
1744{ 1976{
1745 xfs_attr_leafblock_t *leaf; 1977 struct xfs_attr_leafblock *leaf;
1746 xfs_attr_leaf_hdr_t *hdr; 1978 struct xfs_attr3_icleaf_hdr ichdr;
1747 xfs_attr_leaf_map_t *map; 1979 struct xfs_attr_leaf_entry *entry;
1748 xfs_attr_leaf_entry_t *entry; 1980 struct xfs_mount *mp = args->trans->t_mountp;
1749 int before, after, smallest, entsize; 1981 int before;
1750 int tablesize, tmp, i; 1982 int after;
1751 xfs_mount_t *mp; 1983 int smallest;
1984 int entsize;
1985 int tablesize;
1986 int tmp;
1987 int i;
1752 1988
1753 trace_xfs_attr_leaf_remove(args); 1989 trace_xfs_attr_leaf_remove(args);
1754 1990
1755 leaf = bp->b_addr; 1991 leaf = bp->b_addr;
1756 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1992 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
1757 hdr = &leaf->hdr; 1993
1758 mp = args->trans->t_mountp; 1994 ASSERT(ichdr.count > 0 && ichdr.count < XFS_LBSIZE(mp) / 8);
1759 ASSERT((be16_to_cpu(hdr->count) > 0) 1995 ASSERT(args->index >= 0 && args->index < ichdr.count);
1760 && (be16_to_cpu(hdr->count) < (XFS_LBSIZE(mp)/8))); 1996 ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) +
1761 ASSERT((args->index >= 0) 1997 xfs_attr3_leaf_hdr_size(leaf));
1762 && (args->index < be16_to_cpu(hdr->count))); 1998
1763 ASSERT(be16_to_cpu(hdr->firstused) >= 1999 entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
1764 ((be16_to_cpu(hdr->count) * sizeof(*entry)) + sizeof(*hdr))); 2000
1765 entry = &leaf->entries[args->index]; 2001 ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused);
1766 ASSERT(be16_to_cpu(entry->nameidx) >= be16_to_cpu(hdr->firstused));
1767 ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp)); 2002 ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp));
1768 2003
1769 /* 2004 /*
@@ -1772,30 +2007,28 @@ xfs_attr_leaf_remove(
1772 * find smallest free region in case we need to replace it, 2007 * find smallest free region in case we need to replace it,
1773 * adjust any map that borders the entry table, 2008 * adjust any map that borders the entry table,
1774 */ 2009 */
1775 tablesize = be16_to_cpu(hdr->count) * sizeof(xfs_attr_leaf_entry_t) 2010 tablesize = ichdr.count * sizeof(xfs_attr_leaf_entry_t)
1776 + sizeof(xfs_attr_leaf_hdr_t); 2011 + xfs_attr3_leaf_hdr_size(leaf);
1777 map = &hdr->freemap[0]; 2012 tmp = ichdr.freemap[0].size;
1778 tmp = be16_to_cpu(map->size);
1779 before = after = -1; 2013 before = after = -1;
1780 smallest = XFS_ATTR_LEAF_MAPSIZE - 1; 2014 smallest = XFS_ATTR_LEAF_MAPSIZE - 1;
1781 entsize = xfs_attr_leaf_entsize(leaf, args->index); 2015 entsize = xfs_attr_leaf_entsize(leaf, args->index);
1782 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) { 2016 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
1783 ASSERT(be16_to_cpu(map->base) < XFS_LBSIZE(mp)); 2017 ASSERT(ichdr.freemap[i].base < XFS_LBSIZE(mp));
1784 ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp)); 2018 ASSERT(ichdr.freemap[i].size < XFS_LBSIZE(mp));
1785 if (be16_to_cpu(map->base) == tablesize) { 2019 if (ichdr.freemap[i].base == tablesize) {
1786 be16_add_cpu(&map->base, 2020 ichdr.freemap[i].base -= sizeof(xfs_attr_leaf_entry_t);
1787 -((int)sizeof(xfs_attr_leaf_entry_t))); 2021 ichdr.freemap[i].size += sizeof(xfs_attr_leaf_entry_t);
1788 be16_add_cpu(&map->size, sizeof(xfs_attr_leaf_entry_t));
1789 } 2022 }
1790 2023
1791 if ((be16_to_cpu(map->base) + be16_to_cpu(map->size)) 2024 if (ichdr.freemap[i].base + ichdr.freemap[i].size ==
1792 == be16_to_cpu(entry->nameidx)) { 2025 be16_to_cpu(entry->nameidx)) {
1793 before = i; 2026 before = i;
1794 } else if (be16_to_cpu(map->base) 2027 } else if (ichdr.freemap[i].base ==
1795 == (be16_to_cpu(entry->nameidx) + entsize)) { 2028 (be16_to_cpu(entry->nameidx) + entsize)) {
1796 after = i; 2029 after = i;
1797 } else if (be16_to_cpu(map->size) < tmp) { 2030 } else if (ichdr.freemap[i].size < tmp) {
1798 tmp = be16_to_cpu(map->size); 2031 tmp = ichdr.freemap[i].size;
1799 smallest = i; 2032 smallest = i;
1800 } 2033 }
1801 } 2034 }
@@ -1806,36 +2039,30 @@ xfs_attr_leaf_remove(
1806 */ 2039 */
1807 if ((before >= 0) || (after >= 0)) { 2040 if ((before >= 0) || (after >= 0)) {
1808 if ((before >= 0) && (after >= 0)) { 2041 if ((before >= 0) && (after >= 0)) {
1809 map = &hdr->freemap[before]; 2042 ichdr.freemap[before].size += entsize;
1810 be16_add_cpu(&map->size, entsize); 2043 ichdr.freemap[before].size += ichdr.freemap[after].size;
1811 be16_add_cpu(&map->size, 2044 ichdr.freemap[after].base = 0;
1812 be16_to_cpu(hdr->freemap[after].size)); 2045 ichdr.freemap[after].size = 0;
1813 hdr->freemap[after].base = 0;
1814 hdr->freemap[after].size = 0;
1815 } else if (before >= 0) { 2046 } else if (before >= 0) {
1816 map = &hdr->freemap[before]; 2047 ichdr.freemap[before].size += entsize;
1817 be16_add_cpu(&map->size, entsize);
1818 } else { 2048 } else {
1819 map = &hdr->freemap[after]; 2049 ichdr.freemap[after].base = be16_to_cpu(entry->nameidx);
1820 /* both on-disk, don't endian flip twice */ 2050 ichdr.freemap[after].size += entsize;
1821 map->base = entry->nameidx;
1822 be16_add_cpu(&map->size, entsize);
1823 } 2051 }
1824 } else { 2052 } else {
1825 /* 2053 /*
1826 * Replace smallest region (if it is smaller than free'd entry) 2054 * Replace smallest region (if it is smaller than free'd entry)
1827 */ 2055 */
1828 map = &hdr->freemap[smallest]; 2056 if (ichdr.freemap[smallest].size < entsize) {
1829 if (be16_to_cpu(map->size) < entsize) { 2057 ichdr.freemap[smallest].base = be16_to_cpu(entry->nameidx);
1830 map->base = cpu_to_be16(be16_to_cpu(entry->nameidx)); 2058 ichdr.freemap[smallest].size = entsize;
1831 map->size = cpu_to_be16(entsize);
1832 } 2059 }
1833 } 2060 }
1834 2061
1835 /* 2062 /*
1836 * Did we remove the first entry? 2063 * Did we remove the first entry?
1837 */ 2064 */
1838 if (be16_to_cpu(entry->nameidx) == be16_to_cpu(hdr->firstused)) 2065 if (be16_to_cpu(entry->nameidx) == ichdr.firstused)
1839 smallest = 1; 2066 smallest = 1;
1840 else 2067 else
1841 smallest = 0; 2068 smallest = 0;
@@ -1843,20 +2070,20 @@ xfs_attr_leaf_remove(
1843 /* 2070 /*
1844 * Compress the remaining entries and zero out the removed stuff. 2071 * Compress the remaining entries and zero out the removed stuff.
1845 */ 2072 */
1846 memset(xfs_attr_leaf_name(leaf, args->index), 0, entsize); 2073 memset(xfs_attr3_leaf_name(leaf, args->index), 0, entsize);
1847 be16_add_cpu(&hdr->usedbytes, -entsize); 2074 ichdr.usedbytes -= entsize;
1848 xfs_trans_log_buf(args->trans, bp, 2075 xfs_trans_log_buf(args->trans, bp,
1849 XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index), 2076 XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
1850 entsize)); 2077 entsize));
1851 2078
1852 tmp = (be16_to_cpu(hdr->count) - args->index) 2079 tmp = (ichdr.count - args->index) * sizeof(xfs_attr_leaf_entry_t);
1853 * sizeof(xfs_attr_leaf_entry_t); 2080 memmove(entry, entry + 1, tmp);
1854 memmove((char *)entry, (char *)(entry+1), tmp); 2081 ichdr.count--;
1855 be16_add_cpu(&hdr->count, -1);
1856 xfs_trans_log_buf(args->trans, bp, 2082 xfs_trans_log_buf(args->trans, bp,
1857 XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); 2083 XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(xfs_attr_leaf_entry_t)));
1858 entry = &leaf->entries[be16_to_cpu(hdr->count)]; 2084
1859 memset((char *)entry, 0, sizeof(xfs_attr_leaf_entry_t)); 2085 entry = &xfs_attr3_leaf_entryp(leaf)[ichdr.count];
2086 memset(entry, 0, sizeof(xfs_attr_leaf_entry_t));
1860 2087
1861 /* 2088 /*
1862 * If we removed the first entry, re-find the first used byte 2089 * If we removed the first entry, re-find the first used byte
@@ -1866,130 +2093,130 @@ xfs_attr_leaf_remove(
1866 */ 2093 */
1867 if (smallest) { 2094 if (smallest) {
1868 tmp = XFS_LBSIZE(mp); 2095 tmp = XFS_LBSIZE(mp);
1869 entry = &leaf->entries[0]; 2096 entry = xfs_attr3_leaf_entryp(leaf);
1870 for (i = be16_to_cpu(hdr->count)-1; i >= 0; entry++, i--) { 2097 for (i = ichdr.count - 1; i >= 0; entry++, i--) {
1871 ASSERT(be16_to_cpu(entry->nameidx) >= 2098 ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused);
1872 be16_to_cpu(hdr->firstused));
1873 ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp)); 2099 ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp));
1874 2100
1875 if (be16_to_cpu(entry->nameidx) < tmp) 2101 if (be16_to_cpu(entry->nameidx) < tmp)
1876 tmp = be16_to_cpu(entry->nameidx); 2102 tmp = be16_to_cpu(entry->nameidx);
1877 } 2103 }
1878 hdr->firstused = cpu_to_be16(tmp); 2104 ichdr.firstused = tmp;
1879 if (!hdr->firstused) { 2105 if (!ichdr.firstused)
1880 hdr->firstused = cpu_to_be16( 2106 ichdr.firstused = tmp - XFS_ATTR_LEAF_NAME_ALIGN;
1881 tmp - XFS_ATTR_LEAF_NAME_ALIGN);
1882 }
1883 } else { 2107 } else {
1884 hdr->holes = 1; /* mark as needing compaction */ 2108 ichdr.holes = 1; /* mark as needing compaction */
1885 } 2109 }
2110 xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
1886 xfs_trans_log_buf(args->trans, bp, 2111 xfs_trans_log_buf(args->trans, bp,
1887 XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr))); 2112 XFS_DA_LOGRANGE(leaf, &leaf->hdr,
2113 xfs_attr3_leaf_hdr_size(leaf)));
1888 2114
1889 /* 2115 /*
1890 * Check if leaf is less than 50% full, caller may want to 2116 * Check if leaf is less than 50% full, caller may want to
1891 * "join" the leaf with a sibling if so. 2117 * "join" the leaf with a sibling if so.
1892 */ 2118 */
1893 tmp = sizeof(xfs_attr_leaf_hdr_t); 2119 tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) +
1894 tmp += be16_to_cpu(leaf->hdr.count) * sizeof(xfs_attr_leaf_entry_t); 2120 ichdr.count * sizeof(xfs_attr_leaf_entry_t);
1895 tmp += be16_to_cpu(leaf->hdr.usedbytes); 2121
1896 return(tmp < mp->m_attr_magicpct); /* leaf is < 37% full */ 2122 return tmp < mp->m_attr_magicpct; /* leaf is < 37% full */
1897} 2123}
1898 2124
1899/* 2125/*
1900 * Move all the attribute list entries from drop_leaf into save_leaf. 2126 * Move all the attribute list entries from drop_leaf into save_leaf.
1901 */ 2127 */
1902void 2128void
1903xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, 2129xfs_attr3_leaf_unbalance(
1904 xfs_da_state_blk_t *save_blk) 2130 struct xfs_da_state *state,
2131 struct xfs_da_state_blk *drop_blk,
2132 struct xfs_da_state_blk *save_blk)
1905{ 2133{
1906 xfs_attr_leafblock_t *drop_leaf, *save_leaf, *tmp_leaf; 2134 struct xfs_attr_leafblock *drop_leaf = drop_blk->bp->b_addr;
1907 xfs_attr_leaf_hdr_t *drop_hdr, *save_hdr, *tmp_hdr; 2135 struct xfs_attr_leafblock *save_leaf = save_blk->bp->b_addr;
1908 xfs_mount_t *mp; 2136 struct xfs_attr3_icleaf_hdr drophdr;
1909 char *tmpbuffer; 2137 struct xfs_attr3_icleaf_hdr savehdr;
2138 struct xfs_attr_leaf_entry *entry;
2139 struct xfs_mount *mp = state->mp;
1910 2140
1911 trace_xfs_attr_leaf_unbalance(state->args); 2141 trace_xfs_attr_leaf_unbalance(state->args);
1912 2142
1913 /*
1914 * Set up environment.
1915 */
1916 mp = state->mp;
1917 ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC);
1918 ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1919 drop_leaf = drop_blk->bp->b_addr; 2143 drop_leaf = drop_blk->bp->b_addr;
1920 save_leaf = save_blk->bp->b_addr; 2144 save_leaf = save_blk->bp->b_addr;
1921 ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2145 xfs_attr3_leaf_hdr_from_disk(&drophdr, drop_leaf);
1922 ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2146 xfs_attr3_leaf_hdr_from_disk(&savehdr, save_leaf);
1923 drop_hdr = &drop_leaf->hdr; 2147 entry = xfs_attr3_leaf_entryp(drop_leaf);
1924 save_hdr = &save_leaf->hdr;
1925 2148
1926 /* 2149 /*
1927 * Save last hashval from dying block for later Btree fixup. 2150 * Save last hashval from dying block for later Btree fixup.
1928 */ 2151 */
1929 drop_blk->hashval = be32_to_cpu( 2152 drop_blk->hashval = be32_to_cpu(entry[drophdr.count - 1].hashval);
1930 drop_leaf->entries[be16_to_cpu(drop_leaf->hdr.count)-1].hashval);
1931 2153
1932 /* 2154 /*
1933 * Check if we need a temp buffer, or can we do it in place. 2155 * Check if we need a temp buffer, or can we do it in place.
1934 * Note that we don't check "leaf" for holes because we will 2156 * Note that we don't check "leaf" for holes because we will
1935 * always be dropping it, toosmall() decided that for us already. 2157 * always be dropping it, toosmall() decided that for us already.
1936 */ 2158 */
1937 if (save_hdr->holes == 0) { 2159 if (savehdr.holes == 0) {
1938 /* 2160 /*
1939 * dest leaf has no holes, so we add there. May need 2161 * dest leaf has no holes, so we add there. May need
1940 * to make some room in the entry array. 2162 * to make some room in the entry array.
1941 */ 2163 */
1942 if (xfs_attr_leaf_order(save_blk->bp, drop_blk->bp)) { 2164 if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
1943 xfs_attr_leaf_moveents(drop_leaf, 0, save_leaf, 0, 2165 drop_blk->bp, &drophdr)) {
1944 be16_to_cpu(drop_hdr->count), mp); 2166 xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
2167 save_leaf, &savehdr, 0,
2168 drophdr.count, mp);
1945 } else { 2169 } else {
1946 xfs_attr_leaf_moveents(drop_leaf, 0, save_leaf, 2170 xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
1947 be16_to_cpu(save_hdr->count), 2171 save_leaf, &savehdr,
1948 be16_to_cpu(drop_hdr->count), mp); 2172 savehdr.count, drophdr.count, mp);
1949 } 2173 }
1950 } else { 2174 } else {
1951 /* 2175 /*
1952 * Destination has holes, so we make a temporary copy 2176 * Destination has holes, so we make a temporary copy
1953 * of the leaf and add them both to that. 2177 * of the leaf and add them both to that.
1954 */ 2178 */
1955 tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP); 2179 struct xfs_attr_leafblock *tmp_leaf;
1956 ASSERT(tmpbuffer != NULL); 2180 struct xfs_attr3_icleaf_hdr tmphdr;
1957 memset(tmpbuffer, 0, state->blocksize); 2181
1958 tmp_leaf = (xfs_attr_leafblock_t *)tmpbuffer; 2182 tmp_leaf = kmem_alloc(state->blocksize, KM_SLEEP);
1959 tmp_hdr = &tmp_leaf->hdr; 2183 memset(tmp_leaf, 0, state->blocksize);
1960 tmp_hdr->info = save_hdr->info; /* struct copy */ 2184 memset(&tmphdr, 0, sizeof(tmphdr));
1961 tmp_hdr->count = 0; 2185
1962 tmp_hdr->firstused = cpu_to_be16(state->blocksize); 2186 tmphdr.magic = savehdr.magic;
1963 if (!tmp_hdr->firstused) { 2187 tmphdr.forw = savehdr.forw;
1964 tmp_hdr->firstused = cpu_to_be16( 2188 tmphdr.back = savehdr.back;
1965 state->blocksize - XFS_ATTR_LEAF_NAME_ALIGN); 2189 tmphdr.firstused = state->blocksize;
1966 } 2190 if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
1967 tmp_hdr->usedbytes = 0; 2191 drop_blk->bp, &drophdr)) {
1968 if (xfs_attr_leaf_order(save_blk->bp, drop_blk->bp)) { 2192 xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
1969 xfs_attr_leaf_moveents(drop_leaf, 0, tmp_leaf, 0, 2193 tmp_leaf, &tmphdr, 0,
1970 be16_to_cpu(drop_hdr->count), mp); 2194 drophdr.count, mp);
1971 xfs_attr_leaf_moveents(save_leaf, 0, tmp_leaf, 2195 xfs_attr3_leaf_moveents(save_leaf, &savehdr, 0,
1972 be16_to_cpu(tmp_leaf->hdr.count), 2196 tmp_leaf, &tmphdr, tmphdr.count,
1973 be16_to_cpu(save_hdr->count), mp); 2197 savehdr.count, mp);
1974 } else { 2198 } else {
1975 xfs_attr_leaf_moveents(save_leaf, 0, tmp_leaf, 0, 2199 xfs_attr3_leaf_moveents(save_leaf, &savehdr, 0,
1976 be16_to_cpu(save_hdr->count), mp); 2200 tmp_leaf, &tmphdr, 0,
1977 xfs_attr_leaf_moveents(drop_leaf, 0, tmp_leaf, 2201 savehdr.count, mp);
1978 be16_to_cpu(tmp_leaf->hdr.count), 2202 xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
1979 be16_to_cpu(drop_hdr->count), mp); 2203 tmp_leaf, &tmphdr, tmphdr.count,
2204 drophdr.count, mp);
1980 } 2205 }
1981 memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize); 2206 memcpy(save_leaf, tmp_leaf, state->blocksize);
1982 kmem_free(tmpbuffer); 2207 savehdr = tmphdr; /* struct copy */
2208 kmem_free(tmp_leaf);
1983 } 2209 }
1984 2210
2211 xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr);
1985 xfs_trans_log_buf(state->args->trans, save_blk->bp, 0, 2212 xfs_trans_log_buf(state->args->trans, save_blk->bp, 0,
1986 state->blocksize - 1); 2213 state->blocksize - 1);
1987 2214
1988 /* 2215 /*
1989 * Copy out last hashval in each block for B-tree code. 2216 * Copy out last hashval in each block for B-tree code.
1990 */ 2217 */
1991 save_blk->hashval = be32_to_cpu( 2218 entry = xfs_attr3_leaf_entryp(save_leaf);
1992 save_leaf->entries[be16_to_cpu(save_leaf->hdr.count)-1].hashval); 2219 save_blk->hashval = be32_to_cpu(entry[savehdr.count - 1].hashval);
1993} 2220}
1994 2221
1995/*======================================================================== 2222/*========================================================================
@@ -2010,31 +2237,33 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
2010 * Don't change the args->value unless we find the attribute. 2237 * Don't change the args->value unless we find the attribute.
2011 */ 2238 */
2012int 2239int
2013xfs_attr_leaf_lookup_int( 2240xfs_attr3_leaf_lookup_int(
2014 struct xfs_buf *bp, 2241 struct xfs_buf *bp,
2015 xfs_da_args_t *args) 2242 struct xfs_da_args *args)
2016{ 2243{
2017 xfs_attr_leafblock_t *leaf; 2244 struct xfs_attr_leafblock *leaf;
2018 xfs_attr_leaf_entry_t *entry; 2245 struct xfs_attr3_icleaf_hdr ichdr;
2019 xfs_attr_leaf_name_local_t *name_loc; 2246 struct xfs_attr_leaf_entry *entry;
2020 xfs_attr_leaf_name_remote_t *name_rmt; 2247 struct xfs_attr_leaf_entry *entries;
2021 int probe, span; 2248 struct xfs_attr_leaf_name_local *name_loc;
2022 xfs_dahash_t hashval; 2249 struct xfs_attr_leaf_name_remote *name_rmt;
2250 xfs_dahash_t hashval;
2251 int probe;
2252 int span;
2023 2253
2024 trace_xfs_attr_leaf_lookup(args); 2254 trace_xfs_attr_leaf_lookup(args);
2025 2255
2026 leaf = bp->b_addr; 2256 leaf = bp->b_addr;
2027 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2257 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
2028 ASSERT(be16_to_cpu(leaf->hdr.count) 2258 entries = xfs_attr3_leaf_entryp(leaf);
2029 < (XFS_LBSIZE(args->dp->i_mount)/8)); 2259 ASSERT(ichdr.count < XFS_LBSIZE(args->dp->i_mount) / 8);
2030 2260
2031 /* 2261 /*
2032 * Binary search. (note: small blocks will skip this loop) 2262 * Binary search. (note: small blocks will skip this loop)
2033 */ 2263 */
2034 hashval = args->hashval; 2264 hashval = args->hashval;
2035 probe = span = be16_to_cpu(leaf->hdr.count) / 2; 2265 probe = span = ichdr.count / 2;
2036 for (entry = &leaf->entries[probe]; span > 4; 2266 for (entry = &entries[probe]; span > 4; entry = &entries[probe]) {
2037 entry = &leaf->entries[probe]) {
2038 span /= 2; 2267 span /= 2;
2039 if (be32_to_cpu(entry->hashval) < hashval) 2268 if (be32_to_cpu(entry->hashval) < hashval)
2040 probe += span; 2269 probe += span;
@@ -2043,35 +2272,31 @@ xfs_attr_leaf_lookup_int(
2043 else 2272 else
2044 break; 2273 break;
2045 } 2274 }
2046 ASSERT((probe >= 0) && 2275 ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count));
2047 (!leaf->hdr.count 2276 ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval);
2048 || (probe < be16_to_cpu(leaf->hdr.count))));
2049 ASSERT((span <= 4) || (be32_to_cpu(entry->hashval) == hashval));
2050 2277
2051 /* 2278 /*
2052 * Since we may have duplicate hashval's, find the first matching 2279 * Since we may have duplicate hashval's, find the first matching
2053 * hashval in the leaf. 2280 * hashval in the leaf.
2054 */ 2281 */
2055 while ((probe > 0) && (be32_to_cpu(entry->hashval) >= hashval)) { 2282 while (probe > 0 && be32_to_cpu(entry->hashval) >= hashval) {
2056 entry--; 2283 entry--;
2057 probe--; 2284 probe--;
2058 } 2285 }
2059 while ((probe < be16_to_cpu(leaf->hdr.count)) && 2286 while (probe < ichdr.count &&
2060 (be32_to_cpu(entry->hashval) < hashval)) { 2287 be32_to_cpu(entry->hashval) < hashval) {
2061 entry++; 2288 entry++;
2062 probe++; 2289 probe++;
2063 } 2290 }
2064 if ((probe == be16_to_cpu(leaf->hdr.count)) || 2291 if (probe == ichdr.count || be32_to_cpu(entry->hashval) != hashval) {
2065 (be32_to_cpu(entry->hashval) != hashval)) {
2066 args->index = probe; 2292 args->index = probe;
2067 return(XFS_ERROR(ENOATTR)); 2293 return XFS_ERROR(ENOATTR);
2068 } 2294 }
2069 2295
2070 /* 2296 /*
2071 * Duplicate keys may be present, so search all of them for a match. 2297 * Duplicate keys may be present, so search all of them for a match.
2072 */ 2298 */
2073 for ( ; (probe < be16_to_cpu(leaf->hdr.count)) && 2299 for (; probe < ichdr.count && (be32_to_cpu(entry->hashval) == hashval);
2074 (be32_to_cpu(entry->hashval) == hashval);
2075 entry++, probe++) { 2300 entry++, probe++) {
2076/* 2301/*
2077 * GROT: Add code to remove incomplete entries. 2302 * GROT: Add code to remove incomplete entries.
@@ -2085,21 +2310,22 @@ xfs_attr_leaf_lookup_int(
2085 continue; 2310 continue;
2086 } 2311 }
2087 if (entry->flags & XFS_ATTR_LOCAL) { 2312 if (entry->flags & XFS_ATTR_LOCAL) {
2088 name_loc = xfs_attr_leaf_name_local(leaf, probe); 2313 name_loc = xfs_attr3_leaf_name_local(leaf, probe);
2089 if (name_loc->namelen != args->namelen) 2314 if (name_loc->namelen != args->namelen)
2090 continue; 2315 continue;
2091 if (memcmp(args->name, (char *)name_loc->nameval, args->namelen) != 0) 2316 if (memcmp(args->name, name_loc->nameval,
2317 args->namelen) != 0)
2092 continue; 2318 continue;
2093 if (!xfs_attr_namesp_match(args->flags, entry->flags)) 2319 if (!xfs_attr_namesp_match(args->flags, entry->flags))
2094 continue; 2320 continue;
2095 args->index = probe; 2321 args->index = probe;
2096 return(XFS_ERROR(EEXIST)); 2322 return XFS_ERROR(EEXIST);
2097 } else { 2323 } else {
2098 name_rmt = xfs_attr_leaf_name_remote(leaf, probe); 2324 name_rmt = xfs_attr3_leaf_name_remote(leaf, probe);
2099 if (name_rmt->namelen != args->namelen) 2325 if (name_rmt->namelen != args->namelen)
2100 continue; 2326 continue;
2101 if (memcmp(args->name, (char *)name_rmt->name, 2327 if (memcmp(args->name, name_rmt->name,
2102 args->namelen) != 0) 2328 args->namelen) != 0)
2103 continue; 2329 continue;
2104 if (!xfs_attr_namesp_match(args->flags, entry->flags)) 2330 if (!xfs_attr_namesp_match(args->flags, entry->flags))
2105 continue; 2331 continue;
@@ -2107,11 +2333,11 @@ xfs_attr_leaf_lookup_int(
2107 args->rmtblkno = be32_to_cpu(name_rmt->valueblk); 2333 args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
2108 args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, 2334 args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount,
2109 be32_to_cpu(name_rmt->valuelen)); 2335 be32_to_cpu(name_rmt->valuelen));
2110 return(XFS_ERROR(EEXIST)); 2336 return XFS_ERROR(EEXIST);
2111 } 2337 }
2112 } 2338 }
2113 args->index = probe; 2339 args->index = probe;
2114 return(XFS_ERROR(ENOATTR)); 2340 return XFS_ERROR(ENOATTR);
2115} 2341}
2116 2342
2117/* 2343/*
@@ -2119,40 +2345,40 @@ xfs_attr_leaf_lookup_int(
2119 * list structure. 2345 * list structure.
2120 */ 2346 */
2121int 2347int
2122xfs_attr_leaf_getvalue( 2348xfs_attr3_leaf_getvalue(
2123 struct xfs_buf *bp, 2349 struct xfs_buf *bp,
2124 xfs_da_args_t *args) 2350 struct xfs_da_args *args)
2125{ 2351{
2126 int valuelen; 2352 struct xfs_attr_leafblock *leaf;
2127 xfs_attr_leafblock_t *leaf; 2353 struct xfs_attr3_icleaf_hdr ichdr;
2128 xfs_attr_leaf_entry_t *entry; 2354 struct xfs_attr_leaf_entry *entry;
2129 xfs_attr_leaf_name_local_t *name_loc; 2355 struct xfs_attr_leaf_name_local *name_loc;
2130 xfs_attr_leaf_name_remote_t *name_rmt; 2356 struct xfs_attr_leaf_name_remote *name_rmt;
2357 int valuelen;
2131 2358
2132 leaf = bp->b_addr; 2359 leaf = bp->b_addr;
2133 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2360 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
2134 ASSERT(be16_to_cpu(leaf->hdr.count) 2361 ASSERT(ichdr.count < XFS_LBSIZE(args->dp->i_mount) / 8);
2135 < (XFS_LBSIZE(args->dp->i_mount)/8)); 2362 ASSERT(args->index < ichdr.count);
2136 ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
2137 2363
2138 entry = &leaf->entries[args->index]; 2364 entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
2139 if (entry->flags & XFS_ATTR_LOCAL) { 2365 if (entry->flags & XFS_ATTR_LOCAL) {
2140 name_loc = xfs_attr_leaf_name_local(leaf, args->index); 2366 name_loc = xfs_attr3_leaf_name_local(leaf, args->index);
2141 ASSERT(name_loc->namelen == args->namelen); 2367 ASSERT(name_loc->namelen == args->namelen);
2142 ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0); 2368 ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0);
2143 valuelen = be16_to_cpu(name_loc->valuelen); 2369 valuelen = be16_to_cpu(name_loc->valuelen);
2144 if (args->flags & ATTR_KERNOVAL) { 2370 if (args->flags & ATTR_KERNOVAL) {
2145 args->valuelen = valuelen; 2371 args->valuelen = valuelen;
2146 return(0); 2372 return 0;
2147 } 2373 }
2148 if (args->valuelen < valuelen) { 2374 if (args->valuelen < valuelen) {
2149 args->valuelen = valuelen; 2375 args->valuelen = valuelen;
2150 return(XFS_ERROR(ERANGE)); 2376 return XFS_ERROR(ERANGE);
2151 } 2377 }
2152 args->valuelen = valuelen; 2378 args->valuelen = valuelen;
2153 memcpy(args->value, &name_loc->nameval[args->namelen], valuelen); 2379 memcpy(args->value, &name_loc->nameval[args->namelen], valuelen);
2154 } else { 2380 } else {
2155 name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); 2381 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
2156 ASSERT(name_rmt->namelen == args->namelen); 2382 ASSERT(name_rmt->namelen == args->namelen);
2157 ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); 2383 ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
2158 valuelen = be32_to_cpu(name_rmt->valuelen); 2384 valuelen = be32_to_cpu(name_rmt->valuelen);
@@ -2160,15 +2386,15 @@ xfs_attr_leaf_getvalue(
2160 args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, valuelen); 2386 args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, valuelen);
2161 if (args->flags & ATTR_KERNOVAL) { 2387 if (args->flags & ATTR_KERNOVAL) {
2162 args->valuelen = valuelen; 2388 args->valuelen = valuelen;
2163 return(0); 2389 return 0;
2164 } 2390 }
2165 if (args->valuelen < valuelen) { 2391 if (args->valuelen < valuelen) {
2166 args->valuelen = valuelen; 2392 args->valuelen = valuelen;
2167 return(XFS_ERROR(ERANGE)); 2393 return XFS_ERROR(ERANGE);
2168 } 2394 }
2169 args->valuelen = valuelen; 2395 args->valuelen = valuelen;
2170 } 2396 }
2171 return(0); 2397 return 0;
2172} 2398}
2173 2399
2174/*======================================================================== 2400/*========================================================================
@@ -2181,13 +2407,21 @@ xfs_attr_leaf_getvalue(
2181 */ 2407 */
2182/*ARGSUSED*/ 2408/*ARGSUSED*/
2183STATIC void 2409STATIC void
2184xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, 2410xfs_attr3_leaf_moveents(
2185 xfs_attr_leafblock_t *leaf_d, int start_d, 2411 struct xfs_attr_leafblock *leaf_s,
2186 int count, xfs_mount_t *mp) 2412 struct xfs_attr3_icleaf_hdr *ichdr_s,
2413 int start_s,
2414 struct xfs_attr_leafblock *leaf_d,
2415 struct xfs_attr3_icleaf_hdr *ichdr_d,
2416 int start_d,
2417 int count,
2418 struct xfs_mount *mp)
2187{ 2419{
2188 xfs_attr_leaf_hdr_t *hdr_s, *hdr_d; 2420 struct xfs_attr_leaf_entry *entry_s;
2189 xfs_attr_leaf_entry_t *entry_s, *entry_d; 2421 struct xfs_attr_leaf_entry *entry_d;
2190 int desti, tmp, i; 2422 int desti;
2423 int tmp;
2424 int i;
2191 2425
2192 /* 2426 /*
2193 * Check for nothing to do. 2427 * Check for nothing to do.
@@ -2198,45 +2432,41 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
2198 /* 2432 /*
2199 * Set up environment. 2433 * Set up environment.
2200 */ 2434 */
2201 ASSERT(leaf_s->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2435 ASSERT(ichdr_s->magic == XFS_ATTR_LEAF_MAGIC ||
2202 ASSERT(leaf_d->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2436 ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC);
2203 hdr_s = &leaf_s->hdr; 2437 ASSERT(ichdr_s->magic == ichdr_d->magic);
2204 hdr_d = &leaf_d->hdr; 2438 ASSERT(ichdr_s->count > 0 && ichdr_s->count < XFS_LBSIZE(mp) / 8);
2205 ASSERT((be16_to_cpu(hdr_s->count) > 0) && 2439 ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s))
2206 (be16_to_cpu(hdr_s->count) < (XFS_LBSIZE(mp)/8))); 2440 + xfs_attr3_leaf_hdr_size(leaf_s));
2207 ASSERT(be16_to_cpu(hdr_s->firstused) >= 2441 ASSERT(ichdr_d->count < XFS_LBSIZE(mp) / 8);
2208 ((be16_to_cpu(hdr_s->count) 2442 ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d))
2209 * sizeof(*entry_s))+sizeof(*hdr_s))); 2443 + xfs_attr3_leaf_hdr_size(leaf_d));
2210 ASSERT(be16_to_cpu(hdr_d->count) < (XFS_LBSIZE(mp)/8)); 2444
2211 ASSERT(be16_to_cpu(hdr_d->firstused) >= 2445 ASSERT(start_s < ichdr_s->count);
2212 ((be16_to_cpu(hdr_d->count) 2446 ASSERT(start_d <= ichdr_d->count);
2213 * sizeof(*entry_d))+sizeof(*hdr_d))); 2447 ASSERT(count <= ichdr_s->count);
2214 2448
2215 ASSERT(start_s < be16_to_cpu(hdr_s->count));
2216 ASSERT(start_d <= be16_to_cpu(hdr_d->count));
2217 ASSERT(count <= be16_to_cpu(hdr_s->count));
2218 2449
2219 /* 2450 /*
2220 * Move the entries in the destination leaf up to make a hole? 2451 * Move the entries in the destination leaf up to make a hole?
2221 */ 2452 */
2222 if (start_d < be16_to_cpu(hdr_d->count)) { 2453 if (start_d < ichdr_d->count) {
2223 tmp = be16_to_cpu(hdr_d->count) - start_d; 2454 tmp = ichdr_d->count - start_d;
2224 tmp *= sizeof(xfs_attr_leaf_entry_t); 2455 tmp *= sizeof(xfs_attr_leaf_entry_t);
2225 entry_s = &leaf_d->entries[start_d]; 2456 entry_s = &xfs_attr3_leaf_entryp(leaf_d)[start_d];
2226 entry_d = &leaf_d->entries[start_d + count]; 2457 entry_d = &xfs_attr3_leaf_entryp(leaf_d)[start_d + count];
2227 memmove((char *)entry_d, (char *)entry_s, tmp); 2458 memmove(entry_d, entry_s, tmp);
2228 } 2459 }
2229 2460
2230 /* 2461 /*
2231 * Copy all entry's in the same (sorted) order, 2462 * Copy all entry's in the same (sorted) order,
2232 * but allocate attribute info packed and in sequence. 2463 * but allocate attribute info packed and in sequence.
2233 */ 2464 */
2234 entry_s = &leaf_s->entries[start_s]; 2465 entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s];
2235 entry_d = &leaf_d->entries[start_d]; 2466 entry_d = &xfs_attr3_leaf_entryp(leaf_d)[start_d];
2236 desti = start_d; 2467 desti = start_d;
2237 for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) { 2468 for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) {
2238 ASSERT(be16_to_cpu(entry_s->nameidx) 2469 ASSERT(be16_to_cpu(entry_s->nameidx) >= ichdr_s->firstused);
2239 >= be16_to_cpu(hdr_s->firstused));
2240 tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i); 2470 tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i);
2241#ifdef GROT 2471#ifdef GROT
2242 /* 2472 /*
@@ -2245,36 +2475,34 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
2245 * off for 6.2, should be revisited later. 2475 * off for 6.2, should be revisited later.
2246 */ 2476 */
2247 if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */ 2477 if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */
2248 memset(xfs_attr_leaf_name(leaf_s, start_s + i), 0, tmp); 2478 memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp);
2249 be16_add_cpu(&hdr_s->usedbytes, -tmp); 2479 ichdr_s->usedbytes -= tmp;
2250 be16_add_cpu(&hdr_s->count, -1); 2480 ichdr_s->count -= 1;
2251 entry_d--; /* to compensate for ++ in loop hdr */ 2481 entry_d--; /* to compensate for ++ in loop hdr */
2252 desti--; 2482 desti--;
2253 if ((start_s + i) < offset) 2483 if ((start_s + i) < offset)
2254 result++; /* insertion index adjustment */ 2484 result++; /* insertion index adjustment */
2255 } else { 2485 } else {
2256#endif /* GROT */ 2486#endif /* GROT */
2257 be16_add_cpu(&hdr_d->firstused, -tmp); 2487 ichdr_d->firstused -= tmp;
2258 /* both on-disk, don't endian flip twice */ 2488 /* both on-disk, don't endian flip twice */
2259 entry_d->hashval = entry_s->hashval; 2489 entry_d->hashval = entry_s->hashval;
2260 /* both on-disk, don't endian flip twice */ 2490 entry_d->nameidx = cpu_to_be16(ichdr_d->firstused);
2261 entry_d->nameidx = hdr_d->firstused;
2262 entry_d->flags = entry_s->flags; 2491 entry_d->flags = entry_s->flags;
2263 ASSERT(be16_to_cpu(entry_d->nameidx) + tmp 2492 ASSERT(be16_to_cpu(entry_d->nameidx) + tmp
2264 <= XFS_LBSIZE(mp)); 2493 <= XFS_LBSIZE(mp));
2265 memmove(xfs_attr_leaf_name(leaf_d, desti), 2494 memmove(xfs_attr3_leaf_name(leaf_d, desti),
2266 xfs_attr_leaf_name(leaf_s, start_s + i), tmp); 2495 xfs_attr3_leaf_name(leaf_s, start_s + i), tmp);
2267 ASSERT(be16_to_cpu(entry_s->nameidx) + tmp 2496 ASSERT(be16_to_cpu(entry_s->nameidx) + tmp
2268 <= XFS_LBSIZE(mp)); 2497 <= XFS_LBSIZE(mp));
2269 memset(xfs_attr_leaf_name(leaf_s, start_s + i), 0, tmp); 2498 memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp);
2270 be16_add_cpu(&hdr_s->usedbytes, -tmp); 2499 ichdr_s->usedbytes -= tmp;
2271 be16_add_cpu(&hdr_d->usedbytes, tmp); 2500 ichdr_d->usedbytes += tmp;
2272 be16_add_cpu(&hdr_s->count, -1); 2501 ichdr_s->count -= 1;
2273 be16_add_cpu(&hdr_d->count, 1); 2502 ichdr_d->count += 1;
2274 tmp = be16_to_cpu(hdr_d->count) 2503 tmp = ichdr_d->count * sizeof(xfs_attr_leaf_entry_t)
2275 * sizeof(xfs_attr_leaf_entry_t) 2504 + xfs_attr3_leaf_hdr_size(leaf_d);
2276 + sizeof(xfs_attr_leaf_hdr_t); 2505 ASSERT(ichdr_d->firstused >= tmp);
2277 ASSERT(be16_to_cpu(hdr_d->firstused) >= tmp);
2278#ifdef GROT 2506#ifdef GROT
2279 } 2507 }
2280#endif /* GROT */ 2508#endif /* GROT */
@@ -2283,71 +2511,40 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
2283 /* 2511 /*
2284 * Zero out the entries we just copied. 2512 * Zero out the entries we just copied.
2285 */ 2513 */
2286 if (start_s == be16_to_cpu(hdr_s->count)) { 2514 if (start_s == ichdr_s->count) {
2287 tmp = count * sizeof(xfs_attr_leaf_entry_t); 2515 tmp = count * sizeof(xfs_attr_leaf_entry_t);
2288 entry_s = &leaf_s->entries[start_s]; 2516 entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s];
2289 ASSERT(((char *)entry_s + tmp) <= 2517 ASSERT(((char *)entry_s + tmp) <=
2290 ((char *)leaf_s + XFS_LBSIZE(mp))); 2518 ((char *)leaf_s + XFS_LBSIZE(mp)));
2291 memset((char *)entry_s, 0, tmp); 2519 memset(entry_s, 0, tmp);
2292 } else { 2520 } else {
2293 /* 2521 /*
2294 * Move the remaining entries down to fill the hole, 2522 * Move the remaining entries down to fill the hole,
2295 * then zero the entries at the top. 2523 * then zero the entries at the top.
2296 */ 2524 */
2297 tmp = be16_to_cpu(hdr_s->count) - count; 2525 tmp = (ichdr_s->count - count) * sizeof(xfs_attr_leaf_entry_t);
2298 tmp *= sizeof(xfs_attr_leaf_entry_t); 2526 entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s + count];
2299 entry_s = &leaf_s->entries[start_s + count]; 2527 entry_d = &xfs_attr3_leaf_entryp(leaf_s)[start_s];
2300 entry_d = &leaf_s->entries[start_s]; 2528 memmove(entry_d, entry_s, tmp);
2301 memmove((char *)entry_d, (char *)entry_s, tmp);
2302 2529
2303 tmp = count * sizeof(xfs_attr_leaf_entry_t); 2530 tmp = count * sizeof(xfs_attr_leaf_entry_t);
2304 entry_s = &leaf_s->entries[be16_to_cpu(hdr_s->count)]; 2531 entry_s = &xfs_attr3_leaf_entryp(leaf_s)[ichdr_s->count];
2305 ASSERT(((char *)entry_s + tmp) <= 2532 ASSERT(((char *)entry_s + tmp) <=
2306 ((char *)leaf_s + XFS_LBSIZE(mp))); 2533 ((char *)leaf_s + XFS_LBSIZE(mp)));
2307 memset((char *)entry_s, 0, tmp); 2534 memset(entry_s, 0, tmp);
2308 } 2535 }
2309 2536
2310 /* 2537 /*
2311 * Fill in the freemap information 2538 * Fill in the freemap information
2312 */ 2539 */
2313 hdr_d->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t)); 2540 ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_d);
2314 be16_add_cpu(&hdr_d->freemap[0].base, be16_to_cpu(hdr_d->count) * 2541 ichdr_d->freemap[0].base += ichdr_d->count * sizeof(xfs_attr_leaf_entry_t);
2315 sizeof(xfs_attr_leaf_entry_t)); 2542 ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base;
2316 hdr_d->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr_d->firstused) 2543 ichdr_d->freemap[1].base = 0;
2317 - be16_to_cpu(hdr_d->freemap[0].base)); 2544 ichdr_d->freemap[2].base = 0;
2318 hdr_d->freemap[1].base = 0; 2545 ichdr_d->freemap[1].size = 0;
2319 hdr_d->freemap[2].base = 0; 2546 ichdr_d->freemap[2].size = 0;
2320 hdr_d->freemap[1].size = 0; 2547 ichdr_s->holes = 1; /* leaf may not be compact */
2321 hdr_d->freemap[2].size = 0;
2322 hdr_s->holes = 1; /* leaf may not be compact */
2323}
2324
2325/*
2326 * Compare two leaf blocks "order".
2327 * Return 0 unless leaf2 should go before leaf1.
2328 */
2329int
2330xfs_attr_leaf_order(
2331 struct xfs_buf *leaf1_bp,
2332 struct xfs_buf *leaf2_bp)
2333{
2334 xfs_attr_leafblock_t *leaf1, *leaf2;
2335
2336 leaf1 = leaf1_bp->b_addr;
2337 leaf2 = leaf2_bp->b_addr;
2338 ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) &&
2339 (leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)));
2340 if ((be16_to_cpu(leaf1->hdr.count) > 0) &&
2341 (be16_to_cpu(leaf2->hdr.count) > 0) &&
2342 ((be32_to_cpu(leaf2->entries[0].hashval) <
2343 be32_to_cpu(leaf1->entries[0].hashval)) ||
2344 (be32_to_cpu(leaf2->entries[
2345 be16_to_cpu(leaf2->hdr.count)-1].hashval) <
2346 be32_to_cpu(leaf1->entries[
2347 be16_to_cpu(leaf1->hdr.count)-1].hashval)))) {
2348 return(1);
2349 }
2350 return(0);
2351} 2548}
2352 2549
2353/* 2550/*
@@ -2358,15 +2555,16 @@ xfs_attr_leaf_lasthash(
2358 struct xfs_buf *bp, 2555 struct xfs_buf *bp,
2359 int *count) 2556 int *count)
2360{ 2557{
2361 xfs_attr_leafblock_t *leaf; 2558 struct xfs_attr3_icleaf_hdr ichdr;
2559 struct xfs_attr_leaf_entry *entries;
2362 2560
2363 leaf = bp->b_addr; 2561 xfs_attr3_leaf_hdr_from_disk(&ichdr, bp->b_addr);
2364 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2562 entries = xfs_attr3_leaf_entryp(bp->b_addr);
2365 if (count) 2563 if (count)
2366 *count = be16_to_cpu(leaf->hdr.count); 2564 *count = ichdr.count;
2367 if (!leaf->hdr.count) 2565 if (!ichdr.count)
2368 return(0); 2566 return 0;
2369 return be32_to_cpu(leaf->entries[be16_to_cpu(leaf->hdr.count)-1].hashval); 2567 return be32_to_cpu(entries[ichdr.count - 1].hashval);
2370} 2568}
2371 2569
2372/* 2570/*
@@ -2376,20 +2574,21 @@ xfs_attr_leaf_lasthash(
2376STATIC int 2574STATIC int
2377xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) 2575xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
2378{ 2576{
2577 struct xfs_attr_leaf_entry *entries;
2379 xfs_attr_leaf_name_local_t *name_loc; 2578 xfs_attr_leaf_name_local_t *name_loc;
2380 xfs_attr_leaf_name_remote_t *name_rmt; 2579 xfs_attr_leaf_name_remote_t *name_rmt;
2381 int size; 2580 int size;
2382 2581
2383 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 2582 entries = xfs_attr3_leaf_entryp(leaf);
2384 if (leaf->entries[index].flags & XFS_ATTR_LOCAL) { 2583 if (entries[index].flags & XFS_ATTR_LOCAL) {
2385 name_loc = xfs_attr_leaf_name_local(leaf, index); 2584 name_loc = xfs_attr3_leaf_name_local(leaf, index);
2386 size = xfs_attr_leaf_entsize_local(name_loc->namelen, 2585 size = xfs_attr_leaf_entsize_local(name_loc->namelen,
2387 be16_to_cpu(name_loc->valuelen)); 2586 be16_to_cpu(name_loc->valuelen));
2388 } else { 2587 } else {
2389 name_rmt = xfs_attr_leaf_name_remote(leaf, index); 2588 name_rmt = xfs_attr3_leaf_name_remote(leaf, index);
2390 size = xfs_attr_leaf_entsize_remote(name_rmt->namelen); 2589 size = xfs_attr_leaf_entsize_remote(name_rmt->namelen);
2391 } 2590 }
2392 return(size); 2591 return size;
2393} 2592}
2394 2593
2395/* 2594/*
@@ -2414,35 +2613,40 @@ xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local)
2414 *local = 0; 2613 *local = 0;
2415 } 2614 }
2416 } 2615 }
2417 return(size); 2616 return size;
2418} 2617}
2419 2618
2420/* 2619/*
2421 * Copy out attribute list entries for attr_list(), for leaf attribute lists. 2620 * Copy out attribute list entries for attr_list(), for leaf attribute lists.
2422 */ 2621 */
2423int 2622int
2424xfs_attr_leaf_list_int( 2623xfs_attr3_leaf_list_int(
2425 struct xfs_buf *bp, 2624 struct xfs_buf *bp,
2426 xfs_attr_list_context_t *context) 2625 struct xfs_attr_list_context *context)
2427{ 2626{
2428 attrlist_cursor_kern_t *cursor; 2627 struct attrlist_cursor_kern *cursor;
2429 xfs_attr_leafblock_t *leaf; 2628 struct xfs_attr_leafblock *leaf;
2430 xfs_attr_leaf_entry_t *entry; 2629 struct xfs_attr3_icleaf_hdr ichdr;
2431 int retval, i; 2630 struct xfs_attr_leaf_entry *entries;
2631 struct xfs_attr_leaf_entry *entry;
2632 int retval;
2633 int i;
2634
2635 trace_xfs_attr_list_leaf(context);
2432 2636
2433 ASSERT(bp != NULL);
2434 leaf = bp->b_addr; 2637 leaf = bp->b_addr;
2638 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
2639 entries = xfs_attr3_leaf_entryp(leaf);
2640
2435 cursor = context->cursor; 2641 cursor = context->cursor;
2436 cursor->initted = 1; 2642 cursor->initted = 1;
2437 2643
2438 trace_xfs_attr_list_leaf(context);
2439
2440 /* 2644 /*
2441 * Re-find our place in the leaf block if this is a new syscall. 2645 * Re-find our place in the leaf block if this is a new syscall.
2442 */ 2646 */
2443 if (context->resynch) { 2647 if (context->resynch) {
2444 entry = &leaf->entries[0]; 2648 entry = &entries[0];
2445 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { 2649 for (i = 0; i < ichdr.count; entry++, i++) {
2446 if (be32_to_cpu(entry->hashval) == cursor->hashval) { 2650 if (be32_to_cpu(entry->hashval) == cursor->hashval) {
2447 if (cursor->offset == context->dupcnt) { 2651 if (cursor->offset == context->dupcnt) {
2448 context->dupcnt = 0; 2652 context->dupcnt = 0;
@@ -2455,12 +2659,12 @@ xfs_attr_leaf_list_int(
2455 break; 2659 break;
2456 } 2660 }
2457 } 2661 }
2458 if (i == be16_to_cpu(leaf->hdr.count)) { 2662 if (i == ichdr.count) {
2459 trace_xfs_attr_list_notfound(context); 2663 trace_xfs_attr_list_notfound(context);
2460 return(0); 2664 return 0;
2461 } 2665 }
2462 } else { 2666 } else {
2463 entry = &leaf->entries[0]; 2667 entry = &entries[0];
2464 i = 0; 2668 i = 0;
2465 } 2669 }
2466 context->resynch = 0; 2670 context->resynch = 0;
@@ -2469,7 +2673,7 @@ xfs_attr_leaf_list_int(
2469 * We have found our place, start copying out the new attributes. 2673 * We have found our place, start copying out the new attributes.
2470 */ 2674 */
2471 retval = 0; 2675 retval = 0;
2472 for ( ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) { 2676 for (; i < ichdr.count; entry++, i++) {
2473 if (be32_to_cpu(entry->hashval) != cursor->hashval) { 2677 if (be32_to_cpu(entry->hashval) != cursor->hashval) {
2474 cursor->hashval = be32_to_cpu(entry->hashval); 2678 cursor->hashval = be32_to_cpu(entry->hashval);
2475 cursor->offset = 0; 2679 cursor->offset = 0;
@@ -2480,7 +2684,7 @@ xfs_attr_leaf_list_int(
2480 2684
2481 if (entry->flags & XFS_ATTR_LOCAL) { 2685 if (entry->flags & XFS_ATTR_LOCAL) {
2482 xfs_attr_leaf_name_local_t *name_loc = 2686 xfs_attr_leaf_name_local_t *name_loc =
2483 xfs_attr_leaf_name_local(leaf, i); 2687 xfs_attr3_leaf_name_local(leaf, i);
2484 2688
2485 retval = context->put_listent(context, 2689 retval = context->put_listent(context,
2486 entry->flags, 2690 entry->flags,
@@ -2492,7 +2696,7 @@ xfs_attr_leaf_list_int(
2492 return retval; 2696 return retval;
2493 } else { 2697 } else {
2494 xfs_attr_leaf_name_remote_t *name_rmt = 2698 xfs_attr_leaf_name_remote_t *name_rmt =
2495 xfs_attr_leaf_name_remote(leaf, i); 2699 xfs_attr3_leaf_name_remote(leaf, i);
2496 2700
2497 int valuelen = be32_to_cpu(name_rmt->valuelen); 2701 int valuelen = be32_to_cpu(name_rmt->valuelen);
2498 2702
@@ -2532,7 +2736,7 @@ xfs_attr_leaf_list_int(
2532 cursor->offset++; 2736 cursor->offset++;
2533 } 2737 }
2534 trace_xfs_attr_list_leaf_end(context); 2738 trace_xfs_attr_list_leaf_end(context);
2535 return(retval); 2739 return retval;
2536} 2740}
2537 2741
2538 2742
@@ -2544,14 +2748,16 @@ xfs_attr_leaf_list_int(
2544 * Clear the INCOMPLETE flag on an entry in a leaf block. 2748 * Clear the INCOMPLETE flag on an entry in a leaf block.
2545 */ 2749 */
2546int 2750int
2547xfs_attr_leaf_clearflag(xfs_da_args_t *args) 2751xfs_attr3_leaf_clearflag(
2752 struct xfs_da_args *args)
2548{ 2753{
2549 xfs_attr_leafblock_t *leaf; 2754 struct xfs_attr_leafblock *leaf;
2550 xfs_attr_leaf_entry_t *entry; 2755 struct xfs_attr_leaf_entry *entry;
2551 xfs_attr_leaf_name_remote_t *name_rmt; 2756 struct xfs_attr_leaf_name_remote *name_rmt;
2552 struct xfs_buf *bp; 2757 struct xfs_buf *bp;
2553 int error; 2758 int error;
2554#ifdef DEBUG 2759#ifdef DEBUG
2760 struct xfs_attr3_icleaf_hdr ichdr;
2555 xfs_attr_leaf_name_local_t *name_loc; 2761 xfs_attr_leaf_name_local_t *name_loc;
2556 int namelen; 2762 int namelen;
2557 char *name; 2763 char *name;
@@ -2561,23 +2767,25 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
2561 /* 2767 /*
2562 * Set up the operation. 2768 * Set up the operation.
2563 */ 2769 */
2564 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); 2770 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
2565 if (error) 2771 if (error)
2566 return(error); 2772 return(error);
2567 2773
2568 leaf = bp->b_addr; 2774 leaf = bp->b_addr;
2569 ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); 2775 entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
2570 ASSERT(args->index >= 0);
2571 entry = &leaf->entries[ args->index ];
2572 ASSERT(entry->flags & XFS_ATTR_INCOMPLETE); 2776 ASSERT(entry->flags & XFS_ATTR_INCOMPLETE);
2573 2777
2574#ifdef DEBUG 2778#ifdef DEBUG
2779 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
2780 ASSERT(args->index < ichdr.count);
2781 ASSERT(args->index >= 0);
2782
2575 if (entry->flags & XFS_ATTR_LOCAL) { 2783 if (entry->flags & XFS_ATTR_LOCAL) {
2576 name_loc = xfs_attr_leaf_name_local(leaf, args->index); 2784 name_loc = xfs_attr3_leaf_name_local(leaf, args->index);
2577 namelen = name_loc->namelen; 2785 namelen = name_loc->namelen;
2578 name = (char *)name_loc->nameval; 2786 name = (char *)name_loc->nameval;
2579 } else { 2787 } else {
2580 name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); 2788 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
2581 namelen = name_rmt->namelen; 2789 namelen = name_rmt->namelen;
2582 name = (char *)name_rmt->name; 2790 name = (char *)name_rmt->name;
2583 } 2791 }
@@ -2592,7 +2800,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
2592 2800
2593 if (args->rmtblkno) { 2801 if (args->rmtblkno) {
2594 ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0); 2802 ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0);
2595 name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); 2803 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
2596 name_rmt->valueblk = cpu_to_be32(args->rmtblkno); 2804 name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
2597 name_rmt->valuelen = cpu_to_be32(args->valuelen); 2805 name_rmt->valuelen = cpu_to_be32(args->valuelen);
2598 xfs_trans_log_buf(args->trans, bp, 2806 xfs_trans_log_buf(args->trans, bp,
@@ -2609,34 +2817,41 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
2609 * Set the INCOMPLETE flag on an entry in a leaf block. 2817 * Set the INCOMPLETE flag on an entry in a leaf block.
2610 */ 2818 */
2611int 2819int
2612xfs_attr_leaf_setflag(xfs_da_args_t *args) 2820xfs_attr3_leaf_setflag(
2821 struct xfs_da_args *args)
2613{ 2822{
2614 xfs_attr_leafblock_t *leaf; 2823 struct xfs_attr_leafblock *leaf;
2615 xfs_attr_leaf_entry_t *entry; 2824 struct xfs_attr_leaf_entry *entry;
2616 xfs_attr_leaf_name_remote_t *name_rmt; 2825 struct xfs_attr_leaf_name_remote *name_rmt;
2617 struct xfs_buf *bp; 2826 struct xfs_buf *bp;
2618 int error; 2827 int error;
2828#ifdef DEBUG
2829 struct xfs_attr3_icleaf_hdr ichdr;
2830#endif
2619 2831
2620 trace_xfs_attr_leaf_setflag(args); 2832 trace_xfs_attr_leaf_setflag(args);
2621 2833
2622 /* 2834 /*
2623 * Set up the operation. 2835 * Set up the operation.
2624 */ 2836 */
2625 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); 2837 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
2626 if (error) 2838 if (error)
2627 return(error); 2839 return(error);
2628 2840
2629 leaf = bp->b_addr; 2841 leaf = bp->b_addr;
2630 ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); 2842#ifdef DEBUG
2843 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
2844 ASSERT(args->index < ichdr.count);
2631 ASSERT(args->index >= 0); 2845 ASSERT(args->index >= 0);
2632 entry = &leaf->entries[ args->index ]; 2846#endif
2847 entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
2633 2848
2634 ASSERT((entry->flags & XFS_ATTR_INCOMPLETE) == 0); 2849 ASSERT((entry->flags & XFS_ATTR_INCOMPLETE) == 0);
2635 entry->flags |= XFS_ATTR_INCOMPLETE; 2850 entry->flags |= XFS_ATTR_INCOMPLETE;
2636 xfs_trans_log_buf(args->trans, bp, 2851 xfs_trans_log_buf(args->trans, bp,
2637 XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); 2852 XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
2638 if ((entry->flags & XFS_ATTR_LOCAL) == 0) { 2853 if ((entry->flags & XFS_ATTR_LOCAL) == 0) {
2639 name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); 2854 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
2640 name_rmt->valueblk = 0; 2855 name_rmt->valueblk = 0;
2641 name_rmt->valuelen = 0; 2856 name_rmt->valuelen = 0;
2642 xfs_trans_log_buf(args->trans, bp, 2857 xfs_trans_log_buf(args->trans, bp,
@@ -2657,14 +2872,20 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
2657 * Note that they could be in different blocks, or in the same block. 2872 * Note that they could be in different blocks, or in the same block.
2658 */ 2873 */
2659int 2874int
2660xfs_attr_leaf_flipflags(xfs_da_args_t *args) 2875xfs_attr3_leaf_flipflags(
2876 struct xfs_da_args *args)
2661{ 2877{
2662 xfs_attr_leafblock_t *leaf1, *leaf2; 2878 struct xfs_attr_leafblock *leaf1;
2663 xfs_attr_leaf_entry_t *entry1, *entry2; 2879 struct xfs_attr_leafblock *leaf2;
2664 xfs_attr_leaf_name_remote_t *name_rmt; 2880 struct xfs_attr_leaf_entry *entry1;
2665 struct xfs_buf *bp1, *bp2; 2881 struct xfs_attr_leaf_entry *entry2;
2882 struct xfs_attr_leaf_name_remote *name_rmt;
2883 struct xfs_buf *bp1;
2884 struct xfs_buf *bp2;
2666 int error; 2885 int error;
2667#ifdef DEBUG 2886#ifdef DEBUG
2887 struct xfs_attr3_icleaf_hdr ichdr1;
2888 struct xfs_attr3_icleaf_hdr ichdr2;
2668 xfs_attr_leaf_name_local_t *name_loc; 2889 xfs_attr_leaf_name_local_t *name_loc;
2669 int namelen1, namelen2; 2890 int namelen1, namelen2;
2670 char *name1, *name2; 2891 char *name1, *name2;
@@ -2675,7 +2896,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2675 /* 2896 /*
2676 * Read the block containing the "old" attr 2897 * Read the block containing the "old" attr
2677 */ 2898 */
2678 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1); 2899 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1);
2679 if (error) 2900 if (error)
2680 return error; 2901 return error;
2681 2902
@@ -2683,7 +2904,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2683 * Read the block containing the "new" attr, if it is different 2904 * Read the block containing the "new" attr, if it is different
2684 */ 2905 */
2685 if (args->blkno2 != args->blkno) { 2906 if (args->blkno2 != args->blkno) {
2686 error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno2, 2907 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno2,
2687 -1, &bp2); 2908 -1, &bp2);
2688 if (error) 2909 if (error)
2689 return error; 2910 return error;
@@ -2692,31 +2913,35 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2692 } 2913 }
2693 2914
2694 leaf1 = bp1->b_addr; 2915 leaf1 = bp1->b_addr;
2695 ASSERT(args->index < be16_to_cpu(leaf1->hdr.count)); 2916 entry1 = &xfs_attr3_leaf_entryp(leaf1)[args->index];
2696 ASSERT(args->index >= 0);
2697 entry1 = &leaf1->entries[ args->index ];
2698 2917
2699 leaf2 = bp2->b_addr; 2918 leaf2 = bp2->b_addr;
2700 ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count)); 2919 entry2 = &xfs_attr3_leaf_entryp(leaf2)[args->index2];
2701 ASSERT(args->index2 >= 0);
2702 entry2 = &leaf2->entries[ args->index2 ];
2703 2920
2704#ifdef DEBUG 2921#ifdef DEBUG
2922 xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
2923 ASSERT(args->index < ichdr1.count);
2924 ASSERT(args->index >= 0);
2925
2926 xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
2927 ASSERT(args->index2 < ichdr2.count);
2928 ASSERT(args->index2 >= 0);
2929
2705 if (entry1->flags & XFS_ATTR_LOCAL) { 2930 if (entry1->flags & XFS_ATTR_LOCAL) {
2706 name_loc = xfs_attr_leaf_name_local(leaf1, args->index); 2931 name_loc = xfs_attr3_leaf_name_local(leaf1, args->index);
2707 namelen1 = name_loc->namelen; 2932 namelen1 = name_loc->namelen;
2708 name1 = (char *)name_loc->nameval; 2933 name1 = (char *)name_loc->nameval;
2709 } else { 2934 } else {
2710 name_rmt = xfs_attr_leaf_name_remote(leaf1, args->index); 2935 name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index);
2711 namelen1 = name_rmt->namelen; 2936 namelen1 = name_rmt->namelen;
2712 name1 = (char *)name_rmt->name; 2937 name1 = (char *)name_rmt->name;
2713 } 2938 }
2714 if (entry2->flags & XFS_ATTR_LOCAL) { 2939 if (entry2->flags & XFS_ATTR_LOCAL) {
2715 name_loc = xfs_attr_leaf_name_local(leaf2, args->index2); 2940 name_loc = xfs_attr3_leaf_name_local(leaf2, args->index2);
2716 namelen2 = name_loc->namelen; 2941 namelen2 = name_loc->namelen;
2717 name2 = (char *)name_loc->nameval; 2942 name2 = (char *)name_loc->nameval;
2718 } else { 2943 } else {
2719 name_rmt = xfs_attr_leaf_name_remote(leaf2, args->index2); 2944 name_rmt = xfs_attr3_leaf_name_remote(leaf2, args->index2);
2720 namelen2 = name_rmt->namelen; 2945 namelen2 = name_rmt->namelen;
2721 name2 = (char *)name_rmt->name; 2946 name2 = (char *)name_rmt->name;
2722 } 2947 }
@@ -2733,7 +2958,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2733 XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1))); 2958 XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1)));
2734 if (args->rmtblkno) { 2959 if (args->rmtblkno) {
2735 ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0); 2960 ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0);
2736 name_rmt = xfs_attr_leaf_name_remote(leaf1, args->index); 2961 name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index);
2737 name_rmt->valueblk = cpu_to_be32(args->rmtblkno); 2962 name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
2738 name_rmt->valuelen = cpu_to_be32(args->valuelen); 2963 name_rmt->valuelen = cpu_to_be32(args->valuelen);
2739 xfs_trans_log_buf(args->trans, bp1, 2964 xfs_trans_log_buf(args->trans, bp1,
@@ -2744,7 +2969,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2744 xfs_trans_log_buf(args->trans, bp2, 2969 xfs_trans_log_buf(args->trans, bp2,
2745 XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2))); 2970 XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2)));
2746 if ((entry2->flags & XFS_ATTR_LOCAL) == 0) { 2971 if ((entry2->flags & XFS_ATTR_LOCAL) == 0) {
2747 name_rmt = xfs_attr_leaf_name_remote(leaf2, args->index2); 2972 name_rmt = xfs_attr3_leaf_name_remote(leaf2, args->index2);
2748 name_rmt->valueblk = 0; 2973 name_rmt->valueblk = 0;
2749 name_rmt->valuelen = 0; 2974 name_rmt->valuelen = 0;
2750 xfs_trans_log_buf(args->trans, bp2, 2975 xfs_trans_log_buf(args->trans, bp2,
@@ -2756,7 +2981,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2756 */ 2981 */
2757 error = xfs_trans_roll(&args->trans, args->dp); 2982 error = xfs_trans_roll(&args->trans, args->dp);
2758 2983
2759 return(error); 2984 return error;
2760} 2985}
2761 2986
2762/*======================================================================== 2987/*========================================================================
@@ -2768,12 +2993,14 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2768 * We're doing a depth-first traversal in order to invalidate everything. 2993 * We're doing a depth-first traversal in order to invalidate everything.
2769 */ 2994 */
2770int 2995int
2771xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) 2996xfs_attr3_root_inactive(
2997 struct xfs_trans **trans,
2998 struct xfs_inode *dp)
2772{ 2999{
2773 xfs_da_blkinfo_t *info; 3000 struct xfs_da_blkinfo *info;
2774 xfs_daddr_t blkno; 3001 struct xfs_buf *bp;
2775 struct xfs_buf *bp; 3002 xfs_daddr_t blkno;
2776 int error; 3003 int error;
2777 3004
2778 /* 3005 /*
2779 * Read block 0 to see what we have to work with. 3006 * Read block 0 to see what we have to work with.
@@ -2781,40 +3008,46 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
2781 * the extents in reverse order the extent containing 3008 * the extents in reverse order the extent containing
2782 * block 0 must still be there. 3009 * block 0 must still be there.
2783 */ 3010 */
2784 error = xfs_da_node_read(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK); 3011 error = xfs_da3_node_read(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK);
2785 if (error) 3012 if (error)
2786 return(error); 3013 return error;
2787 blkno = XFS_BUF_ADDR(bp); 3014 blkno = bp->b_bn;
2788 3015
2789 /* 3016 /*
2790 * Invalidate the tree, even if the "tree" is only a single leaf block. 3017 * Invalidate the tree, even if the "tree" is only a single leaf block.
2791 * This is a depth-first traversal! 3018 * This is a depth-first traversal!
2792 */ 3019 */
2793 info = bp->b_addr; 3020 info = bp->b_addr;
2794 if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { 3021 switch (info->magic) {
2795 error = xfs_attr_node_inactive(trans, dp, bp, 1); 3022 case cpu_to_be16(XFS_DA_NODE_MAGIC):
2796 } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) { 3023 case cpu_to_be16(XFS_DA3_NODE_MAGIC):
2797 error = xfs_attr_leaf_inactive(trans, dp, bp); 3024 error = xfs_attr3_node_inactive(trans, dp, bp, 1);
2798 } else { 3025 break;
3026 case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
3027 case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
3028 error = xfs_attr3_leaf_inactive(trans, dp, bp);
3029 break;
3030 default:
2799 error = XFS_ERROR(EIO); 3031 error = XFS_ERROR(EIO);
2800 xfs_trans_brelse(*trans, bp); 3032 xfs_trans_brelse(*trans, bp);
3033 break;
2801 } 3034 }
2802 if (error) 3035 if (error)
2803 return(error); 3036 return error;
2804 3037
2805 /* 3038 /*
2806 * Invalidate the incore copy of the root block. 3039 * Invalidate the incore copy of the root block.
2807 */ 3040 */
2808 error = xfs_da_get_buf(*trans, dp, 0, blkno, &bp, XFS_ATTR_FORK); 3041 error = xfs_da_get_buf(*trans, dp, 0, blkno, &bp, XFS_ATTR_FORK);
2809 if (error) 3042 if (error)
2810 return(error); 3043 return error;
2811 xfs_trans_binval(*trans, bp); /* remove from cache */ 3044 xfs_trans_binval(*trans, bp); /* remove from cache */
2812 /* 3045 /*
2813 * Commit the invalidate and start the next transaction. 3046 * Commit the invalidate and start the next transaction.
2814 */ 3047 */
2815 error = xfs_trans_roll(trans, dp); 3048 error = xfs_trans_roll(trans, dp);
2816 3049
2817 return (error); 3050 return error;
2818} 3051}
2819 3052
2820/* 3053/*
@@ -2822,7 +3055,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
2822 * We're doing a depth-first traversal in order to invalidate everything. 3055 * We're doing a depth-first traversal in order to invalidate everything.
2823 */ 3056 */
2824STATIC int 3057STATIC int
2825xfs_attr_node_inactive( 3058xfs_attr3_node_inactive(
2826 struct xfs_trans **trans, 3059 struct xfs_trans **trans,
2827 struct xfs_inode *dp, 3060 struct xfs_inode *dp,
2828 struct xfs_buf *bp, 3061 struct xfs_buf *bp,
@@ -2832,26 +3065,28 @@ xfs_attr_node_inactive(
2832 xfs_da_intnode_t *node; 3065 xfs_da_intnode_t *node;
2833 xfs_dablk_t child_fsb; 3066 xfs_dablk_t child_fsb;
2834 xfs_daddr_t parent_blkno, child_blkno; 3067 xfs_daddr_t parent_blkno, child_blkno;
2835 int error, count, i; 3068 int error, i;
2836 struct xfs_buf *child_bp; 3069 struct xfs_buf *child_bp;
3070 struct xfs_da_node_entry *btree;
3071 struct xfs_da3_icnode_hdr ichdr;
2837 3072
2838 /* 3073 /*
2839 * Since this code is recursive (gasp!) we must protect ourselves. 3074 * Since this code is recursive (gasp!) we must protect ourselves.
2840 */ 3075 */
2841 if (level > XFS_DA_NODE_MAXDEPTH) { 3076 if (level > XFS_DA_NODE_MAXDEPTH) {
2842 xfs_trans_brelse(*trans, bp); /* no locks for later trans */ 3077 xfs_trans_brelse(*trans, bp); /* no locks for later trans */
2843 return(XFS_ERROR(EIO)); 3078 return XFS_ERROR(EIO);
2844 } 3079 }
2845 3080
2846 node = bp->b_addr; 3081 node = bp->b_addr;
2847 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 3082 xfs_da3_node_hdr_from_disk(&ichdr, node);
2848 parent_blkno = XFS_BUF_ADDR(bp); /* save for re-read later */ 3083 parent_blkno = bp->b_bn;
2849 count = be16_to_cpu(node->hdr.count); 3084 if (!ichdr.count) {
2850 if (!count) {
2851 xfs_trans_brelse(*trans, bp); 3085 xfs_trans_brelse(*trans, bp);
2852 return(0); 3086 return 0;
2853 } 3087 }
2854 child_fsb = be32_to_cpu(node->btree[0].before); 3088 btree = xfs_da3_node_tree_p(node);
3089 child_fsb = be32_to_cpu(btree[0].before);
2855 xfs_trans_brelse(*trans, bp); /* no locks for later trans */ 3090 xfs_trans_brelse(*trans, bp); /* no locks for later trans */
2856 3091
2857 /* 3092 /*
@@ -2859,14 +3094,14 @@ xfs_attr_node_inactive(
2859 * over the leaves removing all of them. If this is higher up 3094 * over the leaves removing all of them. If this is higher up
2860 * in the tree, recurse downward. 3095 * in the tree, recurse downward.
2861 */ 3096 */
2862 for (i = 0; i < count; i++) { 3097 for (i = 0; i < ichdr.count; i++) {
2863 /* 3098 /*
2864 * Read the subsidiary block to see what we have to work with. 3099 * Read the subsidiary block to see what we have to work with.
2865 * Don't do this in a transaction. This is a depth-first 3100 * Don't do this in a transaction. This is a depth-first
2866 * traversal of the tree so we may deal with many blocks 3101 * traversal of the tree so we may deal with many blocks
2867 * before we come back to this one. 3102 * before we come back to this one.
2868 */ 3103 */
2869 error = xfs_da_node_read(*trans, dp, child_fsb, -2, &child_bp, 3104 error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp,
2870 XFS_ATTR_FORK); 3105 XFS_ATTR_FORK);
2871 if (error) 3106 if (error)
2872 return(error); 3107 return(error);
@@ -2878,18 +3113,24 @@ xfs_attr_node_inactive(
2878 * Invalidate the subtree, however we have to. 3113 * Invalidate the subtree, however we have to.
2879 */ 3114 */
2880 info = child_bp->b_addr; 3115 info = child_bp->b_addr;
2881 if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { 3116 switch (info->magic) {
2882 error = xfs_attr_node_inactive(trans, dp, 3117 case cpu_to_be16(XFS_DA_NODE_MAGIC):
2883 child_bp, level+1); 3118 case cpu_to_be16(XFS_DA3_NODE_MAGIC):
2884 } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) { 3119 error = xfs_attr3_node_inactive(trans, dp,
2885 error = xfs_attr_leaf_inactive(trans, dp, 3120 child_bp, level + 1);
2886 child_bp); 3121 break;
2887 } else { 3122 case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
3123 case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
3124 error = xfs_attr3_leaf_inactive(trans, dp,
3125 child_bp);
3126 break;
3127 default:
2888 error = XFS_ERROR(EIO); 3128 error = XFS_ERROR(EIO);
2889 xfs_trans_brelse(*trans, child_bp); 3129 xfs_trans_brelse(*trans, child_bp);
3130 break;
2890 } 3131 }
2891 if (error) 3132 if (error)
2892 return(error); 3133 return error;
2893 3134
2894 /* 3135 /*
2895 * Remove the subsidiary block from the cache 3136 * Remove the subsidiary block from the cache
@@ -2898,7 +3139,7 @@ xfs_attr_node_inactive(
2898 error = xfs_da_get_buf(*trans, dp, 0, child_blkno, 3139 error = xfs_da_get_buf(*trans, dp, 0, child_blkno,
2899 &child_bp, XFS_ATTR_FORK); 3140 &child_bp, XFS_ATTR_FORK);
2900 if (error) 3141 if (error)
2901 return(error); 3142 return error;
2902 xfs_trans_binval(*trans, child_bp); 3143 xfs_trans_binval(*trans, child_bp);
2903 } 3144 }
2904 3145
@@ -2906,12 +3147,12 @@ xfs_attr_node_inactive(
2906 * If we're not done, re-read the parent to get the next 3147 * If we're not done, re-read the parent to get the next
2907 * child block number. 3148 * child block number.
2908 */ 3149 */
2909 if ((i+1) < count) { 3150 if (i + 1 < ichdr.count) {
2910 error = xfs_da_node_read(*trans, dp, 0, parent_blkno, 3151 error = xfs_da3_node_read(*trans, dp, 0, parent_blkno,
2911 &bp, XFS_ATTR_FORK); 3152 &bp, XFS_ATTR_FORK);
2912 if (error) 3153 if (error)
2913 return(error); 3154 return error;
2914 child_fsb = be32_to_cpu(node->btree[i+1].before); 3155 child_fsb = be32_to_cpu(btree[i + 1].before);
2915 xfs_trans_brelse(*trans, bp); 3156 xfs_trans_brelse(*trans, bp);
2916 } 3157 }
2917 /* 3158 /*
@@ -2919,10 +3160,10 @@ xfs_attr_node_inactive(
2919 */ 3160 */
2920 error = xfs_trans_roll(trans, dp); 3161 error = xfs_trans_roll(trans, dp);
2921 if (error) 3162 if (error)
2922 return (error); 3163 return error;
2923 } 3164 }
2924 3165
2925 return(0); 3166 return 0;
2926} 3167}
2927 3168
2928/* 3169/*
@@ -2932,29 +3173,35 @@ xfs_attr_node_inactive(
2932 * caught holding something that the logging code wants to flush to disk. 3173 * caught holding something that the logging code wants to flush to disk.
2933 */ 3174 */
2934STATIC int 3175STATIC int
2935xfs_attr_leaf_inactive( 3176xfs_attr3_leaf_inactive(
2936 struct xfs_trans **trans, 3177 struct xfs_trans **trans,
2937 struct xfs_inode *dp, 3178 struct xfs_inode *dp,
2938 struct xfs_buf *bp) 3179 struct xfs_buf *bp)
2939{ 3180{
2940 xfs_attr_leafblock_t *leaf; 3181 struct xfs_attr_leafblock *leaf;
2941 xfs_attr_leaf_entry_t *entry; 3182 struct xfs_attr3_icleaf_hdr ichdr;
2942 xfs_attr_leaf_name_remote_t *name_rmt; 3183 struct xfs_attr_leaf_entry *entry;
2943 xfs_attr_inactive_list_t *list, *lp; 3184 struct xfs_attr_leaf_name_remote *name_rmt;
2944 int error, count, size, tmp, i; 3185 struct xfs_attr_inactive_list *list;
3186 struct xfs_attr_inactive_list *lp;
3187 int error;
3188 int count;
3189 int size;
3190 int tmp;
3191 int i;
2945 3192
2946 leaf = bp->b_addr; 3193 leaf = bp->b_addr;
2947 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 3194 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
2948 3195
2949 /* 3196 /*
2950 * Count the number of "remote" value extents. 3197 * Count the number of "remote" value extents.
2951 */ 3198 */
2952 count = 0; 3199 count = 0;
2953 entry = &leaf->entries[0]; 3200 entry = xfs_attr3_leaf_entryp(leaf);
2954 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { 3201 for (i = 0; i < ichdr.count; entry++, i++) {
2955 if (be16_to_cpu(entry->nameidx) && 3202 if (be16_to_cpu(entry->nameidx) &&
2956 ((entry->flags & XFS_ATTR_LOCAL) == 0)) { 3203 ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
2957 name_rmt = xfs_attr_leaf_name_remote(leaf, i); 3204 name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
2958 if (name_rmt->valueblk) 3205 if (name_rmt->valueblk)
2959 count++; 3206 count++;
2960 } 3207 }
@@ -2965,24 +3212,24 @@ xfs_attr_leaf_inactive(
2965 */ 3212 */
2966 if (count == 0) { 3213 if (count == 0) {
2967 xfs_trans_brelse(*trans, bp); 3214 xfs_trans_brelse(*trans, bp);
2968 return(0); 3215 return 0;
2969 } 3216 }
2970 3217
2971 /* 3218 /*
2972 * Allocate storage for a list of all the "remote" value extents. 3219 * Allocate storage for a list of all the "remote" value extents.
2973 */ 3220 */
2974 size = count * sizeof(xfs_attr_inactive_list_t); 3221 size = count * sizeof(xfs_attr_inactive_list_t);
2975 list = (xfs_attr_inactive_list_t *)kmem_alloc(size, KM_SLEEP); 3222 list = kmem_alloc(size, KM_SLEEP);
2976 3223
2977 /* 3224 /*
2978 * Identify each of the "remote" value extents. 3225 * Identify each of the "remote" value extents.
2979 */ 3226 */
2980 lp = list; 3227 lp = list;
2981 entry = &leaf->entries[0]; 3228 entry = xfs_attr3_leaf_entryp(leaf);
2982 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { 3229 for (i = 0; i < ichdr.count; entry++, i++) {
2983 if (be16_to_cpu(entry->nameidx) && 3230 if (be16_to_cpu(entry->nameidx) &&
2984 ((entry->flags & XFS_ATTR_LOCAL) == 0)) { 3231 ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
2985 name_rmt = xfs_attr_leaf_name_remote(leaf, i); 3232 name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
2986 if (name_rmt->valueblk) { 3233 if (name_rmt->valueblk) {
2987 lp->valueblk = be32_to_cpu(name_rmt->valueblk); 3234 lp->valueblk = be32_to_cpu(name_rmt->valueblk);
2988 lp->valuelen = XFS_B_TO_FSB(dp->i_mount, 3235 lp->valuelen = XFS_B_TO_FSB(dp->i_mount,
@@ -2998,15 +3245,15 @@ xfs_attr_leaf_inactive(
2998 */ 3245 */
2999 error = 0; 3246 error = 0;
3000 for (lp = list, i = 0; i < count; i++, lp++) { 3247 for (lp = list, i = 0; i < count; i++, lp++) {
3001 tmp = xfs_attr_leaf_freextent(trans, dp, 3248 tmp = xfs_attr3_leaf_freextent(trans, dp,
3002 lp->valueblk, lp->valuelen); 3249 lp->valueblk, lp->valuelen);
3003 3250
3004 if (error == 0) 3251 if (error == 0)
3005 error = tmp; /* save only the 1st errno */ 3252 error = tmp; /* save only the 1st errno */
3006 } 3253 }
3007 3254
3008 kmem_free((xfs_caddr_t)list); 3255 kmem_free(list);
3009 return(error); 3256 return error;
3010} 3257}
3011 3258
3012/* 3259/*
@@ -3014,14 +3261,20 @@ xfs_attr_leaf_inactive(
3014 * invalidate any buffers that are incore/in transactions. 3261 * invalidate any buffers that are incore/in transactions.
3015 */ 3262 */
3016STATIC int 3263STATIC int
3017xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, 3264xfs_attr3_leaf_freextent(
3018 xfs_dablk_t blkno, int blkcnt) 3265 struct xfs_trans **trans,
3266 struct xfs_inode *dp,
3267 xfs_dablk_t blkno,
3268 int blkcnt)
3019{ 3269{
3020 xfs_bmbt_irec_t map; 3270 struct xfs_bmbt_irec map;
3021 xfs_dablk_t tblkno; 3271 struct xfs_buf *bp;
3022 int tblkcnt, dblkcnt, nmap, error; 3272 xfs_dablk_t tblkno;
3023 xfs_daddr_t dblkno; 3273 xfs_daddr_t dblkno;
3024 xfs_buf_t *bp; 3274 int tblkcnt;
3275 int dblkcnt;
3276 int nmap;
3277 int error;
3025 3278
3026 /* 3279 /*
3027 * Roll through the "value", invalidating the attribute value's 3280 * Roll through the "value", invalidating the attribute value's
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 77de139a58f0..f9d7846097e2 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -89,7 +90,7 @@ typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */
89 90
90typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */ 91typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */
91 __be32 hashval; /* hash value of name */ 92 __be32 hashval; /* hash value of name */
92 __be16 nameidx; /* index into buffer of name/value */ 93 __be16 nameidx; /* index into buffer of name/value */
93 __u8 flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */ 94 __u8 flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
94 __u8 pad2; /* unused pad byte */ 95 __u8 pad2; /* unused pad byte */
95} xfs_attr_leaf_entry_t; 96} xfs_attr_leaf_entry_t;
@@ -115,6 +116,54 @@ typedef struct xfs_attr_leafblock {
115} xfs_attr_leafblock_t; 116} xfs_attr_leafblock_t;
116 117
117/* 118/*
119 * CRC enabled leaf structures. Called "version 3" structures to match the
120 * version number of the directory and dablk structures for this feature, and
121 * attr2 is already taken by the variable inode attribute fork size feature.
122 */
123struct xfs_attr3_leaf_hdr {
124 struct xfs_da3_blkinfo info;
125 __be16 count;
126 __be16 usedbytes;
127 __be16 firstused;
128 __u8 holes;
129 __u8 pad1;
130 struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE];
131};
132
133#define XFS_ATTR3_LEAF_CRC_OFF (offsetof(struct xfs_attr3_leaf_hdr, info.crc))
134
135struct xfs_attr3_leafblock {
136 struct xfs_attr3_leaf_hdr hdr;
137 struct xfs_attr_leaf_entry entries[1];
138
139 /*
140 * The rest of the block contains the following structures after the
141 * leaf entries, growing from the bottom up. The variables are never
142 * referenced, the locations accessed purely from helper functions.
143 *
144 * struct xfs_attr_leaf_name_local
145 * struct xfs_attr_leaf_name_remote
146 */
147};
148
149/*
150 * incore, neutral version of the attribute leaf header
151 */
152struct xfs_attr3_icleaf_hdr {
153 __uint32_t forw;
154 __uint32_t back;
155 __uint16_t magic;
156 __uint16_t count;
157 __uint16_t usedbytes;
158 __uint16_t firstused;
159 __u8 holes;
160 struct {
161 __uint16_t base;
162 __uint16_t size;
163 } freemap[XFS_ATTR_LEAF_MAPSIZE];
164};
165
166/*
118 * Flags used in the leaf_entry[i].flags field. 167 * Flags used in the leaf_entry[i].flags field.
119 * NOTE: the INCOMPLETE bit must not collide with the flags bits specified 168 * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
120 * on the system call, they are "or"ed together for various operations. 169 * on the system call, they are "or"ed together for various operations.
@@ -147,26 +196,43 @@ typedef struct xfs_attr_leafblock {
147 */ 196 */
148#define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t)) 197#define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t))
149 198
199static inline int
200xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp)
201{
202 if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
203 return sizeof(struct xfs_attr3_leaf_hdr);
204 return sizeof(struct xfs_attr_leaf_hdr);
205}
206
207static inline struct xfs_attr_leaf_entry *
208xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp)
209{
210 if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
211 return &((struct xfs_attr3_leafblock *)leafp)->entries[0];
212 return &leafp->entries[0];
213}
214
150/* 215/*
151 * Cast typed pointers for "local" and "remote" name/value structs. 216 * Cast typed pointers for "local" and "remote" name/value structs.
152 */ 217 */
153static inline xfs_attr_leaf_name_remote_t * 218static inline char *
154xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx) 219xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
155{ 220{
156 return (xfs_attr_leaf_name_remote_t *) 221 struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp);
157 &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)]; 222
223 return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)];
158} 224}
159 225
160static inline xfs_attr_leaf_name_local_t * 226static inline xfs_attr_leaf_name_remote_t *
161xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) 227xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
162{ 228{
163 return (xfs_attr_leaf_name_local_t *) 229 return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx);
164 &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)];
165} 230}
166 231
167static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx) 232static inline xfs_attr_leaf_name_local_t *
233xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
168{ 234{
169 return &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)]; 235 return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx);
170} 236}
171 237
172/* 238/*
@@ -221,37 +287,37 @@ int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
221/* 287/*
222 * Internal routines when attribute fork size == XFS_LBSIZE(mp). 288 * Internal routines when attribute fork size == XFS_LBSIZE(mp).
223 */ 289 */
224int xfs_attr_leaf_to_node(struct xfs_da_args *args); 290int xfs_attr3_leaf_to_node(struct xfs_da_args *args);
225int xfs_attr_leaf_to_shortform(struct xfs_buf *bp, 291int xfs_attr3_leaf_to_shortform(struct xfs_buf *bp,
226 struct xfs_da_args *args, int forkoff); 292 struct xfs_da_args *args, int forkoff);
227int xfs_attr_leaf_clearflag(struct xfs_da_args *args); 293int xfs_attr3_leaf_clearflag(struct xfs_da_args *args);
228int xfs_attr_leaf_setflag(struct xfs_da_args *args); 294int xfs_attr3_leaf_setflag(struct xfs_da_args *args);
229int xfs_attr_leaf_flipflags(xfs_da_args_t *args); 295int xfs_attr3_leaf_flipflags(struct xfs_da_args *args);
230 296
231/* 297/*
232 * Routines used for growing the Btree. 298 * Routines used for growing the Btree.
233 */ 299 */
234int xfs_attr_leaf_split(struct xfs_da_state *state, 300int xfs_attr3_leaf_split(struct xfs_da_state *state,
235 struct xfs_da_state_blk *oldblk, 301 struct xfs_da_state_blk *oldblk,
236 struct xfs_da_state_blk *newblk); 302 struct xfs_da_state_blk *newblk);
237int xfs_attr_leaf_lookup_int(struct xfs_buf *leaf, 303int xfs_attr3_leaf_lookup_int(struct xfs_buf *leaf,
238 struct xfs_da_args *args); 304 struct xfs_da_args *args);
239int xfs_attr_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args); 305int xfs_attr3_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args);
240int xfs_attr_leaf_add(struct xfs_buf *leaf_buffer, 306int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
241 struct xfs_da_args *args); 307 struct xfs_da_args *args);
242int xfs_attr_leaf_remove(struct xfs_buf *leaf_buffer, 308int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer,
243 struct xfs_da_args *args); 309 struct xfs_da_args *args);
244int xfs_attr_leaf_list_int(struct xfs_buf *bp, 310int xfs_attr3_leaf_list_int(struct xfs_buf *bp,
245 struct xfs_attr_list_context *context); 311 struct xfs_attr_list_context *context);
246 312
247/* 313/*
248 * Routines used for shrinking the Btree. 314 * Routines used for shrinking the Btree.
249 */ 315 */
250int xfs_attr_leaf_toosmall(struct xfs_da_state *state, int *retval); 316int xfs_attr3_leaf_toosmall(struct xfs_da_state *state, int *retval);
251void xfs_attr_leaf_unbalance(struct xfs_da_state *state, 317void xfs_attr3_leaf_unbalance(struct xfs_da_state *state,
252 struct xfs_da_state_blk *drop_blk, 318 struct xfs_da_state_blk *drop_blk,
253 struct xfs_da_state_blk *save_blk); 319 struct xfs_da_state_blk *save_blk);
254int xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp); 320int xfs_attr3_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);
255 321
256/* 322/*
257 * Utility routines. 323 * Utility routines.
@@ -261,10 +327,12 @@ int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp,
261 struct xfs_buf *leaf2_bp); 327 struct xfs_buf *leaf2_bp);
262int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, 328int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize,
263 int *local); 329 int *local);
264int xfs_attr_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp, 330int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
265 xfs_dablk_t bno, xfs_daddr_t mappedbno, 331 xfs_dablk_t bno, xfs_daddr_t mappedbno,
266 struct xfs_buf **bpp); 332 struct xfs_buf **bpp);
333void xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to,
334 struct xfs_attr_leafblock *from);
267 335
268extern const struct xfs_buf_ops xfs_attr_leaf_buf_ops; 336extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
269 337
270#endif /* __XFS_ATTR_LEAF_H__ */ 338#endif /* __XFS_ATTR_LEAF_H__ */
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
new file mode 100644
index 000000000000..dee84466dcc9
--- /dev/null
+++ b/fs/xfs/xfs_attr_remote.c
@@ -0,0 +1,541 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
4 * All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it would be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19#include "xfs.h"
20#include "xfs_fs.h"
21#include "xfs_types.h"
22#include "xfs_bit.h"
23#include "xfs_log.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_mount.h"
28#include "xfs_error.h"
29#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h"
31#include "xfs_dinode.h"
32#include "xfs_inode.h"
33#include "xfs_alloc.h"
34#include "xfs_inode_item.h"
35#include "xfs_bmap.h"
36#include "xfs_attr.h"
37#include "xfs_attr_leaf.h"
38#include "xfs_attr_remote.h"
39#include "xfs_trans_space.h"
40#include "xfs_trace.h"
41#include "xfs_cksum.h"
42#include "xfs_buf_item.h"
43
44#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
45
46/*
47 * Each contiguous block has a header, so it is not just a simple attribute
48 * length to FSB conversion.
49 */
50static int
51xfs_attr3_rmt_blocks(
52 struct xfs_mount *mp,
53 int attrlen)
54{
55 int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp,
56 mp->m_sb.sb_blocksize);
57 return (attrlen + buflen - 1) / buflen;
58}
59
60static bool
61xfs_attr3_rmt_verify(
62 struct xfs_buf *bp)
63{
64 struct xfs_mount *mp = bp->b_target->bt_mount;
65 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
66
67 if (!xfs_sb_version_hascrc(&mp->m_sb))
68 return false;
69 if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
70 return false;
71 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
72 return false;
73 if (bp->b_bn != be64_to_cpu(rmt->rm_blkno))
74 return false;
75 if (be32_to_cpu(rmt->rm_offset) +
76 be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX)
77 return false;
78 if (rmt->rm_owner == 0)
79 return false;
80
81 return true;
82}
83
/*
 * Read verifier for remote attribute blocks: check the CRC and then the
 * header contents.  On failure, report corruption and mark the buffer
 * with an EFSCORRUPTED I/O error for the caller to observe.
 */
static void
xfs_attr3_rmt_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;

	/* no verification of non-crc buffers */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	/* checksum spans the whole buffer; CRC field at XFS_ATTR3_RMT_CRC_OFF */
	if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
			      XFS_ATTR3_RMT_CRC_OFF) ||
	    !xfs_attr3_rmt_verify(bp)) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
		xfs_buf_ioerror(bp, EFSCORRUPTED);
	}
}
101
/*
 * Write verifier for remote attribute blocks: validate the header, stamp
 * the LSN from the attached buf log item (if any) into the header, and
 * recompute the CRC just before the buffer goes to disk.
 */
static void
xfs_attr3_rmt_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item	*bip = bp->b_fspriv;

	/* no verification of non-crc buffers */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (!xfs_attr3_rmt_verify(bp)) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
		xfs_buf_ioerror(bp, EFSCORRUPTED);
		return;
	}

	/* copy the log item's LSN into the on-disk header */
	if (bip) {
		struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
		rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
	}
	/* CRC must be last: it covers everything written above */
	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
			 XFS_ATTR3_RMT_CRC_OFF);
}
126
/* Buffer verifier operations for remote attribute value blocks. */
const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
	.verify_read = xfs_attr3_rmt_read_verify,
	.verify_write = xfs_attr3_rmt_write_verify,
};
131
132static int
133xfs_attr3_rmt_hdr_set(
134 struct xfs_mount *mp,
135 xfs_ino_t ino,
136 uint32_t offset,
137 uint32_t size,
138 struct xfs_buf *bp)
139{
140 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
141
142 if (!xfs_sb_version_hascrc(&mp->m_sb))
143 return 0;
144
145 rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
146 rmt->rm_offset = cpu_to_be32(offset);
147 rmt->rm_bytes = cpu_to_be32(size);
148 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
149 rmt->rm_owner = cpu_to_be64(ino);
150 rmt->rm_blkno = cpu_to_be64(bp->b_bn);
151 bp->b_ops = &xfs_attr3_rmt_buf_ops;
152
153 return sizeof(struct xfs_attr3_rmt_hdr);
154}
155
/*
 * Checking of the remote attribute header is split into two parts: the
 * verifier does CRC, location and bounds checking; the unpacking function
 * checks the attribute parameters and owner.
 */
161static bool
162xfs_attr3_rmt_hdr_ok(
163 struct xfs_mount *mp,
164 xfs_ino_t ino,
165 uint32_t offset,
166 uint32_t size,
167 struct xfs_buf *bp)
168{
169 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
170
171 if (offset != be32_to_cpu(rmt->rm_offset))
172 return false;
173 if (size != be32_to_cpu(rmt->rm_bytes))
174 return false;
175 if (ino != be64_to_cpu(rmt->rm_owner))
176 return false;
177
178 /* ok */
179 return true;
180}
181
182/*
183 * Read the value associated with an attribute from the out-of-line buffer
184 * that we stored it in.
185 */
186int
187xfs_attr_rmtval_get(
188 struct xfs_da_args *args)
189{
190 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE];
191 struct xfs_mount *mp = args->dp->i_mount;
192 struct xfs_buf *bp;
193 xfs_daddr_t dblkno;
194 xfs_dablk_t lblkno = args->rmtblkno;
195 void *dst = args->value;
196 int valuelen = args->valuelen;
197 int nmap;
198 int error;
199 int blkcnt;
200 int i;
201 int offset = 0;
202
203 trace_xfs_attr_rmtval_get(args);
204
205 ASSERT(!(args->flags & ATTR_KERNOVAL));
206
207 while (valuelen > 0) {
208 nmap = ATTR_RMTVALUE_MAPSIZE;
209 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
210 args->rmtblkcnt, map, &nmap,
211 XFS_BMAPI_ATTRFORK);
212 if (error)
213 return error;
214 ASSERT(nmap >= 1);
215
216 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
217 int byte_cnt;
218 char *src;
219
220 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
221 (map[i].br_startblock != HOLESTARTBLOCK));
222 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
223 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
224 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
225 dblkno, blkcnt, 0, &bp,
226 &xfs_attr3_rmt_buf_ops);
227 if (error)
228 return error;
229
230 byte_cnt = min_t(int, valuelen, BBTOB(bp->b_length));
231 byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt);
232
233 src = bp->b_addr;
234 if (xfs_sb_version_hascrc(&mp->m_sb)) {
235 if (!xfs_attr3_rmt_hdr_ok(mp, args->dp->i_ino,
236 offset, byte_cnt, bp)) {
237 xfs_alert(mp,
238"remote attribute header does not match required off/len/owner (0x%x/Ox%x,0x%llx)",
239 offset, byte_cnt, args->dp->i_ino);
240 xfs_buf_relse(bp);
241 return EFSCORRUPTED;
242
243 }
244
245 src += sizeof(struct xfs_attr3_rmt_hdr);
246 }
247
248 memcpy(dst, src, byte_cnt);
249 xfs_buf_relse(bp);
250
251 offset += byte_cnt;
252 dst += byte_cnt;
253 valuelen -= byte_cnt;
254
255 lblkno += map[i].br_blockcount;
256 }
257 }
258 ASSERT(valuelen == 0);
259 return 0;
260}
261
/*
 * Write the value associated with an attribute into the out-of-line buffer
 * that we have defined for it.
 *
 * Runs in two passes: first allocate all the blocks the value needs,
 * rolling the transaction after each extent allocation; then walk the
 * allocated extents copying the value in, with a per-block header on CRC
 * filesystems and synchronous writes so completion can be relied upon
 * before the INCOMPLETE flag is cleared.  Returns 0 or a positive XFS
 * error code.
 */
int
xfs_attr_rmtval_set(
	struct xfs_da_args	*args)
{
	struct xfs_inode	*dp = args->dp;
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_bmbt_irec	map;
	struct xfs_buf		*bp;
	xfs_daddr_t		dblkno;
	xfs_dablk_t		lblkno;
	xfs_fileoff_t		lfileoff = 0;
	void			*src = args->value;
	int			blkcnt;
	int			valuelen;
	int			nmap;
	int			error;
	int			hdrcnt = 0;	/* extents allocated so far */
	bool			crcs = xfs_sb_version_hascrc(&mp->m_sb);
	int			offset = 0;	/* byte offset into the value */

	trace_xfs_attr_rmtval_set(args);

	/*
	 * Find a "hole" in the attribute address space large enough for
	 * us to drop the new attribute's value into. Because CRC enable
	 * attributes have headers, we can't just do a straight byte to FSB
	 * conversion. We calculate the worst case block count in this case
	 * and we may not need that many, so we have to handle this when
	 * allocating the blocks below.
	 */
	if (!crcs)
		blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
	else
		blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);

	error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
						   XFS_ATTR_FORK);
	if (error)
		return error;

	/* Start with the attribute data. We'll allocate the rest afterwards. */
	if (crcs)
		blkcnt = XFS_B_TO_FSB(mp, args->valuelen);

	args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
	args->rmtblkcnt = blkcnt;

	/*
	 * Roll through the "value", allocating blocks on disk as required.
	 */
	while (blkcnt > 0) {
		int		committed;

		/*
		 * Allocate a single extent, up to the size of the value.
		 */
		xfs_bmap_init(args->flist, args->firstblock);
		nmap = 1;
		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
				  blkcnt,
				  XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
				  args->firstblock, args->total, &map, &nmap,
				  args->flist);
		if (!error) {
			error = xfs_bmap_finish(&args->trans, args->flist,
						&committed);
		}
		if (error) {
			/*
			 * NOTE(review): if xfs_bmapi_write() itself failed,
			 * xfs_bmap_finish() never ran and "committed" is read
			 * uninitialized here — confirm and fix upstream.
			 */
			ASSERT(committed);
			args->trans = NULL;
			xfs_bmap_cancel(args->flist);
			return(error);
		}

		/*
		 * bmap_finish() may have committed the last trans and started
		 * a new one. We need the inode to be in all transactions.
		 */
		if (committed)
			xfs_trans_ijoin(args->trans, dp, 0);

		ASSERT(nmap == 1);
		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
		       (map.br_startblock != HOLESTARTBLOCK));
		lblkno += map.br_blockcount;
		blkcnt -= map.br_blockcount;
		hdrcnt++;

		/*
		 * If we have enough blocks for the attribute data, calculate
		 * how many extra blocks we need for headers. We might run
		 * through this multiple times in the case that the additional
		 * headers in the blocks needed for the data fragments spills
		 * into requiring more blocks. e.g. for 512 byte blocks, we'll
		 * spill for another block every 9 headers we require in this
		 * loop.
		 */
		if (crcs && blkcnt == 0) {
			int	total_len;

			total_len = args->valuelen +
				    hdrcnt * sizeof(struct xfs_attr3_rmt_hdr);
			blkcnt = XFS_B_TO_FSB(mp, total_len);
			blkcnt -= args->rmtblkcnt;
			args->rmtblkcnt += blkcnt;
		}

		/*
		 * Start the next trans in the chain.
		 */
		error = xfs_trans_roll(&args->trans, dp);
		if (error)
			return (error);
	}

	/*
	 * Roll through the "value", copying the attribute value to the
	 * already-allocated blocks. Blocks are written synchronously
	 * so that we can know they are all on disk before we turn off
	 * the INCOMPLETE flag.
	 */
	lblkno = args->rmtblkno;
	valuelen = args->valuelen;
	while (valuelen > 0) {
		int	byte_cnt;
		char	*buf;

		/*
		 * Try to remember where we decided to put the value.
		 */
		xfs_bmap_init(args->flist, args->firstblock);
		nmap = 1;
		error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
				       args->rmtblkcnt, &map, &nmap,
				       XFS_BMAPI_ATTRFORK);
		if (error)
			return(error);
		ASSERT(nmap == 1);
		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
		       (map.br_startblock != HOLESTARTBLOCK));

		/* NOTE(review): comma operator below — works, but looks like
		 * it was meant to be a semicolon; confirm upstream. */
		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
		blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);

		bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0);
		if (!bp)
			return ENOMEM;
		bp->b_ops = &xfs_attr3_rmt_buf_ops;

		/* payload per block = buffer size minus any CRC header */
		byte_cnt = BBTOB(bp->b_length);
		byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt);
		if (valuelen < byte_cnt)
			byte_cnt = valuelen;

		/* hdr_set returns 0 on non-CRC filesystems (no header) */
		buf = bp->b_addr;
		buf += xfs_attr3_rmt_hdr_set(mp, dp->i_ino, offset,
					     byte_cnt, bp);
		memcpy(buf, src, byte_cnt);

		/* zero the tail so stale data never hits the disk */
		if (byte_cnt < BBTOB(bp->b_length))
			xfs_buf_zero(bp, byte_cnt,
				     BBTOB(bp->b_length) - byte_cnt);

		error = xfs_bwrite(bp);	/* GROT: NOTE: synchronous write */
		xfs_buf_relse(bp);
		if (error)
			return error;

		src += byte_cnt;
		valuelen -= byte_cnt;
		offset += byte_cnt;
		hdrcnt--;

		lblkno += map.br_blockcount;
	}
	ASSERT(valuelen == 0);
	ASSERT(hdrcnt == 0);
	return 0;
}
445
/*
 * Remove the value associated with an attribute by deleting the
 * out-of-line buffer that it is stored on.
 *
 * Two passes: first invalidate any cached buffers covering the remote
 * value's blocks, then punch the extents out of the attribute fork,
 * rolling the transaction after each unmap.  Returns 0 or a positive
 * XFS error code.
 */
int
xfs_attr_rmtval_remove(xfs_da_args_t *args)
{
	xfs_mount_t *mp;
	xfs_bmbt_irec_t map;
	xfs_buf_t *bp;
	xfs_daddr_t dblkno;
	xfs_dablk_t lblkno;
	int valuelen, blkcnt, nmap, error, done, committed;

	trace_xfs_attr_rmtval_remove(args);

	mp = args->dp->i_mount;

	/*
	 * Roll through the "value", invalidating the attribute value's
	 * blocks.
	 */
	lblkno = args->rmtblkno;
	valuelen = args->rmtblkcnt;	/* counted in blocks here, not bytes */
	while (valuelen > 0) {
		/*
		 * Try to remember where we decided to put the value.
		 */
		nmap = 1;
		error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
				       args->rmtblkcnt, &map, &nmap,
				       XFS_BMAPI_ATTRFORK);
		if (error)
			return(error);
		ASSERT(nmap == 1);
		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
		       (map.br_startblock != HOLESTARTBLOCK));

		/* NOTE(review): comma operator below — works, but looks like
		 * it was meant to be a semicolon; confirm upstream. */
		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
		blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);

		/*
		 * If the "remote" value is in the cache, remove it.
		 */
		bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
		if (bp) {
			xfs_buf_stale(bp);
			xfs_buf_relse(bp);
			bp = NULL;
		}

		valuelen -= map.br_blockcount;

		lblkno += map.br_blockcount;
	}

	/*
	 * Keep de-allocating extents until the remote-value region is gone.
	 */
	lblkno = args->rmtblkno;
	blkcnt = args->rmtblkcnt;
	done = 0;
	while (!done) {
		xfs_bmap_init(args->flist, args->firstblock);
		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
				    XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
				    1, args->firstblock, args->flist,
				    &done);
		if (!error) {
			error = xfs_bmap_finish(&args->trans, args->flist,
						&committed);
		}
		if (error) {
			/*
			 * NOTE(review): if xfs_bunmapi() itself failed,
			 * xfs_bmap_finish() never ran and "committed" is read
			 * uninitialized here — confirm and fix upstream.
			 */
			ASSERT(committed);
			args->trans = NULL;
			xfs_bmap_cancel(args->flist);
			return error;
		}

		/*
		 * bmap_finish() may have committed the last trans and started
		 * a new one. We need the inode to be in all transactions.
		 */
		if (committed)
			xfs_trans_ijoin(args->trans, args->dp, 0);

		/*
		 * Close out trans and start the next one in the chain.
		 */
		error = xfs_trans_roll(&args->trans, args->dp);
		if (error)
			return (error);
	}
	return(0);
}
541
diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/xfs_attr_remote.h
new file mode 100644
index 000000000000..c7cca60a062a
--- /dev/null
+++ b/fs/xfs/xfs_attr_remote.h
@@ -0,0 +1,46 @@
1/*
2 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
#ifndef __XFS_ATTR_REMOTE_H__
#define __XFS_ATTR_REMOTE_H__

#define XFS_ATTR3_RMT_MAGIC	0x5841524d	/* XARM */

/*
 * On-disk header at the start of each remote attribute value block on
 * CRC-enabled (v5) filesystems.  Multi-byte fields are big-endian on disk.
 */
struct xfs_attr3_rmt_hdr {
	__be32	rm_magic;	/* XFS_ATTR3_RMT_MAGIC */
	__be32	rm_offset;	/* byte offset of this fragment in the value */
	__be32	rm_bytes;	/* bytes of value data in this block */
	__be32	rm_crc;		/* CRC of the whole block */
	uuid_t	rm_uuid;	/* filesystem UUID, matched against the sb */
	__be64	rm_owner;	/* inode number that owns this attribute */
	__be64	rm_blkno;	/* daddr of this block, for self-checking */
	__be64	rm_lsn;		/* LSN stamped at last write */
};

#define XFS_ATTR3_RMT_CRC_OFF	offsetof(struct xfs_attr3_rmt_hdr, rm_crc)

/* usable payload bytes per block: full block minus header on CRC fs */
#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize)	\
	((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
			sizeof(struct xfs_attr3_rmt_hdr) : 0))

extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;

int xfs_attr_rmtval_get(struct xfs_da_args *args);
int xfs_attr_rmtval_set(struct xfs_da_args *args);
int xfs_attr_rmtval_remove(struct xfs_da_args *args);

#endif /* __XFS_ATTR_REMOTE_H__ */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index b44af9211bd9..89042848f9ec 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -25,6 +25,7 @@
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h" 27#include "xfs_dir2.h"
28#include "xfs_mount.h"
28#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h" 31#include "xfs_alloc_btree.h"
@@ -47,180 +48,78 @@
47#include "xfs_filestream.h" 48#include "xfs_filestream.h"
48#include "xfs_vnodeops.h" 49#include "xfs_vnodeops.h"
49#include "xfs_trace.h" 50#include "xfs_trace.h"
51#include "xfs_symlink.h"
50 52
51 53
52kmem_zone_t *xfs_bmap_free_item_zone; 54kmem_zone_t *xfs_bmap_free_item_zone;
53 55
54/* 56/*
55 * Prototypes for internal bmap routines. 57 * Miscellaneous helper functions
56 */
57
58#ifdef DEBUG
59STATIC void
60xfs_bmap_check_leaf_extents(
61 struct xfs_btree_cur *cur,
62 struct xfs_inode *ip,
63 int whichfork);
64#else
65#define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0)
66#endif
67
68
69/*
70 * Called from xfs_bmap_add_attrfork to handle extents format files.
71 */
72STATIC int /* error */
73xfs_bmap_add_attrfork_extents(
74 xfs_trans_t *tp, /* transaction pointer */
75 xfs_inode_t *ip, /* incore inode pointer */
76 xfs_fsblock_t *firstblock, /* first block allocated */
77 xfs_bmap_free_t *flist, /* blocks to free at commit */
78 int *flags); /* inode logging flags */
79
80/*
81 * Called from xfs_bmap_add_attrfork to handle local format files.
82 */ 58 */
83STATIC int /* error */
84xfs_bmap_add_attrfork_local(
85 xfs_trans_t *tp, /* transaction pointer */
86 xfs_inode_t *ip, /* incore inode pointer */
87 xfs_fsblock_t *firstblock, /* first block allocated */
88 xfs_bmap_free_t *flist, /* blocks to free at commit */
89 int *flags); /* inode logging flags */
90 59
91/* 60/*
92 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. 61 * Compute and fill in the value of the maximum depth of a bmap btree
93 * It figures out where to ask the underlying allocator to put the new extent. 62 * in this filesystem. Done once, during mount.
94 */
95STATIC int /* error */
96xfs_bmap_alloc(
97 xfs_bmalloca_t *ap); /* bmap alloc argument struct */
98
99/*
100 * Transform a btree format file with only one leaf node, where the
101 * extents list will fit in the inode, into an extents format file.
102 * Since the file extents are already in-core, all we have to do is
103 * give up the space for the btree root and pitch the leaf block.
104 */
105STATIC int /* error */
106xfs_bmap_btree_to_extents(
107 xfs_trans_t *tp, /* transaction pointer */
108 xfs_inode_t *ip, /* incore inode pointer */
109 xfs_btree_cur_t *cur, /* btree cursor */
110 int *logflagsp, /* inode logging flags */
111 int whichfork); /* data or attr fork */
112
113/*
114 * Remove the entry "free" from the free item list. Prev points to the
115 * previous entry, unless "free" is the head of the list.
116 */
117STATIC void
118xfs_bmap_del_free(
119 xfs_bmap_free_t *flist, /* free item list header */
120 xfs_bmap_free_item_t *prev, /* previous item on list, if any */
121 xfs_bmap_free_item_t *free); /* list item to be freed */
122
123/*
124 * Convert an extents-format file into a btree-format file.
125 * The new file will have a root block (in the inode) and a single child block.
126 */
127STATIC int /* error */
128xfs_bmap_extents_to_btree(
129 xfs_trans_t *tp, /* transaction pointer */
130 xfs_inode_t *ip, /* incore inode pointer */
131 xfs_fsblock_t *firstblock, /* first-block-allocated */
132 xfs_bmap_free_t *flist, /* blocks freed in xaction */
133 xfs_btree_cur_t **curp, /* cursor returned to caller */
134 int wasdel, /* converting a delayed alloc */
135 int *logflagsp, /* inode logging flags */
136 int whichfork); /* data or attr fork */
137
138/*
139 * Convert a local file to an extents file.
140 * This code is sort of bogus, since the file data needs to get
141 * logged so it won't be lost. The bmap-level manipulations are ok, though.
142 */
143STATIC int /* error */
144xfs_bmap_local_to_extents(
145 xfs_trans_t *tp, /* transaction pointer */
146 xfs_inode_t *ip, /* incore inode pointer */
147 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
148 xfs_extlen_t total, /* total blocks needed by transaction */
149 int *logflagsp, /* inode logging flags */
150 int whichfork, /* data or attr fork */
151 void (*init_fn)(struct xfs_buf *bp,
152 struct xfs_inode *ip,
153 struct xfs_ifork *ifp));
154
155/*
156 * Search the extents list for the inode, for the extent containing bno.
157 * If bno lies in a hole, point to the next entry. If bno lies past eof,
158 * *eofp will be set, and *prevp will contain the last entry (null if none).
159 * Else, *lastxp will be set to the index of the found
160 * entry; *gotp will contain the entry.
161 */
162STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */
163xfs_bmap_search_extents(
164 xfs_inode_t *ip, /* incore inode pointer */
165 xfs_fileoff_t bno, /* block number searched for */
166 int whichfork, /* data or attr fork */
167 int *eofp, /* out: end of file found */
168 xfs_extnum_t *lastxp, /* out: last extent index */
169 xfs_bmbt_irec_t *gotp, /* out: extent entry found */
170 xfs_bmbt_irec_t *prevp); /* out: previous extent entry found */
171
172/*
173 * Compute the worst-case number of indirect blocks that will be used
174 * for ip's delayed extent of length "len".
175 */
176STATIC xfs_filblks_t
177xfs_bmap_worst_indlen(
178 xfs_inode_t *ip, /* incore inode pointer */
179 xfs_filblks_t len); /* delayed extent length */
180
181#ifdef DEBUG
182/*
183 * Perform various validation checks on the values being returned
184 * from xfs_bmapi().
185 */ 63 */
186STATIC void 64void
187xfs_bmap_validate_ret( 65xfs_bmap_compute_maxlevels(
188 xfs_fileoff_t bno, 66 xfs_mount_t *mp, /* file system mount structure */
189 xfs_filblks_t len, 67 int whichfork) /* data or attr fork */
190 int flags, 68{
191 xfs_bmbt_irec_t *mval, 69 int level; /* btree level */
192 int nmap, 70 uint maxblocks; /* max blocks at this level */
193 int ret_nmap); 71 uint maxleafents; /* max leaf entries possible */
194#else 72 int maxrootrecs; /* max records in root block */
195#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) 73 int minleafrecs; /* min records in leaf block */
196#endif /* DEBUG */ 74 int minnoderecs; /* min records in node block */
197 75 int sz; /* root block size */
198STATIC int
199xfs_bmap_count_tree(
200 xfs_mount_t *mp,
201 xfs_trans_t *tp,
202 xfs_ifork_t *ifp,
203 xfs_fsblock_t blockno,
204 int levelin,
205 int *count);
206
207STATIC void
208xfs_bmap_count_leaves(
209 xfs_ifork_t *ifp,
210 xfs_extnum_t idx,
211 int numrecs,
212 int *count);
213 76
214STATIC void 77 /*
215xfs_bmap_disk_count_leaves( 78 * The maximum number of extents in a file, hence the maximum
216 struct xfs_mount *mp, 79 * number of leaf entries, is controlled by the type of di_nextents
217 struct xfs_btree_block *block, 80 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
218 int numrecs, 81 * (a signed 16-bit number, xfs_aextnum_t).
219 int *count); 82 *
83 * Note that we can no longer assume that if we are in ATTR1 that
84 * the fork offset of all the inodes will be
85 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
86 * with ATTR2 and then mounted back with ATTR1, keeping the
87 * di_forkoff's fixed but probably at various positions. Therefore,
88 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
89 * of a minimum size available.
90 */
91 if (whichfork == XFS_DATA_FORK) {
92 maxleafents = MAXEXTNUM;
93 sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
94 } else {
95 maxleafents = MAXAEXTNUM;
96 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
97 }
98 maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0);
99 minleafrecs = mp->m_bmap_dmnr[0];
100 minnoderecs = mp->m_bmap_dmnr[1];
101 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
102 for (level = 1; maxblocks > 1; level++) {
103 if (maxblocks <= maxrootrecs)
104 maxblocks = 1;
105 else
106 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
107 }
108 mp->m_bm_maxlevels[whichfork] = level;
109}
220 110
221/* 111/*
222 * Bmap internal routines. 112 * Convert the given file system block to a disk block. We have to treat it
113 * differently based on whether the file is a real time file or not, because the
114 * bmap code does.
223 */ 115 */
116xfs_daddr_t
117xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
118{
119 return (XFS_IS_REALTIME_INODE(ip) ? \
120 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
121 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
122}
224 123
225STATIC int /* error */ 124STATIC int /* error */
226xfs_bmbt_lookup_eq( 125xfs_bmbt_lookup_eq(
@@ -290,6 +189,1070 @@ xfs_bmbt_update(
290} 189}
291 190
292/* 191/*
192 * Compute the worst-case number of indirect blocks that will be used
193 * for ip's delayed extent of length "len".
194 */
STATIC xfs_filblks_t
xfs_bmap_worst_indlen(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_filblks_t	len)		/* delayed extent length */
{
	int		level;		/* btree level number */
	int		maxrecs;	/* maximum record count at this level */
	xfs_mount_t	*mp;		/* mount structure */
	xfs_filblks_t	rval;		/* return value */

	mp = ip->i_mount;
	/* start with the minimum leaf fanout; levels above use node fanout */
	maxrecs = mp->m_bmap_dmxr[0];
	for (level = 0, rval = 0;
	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
	     level++) {
		/* round len up to whole blocks at this level; do_div()
		 * divides len in place */
		len += maxrecs - 1;
		do_div(len, maxrecs);
		rval += len;
		/* once a level fits in one block, each remaining level
		 * above it costs exactly one more block */
		if (len == 1)
			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
				level - 1;
		if (level == 0)
			maxrecs = mp->m_bmap_dmxr[1];
	}
	return rval;
}
221
222/*
223 * Calculate the default attribute fork offset for newly created inodes.
224 */
225uint
226xfs_default_attroffset(
227 struct xfs_inode *ip)
228{
229 struct xfs_mount *mp = ip->i_mount;
230 uint offset;
231
232 if (mp->m_sb.sb_inodesize == 256) {
233 offset = XFS_LITINO(mp, ip->i_d.di_version) -
234 XFS_BMDR_SPACE_CALC(MINABTPTRS);
235 } else {
236 offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
237 }
238
239 ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version));
240 return offset;
241}
242
243/*
244 * Helper routine to reset inode di_forkoff field when switching
245 * attribute fork from local to extent format - we reset it where
246 * possible to make space available for inline data fork extents.
247 */
248STATIC void
249xfs_bmap_forkoff_reset(
250 xfs_mount_t *mp,
251 xfs_inode_t *ip,
252 int whichfork)
253{
254 if (whichfork == XFS_ATTR_FORK &&
255 ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
256 ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
257 ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
258 uint dfl_forkoff = xfs_default_attroffset(ip) >> 3;
259
260 if (dfl_forkoff > ip->i_d.di_forkoff)
261 ip->i_d.di_forkoff = dfl_forkoff;
262 }
263}
264
265/*
266 * Extent tree block counting routines.
267 */
268
269/*
270 * Count leaf blocks given a range of extent records.
271 */
272STATIC void
273xfs_bmap_count_leaves(
274 xfs_ifork_t *ifp,
275 xfs_extnum_t idx,
276 int numrecs,
277 int *count)
278{
279 int b;
280
281 for (b = 0; b < numrecs; b++) {
282 xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
283 *count += xfs_bmbt_get_blockcount(frp);
284 }
285}
286
287/*
288 * Count leaf blocks given a range of extent records originally
289 * in btree format.
290 */
291STATIC void
292xfs_bmap_disk_count_leaves(
293 struct xfs_mount *mp,
294 struct xfs_btree_block *block,
295 int numrecs,
296 int *count)
297{
298 int b;
299 xfs_bmbt_rec_t *frp;
300
301 for (b = 1; b <= numrecs; b++) {
302 frp = XFS_BMBT_REC_ADDR(mp, block, b);
303 *count += xfs_bmbt_disk_get_blockcount(frp);
304 }
305}
306
/*
 * Recursively walks each level of a btree
 * to count total fsblocks is use.
 */
STATIC int					/* error */
xfs_bmap_count_tree(
	xfs_mount_t     *mp,            /* file system mount point */
	xfs_trans_t     *tp,            /* transaction pointer */
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fsblock_t   blockno,	/* file system block number */
	int             levelin,	/* level in btree */
	int		*count)		/* Count of blocks */
{
	int			error;
	xfs_buf_t		*bp, *nbp;
	int			level = levelin;
	__be64			*pp;
	xfs_fsblock_t           bno = blockno;
	xfs_fsblock_t		nextbno;
	struct xfs_btree_block	*block, *nextblock;
	int			numrecs;

	/* read the first block of this level; every block read counts */
	error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
	if (error)
		return error;
	*count += 1;
	block = XFS_BUF_TO_BLOCK(bp);

	if (--level) {
		/* Not at node above leaves, count this level of nodes */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		while (nextbno != NULLFSBLOCK) {
			error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				return error;
			*count += 1;
			nextblock = XFS_BUF_TO_BLOCK(nbp);
			nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
			xfs_trans_brelse(tp, nbp);
		}

		/* Dive to the next level */
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		/*
		 * NOTE(review): XFS error codes here are positive
		 * (XFS_ERROR(EFSCORRUPTED)), so this "< 0" test looks like it
		 * can never fire on a failed recursion — confirm upstream.
		 */
		if (unlikely((error =
		     xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
			xfs_trans_brelse(tp, bp);
			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
					 XFS_ERRLEVEL_LOW, mp);
			return XFS_ERROR(EFSCORRUPTED);
		}
		xfs_trans_brelse(tp, bp);
	} else {
		/* count all level 1 nodes and their leaves */
		for (;;) {
			nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
			numrecs = be16_to_cpu(block->bb_numrecs);
			xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
			xfs_trans_brelse(tp, bp);
			if (nextbno == NULLFSBLOCK)
				break;
			bno = nextbno;
			error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				return error;
			*count += 1;
			block = XFS_BUF_TO_BLOCK(bp);
		}
	}
	return 0;
}
383
/*
 * Count fsblocks of the given fork.
 */
int						/* error */
xfs_bmap_count_blocks(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode */
	int			whichfork,	/* data or attr fork */
	int			*count)		/* out: count of blocks */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	/* extent format: all records are in-core, just sum them */
	if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
		xfs_bmap_count_leaves(ifp, 0,
			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
			count);
		return 0;
	}

	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	block = ifp->if_broot;
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLDFSBNO);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	/*
	 * NOTE(review): xfs_bmap_count_tree() returns positive XFS error
	 * codes, so this "< 0" test looks like it silently ignores failures
	 * — confirm upstream.
	 */
	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
				 mp);
		return XFS_ERROR(EFSCORRUPTED);
	}

	return 0;
}
431
432/*
433 * Debug/sanity checking code
434 */
435
436STATIC int
437xfs_bmap_sanity_check(
438 struct xfs_mount *mp,
439 struct xfs_buf *bp,
440 int level)
441{
442 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
443
444 if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
445 block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
446 return 0;
447
448 if (be16_to_cpu(block->bb_level) != level ||
449 be16_to_cpu(block->bb_numrecs) == 0 ||
450 be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
451 return 0;
452
453 return 1;
454}
455
456#ifdef DEBUG
457STATIC struct xfs_buf *
458xfs_bmap_get_bp(
459 struct xfs_btree_cur *cur,
460 xfs_fsblock_t bno)
461{
462 struct xfs_log_item_desc *lidp;
463 int i;
464
465 if (!cur)
466 return NULL;
467
468 for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
469 if (!cur->bc_bufs[i])
470 break;
471 if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
472 return cur->bc_bufs[i];
473 }
474
475 /* Chase down all the log items to see if the bp is there */
476 list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
477 struct xfs_buf_log_item *bip;
478 bip = (struct xfs_buf_log_item *)lidp->lid_item;
479 if (bip->bli_item.li_type == XFS_LI_BUF &&
480 XFS_BUF_ADDR(bip->bli_buf) == bno)
481 return bip->bli_buf;
482 }
483
484 return NULL;
485}
486
/*
 * DEBUG sanity check of one interior bmbt block: keys must be strictly
 * increasing, and no two child pointers may be equal.  Panics if a
 * duplicate child pointer is found.
 *
 * "root" selects the addressing scheme: the incore root block uses the
 * BROOT macros with "sz" (the broot size in bytes), while on-disk node
 * blocks use the regular PTR macros with the node maxrecs.
 */
STATIC void
xfs_check_block(
	struct xfs_btree_block	*block,
	xfs_mount_t		*mp,
	int			root,
	short			sz)
{
	int			i, j, dmxr;
	__be64			*pp, *thispa;	/* pointer to block address */
	xfs_bmbt_key_t		*prevp, *keyp;

	/* Only interior blocks have keys/pointers to check. */
	ASSERT(be16_to_cpu(block->bb_level) > 0);

	prevp = NULL;
	for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
		dmxr = mp->m_bmap_dmxr[0];
		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);

		/* Keys must be strictly ascending. */
		if (prevp) {
			ASSERT(be64_to_cpu(prevp->br_startoff) <
			       be64_to_cpu(keyp->br_startoff));
		}
		prevp = keyp;

		/*
		 * Compare the block numbers to see if there are dups.
		 */
		if (root)
			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
		else
			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);

		/* O(n^2) scan of the remaining pointers; DEBUG-only cost. */
		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
			if (root)
				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
			else
				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
			if (*thispa == *pp) {
				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
					__func__, j, i,
					(unsigned long long)be64_to_cpu(*thispa));
				panic("%s: ptrs are equal in node\n",
					__func__);
			}
		}
	}
}
534
535/*
536 * Check that the extents for the inode ip are in the right order in all
537 * btree leaves.
538 */
539
540STATIC void
541xfs_bmap_check_leaf_extents(
542 xfs_btree_cur_t *cur, /* btree cursor or null */
543 xfs_inode_t *ip, /* incore inode pointer */
544 int whichfork) /* data or attr fork */
545{
546 struct xfs_btree_block *block; /* current btree block */
547 xfs_fsblock_t bno; /* block # of "block" */
548 xfs_buf_t *bp; /* buffer for "block" */
549 int error; /* error return value */
550 xfs_extnum_t i=0, j; /* index into the extents list */
551 xfs_ifork_t *ifp; /* fork structure */
552 int level; /* btree level, for checking */
553 xfs_mount_t *mp; /* file system mount structure */
554 __be64 *pp; /* pointer to block address */
555 xfs_bmbt_rec_t *ep; /* pointer to current extent */
556 xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */
557 xfs_bmbt_rec_t *nextp; /* pointer to next extent */
558 int bp_release = 0;
559
560 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
561 return;
562 }
563
564 bno = NULLFSBLOCK;
565 mp = ip->i_mount;
566 ifp = XFS_IFORK_PTR(ip, whichfork);
567 block = ifp->if_broot;
568 /*
569 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
570 */
571 level = be16_to_cpu(block->bb_level);
572 ASSERT(level > 0);
573 xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
574 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
575 bno = be64_to_cpu(*pp);
576
577 ASSERT(bno != NULLDFSBNO);
578 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
579 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
580
581 /*
582 * Go down the tree until leaf level is reached, following the first
583 * pointer (leftmost) at each level.
584 */
585 while (level-- > 0) {
586 /* See if buf is in cur first */
587 bp_release = 0;
588 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
589 if (!bp) {
590 bp_release = 1;
591 error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
592 XFS_BMAP_BTREE_REF,
593 &xfs_bmbt_buf_ops);
594 if (error)
595 goto error_norelse;
596 }
597 block = XFS_BUF_TO_BLOCK(bp);
598 XFS_WANT_CORRUPTED_GOTO(
599 xfs_bmap_sanity_check(mp, bp, level),
600 error0);
601 if (level == 0)
602 break;
603
604 /*
605 * Check this block for basic sanity (increasing keys and
606 * no duplicate blocks).
607 */
608
609 xfs_check_block(block, mp, 0, 0);
610 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
611 bno = be64_to_cpu(*pp);
612 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
613 if (bp_release) {
614 bp_release = 0;
615 xfs_trans_brelse(NULL, bp);
616 }
617 }
618
619 /*
620 * Here with bp and block set to the leftmost leaf node in the tree.
621 */
622 i = 0;
623
624 /*
625 * Loop over all leaf nodes checking that all extents are in the right order.
626 */
627 for (;;) {
628 xfs_fsblock_t nextbno;
629 xfs_extnum_t num_recs;
630
631
632 num_recs = xfs_btree_get_numrecs(block);
633
634 /*
635 * Read-ahead the next leaf block, if any.
636 */
637
638 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
639
640 /*
641 * Check all the extents to make sure they are OK.
642 * If we had a previous block, the last entry should
643 * conform with the first entry in this one.
644 */
645
646 ep = XFS_BMBT_REC_ADDR(mp, block, 1);
647 if (i) {
648 ASSERT(xfs_bmbt_disk_get_startoff(&last) +
649 xfs_bmbt_disk_get_blockcount(&last) <=
650 xfs_bmbt_disk_get_startoff(ep));
651 }
652 for (j = 1; j < num_recs; j++) {
653 nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
654 ASSERT(xfs_bmbt_disk_get_startoff(ep) +
655 xfs_bmbt_disk_get_blockcount(ep) <=
656 xfs_bmbt_disk_get_startoff(nextp));
657 ep = nextp;
658 }
659
660 last = *ep;
661 i += num_recs;
662 if (bp_release) {
663 bp_release = 0;
664 xfs_trans_brelse(NULL, bp);
665 }
666 bno = nextbno;
667 /*
668 * If we've reached the end, stop.
669 */
670 if (bno == NULLFSBLOCK)
671 break;
672
673 bp_release = 0;
674 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
675 if (!bp) {
676 bp_release = 1;
677 error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
678 XFS_BMAP_BTREE_REF,
679 &xfs_bmbt_buf_ops);
680 if (error)
681 goto error_norelse;
682 }
683 block = XFS_BUF_TO_BLOCK(bp);
684 }
685 if (bp_release) {
686 bp_release = 0;
687 xfs_trans_brelse(NULL, bp);
688 }
689 return;
690
691error0:
692 xfs_warn(mp, "%s: at error0", __func__);
693 if (bp_release)
694 xfs_trans_brelse(NULL, bp);
695error_norelse:
696 xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
697 __func__, i);
698 panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
699 return;
700}
701
702/*
703 * Add bmap trace insert entries for all the contents of the extent records.
704 */
705void
706xfs_bmap_trace_exlist(
707 xfs_inode_t *ip, /* incore inode pointer */
708 xfs_extnum_t cnt, /* count of entries in the list */
709 int whichfork, /* data or attr fork */
710 unsigned long caller_ip)
711{
712 xfs_extnum_t idx; /* extent record index */
713 xfs_ifork_t *ifp; /* inode fork pointer */
714 int state = 0;
715
716 if (whichfork == XFS_ATTR_FORK)
717 state |= BMAP_ATTRFORK;
718
719 ifp = XFS_IFORK_PTR(ip, whichfork);
720 ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
721 for (idx = 0; idx < cnt; idx++)
722 trace_xfs_extlist(ip, idx, whichfork, caller_ip);
723}
724
725/*
726 * Validate that the bmbt_irecs being returned from bmapi are valid
727 * given the callers original parameters. Specifically check the
728 * ranges of the returned irecs to ensure that they only extent beyond
729 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
730 */
731STATIC void
732xfs_bmap_validate_ret(
733 xfs_fileoff_t bno,
734 xfs_filblks_t len,
735 int flags,
736 xfs_bmbt_irec_t *mval,
737 int nmap,
738 int ret_nmap)
739{
740 int i; /* index to map values */
741
742 ASSERT(ret_nmap <= nmap);
743
744 for (i = 0; i < ret_nmap; i++) {
745 ASSERT(mval[i].br_blockcount > 0);
746 if (!(flags & XFS_BMAPI_ENTIRE)) {
747 ASSERT(mval[i].br_startoff >= bno);
748 ASSERT(mval[i].br_blockcount <= len);
749 ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
750 bno + len);
751 } else {
752 ASSERT(mval[i].br_startoff < bno + len);
753 ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
754 bno);
755 }
756 ASSERT(i == 0 ||
757 mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
758 mval[i].br_startoff);
759 ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
760 mval[i].br_startblock != HOLESTARTBLOCK);
761 ASSERT(mval[i].br_state == XFS_EXT_NORM ||
762 mval[i].br_state == XFS_EXT_UNWRITTEN);
763 }
764}
765
766#else
767#define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0)
768#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
769#endif /* DEBUG */
770
771/*
772 * bmap free list manipulation functions
773 */
774
775/*
776 * Add the extent to the list of extents to be free at transaction end.
777 * The list is maintained sorted (by block number).
778 */
779void
780xfs_bmap_add_free(
781 xfs_fsblock_t bno, /* fs block number of extent */
782 xfs_filblks_t len, /* length of extent */
783 xfs_bmap_free_t *flist, /* list of extents */
784 xfs_mount_t *mp) /* mount point structure */
785{
786 xfs_bmap_free_item_t *cur; /* current (next) element */
787 xfs_bmap_free_item_t *new; /* new element */
788 xfs_bmap_free_item_t *prev; /* previous element */
789#ifdef DEBUG
790 xfs_agnumber_t agno;
791 xfs_agblock_t agbno;
792
793 ASSERT(bno != NULLFSBLOCK);
794 ASSERT(len > 0);
795 ASSERT(len <= MAXEXTLEN);
796 ASSERT(!isnullstartblock(bno));
797 agno = XFS_FSB_TO_AGNO(mp, bno);
798 agbno = XFS_FSB_TO_AGBNO(mp, bno);
799 ASSERT(agno < mp->m_sb.sb_agcount);
800 ASSERT(agbno < mp->m_sb.sb_agblocks);
801 ASSERT(len < mp->m_sb.sb_agblocks);
802 ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
803#endif
804 ASSERT(xfs_bmap_free_item_zone != NULL);
805 new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
806 new->xbfi_startblock = bno;
807 new->xbfi_blockcount = (xfs_extlen_t)len;
808 for (prev = NULL, cur = flist->xbf_first;
809 cur != NULL;
810 prev = cur, cur = cur->xbfi_next) {
811 if (cur->xbfi_startblock >= bno)
812 break;
813 }
814 if (prev)
815 prev->xbfi_next = new;
816 else
817 flist->xbf_first = new;
818 new->xbfi_next = cur;
819 flist->xbf_count++;
820}
821
822/*
823 * Remove the entry "free" from the free item list. Prev points to the
824 * previous entry, unless "free" is the head of the list.
825 */
826STATIC void
827xfs_bmap_del_free(
828 xfs_bmap_free_t *flist, /* free item list header */
829 xfs_bmap_free_item_t *prev, /* previous item on list, if any */
830 xfs_bmap_free_item_t *free) /* list item to be freed */
831{
832 if (prev)
833 prev->xbfi_next = free->xbfi_next;
834 else
835 flist->xbf_first = free->xbfi_next;
836 flist->xbf_count--;
837 kmem_zone_free(xfs_bmap_free_item_zone, free);
838}
839
840
841/*
842 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
843 * caller. Frees all the extents that need freeing, which must be done
844 * last due to locking considerations. We never free any extents in
845 * the first transaction.
846 *
847 * Return 1 if the given transaction was committed and a new one
848 * started, and 0 otherwise in the committed parameter.
849 */
850int /* error */
851xfs_bmap_finish(
852 xfs_trans_t **tp, /* transaction pointer addr */
853 xfs_bmap_free_t *flist, /* i/o: list extents to free */
854 int *committed) /* xact committed or not */
855{
856 xfs_efd_log_item_t *efd; /* extent free data */
857 xfs_efi_log_item_t *efi; /* extent free intention */
858 int error; /* error return value */
859 xfs_bmap_free_item_t *free; /* free extent item */
860 unsigned int logres; /* new log reservation */
861 unsigned int logcount; /* new log count */
862 xfs_mount_t *mp; /* filesystem mount structure */
863 xfs_bmap_free_item_t *next; /* next item on free list */
864 xfs_trans_t *ntp; /* new transaction pointer */
865
866 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
867 if (flist->xbf_count == 0) {
868 *committed = 0;
869 return 0;
870 }
871 ntp = *tp;
872 efi = xfs_trans_get_efi(ntp, flist->xbf_count);
873 for (free = flist->xbf_first; free; free = free->xbfi_next)
874 xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
875 free->xbfi_blockcount);
876 logres = ntp->t_log_res;
877 logcount = ntp->t_log_count;
878 ntp = xfs_trans_dup(*tp);
879 error = xfs_trans_commit(*tp, 0);
880 *tp = ntp;
881 *committed = 1;
882 /*
883 * We have a new transaction, so we should return committed=1,
884 * even though we're returning an error.
885 */
886 if (error)
887 return error;
888
889 /*
890 * transaction commit worked ok so we can drop the extra ticket
891 * reference that we gained in xfs_trans_dup()
892 */
893 xfs_log_ticket_put(ntp->t_ticket);
894
895 if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES,
896 logcount)))
897 return error;
898 efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
899 for (free = flist->xbf_first; free != NULL; free = next) {
900 next = free->xbfi_next;
901 if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
902 free->xbfi_blockcount))) {
903 /*
904 * The bmap free list will be cleaned up at a
905 * higher level. The EFI will be canceled when
906 * this transaction is aborted.
907 * Need to force shutdown here to make sure it
908 * happens, since this transaction may not be
909 * dirty yet.
910 */
911 mp = ntp->t_mountp;
912 if (!XFS_FORCED_SHUTDOWN(mp))
913 xfs_force_shutdown(mp,
914 (error == EFSCORRUPTED) ?
915 SHUTDOWN_CORRUPT_INCORE :
916 SHUTDOWN_META_IO_ERROR);
917 return error;
918 }
919 xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
920 free->xbfi_blockcount);
921 xfs_bmap_del_free(flist, NULL, free);
922 }
923 return 0;
924}
925
926/*
927 * Free up any items left in the list.
928 */
929void
930xfs_bmap_cancel(
931 xfs_bmap_free_t *flist) /* list of bmap_free_items */
932{
933 xfs_bmap_free_item_t *free; /* free list item */
934 xfs_bmap_free_item_t *next;
935
936 if (flist->xbf_count == 0)
937 return;
938 ASSERT(flist->xbf_first != NULL);
939 for (free = flist->xbf_first; free; free = next) {
940 next = free->xbfi_next;
941 xfs_bmap_del_free(flist, NULL, free);
942 }
943 ASSERT(flist->xbf_count == 0);
944}
945
946/*
947 * Inode fork format manipulation functions
948 */
949
950/*
951 * Transform a btree format file with only one leaf node, where the
952 * extents list will fit in the inode, into an extents format file.
953 * Since the file extents are already in-core, all we have to do is
954 * give up the space for the btree root and pitch the leaf block.
955 */
956STATIC int /* error */
957xfs_bmap_btree_to_extents(
958 xfs_trans_t *tp, /* transaction pointer */
959 xfs_inode_t *ip, /* incore inode pointer */
960 xfs_btree_cur_t *cur, /* btree cursor */
961 int *logflagsp, /* inode logging flags */
962 int whichfork) /* data or attr fork */
963{
964 /* REFERENCED */
965 struct xfs_btree_block *cblock;/* child btree block */
966 xfs_fsblock_t cbno; /* child block number */
967 xfs_buf_t *cbp; /* child block's buffer */
968 int error; /* error return value */
969 xfs_ifork_t *ifp; /* inode fork data */
970 xfs_mount_t *mp; /* mount point structure */
971 __be64 *pp; /* ptr to block address */
972 struct xfs_btree_block *rblock;/* root btree block */
973
974 mp = ip->i_mount;
975 ifp = XFS_IFORK_PTR(ip, whichfork);
976 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
977 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
978 rblock = ifp->if_broot;
979 ASSERT(be16_to_cpu(rblock->bb_level) == 1);
980 ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
981 ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
982 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
983 cbno = be64_to_cpu(*pp);
984 *logflagsp = 0;
985#ifdef DEBUG
986 if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
987 return error;
988#endif
989 error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
990 &xfs_bmbt_buf_ops);
991 if (error)
992 return error;
993 cblock = XFS_BUF_TO_BLOCK(cbp);
994 if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
995 return error;
996 xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
997 ip->i_d.di_nblocks--;
998 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
999 xfs_trans_binval(tp, cbp);
1000 if (cur->bc_bufs[0] == cbp)
1001 cur->bc_bufs[0] = NULL;
1002 xfs_iroot_realloc(ip, -1, whichfork);
1003 ASSERT(ifp->if_broot == NULL);
1004 ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
1005 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
1006 *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
1007 return 0;
1008}
1009
1010/*
1011 * Convert an extents-format file into a btree-format file.
1012 * The new file will have a root block (in the inode) and a single child block.
1013 */
1014STATIC int /* error */
1015xfs_bmap_extents_to_btree(
1016 xfs_trans_t *tp, /* transaction pointer */
1017 xfs_inode_t *ip, /* incore inode pointer */
1018 xfs_fsblock_t *firstblock, /* first-block-allocated */
1019 xfs_bmap_free_t *flist, /* blocks freed in xaction */
1020 xfs_btree_cur_t **curp, /* cursor returned to caller */
1021 int wasdel, /* converting a delayed alloc */
1022 int *logflagsp, /* inode logging flags */
1023 int whichfork) /* data or attr fork */
1024{
1025 struct xfs_btree_block *ablock; /* allocated (child) bt block */
1026 xfs_buf_t *abp; /* buffer for ablock */
1027 xfs_alloc_arg_t args; /* allocation arguments */
1028 xfs_bmbt_rec_t *arp; /* child record pointer */
1029 struct xfs_btree_block *block; /* btree root block */
1030 xfs_btree_cur_t *cur; /* bmap btree cursor */
1031 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
1032 int error; /* error return value */
1033 xfs_extnum_t i, cnt; /* extent record index */
1034 xfs_ifork_t *ifp; /* inode fork pointer */
1035 xfs_bmbt_key_t *kp; /* root block key pointer */
1036 xfs_mount_t *mp; /* mount structure */
1037 xfs_extnum_t nextents; /* number of file extents */
1038 xfs_bmbt_ptr_t *pp; /* root block address pointer */
1039
1040 mp = ip->i_mount;
1041 ifp = XFS_IFORK_PTR(ip, whichfork);
1042 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
1043
1044 /*
1045 * Make space in the inode incore.
1046 */
1047 xfs_iroot_realloc(ip, 1, whichfork);
1048 ifp->if_flags |= XFS_IFBROOT;
1049
1050 /*
1051 * Fill in the root.
1052 */
1053 block = ifp->if_broot;
1054 if (xfs_sb_version_hascrc(&mp->m_sb))
1055 xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
1056 XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
1057 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
1058 else
1059 xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
1060 XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
1061 XFS_BTREE_LONG_PTRS);
1062
1063 /*
1064 * Need a cursor. Can't allocate until bb_level is filled in.
1065 */
1066 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
1067 cur->bc_private.b.firstblock = *firstblock;
1068 cur->bc_private.b.flist = flist;
1069 cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
1070 /*
1071 * Convert to a btree with two levels, one record in root.
1072 */
1073 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
1074 memset(&args, 0, sizeof(args));
1075 args.tp = tp;
1076 args.mp = mp;
1077 args.firstblock = *firstblock;
1078 if (*firstblock == NULLFSBLOCK) {
1079 args.type = XFS_ALLOCTYPE_START_BNO;
1080 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
1081 } else if (flist->xbf_low) {
1082 args.type = XFS_ALLOCTYPE_START_BNO;
1083 args.fsbno = *firstblock;
1084 } else {
1085 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1086 args.fsbno = *firstblock;
1087 }
1088 args.minlen = args.maxlen = args.prod = 1;
1089 args.wasdel = wasdel;
1090 *logflagsp = 0;
1091 if ((error = xfs_alloc_vextent(&args))) {
1092 xfs_iroot_realloc(ip, -1, whichfork);
1093 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1094 return error;
1095 }
1096 /*
1097 * Allocation can't fail, the space was reserved.
1098 */
1099 ASSERT(args.fsbno != NULLFSBLOCK);
1100 ASSERT(*firstblock == NULLFSBLOCK ||
1101 args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
1102 (flist->xbf_low &&
1103 args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
1104 *firstblock = cur->bc_private.b.firstblock = args.fsbno;
1105 cur->bc_private.b.allocated++;
1106 ip->i_d.di_nblocks++;
1107 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
1108 abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
1109 /*
1110 * Fill in the child block.
1111 */
1112 abp->b_ops = &xfs_bmbt_buf_ops;
1113 ablock = XFS_BUF_TO_BLOCK(abp);
1114 if (xfs_sb_version_hascrc(&mp->m_sb))
1115 xfs_btree_init_block_int(mp, ablock, abp->b_bn,
1116 XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
1117 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
1118 else
1119 xfs_btree_init_block_int(mp, ablock, abp->b_bn,
1120 XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
1121 XFS_BTREE_LONG_PTRS);
1122
1123 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
1124 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1125 for (cnt = i = 0; i < nextents; i++) {
1126 ep = xfs_iext_get_ext(ifp, i);
1127 if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
1128 arp->l0 = cpu_to_be64(ep->l0);
1129 arp->l1 = cpu_to_be64(ep->l1);
1130 arp++; cnt++;
1131 }
1132 }
1133 ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
1134 xfs_btree_set_numrecs(ablock, cnt);
1135
1136 /*
1137 * Fill in the root key and pointer.
1138 */
1139 kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
1140 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
1141 kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
1142 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
1143 be16_to_cpu(block->bb_level)));
1144 *pp = cpu_to_be64(args.fsbno);
1145
1146 /*
1147 * Do all this logging at the end so that
1148 * the root is at the right level.
1149 */
1150 xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
1151 xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
1152 ASSERT(*curp == NULL);
1153 *curp = cur;
1154 *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
1155 return 0;
1156}
1157
1158/*
1159 * Convert a local file to an extents file.
1160 * This code is out of bounds for data forks of regular files,
1161 * since the file data needs to get logged so things will stay consistent.
1162 * (The bmap-level manipulations are ok, though).
1163 */
1164STATIC int /* error */
1165xfs_bmap_local_to_extents(
1166 xfs_trans_t *tp, /* transaction pointer */
1167 xfs_inode_t *ip, /* incore inode pointer */
1168 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
1169 xfs_extlen_t total, /* total blocks needed by transaction */
1170 int *logflagsp, /* inode logging flags */
1171 int whichfork,
1172 void (*init_fn)(struct xfs_trans *tp,
1173 struct xfs_buf *bp,
1174 struct xfs_inode *ip,
1175 struct xfs_ifork *ifp))
1176{
1177 int error; /* error return value */
1178 int flags; /* logging flags returned */
1179 xfs_ifork_t *ifp; /* inode fork pointer */
1180
1181 /*
1182 * We don't want to deal with the case of keeping inode data inline yet.
1183 * So sending the data fork of a regular inode is invalid.
1184 */
1185 ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
1186 ifp = XFS_IFORK_PTR(ip, whichfork);
1187 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
1188 flags = 0;
1189 error = 0;
1190 if (ifp->if_bytes) {
1191 xfs_alloc_arg_t args; /* allocation arguments */
1192 xfs_buf_t *bp; /* buffer for extent block */
1193 xfs_bmbt_rec_host_t *ep;/* extent record pointer */
1194
1195 ASSERT((ifp->if_flags &
1196 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
1197 memset(&args, 0, sizeof(args));
1198 args.tp = tp;
1199 args.mp = ip->i_mount;
1200 args.firstblock = *firstblock;
1201 /*
1202 * Allocate a block. We know we need only one, since the
1203 * file currently fits in an inode.
1204 */
1205 if (*firstblock == NULLFSBLOCK) {
1206 args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
1207 args.type = XFS_ALLOCTYPE_START_BNO;
1208 } else {
1209 args.fsbno = *firstblock;
1210 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1211 }
1212 args.total = total;
1213 args.minlen = args.maxlen = args.prod = 1;
1214 error = xfs_alloc_vextent(&args);
1215 if (error)
1216 goto done;
1217
1218 /* Can't fail, the space was reserved. */
1219 ASSERT(args.fsbno != NULLFSBLOCK);
1220 ASSERT(args.len == 1);
1221 *firstblock = args.fsbno;
1222 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
1223
1224 /* initialise the block and copy the data */
1225 init_fn(tp, bp, ip, ifp);
1226
1227 /* account for the change in fork size and log everything */
1228 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
1229 xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
1230 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
1231 xfs_iext_add(ifp, 0, 1);
1232 ep = xfs_iext_get_ext(ifp, 0);
1233 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
1234 trace_xfs_bmap_post_update(ip, 0,
1235 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
1236 _THIS_IP_);
1237 XFS_IFORK_NEXT_SET(ip, whichfork, 1);
1238 ip->i_d.di_nblocks = 1;
1239 xfs_trans_mod_dquot_byino(tp, ip,
1240 XFS_TRANS_DQ_BCOUNT, 1L);
1241 flags |= xfs_ilog_fext(whichfork);
1242 } else {
1243 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
1244 xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork);
1245 }
1246 ifp->if_flags &= ~XFS_IFINLINE;
1247 ifp->if_flags |= XFS_IFEXTENTS;
1248 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
1249 flags |= XFS_ILOG_CORE;
1250done:
1251 *logflagsp = flags;
1252 return error;
1253}
1254
1255/*
293 * Called from xfs_bmap_add_attrfork to handle btree format files. 1256 * Called from xfs_bmap_add_attrfork to handle btree format files.
294 */ 1257 */
295STATIC int /* error */ 1258STATIC int /* error */
@@ -360,29 +1323,22 @@ xfs_bmap_add_attrfork_extents(
360} 1323}
361 1324
362/* 1325/*
363 * Block initialisation functions for local to extent format conversion. 1326 * Block initialisation function for local to extent format conversion.
364 * As these get more complex, they will be moved to the relevant files, 1327 *
365 * but for now they are too simple to worry about. 1328 * This shouldn't actually be called by anyone, so make sure debug kernels cause
1329 * a noticable failure.
366 */ 1330 */
367STATIC void 1331STATIC void
368xfs_bmap_local_to_extents_init_fn( 1332xfs_bmap_local_to_extents_init_fn(
1333 struct xfs_trans *tp,
369 struct xfs_buf *bp, 1334 struct xfs_buf *bp,
370 struct xfs_inode *ip, 1335 struct xfs_inode *ip,
371 struct xfs_ifork *ifp) 1336 struct xfs_ifork *ifp)
372{ 1337{
1338 ASSERT(0);
373 bp->b_ops = &xfs_bmbt_buf_ops; 1339 bp->b_ops = &xfs_bmbt_buf_ops;
374 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); 1340 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
375} 1341 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
376
377STATIC void
378xfs_symlink_local_to_remote(
379 struct xfs_buf *bp,
380 struct xfs_inode *ip,
381 struct xfs_ifork *ifp)
382{
383 /* remote symlink blocks are not verifiable until CRCs come along */
384 bp->b_ops = NULL;
385 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
386} 1342}
387 1343
388/* 1344/*
@@ -394,8 +1350,7 @@ xfs_symlink_local_to_remote(
394 * 1350 *
395 * XXX (dgc): investigate whether directory conversion can use the generic 1351 * XXX (dgc): investigate whether directory conversion can use the generic
396 * formatting callout. It should be possible - it's just a very complex 1352 * formatting callout. It should be possible - it's just a very complex
397 * formatter. it would also require passing the transaction through to the init 1353 * formatter.
398 * function.
399 */ 1354 */
400STATIC int /* error */ 1355STATIC int /* error */
401xfs_bmap_add_attrfork_local( 1356xfs_bmap_add_attrfork_local(
@@ -432,6 +1387,640 @@ xfs_bmap_add_attrfork_local(
432} 1387}
433 1388
434/* 1389/*
1390 * Convert inode from non-attributed to attributed.
1391 * Must not be in a transaction, ip must not be locked.
1392 */
int						/* error code */
xfs_bmap_add_attrfork(
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			size,		/* space new attribute needs */
	int			rsvd)		/* xact may use reserved blks */
{
	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
	xfs_bmap_free_t		flist;		/* freed extent records */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_trans_t		*tp;		/* transaction pointer */
	int			blks;		/* space reservation */
	int			version = 1;	/* superblock attr version */
	int			committed;	/* xaction was committed */
	int			logflags;	/* logging flags */
	int			error;		/* error return value */

	/* Caller guarantees there is no attribute fork yet. */
	ASSERT(XFS_IFORK_Q(ip) == 0);

	mp = ip->i_mount;
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
	/*
	 * Set up a permanent transaction with enough block and log
	 * reservation to shift the data fork and create the attr fork.
	 * Until xfs_ilock() below, nothing is locked, so a reservation
	 * failure only needs to cancel the transaction (error0).
	 */
	tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
	blks = XFS_ADDAFORK_SPACE_RES(mp);
	if (rsvd)
		tp->t_flags |= XFS_TRANS_RESERVE;
	if ((error = xfs_trans_reserve(tp, blks, XFS_ADDAFORK_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
		goto error0;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
			XFS_QMOPT_RES_REGBLKS);
	if (error) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
		return error;
	}
	/* Re-check under the lock: someone may have raced us to add it. */
	if (XFS_IFORK_Q(ip))
		goto error1;
	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
		/*
		 * For inodes coming from pre-6.2 filesystems.
		 */
		ASSERT(ip->i_d.di_aformat == 0);
		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	}
	ASSERT(ip->i_d.di_anextents == 0);

	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/*
	 * Pick the attr fork offset (di_forkoff, in 8-byte units) based on
	 * the data fork format.  For the variable-size formats an attr2
	 * placement bumps the superblock feature version to 2.
	 */
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_DEV:
		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_UUID:
		ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
		else if (mp->m_flags & XFS_MOUNT_ATTR2)
			version = 2;
		break;
	default:
		ASSERT(0);
		error = XFS_ERROR(EINVAL);
		goto error1;
	}

	/* Allocate the incore attr fork, then convert the data fork if
	 * its format requires moving data to make room. */
	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
	ip->i_afp->if_flags = XFS_IFEXTENTS;
	logflags = 0;
	xfs_bmap_init(&flist, &firstblock);
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
			&logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
			&flist, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
			&logflags);
		break;
	default:
		error = 0;
		break;
	}
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		goto error2;
	/*
	 * If this is the first attr on the filesystem, or the first attr2
	 * placement, turn the feature bit(s) on in the superblock and log
	 * the change in this transaction.
	 */
	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
		__int64_t sbfields = 0;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
			xfs_sb_version_addattr(&mp->m_sb);
			sbfields |= XFS_SB_VERSIONNUM;
		}
		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
			xfs_sb_version_addattr2(&mp->m_sb);
			sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
		}
		if (sbfields) {
			spin_unlock(&mp->m_sb_lock);
			xfs_mod_sb(tp, sbfields);
		} else
			spin_unlock(&mp->m_sb_lock);
	}

	error = xfs_bmap_finish(&tp, &flist, &committed);
	if (error)
		goto error2;
	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
error2:
	xfs_bmap_cancel(&flist);
error1:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
error0:
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
	return error;
}
1523
1524/*
1525 * Internal and external extent tree search functions.
1526 */
1527
1528/*
1529 * Read in the extents to if_extents.
1530 * All inode fields are set up by caller, we just traverse the btree
1531 * and copy the records in. If the file system cannot contain unwritten
1532 * extents, the records are checked for no "state" flags.
1533 */
int					/* error */
xfs_bmap_read_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode */
	int			whichfork) /* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_exntfmt_t		exntf;	/* XFS_EXTFMT_NOSTATE, if checking */
	xfs_extnum_t		i, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	/* REFERENCED */
	xfs_extnum_t		room;	/* number of entries there's room for */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	/* Attr forks never carry unwritten-extent state bits. */
	exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
					XFS_EXTFMT_INODE(ip);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLDFSBNO);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
		XFS_WANT_CORRUPTED_GOTO(
			xfs_bmap_sanity_check(mp, bp, level),
			error0);
		if (level == 0)
			break;
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
		xfs_trans_brelse(tp, bp);
	}
	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	i = 0;
	/*
	 * Loop over all leaf nodes. Copy information to the extent records.
	 */
	for (;;) {
		xfs_bmbt_rec_t	*frp;
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;
		xfs_extnum_t	start;

		num_recs = xfs_btree_get_numrecs(block);
		/* Overflowing the preallocated in-core list means the
		 * on-disk extent count is inconsistent with di_nextents. */
		if (unlikely(i + num_recs > room)) {
			ASSERT(i + num_recs <= room);
			xfs_warn(ip->i_mount,
				"corrupt dinode %Lu, (btree extents).",
				(unsigned long long) ip->i_ino);
			XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
				XFS_ERRLEVEL_LOW, ip->i_mount, block);
			goto error0;
		}
		XFS_WANT_CORRUPTED_GOTO(
			xfs_bmap_sanity_check(mp, bp, 0),
			error0);
		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		if (nextbno != NULLFSBLOCK)
			xfs_btree_reada_bufl(mp, nextbno, 1,
					     &xfs_bmbt_buf_ops);
		/*
		 * Copy records into the extent records.
		 */
		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
		start = i;
		for (j = 0; j < num_recs; j++, i++, frp++) {
			xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
			trp->l0 = be64_to_cpu(frp->l0);
			trp->l1 = be64_to_cpu(frp->l1);
		}
		if (exntf == XFS_EXTFMT_NOSTATE) {
			/*
			 * Check all attribute bmap btree records and
			 * any "older" data bmap btree records for a
			 * set bit in the "extent flag" position.
			 */
			if (unlikely(xfs_check_nostate_extents(ifp,
					start, num_recs))) {
				XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
						 XFS_ERRLEVEL_LOW,
						 ip->i_mount);
				goto error0;
			}
		}
		xfs_trans_brelse(tp, bp);
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
	}
	ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
	XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
	return 0;
error0:
	xfs_trans_brelse(tp, bp);
	return XFS_ERROR(EFSCORRUPTED);
}
1668
1669
1670/*
1671 * Search the extent records for the entry containing block bno.
1672 * If bno lies in a hole, point to the next entry. If bno lies
1673 * past eof, *eofp will be set, and *prevp will contain the last
1674 * entry (null if none). Else, *lastxp will be set to the index
1675 * of the found entry; *gotp will contain the entry.
1676 */
1677STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */
1678xfs_bmap_search_multi_extents(
1679 xfs_ifork_t *ifp, /* inode fork pointer */
1680 xfs_fileoff_t bno, /* block number searched for */
1681 int *eofp, /* out: end of file found */
1682 xfs_extnum_t *lastxp, /* out: last extent index */
1683 xfs_bmbt_irec_t *gotp, /* out: extent entry found */
1684 xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */
1685{
1686 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
1687 xfs_extnum_t lastx; /* last extent index */
1688
1689 /*
1690 * Initialize the extent entry structure to catch access to
1691 * uninitialized br_startblock field.
1692 */
1693 gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
1694 gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
1695 gotp->br_state = XFS_EXT_INVALID;
1696#if XFS_BIG_BLKNOS
1697 gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
1698#else
1699 gotp->br_startblock = 0xffffa5a5;
1700#endif
1701 prevp->br_startoff = NULLFILEOFF;
1702
1703 ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
1704 if (lastx > 0) {
1705 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
1706 }
1707 if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
1708 xfs_bmbt_get_all(ep, gotp);
1709 *eofp = 0;
1710 } else {
1711 if (lastx > 0) {
1712 *gotp = *prevp;
1713 }
1714 *eofp = 1;
1715 ep = NULL;
1716 }
1717 *lastxp = lastx;
1718 return ep;
1719}
1720
1721/*
1722 * Search the extents list for the inode, for the extent containing bno.
1723 * If bno lies in a hole, point to the next entry. If bno lies past eof,
1724 * *eofp will be set, and *prevp will contain the last entry (null if none).
1725 * Else, *lastxp will be set to the index of the found
1726 * entry; *gotp will contain the entry.
1727 */
STATIC xfs_bmbt_rec_host_t *                 /* pointer to found extent entry */
xfs_bmap_search_extents(
	xfs_inode_t     *ip,            /* incore inode pointer */
	xfs_fileoff_t   bno,            /* block number searched for */
	int             fork,           /* data or attr fork */
	int             *eofp,          /* out: end of file found */
	xfs_extnum_t    *lastxp,        /* out: last extent index */
	xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
	xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
{
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_bmbt_rec_host_t  *ep;	/* extent record pointer */

	XFS_STATS_INC(xs_look_exlist);
	ifp = XFS_IFORK_PTR(ip, fork);

	ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);

	/*
	 * A mapping that starts at block zero is only legitimate for the
	 * data fork of a realtime inode; anywhere else it indicates
	 * corruption.  Alert, then report EOF with a NULL return so the
	 * caller will not use the bad mapping.
	 */
	if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
		     !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
		xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
				"Access to block zero in inode %llu "
				"start_block: %llx start_off: %llx "
				"blkcnt: %llx extent-state: %x lastx: %x\n",
			(unsigned long long)ip->i_ino,
			(unsigned long long)gotp->br_startblock,
			(unsigned long long)gotp->br_startoff,
			(unsigned long long)gotp->br_blockcount,
			gotp->br_state, *lastxp);
		*lastxp = NULLEXTNUM;
		*eofp = 1;
		return NULL;
	}
	return ep;
}
1763
1764/*
1765 * Returns the file-relative block number of the first unused block(s)
1766 * in the file with at least "len" logically contiguous blocks free.
1767 * This is the lowest-address hole if the file has holes, else the first block
1768 * past the end of file.
1769 * Return 0 if the file is currently local (in-inode).
1770 */
1771int /* error */
1772xfs_bmap_first_unused(
1773 xfs_trans_t *tp, /* transaction pointer */
1774 xfs_inode_t *ip, /* incore inode */
1775 xfs_extlen_t len, /* size of hole to find */
1776 xfs_fileoff_t *first_unused, /* unused block */
1777 int whichfork) /* data or attr fork */
1778{
1779 int error; /* error return value */
1780 int idx; /* extent record index */
1781 xfs_ifork_t *ifp; /* inode fork pointer */
1782 xfs_fileoff_t lastaddr; /* last block number seen */
1783 xfs_fileoff_t lowest; /* lowest useful block */
1784 xfs_fileoff_t max; /* starting useful block */
1785 xfs_fileoff_t off; /* offset for this block */
1786 xfs_extnum_t nextents; /* number of extent entries */
1787
1788 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
1789 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
1790 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
1791 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1792 *first_unused = 0;
1793 return 0;
1794 }
1795 ifp = XFS_IFORK_PTR(ip, whichfork);
1796 if (!(ifp->if_flags & XFS_IFEXTENTS) &&
1797 (error = xfs_iread_extents(tp, ip, whichfork)))
1798 return error;
1799 lowest = *first_unused;
1800 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1801 for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
1802 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
1803 off = xfs_bmbt_get_startoff(ep);
1804 /*
1805 * See if the hole before this extent will work.
1806 */
1807 if (off >= lowest + len && off - max >= len) {
1808 *first_unused = max;
1809 return 0;
1810 }
1811 lastaddr = off + xfs_bmbt_get_blockcount(ep);
1812 max = XFS_FILEOFF_MAX(lastaddr, lowest);
1813 }
1814 *first_unused = max;
1815 return 0;
1816}
1817
1818/*
1819 * Returns the file-relative block number of the last block + 1 before
1820 * last_block (input value) in the file.
1821 * This is not based on i_size, it is based on the extent records.
1822 * Returns 0 for local files, as they do not have extent records.
1823 */
1824int /* error */
1825xfs_bmap_last_before(
1826 xfs_trans_t *tp, /* transaction pointer */
1827 xfs_inode_t *ip, /* incore inode */
1828 xfs_fileoff_t *last_block, /* last block */
1829 int whichfork) /* data or attr fork */
1830{
1831 xfs_fileoff_t bno; /* input file offset */
1832 int eof; /* hit end of file */
1833 xfs_bmbt_rec_host_t *ep; /* pointer to last extent */
1834 int error; /* error return value */
1835 xfs_bmbt_irec_t got; /* current extent value */
1836 xfs_ifork_t *ifp; /* inode fork pointer */
1837 xfs_extnum_t lastx; /* last extent used */
1838 xfs_bmbt_irec_t prev; /* previous extent value */
1839
1840 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1841 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
1842 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
1843 return XFS_ERROR(EIO);
1844 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1845 *last_block = 0;
1846 return 0;
1847 }
1848 ifp = XFS_IFORK_PTR(ip, whichfork);
1849 if (!(ifp->if_flags & XFS_IFEXTENTS) &&
1850 (error = xfs_iread_extents(tp, ip, whichfork)))
1851 return error;
1852 bno = *last_block - 1;
1853 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
1854 &prev);
1855 if (eof || xfs_bmbt_get_startoff(ep) > bno) {
1856 if (prev.br_startoff == NULLFILEOFF)
1857 *last_block = 0;
1858 else
1859 *last_block = prev.br_startoff + prev.br_blockcount;
1860 }
1861 /*
1862 * Otherwise *last_block is already the right answer.
1863 */
1864 return 0;
1865}
1866
1867STATIC int
1868xfs_bmap_last_extent(
1869 struct xfs_trans *tp,
1870 struct xfs_inode *ip,
1871 int whichfork,
1872 struct xfs_bmbt_irec *rec,
1873 int *is_empty)
1874{
1875 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
1876 int error;
1877 int nextents;
1878
1879 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1880 error = xfs_iread_extents(tp, ip, whichfork);
1881 if (error)
1882 return error;
1883 }
1884
1885 nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
1886 if (nextents == 0) {
1887 *is_empty = 1;
1888 return 0;
1889 }
1890
1891 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
1892 *is_empty = 0;
1893 return 0;
1894}
1895
1896/*
1897 * Check the last inode extent to determine whether this allocation will result
1898 * in blocks being allocated at the end of the file. When we allocate new data
1899 * blocks at the end of the file which do not start at the previous data block,
1900 * we will try to align the new blocks at stripe unit boundaries.
1901 *
1902 * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be
1903 * at, or past the EOF.
1904 */
1905STATIC int
1906xfs_bmap_isaeof(
1907 struct xfs_bmalloca *bma,
1908 int whichfork)
1909{
1910 struct xfs_bmbt_irec rec;
1911 int is_empty;
1912 int error;
1913
1914 bma->aeof = 0;
1915 error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1916 &is_empty);
1917 if (error || is_empty)
1918 return error;
1919
1920 /*
1921 * Check if we are allocation or past the last extent, or at least into
1922 * the last delayed allocated extent.
1923 */
1924 bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1925 (bma->offset >= rec.br_startoff &&
1926 isnullstartblock(rec.br_startblock));
1927 return 0;
1928}
1929
1930/*
1931 * Check if the endoff is outside the last extent. If so the caller will grow
1932 * the allocation to a stripe unit boundary. All offsets are considered outside
1933 * the end of file for an empty fork, so 1 is returned in *eof in that case.
1934 */
1935int
1936xfs_bmap_eof(
1937 struct xfs_inode *ip,
1938 xfs_fileoff_t endoff,
1939 int whichfork,
1940 int *eof)
1941{
1942 struct xfs_bmbt_irec rec;
1943 int error;
1944
1945 error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
1946 if (error || *eof)
1947 return error;
1948
1949 *eof = endoff >= rec.br_startoff + rec.br_blockcount;
1950 return 0;
1951}
1952
1953/*
1954 * Returns the file-relative block number of the first block past eof in
1955 * the file. This is not based on i_size, it is based on the extent records.
1956 * Returns 0 for local files, as they do not have extent records.
1957 */
1958int
1959xfs_bmap_last_offset(
1960 struct xfs_trans *tp,
1961 struct xfs_inode *ip,
1962 xfs_fileoff_t *last_block,
1963 int whichfork)
1964{
1965 struct xfs_bmbt_irec rec;
1966 int is_empty;
1967 int error;
1968
1969 *last_block = 0;
1970
1971 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
1972 return 0;
1973
1974 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1975 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1976 return XFS_ERROR(EIO);
1977
1978 error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1979 if (error || is_empty)
1980 return error;
1981
1982 *last_block = rec.br_startoff + rec.br_blockcount;
1983 return 0;
1984}
1985
1986/*
1987 * Returns whether the selected fork of the inode has exactly one
1988 * block or not. For the data fork we check this matches di_size,
1989 * implying the file's range is 0..bsize-1.
1990 */
1991int /* 1=>1 block, 0=>otherwise */
1992xfs_bmap_one_block(
1993 xfs_inode_t *ip, /* incore inode */
1994 int whichfork) /* data or attr fork */
1995{
1996 xfs_bmbt_rec_host_t *ep; /* ptr to fork's extent */
1997 xfs_ifork_t *ifp; /* inode fork pointer */
1998 int rval; /* return value */
1999 xfs_bmbt_irec_t s; /* internal version of extent */
2000
2001#ifndef DEBUG
2002 if (whichfork == XFS_DATA_FORK)
2003 return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
2004#endif /* !DEBUG */
2005 if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
2006 return 0;
2007 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
2008 return 0;
2009 ifp = XFS_IFORK_PTR(ip, whichfork);
2010 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
2011 ep = xfs_iext_get_ext(ifp, 0);
2012 xfs_bmbt_get_all(ep, &s);
2013 rval = s.br_startoff == 0 && s.br_blockcount == 1;
2014 if (rval && whichfork == XFS_DATA_FORK)
2015 ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
2016 return rval;
2017}
2018
2019/*
2020 * Extent tree manipulation functions used during allocation.
2021 */
2022
2023/*
435 * Convert a delayed allocation to a real allocation. 2024 * Convert a delayed allocation to a real allocation.
436 */ 2025 */
437STATIC int /* error */ 2026STATIC int /* error */
@@ -1894,6 +3483,10 @@ done:
1894} 3483}
1895 3484
1896/* 3485/*
3486 * Functions used in the extent read, allocate and remove paths
3487 */
3488
3489/*
1897 * Adjust the size of the new extent based on di_extsize and rt extsize. 3490 * Adjust the size of the new extent based on di_extsize and rt extsize.
1898 */ 3491 */
1899STATIC int 3492STATIC int
@@ -2666,1628 +4259,6 @@ xfs_bmap_alloc(
2666} 4259}
2667 4260
2668/* 4261/*
2669 * Transform a btree format file with only one leaf node, where the
2670 * extents list will fit in the inode, into an extents format file.
2671 * Since the file extents are already in-core, all we have to do is
2672 * give up the space for the btree root and pitch the leaf block.
2673 */
STATIC int				/* error */
xfs_bmap_btree_to_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			*logflagsp, /* inode logging flags */
	int			whichfork)  /* data or attr fork */
{
	/* REFERENCED */
	struct xfs_btree_block	*cblock;/* child btree block */
	xfs_fsblock_t		cbno;	/* child block number */
	xfs_buf_t		*cbp;	/* child block's buffer */
	int			error;	/* error return value */
	xfs_ifork_t		*ifp;	/* inode fork data */
	xfs_mount_t		*mp;	/* mount point structure */
	__be64			*pp;	/* ptr to block address */
	struct xfs_btree_block	*rblock;/* root btree block */

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
	/* Only valid for a height-1 tree whose root holds a single
	 * pointer to the one remaining leaf block. */
	rblock = ifp->if_broot;
	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
	cbno = be64_to_cpu(*pp);
	*logflagsp = 0;
#ifdef DEBUG
	if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
		return error;
#endif
	error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
				&xfs_bmbt_buf_ops);
	if (error)
		return error;
	cblock = XFS_BUF_TO_BLOCK(cbp);
	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
		return error;
	/* Free the leaf block, account for it, and invalidate its buffer
	 * so a stale copy is never written back. */
	xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
	ip->i_d.di_nblocks--;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
	xfs_trans_binval(tp, cbp);
	/* Drop the cursor's reference to the now-invalidated buffer. */
	if (cur->bc_bufs[0] == cbp)
		cur->bc_bufs[0] = NULL;
	/* Give back the root block's incore space and flip the fork to
	 * extents format. */
	xfs_iroot_realloc(ip, -1, whichfork);
	ASSERT(ifp->if_broot == NULL);
	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
	return 0;
}
2727
2728/*
2729 * Called by xfs_bmapi to update file extent records and the btree
2730 * after removing space (or undoing a delayed allocation).
2731 */
STATIC int				/* error */
xfs_bmap_del_extent(
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_trans_t		*tp,	/* current transaction pointer */
	xfs_extnum_t		*idx,	/* extent number to update/delete */
	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
	xfs_btree_cur_t		*cur,	/* if null, not a btree */
	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
	int			*logflagsp, /* inode logging flags */
	int			whichfork) /* data or attr fork */
{
	xfs_filblks_t		da_new;	/* new delay-alloc indirect blocks */
	xfs_filblks_t		da_old;	/* old delay-alloc indirect blocks */
	xfs_fsblock_t		del_endblock=0;	/* first block past del */
	xfs_fileoff_t		del_endoff;	/* first offset past del */
	int			delay;	/* current block is delayed allocated */
	int			do_fx;	/* free extent at end of routine */
	xfs_bmbt_rec_host_t	*ep;	/* current extent entry pointer */
	int			error;	/* error return value */
	int			flags;	/* inode logging flags */
	xfs_bmbt_irec_t		got;	/* current extent entry */
	xfs_fileoff_t		got_endoff;	/* first offset past got */
	int			i;	/* temp state */
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_mount_t		*mp;	/* mount structure */
	xfs_filblks_t		nblks;	/* quota/sb block count */
	xfs_bmbt_irec_t		new;	/* new record to be inserted */
	/* REFERENCED */
	uint			qfield;	/* quota field to update */
	xfs_filblks_t		temp;	/* for indirect length calculations */
	xfs_filblks_t		temp2;	/* for indirect length calculations */
	int			state = 0;

	XFS_STATS_INC(xs_del_exlist);

	if (whichfork == XFS_ATTR_FORK)
		state |= BMAP_ATTRFORK;

	/* del must lie entirely within the extent at *idx. */
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
		(uint)sizeof(xfs_bmbt_rec_t)));
	ASSERT(del->br_blockcount > 0);
	ep = xfs_iext_get_ext(ifp, *idx);
	xfs_bmbt_get_all(ep, &got);
	ASSERT(got.br_startoff <= del->br_startoff);
	del_endoff = del->br_startoff + del->br_blockcount;
	got_endoff = got.br_startoff + got.br_blockcount;
	ASSERT(got_endoff >= del_endoff);
	delay = isnullstartblock(got.br_startblock);
	ASSERT(isnullstartblock(del->br_startblock) == delay);
	flags = 0;
	qfield = 0;
	error = 0;
	/*
	 * If deleting a real allocation, must free up the disk space.
	 */
	if (!delay) {
		flags = XFS_ILOG_CORE;
		/*
		 * Realtime allocation.  Free it and record di_nblocks update.
		 */
		if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
			xfs_fsblock_t	bno;
			xfs_filblks_t	len;

			ASSERT(do_mod(del->br_blockcount,
				      mp->m_sb.sb_rextsize) == 0);
			ASSERT(do_mod(del->br_startblock,
				      mp->m_sb.sb_rextsize) == 0);
			bno = del->br_startblock;
			len = del->br_blockcount;
			do_div(bno, mp->m_sb.sb_rextsize);
			do_div(len, mp->m_sb.sb_rextsize);
			error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
			if (error)
				goto done;
			do_fx = 0;
			nblks = len * mp->m_sb.sb_rextsize;
			qfield = XFS_TRANS_DQ_RTBCOUNT;
		}
		/*
		 * Ordinary allocation.
		 */
		else {
			do_fx = 1;
			nblks = del->br_blockcount;
			qfield = XFS_TRANS_DQ_BCOUNT;
		}
		/*
		 * Set up del_endblock and cur for later.
		 */
		del_endblock = del->br_startblock + del->br_blockcount;
		if (cur) {
			if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
					got.br_startblock, got.br_blockcount,
					&i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
		}
		da_old = da_new = 0;
	} else {
		/* Delayed allocation: track the indirect-block reservation
		 * encoded in the startblock so we can give back the excess. */
		da_old = startblockval(got.br_startblock);
		da_new = 0;
		nblks = 0;
		do_fx = 0;
	}
	/*
	 * Set flag value to use in switch statement.
	 * Left-contig is 2, right-contig is 1.
	 */
	switch (((got.br_startoff == del->br_startoff) << 1) |
		(got_endoff == del_endoff)) {
	case 3:
		/*
		 * Matches the whole extent.  Delete the entry.
		 */
		xfs_iext_remove(ip, *idx, 1,
				whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
		--*idx;
		if (delay)
			break;

		XFS_IFORK_NEXT_SET(ip, whichfork,
			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
		flags |= XFS_ILOG_CORE;
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_btree_delete(cur, &i)))
			goto done;
		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
		break;

	case 2:
		/*
		 * Deleting the first part of the extent.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_startoff(ep, del_endoff);
		temp = got.br_blockcount - del->br_blockcount;
		xfs_bmbt_set_blockcount(ep, temp);
		if (delay) {
			/* Shrink the indirect reservation to match the
			 * remaining length, never growing it. */
			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
				da_old);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
			da_new = temp;
			break;
		}
		xfs_bmbt_set_startblock(ep, del_endblock);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
				got.br_blockcount - del->br_blockcount,
				got.br_state)))
			goto done;
		break;

	case 1:
		/*
		 * Deleting the last part of the extent.
		 */
		temp = got.br_blockcount - del->br_blockcount;
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		if (delay) {
			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
				da_old);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
			da_new = temp;
			break;
		}
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_bmbt_update(cur, got.br_startoff,
				got.br_startblock,
				got.br_blockcount - del->br_blockcount,
				got.br_state)))
			goto done;
		break;

	case 0:
		/*
		 * Deleting the middle of the extent.
		 */
		temp = del->br_startoff - got.br_startoff;
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		new.br_startoff = del_endoff;
		temp2 = got_endoff - del_endoff;
		new.br_blockcount = temp2;
		new.br_state = got.br_state;
		if (!delay) {
			new.br_startblock = del_endblock;
			flags |= XFS_ILOG_CORE;
			if (cur) {
				if ((error = xfs_bmbt_update(cur,
						got.br_startoff,
						got.br_startblock, temp,
						got.br_state)))
					goto done;
				if ((error = xfs_btree_increment(cur, 0, &i)))
					goto done;
				cur->bc_rec.b = new;
				error = xfs_btree_insert(cur, &i);
				if (error && error != ENOSPC)
					goto done;
				/*
				 * If get no-space back from btree insert,
				 * it tried a split, and we have a zero
				 * block reservation.
				 * Fix up our state and return the error.
				 */
				if (error == ENOSPC) {
					/*
					 * Reset the cursor, don't trust
					 * it after any insert operation.
					 */
					if ((error = xfs_bmbt_lookup_eq(cur,
							got.br_startoff,
							got.br_startblock,
							temp, &i)))
						goto done;
					XFS_WANT_CORRUPTED_GOTO(i == 1, done);
					/*
					 * Update the btree record back
					 * to the original value.
					 */
					if ((error = xfs_bmbt_update(cur,
							got.br_startoff,
							got.br_startblock,
							got.br_blockcount,
							got.br_state)))
						goto done;
					/*
					 * Reset the extent record back
					 * to the original value.
					 */
					xfs_bmbt_set_blockcount(ep,
						got.br_blockcount);
					flags = 0;
					error = XFS_ERROR(ENOSPC);
					goto done;
				}
				XFS_WANT_CORRUPTED_GOTO(i == 1, done);
			} else
				flags |= xfs_ilog_fext(whichfork);
			XFS_IFORK_NEXT_SET(ip, whichfork,
				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
		} else {
			ASSERT(whichfork == XFS_DATA_FORK);
			/*
			 * Split a delalloc reservation in two; if the worst
			 * case indirect needs of both halves exceed the old
			 * reservation, squeeze them down until they fit.
			 */
			temp = xfs_bmap_worst_indlen(ip, temp);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			temp2 = xfs_bmap_worst_indlen(ip, temp2);
			new.br_startblock = nullstartblock((int)temp2);
			da_new = temp + temp2;
			while (da_new > da_old) {
				if (temp) {
					temp--;
					da_new--;
					xfs_bmbt_set_startblock(ep,
						nullstartblock((int)temp));
				}
				if (da_new == da_old)
					break;
				if (temp2) {
					temp2--;
					da_new--;
					new.br_startblock =
						nullstartblock((int)temp2);
				}
			}
		}
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		xfs_iext_insert(ip, *idx + 1, 1, &new, state);
		++*idx;
		break;
	}
	/*
	 * If we need to, add to list of extents to delete.
	 */
	if (do_fx)
		xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
			mp);
	/*
	 * Adjust inode # blocks in the file.
	 */
	if (nblks)
		ip->i_d.di_nblocks -= nblks;
	/*
	 * Adjust quota data.
	 */
	if (qfield)
		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);

	/*
	 * Account for change in delayed indirect blocks.
	 * Nothing to do for disk quota accounting here.
	 */
	ASSERT(da_old >= da_new);
	if (da_old > da_new) {
		xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
			(int64_t)(da_old - da_new), 0);
	}
done:
	*logflagsp = flags;
	return error;
}
3049
3050/*
3051 * Remove the entry "free" from the free item list. Prev points to the
3052 * previous entry, unless "free" is the head of the list.
3053 */
3054STATIC void
3055xfs_bmap_del_free(
3056 xfs_bmap_free_t *flist, /* free item list header */
3057 xfs_bmap_free_item_t *prev, /* previous item on list, if any */
3058 xfs_bmap_free_item_t *free) /* list item to be freed */
3059{
3060 if (prev)
3061 prev->xbfi_next = free->xbfi_next;
3062 else
3063 flist->xbf_first = free->xbfi_next;
3064 flist->xbf_count--;
3065 kmem_zone_free(xfs_bmap_free_item_zone, free);
3066}
3067
3068/*
3069 * Convert an extents-format file into a btree-format file.
3070 * The new file will have a root block (in the inode) and a single child block.
3071 */
STATIC int					/* error */
xfs_bmap_extents_to_btree(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first-block-allocated */
	xfs_bmap_free_t		*flist,		/* blocks freed in xaction */
	xfs_btree_cur_t		**curp,		/* cursor returned to caller */
	int			wasdel,		/* converting a delayed alloc */
	int			*logflagsp,	/* inode logging flags */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
	xfs_buf_t		*abp;		/* buffer for ablock */
	xfs_alloc_arg_t		args;		/* allocation arguments */
	xfs_bmbt_rec_t		*arp;		/* child record pointer */
	struct xfs_btree_block	*block;		/* btree root block */
	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
	xfs_bmbt_rec_host_t	*ep;		/* extent record pointer */
	int			error;		/* error return value */
	xfs_extnum_t		i, cnt;		/* extent record index */
	xfs_ifork_t		*ifp;		/* inode fork pointer */
	xfs_bmbt_key_t		*kp;		/* root block key pointer */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_extnum_t		nextents;	/* number of file extents */
	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */

	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);

	/*
	 * Make space in the inode incore.
	 */
	xfs_iroot_realloc(ip, 1, whichfork);
	ifp->if_flags |= XFS_IFBROOT;

	/*
	 * Fill in the root.  Level 1, one record, no siblings.
	 */
	block = ifp->if_broot;
	block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
	block->bb_level = cpu_to_be16(1);
	block->bb_numrecs = cpu_to_be16(1);
	block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
	block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);

	/*
	 * Need a cursor.  Can't allocate until bb_level is filled in.
	 */
	mp = ip->i_mount;
	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
	cur->bc_private.b.firstblock = *firstblock;
	cur->bc_private.b.flist = flist;
	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
	/*
	 * Convert to a btree with two levels, one record in root.
	 */
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = mp;
	args.firstblock = *firstblock;
	if (*firstblock == NULLFSBLOCK) {
		/* First allocation in this transaction: aim near the inode. */
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
	} else if (flist->xbf_low) {
		/* Low-space mode: search upward from the last allocation. */
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = *firstblock;
	} else {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.fsbno = *firstblock;
	}
	/* A single block suffices for the one child block. */
	args.minlen = args.maxlen = args.prod = 1;
	args.wasdel = wasdel;
	*logflagsp = 0;
	if ((error = xfs_alloc_vextent(&args))) {
		/* Undo the incore root change and tear down the cursor. */
		xfs_iroot_realloc(ip, -1, whichfork);
		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
		return error;
	}
	/*
	 * Allocation can't fail, the space was reserved.
	 */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(*firstblock == NULLFSBLOCK ||
	       args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
	       (flist->xbf_low &&
		args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
	cur->bc_private.b.allocated++;
	ip->i_d.di_nblocks++;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
	abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
	/*
	 * Fill in the child block.  Attach the bmbt verifier ops first so
	 * the buffer is verified when written.
	 */
	abp->b_ops = &xfs_bmbt_buf_ops;
	ablock = XFS_BUF_TO_BLOCK(abp);
	ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
	ablock->bb_level = 0;
	ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
	ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	for (cnt = i = 0; i < nextents; i++) {
		ep = xfs_iext_get_ext(ifp, i);
		/*
		 * Copy only real extents; delayed allocations (null
		 * startblock) have no on-disk presence yet.
		 */
		if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
			arp->l0 = cpu_to_be64(ep->l0);
			arp->l1 = cpu_to_be64(ep->l1);
			arp++; cnt++;
		}
	}
	ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
	xfs_btree_set_numrecs(ablock, cnt);

	/*
	 * Fill in the root key and pointer.
	 */
	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
						be16_to_cpu(block->bb_level)));
	*pp = cpu_to_be64(args.fsbno);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
	ASSERT(*curp == NULL);
	*curp = cur;
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
	return 0;
}
3207
3208/*
3209 * Calculate the default attribute fork offset for newly created inodes.
3210 */
3211uint
3212xfs_default_attroffset(
3213 struct xfs_inode *ip)
3214{
3215 struct xfs_mount *mp = ip->i_mount;
3216 uint offset;
3217
3218 if (mp->m_sb.sb_inodesize == 256) {
3219 offset = XFS_LITINO(mp) -
3220 XFS_BMDR_SPACE_CALC(MINABTPTRS);
3221 } else {
3222 offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
3223 }
3224
3225 ASSERT(offset < XFS_LITINO(mp));
3226 return offset;
3227}
3228
3229/*
3230 * Helper routine to reset inode di_forkoff field when switching
3231 * attribute fork from local to extent format - we reset it where
3232 * possible to make space available for inline data fork extents.
3233 */
3234STATIC void
3235xfs_bmap_forkoff_reset(
3236 xfs_mount_t *mp,
3237 xfs_inode_t *ip,
3238 int whichfork)
3239{
3240 if (whichfork == XFS_ATTR_FORK &&
3241 ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
3242 ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
3243 ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
3244 uint dfl_forkoff = xfs_default_attroffset(ip) >> 3;
3245
3246 if (dfl_forkoff > ip->i_d.di_forkoff)
3247 ip->i_d.di_forkoff = dfl_forkoff;
3248 }
3249}
3250
3251/*
3252 * Convert a local file to an extents file.
3253 * This code is out of bounds for data forks of regular files,
3254 * since the file data needs to get logged so things will stay consistent.
3255 * (The bmap-level manipulations are ok, though).
3256 */
STATIC int				/* error */
xfs_bmap_local_to_extents(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fsblock_t	*firstblock,	/* first block allocated in xaction */
	xfs_extlen_t	total,		/* total blocks needed by transaction */
	int		*logflagsp,	/* inode logging flags */
	int		whichfork,	/* data or attr fork */
	void		(*init_fn)(struct xfs_buf *bp,
				   struct xfs_inode *ip,
				   struct xfs_ifork *ifp))
					/* callback: initialise the new block
					 * and copy the inline data into it */
{
	int		error;		/* error return value */
	int		flags;		/* logging flags returned */
	xfs_ifork_t	*ifp;		/* inode fork pointer */

	/*
	 * We don't want to deal with the case of keeping inode data inline yet.
	 * So sending the data fork of a regular inode is invalid.
	 */
	ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
	flags = 0;
	error = 0;
	if (ifp->if_bytes) {
		xfs_alloc_arg_t	args;	/* allocation arguments */
		xfs_buf_t	*bp;	/* buffer for extent block */
		xfs_bmbt_rec_host_t *ep;/* extent record pointer */

		ASSERT((ifp->if_flags &
			(XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
		memset(&args, 0, sizeof(args));
		args.tp = tp;
		args.mp = ip->i_mount;
		args.firstblock = *firstblock;
		/*
		 * Allocate a block.  We know we need only one, since the
		 * file currently fits in an inode.
		 */
		if (*firstblock == NULLFSBLOCK) {
			args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
			args.type = XFS_ALLOCTYPE_START_BNO;
		} else {
			args.fsbno = *firstblock;
			args.type = XFS_ALLOCTYPE_NEAR_BNO;
		}
		args.total = total;
		args.minlen = args.maxlen = args.prod = 1;
		error = xfs_alloc_vextent(&args);
		if (error)
			goto done;

		/* Can't fail, the space was reserved. */
		ASSERT(args.fsbno != NULLFSBLOCK);
		ASSERT(args.len == 1);
		*firstblock = args.fsbno;
		bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);

		/* initialise the block and copy the data */
		init_fn(bp, ip, ifp);

		/* account for the change in fork size and log everything */
		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
		xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
		/*
		 * Switch the incore fork to a single-extent list pointing
		 * at the newly allocated block.
		 */
		xfs_iext_add(ifp, 0, 1);
		ep = xfs_iext_get_ext(ifp, 0);
		xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
		trace_xfs_bmap_post_update(ip, 0,
				whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
				_THIS_IP_);
		XFS_IFORK_NEXT_SET(ip, whichfork, 1);
		ip->i_d.di_nblocks = 1;
		xfs_trans_mod_dquot_byino(tp, ip,
			XFS_TRANS_DQ_BCOUNT, 1L);
		flags |= xfs_ilog_fext(whichfork);
	} else {
		/* Empty fork: nothing to copy, just reset the fork offset. */
		ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
		xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork);
	}
	/* Flip the incore/on-disk format from local to extents. */
	ifp->if_flags &= ~XFS_IFINLINE;
	ifp->if_flags |= XFS_IFEXTENTS;
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
	flags |= XFS_ILOG_CORE;
done:
	*logflagsp = flags;
	return error;
}
3346
3347/*
3348 * Search the extent records for the entry containing block bno.
3349 * If bno lies in a hole, point to the next entry. If bno lies
3350 * past eof, *eofp will be set, and *prevp will contain the last
3351 * entry (null if none). Else, *lastxp will be set to the index
3352 * of the found entry; *gotp will contain the entry.
3353 */
3354STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */
3355xfs_bmap_search_multi_extents(
3356 xfs_ifork_t *ifp, /* inode fork pointer */
3357 xfs_fileoff_t bno, /* block number searched for */
3358 int *eofp, /* out: end of file found */
3359 xfs_extnum_t *lastxp, /* out: last extent index */
3360 xfs_bmbt_irec_t *gotp, /* out: extent entry found */
3361 xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */
3362{
3363 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
3364 xfs_extnum_t lastx; /* last extent index */
3365
3366 /*
3367 * Initialize the extent entry structure to catch access to
3368 * uninitialized br_startblock field.
3369 */
3370 gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
3371 gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
3372 gotp->br_state = XFS_EXT_INVALID;
3373#if XFS_BIG_BLKNOS
3374 gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
3375#else
3376 gotp->br_startblock = 0xffffa5a5;
3377#endif
3378 prevp->br_startoff = NULLFILEOFF;
3379
3380 ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
3381 if (lastx > 0) {
3382 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
3383 }
3384 if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
3385 xfs_bmbt_get_all(ep, gotp);
3386 *eofp = 0;
3387 } else {
3388 if (lastx > 0) {
3389 *gotp = *prevp;
3390 }
3391 *eofp = 1;
3392 ep = NULL;
3393 }
3394 *lastxp = lastx;
3395 return ep;
3396}
3397
3398/*
3399 * Search the extents list for the inode, for the extent containing bno.
3400 * If bno lies in a hole, point to the next entry. If bno lies past eof,
3401 * *eofp will be set, and *prevp will contain the last entry (null if none).
3402 * Else, *lastxp will be set to the index of the found
3403 * entry; *gotp will contain the entry.
3404 */
STATIC xfs_bmbt_rec_host_t *                 /* pointer to found extent entry */
xfs_bmap_search_extents(
	xfs_inode_t     *ip,            /* incore inode pointer */
	xfs_fileoff_t   bno,            /* block number searched for */
	int             fork,      	/* data or attr fork */
	int             *eofp,          /* out: end of file found */
	xfs_extnum_t    *lastxp,        /* out: last extent index */
	xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
	xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
{
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_bmbt_rec_host_t  *ep;            /* extent record pointer */

	XFS_STATS_INC(xs_look_exlist);
	ifp = XFS_IFORK_PTR(ip, fork);

	ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);

	/*
	 * Sanity check: a zero br_startblock in a found mapping indicates a
	 * corrupt extent record (block zero is only legitimate for realtime
	 * data forks, which the condition excludes).  Alert, and make the
	 * mapping look like EOF so the caller does not use it.
	 */
	if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
		     !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
		xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
				"Access to block zero in inode %llu "
				"start_block: %llx start_off: %llx "
				"blkcnt: %llx extent-state: %x lastx: %x\n",
			(unsigned long long)ip->i_ino,
			(unsigned long long)gotp->br_startblock,
			(unsigned long long)gotp->br_startoff,
			(unsigned long long)gotp->br_blockcount,
			gotp->br_state, *lastxp);
		*lastxp = NULLEXTNUM;
		*eofp = 1;
		return NULL;
	}
	return ep;
}
3440
3441/*
3442 * Compute the worst-case number of indirect blocks that will be used
3443 * for ip's delayed extent of length "len".
3444 */
3445STATIC xfs_filblks_t
3446xfs_bmap_worst_indlen(
3447 xfs_inode_t *ip, /* incore inode pointer */
3448 xfs_filblks_t len) /* delayed extent length */
3449{
3450 int level; /* btree level number */
3451 int maxrecs; /* maximum record count at this level */
3452 xfs_mount_t *mp; /* mount structure */
3453 xfs_filblks_t rval; /* return value */
3454
3455 mp = ip->i_mount;
3456 maxrecs = mp->m_bmap_dmxr[0];
3457 for (level = 0, rval = 0;
3458 level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
3459 level++) {
3460 len += maxrecs - 1;
3461 do_div(len, maxrecs);
3462 rval += len;
3463 if (len == 1)
3464 return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
3465 level - 1;
3466 if (level == 0)
3467 maxrecs = mp->m_bmap_dmxr[1];
3468 }
3469 return rval;
3470}
3471
3472/*
3473 * Convert inode from non-attributed to attributed.
3474 * Must not be in a transaction, ip must not be locked.
3475 */
int						/* error code */
xfs_bmap_add_attrfork(
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			size,		/* space new attribute needs */
	int			rsvd)		/* xact may use reserved blks */
{
	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
	xfs_bmap_free_t		flist;		/* freed extent records */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_trans_t		*tp;		/* transaction pointer */
	int			blks;		/* space reservation */
	int			version = 1;	/* superblock attr version */
	int			committed;	/* xaction was committed */
	int			logflags;	/* logging flags */
	int			error;		/* error return value */

	/* Caller must not already have an attribute fork. */
	ASSERT(XFS_IFORK_Q(ip) == 0);

	mp = ip->i_mount;
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
	/*
	 * Set up and reserve a permanent transaction, then take the inode
	 * lock and reserve quota for the blocks we may allocate.
	 */
	tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
	blks = XFS_ADDAFORK_SPACE_RES(mp);
	if (rsvd)
		tp->t_flags |= XFS_TRANS_RESERVE;
	if ((error = xfs_trans_reserve(tp, blks, XFS_ADDAFORK_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
		goto error0;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
			XFS_QMOPT_RES_REGBLKS);
	if (error) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
		return error;
	}
	/* Recheck under the lock: someone may have raced in an attr fork. */
	if (XFS_IFORK_Q(ip))
		goto error1;
	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
		/*
		 * For inodes coming from pre-6.2 filesystems.
		 */
		ASSERT(ip->i_d.di_aformat == 0);
		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	}
	ASSERT(ip->i_d.di_anextents == 0);

	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/*
	 * Pick the attribute fork offset based on the data fork format.
	 */
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_DEV:
		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_UUID:
		ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
		else if (mp->m_flags & XFS_MOUNT_ATTR2)
			/* Non-default offset requires the ATTR2 feature. */
			version = 2;
		break;
	default:
		ASSERT(0);
		error = XFS_ERROR(EINVAL);
		goto error1;
	}

	/*
	 * Create the incore attribute fork, then convert the data fork as
	 * needed to make room for it.
	 */
	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
	ip->i_afp->if_flags = XFS_IFEXTENTS;
	logflags = 0;
	xfs_bmap_init(&flist, &firstblock);
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
			&logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
			&flist, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
			&logflags);
		break;
	default:
		error = 0;
		break;
	}
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		goto error2;
	/*
	 * Turn on the superblock ATTR (and, if needed, ATTR2) feature bits
	 * the first time an attribute fork is created on this filesystem.
	 */
	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
		__int64_t sbfields = 0;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
			xfs_sb_version_addattr(&mp->m_sb);
			sbfields |= XFS_SB_VERSIONNUM;
		}
		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
			xfs_sb_version_addattr2(&mp->m_sb);
			sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
		}
		if (sbfields) {
			spin_unlock(&mp->m_sb_lock);
			xfs_mod_sb(tp, sbfields);
		} else
			spin_unlock(&mp->m_sb_lock);
	}

	/* Free any extents queued during the conversion, then commit. */
	error = xfs_bmap_finish(&tp, &flist, &committed);
	if (error)
		goto error2;
	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
error2:
	xfs_bmap_cancel(&flist);
error1:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
error0:
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
	return error;
}
3606
3607/*
3608 * Add the extent to the list of extents to be free at transaction end.
3609 * The list is maintained sorted (by block number).
3610 */
3611/* ARGSUSED */
3612void
3613xfs_bmap_add_free(
3614 xfs_fsblock_t bno, /* fs block number of extent */
3615 xfs_filblks_t len, /* length of extent */
3616 xfs_bmap_free_t *flist, /* list of extents */
3617 xfs_mount_t *mp) /* mount point structure */
3618{
3619 xfs_bmap_free_item_t *cur; /* current (next) element */
3620 xfs_bmap_free_item_t *new; /* new element */
3621 xfs_bmap_free_item_t *prev; /* previous element */
3622#ifdef DEBUG
3623 xfs_agnumber_t agno;
3624 xfs_agblock_t agbno;
3625
3626 ASSERT(bno != NULLFSBLOCK);
3627 ASSERT(len > 0);
3628 ASSERT(len <= MAXEXTLEN);
3629 ASSERT(!isnullstartblock(bno));
3630 agno = XFS_FSB_TO_AGNO(mp, bno);
3631 agbno = XFS_FSB_TO_AGBNO(mp, bno);
3632 ASSERT(agno < mp->m_sb.sb_agcount);
3633 ASSERT(agbno < mp->m_sb.sb_agblocks);
3634 ASSERT(len < mp->m_sb.sb_agblocks);
3635 ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
3636#endif
3637 ASSERT(xfs_bmap_free_item_zone != NULL);
3638 new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
3639 new->xbfi_startblock = bno;
3640 new->xbfi_blockcount = (xfs_extlen_t)len;
3641 for (prev = NULL, cur = flist->xbf_first;
3642 cur != NULL;
3643 prev = cur, cur = cur->xbfi_next) {
3644 if (cur->xbfi_startblock >= bno)
3645 break;
3646 }
3647 if (prev)
3648 prev->xbfi_next = new;
3649 else
3650 flist->xbf_first = new;
3651 new->xbfi_next = cur;
3652 flist->xbf_count++;
3653}
3654
3655/*
3656 * Compute and fill in the value of the maximum depth of a bmap btree
3657 * in this filesystem. Done once, during mount.
3658 */
3659void
3660xfs_bmap_compute_maxlevels(
3661 xfs_mount_t *mp, /* file system mount structure */
3662 int whichfork) /* data or attr fork */
3663{
3664 int level; /* btree level */
3665 uint maxblocks; /* max blocks at this level */
3666 uint maxleafents; /* max leaf entries possible */
3667 int maxrootrecs; /* max records in root block */
3668 int minleafrecs; /* min records in leaf block */
3669 int minnoderecs; /* min records in node block */
3670 int sz; /* root block size */
3671
3672 /*
3673 * The maximum number of extents in a file, hence the maximum
3674 * number of leaf entries, is controlled by the type of di_nextents
3675 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
3676 * (a signed 16-bit number, xfs_aextnum_t).
3677 *
3678 * Note that we can no longer assume that if we are in ATTR1 that
3679 * the fork offset of all the inodes will be
3680 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
3681 * with ATTR2 and then mounted back with ATTR1, keeping the
3682 * di_forkoff's fixed but probably at various positions. Therefore,
3683 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
3684 * of a minimum size available.
3685 */
3686 if (whichfork == XFS_DATA_FORK) {
3687 maxleafents = MAXEXTNUM;
3688 sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
3689 } else {
3690 maxleafents = MAXAEXTNUM;
3691 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
3692 }
3693 maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0);
3694 minleafrecs = mp->m_bmap_dmnr[0];
3695 minnoderecs = mp->m_bmap_dmnr[1];
3696 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
3697 for (level = 1; maxblocks > 1; level++) {
3698 if (maxblocks <= maxrootrecs)
3699 maxblocks = 1;
3700 else
3701 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
3702 }
3703 mp->m_bm_maxlevels[whichfork] = level;
3704}
3705
3706/*
3707 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
3708 * caller. Frees all the extents that need freeing, which must be done
3709 * last due to locking considerations. We never free any extents in
3710 * the first transaction.
3711 *
3712 * Return 1 if the given transaction was committed and a new one
3713 * started, and 0 otherwise in the committed parameter.
3714 */
int						/* error */
xfs_bmap_finish(
	xfs_trans_t		**tp,		/* transaction pointer addr */
	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
	int			*committed)	/* xact committed or not */
{
	xfs_efd_log_item_t	*efd;		/* extent free data */
	xfs_efi_log_item_t	*efi;		/* extent free intention */
	int			error;		/* error return value */
	xfs_bmap_free_item_t	*free;		/* free extent item */
	unsigned int		logres;		/* new log reservation */
	unsigned int		logcount;	/* new log count */
	xfs_mount_t		*mp;		/* filesystem mount structure */
	xfs_bmap_free_item_t	*next;		/* next item on free list */
	xfs_trans_t		*ntp;		/* new transaction pointer */

	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
	/* Nothing to free: leave the transaction alone. */
	if (flist->xbf_count == 0) {
		*committed = 0;
		return 0;
	}
	ntp = *tp;
	/*
	 * Log an EFI (extent free intention) covering every extent on the
	 * list in the first transaction.
	 */
	efi = xfs_trans_get_efi(ntp, flist->xbf_count);
	for (free = flist->xbf_first; free; free = free->xbfi_next)
		xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
			free->xbfi_blockcount);
	/*
	 * Roll the transaction: duplicate it, commit the original, and
	 * continue in the duplicate.  The actual frees happen in the new
	 * transaction so they are never in the same one as the allocation.
	 */
	logres = ntp->t_log_res;
	logcount = ntp->t_log_count;
	ntp = xfs_trans_dup(*tp);
	error = xfs_trans_commit(*tp, 0);
	*tp = ntp;
	*committed = 1;
	/*
	 * We have a new transaction, so we should return committed=1,
	 * even though we're returning an error.
	 */
	if (error)
		return error;

	/*
	 * transaction commit worked ok so we can drop the extra ticket
	 * reference that we gained in xfs_trans_dup()
	 */
	xfs_log_ticket_put(ntp->t_ticket);

	if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES,
			logcount)))
		return error;
	/*
	 * Free each extent and log a matching EFD (extent free done)
	 * record against the EFI logged above.
	 */
	efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
	for (free = flist->xbf_first; free != NULL; free = next) {
		next = free->xbfi_next;
		if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
				free->xbfi_blockcount))) {
			/*
			 * The bmap free list will be cleaned up at a
			 * higher level.  The EFI will be canceled when
			 * this transaction is aborted.
			 * Need to force shutdown here to make sure it
			 * happens, since this transaction may not be
			 * dirty yet.
			 */
			mp = ntp->t_mountp;
			if (!XFS_FORCED_SHUTDOWN(mp))
				xfs_force_shutdown(mp,
						   (error == EFSCORRUPTED) ?
						   SHUTDOWN_CORRUPT_INCORE :
						   SHUTDOWN_META_IO_ERROR);
			return error;
		}
		xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
			free->xbfi_blockcount);
		xfs_bmap_del_free(flist, NULL, free);
	}
	return 0;
}
3790
3791/*
3792 * Free up any items left in the list.
3793 */
3794void
3795xfs_bmap_cancel(
3796 xfs_bmap_free_t *flist) /* list of bmap_free_items */
3797{
3798 xfs_bmap_free_item_t *free; /* free list item */
3799 xfs_bmap_free_item_t *next;
3800
3801 if (flist->xbf_count == 0)
3802 return;
3803 ASSERT(flist->xbf_first != NULL);
3804 for (free = flist->xbf_first; free; free = next) {
3805 next = free->xbfi_next;
3806 xfs_bmap_del_free(flist, NULL, free);
3807 }
3808 ASSERT(flist->xbf_count == 0);
3809}
3810
3811/*
3812 * Returns the file-relative block number of the first unused block(s)
3813 * in the file with at least "len" logically contiguous blocks free.
3814 * This is the lowest-address hole if the file has holes, else the first block
3815 * past the end of file.
3816 * Return 0 if the file is currently local (in-inode).
3817 */
3818int /* error */
3819xfs_bmap_first_unused(
3820 xfs_trans_t *tp, /* transaction pointer */
3821 xfs_inode_t *ip, /* incore inode */
3822 xfs_extlen_t len, /* size of hole to find */
3823 xfs_fileoff_t *first_unused, /* unused block */
3824 int whichfork) /* data or attr fork */
3825{
3826 int error; /* error return value */
3827 int idx; /* extent record index */
3828 xfs_ifork_t *ifp; /* inode fork pointer */
3829 xfs_fileoff_t lastaddr; /* last block number seen */
3830 xfs_fileoff_t lowest; /* lowest useful block */
3831 xfs_fileoff_t max; /* starting useful block */
3832 xfs_fileoff_t off; /* offset for this block */
3833 xfs_extnum_t nextents; /* number of extent entries */
3834
3835 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
3836 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
3837 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
3838 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
3839 *first_unused = 0;
3840 return 0;
3841 }
3842 ifp = XFS_IFORK_PTR(ip, whichfork);
3843 if (!(ifp->if_flags & XFS_IFEXTENTS) &&
3844 (error = xfs_iread_extents(tp, ip, whichfork)))
3845 return error;
3846 lowest = *first_unused;
3847 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3848 for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
3849 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
3850 off = xfs_bmbt_get_startoff(ep);
3851 /*
3852 * See if the hole before this extent will work.
3853 */
3854 if (off >= lowest + len && off - max >= len) {
3855 *first_unused = max;
3856 return 0;
3857 }
3858 lastaddr = off + xfs_bmbt_get_blockcount(ep);
3859 max = XFS_FILEOFF_MAX(lastaddr, lowest);
3860 }
3861 *first_unused = max;
3862 return 0;
3863}
3864
3865/*
3866 * Returns the file-relative block number of the last block + 1 before
3867 * last_block (input value) in the file.
3868 * This is not based on i_size, it is based on the extent records.
3869 * Returns 0 for local files, as they do not have extent records.
3870 */
int						/* error */
xfs_bmap_last_before(
	xfs_trans_t	*tp,			/* transaction pointer */
	xfs_inode_t	*ip,			/* incore inode */
	xfs_fileoff_t	*last_block,		/* last block */
	int		whichfork)		/* data or attr fork */
{
	xfs_fileoff_t	bno;			/* input file offset */
	int		eof;			/* hit end of file */
	xfs_bmbt_rec_host_t *ep;		/* pointer to last extent */
	int		error;			/* error return value */
	xfs_bmbt_irec_t	got;			/* current extent value */
	xfs_ifork_t	*ifp;			/* inode fork pointer */
	xfs_extnum_t	lastx;			/* last extent used */
	xfs_bmbt_irec_t	prev;			/* previous extent value */

	/* Only btree, extents and local formats carry mappings. */
	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
	       return XFS_ERROR(EIO);
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		/* Local files have no extent records at all. */
		*last_block = 0;
		return 0;
	}
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(tp, ip, whichfork)))
		return error;
	/* Look up the extent containing the block just before *last_block. */
	bno = *last_block - 1;
	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
		&prev);
	if (eof || xfs_bmbt_get_startoff(ep) > bno) {
		/* bno is past eof or in a hole: end of the previous extent. */
		if (prev.br_startoff == NULLFILEOFF)
			*last_block = 0;
		else
			*last_block = prev.br_startoff + prev.br_blockcount;
	}
	/*
	 * Otherwise *last_block is already the right answer.
	 */
	return 0;
}
3913
3914STATIC int
3915xfs_bmap_last_extent(
3916 struct xfs_trans *tp,
3917 struct xfs_inode *ip,
3918 int whichfork,
3919 struct xfs_bmbt_irec *rec,
3920 int *is_empty)
3921{
3922 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
3923 int error;
3924 int nextents;
3925
3926 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
3927 error = xfs_iread_extents(tp, ip, whichfork);
3928 if (error)
3929 return error;
3930 }
3931
3932 nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
3933 if (nextents == 0) {
3934 *is_empty = 1;
3935 return 0;
3936 }
3937
3938 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
3939 *is_empty = 0;
3940 return 0;
3941}
3942
3943/*
3944 * Check the last inode extent to determine whether this allocation will result
3945 * in blocks being allocated at the end of the file. When we allocate new data
3946 * blocks at the end of the file which do not start at the previous data block,
3947 * we will try to align the new blocks at stripe unit boundaries.
3948 *
3949 * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be
3950 * at, or past the EOF.
3951 */
3952STATIC int
3953xfs_bmap_isaeof(
3954 struct xfs_bmalloca *bma,
3955 int whichfork)
3956{
3957 struct xfs_bmbt_irec rec;
3958 int is_empty;
3959 int error;
3960
3961 bma->aeof = 0;
3962 error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
3963 &is_empty);
3964 if (error || is_empty)
3965 return error;
3966
3967 /*
3968 * Check if we are allocation or past the last extent, or at least into
3969 * the last delayed allocated extent.
3970 */
3971 bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
3972 (bma->offset >= rec.br_startoff &&
3973 isnullstartblock(rec.br_startblock));
3974 return 0;
3975}
3976
3977/*
3978 * Check if the endoff is outside the last extent. If so the caller will grow
3979 * the allocation to a stripe unit boundary. All offsets are considered outside
3980 * the end of file for an empty fork, so 1 is returned in *eof in that case.
3981 */
3982int
3983xfs_bmap_eof(
3984 struct xfs_inode *ip,
3985 xfs_fileoff_t endoff,
3986 int whichfork,
3987 int *eof)
3988{
3989 struct xfs_bmbt_irec rec;
3990 int error;
3991
3992 error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
3993 if (error || *eof)
3994 return error;
3995
3996 *eof = endoff >= rec.br_startoff + rec.br_blockcount;
3997 return 0;
3998}
3999
4000/*
4001 * Returns the file-relative block number of the first block past eof in
4002 * the file. This is not based on i_size, it is based on the extent records.
4003 * Returns 0 for local files, as they do not have extent records.
4004 */
4005int
4006xfs_bmap_last_offset(
4007 struct xfs_trans *tp,
4008 struct xfs_inode *ip,
4009 xfs_fileoff_t *last_block,
4010 int whichfork)
4011{
4012 struct xfs_bmbt_irec rec;
4013 int is_empty;
4014 int error;
4015
4016 *last_block = 0;
4017
4018 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
4019 return 0;
4020
4021 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
4022 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
4023 return XFS_ERROR(EIO);
4024
4025 error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
4026 if (error || is_empty)
4027 return error;
4028
4029 *last_block = rec.br_startoff + rec.br_blockcount;
4030 return 0;
4031}
4032
4033/*
4034 * Returns whether the selected fork of the inode has exactly one
4035 * block or not. For the data fork we check this matches di_size,
4036 * implying the file's range is 0..bsize-1.
4037 */
4038int /* 1=>1 block, 0=>otherwise */
4039xfs_bmap_one_block(
4040 xfs_inode_t *ip, /* incore inode */
4041 int whichfork) /* data or attr fork */
4042{
4043 xfs_bmbt_rec_host_t *ep; /* ptr to fork's extent */
4044 xfs_ifork_t *ifp; /* inode fork pointer */
4045 int rval; /* return value */
4046 xfs_bmbt_irec_t s; /* internal version of extent */
4047
4048#ifndef DEBUG
4049 if (whichfork == XFS_DATA_FORK)
4050 return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
4051#endif /* !DEBUG */
4052 if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
4053 return 0;
4054 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
4055 return 0;
4056 ifp = XFS_IFORK_PTR(ip, whichfork);
4057 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
4058 ep = xfs_iext_get_ext(ifp, 0);
4059 xfs_bmbt_get_all(ep, &s);
4060 rval = s.br_startoff == 0 && s.br_blockcount == 1;
4061 if (rval && whichfork == XFS_DATA_FORK)
4062 ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
4063 return rval;
4064}
4065
4066STATIC int
4067xfs_bmap_sanity_check(
4068 struct xfs_mount *mp,
4069 struct xfs_buf *bp,
4070 int level)
4071{
4072 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4073
4074 if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) ||
4075 be16_to_cpu(block->bb_level) != level ||
4076 be16_to_cpu(block->bb_numrecs) == 0 ||
4077 be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
4078 return 0;
4079 return 1;
4080}
4081
/*
 * Read in the extents to if_extents.
 * All inode fields are set up by caller, we just traverse the btree
 * and copy the records in. If the file system cannot contain unwritten
 * extents, the records are checked for no "state" flags.
 *
 * Walks the leftmost spine of the bmap btree down to the leaf level,
 * then iterates right across all leaf blocks, copying their records
 * into the in-core extent list.  Returns 0 on success or
 * XFS_ERROR(EFSCORRUPTED) if the btree fails a sanity check.
 */
int					/* error */
xfs_bmap_read_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode */
	int			whichfork) /* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_exntfmt_t		exntf;	/* XFS_EXTFMT_NOSTATE, if checking */
	xfs_extnum_t		i, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	/* REFERENCED */
	xfs_extnum_t		room;	/* number of entries there's room for */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	/* Attr forks never carry unwritten-extent state flags. */
	exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
					XFS_EXTFMT_INODE(ip);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLDFSBNO);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
		XFS_WANT_CORRUPTED_GOTO(
			xfs_bmap_sanity_check(mp, bp, level),
			error0);
		if (level == 0)
			break;
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
		/* Release each interior buffer before descending further. */
		xfs_trans_brelse(tp, bp);
	}
	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	i = 0;
	/*
	 * Loop over all leaf nodes. Copy information to the extent records.
	 */
	for (;;) {
		xfs_bmbt_rec_t	*frp;
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;
		xfs_extnum_t	start;

		num_recs = xfs_btree_get_numrecs(block);
		/* More records than the in-core list can hold: corruption. */
		if (unlikely(i + num_recs > room)) {
			ASSERT(i + num_recs <= room);
			xfs_warn(ip->i_mount,
				"corrupt dinode %Lu, (btree extents).",
				(unsigned long long) ip->i_ino);
			XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
				XFS_ERRLEVEL_LOW, ip->i_mount, block);
			goto error0;
		}
		XFS_WANT_CORRUPTED_GOTO(
			xfs_bmap_sanity_check(mp, bp, 0),
			error0);
		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		if (nextbno != NULLFSBLOCK)
			xfs_btree_reada_bufl(mp, nextbno, 1,
					     &xfs_bmbt_buf_ops);
		/*
		 * Copy records into the extent records.
		 */
		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
		start = i;
		for (j = 0; j < num_recs; j++, i++, frp++) {
			xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
			trp->l0 = be64_to_cpu(frp->l0);
			trp->l1 = be64_to_cpu(frp->l1);
		}
		if (exntf == XFS_EXTFMT_NOSTATE) {
			/*
			 * Check all attribute bmap btree records and
			 * any "older" data bmap btree records for a
			 * set bit in the "extent flag" position.
			 */
			if (unlikely(xfs_check_nostate_extents(ifp,
					start, num_recs))) {
				XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
						 XFS_ERRLEVEL_LOW,
						 ip->i_mount);
				goto error0;
			}
		}
		xfs_trans_brelse(tp, bp);
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
	}
	ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
	XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
	return 0;
error0:
	xfs_trans_brelse(tp, bp);
	return XFS_ERROR(EFSCORRUPTED);
}
4222
4223#ifdef DEBUG
4224/*
4225 * Add bmap trace insert entries for all the contents of the extent records.
4226 */
4227void
4228xfs_bmap_trace_exlist(
4229 xfs_inode_t *ip, /* incore inode pointer */
4230 xfs_extnum_t cnt, /* count of entries in the list */
4231 int whichfork, /* data or attr fork */
4232 unsigned long caller_ip)
4233{
4234 xfs_extnum_t idx; /* extent record index */
4235 xfs_ifork_t *ifp; /* inode fork pointer */
4236 int state = 0;
4237
4238 if (whichfork == XFS_ATTR_FORK)
4239 state |= BMAP_ATTRFORK;
4240
4241 ifp = XFS_IFORK_PTR(ip, whichfork);
4242 ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
4243 for (idx = 0; idx < cnt; idx++)
4244 trace_xfs_extlist(ip, idx, whichfork, caller_ip);
4245}
4246
4247/*
4248 * Validate that the bmbt_irecs being returned from bmapi are valid
4249 * given the callers original parameters. Specifically check the
4250 * ranges of the returned irecs to ensure that they only extent beyond
4251 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
4252 */
4253STATIC void
4254xfs_bmap_validate_ret(
4255 xfs_fileoff_t bno,
4256 xfs_filblks_t len,
4257 int flags,
4258 xfs_bmbt_irec_t *mval,
4259 int nmap,
4260 int ret_nmap)
4261{
4262 int i; /* index to map values */
4263
4264 ASSERT(ret_nmap <= nmap);
4265
4266 for (i = 0; i < ret_nmap; i++) {
4267 ASSERT(mval[i].br_blockcount > 0);
4268 if (!(flags & XFS_BMAPI_ENTIRE)) {
4269 ASSERT(mval[i].br_startoff >= bno);
4270 ASSERT(mval[i].br_blockcount <= len);
4271 ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
4272 bno + len);
4273 } else {
4274 ASSERT(mval[i].br_startoff < bno + len);
4275 ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
4276 bno);
4277 }
4278 ASSERT(i == 0 ||
4279 mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
4280 mval[i].br_startoff);
4281 ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
4282 mval[i].br_startblock != HOLESTARTBLOCK);
4283 ASSERT(mval[i].br_state == XFS_EXT_NORM ||
4284 mval[i].br_state == XFS_EXT_UNWRITTEN);
4285 }
4286}
4287#endif /* DEBUG */
4288
4289
4290/*
4291 * Trim the returned map to the required bounds 4262 * Trim the returned map to the required bounds
4292 */ 4263 */
4293STATIC void 4264STATIC void
@@ -5151,6 +5122,328 @@ error0:
5151} 5122}
5152 5123
/*
 * Called by xfs_bmapi to update file extent records and the btree
 * after removing space (or undoing a delayed allocation).
 *
 * *idx names the extent record covering the range in *del; on return it
 * points at the record preceding/containing the remaining space.  The
 * inode logging flags the caller must apply are returned in *logflagsp.
 */
STATIC int				/* error */
xfs_bmap_del_extent(
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_trans_t		*tp,	/* current transaction pointer */
	xfs_extnum_t		*idx,	/* extent number to update/delete */
	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
	xfs_btree_cur_t		*cur,	/* if null, not a btree */
	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
	int			*logflagsp, /* inode logging flags */
	int			whichfork) /* data or attr fork */
{
	xfs_filblks_t		da_new;	/* new delay-alloc indirect blocks */
	xfs_filblks_t		da_old;	/* old delay-alloc indirect blocks */
	xfs_fsblock_t		del_endblock=0;	/* first block past del */
	xfs_fileoff_t		del_endoff;	/* first offset past del */
	int			delay;	/* current block is delayed allocated */
	int			do_fx;	/* free extent at end of routine */
	xfs_bmbt_rec_host_t	*ep;	/* current extent entry pointer */
	int			error;	/* error return value */
	int			flags;	/* inode logging flags */
	xfs_bmbt_irec_t		got;	/* current extent entry */
	xfs_fileoff_t		got_endoff;	/* first offset past got */
	int			i;	/* temp state */
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_mount_t		*mp;	/* mount structure */
	xfs_filblks_t		nblks;	/* quota/sb block count */
	xfs_bmbt_irec_t		new;	/* new record to be inserted */
	/* REFERENCED */
	uint			qfield;	/* quota field to update */
	xfs_filblks_t		temp;	/* for indirect length calculations */
	xfs_filblks_t		temp2;	/* for indirect length calculations */
	int			state = 0;

	XFS_STATS_INC(xs_del_exlist);

	if (whichfork == XFS_ATTR_FORK)
		state |= BMAP_ATTRFORK;

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
		(uint)sizeof(xfs_bmbt_rec_t)));
	ASSERT(del->br_blockcount > 0);
	ep = xfs_iext_get_ext(ifp, *idx);
	xfs_bmbt_get_all(ep, &got);
	/* del must be fully contained within the existing record got. */
	ASSERT(got.br_startoff <= del->br_startoff);
	del_endoff = del->br_startoff + del->br_blockcount;
	got_endoff = got.br_startoff + got.br_blockcount;
	ASSERT(got_endoff >= del_endoff);
	delay = isnullstartblock(got.br_startblock);
	ASSERT(isnullstartblock(del->br_startblock) == delay);
	flags = 0;
	qfield = 0;
	error = 0;
	/*
	 * If deleting a real allocation, must free up the disk space.
	 */
	if (!delay) {
		flags = XFS_ILOG_CORE;
		/*
		 * Realtime allocation.  Free it and record di_nblocks update.
		 */
		if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
			xfs_fsblock_t	bno;
			xfs_filblks_t	len;

			ASSERT(do_mod(del->br_blockcount,
				      mp->m_sb.sb_rextsize) == 0);
			ASSERT(do_mod(del->br_startblock,
				      mp->m_sb.sb_rextsize) == 0);
			bno = del->br_startblock;
			len = del->br_blockcount;
			/* Convert blocks to realtime extents before freeing. */
			do_div(bno, mp->m_sb.sb_rextsize);
			do_div(len, mp->m_sb.sb_rextsize);
			error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
			if (error)
				goto done;
			do_fx = 0;
			nblks = len * mp->m_sb.sb_rextsize;
			qfield = XFS_TRANS_DQ_RTBCOUNT;
		}
		/*
		 * Ordinary allocation.
		 */
		else {
			do_fx = 1;
			nblks = del->br_blockcount;
			qfield = XFS_TRANS_DQ_BCOUNT;
		}
		/*
		 * Set up del_endblock and cur for later.
		 */
		del_endblock = del->br_startblock + del->br_blockcount;
		if (cur) {
			if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
					got.br_startblock, got.br_blockcount,
					&i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
		}
		da_old = da_new = 0;
	} else {
		/* Delayed allocation: track the indirect-block reservation. */
		da_old = startblockval(got.br_startblock);
		da_new = 0;
		nblks = 0;
		do_fx = 0;
	}
	/*
	 * Set flag value to use in switch statement.
	 * Left-contig is 2, right-contig is 1.
	 * (3 = del covers all of got, 2 = del at start, 1 = del at end,
	 *  0 = del splits got in the middle.)
	 */
	switch (((got.br_startoff == del->br_startoff) << 1) |
		(got_endoff == del_endoff)) {
	case 3:
		/*
		 * Matches the whole extent.  Delete the entry.
		 */
		xfs_iext_remove(ip, *idx, 1,
				whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
		--*idx;
		if (delay)
			break;

		XFS_IFORK_NEXT_SET(ip, whichfork,
			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
		flags |= XFS_ILOG_CORE;
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_btree_delete(cur, &i)))
			goto done;
		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
		break;

	case 2:
		/*
		 * Deleting the first part of the extent.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_startoff(ep, del_endoff);
		temp = got.br_blockcount - del->br_blockcount;
		xfs_bmbt_set_blockcount(ep, temp);
		if (delay) {
			/* Shrink the indirect reservation with the extent. */
			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
				da_old);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
			da_new = temp;
			break;
		}
		xfs_bmbt_set_startblock(ep, del_endblock);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
				got.br_blockcount - del->br_blockcount,
				got.br_state)))
			goto done;
		break;

	case 1:
		/*
		 * Deleting the last part of the extent.
		 */
		temp = got.br_blockcount - del->br_blockcount;
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		if (delay) {
			/* Shrink the indirect reservation with the extent. */
			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
				da_old);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
			da_new = temp;
			break;
		}
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_bmbt_update(cur, got.br_startoff,
				got.br_startblock,
				got.br_blockcount - del->br_blockcount,
				got.br_state)))
			goto done;
		break;

	case 0:
		/*
		 * Deleting the middle of the extent.
		 * Truncate got in place and insert a new record for the
		 * right-hand remainder.
		 */
		temp = del->br_startoff - got.br_startoff;
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		new.br_startoff = del_endoff;
		temp2 = got_endoff - del_endoff;
		new.br_blockcount = temp2;
		new.br_state = got.br_state;
		if (!delay) {
			new.br_startblock = del_endblock;
			flags |= XFS_ILOG_CORE;
			if (cur) {
				if ((error = xfs_bmbt_update(cur,
						got.br_startoff,
						got.br_startblock, temp,
						got.br_state)))
					goto done;
				if ((error = xfs_btree_increment(cur, 0, &i)))
					goto done;
				cur->bc_rec.b = new;
				error = xfs_btree_insert(cur, &i);
				if (error && error != ENOSPC)
					goto done;
				/*
				 * If get no-space back from btree insert,
				 * it tried a split, and we have a zero
				 * block reservation.
				 * Fix up our state and return the error.
				 */
				if (error == ENOSPC) {
					/*
					 * Reset the cursor, don't trust
					 * it after any insert operation.
					 */
					if ((error = xfs_bmbt_lookup_eq(cur,
							got.br_startoff,
							got.br_startblock,
							temp, &i)))
						goto done;
					XFS_WANT_CORRUPTED_GOTO(i == 1, done);
					/*
					 * Update the btree record back
					 * to the original value.
					 */
					if ((error = xfs_bmbt_update(cur,
							got.br_startoff,
							got.br_startblock,
							got.br_blockcount,
							got.br_state)))
						goto done;
					/*
					 * Reset the extent record back
					 * to the original value.
					 */
					xfs_bmbt_set_blockcount(ep,
						got.br_blockcount);
					flags = 0;
					error = XFS_ERROR(ENOSPC);
					goto done;
				}
				XFS_WANT_CORRUPTED_GOTO(i == 1, done);
			} else
				flags |= xfs_ilog_fext(whichfork);
			XFS_IFORK_NEXT_SET(ip, whichfork,
				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
		} else {
			ASSERT(whichfork == XFS_DATA_FORK);
			temp = xfs_bmap_worst_indlen(ip, temp);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			temp2 = xfs_bmap_worst_indlen(ip, temp2);
			new.br_startblock = nullstartblock((int)temp2);
			da_new = temp + temp2;
			/*
			 * The two halves' worst-case reservations may sum to
			 * more than the original; steal blocks alternately
			 * from each until the total fits in da_old.
			 */
			while (da_new > da_old) {
				if (temp) {
					temp--;
					da_new--;
					xfs_bmbt_set_startblock(ep,
						nullstartblock((int)temp));
				}
				if (da_new == da_old)
					break;
				if (temp2) {
					temp2--;
					da_new--;
					new.br_startblock =
						nullstartblock((int)temp2);
				}
			}
		}
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		xfs_iext_insert(ip, *idx + 1, 1, &new, state);
		++*idx;
		break;
	}
	/*
	 * If we need to, add to list of extents to delete.
	 */
	if (do_fx)
		xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
			mp);
	/*
	 * Adjust inode # blocks in the file.
	 */
	if (nblks)
		ip->i_d.di_nblocks -= nblks;
	/*
	 * Adjust quota data.
	 */
	if (qfield)
		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);

	/*
	 * Account for change in delayed indirect blocks.
	 * Nothing to do for disk quota accounting here.
	 */
	ASSERT(da_old >= da_new);
	if (da_old > da_new) {
		xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
			(int64_t)(da_old - da_new), 0);
	}
done:
	*logflagsp = flags;
	return error;
}
5445
5446/*
5154 * Unmap (remove) blocks from a file. 5447 * Unmap (remove) blocks from a file.
5155 * If nexts is nonzero then the number of extents to remove is limited to 5448 * If nexts is nonzero then the number of extents to remove is limited to
5156 * that value. If not all extents in the block range can be removed then 5449 * that value. If not all extents in the block range can be removed then
@@ -5811,416 +6104,6 @@ xfs_getbmap(
5811 return error; 6104 return error;
5812} 6105}
5813 6106
5814#ifdef DEBUG
/*
 * DEBUG helper: find the buffer for the given disk address if it is
 * already held by the btree cursor or pinned in the cursor's
 * transaction, so the checker can reuse it instead of re-reading.
 * Returns NULL if the buffer is not already in memory here.
 *
 * NOTE(review): despite the xfs_fsblock_t type, callers pass a daddr
 * (XFS_FSB_TO_DADDR) and it is compared against XFS_BUF_ADDR.
 */
STATIC struct xfs_buf *
xfs_bmap_get_bp(
	struct xfs_btree_cur	*cur,
	xfs_fsblock_t		bno)
{
	struct xfs_log_item_desc *lidp;
	int			i;

	if (!cur)
		return NULL;

	/* First check the buffers the cursor currently holds, per level. */
	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
		if (!cur->bc_bufs[i])
			break;
		if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
			return cur->bc_bufs[i];
	}

	/* Chase down all the log items to see if the bp is there */
	list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
		struct xfs_buf_log_item	*bip;
		bip = (struct xfs_buf_log_item *)lidp->lid_item;
		if (bip->bli_item.li_type == XFS_LI_BUF &&
		    XFS_BUF_ADDR(bip->bli_buf) == bno)
			return bip->bli_buf;
	}

	return NULL;
}
5844
/*
 * DEBUG helper: validate one interior bmap btree block — keys must be
 * strictly increasing and no two child pointers may reference the same
 * block.  'root' selects the incore-root pointer layout (with 'sz' as
 * the root size); panics on a duplicate pointer.
 */
STATIC void
xfs_check_block(
	struct xfs_btree_block	*block,
	xfs_mount_t		*mp,
	int			root,
	short			sz)
{
	int			i, j, dmxr;
	__be64			*pp, *thispa;	/* pointer to block address */
	xfs_bmbt_key_t		*prevp, *keyp;

	/* Only interior (non-leaf) blocks carry keys and pointers. */
	ASSERT(be16_to_cpu(block->bb_level) > 0);

	prevp = NULL;
	for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
		dmxr = mp->m_bmap_dmxr[0];
		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);

		if (prevp) {
			/* Keys must be in strictly ascending order. */
			ASSERT(be64_to_cpu(prevp->br_startoff) <
			       be64_to_cpu(keyp->br_startoff));
		}
		prevp = keyp;

		/*
		 * Compare the block numbers to see if there are dups.
		 */
		if (root)
			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
		else
			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);

		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
			if (root)
				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
			else
				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
			if (*thispa == *pp) {
				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
					__func__, j, i,
					(unsigned long long)be64_to_cpu(*thispa));
				panic("%s: ptrs are equal in node\n",
					__func__);
			}
		}
	}
}
5892
/*
 * Check that the extents for the inode ip are in the right order in all
 * btree leaves.
 *
 * DEBUG-only walker: descends the leftmost spine of the bmap btree,
 * sanity-checks each interior block (via xfs_check_block), then scans
 * every leaf left-to-right asserting that extent records are in
 * ascending, non-overlapping file-offset order — including across leaf
 * boundaries.  Panics on corruption.
 */

STATIC void
xfs_bmap_check_leaf_extents(
	xfs_btree_cur_t		*cur,	/* btree cursor or null */
	xfs_inode_t		*ip,	/* incore inode pointer */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_extnum_t		i=0, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
	int			bp_release = 0;

	/* Nothing to check unless the fork is in btree format. */
	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
		return;
	}

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	ASSERT(bno != NULLDFSBNO);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		/* See if buf is in cur first */
		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			/* We read it ourselves, so we must release it. */
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
		XFS_WANT_CORRUPTED_GOTO(
			xfs_bmap_sanity_check(mp, bp, level),
			error0);
		if (level == 0)
			break;

		/*
		 * Check this block for basic sanity (increasing keys and
		 * no duplicate blocks).
		 */

		xfs_check_block(block, mp, 0, 0);
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;

	/*
	 * Loop over all leaf nodes checking that all extents are in the right order.
	 */
	for (;;) {
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;


		num_recs = xfs_btree_get_numrecs(block);

		/*
		 * Read-ahead the next leaf block, if any.
		 */

		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);

		/*
		 * Check all the extents to make sure they are OK.
		 * If we had a previous block, the last entry should
		 * conform with the first entry in this one.
		 */

		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
		if (i) {
			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
			       xfs_bmbt_disk_get_blockcount(&last) <=
			       xfs_bmbt_disk_get_startoff(ep));
		}
		for (j = 1; j < num_recs; j++) {
			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
			       xfs_bmbt_disk_get_blockcount(ep) <=
			       xfs_bmbt_disk_get_startoff(nextp));
			ep = nextp;
		}

		/* Remember the last record for the cross-block check above. */
		last = *ep;
		i += num_recs;
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;

		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
	}
	if (bp_release) {
		bp_release = 0;
		xfs_trans_brelse(NULL, bp);
	}
	return;

error0:
	xfs_warn(mp, "%s: at error0", __func__);
	if (bp_release)
		xfs_trans_brelse(NULL, bp);
error_norelse:
	xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
		__func__, i);
	panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
	return;
}
6059#endif
6060
/*
 * Count fsblocks of the given fork.
 *
 * For extents-format forks the in-core records are summed directly;
 * for btree-format forks the tree is walked recursively (counting both
 * the btree blocks themselves and the blocks the leaf records map).
 * *count is incremented, not reset, by the helpers — callers pass it
 * pre-initialised.
 */
int						/* error */
xfs_bmap_count_blocks(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode */
	int			whichfork,	/* data or attr fork */
	int			*count)		/* out: count of blocks */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
		/* Extents format: just sum the in-core records. */
		xfs_bmap_count_leaves(ifp, 0,
			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
			count);
		return 0;
	}

	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	block = ifp->if_broot;
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLDFSBNO);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
				 mp);
		return XFS_ERROR(EFSCORRUPTED);
	}

	return 0;
}
6108
/*
 * Recursively walks each level of a btree
 * to count total fsblocks is use.
 *
 * Counts one block for every btree block visited (interior and leaf)
 * plus, at the leaf level, the blocks mapped by the extent records.
 * At each interior level it walks the right-sibling chain to count the
 * level's blocks, then recurses down the leftmost pointer only.
 */
STATIC int					/* error */
xfs_bmap_count_tree(
	xfs_mount_t		*mp,		/* file system mount point */
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_ifork_t		*ifp,		/* inode fork pointer */
	xfs_fsblock_t		blockno,	/* file system block number */
	int			levelin,	/* level in btree */
	int			*count)		/* Count of blocks */
{
	int			error;
	xfs_buf_t		*bp, *nbp;
	int			level = levelin;
	__be64			*pp;
	xfs_fsblock_t           bno = blockno;
	xfs_fsblock_t		nextbno;
	struct xfs_btree_block	*block, *nextblock;
	int			numrecs;

	error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
	if (error)
		return error;
	*count += 1;
	block = XFS_BUF_TO_BLOCK(bp);

	if (--level) {
		/* Not at node above leaves, count this level of nodes */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		while (nextbno != NULLFSBLOCK) {
			error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				return error;
			*count += 1;
			nextblock = XFS_BUF_TO_BLOCK(nbp);
			nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
			xfs_trans_brelse(tp, nbp);
		}

		/* Dive to the next level */
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		if (unlikely((error =
		     xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
			xfs_trans_brelse(tp, bp);
			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
					 XFS_ERRLEVEL_LOW, mp);
			return XFS_ERROR(EFSCORRUPTED);
		}
		xfs_trans_brelse(tp, bp);
	} else {
		/* count all level 1 nodes and their leaves */
		for (;;) {
			nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
			numrecs = be16_to_cpu(block->bb_numrecs);
			xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
			/* Release the current buffer before moving right. */
			xfs_trans_brelse(tp, bp);
			if (nextbno == NULLFSBLOCK)
				break;
			bno = nextbno;
			error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				return error;
			*count += 1;
			block = XFS_BUF_TO_BLOCK(bp);
		}
	}
	return 0;
}
6185
6186/*
6187 * Count leaf blocks given a range of extent records.
6188 */
6189STATIC void
6190xfs_bmap_count_leaves(
6191 xfs_ifork_t *ifp,
6192 xfs_extnum_t idx,
6193 int numrecs,
6194 int *count)
6195{
6196 int b;
6197
6198 for (b = 0; b < numrecs; b++) {
6199 xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
6200 *count += xfs_bmbt_get_blockcount(frp);
6201 }
6202}
6203
6204/*
6205 * Count leaf blocks given a range of extent records originally
6206 * in btree format.
6207 */
6208STATIC void
6209xfs_bmap_disk_count_leaves(
6210 struct xfs_mount *mp,
6211 struct xfs_btree_block *block,
6212 int numrecs,
6213 int *count)
6214{
6215 int b;
6216 xfs_bmbt_rec_t *frp;
6217
6218 for (b = 1; b <= numrecs; b++) {
6219 frp = XFS_BMBT_REC_ADDR(mp, block, b);
6220 *count += xfs_bmbt_disk_get_blockcount(frp);
6221 }
6222}
6223
6224/* 6107/*
6225 * dead simple method of punching delalyed allocation blocks from a range in 6108 * dead simple method of punching delalyed allocation blocks from a range in
6226 * the inode. Walks a block at a time so will be slow, but is only executed in 6109 * the inode. Walks a block at a time so will be slow, but is only executed in
@@ -6295,16 +6178,3 @@ next_block:
6295 6178
6296 return error; 6179 return error;
6297} 6180}
6298
6299/*
6300 * Convert the given file system block to a disk block. We have to treat it
6301 * differently based on whether the file is a real time file or not, because the
6302 * bmap code does.
6303 */
6304xfs_daddr_t
6305xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
6306{
6307 return (XFS_IS_REALTIME_INODE(ip) ? \
6308 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
6309 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
6310}
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 061b45cbe614..3a86c3fa6de1 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -37,6 +37,7 @@
37#include "xfs_error.h" 37#include "xfs_error.h"
38#include "xfs_quota.h" 38#include "xfs_quota.h"
39#include "xfs_trace.h" 39#include "xfs_trace.h"
40#include "xfs_cksum.h"
40 41
41/* 42/*
42 * Determine the extent state. 43 * Determine the extent state.
@@ -59,24 +60,31 @@ xfs_extent_state(
59 */ 60 */
60void 61void
61xfs_bmdr_to_bmbt( 62xfs_bmdr_to_bmbt(
62 struct xfs_mount *mp, 63 struct xfs_inode *ip,
63 xfs_bmdr_block_t *dblock, 64 xfs_bmdr_block_t *dblock,
64 int dblocklen, 65 int dblocklen,
65 struct xfs_btree_block *rblock, 66 struct xfs_btree_block *rblock,
66 int rblocklen) 67 int rblocklen)
67{ 68{
69 struct xfs_mount *mp = ip->i_mount;
68 int dmxr; 70 int dmxr;
69 xfs_bmbt_key_t *fkp; 71 xfs_bmbt_key_t *fkp;
70 __be64 *fpp; 72 __be64 *fpp;
71 xfs_bmbt_key_t *tkp; 73 xfs_bmbt_key_t *tkp;
72 __be64 *tpp; 74 __be64 *tpp;
73 75
74 rblock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); 76 if (xfs_sb_version_hascrc(&mp->m_sb))
77 xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
78 XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
79 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
80 else
81 xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
82 XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
83 XFS_BTREE_LONG_PTRS);
84
75 rblock->bb_level = dblock->bb_level; 85 rblock->bb_level = dblock->bb_level;
76 ASSERT(be16_to_cpu(rblock->bb_level) > 0); 86 ASSERT(be16_to_cpu(rblock->bb_level) > 0);
77 rblock->bb_numrecs = dblock->bb_numrecs; 87 rblock->bb_numrecs = dblock->bb_numrecs;
78 rblock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
79 rblock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
80 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); 88 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
81 fkp = XFS_BMDR_KEY_ADDR(dblock, 1); 89 fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
82 tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); 90 tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
@@ -424,7 +432,13 @@ xfs_bmbt_to_bmdr(
424 xfs_bmbt_key_t *tkp; 432 xfs_bmbt_key_t *tkp;
425 __be64 *tpp; 433 __be64 *tpp;
426 434
427 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC)); 435 if (xfs_sb_version_hascrc(&mp->m_sb)) {
436 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC));
437 ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid));
438 ASSERT(rblock->bb_u.l.bb_blkno ==
439 cpu_to_be64(XFS_BUF_DADDR_NULL));
440 } else
441 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
428 ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)); 442 ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
429 ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)); 443 ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
430 ASSERT(rblock->bb_level != 0); 444 ASSERT(rblock->bb_level != 0);
@@ -708,59 +722,89 @@ xfs_bmbt_key_diff(
708 cur->bc_rec.b.br_startoff; 722 cur->bc_rec.b.br_startoff;
709} 723}
710 724
711static void 725static int
712xfs_bmbt_verify( 726xfs_bmbt_verify(
713 struct xfs_buf *bp) 727 struct xfs_buf *bp)
714{ 728{
715 struct xfs_mount *mp = bp->b_target->bt_mount; 729 struct xfs_mount *mp = bp->b_target->bt_mount;
716 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 730 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
717 unsigned int level; 731 unsigned int level;
718 int lblock_ok; /* block passes checks */
719 732
720 /* magic number and level verification. 733 switch (block->bb_magic) {
734 case cpu_to_be32(XFS_BMAP_CRC_MAGIC):
735 if (!xfs_sb_version_hascrc(&mp->m_sb))
736 return false;
737 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid))
738 return false;
739 if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn)
740 return false;
741 /*
742 * XXX: need a better way of verifying the owner here. Right now
743 * just make sure there has been one set.
744 */
745 if (be64_to_cpu(block->bb_u.l.bb_owner) == 0)
746 return false;
747 /* fall through */
748 case cpu_to_be32(XFS_BMAP_MAGIC):
749 break;
750 default:
751 return false;
752 }
753
754 /*
755 * numrecs and level verification.
721 * 756 *
722 * We don't know waht fork we belong to, so just verify that the level 757 * We don't know what fork we belong to, so just verify that the level
723 * is less than the maximum of the two. Later checks will be more 758 * is less than the maximum of the two. Later checks will be more
724 * precise. 759 * precise.
725 */ 760 */
726 level = be16_to_cpu(block->bb_level); 761 level = be16_to_cpu(block->bb_level);
727 lblock_ok = block->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC) && 762 if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
728 level < max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]); 763 return false;
729 764 if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
730 /* numrecs verification */ 765 return false;
731 lblock_ok = lblock_ok &&
732 be16_to_cpu(block->bb_numrecs) <= mp->m_bmap_dmxr[level != 0];
733 766
734 /* sibling pointer verification */ 767 /* sibling pointer verification */
735 lblock_ok = lblock_ok && 768 if (!block->bb_u.l.bb_leftsib ||
736 block->bb_u.l.bb_leftsib && 769 (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) &&
737 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || 770 !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
738 XFS_FSB_SANITY_CHECK(mp, 771 return false;
739 be64_to_cpu(block->bb_u.l.bb_leftsib))) && 772 if (!block->bb_u.l.bb_rightsib ||
740 block->bb_u.l.bb_rightsib && 773 (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) &&
741 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || 774 !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
742 XFS_FSB_SANITY_CHECK(mp, 775 return false;
743 be64_to_cpu(block->bb_u.l.bb_rightsib))); 776
744 777 return true;
745 if (!lblock_ok) { 778
746 trace_xfs_btree_corrupt(bp, _RET_IP_);
747 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
748 xfs_buf_ioerror(bp, EFSCORRUPTED);
749 }
750} 779}
751 780
752static void 781static void
753xfs_bmbt_read_verify( 782xfs_bmbt_read_verify(
754 struct xfs_buf *bp) 783 struct xfs_buf *bp)
755{ 784{
756 xfs_bmbt_verify(bp); 785 if (!(xfs_btree_lblock_verify_crc(bp) &&
786 xfs_bmbt_verify(bp))) {
787 trace_xfs_btree_corrupt(bp, _RET_IP_);
788 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
789 bp->b_target->bt_mount, bp->b_addr);
790 xfs_buf_ioerror(bp, EFSCORRUPTED);
791 }
792
757} 793}
758 794
759static void 795static void
760xfs_bmbt_write_verify( 796xfs_bmbt_write_verify(
761 struct xfs_buf *bp) 797 struct xfs_buf *bp)
762{ 798{
763 xfs_bmbt_verify(bp); 799 if (!xfs_bmbt_verify(bp)) {
800 xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn);
801 trace_xfs_btree_corrupt(bp, _RET_IP_);
802 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
803 bp->b_target->bt_mount, bp->b_addr);
804 xfs_buf_ioerror(bp, EFSCORRUPTED);
805 return;
806 }
807 xfs_btree_lblock_calc_crc(bp);
764} 808}
765 809
766const struct xfs_buf_ops xfs_bmbt_buf_ops = { 810const struct xfs_buf_ops xfs_bmbt_buf_ops = {
@@ -838,6 +882,8 @@ xfs_bmbt_init_cursor(
838 882
839 cur->bc_ops = &xfs_bmbt_ops; 883 cur->bc_ops = &xfs_bmbt_ops;
840 cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE; 884 cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
885 if (xfs_sb_version_hascrc(&mp->m_sb))
886 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
841 887
842 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); 888 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
843 cur->bc_private.b.ip = ip; 889 cur->bc_private.b.ip = ip;
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 88469ca08696..70c43d9f72c1 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -18,7 +18,8 @@
18#ifndef __XFS_BMAP_BTREE_H__ 18#ifndef __XFS_BMAP_BTREE_H__
19#define __XFS_BMAP_BTREE_H__ 19#define __XFS_BMAP_BTREE_H__
20 20
21#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */ 21#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */
22#define XFS_BMAP_CRC_MAGIC 0x424d4133 /* 'BMA3' */
22 23
23struct xfs_btree_cur; 24struct xfs_btree_cur;
24struct xfs_btree_block; 25struct xfs_btree_block;
@@ -136,10 +137,10 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
136 137
137/* 138/*
138 * Btree block header size depends on a superblock flag. 139 * Btree block header size depends on a superblock flag.
139 *
140 * (not quite yet, but soon)
141 */ 140 */
142#define XFS_BMBT_BLOCK_LEN(mp) XFS_BTREE_LBLOCK_LEN 141#define XFS_BMBT_BLOCK_LEN(mp) \
142 (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
143 XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN)
143 144
144#define XFS_BMBT_REC_ADDR(mp, block, index) \ 145#define XFS_BMBT_REC_ADDR(mp, block, index) \
145 ((xfs_bmbt_rec_t *) \ 146 ((xfs_bmbt_rec_t *) \
@@ -186,12 +187,12 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
186#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \ 187#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \
187 XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0)) 188 XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0))
188 189
189#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \ 190#define XFS_BMAP_BROOT_SPACE_CALC(mp, nrecs) \
190 (int)(XFS_BTREE_LBLOCK_LEN + \ 191 (int)(XFS_BMBT_BLOCK_LEN(mp) + \
191 ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) 192 ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
192 193
193#define XFS_BMAP_BROOT_SPACE(bb) \ 194#define XFS_BMAP_BROOT_SPACE(mp, bb) \
194 (XFS_BMAP_BROOT_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs))) 195 (XFS_BMAP_BROOT_SPACE_CALC(mp, be16_to_cpu((bb)->bb_numrecs)))
195#define XFS_BMDR_SPACE_CALC(nrecs) \ 196#define XFS_BMDR_SPACE_CALC(nrecs) \
196 (int)(sizeof(xfs_bmdr_block_t) + \ 197 (int)(sizeof(xfs_bmdr_block_t) + \
197 ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) 198 ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
@@ -204,7 +205,7 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
204/* 205/*
205 * Prototypes for xfs_bmap.c to call. 206 * Prototypes for xfs_bmap.c to call.
206 */ 207 */
207extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int, 208extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int,
208 struct xfs_btree_block *, int); 209 struct xfs_btree_block *, int);
209extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); 210extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
210extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); 211extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index db010408d701..8804b8a3c310 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -30,9 +30,11 @@
30#include "xfs_dinode.h" 30#include "xfs_dinode.h"
31#include "xfs_inode.h" 31#include "xfs_inode.h"
32#include "xfs_inode_item.h" 32#include "xfs_inode_item.h"
33#include "xfs_buf_item.h"
33#include "xfs_btree.h" 34#include "xfs_btree.h"
34#include "xfs_error.h" 35#include "xfs_error.h"
35#include "xfs_trace.h" 36#include "xfs_trace.h"
37#include "xfs_cksum.h"
36 38
37/* 39/*
38 * Cursor allocation zone. 40 * Cursor allocation zone.
@@ -42,9 +44,13 @@ kmem_zone_t *xfs_btree_cur_zone;
42/* 44/*
43 * Btree magic numbers. 45 * Btree magic numbers.
44 */ 46 */
45const __uint32_t xfs_magics[XFS_BTNUM_MAX] = { 47static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
46 XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC 48 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC },
49 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
50 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC }
47}; 51};
52#define xfs_btree_magic(cur) \
53 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
48 54
49 55
50STATIC int /* error (0 or EFSCORRUPTED) */ 56STATIC int /* error (0 or EFSCORRUPTED) */
@@ -54,30 +60,38 @@ xfs_btree_check_lblock(
54 int level, /* level of the btree block */ 60 int level, /* level of the btree block */
55 struct xfs_buf *bp) /* buffer for block, if any */ 61 struct xfs_buf *bp) /* buffer for block, if any */
56{ 62{
57 int lblock_ok; /* block passes checks */ 63 int lblock_ok = 1; /* block passes checks */
58 struct xfs_mount *mp; /* file system mount point */ 64 struct xfs_mount *mp; /* file system mount point */
59 65
60 mp = cur->bc_mp; 66 mp = cur->bc_mp;
61 lblock_ok = 67
62 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && 68 if (xfs_sb_version_hascrc(&mp->m_sb)) {
69 lblock_ok = lblock_ok &&
70 uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
71 block->bb_u.l.bb_blkno == cpu_to_be64(
72 bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
73 }
74
75 lblock_ok = lblock_ok &&
76 be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
63 be16_to_cpu(block->bb_level) == level && 77 be16_to_cpu(block->bb_level) == level &&
64 be16_to_cpu(block->bb_numrecs) <= 78 be16_to_cpu(block->bb_numrecs) <=
65 cur->bc_ops->get_maxrecs(cur, level) && 79 cur->bc_ops->get_maxrecs(cur, level) &&
66 block->bb_u.l.bb_leftsib && 80 block->bb_u.l.bb_leftsib &&
67 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || 81 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
68 XFS_FSB_SANITY_CHECK(mp, 82 XFS_FSB_SANITY_CHECK(mp,
69 be64_to_cpu(block->bb_u.l.bb_leftsib))) && 83 be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
70 block->bb_u.l.bb_rightsib && 84 block->bb_u.l.bb_rightsib &&
71 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || 85 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
72 XFS_FSB_SANITY_CHECK(mp, 86 XFS_FSB_SANITY_CHECK(mp,
73 be64_to_cpu(block->bb_u.l.bb_rightsib))); 87 be64_to_cpu(block->bb_u.l.bb_rightsib)));
88
74 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, 89 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
75 XFS_ERRTAG_BTREE_CHECK_LBLOCK, 90 XFS_ERRTAG_BTREE_CHECK_LBLOCK,
76 XFS_RANDOM_BTREE_CHECK_LBLOCK))) { 91 XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
77 if (bp) 92 if (bp)
78 trace_xfs_btree_corrupt(bp, _RET_IP_); 93 trace_xfs_btree_corrupt(bp, _RET_IP_);
79 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW, 94 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
80 mp);
81 return XFS_ERROR(EFSCORRUPTED); 95 return XFS_ERROR(EFSCORRUPTED);
82 } 96 }
83 return 0; 97 return 0;
@@ -90,16 +104,26 @@ xfs_btree_check_sblock(
90 int level, /* level of the btree block */ 104 int level, /* level of the btree block */
91 struct xfs_buf *bp) /* buffer containing block */ 105 struct xfs_buf *bp) /* buffer containing block */
92{ 106{
107 struct xfs_mount *mp; /* file system mount point */
93 struct xfs_buf *agbp; /* buffer for ag. freespace struct */ 108 struct xfs_buf *agbp; /* buffer for ag. freespace struct */
94 struct xfs_agf *agf; /* ag. freespace structure */ 109 struct xfs_agf *agf; /* ag. freespace structure */
95 xfs_agblock_t agflen; /* native ag. freespace length */ 110 xfs_agblock_t agflen; /* native ag. freespace length */
96 int sblock_ok; /* block passes checks */ 111 int sblock_ok = 1; /* block passes checks */
97 112
113 mp = cur->bc_mp;
98 agbp = cur->bc_private.a.agbp; 114 agbp = cur->bc_private.a.agbp;
99 agf = XFS_BUF_TO_AGF(agbp); 115 agf = XFS_BUF_TO_AGF(agbp);
100 agflen = be32_to_cpu(agf->agf_length); 116 agflen = be32_to_cpu(agf->agf_length);
101 sblock_ok = 117
102 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && 118 if (xfs_sb_version_hascrc(&mp->m_sb)) {
119 sblock_ok = sblock_ok &&
120 uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
121 block->bb_u.s.bb_blkno == cpu_to_be64(
122 bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
123 }
124
125 sblock_ok = sblock_ok &&
126 be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
103 be16_to_cpu(block->bb_level) == level && 127 be16_to_cpu(block->bb_level) == level &&
104 be16_to_cpu(block->bb_numrecs) <= 128 be16_to_cpu(block->bb_numrecs) <=
105 cur->bc_ops->get_maxrecs(cur, level) && 129 cur->bc_ops->get_maxrecs(cur, level) &&
@@ -109,13 +133,13 @@ xfs_btree_check_sblock(
109 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || 133 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
110 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && 134 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
111 block->bb_u.s.bb_rightsib; 135 block->bb_u.s.bb_rightsib;
112 if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, 136
137 if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
113 XFS_ERRTAG_BTREE_CHECK_SBLOCK, 138 XFS_ERRTAG_BTREE_CHECK_SBLOCK,
114 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 139 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
115 if (bp) 140 if (bp)
116 trace_xfs_btree_corrupt(bp, _RET_IP_); 141 trace_xfs_btree_corrupt(bp, _RET_IP_);
117 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock", 142 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
118 XFS_ERRLEVEL_LOW, cur->bc_mp, block);
119 return XFS_ERROR(EFSCORRUPTED); 143 return XFS_ERROR(EFSCORRUPTED);
120 } 144 }
121 return 0; 145 return 0;
@@ -194,6 +218,72 @@ xfs_btree_check_ptr(
194#endif 218#endif
195 219
196/* 220/*
221 * Calculate CRC on the whole btree block and stuff it into the
222 * long-form btree header.
223 *
224 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
225 * it into the buffer so recovery knows what the last modifcation was that made
226 * it to disk.
227 */
228void
229xfs_btree_lblock_calc_crc(
230 struct xfs_buf *bp)
231{
232 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
233 struct xfs_buf_log_item *bip = bp->b_fspriv;
234
235 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
236 return;
237 if (bip)
238 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
239 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
240 XFS_BTREE_LBLOCK_CRC_OFF);
241}
242
243bool
244xfs_btree_lblock_verify_crc(
245 struct xfs_buf *bp)
246{
247 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
248 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
249 XFS_BTREE_LBLOCK_CRC_OFF);
250 return true;
251}
252
253/*
254 * Calculate CRC on the whole btree block and stuff it into the
255 * short-form btree header.
256 *
257 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
258 * it into the buffer so recovery knows what the last modifcation was that made
259 * it to disk.
260 */
261void
262xfs_btree_sblock_calc_crc(
263 struct xfs_buf *bp)
264{
265 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
266 struct xfs_buf_log_item *bip = bp->b_fspriv;
267
268 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
269 return;
270 if (bip)
271 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
272 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
273 XFS_BTREE_SBLOCK_CRC_OFF);
274}
275
276bool
277xfs_btree_sblock_verify_crc(
278 struct xfs_buf *bp)
279{
280 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
281 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
282 XFS_BTREE_SBLOCK_CRC_OFF);
283 return true;
284}
285
286/*
197 * Delete the btree cursor. 287 * Delete the btree cursor.
198 */ 288 */
199void 289void
@@ -277,10 +367,8 @@ xfs_btree_dup_cursor(
277 *ncur = NULL; 367 *ncur = NULL;
278 return error; 368 return error;
279 } 369 }
280 new->bc_bufs[i] = bp; 370 }
281 ASSERT(!xfs_buf_geterror(bp)); 371 new->bc_bufs[i] = bp;
282 } else
283 new->bc_bufs[i] = NULL;
284 } 372 }
285 *ncur = new; 373 *ncur = new;
286 return 0; 374 return 0;
@@ -321,9 +409,14 @@ xfs_btree_dup_cursor(
321 */ 409 */
322static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) 410static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
323{ 411{
324 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 412 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
325 XFS_BTREE_LBLOCK_LEN : 413 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
326 XFS_BTREE_SBLOCK_LEN; 414 return XFS_BTREE_LBLOCK_CRC_LEN;
415 return XFS_BTREE_LBLOCK_LEN;
416 }
417 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
418 return XFS_BTREE_SBLOCK_CRC_LEN;
419 return XFS_BTREE_SBLOCK_LEN;
327} 420}
328 421
329/* 422/*
@@ -863,43 +956,85 @@ xfs_btree_set_sibling(
863} 956}
864 957
865void 958void
959xfs_btree_init_block_int(
960 struct xfs_mount *mp,
961 struct xfs_btree_block *buf,
962 xfs_daddr_t blkno,
963 __u32 magic,
964 __u16 level,
965 __u16 numrecs,
966 __u64 owner,
967 unsigned int flags)
968{
969 buf->bb_magic = cpu_to_be32(magic);
970 buf->bb_level = cpu_to_be16(level);
971 buf->bb_numrecs = cpu_to_be16(numrecs);
972
973 if (flags & XFS_BTREE_LONG_PTRS) {
974 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
975 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
976 if (flags & XFS_BTREE_CRC_BLOCKS) {
977 buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
978 buf->bb_u.l.bb_owner = cpu_to_be64(owner);
979 uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
980 buf->bb_u.l.bb_pad = 0;
981 }
982 } else {
983 /* owner is a 32 bit value on short blocks */
984 __u32 __owner = (__u32)owner;
985
986 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
987 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
988 if (flags & XFS_BTREE_CRC_BLOCKS) {
989 buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
990 buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
991 uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
992 }
993 }
994}
995
996void
866xfs_btree_init_block( 997xfs_btree_init_block(
867 struct xfs_mount *mp, 998 struct xfs_mount *mp,
868 struct xfs_buf *bp, 999 struct xfs_buf *bp,
869 __u32 magic, 1000 __u32 magic,
870 __u16 level, 1001 __u16 level,
871 __u16 numrecs, 1002 __u16 numrecs,
1003 __u64 owner,
872 unsigned int flags) 1004 unsigned int flags)
873{ 1005{
874 struct xfs_btree_block *new = XFS_BUF_TO_BLOCK(bp); 1006 xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
875 1007 magic, level, numrecs, owner, flags);
876 new->bb_magic = cpu_to_be32(magic);
877 new->bb_level = cpu_to_be16(level);
878 new->bb_numrecs = cpu_to_be16(numrecs);
879
880 if (flags & XFS_BTREE_LONG_PTRS) {
881 new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
882 new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
883 } else {
884 new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
885 new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
886 }
887} 1008}
888 1009
889STATIC void 1010STATIC void
890xfs_btree_init_block_cur( 1011xfs_btree_init_block_cur(
891 struct xfs_btree_cur *cur, 1012 struct xfs_btree_cur *cur,
1013 struct xfs_buf *bp,
892 int level, 1014 int level,
893 int numrecs, 1015 int numrecs)
894 struct xfs_buf *bp)
895{ 1016{
896 xfs_btree_init_block(cur->bc_mp, bp, xfs_magics[cur->bc_btnum], 1017 __u64 owner;
897 level, numrecs, cur->bc_flags); 1018
1019 /*
1020 * we can pull the owner from the cursor right now as the different
1021 * owners align directly with the pointer size of the btree. This may
1022 * change in future, but is safe for current users of the generic btree
1023 * code.
1024 */
1025 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1026 owner = cur->bc_private.b.ip->i_ino;
1027 else
1028 owner = cur->bc_private.a.agno;
1029
1030 xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1031 xfs_btree_magic(cur), level, numrecs,
1032 owner, cur->bc_flags);
898} 1033}
899 1034
900/* 1035/*
901 * Return true if ptr is the last record in the btree and 1036 * Return true if ptr is the last record in the btree and
902 * we need to track updateѕ to this record. The decision 1037 * we need to track updates to this record. The decision
903 * will be further refined in the update_lastrec method. 1038 * will be further refined in the update_lastrec method.
904 */ 1039 */
905STATIC int 1040STATIC int
@@ -1147,6 +1282,7 @@ xfs_btree_log_keys(
1147 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); 1282 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1148 1283
1149 if (bp) { 1284 if (bp) {
1285 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1150 xfs_trans_log_buf(cur->bc_tp, bp, 1286 xfs_trans_log_buf(cur->bc_tp, bp,
1151 xfs_btree_key_offset(cur, first), 1287 xfs_btree_key_offset(cur, first),
1152 xfs_btree_key_offset(cur, last + 1) - 1); 1288 xfs_btree_key_offset(cur, last + 1) - 1);
@@ -1171,6 +1307,7 @@ xfs_btree_log_recs(
1171 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1307 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1172 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); 1308 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1173 1309
1310 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1174 xfs_trans_log_buf(cur->bc_tp, bp, 1311 xfs_trans_log_buf(cur->bc_tp, bp,
1175 xfs_btree_rec_offset(cur, first), 1312 xfs_btree_rec_offset(cur, first),
1176 xfs_btree_rec_offset(cur, last + 1) - 1); 1313 xfs_btree_rec_offset(cur, last + 1) - 1);
@@ -1195,6 +1332,7 @@ xfs_btree_log_ptrs(
1195 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 1332 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
1196 int level = xfs_btree_get_level(block); 1333 int level = xfs_btree_get_level(block);
1197 1334
1335 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1198 xfs_trans_log_buf(cur->bc_tp, bp, 1336 xfs_trans_log_buf(cur->bc_tp, bp,
1199 xfs_btree_ptr_offset(cur, first, level), 1337 xfs_btree_ptr_offset(cur, first, level),
1200 xfs_btree_ptr_offset(cur, last + 1, level) - 1); 1338 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
@@ -1223,7 +1361,12 @@ xfs_btree_log_block(
1223 offsetof(struct xfs_btree_block, bb_numrecs), 1361 offsetof(struct xfs_btree_block, bb_numrecs),
1224 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), 1362 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1225 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), 1363 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1226 XFS_BTREE_SBLOCK_LEN 1364 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
1365 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
1366 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
1367 offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
1368 offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
1369 XFS_BTREE_SBLOCK_CRC_LEN
1227 }; 1370 };
1228 static const short loffsets[] = { /* table of offsets (long) */ 1371 static const short loffsets[] = { /* table of offsets (long) */
1229 offsetof(struct xfs_btree_block, bb_magic), 1372 offsetof(struct xfs_btree_block, bb_magic),
@@ -1231,17 +1374,40 @@ xfs_btree_log_block(
1231 offsetof(struct xfs_btree_block, bb_numrecs), 1374 offsetof(struct xfs_btree_block, bb_numrecs),
1232 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), 1375 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1233 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), 1376 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1234 XFS_BTREE_LBLOCK_LEN 1377 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
1378 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
1379 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
1380 offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
1381 offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
1382 offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
1383 XFS_BTREE_LBLOCK_CRC_LEN
1235 }; 1384 };
1236 1385
1237 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1386 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1238 XFS_BTREE_TRACE_ARGBI(cur, bp, fields); 1387 XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1239 1388
1240 if (bp) { 1389 if (bp) {
1390 int nbits;
1391
1392 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
1393 /*
1394 * We don't log the CRC when updating a btree
1395 * block but instead recreate it during log
1396 * recovery. As the log buffers have checksums
1397 * of their own this is safe and avoids logging a crc
1398 * update in a lot of places.
1399 */
1400 if (fields == XFS_BB_ALL_BITS)
1401 fields = XFS_BB_ALL_BITS_CRC;
1402 nbits = XFS_BB_NUM_BITS_CRC;
1403 } else {
1404 nbits = XFS_BB_NUM_BITS;
1405 }
1241 xfs_btree_offsets(fields, 1406 xfs_btree_offsets(fields,
1242 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 1407 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1243 loffsets : soffsets, 1408 loffsets : soffsets,
1244 XFS_BB_NUM_BITS, &first, &last); 1409 nbits, &first, &last);
1410 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1245 xfs_trans_log_buf(cur->bc_tp, bp, first, last); 1411 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1246 } else { 1412 } else {
1247 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1413 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
@@ -2204,7 +2370,7 @@ xfs_btree_split(
2204 goto error0; 2370 goto error0;
2205 2371
2206 /* Fill in the btree header for the new right block. */ 2372 /* Fill in the btree header for the new right block. */
2207 xfs_btree_init_block_cur(cur, xfs_btree_get_level(left), 0, rbp); 2373 xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
2208 2374
2209 /* 2375 /*
2210 * Split the entries between the old and the new block evenly. 2376 * Split the entries between the old and the new block evenly.
@@ -2513,7 +2679,7 @@ xfs_btree_new_root(
2513 nptr = 2; 2679 nptr = 2;
2514 } 2680 }
2515 /* Fill in the new block's btree header and log it. */ 2681 /* Fill in the new block's btree header and log it. */
2516 xfs_btree_init_block_cur(cur, cur->bc_nlevels, 2, nbp); 2682 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
2517 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); 2683 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2518 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && 2684 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2519 !xfs_btree_ptr_is_null(cur, &rptr)); 2685 !xfs_btree_ptr_is_null(cur, &rptr));
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index f932897194eb..6e6c915673fe 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -42,11 +42,15 @@ extern kmem_zone_t *xfs_btree_cur_zone;
42 * Generic btree header. 42 * Generic btree header.
43 * 43 *
44 * This is a combination of the actual format used on disk for short and long 44 * This is a combination of the actual format used on disk for short and long
45 * format btrees. The first three fields are shared by both format, but 45 * format btrees. The first three fields are shared by both format, but the
46 * the pointers are different and should be used with care. 46 * pointers are different and should be used with care.
47 * 47 *
48 * To get the size of the actual short or long form headers please use 48 * To get the size of the actual short or long form headers please use the size
49 * the size macros below. Never use sizeof(xfs_btree_block). 49 * macros below. Never use sizeof(xfs_btree_block).
50 *
51 * The blkno, crc, lsn, owner and uuid fields are only available in filesystems
52 * with the crc feature bit, and all accesses to them must be conditional on
53 * that flag.
50 */ 54 */
51struct xfs_btree_block { 55struct xfs_btree_block {
52 __be32 bb_magic; /* magic number for block type */ 56 __be32 bb_magic; /* magic number for block type */
@@ -56,10 +60,23 @@ struct xfs_btree_block {
56 struct { 60 struct {
57 __be32 bb_leftsib; 61 __be32 bb_leftsib;
58 __be32 bb_rightsib; 62 __be32 bb_rightsib;
63
64 __be64 bb_blkno;
65 __be64 bb_lsn;
66 uuid_t bb_uuid;
67 __be32 bb_owner;
68 __le32 bb_crc;
59 } s; /* short form pointers */ 69 } s; /* short form pointers */
60 struct { 70 struct {
61 __be64 bb_leftsib; 71 __be64 bb_leftsib;
62 __be64 bb_rightsib; 72 __be64 bb_rightsib;
73
74 __be64 bb_blkno;
75 __be64 bb_lsn;
76 uuid_t bb_uuid;
77 __be64 bb_owner;
78 __le32 bb_crc;
79 __be32 bb_pad; /* padding for alignment */
63 } l; /* long form pointers */ 80 } l; /* long form pointers */
64 } bb_u; /* rest */ 81 } bb_u; /* rest */
65}; 82};
@@ -67,6 +84,16 @@ struct xfs_btree_block {
67#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */ 84#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */
68#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */ 85#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */
69 86
87/* sizes of CRC enabled btree blocks */
88#define XFS_BTREE_SBLOCK_CRC_LEN (XFS_BTREE_SBLOCK_LEN + 40)
89#define XFS_BTREE_LBLOCK_CRC_LEN (XFS_BTREE_LBLOCK_LEN + 48)
90
91
92#define XFS_BTREE_SBLOCK_CRC_OFF \
93 offsetof(struct xfs_btree_block, bb_u.s.bb_crc)
94#define XFS_BTREE_LBLOCK_CRC_OFF \
95 offsetof(struct xfs_btree_block, bb_u.l.bb_crc)
96
70 97
71/* 98/*
72 * Generic key, ptr and record wrapper structures. 99 * Generic key, ptr and record wrapper structures.
@@ -101,13 +128,11 @@ union xfs_btree_rec {
101#define XFS_BB_NUMRECS 0x04 128#define XFS_BB_NUMRECS 0x04
102#define XFS_BB_LEFTSIB 0x08 129#define XFS_BB_LEFTSIB 0x08
103#define XFS_BB_RIGHTSIB 0x10 130#define XFS_BB_RIGHTSIB 0x10
131#define XFS_BB_BLKNO 0x20
104#define XFS_BB_NUM_BITS 5 132#define XFS_BB_NUM_BITS 5
105#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1) 133#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1)
106 134#define XFS_BB_NUM_BITS_CRC 8
107/* 135#define XFS_BB_ALL_BITS_CRC ((1 << XFS_BB_NUM_BITS_CRC) - 1)
108 * Magic numbers for btree blocks.
109 */
110extern const __uint32_t xfs_magics[];
111 136
112/* 137/*
113 * Generic stats interface 138 * Generic stats interface
@@ -256,6 +281,7 @@ typedef struct xfs_btree_cur
256#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */ 281#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */
257#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */ 282#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */
258#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */ 283#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */
284#define XFS_BTREE_CRC_BLOCKS (1<<3) /* uses extended btree blocks */
259 285
260 286
261#define XFS_BTREE_NOERROR 0 287#define XFS_BTREE_NOERROR 0
@@ -393,8 +419,20 @@ xfs_btree_init_block(
393 __u32 magic, 419 __u32 magic,
394 __u16 level, 420 __u16 level,
395 __u16 numrecs, 421 __u16 numrecs,
422 __u64 owner,
396 unsigned int flags); 423 unsigned int flags);
397 424
425void
426xfs_btree_init_block_int(
427 struct xfs_mount *mp,
428 struct xfs_btree_block *buf,
429 xfs_daddr_t blkno,
430 __u32 magic,
431 __u16 level,
432 __u16 numrecs,
433 __u64 owner,
434 unsigned int flags);
435
398/* 436/*
399 * Common btree core entry points. 437 * Common btree core entry points.
400 */ 438 */
@@ -408,6 +446,14 @@ int xfs_btree_delete(struct xfs_btree_cur *, int *);
408int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); 446int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
409 447
410/* 448/*
449 * btree block CRC helpers
450 */
451void xfs_btree_lblock_calc_crc(struct xfs_buf *);
452bool xfs_btree_lblock_verify_crc(struct xfs_buf *);
453void xfs_btree_sblock_calc_crc(struct xfs_buf *);
454bool xfs_btree_sblock_verify_crc(struct xfs_buf *);
455
456/*
411 * Internal btree helpers also used by xfs_bmap.c. 457 * Internal btree helpers also used by xfs_bmap.c.
412 */ 458 */
413void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); 459void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8459b5d8cb71..82b70bda9f47 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1022,7 +1022,9 @@ xfs_buf_iodone_work(
1022 bool read = !!(bp->b_flags & XBF_READ); 1022 bool read = !!(bp->b_flags & XBF_READ);
1023 1023
1024 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); 1024 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
1025 if (read && bp->b_ops) 1025
1026 /* only validate buffers that were read without errors */
1027 if (read && bp->b_ops && !bp->b_error && (bp->b_flags & XBF_DONE))
1026 bp->b_ops->verify_read(bp); 1028 bp->b_ops->verify_read(bp);
1027 1029
1028 if (bp->b_iodone) 1030 if (bp->b_iodone)
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index ee36c88ecfde..2573d2a75fc8 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -24,19 +24,20 @@ extern kmem_zone_t *xfs_buf_item_zone;
24 * This flag indicates that the buffer contains on disk inodes 24 * This flag indicates that the buffer contains on disk inodes
25 * and requires special recovery handling. 25 * and requires special recovery handling.
26 */ 26 */
27#define XFS_BLF_INODE_BUF 0x1 27#define XFS_BLF_INODE_BUF (1<<0)
28/* 28/*
29 * This flag indicates that the buffer should not be replayed 29 * This flag indicates that the buffer should not be replayed
30 * during recovery because its blocks are being freed. 30 * during recovery because its blocks are being freed.
31 */ 31 */
32#define XFS_BLF_CANCEL 0x2 32#define XFS_BLF_CANCEL (1<<1)
33
33/* 34/*
34 * This flag indicates that the buffer contains on disk 35 * This flag indicates that the buffer contains on disk
35 * user or group dquots and may require special recovery handling. 36 * user or group dquots and may require special recovery handling.
36 */ 37 */
37#define XFS_BLF_UDQUOT_BUF 0x4 38#define XFS_BLF_UDQUOT_BUF (1<<2)
38#define XFS_BLF_PDQUOT_BUF 0x8 39#define XFS_BLF_PDQUOT_BUF (1<<3)
39#define XFS_BLF_GDQUOT_BUF 0x10 40#define XFS_BLF_GDQUOT_BUF (1<<4)
40 41
41#define XFS_BLF_CHUNK 128 42#define XFS_BLF_CHUNK 128
42#define XFS_BLF_SHIFT 7 43#define XFS_BLF_SHIFT 7
@@ -61,6 +62,55 @@ typedef struct xfs_buf_log_format {
61} xfs_buf_log_format_t; 62} xfs_buf_log_format_t;
62 63
63/* 64/*
65 * All buffers now need to tell recovery where the magic number
66 * is so that it can verify and calculate the CRCs on the buffer correctly
67 * once the changes have been replayed into the buffer.
68 *
69 * The type value is held in the upper 5 bits of the blf_flags field, which is
70 * an unsigned 16 bit field. Hence we need to shift it 11 bits up and down.
71 */
72#define XFS_BLFT_BITS 5
73#define XFS_BLFT_SHIFT 11
74#define XFS_BLFT_MASK (((1 << XFS_BLFT_BITS) - 1) << XFS_BLFT_SHIFT)
75
76enum xfs_blft {
77 XFS_BLFT_UNKNOWN_BUF = 0,
78 XFS_BLFT_UDQUOT_BUF,
79 XFS_BLFT_PDQUOT_BUF,
80 XFS_BLFT_GDQUOT_BUF,
81 XFS_BLFT_BTREE_BUF,
82 XFS_BLFT_AGF_BUF,
83 XFS_BLFT_AGFL_BUF,
84 XFS_BLFT_AGI_BUF,
85 XFS_BLFT_DINO_BUF,
86 XFS_BLFT_SYMLINK_BUF,
87 XFS_BLFT_DIR_BLOCK_BUF,
88 XFS_BLFT_DIR_DATA_BUF,
89 XFS_BLFT_DIR_FREE_BUF,
90 XFS_BLFT_DIR_LEAF1_BUF,
91 XFS_BLFT_DIR_LEAFN_BUF,
92 XFS_BLFT_DA_NODE_BUF,
93 XFS_BLFT_ATTR_LEAF_BUF,
94 XFS_BLFT_ATTR_RMT_BUF,
95 XFS_BLFT_SB_BUF,
96 XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS),
97};
98
99static inline void
100xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type)
101{
102 ASSERT(type > XFS_BLFT_UNKNOWN_BUF && type < XFS_BLFT_MAX_BUF);
103 blf->blf_flags &= ~XFS_BLFT_MASK;
104 blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK);
105}
106
107static inline __uint16_t
108xfs_blft_from_flags(struct xfs_buf_log_format *blf)
109{
110 return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT;
111}
112
113/*
64 * buf log item flags 114 * buf log item flags
65 */ 115 */
66#define XFS_BLI_HOLD 0x01 116#define XFS_BLI_HOLD 0x01
@@ -113,6 +163,10 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
113void xfs_buf_iodone_callbacks(struct xfs_buf *); 163void xfs_buf_iodone_callbacks(struct xfs_buf *);
114void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); 164void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
115 165
166void xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *,
167 enum xfs_blft);
168void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, struct xfs_buf *src_bp);
169
116#endif /* __KERNEL__ */ 170#endif /* __KERNEL__ */
117 171
118#endif /* __XFS_BUF_ITEM_H__ */ 172#endif /* __XFS_BUF_ITEM_H__ */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 4d7696a02418..9b26a99ebfe9 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -38,6 +39,8 @@
38#include "xfs_attr_leaf.h" 39#include "xfs_attr_leaf.h"
39#include "xfs_error.h" 40#include "xfs_error.h"
40#include "xfs_trace.h" 41#include "xfs_trace.h"
42#include "xfs_cksum.h"
43#include "xfs_buf_item.h"
41 44
42/* 45/*
43 * xfs_da_btree.c 46 * xfs_da_btree.c
@@ -52,69 +55,195 @@
52/* 55/*
53 * Routines used for growing the Btree. 56 * Routines used for growing the Btree.
54 */ 57 */
55STATIC int xfs_da_root_split(xfs_da_state_t *state, 58STATIC int xfs_da3_root_split(xfs_da_state_t *state,
56 xfs_da_state_blk_t *existing_root, 59 xfs_da_state_blk_t *existing_root,
57 xfs_da_state_blk_t *new_child); 60 xfs_da_state_blk_t *new_child);
58STATIC int xfs_da_node_split(xfs_da_state_t *state, 61STATIC int xfs_da3_node_split(xfs_da_state_t *state,
59 xfs_da_state_blk_t *existing_blk, 62 xfs_da_state_blk_t *existing_blk,
60 xfs_da_state_blk_t *split_blk, 63 xfs_da_state_blk_t *split_blk,
61 xfs_da_state_blk_t *blk_to_add, 64 xfs_da_state_blk_t *blk_to_add,
62 int treelevel, 65 int treelevel,
63 int *result); 66 int *result);
64STATIC void xfs_da_node_rebalance(xfs_da_state_t *state, 67STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state,
65 xfs_da_state_blk_t *node_blk_1, 68 xfs_da_state_blk_t *node_blk_1,
66 xfs_da_state_blk_t *node_blk_2); 69 xfs_da_state_blk_t *node_blk_2);
67STATIC void xfs_da_node_add(xfs_da_state_t *state, 70STATIC void xfs_da3_node_add(xfs_da_state_t *state,
68 xfs_da_state_blk_t *old_node_blk, 71 xfs_da_state_blk_t *old_node_blk,
69 xfs_da_state_blk_t *new_node_blk); 72 xfs_da_state_blk_t *new_node_blk);
70 73
71/* 74/*
72 * Routines used for shrinking the Btree. 75 * Routines used for shrinking the Btree.
73 */ 76 */
74STATIC int xfs_da_root_join(xfs_da_state_t *state, 77STATIC int xfs_da3_root_join(xfs_da_state_t *state,
75 xfs_da_state_blk_t *root_blk); 78 xfs_da_state_blk_t *root_blk);
76STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval); 79STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval);
77STATIC void xfs_da_node_remove(xfs_da_state_t *state, 80STATIC void xfs_da3_node_remove(xfs_da_state_t *state,
78 xfs_da_state_blk_t *drop_blk); 81 xfs_da_state_blk_t *drop_blk);
79STATIC void xfs_da_node_unbalance(xfs_da_state_t *state, 82STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state,
80 xfs_da_state_blk_t *src_node_blk, 83 xfs_da_state_blk_t *src_node_blk,
81 xfs_da_state_blk_t *dst_node_blk); 84 xfs_da_state_blk_t *dst_node_blk);
82 85
83/* 86/*
84 * Utility routines. 87 * Utility routines.
85 */ 88 */
86STATIC uint xfs_da_node_lasthash(struct xfs_buf *bp, int *count); 89STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state,
87STATIC int xfs_da_node_order(struct xfs_buf *node1_bp,
88 struct xfs_buf *node2_bp);
89STATIC int xfs_da_blk_unlink(xfs_da_state_t *state,
90 xfs_da_state_blk_t *drop_blk, 90 xfs_da_state_blk_t *drop_blk,
91 xfs_da_state_blk_t *save_blk); 91 xfs_da_state_blk_t *save_blk);
92STATIC void xfs_da_state_kill_altpath(xfs_da_state_t *state);
93 92
94static void 93
95xfs_da_node_verify( 94kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
95
96/*
97 * Allocate a dir-state structure.
98 * We don't put them on the stack since they're large.
99 */
100xfs_da_state_t *
101xfs_da_state_alloc(void)
102{
103 return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
104}
105
106/*
107 * Kill the altpath contents of a da-state structure.
108 */
109STATIC void
110xfs_da_state_kill_altpath(xfs_da_state_t *state)
111{
112 int i;
113
114 for (i = 0; i < state->altpath.active; i++)
115 state->altpath.blk[i].bp = NULL;
116 state->altpath.active = 0;
117}
118
119/*
120 * Free a da-state structure.
121 */
122void
123xfs_da_state_free(xfs_da_state_t *state)
124{
125 xfs_da_state_kill_altpath(state);
126#ifdef DEBUG
127 memset((char *)state, 0, sizeof(*state));
128#endif /* DEBUG */
129 kmem_zone_free(xfs_da_state_zone, state);
130}
131
132void
133xfs_da3_node_hdr_from_disk(
134 struct xfs_da3_icnode_hdr *to,
135 struct xfs_da_intnode *from)
136{
137 ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
138 from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
139
140 if (from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
141 struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from;
142
143 to->forw = be32_to_cpu(hdr3->info.hdr.forw);
144 to->back = be32_to_cpu(hdr3->info.hdr.back);
145 to->magic = be16_to_cpu(hdr3->info.hdr.magic);
146 to->count = be16_to_cpu(hdr3->__count);
147 to->level = be16_to_cpu(hdr3->__level);
148 return;
149 }
150 to->forw = be32_to_cpu(from->hdr.info.forw);
151 to->back = be32_to_cpu(from->hdr.info.back);
152 to->magic = be16_to_cpu(from->hdr.info.magic);
153 to->count = be16_to_cpu(from->hdr.__count);
154 to->level = be16_to_cpu(from->hdr.__level);
155}
156
157void
158xfs_da3_node_hdr_to_disk(
159 struct xfs_da_intnode *to,
160 struct xfs_da3_icnode_hdr *from)
161{
162 ASSERT(from->magic == XFS_DA_NODE_MAGIC ||
163 from->magic == XFS_DA3_NODE_MAGIC);
164
165 if (from->magic == XFS_DA3_NODE_MAGIC) {
166 struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to;
167
168 hdr3->info.hdr.forw = cpu_to_be32(from->forw);
169 hdr3->info.hdr.back = cpu_to_be32(from->back);
170 hdr3->info.hdr.magic = cpu_to_be16(from->magic);
171 hdr3->__count = cpu_to_be16(from->count);
172 hdr3->__level = cpu_to_be16(from->level);
173 return;
174 }
175 to->hdr.info.forw = cpu_to_be32(from->forw);
176 to->hdr.info.back = cpu_to_be32(from->back);
177 to->hdr.info.magic = cpu_to_be16(from->magic);
178 to->hdr.__count = cpu_to_be16(from->count);
179 to->hdr.__level = cpu_to_be16(from->level);
180}
181
182static bool
183xfs_da3_node_verify(
96 struct xfs_buf *bp) 184 struct xfs_buf *bp)
97{ 185{
98 struct xfs_mount *mp = bp->b_target->bt_mount; 186 struct xfs_mount *mp = bp->b_target->bt_mount;
99 struct xfs_da_node_hdr *hdr = bp->b_addr; 187 struct xfs_da_intnode *hdr = bp->b_addr;
100 int block_ok = 0; 188 struct xfs_da3_icnode_hdr ichdr;
101 189
102 block_ok = hdr->info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC); 190 xfs_da3_node_hdr_from_disk(&ichdr, hdr);
103 block_ok = block_ok && 191
104 be16_to_cpu(hdr->level) > 0 && 192 if (xfs_sb_version_hascrc(&mp->m_sb)) {
105 be16_to_cpu(hdr->count) > 0 ; 193 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
106 if (!block_ok) { 194
107 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); 195 if (ichdr.magic != XFS_DA3_NODE_MAGIC)
108 xfs_buf_ioerror(bp, EFSCORRUPTED); 196 return false;
197
198 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid))
199 return false;
200 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
201 return false;
202 } else {
203 if (ichdr.magic != XFS_DA_NODE_MAGIC)
204 return false;
109 } 205 }
206 if (ichdr.level == 0)
207 return false;
208 if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
209 return false;
210 if (ichdr.count == 0)
211 return false;
110 212
213 /*
214 * we don't know if the node is for and attribute or directory tree,
215 * so only fail if the count is outside both bounds
216 */
217 if (ichdr.count > mp->m_dir_node_ents &&
218 ichdr.count > mp->m_attr_node_ents)
219 return false;
220
221 /* XXX: hash order check? */
222
223 return true;
111} 224}
112 225
113static void 226static void
114xfs_da_node_write_verify( 227xfs_da3_node_write_verify(
115 struct xfs_buf *bp) 228 struct xfs_buf *bp)
116{ 229{
117 xfs_da_node_verify(bp); 230 struct xfs_mount *mp = bp->b_target->bt_mount;
231 struct xfs_buf_log_item *bip = bp->b_fspriv;
232 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
233
234 if (!xfs_da3_node_verify(bp)) {
235 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
236 xfs_buf_ioerror(bp, EFSCORRUPTED);
237 return;
238 }
239
240 if (!xfs_sb_version_hascrc(&mp->m_sb))
241 return;
242
243 if (bip)
244 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
245
246 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF);
118} 247}
119 248
120/* 249/*
@@ -124,40 +253,47 @@ xfs_da_node_write_verify(
124 * format of the block being read. 253 * format of the block being read.
125 */ 254 */
126static void 255static void
127xfs_da_node_read_verify( 256xfs_da3_node_read_verify(
128 struct xfs_buf *bp) 257 struct xfs_buf *bp)
129{ 258{
130 struct xfs_mount *mp = bp->b_target->bt_mount; 259 struct xfs_mount *mp = bp->b_target->bt_mount;
131 struct xfs_da_blkinfo *info = bp->b_addr; 260 struct xfs_da_blkinfo *info = bp->b_addr;
132 261
133 switch (be16_to_cpu(info->magic)) { 262 switch (be16_to_cpu(info->magic)) {
263 case XFS_DA3_NODE_MAGIC:
264 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
265 XFS_DA3_NODE_CRC_OFF))
266 break;
267 /* fall through */
134 case XFS_DA_NODE_MAGIC: 268 case XFS_DA_NODE_MAGIC:
135 xfs_da_node_verify(bp); 269 if (!xfs_da3_node_verify(bp))
136 break; 270 break;
271 return;
137 case XFS_ATTR_LEAF_MAGIC: 272 case XFS_ATTR_LEAF_MAGIC:
138 bp->b_ops = &xfs_attr_leaf_buf_ops; 273 bp->b_ops = &xfs_attr3_leaf_buf_ops;
139 bp->b_ops->verify_read(bp); 274 bp->b_ops->verify_read(bp);
140 return; 275 return;
141 case XFS_DIR2_LEAFN_MAGIC: 276 case XFS_DIR2_LEAFN_MAGIC:
142 bp->b_ops = &xfs_dir2_leafn_buf_ops; 277 case XFS_DIR3_LEAFN_MAGIC:
278 bp->b_ops = &xfs_dir3_leafn_buf_ops;
143 bp->b_ops->verify_read(bp); 279 bp->b_ops->verify_read(bp);
144 return; 280 return;
145 default: 281 default:
146 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
147 mp, info);
148 xfs_buf_ioerror(bp, EFSCORRUPTED);
149 break; 282 break;
150 } 283 }
284
285 /* corrupt block */
286 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
287 xfs_buf_ioerror(bp, EFSCORRUPTED);
151} 288}
152 289
153const struct xfs_buf_ops xfs_da_node_buf_ops = { 290const struct xfs_buf_ops xfs_da3_node_buf_ops = {
154 .verify_read = xfs_da_node_read_verify, 291 .verify_read = xfs_da3_node_read_verify,
155 .verify_write = xfs_da_node_write_verify, 292 .verify_write = xfs_da3_node_write_verify,
156}; 293};
157 294
158
159int 295int
160xfs_da_node_read( 296xfs_da3_node_read(
161 struct xfs_trans *tp, 297 struct xfs_trans *tp,
162 struct xfs_inode *dp, 298 struct xfs_inode *dp,
163 xfs_dablk_t bno, 299 xfs_dablk_t bno,
@@ -165,8 +301,35 @@ xfs_da_node_read(
165 struct xfs_buf **bpp, 301 struct xfs_buf **bpp,
166 int which_fork) 302 int which_fork)
167{ 303{
168 return xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, 304 int err;
169 which_fork, &xfs_da_node_buf_ops); 305
306 err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
307 which_fork, &xfs_da3_node_buf_ops);
308 if (!err && tp) {
309 struct xfs_da_blkinfo *info = (*bpp)->b_addr;
310 int type;
311
312 switch (be16_to_cpu(info->magic)) {
313 case XFS_DA_NODE_MAGIC:
314 case XFS_DA3_NODE_MAGIC:
315 type = XFS_BLFT_DA_NODE_BUF;
316 break;
317 case XFS_ATTR_LEAF_MAGIC:
318 case XFS_ATTR3_LEAF_MAGIC:
319 type = XFS_BLFT_ATTR_LEAF_BUF;
320 break;
321 case XFS_DIR2_LEAFN_MAGIC:
322 case XFS_DIR3_LEAFN_MAGIC:
323 type = XFS_BLFT_DIR_LEAFN_BUF;
324 break;
325 default:
326 type = 0;
327 ASSERT(0);
328 break;
329 }
330 xfs_trans_buf_set_type(tp, *bpp, type);
331 }
332 return err;
170} 333}
171 334
172/*======================================================================== 335/*========================================================================
@@ -177,33 +340,46 @@ xfs_da_node_read(
177 * Create the initial contents of an intermediate node. 340 * Create the initial contents of an intermediate node.
178 */ 341 */
179int 342int
180xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, 343xfs_da3_node_create(
181 struct xfs_buf **bpp, int whichfork) 344 struct xfs_da_args *args,
345 xfs_dablk_t blkno,
346 int level,
347 struct xfs_buf **bpp,
348 int whichfork)
182{ 349{
183 xfs_da_intnode_t *node; 350 struct xfs_da_intnode *node;
184 struct xfs_buf *bp; 351 struct xfs_trans *tp = args->trans;
185 int error; 352 struct xfs_mount *mp = tp->t_mountp;
186 xfs_trans_t *tp; 353 struct xfs_da3_icnode_hdr ichdr = {0};
354 struct xfs_buf *bp;
355 int error;
187 356
188 trace_xfs_da_node_create(args); 357 trace_xfs_da_node_create(args);
358 ASSERT(level <= XFS_DA_NODE_MAXDEPTH);
189 359
190 tp = args->trans;
191 error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork); 360 error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
192 if (error) 361 if (error)
193 return(error); 362 return(error);
194 ASSERT(bp != NULL); 363 bp->b_ops = &xfs_da3_node_buf_ops;
364 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
195 node = bp->b_addr; 365 node = bp->b_addr;
196 node->hdr.info.forw = 0;
197 node->hdr.info.back = 0;
198 node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC);
199 node->hdr.info.pad = 0;
200 node->hdr.count = 0;
201 node->hdr.level = cpu_to_be16(level);
202 366
367 if (xfs_sb_version_hascrc(&mp->m_sb)) {
368 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
369
370 ichdr.magic = XFS_DA3_NODE_MAGIC;
371 hdr3->info.blkno = cpu_to_be64(bp->b_bn);
372 hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
373 uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_uuid);
374 } else {
375 ichdr.magic = XFS_DA_NODE_MAGIC;
376 }
377 ichdr.level = level;
378
379 xfs_da3_node_hdr_to_disk(node, &ichdr);
203 xfs_trans_log_buf(tp, bp, 380 xfs_trans_log_buf(tp, bp,
204 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); 381 XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node)));
205 382
206 bp->b_ops = &xfs_da_node_buf_ops;
207 *bpp = bp; 383 *bpp = bp;
208 return(0); 384 return(0);
209} 385}
@@ -213,12 +389,18 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
213 * intermediate nodes, rebalance, etc. 389 * intermediate nodes, rebalance, etc.
214 */ 390 */
215int /* error */ 391int /* error */
216xfs_da_split(xfs_da_state_t *state) 392xfs_da3_split(
393 struct xfs_da_state *state)
217{ 394{
218 xfs_da_state_blk_t *oldblk, *newblk, *addblk; 395 struct xfs_da_state_blk *oldblk;
219 xfs_da_intnode_t *node; 396 struct xfs_da_state_blk *newblk;
220 struct xfs_buf *bp; 397 struct xfs_da_state_blk *addblk;
221 int max, action, error, i; 398 struct xfs_da_intnode *node;
399 struct xfs_buf *bp;
400 int max;
401 int action;
402 int error;
403 int i;
222 404
223 trace_xfs_da_split(state->args); 405 trace_xfs_da_split(state->args);
224 406
@@ -246,7 +428,7 @@ xfs_da_split(xfs_da_state_t *state)
246 */ 428 */
247 switch (oldblk->magic) { 429 switch (oldblk->magic) {
248 case XFS_ATTR_LEAF_MAGIC: 430 case XFS_ATTR_LEAF_MAGIC:
249 error = xfs_attr_leaf_split(state, oldblk, newblk); 431 error = xfs_attr3_leaf_split(state, oldblk, newblk);
250 if ((error != 0) && (error != ENOSPC)) { 432 if ((error != 0) && (error != ENOSPC)) {
251 return(error); /* GROT: attr is inconsistent */ 433 return(error); /* GROT: attr is inconsistent */
252 } 434 }
@@ -261,12 +443,12 @@ xfs_da_split(xfs_da_state_t *state)
261 if (state->inleaf) { 443 if (state->inleaf) {
262 state->extraafter = 0; /* before newblk */ 444 state->extraafter = 0; /* before newblk */
263 trace_xfs_attr_leaf_split_before(state->args); 445 trace_xfs_attr_leaf_split_before(state->args);
264 error = xfs_attr_leaf_split(state, oldblk, 446 error = xfs_attr3_leaf_split(state, oldblk,
265 &state->extrablk); 447 &state->extrablk);
266 } else { 448 } else {
267 state->extraafter = 1; /* after newblk */ 449 state->extraafter = 1; /* after newblk */
268 trace_xfs_attr_leaf_split_after(state->args); 450 trace_xfs_attr_leaf_split_after(state->args);
269 error = xfs_attr_leaf_split(state, newblk, 451 error = xfs_attr3_leaf_split(state, newblk,
270 &state->extrablk); 452 &state->extrablk);
271 } 453 }
272 if (error) 454 if (error)
@@ -280,7 +462,7 @@ xfs_da_split(xfs_da_state_t *state)
280 addblk = newblk; 462 addblk = newblk;
281 break; 463 break;
282 case XFS_DA_NODE_MAGIC: 464 case XFS_DA_NODE_MAGIC:
283 error = xfs_da_node_split(state, oldblk, newblk, addblk, 465 error = xfs_da3_node_split(state, oldblk, newblk, addblk,
284 max - i, &action); 466 max - i, &action);
285 addblk->bp = NULL; 467 addblk->bp = NULL;
286 if (error) 468 if (error)
@@ -298,7 +480,7 @@ xfs_da_split(xfs_da_state_t *state)
298 /* 480 /*
299 * Update the btree to show the new hashval for this child. 481 * Update the btree to show the new hashval for this child.
300 */ 482 */
301 xfs_da_fixhashpath(state, &state->path); 483 xfs_da3_fixhashpath(state, &state->path);
302 } 484 }
303 if (!addblk) 485 if (!addblk)
304 return(0); 486 return(0);
@@ -308,7 +490,7 @@ xfs_da_split(xfs_da_state_t *state)
308 */ 490 */
309 ASSERT(state->path.active == 0); 491 ASSERT(state->path.active == 0);
310 oldblk = &state->path.blk[0]; 492 oldblk = &state->path.blk[0];
311 error = xfs_da_root_split(state, oldblk, addblk); 493 error = xfs_da3_root_split(state, oldblk, addblk);
312 if (error) { 494 if (error) {
313 addblk->bp = NULL; 495 addblk->bp = NULL;
314 return(error); /* GROT: dir is inconsistent */ 496 return(error); /* GROT: dir is inconsistent */
@@ -319,8 +501,12 @@ xfs_da_split(xfs_da_state_t *state)
319 * just got bumped because of the addition of a new root node. 501 * just got bumped because of the addition of a new root node.
320 * There might be three blocks involved if a double split occurred, 502 * There might be three blocks involved if a double split occurred,
321 * and the original block 0 could be at any position in the list. 503 * and the original block 0 could be at any position in the list.
504 *
505 * Note: the magic numbers and sibling pointers are in the same
506 * physical place for both v2 and v3 headers (by design). Hence it
507 * doesn't matter which version of the xfs_da_intnode structure we use
508 * here as the result will be the same using either structure.
322 */ 509 */
323
324 node = oldblk->bp->b_addr; 510 node = oldblk->bp->b_addr;
325 if (node->hdr.info.forw) { 511 if (node->hdr.info.forw) {
326 if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) { 512 if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
@@ -359,18 +545,25 @@ xfs_da_split(xfs_da_state_t *state)
359 * the EOF, extending the inode in process. 545 * the EOF, extending the inode in process.
360 */ 546 */
361STATIC int /* error */ 547STATIC int /* error */
362xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, 548xfs_da3_root_split(
363 xfs_da_state_blk_t *blk2) 549 struct xfs_da_state *state,
550 struct xfs_da_state_blk *blk1,
551 struct xfs_da_state_blk *blk2)
364{ 552{
365 xfs_da_intnode_t *node, *oldroot; 553 struct xfs_da_intnode *node;
366 xfs_da_args_t *args; 554 struct xfs_da_intnode *oldroot;
367 xfs_dablk_t blkno; 555 struct xfs_da_node_entry *btree;
368 struct xfs_buf *bp; 556 struct xfs_da3_icnode_hdr nodehdr;
369 int error, size; 557 struct xfs_da_args *args;
370 xfs_inode_t *dp; 558 struct xfs_buf *bp;
371 xfs_trans_t *tp; 559 struct xfs_inode *dp;
372 xfs_mount_t *mp; 560 struct xfs_trans *tp;
373 xfs_dir2_leaf_t *leaf; 561 struct xfs_mount *mp;
562 struct xfs_dir2_leaf *leaf;
563 xfs_dablk_t blkno;
564 int level;
565 int error;
566 int size;
374 567
375 trace_xfs_da_root_split(state->args); 568 trace_xfs_da_root_split(state->args);
376 569
@@ -379,29 +572,65 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
379 * to a free space somewhere. 572 * to a free space somewhere.
380 */ 573 */
381 args = state->args; 574 args = state->args;
382 ASSERT(args != NULL);
383 error = xfs_da_grow_inode(args, &blkno); 575 error = xfs_da_grow_inode(args, &blkno);
384 if (error) 576 if (error)
385 return(error); 577 return error;
578
386 dp = args->dp; 579 dp = args->dp;
387 tp = args->trans; 580 tp = args->trans;
388 mp = state->mp; 581 mp = state->mp;
389 error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork); 582 error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
390 if (error) 583 if (error)
391 return(error); 584 return error;
392 ASSERT(bp != NULL);
393 node = bp->b_addr; 585 node = bp->b_addr;
394 oldroot = blk1->bp->b_addr; 586 oldroot = blk1->bp->b_addr;
395 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) { 587 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
396 size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] - 588 oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
397 (char *)oldroot); 589 struct xfs_da3_icnode_hdr nodehdr;
590
591 xfs_da3_node_hdr_from_disk(&nodehdr, oldroot);
592 btree = xfs_da3_node_tree_p(oldroot);
593 size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot);
594 level = nodehdr.level;
595
596 /*
597 * we are about to copy oldroot to bp, so set up the type
598 * of bp while we know exactly what it will be.
599 */
600 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
398 } else { 601 } else {
399 ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 602 struct xfs_dir3_icleaf_hdr leafhdr;
603 struct xfs_dir2_leaf_entry *ents;
604
400 leaf = (xfs_dir2_leaf_t *)oldroot; 605 leaf = (xfs_dir2_leaf_t *)oldroot;
401 size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] - 606 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
402 (char *)leaf); 607 ents = xfs_dir3_leaf_ents_p(leaf);
608
609 ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
610 leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
611 size = (int)((char *)&ents[leafhdr.count] - (char *)leaf);
612 level = 0;
613
614 /*
615 * we are about to copy oldroot to bp, so set up the type
616 * of bp while we know exactly what it will be.
617 */
618 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
403 } 619 }
620
621 /*
622 * we can copy most of the information in the node from one block to
623 * another, but for CRC enabled headers we have to make sure that the
624 * block specific identifiers are kept intact. We update the buffer
625 * directly for this.
626 */
404 memcpy(node, oldroot, size); 627 memcpy(node, oldroot, size);
628 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
629 oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
630 struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node;
631
632 node3->hdr.info.blkno = cpu_to_be64(bp->b_bn);
633 }
405 xfs_trans_log_buf(tp, bp, 0, size - 1); 634 xfs_trans_log_buf(tp, bp, 0, size - 1);
406 635
407 bp->b_ops = blk1->bp->b_ops; 636 bp->b_ops = blk1->bp->b_ops;
@@ -411,20 +640,25 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
411 /* 640 /*
412 * Set up the new root node. 641 * Set up the new root node.
413 */ 642 */
414 error = xfs_da_node_create(args, 643 error = xfs_da3_node_create(args,
415 (args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0, 644 (args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0,
416 be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork); 645 level + 1, &bp, args->whichfork);
417 if (error) 646 if (error)
418 return(error); 647 return error;
648
419 node = bp->b_addr; 649 node = bp->b_addr;
420 node->btree[0].hashval = cpu_to_be32(blk1->hashval); 650 xfs_da3_node_hdr_from_disk(&nodehdr, node);
421 node->btree[0].before = cpu_to_be32(blk1->blkno); 651 btree = xfs_da3_node_tree_p(node);
422 node->btree[1].hashval = cpu_to_be32(blk2->hashval); 652 btree[0].hashval = cpu_to_be32(blk1->hashval);
423 node->btree[1].before = cpu_to_be32(blk2->blkno); 653 btree[0].before = cpu_to_be32(blk1->blkno);
424 node->hdr.count = cpu_to_be16(2); 654 btree[1].hashval = cpu_to_be32(blk2->hashval);
655 btree[1].before = cpu_to_be32(blk2->blkno);
656 nodehdr.count = 2;
657 xfs_da3_node_hdr_to_disk(node, &nodehdr);
425 658
426#ifdef DEBUG 659#ifdef DEBUG
427 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { 660 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
661 oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
428 ASSERT(blk1->blkno >= mp->m_dirleafblk && 662 ASSERT(blk1->blkno >= mp->m_dirleafblk &&
429 blk1->blkno < mp->m_dirfreeblk); 663 blk1->blkno < mp->m_dirfreeblk);
430 ASSERT(blk2->blkno >= mp->m_dirleafblk && 664 ASSERT(blk2->blkno >= mp->m_dirleafblk &&
@@ -434,30 +668,34 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
434 668
435 /* Header is already logged by xfs_da_node_create */ 669 /* Header is already logged by xfs_da_node_create */
436 xfs_trans_log_buf(tp, bp, 670 xfs_trans_log_buf(tp, bp,
437 XFS_DA_LOGRANGE(node, node->btree, 671 XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2));
438 sizeof(xfs_da_node_entry_t) * 2));
439 672
440 return(0); 673 return 0;
441} 674}
442 675
443/* 676/*
444 * Split the node, rebalance, then add the new entry. 677 * Split the node, rebalance, then add the new entry.
445 */ 678 */
446STATIC int /* error */ 679STATIC int /* error */
447xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, 680xfs_da3_node_split(
448 xfs_da_state_blk_t *newblk, 681 struct xfs_da_state *state,
449 xfs_da_state_blk_t *addblk, 682 struct xfs_da_state_blk *oldblk,
450 int treelevel, int *result) 683 struct xfs_da_state_blk *newblk,
684 struct xfs_da_state_blk *addblk,
685 int treelevel,
686 int *result)
451{ 687{
452 xfs_da_intnode_t *node; 688 struct xfs_da_intnode *node;
453 xfs_dablk_t blkno; 689 struct xfs_da3_icnode_hdr nodehdr;
454 int newcount, error; 690 xfs_dablk_t blkno;
455 int useextra; 691 int newcount;
692 int error;
693 int useextra;
456 694
457 trace_xfs_da_node_split(state->args); 695 trace_xfs_da_node_split(state->args);
458 696
459 node = oldblk->bp->b_addr; 697 node = oldblk->bp->b_addr;
460 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 698 xfs_da3_node_hdr_from_disk(&nodehdr, node);
461 699
462 /* 700 /*
463 * With V2 dirs the extra block is data or freespace. 701 * With V2 dirs the extra block is data or freespace.
@@ -467,7 +705,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
467 /* 705 /*
468 * Do we have to split the node? 706 * Do we have to split the node?
469 */ 707 */
470 if ((be16_to_cpu(node->hdr.count) + newcount) > state->node_ents) { 708 if (nodehdr.count + newcount > state->node_ents) {
471 /* 709 /*
472 * Allocate a new node, add to the doubly linked chain of 710 * Allocate a new node, add to the doubly linked chain of
473 * nodes, then move some of our excess entries into it. 711 * nodes, then move some of our excess entries into it.
@@ -476,14 +714,14 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
476 if (error) 714 if (error)
477 return(error); /* GROT: dir is inconsistent */ 715 return(error); /* GROT: dir is inconsistent */
478 716
479 error = xfs_da_node_create(state->args, blkno, treelevel, 717 error = xfs_da3_node_create(state->args, blkno, treelevel,
480 &newblk->bp, state->args->whichfork); 718 &newblk->bp, state->args->whichfork);
481 if (error) 719 if (error)
482 return(error); /* GROT: dir is inconsistent */ 720 return(error); /* GROT: dir is inconsistent */
483 newblk->blkno = blkno; 721 newblk->blkno = blkno;
484 newblk->magic = XFS_DA_NODE_MAGIC; 722 newblk->magic = XFS_DA_NODE_MAGIC;
485 xfs_da_node_rebalance(state, oldblk, newblk); 723 xfs_da3_node_rebalance(state, oldblk, newblk);
486 error = xfs_da_blk_link(state, oldblk, newblk); 724 error = xfs_da3_blk_link(state, oldblk, newblk);
487 if (error) 725 if (error)
488 return(error); 726 return(error);
489 *result = 1; 727 *result = 1;
@@ -495,7 +733,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
495 * Insert the new entry(s) into the correct block 733 * Insert the new entry(s) into the correct block
496 * (updating last hashval in the process). 734 * (updating last hashval in the process).
497 * 735 *
498 * xfs_da_node_add() inserts BEFORE the given index, 736 * xfs_da3_node_add() inserts BEFORE the given index,
499 * and as a result of using node_lookup_int() we always 737 * and as a result of using node_lookup_int() we always
500 * point to a valid entry (not after one), but a split 738 * point to a valid entry (not after one), but a split
501 * operation always results in a new block whose hashvals 739 * operation always results in a new block whose hashvals
@@ -504,22 +742,23 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
504 * If we had double-split op below us, then add the extra block too. 742 * If we had double-split op below us, then add the extra block too.
505 */ 743 */
506 node = oldblk->bp->b_addr; 744 node = oldblk->bp->b_addr;
507 if (oldblk->index <= be16_to_cpu(node->hdr.count)) { 745 xfs_da3_node_hdr_from_disk(&nodehdr, node);
746 if (oldblk->index <= nodehdr.count) {
508 oldblk->index++; 747 oldblk->index++;
509 xfs_da_node_add(state, oldblk, addblk); 748 xfs_da3_node_add(state, oldblk, addblk);
510 if (useextra) { 749 if (useextra) {
511 if (state->extraafter) 750 if (state->extraafter)
512 oldblk->index++; 751 oldblk->index++;
513 xfs_da_node_add(state, oldblk, &state->extrablk); 752 xfs_da3_node_add(state, oldblk, &state->extrablk);
514 state->extravalid = 0; 753 state->extravalid = 0;
515 } 754 }
516 } else { 755 } else {
517 newblk->index++; 756 newblk->index++;
518 xfs_da_node_add(state, newblk, addblk); 757 xfs_da3_node_add(state, newblk, addblk);
519 if (useextra) { 758 if (useextra) {
520 if (state->extraafter) 759 if (state->extraafter)
521 newblk->index++; 760 newblk->index++;
522 xfs_da_node_add(state, newblk, &state->extrablk); 761 xfs_da3_node_add(state, newblk, &state->extrablk);
523 state->extravalid = 0; 762 state->extravalid = 0;
524 } 763 }
525 } 764 }
@@ -534,33 +773,53 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
534 * NOTE: if blk2 is empty, then it will get the upper half of blk1. 773 * NOTE: if blk2 is empty, then it will get the upper half of blk1.
535 */ 774 */
536STATIC void 775STATIC void
537xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, 776xfs_da3_node_rebalance(
538 xfs_da_state_blk_t *blk2) 777 struct xfs_da_state *state,
778 struct xfs_da_state_blk *blk1,
779 struct xfs_da_state_blk *blk2)
539{ 780{
540 xfs_da_intnode_t *node1, *node2, *tmpnode; 781 struct xfs_da_intnode *node1;
541 xfs_da_node_entry_t *btree_s, *btree_d; 782 struct xfs_da_intnode *node2;
542 int count, tmp; 783 struct xfs_da_intnode *tmpnode;
543 xfs_trans_t *tp; 784 struct xfs_da_node_entry *btree1;
785 struct xfs_da_node_entry *btree2;
786 struct xfs_da_node_entry *btree_s;
787 struct xfs_da_node_entry *btree_d;
788 struct xfs_da3_icnode_hdr nodehdr1;
789 struct xfs_da3_icnode_hdr nodehdr2;
790 struct xfs_trans *tp;
791 int count;
792 int tmp;
793 int swap = 0;
544 794
545 trace_xfs_da_node_rebalance(state->args); 795 trace_xfs_da_node_rebalance(state->args);
546 796
547 node1 = blk1->bp->b_addr; 797 node1 = blk1->bp->b_addr;
548 node2 = blk2->bp->b_addr; 798 node2 = blk2->bp->b_addr;
799 xfs_da3_node_hdr_from_disk(&nodehdr1, node1);
800 xfs_da3_node_hdr_from_disk(&nodehdr2, node2);
801 btree1 = xfs_da3_node_tree_p(node1);
802 btree2 = xfs_da3_node_tree_p(node2);
803
549 /* 804 /*
550 * Figure out how many entries need to move, and in which direction. 805 * Figure out how many entries need to move, and in which direction.
551 * Swap the nodes around if that makes it simpler. 806 * Swap the nodes around if that makes it simpler.
552 */ 807 */
553 if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) && 808 if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
554 ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) || 809 ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
555 (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) < 810 (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
556 be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) { 811 be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
557 tmpnode = node1; 812 tmpnode = node1;
558 node1 = node2; 813 node1 = node2;
559 node2 = tmpnode; 814 node2 = tmpnode;
815 xfs_da3_node_hdr_from_disk(&nodehdr1, node1);
816 xfs_da3_node_hdr_from_disk(&nodehdr2, node2);
817 btree1 = xfs_da3_node_tree_p(node1);
818 btree2 = xfs_da3_node_tree_p(node2);
819 swap = 1;
560 } 820 }
561 ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 821
562 ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 822 count = (nodehdr1.count - nodehdr2.count) / 2;
563 count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
564 if (count == 0) 823 if (count == 0)
565 return; 824 return;
566 tp = state->args->trans; 825 tp = state->args->trans;
@@ -571,10 +830,11 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
571 /* 830 /*
572 * Move elements in node2 up to make a hole. 831 * Move elements in node2 up to make a hole.
573 */ 832 */
574 if ((tmp = be16_to_cpu(node2->hdr.count)) > 0) { 833 tmp = nodehdr2.count;
834 if (tmp > 0) {
575 tmp *= (uint)sizeof(xfs_da_node_entry_t); 835 tmp *= (uint)sizeof(xfs_da_node_entry_t);
576 btree_s = &node2->btree[0]; 836 btree_s = &btree2[0];
577 btree_d = &node2->btree[count]; 837 btree_d = &btree2[count];
578 memmove(btree_d, btree_s, tmp); 838 memmove(btree_d, btree_s, tmp);
579 } 839 }
580 840
@@ -582,12 +842,12 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
582 * Move the req'd B-tree elements from high in node1 to 842 * Move the req'd B-tree elements from high in node1 to
583 * low in node2. 843 * low in node2.
584 */ 844 */
585 be16_add_cpu(&node2->hdr.count, count); 845 nodehdr2.count += count;
586 tmp = count * (uint)sizeof(xfs_da_node_entry_t); 846 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
587 btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count]; 847 btree_s = &btree1[nodehdr1.count - count];
588 btree_d = &node2->btree[0]; 848 btree_d = &btree2[0];
589 memcpy(btree_d, btree_s, tmp); 849 memcpy(btree_d, btree_s, tmp);
590 be16_add_cpu(&node1->hdr.count, -count); 850 nodehdr1.count -= count;
591 } else { 851 } else {
592 /* 852 /*
593 * Move the req'd B-tree elements from low in node2 to 853 * Move the req'd B-tree elements from low in node2 to
@@ -595,49 +855,60 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
595 */ 855 */
596 count = -count; 856 count = -count;
597 tmp = count * (uint)sizeof(xfs_da_node_entry_t); 857 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
598 btree_s = &node2->btree[0]; 858 btree_s = &btree2[0];
599 btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)]; 859 btree_d = &btree1[nodehdr1.count];
600 memcpy(btree_d, btree_s, tmp); 860 memcpy(btree_d, btree_s, tmp);
601 be16_add_cpu(&node1->hdr.count, count); 861 nodehdr1.count += count;
862
602 xfs_trans_log_buf(tp, blk1->bp, 863 xfs_trans_log_buf(tp, blk1->bp,
603 XFS_DA_LOGRANGE(node1, btree_d, tmp)); 864 XFS_DA_LOGRANGE(node1, btree_d, tmp));
604 865
605 /* 866 /*
606 * Move elements in node2 down to fill the hole. 867 * Move elements in node2 down to fill the hole.
607 */ 868 */
608 tmp = be16_to_cpu(node2->hdr.count) - count; 869 tmp = nodehdr2.count - count;
609 tmp *= (uint)sizeof(xfs_da_node_entry_t); 870 tmp *= (uint)sizeof(xfs_da_node_entry_t);
610 btree_s = &node2->btree[count]; 871 btree_s = &btree2[count];
611 btree_d = &node2->btree[0]; 872 btree_d = &btree2[0];
612 memmove(btree_d, btree_s, tmp); 873 memmove(btree_d, btree_s, tmp);
613 be16_add_cpu(&node2->hdr.count, -count); 874 nodehdr2.count -= count;
614 } 875 }
615 876
616 /* 877 /*
617 * Log header of node 1 and all current bits of node 2. 878 * Log header of node 1 and all current bits of node 2.
618 */ 879 */
880 xfs_da3_node_hdr_to_disk(node1, &nodehdr1);
619 xfs_trans_log_buf(tp, blk1->bp, 881 xfs_trans_log_buf(tp, blk1->bp,
620 XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr))); 882 XFS_DA_LOGRANGE(node1, &node1->hdr,
883 xfs_da3_node_hdr_size(node1)));
884
885 xfs_da3_node_hdr_to_disk(node2, &nodehdr2);
621 xfs_trans_log_buf(tp, blk2->bp, 886 xfs_trans_log_buf(tp, blk2->bp,
622 XFS_DA_LOGRANGE(node2, &node2->hdr, 887 XFS_DA_LOGRANGE(node2, &node2->hdr,
623 sizeof(node2->hdr) + 888 xfs_da3_node_hdr_size(node2) +
624 sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count))); 889 (sizeof(btree2[0]) * nodehdr2.count)));
625 890
626 /* 891 /*
627 * Record the last hashval from each block for upward propagation. 892 * Record the last hashval from each block for upward propagation.
628 * (note: don't use the swapped node pointers) 893 * (note: don't use the swapped node pointers)
629 */ 894 */
630 node1 = blk1->bp->b_addr; 895 if (swap) {
631 node2 = blk2->bp->b_addr; 896 node1 = blk1->bp->b_addr;
632 blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval); 897 node2 = blk2->bp->b_addr;
633 blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval); 898 xfs_da3_node_hdr_from_disk(&nodehdr1, node1);
899 xfs_da3_node_hdr_from_disk(&nodehdr2, node2);
900 btree1 = xfs_da3_node_tree_p(node1);
901 btree2 = xfs_da3_node_tree_p(node2);
902 }
903 blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
904 blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);
634 905
635 /* 906 /*
636 * Adjust the expected index for insertion. 907 * Adjust the expected index for insertion.
637 */ 908 */
638 if (blk1->index >= be16_to_cpu(node1->hdr.count)) { 909 if (blk1->index >= nodehdr1.count) {
639 blk2->index = blk1->index - be16_to_cpu(node1->hdr.count); 910 blk2->index = blk1->index - nodehdr1.count;
640 blk1->index = be16_to_cpu(node1->hdr.count) + 1; /* make it invalid */ 911 blk1->index = nodehdr1.count + 1; /* make it invalid */
641 } 912 }
642} 913}
643 914
@@ -645,18 +916,23 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
645 * Add a new entry to an intermediate node. 916 * Add a new entry to an intermediate node.
646 */ 917 */
647STATIC void 918STATIC void
648xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, 919xfs_da3_node_add(
649 xfs_da_state_blk_t *newblk) 920 struct xfs_da_state *state,
921 struct xfs_da_state_blk *oldblk,
922 struct xfs_da_state_blk *newblk)
650{ 923{
651 xfs_da_intnode_t *node; 924 struct xfs_da_intnode *node;
652 xfs_da_node_entry_t *btree; 925 struct xfs_da3_icnode_hdr nodehdr;
653 int tmp; 926 struct xfs_da_node_entry *btree;
927 int tmp;
654 928
655 trace_xfs_da_node_add(state->args); 929 trace_xfs_da_node_add(state->args);
656 930
657 node = oldblk->bp->b_addr; 931 node = oldblk->bp->b_addr;
658 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 932 xfs_da3_node_hdr_from_disk(&nodehdr, node);
659 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); 933 btree = xfs_da3_node_tree_p(node);
934
935 ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
660 ASSERT(newblk->blkno != 0); 936 ASSERT(newblk->blkno != 0);
661 if (state->args->whichfork == XFS_DATA_FORK) 937 if (state->args->whichfork == XFS_DATA_FORK)
662 ASSERT(newblk->blkno >= state->mp->m_dirleafblk && 938 ASSERT(newblk->blkno >= state->mp->m_dirleafblk &&
@@ -666,23 +942,25 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
666 * We may need to make some room before we insert the new node. 942 * We may need to make some room before we insert the new node.
667 */ 943 */
668 tmp = 0; 944 tmp = 0;
669 btree = &node->btree[ oldblk->index ]; 945 if (oldblk->index < nodehdr.count) {
670 if (oldblk->index < be16_to_cpu(node->hdr.count)) { 946 tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
671 tmp = (be16_to_cpu(node->hdr.count) - oldblk->index) * (uint)sizeof(*btree); 947 memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
672 memmove(btree + 1, btree, tmp);
673 } 948 }
674 btree->hashval = cpu_to_be32(newblk->hashval); 949 btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
675 btree->before = cpu_to_be32(newblk->blkno); 950 btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
676 xfs_trans_log_buf(state->args->trans, oldblk->bp, 951 xfs_trans_log_buf(state->args->trans, oldblk->bp,
677 XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree))); 952 XFS_DA_LOGRANGE(node, &btree[oldblk->index],
678 be16_add_cpu(&node->hdr.count, 1); 953 tmp + sizeof(*btree)));
954
955 nodehdr.count += 1;
956 xfs_da3_node_hdr_to_disk(node, &nodehdr);
679 xfs_trans_log_buf(state->args->trans, oldblk->bp, 957 xfs_trans_log_buf(state->args->trans, oldblk->bp,
680 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); 958 XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node)));
681 959
682 /* 960 /*
683 * Copy the last hash value from the oldblk to propagate upwards. 961 * Copy the last hash value from the oldblk to propagate upwards.
684 */ 962 */
685 oldblk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1 ].hashval); 963 oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
686} 964}
687 965
688/*======================================================================== 966/*========================================================================
@@ -694,14 +972,16 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
694 * possibly deallocating that block, etc... 972 * possibly deallocating that block, etc...
695 */ 973 */
696int 974int
697xfs_da_join(xfs_da_state_t *state) 975xfs_da3_join(
976 struct xfs_da_state *state)
698{ 977{
699 xfs_da_state_blk_t *drop_blk, *save_blk; 978 struct xfs_da_state_blk *drop_blk;
700 int action, error; 979 struct xfs_da_state_blk *save_blk;
980 int action = 0;
981 int error;
701 982
702 trace_xfs_da_join(state->args); 983 trace_xfs_da_join(state->args);
703 984
704 action = 0;
705 drop_blk = &state->path.blk[ state->path.active-1 ]; 985 drop_blk = &state->path.blk[ state->path.active-1 ];
706 save_blk = &state->altpath.blk[ state->path.active-1 ]; 986 save_blk = &state->altpath.blk[ state->path.active-1 ];
707 ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC); 987 ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
@@ -722,12 +1002,12 @@ xfs_da_join(xfs_da_state_t *state)
722 */ 1002 */
723 switch (drop_blk->magic) { 1003 switch (drop_blk->magic) {
724 case XFS_ATTR_LEAF_MAGIC: 1004 case XFS_ATTR_LEAF_MAGIC:
725 error = xfs_attr_leaf_toosmall(state, &action); 1005 error = xfs_attr3_leaf_toosmall(state, &action);
726 if (error) 1006 if (error)
727 return(error); 1007 return(error);
728 if (action == 0) 1008 if (action == 0)
729 return(0); 1009 return(0);
730 xfs_attr_leaf_unbalance(state, drop_blk, save_blk); 1010 xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
731 break; 1011 break;
732 case XFS_DIR2_LEAFN_MAGIC: 1012 case XFS_DIR2_LEAFN_MAGIC:
733 error = xfs_dir2_leafn_toosmall(state, &action); 1013 error = xfs_dir2_leafn_toosmall(state, &action);
@@ -742,18 +1022,18 @@ xfs_da_join(xfs_da_state_t *state)
742 * Remove the offending node, fixup hashvals, 1022 * Remove the offending node, fixup hashvals,
743 * check for a toosmall neighbor. 1023 * check for a toosmall neighbor.
744 */ 1024 */
745 xfs_da_node_remove(state, drop_blk); 1025 xfs_da3_node_remove(state, drop_blk);
746 xfs_da_fixhashpath(state, &state->path); 1026 xfs_da3_fixhashpath(state, &state->path);
747 error = xfs_da_node_toosmall(state, &action); 1027 error = xfs_da3_node_toosmall(state, &action);
748 if (error) 1028 if (error)
749 return(error); 1029 return(error);
750 if (action == 0) 1030 if (action == 0)
751 return 0; 1031 return 0;
752 xfs_da_node_unbalance(state, drop_blk, save_blk); 1032 xfs_da3_node_unbalance(state, drop_blk, save_blk);
753 break; 1033 break;
754 } 1034 }
755 xfs_da_fixhashpath(state, &state->altpath); 1035 xfs_da3_fixhashpath(state, &state->altpath);
756 error = xfs_da_blk_unlink(state, drop_blk, save_blk); 1036 error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
757 xfs_da_state_kill_altpath(state); 1037 xfs_da_state_kill_altpath(state);
758 if (error) 1038 if (error)
759 return(error); 1039 return(error);
@@ -768,9 +1048,9 @@ xfs_da_join(xfs_da_state_t *state)
768 * we only have one entry in the root, make the child block 1048 * we only have one entry in the root, make the child block
769 * the new root. 1049 * the new root.
770 */ 1050 */
771 xfs_da_node_remove(state, drop_blk); 1051 xfs_da3_node_remove(state, drop_blk);
772 xfs_da_fixhashpath(state, &state->path); 1052 xfs_da3_fixhashpath(state, &state->path);
773 error = xfs_da_root_join(state, &state->path.blk[0]); 1053 error = xfs_da3_root_join(state, &state->path.blk[0]);
774 return(error); 1054 return(error);
775} 1055}
776 1056
@@ -782,9 +1062,13 @@ xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
782 1062
783 if (level == 1) { 1063 if (level == 1) {
784 ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || 1064 ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
785 magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1065 magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
786 } else 1066 magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
787 ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1067 magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
1068 } else {
1069 ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1070 magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
1071 }
788 ASSERT(!blkinfo->forw); 1072 ASSERT(!blkinfo->forw);
789 ASSERT(!blkinfo->back); 1073 ASSERT(!blkinfo->back);
790} 1074}
@@ -797,52 +1081,61 @@ xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
797 * the old root to block 0 as the new root node. 1081 * the old root to block 0 as the new root node.
798 */ 1082 */
799STATIC int 1083STATIC int
800xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) 1084xfs_da3_root_join(
1085 struct xfs_da_state *state,
1086 struct xfs_da_state_blk *root_blk)
801{ 1087{
802 xfs_da_intnode_t *oldroot; 1088 struct xfs_da_intnode *oldroot;
803 xfs_da_args_t *args; 1089 struct xfs_da_args *args;
804 xfs_dablk_t child; 1090 xfs_dablk_t child;
805 struct xfs_buf *bp; 1091 struct xfs_buf *bp;
806 int error; 1092 struct xfs_da3_icnode_hdr oldroothdr;
1093 struct xfs_da_node_entry *btree;
1094 int error;
807 1095
808 trace_xfs_da_root_join(state->args); 1096 trace_xfs_da_root_join(state->args);
809 1097
810 args = state->args;
811 ASSERT(args != NULL);
812 ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); 1098 ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
1099
1100 args = state->args;
813 oldroot = root_blk->bp->b_addr; 1101 oldroot = root_blk->bp->b_addr;
814 ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1102 xfs_da3_node_hdr_from_disk(&oldroothdr, oldroot);
815 ASSERT(!oldroot->hdr.info.forw); 1103 ASSERT(oldroothdr.forw == 0);
816 ASSERT(!oldroot->hdr.info.back); 1104 ASSERT(oldroothdr.back == 0);
817 1105
818 /* 1106 /*
819 * If the root has more than one child, then don't do anything. 1107 * If the root has more than one child, then don't do anything.
820 */ 1108 */
821 if (be16_to_cpu(oldroot->hdr.count) > 1) 1109 if (oldroothdr.count > 1)
822 return(0); 1110 return 0;
823 1111
824 /* 1112 /*
825 * Read in the (only) child block, then copy those bytes into 1113 * Read in the (only) child block, then copy those bytes into
826 * the root block's buffer and free the original child block. 1114 * the root block's buffer and free the original child block.
827 */ 1115 */
828 child = be32_to_cpu(oldroot->btree[0].before); 1116 btree = xfs_da3_node_tree_p(oldroot);
1117 child = be32_to_cpu(btree[0].before);
829 ASSERT(child != 0); 1118 ASSERT(child != 0);
830 error = xfs_da_node_read(args->trans, args->dp, child, -1, &bp, 1119 error = xfs_da3_node_read(args->trans, args->dp, child, -1, &bp,
831 args->whichfork); 1120 args->whichfork);
832 if (error) 1121 if (error)
833 return(error); 1122 return error;
834 ASSERT(bp != NULL); 1123 xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);
835 xfs_da_blkinfo_onlychild_validate(bp->b_addr,
836 be16_to_cpu(oldroot->hdr.level));
837 1124
838 /* 1125 /*
839 * This could be copying a leaf back into the root block in the case of 1126 * This could be copying a leaf back into the root block in the case of
840 * there only being a single leaf block left in the tree. Hence we have 1127 * there only being a single leaf block left in the tree. Hence we have
841 * to update the b_ops pointer as well to match the buffer type change 1128 * to update the b_ops pointer as well to match the buffer type change
842 * that could occur. 1129 * that could occur. For dir3 blocks we also need to update the block
1130 * number in the buffer header.
843 */ 1131 */
844 memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize); 1132 memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize);
845 root_blk->bp->b_ops = bp->b_ops; 1133 root_blk->bp->b_ops = bp->b_ops;
1134 xfs_trans_buf_copy_type(root_blk->bp, bp);
1135 if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
1136 struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
1137 da3->blkno = cpu_to_be64(root_blk->bp->b_bn);
1138 }
846 xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); 1139 xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
847 error = xfs_da_shrink_inode(args, child, bp); 1140 error = xfs_da_shrink_inode(args, child, bp);
848 return(error); 1141 return(error);
@@ -858,14 +1151,21 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
858 * If nothing can be done, return 0. 1151 * If nothing can be done, return 0.
859 */ 1152 */
860STATIC int 1153STATIC int
861xfs_da_node_toosmall(xfs_da_state_t *state, int *action) 1154xfs_da3_node_toosmall(
1155 struct xfs_da_state *state,
1156 int *action)
862{ 1157{
863 xfs_da_intnode_t *node; 1158 struct xfs_da_intnode *node;
864 xfs_da_state_blk_t *blk; 1159 struct xfs_da_state_blk *blk;
865 xfs_da_blkinfo_t *info; 1160 struct xfs_da_blkinfo *info;
866 int count, forward, error, retval, i; 1161 xfs_dablk_t blkno;
867 xfs_dablk_t blkno; 1162 struct xfs_buf *bp;
868 struct xfs_buf *bp; 1163 struct xfs_da3_icnode_hdr nodehdr;
1164 int count;
1165 int forward;
1166 int error;
1167 int retval;
1168 int i;
869 1169
870 trace_xfs_da_node_toosmall(state->args); 1170 trace_xfs_da_node_toosmall(state->args);
871 1171
@@ -876,10 +1176,9 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
876 */ 1176 */
877 blk = &state->path.blk[ state->path.active-1 ]; 1177 blk = &state->path.blk[ state->path.active-1 ];
878 info = blk->bp->b_addr; 1178 info = blk->bp->b_addr;
879 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
880 node = (xfs_da_intnode_t *)info; 1179 node = (xfs_da_intnode_t *)info;
881 count = be16_to_cpu(node->hdr.count); 1180 xfs_da3_node_hdr_from_disk(&nodehdr, node);
882 if (count > (state->node_ents >> 1)) { 1181 if (nodehdr.count > (state->node_ents >> 1)) {
883 *action = 0; /* blk over 50%, don't try to join */ 1182 *action = 0; /* blk over 50%, don't try to join */
884 return(0); /* blk over 50%, don't try to join */ 1183 return(0); /* blk over 50%, don't try to join */
885 } 1184 }
@@ -890,14 +1189,14 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
890 * coalesce it with a sibling block. We choose (arbitrarily) 1189 * coalesce it with a sibling block. We choose (arbitrarily)
891 * to merge with the forward block unless it is NULL. 1190 * to merge with the forward block unless it is NULL.
892 */ 1191 */
893 if (count == 0) { 1192 if (nodehdr.count == 0) {
894 /* 1193 /*
895 * Make altpath point to the block we want to keep and 1194 * Make altpath point to the block we want to keep and
896 * path point to the block we want to drop (this one). 1195 * path point to the block we want to drop (this one).
897 */ 1196 */
898 forward = (info->forw != 0); 1197 forward = (info->forw != 0);
899 memcpy(&state->altpath, &state->path, sizeof(state->path)); 1198 memcpy(&state->altpath, &state->path, sizeof(state->path));
900 error = xfs_da_path_shift(state, &state->altpath, forward, 1199 error = xfs_da3_path_shift(state, &state->altpath, forward,
901 0, &retval); 1200 0, &retval);
902 if (error) 1201 if (error)
903 return(error); 1202 return(error);
@@ -916,35 +1215,34 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
916 * We prefer coalescing with the lower numbered sibling so as 1215 * We prefer coalescing with the lower numbered sibling so as
917 * to shrink a directory over time. 1216 * to shrink a directory over time.
918 */ 1217 */
1218 count = state->node_ents;
1219 count -= state->node_ents >> 2;
1220 count -= nodehdr.count;
1221
919 /* start with smaller blk num */ 1222 /* start with smaller blk num */
920 forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back)); 1223 forward = nodehdr.forw < nodehdr.back;
921 for (i = 0; i < 2; forward = !forward, i++) { 1224 for (i = 0; i < 2; forward = !forward, i++) {
922 if (forward) 1225 if (forward)
923 blkno = be32_to_cpu(info->forw); 1226 blkno = nodehdr.forw;
924 else 1227 else
925 blkno = be32_to_cpu(info->back); 1228 blkno = nodehdr.back;
926 if (blkno == 0) 1229 if (blkno == 0)
927 continue; 1230 continue;
928 error = xfs_da_node_read(state->args->trans, state->args->dp, 1231 error = xfs_da3_node_read(state->args->trans, state->args->dp,
929 blkno, -1, &bp, state->args->whichfork); 1232 blkno, -1, &bp, state->args->whichfork);
930 if (error) 1233 if (error)
931 return(error); 1234 return(error);
932 ASSERT(bp != NULL);
933 1235
934 node = (xfs_da_intnode_t *)info;
935 count = state->node_ents;
936 count -= state->node_ents >> 2;
937 count -= be16_to_cpu(node->hdr.count);
938 node = bp->b_addr; 1236 node = bp->b_addr;
939 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1237 xfs_da3_node_hdr_from_disk(&nodehdr, node);
940 count -= be16_to_cpu(node->hdr.count);
941 xfs_trans_brelse(state->args->trans, bp); 1238 xfs_trans_brelse(state->args->trans, bp);
942 if (count >= 0) 1239
1240 if (count - nodehdr.count >= 0)
943 break; /* fits with at least 25% to spare */ 1241 break; /* fits with at least 25% to spare */
944 } 1242 }
945 if (i >= 2) { 1243 if (i >= 2) {
946 *action = 0; 1244 *action = 0;
947 return(0); 1245 return 0;
948 } 1246 }
949 1247
950 /* 1248 /*
@@ -953,28 +1251,42 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
953 */ 1251 */
954 memcpy(&state->altpath, &state->path, sizeof(state->path)); 1252 memcpy(&state->altpath, &state->path, sizeof(state->path));
955 if (blkno < blk->blkno) { 1253 if (blkno < blk->blkno) {
956 error = xfs_da_path_shift(state, &state->altpath, forward, 1254 error = xfs_da3_path_shift(state, &state->altpath, forward,
957 0, &retval); 1255 0, &retval);
958 if (error) {
959 return(error);
960 }
961 if (retval) {
962 *action = 0;
963 return(0);
964 }
965 } else { 1256 } else {
966 error = xfs_da_path_shift(state, &state->path, forward, 1257 error = xfs_da3_path_shift(state, &state->path, forward,
967 0, &retval); 1258 0, &retval);
968 if (error) { 1259 }
969 return(error); 1260 if (error)
970 } 1261 return error;
971 if (retval) { 1262 if (retval) {
972 *action = 0; 1263 *action = 0;
973 return(0); 1264 return 0;
974 }
975 } 1265 }
976 *action = 1; 1266 *action = 1;
977 return(0); 1267 return 0;
1268}
1269
1270/*
1271 * Pick up the last hashvalue from an intermediate node.
1272 */
1273STATIC uint
1274xfs_da3_node_lasthash(
1275 struct xfs_buf *bp,
1276 int *count)
1277{
1278 struct xfs_da_intnode *node;
1279 struct xfs_da_node_entry *btree;
1280 struct xfs_da3_icnode_hdr nodehdr;
1281
1282 node = bp->b_addr;
1283 xfs_da3_node_hdr_from_disk(&nodehdr, node);
1284 if (count)
1285 *count = nodehdr.count;
1286 if (!nodehdr.count)
1287 return 0;
1288 btree = xfs_da3_node_tree_p(node);
1289 return be32_to_cpu(btree[nodehdr.count - 1].hashval);
978} 1290}
979 1291
980/* 1292/*
@@ -982,13 +1294,16 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
982 * when we stop making changes, return. 1294 * when we stop making changes, return.
983 */ 1295 */
984void 1296void
985xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path) 1297xfs_da3_fixhashpath(
1298 struct xfs_da_state *state,
1299 struct xfs_da_state_path *path)
986{ 1300{
987 xfs_da_state_blk_t *blk; 1301 struct xfs_da_state_blk *blk;
988 xfs_da_intnode_t *node; 1302 struct xfs_da_intnode *node;
989 xfs_da_node_entry_t *btree; 1303 struct xfs_da_node_entry *btree;
990 xfs_dahash_t lasthash=0; 1304 xfs_dahash_t lasthash=0;
991 int level, count; 1305 int level;
1306 int count;
992 1307
993 trace_xfs_da_fixhashpath(state->args); 1308 trace_xfs_da_fixhashpath(state->args);
994 1309
@@ -1006,23 +1321,26 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
1006 return; 1321 return;
1007 break; 1322 break;
1008 case XFS_DA_NODE_MAGIC: 1323 case XFS_DA_NODE_MAGIC:
1009 lasthash = xfs_da_node_lasthash(blk->bp, &count); 1324 lasthash = xfs_da3_node_lasthash(blk->bp, &count);
1010 if (count == 0) 1325 if (count == 0)
1011 return; 1326 return;
1012 break; 1327 break;
1013 } 1328 }
1014 for (blk--, level--; level >= 0; blk--, level--) { 1329 for (blk--, level--; level >= 0; blk--, level--) {
1330 struct xfs_da3_icnode_hdr nodehdr;
1331
1015 node = blk->bp->b_addr; 1332 node = blk->bp->b_addr;
1016 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1333 xfs_da3_node_hdr_from_disk(&nodehdr, node);
1017 btree = &node->btree[ blk->index ]; 1334 btree = xfs_da3_node_tree_p(node);
1018 if (be32_to_cpu(btree->hashval) == lasthash) 1335 if (be32_to_cpu(btree->hashval) == lasthash)
1019 break; 1336 break;
1020 blk->hashval = lasthash; 1337 blk->hashval = lasthash;
1021 btree->hashval = cpu_to_be32(lasthash); 1338 btree[blk->index].hashval = cpu_to_be32(lasthash);
1022 xfs_trans_log_buf(state->args->trans, blk->bp, 1339 xfs_trans_log_buf(state->args->trans, blk->bp,
1023 XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); 1340 XFS_DA_LOGRANGE(node, &btree[blk->index],
1341 sizeof(*btree)));
1024 1342
1025 lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval); 1343 lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval);
1026 } 1344 }
1027} 1345}
1028 1346
@@ -1030,104 +1348,120 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
1030 * Remove an entry from an intermediate node. 1348 * Remove an entry from an intermediate node.
1031 */ 1349 */
1032STATIC void 1350STATIC void
1033xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk) 1351xfs_da3_node_remove(
1352 struct xfs_da_state *state,
1353 struct xfs_da_state_blk *drop_blk)
1034{ 1354{
1035 xfs_da_intnode_t *node; 1355 struct xfs_da_intnode *node;
1036 xfs_da_node_entry_t *btree; 1356 struct xfs_da3_icnode_hdr nodehdr;
1037 int tmp; 1357 struct xfs_da_node_entry *btree;
1358 int index;
1359 int tmp;
1038 1360
1039 trace_xfs_da_node_remove(state->args); 1361 trace_xfs_da_node_remove(state->args);
1040 1362
1041 node = drop_blk->bp->b_addr; 1363 node = drop_blk->bp->b_addr;
1042 ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count)); 1364 xfs_da3_node_hdr_from_disk(&nodehdr, node);
1365 ASSERT(drop_blk->index < nodehdr.count);
1043 ASSERT(drop_blk->index >= 0); 1366 ASSERT(drop_blk->index >= 0);
1044 1367
1045 /* 1368 /*
1046 * Copy over the offending entry, or just zero it out. 1369 * Copy over the offending entry, or just zero it out.
1047 */ 1370 */
1048 btree = &node->btree[drop_blk->index]; 1371 index = drop_blk->index;
1049 if (drop_blk->index < (be16_to_cpu(node->hdr.count)-1)) { 1372 btree = xfs_da3_node_tree_p(node);
1050 tmp = be16_to_cpu(node->hdr.count) - drop_blk->index - 1; 1373 if (index < nodehdr.count - 1) {
1374 tmp = nodehdr.count - index - 1;
1051 tmp *= (uint)sizeof(xfs_da_node_entry_t); 1375 tmp *= (uint)sizeof(xfs_da_node_entry_t);
1052 memmove(btree, btree + 1, tmp); 1376 memmove(&btree[index], &btree[index + 1], tmp);
1053 xfs_trans_log_buf(state->args->trans, drop_blk->bp, 1377 xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1054 XFS_DA_LOGRANGE(node, btree, tmp)); 1378 XFS_DA_LOGRANGE(node, &btree[index], tmp));
1055 btree = &node->btree[be16_to_cpu(node->hdr.count)-1]; 1379 index = nodehdr.count - 1;
1056 } 1380 }
1057 memset((char *)btree, 0, sizeof(xfs_da_node_entry_t)); 1381 memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
1058 xfs_trans_log_buf(state->args->trans, drop_blk->bp, 1382 xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1059 XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); 1383 XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
1060 be16_add_cpu(&node->hdr.count, -1); 1384 nodehdr.count -= 1;
1385 xfs_da3_node_hdr_to_disk(node, &nodehdr);
1061 xfs_trans_log_buf(state->args->trans, drop_blk->bp, 1386 xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1062 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); 1387 XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node)));
1063 1388
1064 /* 1389 /*
1065 * Copy the last hash value from the block to propagate upwards. 1390 * Copy the last hash value from the block to propagate upwards.
1066 */ 1391 */
1067 btree--; 1392 drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval);
1068 drop_blk->hashval = be32_to_cpu(btree->hashval);
1069} 1393}
1070 1394
1071/* 1395/*
1072 * Unbalance the btree elements between two intermediate nodes, 1396 * Unbalance the elements between two intermediate nodes,
1073 * move all Btree elements from one node into another. 1397 * move all Btree elements from one node into another.
1074 */ 1398 */
1075STATIC void 1399STATIC void
1076xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, 1400xfs_da3_node_unbalance(
1077 xfs_da_state_blk_t *save_blk) 1401 struct xfs_da_state *state,
1402 struct xfs_da_state_blk *drop_blk,
1403 struct xfs_da_state_blk *save_blk)
1078{ 1404{
1079 xfs_da_intnode_t *drop_node, *save_node; 1405 struct xfs_da_intnode *drop_node;
1080 xfs_da_node_entry_t *btree; 1406 struct xfs_da_intnode *save_node;
1081 int tmp; 1407 struct xfs_da_node_entry *drop_btree;
1082 xfs_trans_t *tp; 1408 struct xfs_da_node_entry *save_btree;
1409 struct xfs_da3_icnode_hdr drop_hdr;
1410 struct xfs_da3_icnode_hdr save_hdr;
1411 struct xfs_trans *tp;
1412 int sindex;
1413 int tmp;
1083 1414
1084 trace_xfs_da_node_unbalance(state->args); 1415 trace_xfs_da_node_unbalance(state->args);
1085 1416
1086 drop_node = drop_blk->bp->b_addr; 1417 drop_node = drop_blk->bp->b_addr;
1087 save_node = save_blk->bp->b_addr; 1418 save_node = save_blk->bp->b_addr;
1088 ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1419 xfs_da3_node_hdr_from_disk(&drop_hdr, drop_node);
1089 ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1420 xfs_da3_node_hdr_from_disk(&save_hdr, save_node);
1421 drop_btree = xfs_da3_node_tree_p(drop_node);
1422 save_btree = xfs_da3_node_tree_p(save_node);
1090 tp = state->args->trans; 1423 tp = state->args->trans;
1091 1424
1092 /* 1425 /*
1093 * If the dying block has lower hashvals, then move all the 1426 * If the dying block has lower hashvals, then move all the
1094 * elements in the remaining block up to make a hole. 1427 * elements in the remaining block up to make a hole.
1095 */ 1428 */
1096 if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) || 1429 if ((be32_to_cpu(drop_btree[0].hashval) <
1097 (be32_to_cpu(drop_node->btree[be16_to_cpu(drop_node->hdr.count)-1].hashval) < 1430 be32_to_cpu(save_btree[0].hashval)) ||
1098 be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval))) 1431 (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
1099 { 1432 be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) {
1100 btree = &save_node->btree[be16_to_cpu(drop_node->hdr.count)]; 1433 /* XXX: check this - is memmove dst correct? */
1101 tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t); 1434 tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
1102 memmove(btree, &save_node->btree[0], tmp); 1435 memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);
1103 btree = &save_node->btree[0]; 1436
1437 sindex = 0;
1104 xfs_trans_log_buf(tp, save_blk->bp, 1438 xfs_trans_log_buf(tp, save_blk->bp,
1105 XFS_DA_LOGRANGE(save_node, btree, 1439 XFS_DA_LOGRANGE(save_node, &save_btree[0],
1106 (be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) * 1440 (save_hdr.count + drop_hdr.count) *
1107 sizeof(xfs_da_node_entry_t))); 1441 sizeof(xfs_da_node_entry_t)));
1108 } else { 1442 } else {
1109 btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)]; 1443 sindex = save_hdr.count;
1110 xfs_trans_log_buf(tp, save_blk->bp, 1444 xfs_trans_log_buf(tp, save_blk->bp,
1111 XFS_DA_LOGRANGE(save_node, btree, 1445 XFS_DA_LOGRANGE(save_node, &save_btree[sindex],
1112 be16_to_cpu(drop_node->hdr.count) * 1446 drop_hdr.count * sizeof(xfs_da_node_entry_t)));
1113 sizeof(xfs_da_node_entry_t)));
1114 } 1447 }
1115 1448
1116 /* 1449 /*
1117 * Move all the B-tree elements from drop_blk to save_blk. 1450 * Move all the B-tree elements from drop_blk to save_blk.
1118 */ 1451 */
1119 tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t); 1452 tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
1120 memcpy(btree, &drop_node->btree[0], tmp); 1453 memcpy(&save_btree[sindex], &drop_btree[0], tmp);
1121 be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count)); 1454 save_hdr.count += drop_hdr.count;
1122 1455
1456 xfs_da3_node_hdr_to_disk(save_node, &save_hdr);
1123 xfs_trans_log_buf(tp, save_blk->bp, 1457 xfs_trans_log_buf(tp, save_blk->bp,
1124 XFS_DA_LOGRANGE(save_node, &save_node->hdr, 1458 XFS_DA_LOGRANGE(save_node, &save_node->hdr,
1125 sizeof(save_node->hdr))); 1459 xfs_da3_node_hdr_size(save_node)));
1126 1460
1127 /* 1461 /*
1128 * Save the last hashval in the remaining block for upward propagation. 1462 * Save the last hashval in the remaining block for upward propagation.
1129 */ 1463 */
1130 save_blk->hashval = be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval); 1464 save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
1131} 1465}
1132 1466
1133/*======================================================================== 1467/*========================================================================
@@ -1146,16 +1480,24 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1146 * pruned depth-first tree search. 1480 * pruned depth-first tree search.
1147 */ 1481 */
1148int /* error */ 1482int /* error */
1149xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) 1483xfs_da3_node_lookup_int(
1484 struct xfs_da_state *state,
1485 int *result)
1150{ 1486{
1151 xfs_da_state_blk_t *blk; 1487 struct xfs_da_state_blk *blk;
1152 xfs_da_blkinfo_t *curr; 1488 struct xfs_da_blkinfo *curr;
1153 xfs_da_intnode_t *node; 1489 struct xfs_da_intnode *node;
1154 xfs_da_node_entry_t *btree; 1490 struct xfs_da_node_entry *btree;
1155 xfs_dablk_t blkno; 1491 struct xfs_da3_icnode_hdr nodehdr;
1156 int probe, span, max, error, retval; 1492 struct xfs_da_args *args;
1157 xfs_dahash_t hashval, btreehashval; 1493 xfs_dablk_t blkno;
1158 xfs_da_args_t *args; 1494 xfs_dahash_t hashval;
1495 xfs_dahash_t btreehashval;
1496 int probe;
1497 int span;
1498 int max;
1499 int error;
1500 int retval;
1159 1501
1160 args = state->args; 1502 args = state->args;
1161 1503
@@ -1171,7 +1513,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1171 * Read the next node down in the tree. 1513 * Read the next node down in the tree.
1172 */ 1514 */
1173 blk->blkno = blkno; 1515 blk->blkno = blkno;
1174 error = xfs_da_node_read(args->trans, args->dp, blkno, 1516 error = xfs_da3_node_read(args->trans, args->dp, blkno,
1175 -1, &blk->bp, args->whichfork); 1517 -1, &blk->bp, args->whichfork);
1176 if (error) { 1518 if (error) {
1177 blk->blkno = 0; 1519 blk->blkno = 0;
@@ -1180,66 +1522,75 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1180 } 1522 }
1181 curr = blk->bp->b_addr; 1523 curr = blk->bp->b_addr;
1182 blk->magic = be16_to_cpu(curr->magic); 1524 blk->magic = be16_to_cpu(curr->magic);
1183 ASSERT(blk->magic == XFS_DA_NODE_MAGIC || 1525
1184 blk->magic == XFS_DIR2_LEAFN_MAGIC || 1526 if (blk->magic == XFS_ATTR_LEAF_MAGIC ||
1185 blk->magic == XFS_ATTR_LEAF_MAGIC); 1527 blk->magic == XFS_ATTR3_LEAF_MAGIC) {
1528 blk->magic = XFS_ATTR_LEAF_MAGIC;
1529 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1530 break;
1531 }
1532
1533 if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1534 blk->magic == XFS_DIR3_LEAFN_MAGIC) {
1535 blk->magic = XFS_DIR2_LEAFN_MAGIC;
1536 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
1537 break;
1538 }
1539
1540 blk->magic = XFS_DA_NODE_MAGIC;
1541
1186 1542
1187 /* 1543 /*
1188 * Search an intermediate node for a match. 1544 * Search an intermediate node for a match.
1189 */ 1545 */
1190 if (blk->magic == XFS_DA_NODE_MAGIC) { 1546 node = blk->bp->b_addr;
1191 node = blk->bp->b_addr; 1547 xfs_da3_node_hdr_from_disk(&nodehdr, node);
1192 max = be16_to_cpu(node->hdr.count); 1548 btree = xfs_da3_node_tree_p(node);
1193 blk->hashval = be32_to_cpu(node->btree[max-1].hashval);
1194 1549
1195 /* 1550 max = nodehdr.count;
1196 * Binary search. (note: small blocks will skip loop) 1551 blk->hashval = be32_to_cpu(btree[max - 1].hashval);
1197 */
1198 probe = span = max / 2;
1199 hashval = args->hashval;
1200 for (btree = &node->btree[probe]; span > 4;
1201 btree = &node->btree[probe]) {
1202 span /= 2;
1203 btreehashval = be32_to_cpu(btree->hashval);
1204 if (btreehashval < hashval)
1205 probe += span;
1206 else if (btreehashval > hashval)
1207 probe -= span;
1208 else
1209 break;
1210 }
1211 ASSERT((probe >= 0) && (probe < max));
1212 ASSERT((span <= 4) || (be32_to_cpu(btree->hashval) == hashval));
1213 1552
1214 /* 1553 /*
1215 * Since we may have duplicate hashval's, find the first 1554 * Binary search. (note: small blocks will skip loop)
1216 * matching hashval in the node. 1555 */
1217 */ 1556 probe = span = max / 2;
1218 while ((probe > 0) && (be32_to_cpu(btree->hashval) >= hashval)) { 1557 hashval = args->hashval;
1219 btree--; 1558 while (span > 4) {
1220 probe--; 1559 span /= 2;
1221 } 1560 btreehashval = be32_to_cpu(btree[probe].hashval);
1222 while ((probe < max) && (be32_to_cpu(btree->hashval) < hashval)) { 1561 if (btreehashval < hashval)
1223 btree++; 1562 probe += span;
1224 probe++; 1563 else if (btreehashval > hashval)
1225 } 1564 probe -= span;
1565 else
1566 break;
1567 }
1568 ASSERT((probe >= 0) && (probe < max));
1569 ASSERT((span <= 4) ||
1570 (be32_to_cpu(btree[probe].hashval) == hashval));
1226 1571
1227 /* 1572 /*
1228 * Pick the right block to descend on. 1573 * Since we may have duplicate hashval's, find the first
1229 */ 1574 * matching hashval in the node.
1230 if (probe == max) { 1575 */
1231 blk->index = max-1; 1576 while (probe > 0 &&
1232 blkno = be32_to_cpu(node->btree[max-1].before); 1577 be32_to_cpu(btree[probe].hashval) >= hashval) {
1233 } else { 1578 probe--;
1234 blk->index = probe; 1579 }
1235 blkno = be32_to_cpu(btree->before); 1580 while (probe < max &&
1236 } 1581 be32_to_cpu(btree[probe].hashval) < hashval) {
1237 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { 1582 probe++;
1238 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); 1583 }
1239 break; 1584
1240 } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { 1585 /*
1241 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL); 1586 * Pick the right block to descend on.
1242 break; 1587 */
1588 if (probe == max) {
1589 blk->index = max - 1;
1590 blkno = be32_to_cpu(btree[max - 1].before);
1591 } else {
1592 blk->index = probe;
1593 blkno = be32_to_cpu(btree[probe].before);
1243 } 1594 }
1244 } 1595 }
1245 1596
@@ -1254,7 +1605,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1254 retval = xfs_dir2_leafn_lookup_int(blk->bp, args, 1605 retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1255 &blk->index, state); 1606 &blk->index, state);
1256 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { 1607 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1257 retval = xfs_attr_leaf_lookup_int(blk->bp, args); 1608 retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
1258 blk->index = args->index; 1609 blk->index = args->index;
1259 args->blkno = blk->blkno; 1610 args->blkno = blk->blkno;
1260 } else { 1611 } else {
@@ -1263,7 +1614,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1263 } 1614 }
1264 if (((retval == ENOENT) || (retval == ENOATTR)) && 1615 if (((retval == ENOENT) || (retval == ENOATTR)) &&
1265 (blk->hashval == args->hashval)) { 1616 (blk->hashval == args->hashval)) {
1266 error = xfs_da_path_shift(state, &state->path, 1, 1, 1617 error = xfs_da3_path_shift(state, &state->path, 1, 1,
1267 &retval); 1618 &retval);
1268 if (error) 1619 if (error)
1269 return(error); 1620 return(error);
@@ -1285,16 +1636,52 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1285 *========================================================================*/ 1636 *========================================================================*/
1286 1637
1287/* 1638/*
1639 * Compare two intermediate nodes for "order".
1640 */
1641STATIC int
1642xfs_da3_node_order(
1643 struct xfs_buf *node1_bp,
1644 struct xfs_buf *node2_bp)
1645{
1646 struct xfs_da_intnode *node1;
1647 struct xfs_da_intnode *node2;
1648 struct xfs_da_node_entry *btree1;
1649 struct xfs_da_node_entry *btree2;
1650 struct xfs_da3_icnode_hdr node1hdr;
1651 struct xfs_da3_icnode_hdr node2hdr;
1652
1653 node1 = node1_bp->b_addr;
1654 node2 = node2_bp->b_addr;
1655 xfs_da3_node_hdr_from_disk(&node1hdr, node1);
1656 xfs_da3_node_hdr_from_disk(&node2hdr, node2);
1657 btree1 = xfs_da3_node_tree_p(node1);
1658 btree2 = xfs_da3_node_tree_p(node2);
1659
1660 if (node1hdr.count > 0 && node2hdr.count > 0 &&
1661 ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
1662 (be32_to_cpu(btree2[node2hdr.count - 1].hashval) <
1663 be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) {
1664 return 1;
1665 }
1666 return 0;
1667}
1668
1669/*
1288 * Link a new block into a doubly linked list of blocks (of whatever type). 1670 * Link a new block into a doubly linked list of blocks (of whatever type).
1289 */ 1671 */
1290int /* error */ 1672int /* error */
1291xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, 1673xfs_da3_blk_link(
1292 xfs_da_state_blk_t *new_blk) 1674 struct xfs_da_state *state,
1675 struct xfs_da_state_blk *old_blk,
1676 struct xfs_da_state_blk *new_blk)
1293{ 1677{
1294 xfs_da_blkinfo_t *old_info, *new_info, *tmp_info; 1678 struct xfs_da_blkinfo *old_info;
1295 xfs_da_args_t *args; 1679 struct xfs_da_blkinfo *new_info;
1296 int before=0, error; 1680 struct xfs_da_blkinfo *tmp_info;
1297 struct xfs_buf *bp; 1681 struct xfs_da_args *args;
1682 struct xfs_buf *bp;
1683 int before = 0;
1684 int error;
1298 1685
1299 /* 1686 /*
1300 * Set up environment. 1687 * Set up environment.
@@ -1306,9 +1693,6 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1306 ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || 1693 ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1307 old_blk->magic == XFS_DIR2_LEAFN_MAGIC || 1694 old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1308 old_blk->magic == XFS_ATTR_LEAF_MAGIC); 1695 old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1309 ASSERT(old_blk->magic == be16_to_cpu(old_info->magic));
1310 ASSERT(new_blk->magic == be16_to_cpu(new_info->magic));
1311 ASSERT(old_blk->magic == new_blk->magic);
1312 1696
1313 switch (old_blk->magic) { 1697 switch (old_blk->magic) {
1314 case XFS_ATTR_LEAF_MAGIC: 1698 case XFS_ATTR_LEAF_MAGIC:
@@ -1318,7 +1702,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1318 before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp); 1702 before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
1319 break; 1703 break;
1320 case XFS_DA_NODE_MAGIC: 1704 case XFS_DA_NODE_MAGIC:
1321 before = xfs_da_node_order(old_blk->bp, new_blk->bp); 1705 before = xfs_da3_node_order(old_blk->bp, new_blk->bp);
1322 break; 1706 break;
1323 } 1707 }
1324 1708
@@ -1333,14 +1717,14 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1333 new_info->forw = cpu_to_be32(old_blk->blkno); 1717 new_info->forw = cpu_to_be32(old_blk->blkno);
1334 new_info->back = old_info->back; 1718 new_info->back = old_info->back;
1335 if (old_info->back) { 1719 if (old_info->back) {
1336 error = xfs_da_node_read(args->trans, args->dp, 1720 error = xfs_da3_node_read(args->trans, args->dp,
1337 be32_to_cpu(old_info->back), 1721 be32_to_cpu(old_info->back),
1338 -1, &bp, args->whichfork); 1722 -1, &bp, args->whichfork);
1339 if (error) 1723 if (error)
1340 return(error); 1724 return(error);
1341 ASSERT(bp != NULL); 1725 ASSERT(bp != NULL);
1342 tmp_info = bp->b_addr; 1726 tmp_info = bp->b_addr;
1343 ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic)); 1727 ASSERT(tmp_info->magic == old_info->magic);
1344 ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno); 1728 ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
1345 tmp_info->forw = cpu_to_be32(new_blk->blkno); 1729 tmp_info->forw = cpu_to_be32(new_blk->blkno);
1346 xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); 1730 xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
@@ -1354,7 +1738,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1354 new_info->forw = old_info->forw; 1738 new_info->forw = old_info->forw;
1355 new_info->back = cpu_to_be32(old_blk->blkno); 1739 new_info->back = cpu_to_be32(old_blk->blkno);
1356 if (old_info->forw) { 1740 if (old_info->forw) {
1357 error = xfs_da_node_read(args->trans, args->dp, 1741 error = xfs_da3_node_read(args->trans, args->dp,
1358 be32_to_cpu(old_info->forw), 1742 be32_to_cpu(old_info->forw),
1359 -1, &bp, args->whichfork); 1743 -1, &bp, args->whichfork);
1360 if (error) 1744 if (error)
@@ -1375,59 +1759,20 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1375} 1759}
1376 1760
1377/* 1761/*
1378 * Compare two intermediate nodes for "order".
1379 */
1380STATIC int
1381xfs_da_node_order(
1382 struct xfs_buf *node1_bp,
1383 struct xfs_buf *node2_bp)
1384{
1385 xfs_da_intnode_t *node1, *node2;
1386
1387 node1 = node1_bp->b_addr;
1388 node2 = node2_bp->b_addr;
1389 ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) &&
1390 node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1391 if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
1392 ((be32_to_cpu(node2->btree[0].hashval) <
1393 be32_to_cpu(node1->btree[0].hashval)) ||
1394 (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
1395 be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
1396 return(1);
1397 }
1398 return(0);
1399}
1400
1401/*
1402 * Pick up the last hashvalue from an intermediate node.
1403 */
1404STATIC uint
1405xfs_da_node_lasthash(
1406 struct xfs_buf *bp,
1407 int *count)
1408{
1409 xfs_da_intnode_t *node;
1410
1411 node = bp->b_addr;
1412 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1413 if (count)
1414 *count = be16_to_cpu(node->hdr.count);
1415 if (!node->hdr.count)
1416 return(0);
1417 return be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
1418}
1419
1420/*
1421 * Unlink a block from a doubly linked list of blocks. 1762 * Unlink a block from a doubly linked list of blocks.
1422 */ 1763 */
1423STATIC int /* error */ 1764STATIC int /* error */
1424xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, 1765xfs_da3_blk_unlink(
1425 xfs_da_state_blk_t *save_blk) 1766 struct xfs_da_state *state,
1767 struct xfs_da_state_blk *drop_blk,
1768 struct xfs_da_state_blk *save_blk)
1426{ 1769{
1427 xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info; 1770 struct xfs_da_blkinfo *drop_info;
1428 xfs_da_args_t *args; 1771 struct xfs_da_blkinfo *save_info;
1429 struct xfs_buf *bp; 1772 struct xfs_da_blkinfo *tmp_info;
1430 int error; 1773 struct xfs_da_args *args;
1774 struct xfs_buf *bp;
1775 int error;
1431 1776
1432 /* 1777 /*
1433 * Set up environment. 1778 * Set up environment.
@@ -1439,8 +1784,6 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1439 ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || 1784 ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1440 save_blk->magic == XFS_DIR2_LEAFN_MAGIC || 1785 save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1441 save_blk->magic == XFS_ATTR_LEAF_MAGIC); 1786 save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1442 ASSERT(save_blk->magic == be16_to_cpu(save_info->magic));
1443 ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic));
1444 ASSERT(save_blk->magic == drop_blk->magic); 1787 ASSERT(save_blk->magic == drop_blk->magic);
1445 ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) || 1788 ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
1446 (be32_to_cpu(save_info->back) == drop_blk->blkno)); 1789 (be32_to_cpu(save_info->back) == drop_blk->blkno));
@@ -1454,7 +1797,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1454 trace_xfs_da_unlink_back(args); 1797 trace_xfs_da_unlink_back(args);
1455 save_info->back = drop_info->back; 1798 save_info->back = drop_info->back;
1456 if (drop_info->back) { 1799 if (drop_info->back) {
1457 error = xfs_da_node_read(args->trans, args->dp, 1800 error = xfs_da3_node_read(args->trans, args->dp,
1458 be32_to_cpu(drop_info->back), 1801 be32_to_cpu(drop_info->back),
1459 -1, &bp, args->whichfork); 1802 -1, &bp, args->whichfork);
1460 if (error) 1803 if (error)
@@ -1471,7 +1814,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1471 trace_xfs_da_unlink_forward(args); 1814 trace_xfs_da_unlink_forward(args);
1472 save_info->forw = drop_info->forw; 1815 save_info->forw = drop_info->forw;
1473 if (drop_info->forw) { 1816 if (drop_info->forw) {
1474 error = xfs_da_node_read(args->trans, args->dp, 1817 error = xfs_da3_node_read(args->trans, args->dp,
1475 be32_to_cpu(drop_info->forw), 1818 be32_to_cpu(drop_info->forw),
1476 -1, &bp, args->whichfork); 1819 -1, &bp, args->whichfork);
1477 if (error) 1820 if (error)
@@ -1499,15 +1842,22 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1499 * the new bottom and the root. 1842 * the new bottom and the root.
1500 */ 1843 */
1501int /* error */ 1844int /* error */
1502xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, 1845xfs_da3_path_shift(
1503 int forward, int release, int *result) 1846 struct xfs_da_state *state,
1847 struct xfs_da_state_path *path,
1848 int forward,
1849 int release,
1850 int *result)
1504{ 1851{
1505 xfs_da_state_blk_t *blk; 1852 struct xfs_da_state_blk *blk;
1506 xfs_da_blkinfo_t *info; 1853 struct xfs_da_blkinfo *info;
1507 xfs_da_intnode_t *node; 1854 struct xfs_da_intnode *node;
1508 xfs_da_args_t *args; 1855 struct xfs_da_args *args;
1509 xfs_dablk_t blkno=0; 1856 struct xfs_da_node_entry *btree;
1510 int level, error; 1857 struct xfs_da3_icnode_hdr nodehdr;
1858 xfs_dablk_t blkno = 0;
1859 int level;
1860 int error;
1511 1861
1512 trace_xfs_da_path_shift(state->args); 1862 trace_xfs_da_path_shift(state->args);
1513 1863
@@ -1522,16 +1872,17 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1522 ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); 1872 ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1523 level = (path->active-1) - 1; /* skip bottom layer in path */ 1873 level = (path->active-1) - 1; /* skip bottom layer in path */
1524 for (blk = &path->blk[level]; level >= 0; blk--, level--) { 1874 for (blk = &path->blk[level]; level >= 0; blk--, level--) {
1525 ASSERT(blk->bp != NULL);
1526 node = blk->bp->b_addr; 1875 node = blk->bp->b_addr;
1527 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 1876 xfs_da3_node_hdr_from_disk(&nodehdr, node);
1528 if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) { 1877 btree = xfs_da3_node_tree_p(node);
1878
1879 if (forward && (blk->index < nodehdr.count - 1)) {
1529 blk->index++; 1880 blk->index++;
1530 blkno = be32_to_cpu(node->btree[blk->index].before); 1881 blkno = be32_to_cpu(btree[blk->index].before);
1531 break; 1882 break;
1532 } else if (!forward && (blk->index > 0)) { 1883 } else if (!forward && (blk->index > 0)) {
1533 blk->index--; 1884 blk->index--;
1534 blkno = be32_to_cpu(node->btree[blk->index].before); 1885 blkno = be32_to_cpu(btree[blk->index].before);
1535 break; 1886 break;
1536 } 1887 }
1537 } 1888 }
@@ -1557,45 +1908,60 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1557 * Read the next child block. 1908 * Read the next child block.
1558 */ 1909 */
1559 blk->blkno = blkno; 1910 blk->blkno = blkno;
1560 error = xfs_da_node_read(args->trans, args->dp, blkno, -1, 1911 error = xfs_da3_node_read(args->trans, args->dp, blkno, -1,
1561 &blk->bp, args->whichfork); 1912 &blk->bp, args->whichfork);
1562 if (error) 1913 if (error)
1563 return(error); 1914 return(error);
1564 ASSERT(blk->bp != NULL);
1565 info = blk->bp->b_addr; 1915 info = blk->bp->b_addr;
1566 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || 1916 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1917 info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
1567 info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || 1918 info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1568 info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1919 info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
1569 blk->magic = be16_to_cpu(info->magic); 1920 info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
1570 if (blk->magic == XFS_DA_NODE_MAGIC) { 1921 info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
1922
1923
1924 /*
1925 * Note: we flatten the magic number to a single type so we
1926 * don't have to compare against crc/non-crc types elsewhere.
1927 */
1928 switch (be16_to_cpu(info->magic)) {
1929 case XFS_DA_NODE_MAGIC:
1930 case XFS_DA3_NODE_MAGIC:
1931 blk->magic = XFS_DA_NODE_MAGIC;
1571 node = (xfs_da_intnode_t *)info; 1932 node = (xfs_da_intnode_t *)info;
1572 blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval); 1933 xfs_da3_node_hdr_from_disk(&nodehdr, node);
1934 btree = xfs_da3_node_tree_p(node);
1935 blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
1573 if (forward) 1936 if (forward)
1574 blk->index = 0; 1937 blk->index = 0;
1575 else 1938 else
1576 blk->index = be16_to_cpu(node->hdr.count)-1; 1939 blk->index = nodehdr.count - 1;
1577 blkno = be32_to_cpu(node->btree[blk->index].before); 1940 blkno = be32_to_cpu(btree[blk->index].before);
1578 } else { 1941 break;
1942 case XFS_ATTR_LEAF_MAGIC:
1943 case XFS_ATTR3_LEAF_MAGIC:
1944 blk->magic = XFS_ATTR_LEAF_MAGIC;
1579 ASSERT(level == path->active-1); 1945 ASSERT(level == path->active-1);
1580 blk->index = 0; 1946 blk->index = 0;
1581 switch(blk->magic) { 1947 blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
1582 case XFS_ATTR_LEAF_MAGIC: 1948 NULL);
1583 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, 1949 break;
1584 NULL); 1950 case XFS_DIR2_LEAFN_MAGIC:
1585 break; 1951 case XFS_DIR3_LEAFN_MAGIC:
1586 case XFS_DIR2_LEAFN_MAGIC: 1952 blk->magic = XFS_DIR2_LEAFN_MAGIC;
1587 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, 1953 ASSERT(level == path->active-1);
1588 NULL); 1954 blk->index = 0;
1589 break; 1955 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
1590 default: 1956 NULL);
1591 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC || 1957 break;
1592 blk->magic == XFS_DIR2_LEAFN_MAGIC); 1958 default:
1593 break; 1959 ASSERT(0);
1594 } 1960 break;
1595 } 1961 }
1596 } 1962 }
1597 *result = 0; 1963 *result = 0;
1598 return(0); 1964 return 0;
1599} 1965}
1600 1966
1601 1967
@@ -1782,22 +2148,36 @@ xfs_da_grow_inode(
1782 * a bmap btree split to do that. 2148 * a bmap btree split to do that.
1783 */ 2149 */
1784STATIC int 2150STATIC int
1785xfs_da_swap_lastblock( 2151xfs_da3_swap_lastblock(
1786 xfs_da_args_t *args, 2152 struct xfs_da_args *args,
1787 xfs_dablk_t *dead_blknop, 2153 xfs_dablk_t *dead_blknop,
1788 struct xfs_buf **dead_bufp) 2154 struct xfs_buf **dead_bufp)
1789{ 2155{
1790 xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno; 2156 struct xfs_da_blkinfo *dead_info;
1791 struct xfs_buf *dead_buf, *last_buf, *sib_buf, *par_buf; 2157 struct xfs_da_blkinfo *sib_info;
1792 xfs_fileoff_t lastoff; 2158 struct xfs_da_intnode *par_node;
1793 xfs_inode_t *ip; 2159 struct xfs_da_intnode *dead_node;
1794 xfs_trans_t *tp; 2160 struct xfs_dir2_leaf *dead_leaf2;
1795 xfs_mount_t *mp; 2161 struct xfs_da_node_entry *btree;
1796 int error, w, entno, level, dead_level; 2162 struct xfs_da3_icnode_hdr par_hdr;
1797 xfs_da_blkinfo_t *dead_info, *sib_info; 2163 struct xfs_inode *ip;
1798 xfs_da_intnode_t *par_node, *dead_node; 2164 struct xfs_trans *tp;
1799 xfs_dir2_leaf_t *dead_leaf2; 2165 struct xfs_mount *mp;
1800 xfs_dahash_t dead_hash; 2166 struct xfs_buf *dead_buf;
2167 struct xfs_buf *last_buf;
2168 struct xfs_buf *sib_buf;
2169 struct xfs_buf *par_buf;
2170 xfs_dahash_t dead_hash;
2171 xfs_fileoff_t lastoff;
2172 xfs_dablk_t dead_blkno;
2173 xfs_dablk_t last_blkno;
2174 xfs_dablk_t sib_blkno;
2175 xfs_dablk_t par_blkno;
2176 int error;
2177 int w;
2178 int entno;
2179 int level;
2180 int dead_level;
1801 2181
1802 trace_xfs_da_swap_lastblock(args); 2182 trace_xfs_da_swap_lastblock(args);
1803 2183
@@ -1821,7 +2201,7 @@ xfs_da_swap_lastblock(
1821 * Read the last block in the btree space. 2201 * Read the last block in the btree space.
1822 */ 2202 */
1823 last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs; 2203 last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
1824 error = xfs_da_node_read(tp, ip, last_blkno, -1, &last_buf, w); 2204 error = xfs_da3_node_read(tp, ip, last_blkno, -1, &last_buf, w);
1825 if (error) 2205 if (error)
1826 return error; 2206 return error;
1827 /* 2207 /*
@@ -1833,22 +2213,31 @@ xfs_da_swap_lastblock(
1833 /* 2213 /*
1834 * Get values from the moved block. 2214 * Get values from the moved block.
1835 */ 2215 */
1836 if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { 2216 if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
2217 dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
2218 struct xfs_dir3_icleaf_hdr leafhdr;
2219 struct xfs_dir2_leaf_entry *ents;
2220
1837 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; 2221 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
2222 xfs_dir3_leaf_hdr_from_disk(&leafhdr, dead_leaf2);
2223 ents = xfs_dir3_leaf_ents_p(dead_leaf2);
1838 dead_level = 0; 2224 dead_level = 0;
1839 dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval); 2225 dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
1840 } else { 2226 } else {
1841 ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); 2227 struct xfs_da3_icnode_hdr deadhdr;
2228
1842 dead_node = (xfs_da_intnode_t *)dead_info; 2229 dead_node = (xfs_da_intnode_t *)dead_info;
1843 dead_level = be16_to_cpu(dead_node->hdr.level); 2230 xfs_da3_node_hdr_from_disk(&deadhdr, dead_node);
1844 dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval); 2231 btree = xfs_da3_node_tree_p(dead_node);
2232 dead_level = deadhdr.level;
2233 dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
1845 } 2234 }
1846 sib_buf = par_buf = NULL; 2235 sib_buf = par_buf = NULL;
1847 /* 2236 /*
1848 * If the moved block has a left sibling, fix up the pointers. 2237 * If the moved block has a left sibling, fix up the pointers.
1849 */ 2238 */
1850 if ((sib_blkno = be32_to_cpu(dead_info->back))) { 2239 if ((sib_blkno = be32_to_cpu(dead_info->back))) {
1851 error = xfs_da_node_read(tp, ip, sib_blkno, -1, &sib_buf, w); 2240 error = xfs_da3_node_read(tp, ip, sib_blkno, -1, &sib_buf, w);
1852 if (error) 2241 if (error)
1853 goto done; 2242 goto done;
1854 sib_info = sib_buf->b_addr; 2243 sib_info = sib_buf->b_addr;
@@ -1870,7 +2259,7 @@ xfs_da_swap_lastblock(
1870 * If the moved block has a right sibling, fix up the pointers. 2259 * If the moved block has a right sibling, fix up the pointers.
1871 */ 2260 */
1872 if ((sib_blkno = be32_to_cpu(dead_info->forw))) { 2261 if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
1873 error = xfs_da_node_read(tp, ip, sib_blkno, -1, &sib_buf, w); 2262 error = xfs_da3_node_read(tp, ip, sib_blkno, -1, &sib_buf, w);
1874 if (error) 2263 if (error)
1875 goto done; 2264 goto done;
1876 sib_info = sib_buf->b_addr; 2265 sib_info = sib_buf->b_addr;
@@ -1894,31 +2283,31 @@ xfs_da_swap_lastblock(
1894 * Walk down the tree looking for the parent of the moved block. 2283 * Walk down the tree looking for the parent of the moved block.
1895 */ 2284 */
1896 for (;;) { 2285 for (;;) {
1897 error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w); 2286 error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w);
1898 if (error) 2287 if (error)
1899 goto done; 2288 goto done;
1900 par_node = par_buf->b_addr; 2289 par_node = par_buf->b_addr;
1901 if (unlikely(par_node->hdr.info.magic != 2290 xfs_da3_node_hdr_from_disk(&par_hdr, par_node);
1902 cpu_to_be16(XFS_DA_NODE_MAGIC) || 2291 if (level >= 0 && level != par_hdr.level + 1) {
1903 (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
1904 XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", 2292 XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
1905 XFS_ERRLEVEL_LOW, mp); 2293 XFS_ERRLEVEL_LOW, mp);
1906 error = XFS_ERROR(EFSCORRUPTED); 2294 error = XFS_ERROR(EFSCORRUPTED);
1907 goto done; 2295 goto done;
1908 } 2296 }
1909 level = be16_to_cpu(par_node->hdr.level); 2297 level = par_hdr.level;
2298 btree = xfs_da3_node_tree_p(par_node);
1910 for (entno = 0; 2299 for (entno = 0;
1911 entno < be16_to_cpu(par_node->hdr.count) && 2300 entno < par_hdr.count &&
1912 be32_to_cpu(par_node->btree[entno].hashval) < dead_hash; 2301 be32_to_cpu(btree[entno].hashval) < dead_hash;
1913 entno++) 2302 entno++)
1914 continue; 2303 continue;
1915 if (unlikely(entno == be16_to_cpu(par_node->hdr.count))) { 2304 if (entno == par_hdr.count) {
1916 XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)", 2305 XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
1917 XFS_ERRLEVEL_LOW, mp); 2306 XFS_ERRLEVEL_LOW, mp);
1918 error = XFS_ERROR(EFSCORRUPTED); 2307 error = XFS_ERROR(EFSCORRUPTED);
1919 goto done; 2308 goto done;
1920 } 2309 }
1921 par_blkno = be32_to_cpu(par_node->btree[entno].before); 2310 par_blkno = be32_to_cpu(btree[entno].before);
1922 if (level == dead_level + 1) 2311 if (level == dead_level + 1)
1923 break; 2312 break;
1924 xfs_trans_brelse(tp, par_buf); 2313 xfs_trans_brelse(tp, par_buf);
@@ -1930,13 +2319,13 @@ xfs_da_swap_lastblock(
1930 */ 2319 */
1931 for (;;) { 2320 for (;;) {
1932 for (; 2321 for (;
1933 entno < be16_to_cpu(par_node->hdr.count) && 2322 entno < par_hdr.count &&
1934 be32_to_cpu(par_node->btree[entno].before) != last_blkno; 2323 be32_to_cpu(btree[entno].before) != last_blkno;
1935 entno++) 2324 entno++)
1936 continue; 2325 continue;
1937 if (entno < be16_to_cpu(par_node->hdr.count)) 2326 if (entno < par_hdr.count)
1938 break; 2327 break;
1939 par_blkno = be32_to_cpu(par_node->hdr.info.forw); 2328 par_blkno = par_hdr.forw;
1940 xfs_trans_brelse(tp, par_buf); 2329 xfs_trans_brelse(tp, par_buf);
1941 par_buf = NULL; 2330 par_buf = NULL;
1942 if (unlikely(par_blkno == 0)) { 2331 if (unlikely(par_blkno == 0)) {
@@ -1945,27 +2334,27 @@ xfs_da_swap_lastblock(
1945 error = XFS_ERROR(EFSCORRUPTED); 2334 error = XFS_ERROR(EFSCORRUPTED);
1946 goto done; 2335 goto done;
1947 } 2336 }
1948 error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w); 2337 error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w);
1949 if (error) 2338 if (error)
1950 goto done; 2339 goto done;
1951 par_node = par_buf->b_addr; 2340 par_node = par_buf->b_addr;
1952 if (unlikely( 2341 xfs_da3_node_hdr_from_disk(&par_hdr, par_node);
1953 be16_to_cpu(par_node->hdr.level) != level || 2342 if (par_hdr.level != level) {
1954 par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) {
1955 XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", 2343 XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
1956 XFS_ERRLEVEL_LOW, mp); 2344 XFS_ERRLEVEL_LOW, mp);
1957 error = XFS_ERROR(EFSCORRUPTED); 2345 error = XFS_ERROR(EFSCORRUPTED);
1958 goto done; 2346 goto done;
1959 } 2347 }
2348 btree = xfs_da3_node_tree_p(par_node);
1960 entno = 0; 2349 entno = 0;
1961 } 2350 }
1962 /* 2351 /*
1963 * Update the parent entry pointing to the moved block. 2352 * Update the parent entry pointing to the moved block.
1964 */ 2353 */
1965 par_node->btree[entno].before = cpu_to_be32(dead_blkno); 2354 btree[entno].before = cpu_to_be32(dead_blkno);
1966 xfs_trans_log_buf(tp, par_buf, 2355 xfs_trans_log_buf(tp, par_buf,
1967 XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before, 2356 XFS_DA_LOGRANGE(par_node, &btree[entno].before,
1968 sizeof(par_node->btree[entno].before))); 2357 sizeof(btree[entno].before)));
1969 *dead_blknop = last_blkno; 2358 *dead_blknop = last_blkno;
1970 *dead_bufp = last_buf; 2359 *dead_bufp = last_buf;
1971 return 0; 2360 return 0;
@@ -2007,14 +2396,15 @@ xfs_da_shrink_inode(
2007 * Remove extents. If we get ENOSPC for a dir we have to move 2396 * Remove extents. If we get ENOSPC for a dir we have to move
2008 * the last block to the place we want to kill. 2397 * the last block to the place we want to kill.
2009 */ 2398 */
2010 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count, 2399 error = xfs_bunmapi(tp, dp, dead_blkno, count,
2011 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, 2400 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
2012 0, args->firstblock, args->flist, 2401 0, args->firstblock, args->flist, &done);
2013 &done)) == ENOSPC) { 2402 if (error == ENOSPC) {
2014 if (w != XFS_DATA_FORK) 2403 if (w != XFS_DATA_FORK)
2015 break; 2404 break;
2016 if ((error = xfs_da_swap_lastblock(args, &dead_blkno, 2405 error = xfs_da3_swap_lastblock(args, &dead_blkno,
2017 &dead_buf))) 2406 &dead_buf);
2407 if (error)
2018 break; 2408 break;
2019 } else { 2409 } else {
2020 break; 2410 break;
@@ -2279,12 +2669,21 @@ xfs_da_read_buf(
2279 magic1 = be32_to_cpu(hdr->magic); 2669 magic1 = be32_to_cpu(hdr->magic);
2280 if (unlikely( 2670 if (unlikely(
2281 XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && 2671 XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
2672 (magic != XFS_DA3_NODE_MAGIC) &&
2282 (magic != XFS_ATTR_LEAF_MAGIC) && 2673 (magic != XFS_ATTR_LEAF_MAGIC) &&
2674 (magic != XFS_ATTR3_LEAF_MAGIC) &&
2283 (magic != XFS_DIR2_LEAF1_MAGIC) && 2675 (magic != XFS_DIR2_LEAF1_MAGIC) &&
2676 (magic != XFS_DIR3_LEAF1_MAGIC) &&
2284 (magic != XFS_DIR2_LEAFN_MAGIC) && 2677 (magic != XFS_DIR2_LEAFN_MAGIC) &&
2678 (magic != XFS_DIR3_LEAFN_MAGIC) &&
2285 (magic1 != XFS_DIR2_BLOCK_MAGIC) && 2679 (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
2680 (magic1 != XFS_DIR3_BLOCK_MAGIC) &&
2286 (magic1 != XFS_DIR2_DATA_MAGIC) && 2681 (magic1 != XFS_DIR2_DATA_MAGIC) &&
2287 (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)), 2682 (magic1 != XFS_DIR3_DATA_MAGIC) &&
2683 (free->hdr.magic !=
2684 cpu_to_be32(XFS_DIR2_FREE_MAGIC)) &&
2685 (free->hdr.magic !=
2686 cpu_to_be32(XFS_DIR3_FREE_MAGIC)),
2288 mp, XFS_ERRTAG_DA_READ_BUF, 2687 mp, XFS_ERRTAG_DA_READ_BUF,
2289 XFS_RANDOM_DA_READ_BUF))) { 2688 XFS_RANDOM_DA_READ_BUF))) {
2290 trace_xfs_da_btree_corrupt(bp, _RET_IP_); 2689 trace_xfs_da_btree_corrupt(bp, _RET_IP_);
@@ -2342,41 +2741,3 @@ out_free:
2342 return -1; 2741 return -1;
2343 return mappedbno; 2742 return mappedbno;
2344} 2743}
2345
2346kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
2347
2348/*
2349 * Allocate a dir-state structure.
2350 * We don't put them on the stack since they're large.
2351 */
2352xfs_da_state_t *
2353xfs_da_state_alloc(void)
2354{
2355 return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
2356}
2357
2358/*
2359 * Kill the altpath contents of a da-state structure.
2360 */
2361STATIC void
2362xfs_da_state_kill_altpath(xfs_da_state_t *state)
2363{
2364 int i;
2365
2366 for (i = 0; i < state->altpath.active; i++)
2367 state->altpath.blk[i].bp = NULL;
2368 state->altpath.active = 0;
2369}
2370
2371/*
2372 * Free a da-state structure.
2373 */
2374void
2375xfs_da_state_free(xfs_da_state_t *state)
2376{
2377 xfs_da_state_kill_altpath(state);
2378#ifdef DEBUG
2379 memset((char *)state, 0, sizeof(*state));
2380#endif /* DEBUG */
2381 kmem_zone_free(xfs_da_state_zone, state);
2382}
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index ee5170c46ae1..6fb3371c63cf 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -20,7 +21,6 @@
20 21
21struct xfs_bmap_free; 22struct xfs_bmap_free;
22struct xfs_inode; 23struct xfs_inode;
23struct xfs_mount;
24struct xfs_trans; 24struct xfs_trans;
25struct zone; 25struct zone;
26 26
@@ -47,6 +47,33 @@ typedef struct xfs_da_blkinfo {
47} xfs_da_blkinfo_t; 47} xfs_da_blkinfo_t;
48 48
49/* 49/*
50 * CRC enabled directory structure types
51 *
52 * The headers change size for the additional verification information, but
53 * otherwise the tree layouts and contents are unchanged. Hence the da btree
54 * code can use the struct xfs_da_blkinfo for manipulating the tree links and
55 * magic numbers without modification for both v2 and v3 nodes.
56 */
57#define XFS_DA3_NODE_MAGIC 0x3ebe /* magic number: non-leaf blocks */
58#define XFS_ATTR3_LEAF_MAGIC 0x3bee /* magic number: attribute leaf blks */
59#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v2 dirlf single blks */
60#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v2 dirlf multi blks */
61
62struct xfs_da3_blkinfo {
63 /*
64 * the node link manipulation code relies on the fact that the first
65 * element of this structure is the struct xfs_da_blkinfo so it can
66 * ignore the differences in the rest of the structures.
67 */
68 struct xfs_da_blkinfo hdr;
69 __be32 crc; /* CRC of block */
70 __be64 blkno; /* first block of the buffer */
71 __be64 lsn; /* sequence number of last write */
72 uuid_t uuid; /* filesystem we belong to */
73 __be64 owner; /* inode that owns the block */
74};
75
76/*
50 * This is the structure of the root and intermediate nodes in the Btree. 77 * This is the structure of the root and intermediate nodes in the Btree.
51 * The leaf nodes are defined above. 78 * The leaf nodes are defined above.
52 * 79 *
@@ -57,19 +84,76 @@ typedef struct xfs_da_blkinfo {
57 */ 84 */
58#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */ 85#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */
59 86
87typedef struct xfs_da_node_hdr {
88 struct xfs_da_blkinfo info; /* block type, links, etc. */
89 __be16 __count; /* count of active entries */
90 __be16 __level; /* level above leaves (leaf == 0) */
91} xfs_da_node_hdr_t;
92
93struct xfs_da3_node_hdr {
94 struct xfs_da3_blkinfo info; /* block type, links, etc. */
95 __be16 __count; /* count of active entries */
96 __be16 __level; /* level above leaves (leaf == 0) */
97 __be32 __pad32;
98};
99
100#define XFS_DA3_NODE_CRC_OFF (offsetof(struct xfs_da3_node_hdr, info.crc))
101
102typedef struct xfs_da_node_entry {
103 __be32 hashval; /* hash value for this descendant */
104 __be32 before; /* Btree block before this key */
105} xfs_da_node_entry_t;
106
60typedef struct xfs_da_intnode { 107typedef struct xfs_da_intnode {
61 struct xfs_da_node_hdr { /* constant-structure header block */ 108 struct xfs_da_node_hdr hdr;
62 xfs_da_blkinfo_t info; /* block type, links, etc. */ 109 struct xfs_da_node_entry __btree[];
63 __be16 count; /* count of active entries */
64 __be16 level; /* level above leaves (leaf == 0) */
65 } hdr;
66 struct xfs_da_node_entry {
67 __be32 hashval; /* hash value for this descendant */
68 __be32 before; /* Btree block before this key */
69 } btree[1]; /* variable sized array of keys */
70} xfs_da_intnode_t; 110} xfs_da_intnode_t;
71typedef struct xfs_da_node_hdr xfs_da_node_hdr_t; 111
72typedef struct xfs_da_node_entry xfs_da_node_entry_t; 112struct xfs_da3_intnode {
113 struct xfs_da3_node_hdr hdr;
114 struct xfs_da_node_entry __btree[];
115};
116
117/*
118 * In-core version of the node header to abstract the differences in the v2 and
119 * v3 disk format of the headers. Callers need to convert to/from disk format as
120 * appropriate.
121 */
122struct xfs_da3_icnode_hdr {
123 __uint32_t forw;
124 __uint32_t back;
125 __uint16_t magic;
126 __uint16_t count;
127 __uint16_t level;
128};
129
130extern void xfs_da3_node_hdr_from_disk(struct xfs_da3_icnode_hdr *to,
131 struct xfs_da_intnode *from);
132extern void xfs_da3_node_hdr_to_disk(struct xfs_da_intnode *to,
133 struct xfs_da3_icnode_hdr *from);
134
135static inline int
136xfs_da3_node_hdr_size(struct xfs_da_intnode *dap)
137{
138 if (dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC))
139 return sizeof(struct xfs_da3_node_hdr);
140 return sizeof(struct xfs_da_node_hdr);
141}
142
143static inline struct xfs_da_node_entry *
144xfs_da3_node_tree_p(struct xfs_da_intnode *dap)
145{
146 if (dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
147 struct xfs_da3_intnode *dap3 = (struct xfs_da3_intnode *)dap;
148 return dap3->__btree;
149 }
150 return dap->__btree;
151}
152
153extern void xfs_da3_intnode_from_disk(struct xfs_da3_icnode_hdr *to,
154 struct xfs_da_intnode *from);
155extern void xfs_da3_intnode_to_disk(struct xfs_da_intnode *to,
156 struct xfs_da3_icnode_hdr *from);
73 157
74#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize 158#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize
75 159
@@ -191,32 +275,34 @@ struct xfs_nameops {
191/* 275/*
192 * Routines used for growing the Btree. 276 * Routines used for growing the Btree.
193 */ 277 */
194int xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, 278int xfs_da3_node_create(struct xfs_da_args *args, xfs_dablk_t blkno,
195 struct xfs_buf **bpp, int whichfork); 279 int level, struct xfs_buf **bpp, int whichfork);
196int xfs_da_split(xfs_da_state_t *state); 280int xfs_da3_split(xfs_da_state_t *state);
197 281
198/* 282/*
199 * Routines used for shrinking the Btree. 283 * Routines used for shrinking the Btree.
200 */ 284 */
201int xfs_da_join(xfs_da_state_t *state); 285int xfs_da3_join(xfs_da_state_t *state);
202void xfs_da_fixhashpath(xfs_da_state_t *state, 286void xfs_da3_fixhashpath(struct xfs_da_state *state,
203 xfs_da_state_path_t *path_to_to_fix); 287 struct xfs_da_state_path *path_to_to_fix);
204 288
205/* 289/*
206 * Routines used for finding things in the Btree. 290 * Routines used for finding things in the Btree.
207 */ 291 */
208int xfs_da_node_lookup_int(xfs_da_state_t *state, int *result); 292int xfs_da3_node_lookup_int(xfs_da_state_t *state, int *result);
209int xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, 293int xfs_da3_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
210 int forward, int release, int *result); 294 int forward, int release, int *result);
211/* 295/*
212 * Utility routines. 296 * Utility routines.
213 */ 297 */
214int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, 298int xfs_da3_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
215 xfs_da_state_blk_t *new_blk); 299 xfs_da_state_blk_t *new_blk);
216int xfs_da_node_read(struct xfs_trans *tp, struct xfs_inode *dp, 300int xfs_da3_node_read(struct xfs_trans *tp, struct xfs_inode *dp,
217 xfs_dablk_t bno, xfs_daddr_t mappedbno, 301 xfs_dablk_t bno, xfs_daddr_t mappedbno,
218 struct xfs_buf **bpp, int which_fork); 302 struct xfs_buf **bpp, int which_fork);
219 303
304extern const struct xfs_buf_ops xfs_da3_node_buf_ops;
305
220/* 306/*
221 * Utility routines. 307 * Utility routines.
222 */ 308 */
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index 1d9643b3dce6..f7a0e95d197a 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -19,7 +19,7 @@
19#define __XFS_DINODE_H__ 19#define __XFS_DINODE_H__
20 20
21#define XFS_DINODE_MAGIC 0x494e /* 'IN' */ 21#define XFS_DINODE_MAGIC 0x494e /* 'IN' */
22#define XFS_DINODE_GOOD_VERSION(v) (((v) == 1 || (v) == 2)) 22#define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3)
23 23
24typedef struct xfs_timestamp { 24typedef struct xfs_timestamp {
25 __be32 t_sec; /* timestamp seconds */ 25 __be32 t_sec; /* timestamp seconds */
@@ -70,11 +70,36 @@ typedef struct xfs_dinode {
70 70
71 /* di_next_unlinked is the only non-core field in the old dinode */ 71 /* di_next_unlinked is the only non-core field in the old dinode */
72 __be32 di_next_unlinked;/* agi unlinked list ptr */ 72 __be32 di_next_unlinked;/* agi unlinked list ptr */
73} __attribute__((packed)) xfs_dinode_t; 73
74 /* start of the extended dinode, writable fields */
75 __le32 di_crc; /* CRC of the inode */
76 __be64 di_changecount; /* number of attribute changes */
77 __be64 di_lsn; /* flush sequence */
78 __be64 di_flags2; /* more random flags */
79 __u8 di_pad2[16]; /* more padding for future expansion */
80
81 /* fields only written to during inode creation */
82 xfs_timestamp_t di_crtime; /* time created */
83 __be64 di_ino; /* inode number */
84 uuid_t di_uuid; /* UUID of the filesystem */
85
86 /* structure must be padded to 64 bit alignment */
87} xfs_dinode_t;
74 88
75#define DI_MAX_FLUSH 0xffff 89#define DI_MAX_FLUSH 0xffff
76 90
77/* 91/*
92 * Size of the core inode on disk. Version 1 and 2 inodes have
93 * the same size, but version 3 has grown a few additional fields.
94 */
95static inline uint xfs_dinode_size(int version)
96{
97 if (version == 3)
98 return sizeof(struct xfs_dinode);
99 return offsetof(struct xfs_dinode, di_crc);
100}
101
102/*
78 * The 32 bit link count in the inode theoretically maxes out at UINT_MAX. 103 * The 32 bit link count in the inode theoretically maxes out at UINT_MAX.
79 * Since the pathconf interface is signed, we use 2^31 - 1 instead. 104 * Since the pathconf interface is signed, we use 2^31 - 1 instead.
80 * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX. 105 * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX.
@@ -104,11 +129,11 @@ typedef enum xfs_dinode_fmt {
104/* 129/*
105 * Inode size for given fs. 130 * Inode size for given fs.
106 */ 131 */
107#define XFS_LITINO(mp) \ 132#define XFS_LITINO(mp, version) \
108 ((int)(((mp)->m_sb.sb_inodesize) - sizeof(struct xfs_dinode))) 133 ((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version)))
109 134
110#define XFS_BROOT_SIZE_ADJ \ 135#define XFS_BROOT_SIZE_ADJ(ip) \
111 (XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t)) 136 (XFS_BMBT_BLOCK_LEN((ip)->i_mount) - sizeof(xfs_bmdr_block_t))
112 137
113/* 138/*
114 * Inode data & attribute fork sizes, per inode. 139 * Inode data & attribute fork sizes, per inode.
@@ -119,10 +144,10 @@ typedef enum xfs_dinode_fmt {
119#define XFS_DFORK_DSIZE(dip,mp) \ 144#define XFS_DFORK_DSIZE(dip,mp) \
120 (XFS_DFORK_Q(dip) ? \ 145 (XFS_DFORK_Q(dip) ? \
121 XFS_DFORK_BOFF(dip) : \ 146 XFS_DFORK_BOFF(dip) : \
122 XFS_LITINO(mp)) 147 XFS_LITINO(mp, (dip)->di_version))
123#define XFS_DFORK_ASIZE(dip,mp) \ 148#define XFS_DFORK_ASIZE(dip,mp) \
124 (XFS_DFORK_Q(dip) ? \ 149 (XFS_DFORK_Q(dip) ? \
125 XFS_LITINO(mp) - XFS_DFORK_BOFF(dip) : \ 150 XFS_LITINO(mp, (dip)->di_version) - XFS_DFORK_BOFF(dip) : \
126 0) 151 0)
127#define XFS_DFORK_SIZE(dip,mp,w) \ 152#define XFS_DFORK_SIZE(dip,mp,w) \
128 ((w) == XFS_DATA_FORK ? \ 153 ((w) == XFS_DATA_FORK ? \
@@ -133,7 +158,7 @@ typedef enum xfs_dinode_fmt {
133 * Return pointers to the data or attribute forks. 158 * Return pointers to the data or attribute forks.
134 */ 159 */
135#define XFS_DFORK_DPTR(dip) \ 160#define XFS_DFORK_DPTR(dip) \
136 ((char *)(dip) + sizeof(struct xfs_dinode)) 161 ((char *)dip + xfs_dinode_size(dip->di_version))
137#define XFS_DFORK_APTR(dip) \ 162#define XFS_DFORK_APTR(dip) \
138 (XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip)) 163 (XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip))
139#define XFS_DFORK_PTR(dip,w) \ 164#define XFS_DFORK_PTR(dip,w) \
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 12afe07a91d7..e59f5fc816fe 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -28,11 +29,13 @@
28#include "xfs_dinode.h" 29#include "xfs_dinode.h"
29#include "xfs_inode.h" 30#include "xfs_inode.h"
30#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
32#include "xfs_buf_item.h"
31#include "xfs_dir2.h" 33#include "xfs_dir2.h"
32#include "xfs_dir2_format.h" 34#include "xfs_dir2_format.h"
33#include "xfs_dir2_priv.h" 35#include "xfs_dir2_priv.h"
34#include "xfs_error.h" 36#include "xfs_error.h"
35#include "xfs_trace.h" 37#include "xfs_trace.h"
38#include "xfs_cksum.h"
36 39
37/* 40/*
38 * Local function prototypes. 41 * Local function prototypes.
@@ -56,52 +59,110 @@ xfs_dir_startup(void)
56 xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); 59 xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2);
57} 60}
58 61
59static void 62static bool
60xfs_dir2_block_verify( 63xfs_dir3_block_verify(
61 struct xfs_buf *bp) 64 struct xfs_buf *bp)
62{ 65{
63 struct xfs_mount *mp = bp->b_target->bt_mount; 66 struct xfs_mount *mp = bp->b_target->bt_mount;
64 struct xfs_dir2_data_hdr *hdr = bp->b_addr; 67 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
65 int block_ok = 0; 68
66 69 if (xfs_sb_version_hascrc(&mp->m_sb)) {
67 block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); 70 if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
68 block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0; 71 return false;
69 72 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
70 if (!block_ok) { 73 return false;
71 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); 74 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
72 xfs_buf_ioerror(bp, EFSCORRUPTED); 75 return false;
76 } else {
77 if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
78 return false;
73 } 79 }
80 if (__xfs_dir3_data_check(NULL, bp))
81 return false;
82 return true;
74} 83}
75 84
76static void 85static void
77xfs_dir2_block_read_verify( 86xfs_dir3_block_read_verify(
78 struct xfs_buf *bp) 87 struct xfs_buf *bp)
79{ 88{
80 xfs_dir2_block_verify(bp); 89 struct xfs_mount *mp = bp->b_target->bt_mount;
90
91 if ((xfs_sb_version_hascrc(&mp->m_sb) &&
92 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
93 XFS_DIR3_DATA_CRC_OFF)) ||
94 !xfs_dir3_block_verify(bp)) {
95 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
96 xfs_buf_ioerror(bp, EFSCORRUPTED);
97 }
81} 98}
82 99
83static void 100static void
84xfs_dir2_block_write_verify( 101xfs_dir3_block_write_verify(
85 struct xfs_buf *bp) 102 struct xfs_buf *bp)
86{ 103{
87 xfs_dir2_block_verify(bp); 104 struct xfs_mount *mp = bp->b_target->bt_mount;
105 struct xfs_buf_log_item *bip = bp->b_fspriv;
106 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
107
108 if (!xfs_dir3_block_verify(bp)) {
109 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
110 xfs_buf_ioerror(bp, EFSCORRUPTED);
111 return;
112 }
113
114 if (!xfs_sb_version_hascrc(&mp->m_sb))
115 return;
116
117 if (bip)
118 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
119
120 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
88} 121}
89 122
90const struct xfs_buf_ops xfs_dir2_block_buf_ops = { 123const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
91 .verify_read = xfs_dir2_block_read_verify, 124 .verify_read = xfs_dir3_block_read_verify,
92 .verify_write = xfs_dir2_block_write_verify, 125 .verify_write = xfs_dir3_block_write_verify,
93}; 126};
94 127
95static int 128static int
96xfs_dir2_block_read( 129xfs_dir3_block_read(
97 struct xfs_trans *tp, 130 struct xfs_trans *tp,
98 struct xfs_inode *dp, 131 struct xfs_inode *dp,
99 struct xfs_buf **bpp) 132 struct xfs_buf **bpp)
100{ 133{
101 struct xfs_mount *mp = dp->i_mount; 134 struct xfs_mount *mp = dp->i_mount;
135 int err;
102 136
103 return xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp, 137 err = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp,
104 XFS_DATA_FORK, &xfs_dir2_block_buf_ops); 138 XFS_DATA_FORK, &xfs_dir3_block_buf_ops);
139 if (!err && tp)
140 xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
141 return err;
142}
143
144static void
145xfs_dir3_block_init(
146 struct xfs_mount *mp,
147 struct xfs_trans *tp,
148 struct xfs_buf *bp,
149 struct xfs_inode *dp)
150{
151 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
152
153 bp->b_ops = &xfs_dir3_block_buf_ops;
154 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_BLOCK_BUF);
155
156 if (xfs_sb_version_hascrc(&mp->m_sb)) {
157 memset(hdr3, 0, sizeof(*hdr3));
158 hdr3->magic = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
159 hdr3->blkno = cpu_to_be64(bp->b_bn);
160 hdr3->owner = cpu_to_be64(dp->i_ino);
161 uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
162 return;
163
164 }
165 hdr3->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
105} 166}
106 167
107static void 168static void
@@ -121,7 +182,7 @@ xfs_dir2_block_need_space(
121 struct xfs_dir2_data_unused *enddup = NULL; 182 struct xfs_dir2_data_unused *enddup = NULL;
122 183
123 *compact = 0; 184 *compact = 0;
124 bf = hdr->bestfree; 185 bf = xfs_dir3_data_bestfree_p(hdr);
125 186
126 /* 187 /*
127 * If there are stale entries we'll use one for the leaf. 188 * If there are stale entries we'll use one for the leaf.
@@ -303,7 +364,7 @@ xfs_dir2_block_addname(
303 mp = dp->i_mount; 364 mp = dp->i_mount;
304 365
305 /* Read the (one and only) directory block into bp. */ 366 /* Read the (one and only) directory block into bp. */
306 error = xfs_dir2_block_read(tp, dp, &bp); 367 error = xfs_dir3_block_read(tp, dp, &bp);
307 if (error) 368 if (error)
308 return error; 369 return error;
309 370
@@ -498,7 +559,7 @@ xfs_dir2_block_addname(
498 xfs_dir2_data_log_header(tp, bp); 559 xfs_dir2_data_log_header(tp, bp);
499 xfs_dir2_block_log_tail(tp, bp); 560 xfs_dir2_block_log_tail(tp, bp);
500 xfs_dir2_data_log_entry(tp, bp, dep); 561 xfs_dir2_data_log_entry(tp, bp, dep);
501 xfs_dir2_data_check(dp, bp); 562 xfs_dir3_data_check(dp, bp);
502 return 0; 563 return 0;
503} 564}
504 565
@@ -531,7 +592,7 @@ xfs_dir2_block_getdents(
531 if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk) 592 if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk)
532 return 0; 593 return 0;
533 594
534 error = xfs_dir2_block_read(NULL, dp, &bp); 595 error = xfs_dir3_block_read(NULL, dp, &bp);
535 if (error) 596 if (error)
536 return error; 597 return error;
537 598
@@ -541,12 +602,12 @@ xfs_dir2_block_getdents(
541 */ 602 */
542 wantoff = xfs_dir2_dataptr_to_off(mp, *offset); 603 wantoff = xfs_dir2_dataptr_to_off(mp, *offset);
543 hdr = bp->b_addr; 604 hdr = bp->b_addr;
544 xfs_dir2_data_check(dp, bp); 605 xfs_dir3_data_check(dp, bp);
545 /* 606 /*
546 * Set up values for the loop. 607 * Set up values for the loop.
547 */ 608 */
548 btp = xfs_dir2_block_tail_p(mp, hdr); 609 btp = xfs_dir2_block_tail_p(mp, hdr);
549 ptr = (char *)(hdr + 1); 610 ptr = (char *)xfs_dir3_data_entry_p(hdr);
550 endptr = (char *)xfs_dir2_block_leaf_p(btp); 611 endptr = (char *)xfs_dir2_block_leaf_p(btp);
551 612
552 /* 613 /*
@@ -665,7 +726,7 @@ xfs_dir2_block_lookup(
665 dp = args->dp; 726 dp = args->dp;
666 mp = dp->i_mount; 727 mp = dp->i_mount;
667 hdr = bp->b_addr; 728 hdr = bp->b_addr;
668 xfs_dir2_data_check(dp, bp); 729 xfs_dir3_data_check(dp, bp);
669 btp = xfs_dir2_block_tail_p(mp, hdr); 730 btp = xfs_dir2_block_tail_p(mp, hdr);
670 blp = xfs_dir2_block_leaf_p(btp); 731 blp = xfs_dir2_block_leaf_p(btp);
671 /* 732 /*
@@ -711,12 +772,12 @@ xfs_dir2_block_lookup_int(
711 tp = args->trans; 772 tp = args->trans;
712 mp = dp->i_mount; 773 mp = dp->i_mount;
713 774
714 error = xfs_dir2_block_read(tp, dp, &bp); 775 error = xfs_dir3_block_read(tp, dp, &bp);
715 if (error) 776 if (error)
716 return error; 777 return error;
717 778
718 hdr = bp->b_addr; 779 hdr = bp->b_addr;
719 xfs_dir2_data_check(dp, bp); 780 xfs_dir3_data_check(dp, bp);
720 btp = xfs_dir2_block_tail_p(mp, hdr); 781 btp = xfs_dir2_block_tail_p(mp, hdr);
721 blp = xfs_dir2_block_leaf_p(btp); 782 blp = xfs_dir2_block_leaf_p(btp);
722 /* 783 /*
@@ -853,7 +914,7 @@ xfs_dir2_block_removename(
853 xfs_dir2_data_freescan(mp, hdr, &needlog); 914 xfs_dir2_data_freescan(mp, hdr, &needlog);
854 if (needlog) 915 if (needlog)
855 xfs_dir2_data_log_header(tp, bp); 916 xfs_dir2_data_log_header(tp, bp);
856 xfs_dir2_data_check(dp, bp); 917 xfs_dir3_data_check(dp, bp);
857 /* 918 /*
858 * See if the size as a shortform is good enough. 919 * See if the size as a shortform is good enough.
859 */ 920 */
@@ -910,7 +971,7 @@ xfs_dir2_block_replace(
910 */ 971 */
911 dep->inumber = cpu_to_be64(args->inumber); 972 dep->inumber = cpu_to_be64(args->inumber);
912 xfs_dir2_data_log_entry(args->trans, bp, dep); 973 xfs_dir2_data_log_entry(args->trans, bp, dep);
913 xfs_dir2_data_check(dp, bp); 974 xfs_dir3_data_check(dp, bp);
914 return 0; 975 return 0;
915} 976}
916 977
@@ -958,6 +1019,8 @@ xfs_dir2_leaf_to_block(
958 __be16 *tagp; /* end of entry (tag) */ 1019 __be16 *tagp; /* end of entry (tag) */
959 int to; /* block/leaf to index */ 1020 int to; /* block/leaf to index */
960 xfs_trans_t *tp; /* transaction pointer */ 1021 xfs_trans_t *tp; /* transaction pointer */
1022 struct xfs_dir2_leaf_entry *ents;
1023 struct xfs_dir3_icleaf_hdr leafhdr;
961 1024
962 trace_xfs_dir2_leaf_to_block(args); 1025 trace_xfs_dir2_leaf_to_block(args);
963 1026
@@ -965,8 +1028,12 @@ xfs_dir2_leaf_to_block(
965 tp = args->trans; 1028 tp = args->trans;
966 mp = dp->i_mount; 1029 mp = dp->i_mount;
967 leaf = lbp->b_addr; 1030 leaf = lbp->b_addr;
968 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); 1031 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
1032 ents = xfs_dir3_leaf_ents_p(leaf);
969 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 1033 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1034
1035 ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
1036 leafhdr.magic == XFS_DIR3_LEAF1_MAGIC);
970 /* 1037 /*
971 * If there are data blocks other than the first one, take this 1038 * If there are data blocks other than the first one, take this
972 * opportunity to remove trailing empty data blocks that may have 1039 * opportunity to remove trailing empty data blocks that may have
@@ -974,9 +1041,12 @@ xfs_dir2_leaf_to_block(
974 * These will show up in the leaf bests table. 1041 * These will show up in the leaf bests table.
975 */ 1042 */
976 while (dp->i_d.di_size > mp->m_dirblksize) { 1043 while (dp->i_d.di_size > mp->m_dirblksize) {
1044 int hdrsz;
1045
1046 hdrsz = xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb));
977 bestsp = xfs_dir2_leaf_bests_p(ltp); 1047 bestsp = xfs_dir2_leaf_bests_p(ltp);
978 if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == 1048 if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
979 mp->m_dirblksize - (uint)sizeof(*hdr)) { 1049 mp->m_dirblksize - hdrsz) {
980 if ((error = 1050 if ((error =
981 xfs_dir2_leaf_trim_data(args, lbp, 1051 xfs_dir2_leaf_trim_data(args, lbp,
982 (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) 1052 (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
@@ -988,17 +1058,19 @@ xfs_dir2_leaf_to_block(
988 * Read the data block if we don't already have it, give up if it fails. 1058 * Read the data block if we don't already have it, give up if it fails.
989 */ 1059 */
990 if (!dbp) { 1060 if (!dbp) {
991 error = xfs_dir2_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp); 1061 error = xfs_dir3_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp);
992 if (error) 1062 if (error)
993 return error; 1063 return error;
994 } 1064 }
995 hdr = dbp->b_addr; 1065 hdr = dbp->b_addr;
996 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); 1066 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
1067 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
1068
997 /* 1069 /*
998 * Size of the "leaf" area in the block. 1070 * Size of the "leaf" area in the block.
999 */ 1071 */
1000 size = (uint)sizeof(xfs_dir2_block_tail_t) + 1072 size = (uint)sizeof(xfs_dir2_block_tail_t) +
1001 (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); 1073 (uint)sizeof(*lep) * (leafhdr.count - leafhdr.stale);
1002 /* 1074 /*
1003 * Look at the last data entry. 1075 * Look at the last data entry.
1004 */ 1076 */
@@ -1014,8 +1086,8 @@ xfs_dir2_leaf_to_block(
1014 /* 1086 /*
1015 * Start converting it to block form. 1087 * Start converting it to block form.
1016 */ 1088 */
1017 dbp->b_ops = &xfs_dir2_block_buf_ops; 1089 xfs_dir3_block_init(mp, tp, dbp, dp);
1018 hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); 1090
1019 needlog = 1; 1091 needlog = 1;
1020 needscan = 0; 1092 needscan = 0;
1021 /* 1093 /*
@@ -1027,18 +1099,17 @@ xfs_dir2_leaf_to_block(
1027 * Initialize the block tail. 1099 * Initialize the block tail.
1028 */ 1100 */
1029 btp = xfs_dir2_block_tail_p(mp, hdr); 1101 btp = xfs_dir2_block_tail_p(mp, hdr);
1030 btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); 1102 btp->count = cpu_to_be32(leafhdr.count - leafhdr.stale);
1031 btp->stale = 0; 1103 btp->stale = 0;
1032 xfs_dir2_block_log_tail(tp, dbp); 1104 xfs_dir2_block_log_tail(tp, dbp);
1033 /* 1105 /*
1034 * Initialize the block leaf area. We compact out stale entries. 1106 * Initialize the block leaf area. We compact out stale entries.
1035 */ 1107 */
1036 lep = xfs_dir2_block_leaf_p(btp); 1108 lep = xfs_dir2_block_leaf_p(btp);
1037 for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { 1109 for (from = to = 0; from < leafhdr.count; from++) {
1038 if (leaf->ents[from].address == 1110 if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
1039 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
1040 continue; 1111 continue;
1041 lep[to++] = leaf->ents[from]; 1112 lep[to++] = ents[from];
1042 } 1113 }
1043 ASSERT(to == be32_to_cpu(btp->count)); 1114 ASSERT(to == be32_to_cpu(btp->count));
1044 xfs_dir2_block_log_leaf(tp, dbp, 0, be32_to_cpu(btp->count) - 1); 1115 xfs_dir2_block_log_leaf(tp, dbp, 0, be32_to_cpu(btp->count) - 1);
@@ -1137,16 +1208,16 @@ xfs_dir2_sf_to_block(
1137 return error; 1208 return error;
1138 } 1209 }
1139 /* 1210 /*
1140 * Initialize the data block. 1211 * Initialize the data block, then convert it to block format.
1141 */ 1212 */
1142 error = xfs_dir2_data_init(args, blkno, &bp); 1213 error = xfs_dir3_data_init(args, blkno, &bp);
1143 if (error) { 1214 if (error) {
1144 kmem_free(sfp); 1215 kmem_free(sfp);
1145 return error; 1216 return error;
1146 } 1217 }
1147 bp->b_ops = &xfs_dir2_block_buf_ops; 1218 xfs_dir3_block_init(mp, tp, bp, dp);
1148 hdr = bp->b_addr; 1219 hdr = bp->b_addr;
1149 hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); 1220
1150 /* 1221 /*
1151 * Compute size of block "tail" area. 1222 * Compute size of block "tail" area.
1152 */ 1223 */
@@ -1156,7 +1227,7 @@ xfs_dir2_sf_to_block(
1156 * The whole thing is initialized to free by the init routine. 1227 * The whole thing is initialized to free by the init routine.
1157 * Say we're using the leaf and tail area. 1228 * Say we're using the leaf and tail area.
1158 */ 1229 */
1159 dup = (xfs_dir2_data_unused_t *)(hdr + 1); 1230 dup = xfs_dir3_data_unused_p(hdr);
1160 needlog = needscan = 0; 1231 needlog = needscan = 0;
1161 xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog, 1232 xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog,
1162 &needscan); 1233 &needscan);
@@ -1178,8 +1249,7 @@ xfs_dir2_sf_to_block(
1178 /* 1249 /*
1179 * Create entry for . 1250 * Create entry for .
1180 */ 1251 */
1181 dep = (xfs_dir2_data_entry_t *) 1252 dep = xfs_dir3_data_dot_entry_p(hdr);
1182 ((char *)hdr + XFS_DIR2_DATA_DOT_OFFSET);
1183 dep->inumber = cpu_to_be64(dp->i_ino); 1253 dep->inumber = cpu_to_be64(dp->i_ino);
1184 dep->namelen = 1; 1254 dep->namelen = 1;
1185 dep->name[0] = '.'; 1255 dep->name[0] = '.';
@@ -1192,8 +1262,7 @@ xfs_dir2_sf_to_block(
1192 /* 1262 /*
1193 * Create entry for .. 1263 * Create entry for ..
1194 */ 1264 */
1195 dep = (xfs_dir2_data_entry_t *) 1265 dep = xfs_dir3_data_dotdot_entry_p(hdr);
1196 ((char *)hdr + XFS_DIR2_DATA_DOTDOT_OFFSET);
1197 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp)); 1266 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
1198 dep->namelen = 2; 1267 dep->namelen = 2;
1199 dep->name[0] = dep->name[1] = '.'; 1268 dep->name[0] = dep->name[1] = '.';
@@ -1203,7 +1272,7 @@ xfs_dir2_sf_to_block(
1203 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); 1272 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
1204 blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1273 blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1205 (char *)dep - (char *)hdr)); 1274 (char *)dep - (char *)hdr));
1206 offset = XFS_DIR2_DATA_FIRST_OFFSET; 1275 offset = xfs_dir3_data_first_offset(hdr);
1207 /* 1276 /*
1208 * Loop over existing entries, stuff them in. 1277 * Loop over existing entries, stuff them in.
1209 */ 1278 */
@@ -1273,6 +1342,6 @@ xfs_dir2_sf_to_block(
1273 ASSERT(needscan == 0); 1342 ASSERT(needscan == 0);
1274 xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1); 1343 xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1);
1275 xfs_dir2_block_log_tail(tp, bp); 1344 xfs_dir2_block_log_tail(tp, bp);
1276 xfs_dir2_data_check(dp, bp); 1345 xfs_dir3_data_check(dp, bp);
1277 return 0; 1346 return 0;
1278} 1347}
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index ffcf1774152e..c2930238005c 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -30,6 +31,8 @@
30#include "xfs_dir2_format.h" 31#include "xfs_dir2_format.h"
31#include "xfs_dir2_priv.h" 32#include "xfs_dir2_priv.h"
32#include "xfs_error.h" 33#include "xfs_error.h"
34#include "xfs_buf_item.h"
35#include "xfs_cksum.h"
33 36
34STATIC xfs_dir2_data_free_t * 37STATIC xfs_dir2_data_free_t *
35xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup); 38xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
@@ -40,7 +43,7 @@ xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
40 * Return 0 is the buffer is good, otherwise an error. 43 * Return 0 is the buffer is good, otherwise an error.
41 */ 44 */
42int 45int
43__xfs_dir2_data_check( 46__xfs_dir3_data_check(
44 struct xfs_inode *dp, /* incore inode pointer */ 47 struct xfs_inode *dp, /* incore inode pointer */
45 struct xfs_buf *bp) /* data block's buffer */ 48 struct xfs_buf *bp) /* data block's buffer */
46{ 49{
@@ -65,15 +68,17 @@ __xfs_dir2_data_check(
65 68
66 mp = bp->b_target->bt_mount; 69 mp = bp->b_target->bt_mount;
67 hdr = bp->b_addr; 70 hdr = bp->b_addr;
68 bf = hdr->bestfree; 71 bf = xfs_dir3_data_bestfree_p(hdr);
69 p = (char *)(hdr + 1); 72 p = (char *)xfs_dir3_data_entry_p(hdr);
70 73
71 switch (hdr->magic) { 74 switch (hdr->magic) {
75 case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
72 case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): 76 case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
73 btp = xfs_dir2_block_tail_p(mp, hdr); 77 btp = xfs_dir2_block_tail_p(mp, hdr);
74 lep = xfs_dir2_block_leaf_p(btp); 78 lep = xfs_dir2_block_leaf_p(btp);
75 endp = (char *)lep; 79 endp = (char *)lep;
76 break; 80 break;
81 case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
77 case cpu_to_be32(XFS_DIR2_DATA_MAGIC): 82 case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
78 endp = (char *)hdr + mp->m_dirblksize; 83 endp = (char *)hdr + mp->m_dirblksize;
79 break; 84 break;
@@ -148,7 +153,8 @@ __xfs_dir2_data_check(
148 (char *)dep - (char *)hdr); 153 (char *)dep - (char *)hdr);
149 count++; 154 count++;
150 lastfree = 0; 155 lastfree = 0;
151 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { 156 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
157 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
152 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 158 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
153 (xfs_dir2_data_aoff_t) 159 (xfs_dir2_data_aoff_t)
154 ((char *)dep - (char *)hdr)); 160 ((char *)dep - (char *)hdr));
@@ -168,7 +174,8 @@ __xfs_dir2_data_check(
168 * Need to have seen all the entries and all the bestfree slots. 174 * Need to have seen all the entries and all the bestfree slots.
169 */ 175 */
170 XFS_WANT_CORRUPTED_RETURN(freeseen == 7); 176 XFS_WANT_CORRUPTED_RETURN(freeseen == 7);
171 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { 177 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
178 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
172 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { 179 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
173 if (lep[i].address == 180 if (lep[i].address ==
174 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 181 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
@@ -185,21 +192,27 @@ __xfs_dir2_data_check(
185 return 0; 192 return 0;
186} 193}
187 194
188static void 195static bool
189xfs_dir2_data_verify( 196xfs_dir3_data_verify(
190 struct xfs_buf *bp) 197 struct xfs_buf *bp)
191{ 198{
192 struct xfs_mount *mp = bp->b_target->bt_mount; 199 struct xfs_mount *mp = bp->b_target->bt_mount;
193 struct xfs_dir2_data_hdr *hdr = bp->b_addr; 200 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
194 int block_ok = 0;
195 201
196 block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC); 202 if (xfs_sb_version_hascrc(&mp->m_sb)) {
197 block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0; 203 if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
198 204 return false;
199 if (!block_ok) { 205 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
200 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); 206 return false;
201 xfs_buf_ioerror(bp, EFSCORRUPTED); 207 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
208 return false;
209 } else {
210 if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
211 return false;
202 } 212 }
213 if (__xfs_dir3_data_check(NULL, bp))
214 return false;
215 return true;
203} 216}
204 217
205/* 218/*
@@ -208,7 +221,7 @@ xfs_dir2_data_verify(
208 * format buffer or a data format buffer on readahead. 221 * format buffer or a data format buffer on readahead.
209 */ 222 */
210static void 223static void
211xfs_dir2_data_reada_verify( 224xfs_dir3_data_reada_verify(
212 struct xfs_buf *bp) 225 struct xfs_buf *bp)
213{ 226{
214 struct xfs_mount *mp = bp->b_target->bt_mount; 227 struct xfs_mount *mp = bp->b_target->bt_mount;
@@ -216,11 +229,13 @@ xfs_dir2_data_reada_verify(
216 229
217 switch (hdr->magic) { 230 switch (hdr->magic) {
218 case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): 231 case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
219 bp->b_ops = &xfs_dir2_block_buf_ops; 232 case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
233 bp->b_ops = &xfs_dir3_block_buf_ops;
220 bp->b_ops->verify_read(bp); 234 bp->b_ops->verify_read(bp);
221 return; 235 return;
222 case cpu_to_be32(XFS_DIR2_DATA_MAGIC): 236 case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
223 xfs_dir2_data_verify(bp); 237 case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
238 xfs_dir3_data_verify(bp);
224 return; 239 return;
225 default: 240 default:
226 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); 241 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
@@ -230,51 +245,80 @@ xfs_dir2_data_reada_verify(
230} 245}
231 246
232static void 247static void
233xfs_dir2_data_read_verify( 248xfs_dir3_data_read_verify(
234 struct xfs_buf *bp) 249 struct xfs_buf *bp)
235{ 250{
236 xfs_dir2_data_verify(bp); 251 struct xfs_mount *mp = bp->b_target->bt_mount;
252
253 if ((xfs_sb_version_hascrc(&mp->m_sb) &&
254 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
255 XFS_DIR3_DATA_CRC_OFF)) ||
256 !xfs_dir3_data_verify(bp)) {
257 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
258 xfs_buf_ioerror(bp, EFSCORRUPTED);
259 }
237} 260}
238 261
239static void 262static void
240xfs_dir2_data_write_verify( 263xfs_dir3_data_write_verify(
241 struct xfs_buf *bp) 264 struct xfs_buf *bp)
242{ 265{
243 xfs_dir2_data_verify(bp); 266 struct xfs_mount *mp = bp->b_target->bt_mount;
267 struct xfs_buf_log_item *bip = bp->b_fspriv;
268 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
269
270 if (!xfs_dir3_data_verify(bp)) {
271 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
272 xfs_buf_ioerror(bp, EFSCORRUPTED);
273 return;
274 }
275
276 if (!xfs_sb_version_hascrc(&mp->m_sb))
277 return;
278
279 if (bip)
280 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
281
282 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
244} 283}
245 284
246const struct xfs_buf_ops xfs_dir2_data_buf_ops = { 285const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
247 .verify_read = xfs_dir2_data_read_verify, 286 .verify_read = xfs_dir3_data_read_verify,
248 .verify_write = xfs_dir2_data_write_verify, 287 .verify_write = xfs_dir3_data_write_verify,
249}; 288};
250 289
251static const struct xfs_buf_ops xfs_dir2_data_reada_buf_ops = { 290static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
252 .verify_read = xfs_dir2_data_reada_verify, 291 .verify_read = xfs_dir3_data_reada_verify,
253 .verify_write = xfs_dir2_data_write_verify, 292 .verify_write = xfs_dir3_data_write_verify,
254}; 293};
255 294
256 295
257int 296int
258xfs_dir2_data_read( 297xfs_dir3_data_read(
259 struct xfs_trans *tp, 298 struct xfs_trans *tp,
260 struct xfs_inode *dp, 299 struct xfs_inode *dp,
261 xfs_dablk_t bno, 300 xfs_dablk_t bno,
262 xfs_daddr_t mapped_bno, 301 xfs_daddr_t mapped_bno,
263 struct xfs_buf **bpp) 302 struct xfs_buf **bpp)
264{ 303{
265 return xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp, 304 int err;
266 XFS_DATA_FORK, &xfs_dir2_data_buf_ops); 305
306 err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
307 XFS_DATA_FORK, &xfs_dir3_data_buf_ops);
308 if (!err && tp)
309 xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
310 return err;
267} 311}
268 312
269int 313int
270xfs_dir2_data_readahead( 314xfs_dir3_data_readahead(
271 struct xfs_trans *tp, 315 struct xfs_trans *tp,
272 struct xfs_inode *dp, 316 struct xfs_inode *dp,
273 xfs_dablk_t bno, 317 xfs_dablk_t bno,
274 xfs_daddr_t mapped_bno) 318 xfs_daddr_t mapped_bno)
275{ 319{
276 return xfs_da_reada_buf(tp, dp, bno, mapped_bno, 320 return xfs_da_reada_buf(tp, dp, bno, mapped_bno,
277 XFS_DATA_FORK, &xfs_dir2_data_reada_buf_ops); 321 XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops);
278} 322}
279 323
280/* 324/*
@@ -288,12 +332,15 @@ xfs_dir2_data_freefind(
288{ 332{
289 xfs_dir2_data_free_t *dfp; /* bestfree entry */ 333 xfs_dir2_data_free_t *dfp; /* bestfree entry */
290 xfs_dir2_data_aoff_t off; /* offset value needed */ 334 xfs_dir2_data_aoff_t off; /* offset value needed */
335 struct xfs_dir2_data_free *bf;
291#if defined(DEBUG) && defined(__KERNEL__) 336#if defined(DEBUG) && defined(__KERNEL__)
292 int matched; /* matched the value */ 337 int matched; /* matched the value */
293 int seenzero; /* saw a 0 bestfree entry */ 338 int seenzero; /* saw a 0 bestfree entry */
294#endif 339#endif
295 340
296 off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); 341 off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
342 bf = xfs_dir3_data_bestfree_p(hdr);
343
297#if defined(DEBUG) && defined(__KERNEL__) 344#if defined(DEBUG) && defined(__KERNEL__)
298 /* 345 /*
299 * Validate some consistency in the bestfree table. 346 * Validate some consistency in the bestfree table.
@@ -301,9 +348,11 @@ xfs_dir2_data_freefind(
301 * one we're looking for it has to be exact. 348 * one we're looking for it has to be exact.
302 */ 349 */
303 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 350 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
304 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 351 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
305 for (dfp = &hdr->bestfree[0], seenzero = matched = 0; 352 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
306 dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT]; 353 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
354 for (dfp = &bf[0], seenzero = matched = 0;
355 dfp < &bf[XFS_DIR2_DATA_FD_COUNT];
307 dfp++) { 356 dfp++) {
308 if (!dfp->offset) { 357 if (!dfp->offset) {
309 ASSERT(!dfp->length); 358 ASSERT(!dfp->length);
@@ -319,7 +368,7 @@ xfs_dir2_data_freefind(
319 else 368 else
320 ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off); 369 ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
321 ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length)); 370 ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length));
322 if (dfp > &hdr->bestfree[0]) 371 if (dfp > &bf[0])
323 ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length)); 372 ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
324 } 373 }
325#endif 374#endif
@@ -328,14 +377,12 @@ xfs_dir2_data_freefind(
328 * it can't be there since they're sorted. 377 * it can't be there since they're sorted.
329 */ 378 */
330 if (be16_to_cpu(dup->length) < 379 if (be16_to_cpu(dup->length) <
331 be16_to_cpu(hdr->bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length)) 380 be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length))
332 return NULL; 381 return NULL;
333 /* 382 /*
334 * Look at the three bestfree entries for our guy. 383 * Look at the three bestfree entries for our guy.
335 */ 384 */
336 for (dfp = &hdr->bestfree[0]; 385 for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
337 dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
338 dfp++) {
339 if (!dfp->offset) 386 if (!dfp->offset)
340 return NULL; 387 return NULL;
341 if (be16_to_cpu(dfp->offset) == off) 388 if (be16_to_cpu(dfp->offset) == off)
@@ -359,11 +406,12 @@ xfs_dir2_data_freeinsert(
359 xfs_dir2_data_free_t *dfp; /* bestfree table pointer */ 406 xfs_dir2_data_free_t *dfp; /* bestfree table pointer */
360 xfs_dir2_data_free_t new; /* new bestfree entry */ 407 xfs_dir2_data_free_t new; /* new bestfree entry */
361 408
362#ifdef __KERNEL__
363 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 409 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
364 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 410 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
365#endif 411 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
366 dfp = hdr->bestfree; 412 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
413
414 dfp = xfs_dir3_data_bestfree_p(hdr);
367 new.length = dup->length; 415 new.length = dup->length;
368 new.offset = cpu_to_be16((char *)dup - (char *)hdr); 416 new.offset = cpu_to_be16((char *)dup - (char *)hdr);
369 417
@@ -400,32 +448,36 @@ xfs_dir2_data_freeremove(
400 xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */ 448 xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */
401 int *loghead) /* out: log data header */ 449 int *loghead) /* out: log data header */
402{ 450{
403#ifdef __KERNEL__ 451 struct xfs_dir2_data_free *bf;
452
404 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 453 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
405 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 454 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
406#endif 455 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
456 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
457
407 /* 458 /*
408 * It's the first entry, slide the next 2 up. 459 * It's the first entry, slide the next 2 up.
409 */ 460 */
410 if (dfp == &hdr->bestfree[0]) { 461 bf = xfs_dir3_data_bestfree_p(hdr);
411 hdr->bestfree[0] = hdr->bestfree[1]; 462 if (dfp == &bf[0]) {
412 hdr->bestfree[1] = hdr->bestfree[2]; 463 bf[0] = bf[1];
464 bf[1] = bf[2];
413 } 465 }
414 /* 466 /*
415 * It's the second entry, slide the 3rd entry up. 467 * It's the second entry, slide the 3rd entry up.
416 */ 468 */
417 else if (dfp == &hdr->bestfree[1]) 469 else if (dfp == &bf[1])
418 hdr->bestfree[1] = hdr->bestfree[2]; 470 bf[1] = bf[2];
419 /* 471 /*
420 * Must be the last entry. 472 * Must be the last entry.
421 */ 473 */
422 else 474 else
423 ASSERT(dfp == &hdr->bestfree[2]); 475 ASSERT(dfp == &bf[2]);
424 /* 476 /*
425 * Clear the 3rd entry, must be zero now. 477 * Clear the 3rd entry, must be zero now.
426 */ 478 */
427 hdr->bestfree[2].length = 0; 479 bf[2].length = 0;
428 hdr->bestfree[2].offset = 0; 480 bf[2].offset = 0;
429 *loghead = 1; 481 *loghead = 1;
430} 482}
431 483
@@ -441,23 +493,27 @@ xfs_dir2_data_freescan(
441 xfs_dir2_block_tail_t *btp; /* block tail */ 493 xfs_dir2_block_tail_t *btp; /* block tail */
442 xfs_dir2_data_entry_t *dep; /* active data entry */ 494 xfs_dir2_data_entry_t *dep; /* active data entry */
443 xfs_dir2_data_unused_t *dup; /* unused data entry */ 495 xfs_dir2_data_unused_t *dup; /* unused data entry */
496 struct xfs_dir2_data_free *bf;
444 char *endp; /* end of block's data */ 497 char *endp; /* end of block's data */
445 char *p; /* current entry pointer */ 498 char *p; /* current entry pointer */
446 499
447#ifdef __KERNEL__
448 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 500 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
449 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 501 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
450#endif 502 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
503 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
504
451 /* 505 /*
452 * Start by clearing the table. 506 * Start by clearing the table.
453 */ 507 */
454 memset(hdr->bestfree, 0, sizeof(hdr->bestfree)); 508 bf = xfs_dir3_data_bestfree_p(hdr);
509 memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT);
455 *loghead = 1; 510 *loghead = 1;
456 /* 511 /*
457 * Set up pointers. 512 * Set up pointers.
458 */ 513 */
459 p = (char *)(hdr + 1); 514 p = (char *)xfs_dir3_data_entry_p(hdr);
460 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { 515 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
516 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
461 btp = xfs_dir2_block_tail_p(mp, hdr); 517 btp = xfs_dir2_block_tail_p(mp, hdr);
462 endp = (char *)xfs_dir2_block_leaf_p(btp); 518 endp = (char *)xfs_dir2_block_leaf_p(btp);
463 } else 519 } else
@@ -493,7 +549,7 @@ xfs_dir2_data_freescan(
493 * Give back the buffer for the created block. 549 * Give back the buffer for the created block.
494 */ 550 */
495int /* error */ 551int /* error */
496xfs_dir2_data_init( 552xfs_dir3_data_init(
497 xfs_da_args_t *args, /* directory operation args */ 553 xfs_da_args_t *args, /* directory operation args */
498 xfs_dir2_db_t blkno, /* logical dir block number */ 554 xfs_dir2_db_t blkno, /* logical dir block number */
499 struct xfs_buf **bpp) /* output block buffer */ 555 struct xfs_buf **bpp) /* output block buffer */
@@ -502,6 +558,7 @@ xfs_dir2_data_init(
502 xfs_dir2_data_hdr_t *hdr; /* data block header */ 558 xfs_dir2_data_hdr_t *hdr; /* data block header */
503 xfs_inode_t *dp; /* incore directory inode */ 559 xfs_inode_t *dp; /* incore directory inode */
504 xfs_dir2_data_unused_t *dup; /* unused entry pointer */ 560 xfs_dir2_data_unused_t *dup; /* unused entry pointer */
561 struct xfs_dir2_data_free *bf;
505 int error; /* error return value */ 562 int error; /* error return value */
506 int i; /* bestfree index */ 563 int i; /* bestfree index */
507 xfs_mount_t *mp; /* filesystem mount point */ 564 xfs_mount_t *mp; /* filesystem mount point */
@@ -518,27 +575,40 @@ xfs_dir2_data_init(
518 XFS_DATA_FORK); 575 XFS_DATA_FORK);
519 if (error) 576 if (error)
520 return error; 577 return error;
521 bp->b_ops = &xfs_dir2_data_buf_ops; 578 bp->b_ops = &xfs_dir3_data_buf_ops;
579 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF);
522 580
523 /* 581 /*
524 * Initialize the header. 582 * Initialize the header.
525 */ 583 */
526 hdr = bp->b_addr; 584 hdr = bp->b_addr;
527 hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); 585 if (xfs_sb_version_hascrc(&mp->m_sb)) {
528 hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr)); 586 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
587
588 memset(hdr3, 0, sizeof(*hdr3));
589 hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
590 hdr3->blkno = cpu_to_be64(bp->b_bn);
591 hdr3->owner = cpu_to_be64(dp->i_ino);
592 uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
593
594 } else
595 hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
596
597 bf = xfs_dir3_data_bestfree_p(hdr);
598 bf[0].offset = cpu_to_be16(xfs_dir3_data_entry_offset(hdr));
529 for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { 599 for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
530 hdr->bestfree[i].length = 0; 600 bf[i].length = 0;
531 hdr->bestfree[i].offset = 0; 601 bf[i].offset = 0;
532 } 602 }
533 603
534 /* 604 /*
535 * Set up an unused entry for the block's body. 605 * Set up an unused entry for the block's body.
536 */ 606 */
537 dup = (xfs_dir2_data_unused_t *)(hdr + 1); 607 dup = xfs_dir3_data_unused_p(hdr);
538 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 608 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
539 609
540 t = mp->m_dirblksize - (uint)sizeof(*hdr); 610 t = mp->m_dirblksize - (uint)xfs_dir3_data_entry_offset(hdr);
541 hdr->bestfree[0].length = cpu_to_be16(t); 611 bf[0].length = cpu_to_be16(t);
542 dup->length = cpu_to_be16(t); 612 dup->length = cpu_to_be16(t);
543 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); 613 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
544 /* 614 /*
@@ -562,7 +632,9 @@ xfs_dir2_data_log_entry(
562 xfs_dir2_data_hdr_t *hdr = bp->b_addr; 632 xfs_dir2_data_hdr_t *hdr = bp->b_addr;
563 633
564 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 634 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
565 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 635 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
636 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
637 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
566 638
567 xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr), 639 xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
568 (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) - 640 (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
@@ -580,9 +652,11 @@ xfs_dir2_data_log_header(
580 xfs_dir2_data_hdr_t *hdr = bp->b_addr; 652 xfs_dir2_data_hdr_t *hdr = bp->b_addr;
581 653
582 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 654 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
583 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 655 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
656 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
657 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
584 658
585 xfs_trans_log_buf(tp, bp, 0, sizeof(*hdr) - 1); 659 xfs_trans_log_buf(tp, bp, 0, xfs_dir3_data_entry_offset(hdr) - 1);
586} 660}
587 661
588/* 662/*
@@ -597,7 +671,9 @@ xfs_dir2_data_log_unused(
597 xfs_dir2_data_hdr_t *hdr = bp->b_addr; 671 xfs_dir2_data_hdr_t *hdr = bp->b_addr;
598 672
599 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 673 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
600 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 674 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
675 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
676 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
601 677
602 /* 678 /*
603 * Log the first part of the unused entry. 679 * Log the first part of the unused entry.
@@ -635,6 +711,7 @@ xfs_dir2_data_make_free(
635 xfs_dir2_data_unused_t *newdup; /* new unused entry */ 711 xfs_dir2_data_unused_t *newdup; /* new unused entry */
636 xfs_dir2_data_unused_t *postdup; /* unused entry after us */ 712 xfs_dir2_data_unused_t *postdup; /* unused entry after us */
637 xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ 713 xfs_dir2_data_unused_t *prevdup; /* unused entry before us */
714 struct xfs_dir2_data_free *bf;
638 715
639 mp = tp->t_mountp; 716 mp = tp->t_mountp;
640 hdr = bp->b_addr; 717 hdr = bp->b_addr;
@@ -642,12 +719,14 @@ xfs_dir2_data_make_free(
642 /* 719 /*
643 * Figure out where the end of the data area is. 720 * Figure out where the end of the data area is.
644 */ 721 */
645 if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)) 722 if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
723 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC))
646 endptr = (char *)hdr + mp->m_dirblksize; 724 endptr = (char *)hdr + mp->m_dirblksize;
647 else { 725 else {
648 xfs_dir2_block_tail_t *btp; /* block tail */ 726 xfs_dir2_block_tail_t *btp; /* block tail */
649 727
650 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 728 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
729 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
651 btp = xfs_dir2_block_tail_p(mp, hdr); 730 btp = xfs_dir2_block_tail_p(mp, hdr);
652 endptr = (char *)xfs_dir2_block_leaf_p(btp); 731 endptr = (char *)xfs_dir2_block_leaf_p(btp);
653 } 732 }
@@ -655,7 +734,7 @@ xfs_dir2_data_make_free(
655 * If this isn't the start of the block, then back up to 734 * If this isn't the start of the block, then back up to
656 * the previous entry and see if it's free. 735 * the previous entry and see if it's free.
657 */ 736 */
658 if (offset > sizeof(*hdr)) { 737 if (offset > xfs_dir3_data_entry_offset(hdr)) {
659 __be16 *tagp; /* tag just before us */ 738 __be16 *tagp; /* tag just before us */
660 739
661 tagp = (__be16 *)((char *)hdr + offset) - 1; 740 tagp = (__be16 *)((char *)hdr + offset) - 1;
@@ -681,6 +760,7 @@ xfs_dir2_data_make_free(
681 * Previous and following entries are both free, 760 * Previous and following entries are both free,
682 * merge everything into a single free entry. 761 * merge everything into a single free entry.
683 */ 762 */
763 bf = xfs_dir3_data_bestfree_p(hdr);
684 if (prevdup && postdup) { 764 if (prevdup && postdup) {
685 xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */ 765 xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */
686 766
@@ -695,7 +775,7 @@ xfs_dir2_data_make_free(
695 * since the third bestfree is there, there might be more 775 * since the third bestfree is there, there might be more
696 * entries. 776 * entries.
697 */ 777 */
698 needscan = (hdr->bestfree[2].length != 0); 778 needscan = (bf[2].length != 0);
699 /* 779 /*
700 * Fix up the new big freespace. 780 * Fix up the new big freespace.
701 */ 781 */
@@ -711,10 +791,10 @@ xfs_dir2_data_make_free(
711 * Remove entry 1 first then entry 0. 791 * Remove entry 1 first then entry 0.
712 */ 792 */
713 ASSERT(dfp && dfp2); 793 ASSERT(dfp && dfp2);
714 if (dfp == &hdr->bestfree[1]) { 794 if (dfp == &bf[1]) {
715 dfp = &hdr->bestfree[0]; 795 dfp = &bf[0];
716 ASSERT(dfp2 == dfp); 796 ASSERT(dfp2 == dfp);
717 dfp2 = &hdr->bestfree[1]; 797 dfp2 = &bf[1];
718 } 798 }
719 xfs_dir2_data_freeremove(hdr, dfp2, needlogp); 799 xfs_dir2_data_freeremove(hdr, dfp2, needlogp);
720 xfs_dir2_data_freeremove(hdr, dfp, needlogp); 800 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
@@ -722,7 +802,7 @@ xfs_dir2_data_make_free(
722 * Now insert the new entry. 802 * Now insert the new entry.
723 */ 803 */
724 dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); 804 dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
725 ASSERT(dfp == &hdr->bestfree[0]); 805 ASSERT(dfp == &bf[0]);
726 ASSERT(dfp->length == prevdup->length); 806 ASSERT(dfp->length == prevdup->length);
727 ASSERT(!dfp[1].length); 807 ASSERT(!dfp[1].length);
728 ASSERT(!dfp[2].length); 808 ASSERT(!dfp[2].length);
@@ -751,7 +831,7 @@ xfs_dir2_data_make_free(
751 */ 831 */
752 else { 832 else {
753 needscan = be16_to_cpu(prevdup->length) > 833 needscan = be16_to_cpu(prevdup->length) >
754 be16_to_cpu(hdr->bestfree[2].length); 834 be16_to_cpu(bf[2].length);
755 } 835 }
756 } 836 }
757 /* 837 /*
@@ -779,7 +859,7 @@ xfs_dir2_data_make_free(
779 */ 859 */
780 else { 860 else {
781 needscan = be16_to_cpu(newdup->length) > 861 needscan = be16_to_cpu(newdup->length) >
782 be16_to_cpu(hdr->bestfree[2].length); 862 be16_to_cpu(bf[2].length);
783 } 863 }
784 } 864 }
785 /* 865 /*
@@ -818,10 +898,13 @@ xfs_dir2_data_use_free(
818 xfs_dir2_data_unused_t *newdup; /* new unused entry */ 898 xfs_dir2_data_unused_t *newdup; /* new unused entry */
819 xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ 899 xfs_dir2_data_unused_t *newdup2; /* another new unused entry */
820 int oldlen; /* old unused entry's length */ 900 int oldlen; /* old unused entry's length */
901 struct xfs_dir2_data_free *bf;
821 902
822 hdr = bp->b_addr; 903 hdr = bp->b_addr;
823 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 904 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
824 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)); 905 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
906 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
907 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
825 ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); 908 ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
826 ASSERT(offset >= (char *)dup - (char *)hdr); 909 ASSERT(offset >= (char *)dup - (char *)hdr);
827 ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr); 910 ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
@@ -831,7 +914,8 @@ xfs_dir2_data_use_free(
831 */ 914 */
832 dfp = xfs_dir2_data_freefind(hdr, dup); 915 dfp = xfs_dir2_data_freefind(hdr, dup);
833 oldlen = be16_to_cpu(dup->length); 916 oldlen = be16_to_cpu(dup->length);
834 ASSERT(dfp || oldlen <= be16_to_cpu(hdr->bestfree[2].length)); 917 bf = xfs_dir3_data_bestfree_p(hdr);
918 ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length));
835 /* 919 /*
836 * Check for alignment with front and back of the entry. 920 * Check for alignment with front and back of the entry.
837 */ 921 */
@@ -845,7 +929,7 @@ xfs_dir2_data_use_free(
845 */ 929 */
846 if (matchfront && matchback) { 930 if (matchfront && matchback) {
847 if (dfp) { 931 if (dfp) {
848 needscan = (hdr->bestfree[2].offset != 0); 932 needscan = (bf[2].offset != 0);
849 if (!needscan) 933 if (!needscan)
850 xfs_dir2_data_freeremove(hdr, dfp, needlogp); 934 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
851 } 935 }
@@ -875,7 +959,7 @@ xfs_dir2_data_use_free(
875 * that means we don't know if there was a better 959 * that means we don't know if there was a better
876 * choice for the last slot, or not. Rescan. 960 * choice for the last slot, or not. Rescan.
877 */ 961 */
878 needscan = dfp == &hdr->bestfree[2]; 962 needscan = dfp == &bf[2];
879 } 963 }
880 } 964 }
881 /* 965 /*
@@ -902,7 +986,7 @@ xfs_dir2_data_use_free(
902 * that means we don't know if there was a better 986 * that means we don't know if there was a better
903 * choice for the last slot, or not. Rescan. 987 * choice for the last slot, or not. Rescan.
904 */ 988 */
905 needscan = dfp == &hdr->bestfree[2]; 989 needscan = dfp == &bf[2];
906 } 990 }
907 } 991 }
908 /* 992 /*
@@ -930,7 +1014,7 @@ xfs_dir2_data_use_free(
930 * the 2 new will work. 1014 * the 2 new will work.
931 */ 1015 */
932 if (dfp) { 1016 if (dfp) {
933 needscan = (hdr->bestfree[2].length != 0); 1017 needscan = (bf[2].length != 0);
934 if (!needscan) { 1018 if (!needscan) {
935 xfs_dir2_data_freeremove(hdr, dfp, needlogp); 1019 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
936 xfs_dir2_data_freeinsert(hdr, newdup, needlogp); 1020 xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h
index 07270981f48f..a3b1bd841a80 100644
--- a/fs/xfs/xfs_dir2_format.h
+++ b/fs/xfs/xfs_dir2_format.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -36,6 +37,38 @@
36#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F: free index blocks */ 37#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F: free index blocks */
37 38
38/* 39/*
40 * Directory Version 3 With CRCs.
41 *
42 * The tree formats are the same as for version 2 directories. The difference
43 * is in the block header and dirent formats. In many cases the v3 structures
44 * use v2 definitions as they are no different and this makes code sharing much
45 * easier.
46 *
47 * Also, the xfs_dir3_*() functions handle both v2 and v3 formats - if the
48 * format is v2 then they switch to the existing v2 code, or the format is v3
49 * they implement the v3 functionality. This means the existing dir2 is a mix of
50 * xfs_dir2/xfs_dir3 calls and functions. The xfs_dir3 functions are called
51 * where there is a difference in the formats, otherwise the code is unchanged.
52 *
53 * Where it is possible, the code decides what to do based on the magic numbers
54 * in the blocks rather than feature bits in the superblock. This means the code
55 * is as independent of the external XFS code as possible as doesn't require
56 * passing struct xfs_mount pointers into places where it isn't really
57 * necessary.
58 *
59 * Version 3 includes:
60 *
61 * - a larger block header for CRC and identification purposes and so the
62 * offsets of all the structures inside the blocks are different.
63 *
64 * - new magic numbers to be able to detect the v2/v3 types on the fly.
65 */
66
67#define XFS_DIR3_BLOCK_MAGIC 0x58444233 /* XDB3: single block dirs */
68#define XFS_DIR3_DATA_MAGIC 0x58444433 /* XDD3: multiblock dirs */
69#define XFS_DIR3_FREE_MAGIC 0x58444633 /* XDF3: free index blocks */
70
71/*
39 * Byte offset in data block and shortform entry. 72 * Byte offset in data block and shortform entry.
40 */ 73 */
41typedef __uint16_t xfs_dir2_data_off_t; 74typedef __uint16_t xfs_dir2_data_off_t;
@@ -195,16 +228,6 @@ xfs_dir2_sf_nextentry(struct xfs_dir2_sf_hdr *hdr,
195 xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET) 228 xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
196 229
197/* 230/*
198 * Offsets of . and .. in data space (always block 0)
199 */
200#define XFS_DIR2_DATA_DOT_OFFSET \
201 ((xfs_dir2_data_aoff_t)sizeof(struct xfs_dir2_data_hdr))
202#define XFS_DIR2_DATA_DOTDOT_OFFSET \
203 (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
204#define XFS_DIR2_DATA_FIRST_OFFSET \
205 (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
206
207/*
208 * Describe a free area in the data block. 231 * Describe a free area in the data block.
209 * 232 *
210 * The freespace will be formatted as a xfs_dir2_data_unused_t. 233 * The freespace will be formatted as a xfs_dir2_data_unused_t.
@@ -226,6 +249,39 @@ typedef struct xfs_dir2_data_hdr {
226} xfs_dir2_data_hdr_t; 249} xfs_dir2_data_hdr_t;
227 250
228/* 251/*
252 * define a structure for all the verification fields we are adding to the
253 * directory block structures. This will be used in several structures.
254 * The magic number must be the first entry to align with all the dir2
255 * structures so we determine how to decode them just by the magic number.
256 */
257struct xfs_dir3_blk_hdr {
258 __be32 magic; /* magic number */
259 __be32 crc; /* CRC of block */
260 __be64 blkno; /* first block of the buffer */
261 __be64 lsn; /* sequence number of last write */
262 uuid_t uuid; /* filesystem we belong to */
263 __be64 owner; /* inode that owns the block */
264};
265
266struct xfs_dir3_data_hdr {
267 struct xfs_dir3_blk_hdr hdr;
268 xfs_dir2_data_free_t best_free[XFS_DIR2_DATA_FD_COUNT];
269};
270
271#define XFS_DIR3_DATA_CRC_OFF offsetof(struct xfs_dir3_data_hdr, hdr.crc)
272
273static inline struct xfs_dir2_data_free *
274xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
275{
276 if (hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
277 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
278 struct xfs_dir3_data_hdr *hdr3 = (struct xfs_dir3_data_hdr *)hdr;
279 return hdr3->best_free;
280 }
281 return hdr->bestfree;
282}
283
284/*
229 * Active entry in a data block. 285 * Active entry in a data block.
230 * 286 *
231 * Aligned to 8 bytes. After the variable length name field there is a 287 * Aligned to 8 bytes. After the variable length name field there is a
@@ -280,6 +336,94 @@ xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
280 be16_to_cpu(dup->length) - sizeof(__be16)); 336 be16_to_cpu(dup->length) - sizeof(__be16));
281} 337}
282 338
339static inline size_t
340xfs_dir3_data_hdr_size(bool dir3)
341{
342 if (dir3)
343 return sizeof(struct xfs_dir3_data_hdr);
344 return sizeof(struct xfs_dir2_data_hdr);
345}
346
347static inline size_t
348xfs_dir3_data_entry_offset(struct xfs_dir2_data_hdr *hdr)
349{
350 bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
351 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
352 return xfs_dir3_data_hdr_size(dir3);
353}
354
355static inline struct xfs_dir2_data_entry *
356xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr)
357{
358 return (struct xfs_dir2_data_entry *)
359 ((char *)hdr + xfs_dir3_data_entry_offset(hdr));
360}
361
362static inline struct xfs_dir2_data_unused *
363xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr)
364{
365 return (struct xfs_dir2_data_unused *)
366 ((char *)hdr + xfs_dir3_data_entry_offset(hdr));
367}
368
369/*
370 * Offsets of . and .. in data space (always block 0)
371 *
372 * The macros are used for shortform directories as they have no headers to read
373 * the magic number out of. Shortform directories need to know the size of the
374 * data block header because the sfe embeds the block offset of the entry into
375 * it so that it doesn't change when format conversion occurs. Bad Things Happen
376 * if we don't follow this rule.
377 */
378#define XFS_DIR3_DATA_DOT_OFFSET(mp) \
379 xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&(mp)->m_sb))
380#define XFS_DIR3_DATA_DOTDOT_OFFSET(mp) \
381 (XFS_DIR3_DATA_DOT_OFFSET(mp) + xfs_dir2_data_entsize(1))
382#define XFS_DIR3_DATA_FIRST_OFFSET(mp) \
383 (XFS_DIR3_DATA_DOTDOT_OFFSET(mp) + xfs_dir2_data_entsize(2))
384
385static inline xfs_dir2_data_aoff_t
386xfs_dir3_data_dot_offset(struct xfs_dir2_data_hdr *hdr)
387{
388 return xfs_dir3_data_entry_offset(hdr);
389}
390
391static inline xfs_dir2_data_aoff_t
392xfs_dir3_data_dotdot_offset(struct xfs_dir2_data_hdr *hdr)
393{
394 return xfs_dir3_data_dot_offset(hdr) + xfs_dir2_data_entsize(1);
395}
396
397static inline xfs_dir2_data_aoff_t
398xfs_dir3_data_first_offset(struct xfs_dir2_data_hdr *hdr)
399{
400 return xfs_dir3_data_dotdot_offset(hdr) + xfs_dir2_data_entsize(2);
401}
402
403/*
404 * location of . and .. in data space (always block 0)
405 */
406static inline struct xfs_dir2_data_entry *
407xfs_dir3_data_dot_entry_p(struct xfs_dir2_data_hdr *hdr)
408{
409 return (struct xfs_dir2_data_entry *)
410 ((char *)hdr + xfs_dir3_data_dot_offset(hdr));
411}
412
413static inline struct xfs_dir2_data_entry *
414xfs_dir3_data_dotdot_entry_p(struct xfs_dir2_data_hdr *hdr)
415{
416 return (struct xfs_dir2_data_entry *)
417 ((char *)hdr + xfs_dir3_data_dotdot_offset(hdr));
418}
419
420static inline struct xfs_dir2_data_entry *
421xfs_dir3_data_first_entry_p(struct xfs_dir2_data_hdr *hdr)
422{
423 return (struct xfs_dir2_data_entry *)
424 ((char *)hdr + xfs_dir3_data_first_offset(hdr));
425}
426
283/* 427/*
284 * Leaf block structures. 428 * Leaf block structures.
285 * 429 *
@@ -329,6 +473,21 @@ typedef struct xfs_dir2_leaf_hdr {
329 __be16 stale; /* count of stale entries */ 473 __be16 stale; /* count of stale entries */
330} xfs_dir2_leaf_hdr_t; 474} xfs_dir2_leaf_hdr_t;
331 475
476struct xfs_dir3_leaf_hdr {
477 struct xfs_da3_blkinfo info; /* header for da routines */
478 __be16 count; /* count of entries */
479 __be16 stale; /* count of stale entries */
480 __be32 pad;
481};
482
483struct xfs_dir3_icleaf_hdr {
484 __uint32_t forw;
485 __uint32_t back;
486 __uint16_t magic;
487 __uint16_t count;
488 __uint16_t stale;
489};
490
332/* 491/*
333 * Leaf block entry. 492 * Leaf block entry.
334 */ 493 */
@@ -348,23 +507,50 @@ typedef struct xfs_dir2_leaf_tail {
348 * Leaf block. 507 * Leaf block.
349 */ 508 */
350typedef struct xfs_dir2_leaf { 509typedef struct xfs_dir2_leaf {
351 xfs_dir2_leaf_hdr_t hdr; /* leaf header */ 510 xfs_dir2_leaf_hdr_t hdr; /* leaf header */
352 xfs_dir2_leaf_entry_t ents[]; /* entries */ 511 xfs_dir2_leaf_entry_t __ents[]; /* entries */
353} xfs_dir2_leaf_t; 512} xfs_dir2_leaf_t;
354 513
355/* 514struct xfs_dir3_leaf {
356 * DB blocks here are logical directory block numbers, not filesystem blocks. 515 struct xfs_dir3_leaf_hdr hdr; /* leaf header */
357 */ 516 struct xfs_dir2_leaf_entry __ents[]; /* entries */
517};
358 518
359static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp) 519#define XFS_DIR3_LEAF_CRC_OFF offsetof(struct xfs_dir3_leaf_hdr, info.crc)
520
521static inline int
522xfs_dir3_leaf_hdr_size(struct xfs_dir2_leaf *lp)
360{ 523{
361 return (mp->m_dirblksize - (uint)sizeof(struct xfs_dir2_leaf_hdr)) / 524 if (lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
525 lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC))
526 return sizeof(struct xfs_dir3_leaf_hdr);
527 return sizeof(struct xfs_dir2_leaf_hdr);
528}
529
530static inline int
531xfs_dir3_max_leaf_ents(struct xfs_mount *mp, struct xfs_dir2_leaf *lp)
532{
533 return (mp->m_dirblksize - xfs_dir3_leaf_hdr_size(lp)) /
362 (uint)sizeof(struct xfs_dir2_leaf_entry); 534 (uint)sizeof(struct xfs_dir2_leaf_entry);
363} 535}
364 536
365/* 537/*
366 * Get address of the bestcount field in the single-leaf block. 538 * Get address of the bestcount field in the single-leaf block.
367 */ 539 */
540static inline struct xfs_dir2_leaf_entry *
541xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp)
542{
543 if (lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
544 lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
545 struct xfs_dir3_leaf *lp3 = (struct xfs_dir3_leaf *)lp;
546 return lp3->__ents;
547 }
548 return lp->__ents;
549}
550
551/*
552 * Get address of the bestcount field in the single-leaf block.
553 */
368static inline struct xfs_dir2_leaf_tail * 554static inline struct xfs_dir2_leaf_tail *
369xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp) 555xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp)
370{ 556{
@@ -383,6 +569,10 @@ xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
383} 569}
384 570
385/* 571/*
572 * DB blocks here are logical directory block numbers, not filesystem blocks.
573 */
574
575/*
386 * Convert dataptr to byte in file space 576 * Convert dataptr to byte in file space
387 */ 577 */
388static inline xfs_dir2_off_t 578static inline xfs_dir2_off_t
@@ -520,19 +710,65 @@ typedef struct xfs_dir2_free {
520 /* unused entries are -1 */ 710 /* unused entries are -1 */
521} xfs_dir2_free_t; 711} xfs_dir2_free_t;
522 712
523static inline int xfs_dir2_free_max_bests(struct xfs_mount *mp) 713struct xfs_dir3_free_hdr {
714 struct xfs_dir3_blk_hdr hdr;
715 __be32 firstdb; /* db of first entry */
716 __be32 nvalid; /* count of valid entries */
717 __be32 nused; /* count of used entries */
718};
719
720struct xfs_dir3_free {
721 struct xfs_dir3_free_hdr hdr;
722 __be16 bests[]; /* best free counts */
723 /* unused entries are -1 */
724};
725
726#define XFS_DIR3_FREE_CRC_OFF offsetof(struct xfs_dir3_free, hdr.hdr.crc)
727
728/*
729 * In core version of the free block header, abstracted away from on-disk format
730 * differences. Use this in the code, and convert to/from the disk version using
731 * xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk.
732 */
733struct xfs_dir3_icfree_hdr {
734 __uint32_t magic;
735 __uint32_t firstdb;
736 __uint32_t nvalid;
737 __uint32_t nused;
738
739};
740
741void xfs_dir3_free_hdr_from_disk(struct xfs_dir3_icfree_hdr *to,
742 struct xfs_dir2_free *from);
743
744static inline int
745xfs_dir3_free_hdr_size(struct xfs_mount *mp)
524{ 746{
525 return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) / 747 if (xfs_sb_version_hascrc(&mp->m_sb))
748 return sizeof(struct xfs_dir3_free_hdr);
749 return sizeof(struct xfs_dir2_free_hdr);
750}
751
752static inline int
753xfs_dir3_free_max_bests(struct xfs_mount *mp)
754{
755 return (mp->m_dirblksize - xfs_dir3_free_hdr_size(mp)) /
526 sizeof(xfs_dir2_data_off_t); 756 sizeof(xfs_dir2_data_off_t);
527} 757}
528 758
759static inline __be16 *
760xfs_dir3_free_bests_p(struct xfs_mount *mp, struct xfs_dir2_free *free)
761{
762 return (__be16 *)((char *)free + xfs_dir3_free_hdr_size(mp));
763}
764
529/* 765/*
530 * Convert data space db to the corresponding free db. 766 * Convert data space db to the corresponding free db.
531 */ 767 */
532static inline xfs_dir2_db_t 768static inline xfs_dir2_db_t
533xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) 769xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
534{ 770{
535 return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp); 771 return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir3_free_max_bests(mp);
536} 772}
537 773
538/* 774/*
@@ -541,7 +777,7 @@ xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
541static inline int 777static inline int
542xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) 778xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
543{ 779{
544 return db % xfs_dir2_free_max_bests(mp); 780 return db % xfs_dir3_free_max_bests(mp);
545} 781}
546 782
547/* 783/*
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 60cd2fa4e047..721ba2fe8e54 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -33,97 +34,371 @@
33#include "xfs_dir2_priv.h" 34#include "xfs_dir2_priv.h"
34#include "xfs_error.h" 35#include "xfs_error.h"
35#include "xfs_trace.h" 36#include "xfs_trace.h"
37#include "xfs_buf_item.h"
38#include "xfs_cksum.h"
36 39
37/* 40/*
38 * Local function declarations. 41 * Local function declarations.
39 */ 42 */
40#ifdef DEBUG
41static void xfs_dir2_leaf_check(struct xfs_inode *dp, struct xfs_buf *bp);
42#else
43#define xfs_dir2_leaf_check(dp, bp)
44#endif
45static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp, 43static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
46 int *indexp, struct xfs_buf **dbpp); 44 int *indexp, struct xfs_buf **dbpp);
47static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp, 45static void xfs_dir3_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp,
48 int first, int last); 46 int first, int last);
49static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp); 47static void xfs_dir3_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp);
50 48
51static void 49/*
52xfs_dir2_leaf_verify( 50 * Check the internal consistency of a leaf1 block.
51 * Pop an assert if something is wrong.
52 */
53#ifdef DEBUG
54#define xfs_dir3_leaf_check(mp, bp) \
55do { \
56 if (!xfs_dir3_leaf1_check((mp), (bp))) \
57 ASSERT(0); \
58} while (0);
59
60STATIC bool
61xfs_dir3_leaf1_check(
62 struct xfs_mount *mp,
63 struct xfs_buf *bp)
64{
65 struct xfs_dir2_leaf *leaf = bp->b_addr;
66 struct xfs_dir3_icleaf_hdr leafhdr;
67
68 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
69
70 if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
71 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
72 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
73 return false;
74 } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC)
75 return false;
76
77 return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf);
78}
79#else
80#define xfs_dir3_leaf_check(mp, bp)
81#endif
82
83void
84xfs_dir3_leaf_hdr_from_disk(
85 struct xfs_dir3_icleaf_hdr *to,
86 struct xfs_dir2_leaf *from)
87{
88 if (from->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
89 from->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
90 to->forw = be32_to_cpu(from->hdr.info.forw);
91 to->back = be32_to_cpu(from->hdr.info.back);
92 to->magic = be16_to_cpu(from->hdr.info.magic);
93 to->count = be16_to_cpu(from->hdr.count);
94 to->stale = be16_to_cpu(from->hdr.stale);
95 } else {
96 struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from;
97
98 to->forw = be32_to_cpu(hdr3->info.hdr.forw);
99 to->back = be32_to_cpu(hdr3->info.hdr.back);
100 to->magic = be16_to_cpu(hdr3->info.hdr.magic);
101 to->count = be16_to_cpu(hdr3->count);
102 to->stale = be16_to_cpu(hdr3->stale);
103 }
104
105 ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC ||
106 to->magic == XFS_DIR3_LEAF1_MAGIC ||
107 to->magic == XFS_DIR2_LEAFN_MAGIC ||
108 to->magic == XFS_DIR3_LEAFN_MAGIC);
109}
110
111void
112xfs_dir3_leaf_hdr_to_disk(
113 struct xfs_dir2_leaf *to,
114 struct xfs_dir3_icleaf_hdr *from)
115{
116 ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC ||
117 from->magic == XFS_DIR3_LEAF1_MAGIC ||
118 from->magic == XFS_DIR2_LEAFN_MAGIC ||
119 from->magic == XFS_DIR3_LEAFN_MAGIC);
120
121 if (from->magic == XFS_DIR2_LEAF1_MAGIC ||
122 from->magic == XFS_DIR2_LEAFN_MAGIC) {
123 to->hdr.info.forw = cpu_to_be32(from->forw);
124 to->hdr.info.back = cpu_to_be32(from->back);
125 to->hdr.info.magic = cpu_to_be16(from->magic);
126 to->hdr.count = cpu_to_be16(from->count);
127 to->hdr.stale = cpu_to_be16(from->stale);
128 } else {
129 struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to;
130
131 hdr3->info.hdr.forw = cpu_to_be32(from->forw);
132 hdr3->info.hdr.back = cpu_to_be32(from->back);
133 hdr3->info.hdr.magic = cpu_to_be16(from->magic);
134 hdr3->count = cpu_to_be16(from->count);
135 hdr3->stale = cpu_to_be16(from->stale);
136 }
137}
138
139bool
140xfs_dir3_leaf_check_int(
141 struct xfs_mount *mp,
142 struct xfs_dir3_icleaf_hdr *hdr,
143 struct xfs_dir2_leaf *leaf)
144{
145 struct xfs_dir2_leaf_entry *ents;
146 xfs_dir2_leaf_tail_t *ltp;
147 int stale;
148 int i;
149
150 ents = xfs_dir3_leaf_ents_p(leaf);
151 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
152
153 /*
154 * XXX (dgc): This value is not restrictive enough.
155 * Should factor in the size of the bests table as well.
156 * We can deduce a value for that from di_size.
157 */
158 if (hdr->count > xfs_dir3_max_leaf_ents(mp, leaf))
159 return false;
160
161 /* Leaves and bests don't overlap in leaf format. */
162 if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
163 hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
164 (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
165 return false;
166
167 /* Check hash value order, count stale entries. */
168 for (i = stale = 0; i < hdr->count; i++) {
169 if (i + 1 < hdr->count) {
170 if (be32_to_cpu(ents[i].hashval) >
171 be32_to_cpu(ents[i + 1].hashval))
172 return false;
173 }
174 if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
175 stale++;
176 }
177 if (hdr->stale != stale)
178 return false;
179 return true;
180}
181
182static bool
183xfs_dir3_leaf_verify(
53 struct xfs_buf *bp, 184 struct xfs_buf *bp,
54 __be16 magic) 185 __uint16_t magic)
186{
187 struct xfs_mount *mp = bp->b_target->bt_mount;
188 struct xfs_dir2_leaf *leaf = bp->b_addr;
189 struct xfs_dir3_icleaf_hdr leafhdr;
190
191 ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
192
193 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
194 if (xfs_sb_version_hascrc(&mp->m_sb)) {
195 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
196
197 if ((magic == XFS_DIR2_LEAF1_MAGIC &&
198 leafhdr.magic != XFS_DIR3_LEAF1_MAGIC) ||
199 (magic == XFS_DIR2_LEAFN_MAGIC &&
200 leafhdr.magic != XFS_DIR3_LEAFN_MAGIC))
201 return false;
202
203 if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_uuid))
204 return false;
205 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
206 return false;
207 } else {
208 if (leafhdr.magic != magic)
209 return false;
210 }
211 return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf);
212}
213
214static void
215__read_verify(
216 struct xfs_buf *bp,
217 __uint16_t magic)
218{
219 struct xfs_mount *mp = bp->b_target->bt_mount;
220
221 if ((xfs_sb_version_hascrc(&mp->m_sb) &&
222 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
223 XFS_DIR3_LEAF_CRC_OFF)) ||
224 !xfs_dir3_leaf_verify(bp, magic)) {
225 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
226 xfs_buf_ioerror(bp, EFSCORRUPTED);
227 }
228}
229
230static void
231__write_verify(
232 struct xfs_buf *bp,
233 __uint16_t magic)
55{ 234{
56 struct xfs_mount *mp = bp->b_target->bt_mount; 235 struct xfs_mount *mp = bp->b_target->bt_mount;
57 struct xfs_dir2_leaf_hdr *hdr = bp->b_addr; 236 struct xfs_buf_log_item *bip = bp->b_fspriv;
58 int block_ok = 0; 237 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
59 238
60 block_ok = hdr->info.magic == magic; 239 if (!xfs_dir3_leaf_verify(bp, magic)) {
61 if (!block_ok) { 240 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
62 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
63 xfs_buf_ioerror(bp, EFSCORRUPTED); 241 xfs_buf_ioerror(bp, EFSCORRUPTED);
242 return;
64 } 243 }
244
245 if (!xfs_sb_version_hascrc(&mp->m_sb))
246 return;
247
248 if (bip)
249 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
250
251 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF);
65} 252}
66 253
67static void 254static void
68xfs_dir2_leaf1_read_verify( 255xfs_dir3_leaf1_read_verify(
69 struct xfs_buf *bp) 256 struct xfs_buf *bp)
70{ 257{
71 xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); 258 __read_verify(bp, XFS_DIR2_LEAF1_MAGIC);
72} 259}
73 260
74static void 261static void
75xfs_dir2_leaf1_write_verify( 262xfs_dir3_leaf1_write_verify(
76 struct xfs_buf *bp) 263 struct xfs_buf *bp)
77{ 264{
78 xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); 265 __write_verify(bp, XFS_DIR2_LEAF1_MAGIC);
79} 266}
80 267
81void 268static void
82xfs_dir2_leafn_read_verify( 269xfs_dir3_leafn_read_verify(
83 struct xfs_buf *bp) 270 struct xfs_buf *bp)
84{ 271{
85 xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 272 __read_verify(bp, XFS_DIR2_LEAFN_MAGIC);
86} 273}
87 274
88void 275static void
89xfs_dir2_leafn_write_verify( 276xfs_dir3_leafn_write_verify(
90 struct xfs_buf *bp) 277 struct xfs_buf *bp)
91{ 278{
92 xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 279 __write_verify(bp, XFS_DIR2_LEAFN_MAGIC);
93} 280}
94 281
95static const struct xfs_buf_ops xfs_dir2_leaf1_buf_ops = { 282const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
96 .verify_read = xfs_dir2_leaf1_read_verify, 283 .verify_read = xfs_dir3_leaf1_read_verify,
97 .verify_write = xfs_dir2_leaf1_write_verify, 284 .verify_write = xfs_dir3_leaf1_write_verify,
98}; 285};
99 286
100const struct xfs_buf_ops xfs_dir2_leafn_buf_ops = { 287const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
101 .verify_read = xfs_dir2_leafn_read_verify, 288 .verify_read = xfs_dir3_leafn_read_verify,
102 .verify_write = xfs_dir2_leafn_write_verify, 289 .verify_write = xfs_dir3_leafn_write_verify,
103}; 290};
104 291
105static int 292static int
106xfs_dir2_leaf_read( 293xfs_dir3_leaf_read(
107 struct xfs_trans *tp, 294 struct xfs_trans *tp,
108 struct xfs_inode *dp, 295 struct xfs_inode *dp,
109 xfs_dablk_t fbno, 296 xfs_dablk_t fbno,
110 xfs_daddr_t mappedbno, 297 xfs_daddr_t mappedbno,
111 struct xfs_buf **bpp) 298 struct xfs_buf **bpp)
112{ 299{
113 return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, 300 int err;
114 XFS_DATA_FORK, &xfs_dir2_leaf1_buf_ops); 301
302 err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
303 XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops);
304 if (!err && tp)
305 xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF);
306 return err;
115} 307}
116 308
117int 309int
118xfs_dir2_leafn_read( 310xfs_dir3_leafn_read(
119 struct xfs_trans *tp, 311 struct xfs_trans *tp,
120 struct xfs_inode *dp, 312 struct xfs_inode *dp,
121 xfs_dablk_t fbno, 313 xfs_dablk_t fbno,
122 xfs_daddr_t mappedbno, 314 xfs_daddr_t mappedbno,
123 struct xfs_buf **bpp) 315 struct xfs_buf **bpp)
124{ 316{
125 return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, 317 int err;
126 XFS_DATA_FORK, &xfs_dir2_leafn_buf_ops); 318
319 err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
320 XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops);
321 if (!err && tp)
322 xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF);
323 return err;
324}
325
326/*
327 * Initialize a new leaf block, leaf1 or leafn magic accepted.
328 */
329static void
330xfs_dir3_leaf_init(
331 struct xfs_mount *mp,
332 struct xfs_trans *tp,
333 struct xfs_buf *bp,
334 xfs_ino_t owner,
335 __uint16_t type)
336{
337 struct xfs_dir2_leaf *leaf = bp->b_addr;
338
339 ASSERT(type == XFS_DIR2_LEAF1_MAGIC || type == XFS_DIR2_LEAFN_MAGIC);
340
341 if (xfs_sb_version_hascrc(&mp->m_sb)) {
342 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
343
344 memset(leaf3, 0, sizeof(*leaf3));
345
346 leaf3->info.hdr.magic = (type == XFS_DIR2_LEAF1_MAGIC)
347 ? cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)
348 : cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
349 leaf3->info.blkno = cpu_to_be64(bp->b_bn);
350 leaf3->info.owner = cpu_to_be64(owner);
351 uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_uuid);
352 } else {
353 memset(leaf, 0, sizeof(*leaf));
354 leaf->hdr.info.magic = cpu_to_be16(type);
355 }
356
357 /*
358 * If it's a leaf-format directory initialize the tail.
359 * Caller is responsible for initialising the bests table.
360 */
361 if (type == XFS_DIR2_LEAF1_MAGIC) {
362 struct xfs_dir2_leaf_tail *ltp;
363
364 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
365 ltp->bestcount = 0;
366 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
367 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAF1_BUF);
368 } else {
369 bp->b_ops = &xfs_dir3_leafn_buf_ops;
370 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
371 }
372}
373
374int
375xfs_dir3_leaf_get_buf(
376 xfs_da_args_t *args,
377 xfs_dir2_db_t bno,
378 struct xfs_buf **bpp,
379 __uint16_t magic)
380{
381 struct xfs_inode *dp = args->dp;
382 struct xfs_trans *tp = args->trans;
383 struct xfs_mount *mp = dp->i_mount;
384 struct xfs_buf *bp;
385 int error;
386
387 ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
388 ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
389 bno < XFS_DIR2_FREE_FIRSTDB(mp));
390
391 error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
392 XFS_DATA_FORK);
393 if (error)
394 return error;
395
396 xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic);
397 xfs_dir3_leaf_log_header(tp, bp);
398 if (magic == XFS_DIR2_LEAF1_MAGIC)
399 xfs_dir3_leaf_log_tail(tp, bp);
400 *bpp = bp;
401 return 0;
127} 402}
128 403
129/* 404/*
@@ -149,6 +424,9 @@ xfs_dir2_block_to_leaf(
149 int needlog; /* need to log block header */ 424 int needlog; /* need to log block header */
150 int needscan; /* need to rescan bestfree */ 425 int needscan; /* need to rescan bestfree */
151 xfs_trans_t *tp; /* transaction pointer */ 426 xfs_trans_t *tp; /* transaction pointer */
427 struct xfs_dir2_data_free *bf;
428 struct xfs_dir2_leaf_entry *ents;
429 struct xfs_dir3_icleaf_hdr leafhdr;
152 430
153 trace_xfs_dir2_block_to_leaf(args); 431 trace_xfs_dir2_block_to_leaf(args);
154 432
@@ -168,26 +446,33 @@ xfs_dir2_block_to_leaf(
168 /* 446 /*
169 * Initialize the leaf block, get a buffer for it. 447 * Initialize the leaf block, get a buffer for it.
170 */ 448 */
171 if ((error = xfs_dir2_leaf_init(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC))) { 449 error = xfs_dir3_leaf_get_buf(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC);
450 if (error)
172 return error; 451 return error;
173 } 452
174 ASSERT(lbp != NULL);
175 leaf = lbp->b_addr; 453 leaf = lbp->b_addr;
176 hdr = dbp->b_addr; 454 hdr = dbp->b_addr;
177 xfs_dir2_data_check(dp, dbp); 455 xfs_dir3_data_check(dp, dbp);
178 btp = xfs_dir2_block_tail_p(mp, hdr); 456 btp = xfs_dir2_block_tail_p(mp, hdr);
179 blp = xfs_dir2_block_leaf_p(btp); 457 blp = xfs_dir2_block_leaf_p(btp);
458 bf = xfs_dir3_data_bestfree_p(hdr);
459 ents = xfs_dir3_leaf_ents_p(leaf);
460
180 /* 461 /*
181 * Set the counts in the leaf header. 462 * Set the counts in the leaf header.
182 */ 463 */
183 leaf->hdr.count = cpu_to_be16(be32_to_cpu(btp->count)); 464 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
184 leaf->hdr.stale = cpu_to_be16(be32_to_cpu(btp->stale)); 465 leafhdr.count = be32_to_cpu(btp->count);
466 leafhdr.stale = be32_to_cpu(btp->stale);
467 xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
468 xfs_dir3_leaf_log_header(tp, lbp);
469
185 /* 470 /*
186 * Could compact these but I think we always do the conversion 471 * Could compact these but I think we always do the conversion
187 * after squeezing out stale entries. 472 * after squeezing out stale entries.
188 */ 473 */
189 memcpy(leaf->ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t)); 474 memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
190 xfs_dir2_leaf_log_ents(tp, lbp, 0, be16_to_cpu(leaf->hdr.count) - 1); 475 xfs_dir3_leaf_log_ents(tp, lbp, 0, leafhdr.count - 1);
191 needscan = 0; 476 needscan = 0;
192 needlog = 1; 477 needlog = 1;
193 /* 478 /*
@@ -202,8 +487,13 @@ xfs_dir2_block_to_leaf(
202 /* 487 /*
203 * Fix up the block header, make it a data block. 488 * Fix up the block header, make it a data block.
204 */ 489 */
205 dbp->b_ops = &xfs_dir2_data_buf_ops; 490 dbp->b_ops = &xfs_dir3_data_buf_ops;
206 hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); 491 xfs_trans_buf_set_type(tp, dbp, XFS_BLFT_DIR_DATA_BUF);
492 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
493 hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
494 else
495 hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
496
207 if (needscan) 497 if (needscan)
208 xfs_dir2_data_freescan(mp, hdr, &needlog); 498 xfs_dir2_data_freescan(mp, hdr, &needlog);
209 /* 499 /*
@@ -212,21 +502,22 @@ xfs_dir2_block_to_leaf(
212 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 502 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
213 ltp->bestcount = cpu_to_be32(1); 503 ltp->bestcount = cpu_to_be32(1);
214 bestsp = xfs_dir2_leaf_bests_p(ltp); 504 bestsp = xfs_dir2_leaf_bests_p(ltp);
215 bestsp[0] = hdr->bestfree[0].length; 505 bestsp[0] = bf[0].length;
216 /* 506 /*
217 * Log the data header and leaf bests table. 507 * Log the data header and leaf bests table.
218 */ 508 */
219 if (needlog) 509 if (needlog)
220 xfs_dir2_data_log_header(tp, dbp); 510 xfs_dir2_data_log_header(tp, dbp);
221 xfs_dir2_leaf_check(dp, lbp); 511 xfs_dir3_leaf_check(mp, lbp);
222 xfs_dir2_data_check(dp, dbp); 512 xfs_dir3_data_check(dp, dbp);
223 xfs_dir2_leaf_log_bests(tp, lbp, 0, 0); 513 xfs_dir3_leaf_log_bests(tp, lbp, 0, 0);
224 return 0; 514 return 0;
225} 515}
226 516
227STATIC void 517STATIC void
228xfs_dir2_leaf_find_stale( 518xfs_dir3_leaf_find_stale(
229 struct xfs_dir2_leaf *leaf, 519 struct xfs_dir3_icleaf_hdr *leafhdr,
520 struct xfs_dir2_leaf_entry *ents,
230 int index, 521 int index,
231 int *lowstale, 522 int *lowstale,
232 int *highstale) 523 int *highstale)
@@ -235,7 +526,7 @@ xfs_dir2_leaf_find_stale(
235 * Find the first stale entry before our index, if any. 526 * Find the first stale entry before our index, if any.
236 */ 527 */
237 for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) { 528 for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
238 if (leaf->ents[*lowstale].address == 529 if (ents[*lowstale].address ==
239 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 530 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
240 break; 531 break;
241 } 532 }
@@ -245,10 +536,8 @@ xfs_dir2_leaf_find_stale(
245 * Stop if the result would require moving more entries than using 536 * Stop if the result would require moving more entries than using
246 * lowstale. 537 * lowstale.
247 */ 538 */
248 for (*highstale = index; 539 for (*highstale = index; *highstale < leafhdr->count; ++*highstale) {
249 *highstale < be16_to_cpu(leaf->hdr.count); 540 if (ents[*highstale].address ==
250 ++*highstale) {
251 if (leaf->ents[*highstale].address ==
252 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 541 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
253 break; 542 break;
254 if (*lowstale >= 0 && index - *lowstale <= *highstale - index) 543 if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
@@ -257,8 +546,9 @@ xfs_dir2_leaf_find_stale(
257} 546}
258 547
259struct xfs_dir2_leaf_entry * 548struct xfs_dir2_leaf_entry *
260xfs_dir2_leaf_find_entry( 549xfs_dir3_leaf_find_entry(
261 xfs_dir2_leaf_t *leaf, /* leaf structure */ 550 struct xfs_dir3_icleaf_hdr *leafhdr,
551 struct xfs_dir2_leaf_entry *ents,
262 int index, /* leaf table position */ 552 int index, /* leaf table position */
263 int compact, /* need to compact leaves */ 553 int compact, /* need to compact leaves */
264 int lowstale, /* index of prev stale leaf */ 554 int lowstale, /* index of prev stale leaf */
@@ -266,7 +556,7 @@ xfs_dir2_leaf_find_entry(
266 int *lfloglow, /* low leaf logging index */ 556 int *lfloglow, /* low leaf logging index */
267 int *lfloghigh) /* high leaf logging index */ 557 int *lfloghigh) /* high leaf logging index */
268{ 558{
269 if (!leaf->hdr.stale) { 559 if (!leafhdr->stale) {
270 xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ 560 xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */
271 561
272 /* 562 /*
@@ -274,18 +564,16 @@ xfs_dir2_leaf_find_entry(
274 * 564 *
275 * If there are no stale entries, just insert a hole at index. 565 * If there are no stale entries, just insert a hole at index.
276 */ 566 */
277 lep = &leaf->ents[index]; 567 lep = &ents[index];
278 if (index < be16_to_cpu(leaf->hdr.count)) 568 if (index < leafhdr->count)
279 memmove(lep + 1, lep, 569 memmove(lep + 1, lep,
280 (be16_to_cpu(leaf->hdr.count) - index) * 570 (leafhdr->count - index) * sizeof(*lep));
281 sizeof(*lep));
282 571
283 /* 572 /*
284 * Record low and high logging indices for the leaf. 573 * Record low and high logging indices for the leaf.
285 */ 574 */
286 *lfloglow = index; 575 *lfloglow = index;
287 *lfloghigh = be16_to_cpu(leaf->hdr.count); 576 *lfloghigh = leafhdr->count++;
288 be16_add_cpu(&leaf->hdr.count, 1);
289 return lep; 577 return lep;
290 } 578 }
291 579
@@ -299,16 +587,17 @@ xfs_dir2_leaf_find_entry(
299 * entries before and after our insertion point. 587 * entries before and after our insertion point.
300 */ 588 */
301 if (compact == 0) 589 if (compact == 0)
302 xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale); 590 xfs_dir3_leaf_find_stale(leafhdr, ents, index,
591 &lowstale, &highstale);
303 592
304 /* 593 /*
305 * If the low one is better, use it. 594 * If the low one is better, use it.
306 */ 595 */
307 if (lowstale >= 0 && 596 if (lowstale >= 0 &&
308 (highstale == be16_to_cpu(leaf->hdr.count) || 597 (highstale == leafhdr->count ||
309 index - lowstale - 1 < highstale - index)) { 598 index - lowstale - 1 < highstale - index)) {
310 ASSERT(index - lowstale - 1 >= 0); 599 ASSERT(index - lowstale - 1 >= 0);
311 ASSERT(leaf->ents[lowstale].address == 600 ASSERT(ents[lowstale].address ==
312 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); 601 cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
313 602
314 /* 603 /*
@@ -316,37 +605,34 @@ xfs_dir2_leaf_find_entry(
316 * for the new entry. 605 * for the new entry.
317 */ 606 */
318 if (index - lowstale - 1 > 0) { 607 if (index - lowstale - 1 > 0) {
319 memmove(&leaf->ents[lowstale], 608 memmove(&ents[lowstale], &ents[lowstale + 1],
320 &leaf->ents[lowstale + 1],
321 (index - lowstale - 1) * 609 (index - lowstale - 1) *
322 sizeof(xfs_dir2_leaf_entry_t)); 610 sizeof(xfs_dir2_leaf_entry_t));
323 } 611 }
324 *lfloglow = MIN(lowstale, *lfloglow); 612 *lfloglow = MIN(lowstale, *lfloglow);
325 *lfloghigh = MAX(index - 1, *lfloghigh); 613 *lfloghigh = MAX(index - 1, *lfloghigh);
326 be16_add_cpu(&leaf->hdr.stale, -1); 614 leafhdr->stale--;
327 return &leaf->ents[index - 1]; 615 return &ents[index - 1];
328 } 616 }
329 617
330 /* 618 /*
331 * The high one is better, so use that one. 619 * The high one is better, so use that one.
332 */ 620 */
333 ASSERT(highstale - index >= 0); 621 ASSERT(highstale - index >= 0);
334 ASSERT(leaf->ents[highstale].address == 622 ASSERT(ents[highstale].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
335 cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
336 623
337 /* 624 /*
338 * Copy entries down to cover the stale entry and make room for the 625 * Copy entries down to cover the stale entry and make room for the
339 * new entry. 626 * new entry.
340 */ 627 */
341 if (highstale - index > 0) { 628 if (highstale - index > 0) {
342 memmove(&leaf->ents[index + 1], 629 memmove(&ents[index + 1], &ents[index],
343 &leaf->ents[index],
344 (highstale - index) * sizeof(xfs_dir2_leaf_entry_t)); 630 (highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
345 } 631 }
346 *lfloglow = MIN(index, *lfloglow); 632 *lfloglow = MIN(index, *lfloglow);
347 *lfloghigh = MAX(highstale, *lfloghigh); 633 *lfloghigh = MAX(highstale, *lfloghigh);
348 be16_add_cpu(&leaf->hdr.stale, -1); 634 leafhdr->stale--;
349 return &leaf->ents[index]; 635 return &ents[index];
350} 636}
351 637
352/* 638/*
@@ -383,6 +669,9 @@ xfs_dir2_leaf_addname(
383 __be16 *tagp; /* end of data entry */ 669 __be16 *tagp; /* end of data entry */
384 xfs_trans_t *tp; /* transaction pointer */ 670 xfs_trans_t *tp; /* transaction pointer */
385 xfs_dir2_db_t use_block; /* data block number */ 671 xfs_dir2_db_t use_block; /* data block number */
672 struct xfs_dir2_data_free *bf; /* bestfree table */
673 struct xfs_dir2_leaf_entry *ents;
674 struct xfs_dir3_icleaf_hdr leafhdr;
386 675
387 trace_xfs_dir2_leaf_addname(args); 676 trace_xfs_dir2_leaf_addname(args);
388 677
@@ -390,7 +679,7 @@ xfs_dir2_leaf_addname(
390 tp = args->trans; 679 tp = args->trans;
391 mp = dp->i_mount; 680 mp = dp->i_mount;
392 681
393 error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp); 682 error = xfs_dir3_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
394 if (error) 683 if (error)
395 return error; 684 return error;
396 685
@@ -403,16 +692,19 @@ xfs_dir2_leaf_addname(
403 index = xfs_dir2_leaf_search_hash(args, lbp); 692 index = xfs_dir2_leaf_search_hash(args, lbp);
404 leaf = lbp->b_addr; 693 leaf = lbp->b_addr;
405 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 694 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
695 ents = xfs_dir3_leaf_ents_p(leaf);
696 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
406 bestsp = xfs_dir2_leaf_bests_p(ltp); 697 bestsp = xfs_dir2_leaf_bests_p(ltp);
407 length = xfs_dir2_data_entsize(args->namelen); 698 length = xfs_dir2_data_entsize(args->namelen);
699
408 /* 700 /*
409 * See if there are any entries with the same hash value 701 * See if there are any entries with the same hash value
410 * and space in their block for the new entry. 702 * and space in their block for the new entry.
411 * This is good because it puts multiple same-hash value entries 703 * This is good because it puts multiple same-hash value entries
412 * in a data block, improving the lookup of those entries. 704 * in a data block, improving the lookup of those entries.
413 */ 705 */
414 for (use_block = -1, lep = &leaf->ents[index]; 706 for (use_block = -1, lep = &ents[index];
415 index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval; 707 index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
416 index++, lep++) { 708 index++, lep++) {
417 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) 709 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
418 continue; 710 continue;
@@ -445,7 +737,7 @@ xfs_dir2_leaf_addname(
445 * How many bytes do we need in the leaf block? 737 * How many bytes do we need in the leaf block?
446 */ 738 */
447 needbytes = 0; 739 needbytes = 0;
448 if (!leaf->hdr.stale) 740 if (!leafhdr.stale)
449 needbytes += sizeof(xfs_dir2_leaf_entry_t); 741 needbytes += sizeof(xfs_dir2_leaf_entry_t);
450 if (use_block == -1) 742 if (use_block == -1)
451 needbytes += sizeof(xfs_dir2_data_off_t); 743 needbytes += sizeof(xfs_dir2_data_off_t);
@@ -460,16 +752,15 @@ xfs_dir2_leaf_addname(
460 * If we don't have enough free bytes but we can make enough 752 * If we don't have enough free bytes but we can make enough
461 * by compacting out stale entries, we'll do that. 753 * by compacting out stale entries, we'll do that.
462 */ 754 */
463 if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < 755 if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes &&
464 needbytes && be16_to_cpu(leaf->hdr.stale) > 1) { 756 leafhdr.stale > 1)
465 compact = 1; 757 compact = 1;
466 } 758
467 /* 759 /*
468 * Otherwise if we don't have enough free bytes we need to 760 * Otherwise if we don't have enough free bytes we need to
469 * convert to node form. 761 * convert to node form.
470 */ 762 */
471 else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu( 763 else if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes) {
472 leaf->hdr.count)] < needbytes) {
473 /* 764 /*
474 * Just checking or no space reservation, give up. 765 * Just checking or no space reservation, give up.
475 */ 766 */
@@ -517,15 +808,15 @@ xfs_dir2_leaf_addname(
517 * point later. 808 * point later.
518 */ 809 */
519 if (compact) { 810 if (compact) {
520 xfs_dir2_leaf_compact_x1(lbp, &index, &lowstale, &highstale, 811 xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale,
521 &lfloglow, &lfloghigh); 812 &highstale, &lfloglow, &lfloghigh);
522 } 813 }
523 /* 814 /*
524 * There are stale entries, so we'll need log-low and log-high 815 * There are stale entries, so we'll need log-low and log-high
525 * impossibly bad values later. 816 * impossibly bad values later.
526 */ 817 */
527 else if (be16_to_cpu(leaf->hdr.stale)) { 818 else if (leafhdr.stale) {
528 lfloglow = be16_to_cpu(leaf->hdr.count); 819 lfloglow = leafhdr.count;
529 lfloghigh = -1; 820 lfloghigh = -1;
530 } 821 }
531 /* 822 /*
@@ -544,7 +835,7 @@ xfs_dir2_leaf_addname(
544 /* 835 /*
545 * Initialize the block. 836 * Initialize the block.
546 */ 837 */
547 if ((error = xfs_dir2_data_init(args, use_block, &dbp))) { 838 if ((error = xfs_dir3_data_init(args, use_block, &dbp))) {
548 xfs_trans_brelse(tp, lbp); 839 xfs_trans_brelse(tp, lbp);
549 return error; 840 return error;
550 } 841 }
@@ -557,23 +848,24 @@ xfs_dir2_leaf_addname(
557 memmove(&bestsp[0], &bestsp[1], 848 memmove(&bestsp[0], &bestsp[1],
558 be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0])); 849 be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
559 be32_add_cpu(&ltp->bestcount, 1); 850 be32_add_cpu(&ltp->bestcount, 1);
560 xfs_dir2_leaf_log_tail(tp, lbp); 851 xfs_dir3_leaf_log_tail(tp, lbp);
561 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); 852 xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
562 } 853 }
563 /* 854 /*
564 * If we're filling in a previously empty block just log it. 855 * If we're filling in a previously empty block just log it.
565 */ 856 */
566 else 857 else
567 xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); 858 xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block);
568 hdr = dbp->b_addr; 859 hdr = dbp->b_addr;
569 bestsp[use_block] = hdr->bestfree[0].length; 860 bf = xfs_dir3_data_bestfree_p(hdr);
861 bestsp[use_block] = bf[0].length;
570 grown = 1; 862 grown = 1;
571 } else { 863 } else {
572 /* 864 /*
573 * Already had space in some data block. 865 * Already had space in some data block.
574 * Just read that one in. 866 * Just read that one in.
575 */ 867 */
576 error = xfs_dir2_data_read(tp, dp, 868 error = xfs_dir3_data_read(tp, dp,
577 xfs_dir2_db_to_da(mp, use_block), 869 xfs_dir2_db_to_da(mp, use_block),
578 -1, &dbp); 870 -1, &dbp);
579 if (error) { 871 if (error) {
@@ -581,13 +873,14 @@ xfs_dir2_leaf_addname(
581 return error; 873 return error;
582 } 874 }
583 hdr = dbp->b_addr; 875 hdr = dbp->b_addr;
876 bf = xfs_dir3_data_bestfree_p(hdr);
584 grown = 0; 877 grown = 0;
585 } 878 }
586 /* 879 /*
587 * Point to the biggest freespace in our data block. 880 * Point to the biggest freespace in our data block.
588 */ 881 */
589 dup = (xfs_dir2_data_unused_t *) 882 dup = (xfs_dir2_data_unused_t *)
590 ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset)); 883 ((char *)hdr + be16_to_cpu(bf[0].offset));
591 ASSERT(be16_to_cpu(dup->length) >= length); 884 ASSERT(be16_to_cpu(dup->length) >= length);
592 needscan = needlog = 0; 885 needscan = needlog = 0;
593 /* 886 /*
@@ -620,13 +913,13 @@ xfs_dir2_leaf_addname(
620 * If the bests table needs to be changed, do it. 913 * If the bests table needs to be changed, do it.
621 * Log the change unless we've already done that. 914 * Log the change unless we've already done that.
622 */ 915 */
623 if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) { 916 if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) {
624 bestsp[use_block] = hdr->bestfree[0].length; 917 bestsp[use_block] = bf[0].length;
625 if (!grown) 918 if (!grown)
626 xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); 919 xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block);
627 } 920 }
628 921
629 lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale, 922 lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale,
630 highstale, &lfloglow, &lfloghigh); 923 highstale, &lfloglow, &lfloghigh);
631 924
632 /* 925 /*
@@ -638,82 +931,40 @@ xfs_dir2_leaf_addname(
638 /* 931 /*
639 * Log the leaf fields and give up the buffers. 932 * Log the leaf fields and give up the buffers.
640 */ 933 */
641 xfs_dir2_leaf_log_header(tp, lbp); 934 xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
642 xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh); 935 xfs_dir3_leaf_log_header(tp, lbp);
643 xfs_dir2_leaf_check(dp, lbp); 936 xfs_dir3_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
644 xfs_dir2_data_check(dp, dbp); 937 xfs_dir3_leaf_check(mp, lbp);
938 xfs_dir3_data_check(dp, dbp);
645 return 0; 939 return 0;
646} 940}
647 941
648#ifdef DEBUG
649/*
650 * Check the internal consistency of a leaf1 block.
651 * Pop an assert if something is wrong.
652 */
653STATIC void
654xfs_dir2_leaf_check(
655 struct xfs_inode *dp, /* incore directory inode */
656 struct xfs_buf *bp) /* leaf's buffer */
657{
658 int i; /* leaf index */
659 xfs_dir2_leaf_t *leaf; /* leaf structure */
660 xfs_dir2_leaf_tail_t *ltp; /* leaf tail pointer */
661 xfs_mount_t *mp; /* filesystem mount point */
662 int stale; /* count of stale leaves */
663
664 leaf = bp->b_addr;
665 mp = dp->i_mount;
666 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
667 /*
668 * This value is not restrictive enough.
669 * Should factor in the size of the bests table as well.
670 * We can deduce a value for that from di_size.
671 */
672 ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
673 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
674 /*
675 * Leaves and bests don't overlap.
676 */
677 ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <=
678 (char *)xfs_dir2_leaf_bests_p(ltp));
679 /*
680 * Check hash value order, count stale entries.
681 */
682 for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
683 if (i + 1 < be16_to_cpu(leaf->hdr.count))
684 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
685 be32_to_cpu(leaf->ents[i + 1].hashval));
686 if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
687 stale++;
688 }
689 ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
690}
691#endif /* DEBUG */
692
693/* 942/*
694 * Compact out any stale entries in the leaf. 943 * Compact out any stale entries in the leaf.
695 * Log the header and changed leaf entries, if any. 944 * Log the header and changed leaf entries, if any.
696 */ 945 */
697void 946void
698xfs_dir2_leaf_compact( 947xfs_dir3_leaf_compact(
699 xfs_da_args_t *args, /* operation arguments */ 948 xfs_da_args_t *args, /* operation arguments */
949 struct xfs_dir3_icleaf_hdr *leafhdr,
700 struct xfs_buf *bp) /* leaf buffer */ 950 struct xfs_buf *bp) /* leaf buffer */
701{ 951{
702 int from; /* source leaf index */ 952 int from; /* source leaf index */
703 xfs_dir2_leaf_t *leaf; /* leaf structure */ 953 xfs_dir2_leaf_t *leaf; /* leaf structure */
704 int loglow; /* first leaf entry to log */ 954 int loglow; /* first leaf entry to log */
705 int to; /* target leaf index */ 955 int to; /* target leaf index */
956 struct xfs_dir2_leaf_entry *ents;
706 957
707 leaf = bp->b_addr; 958 leaf = bp->b_addr;
708 if (!leaf->hdr.stale) { 959 if (!leafhdr->stale)
709 return; 960 return;
710 } 961
711 /* 962 /*
712 * Compress out the stale entries in place. 963 * Compress out the stale entries in place.
713 */ 964 */
714 for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) { 965 ents = xfs_dir3_leaf_ents_p(leaf);
715 if (leaf->ents[from].address == 966 for (from = to = 0, loglow = -1; from < leafhdr->count; from++) {
716 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 967 if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
717 continue; 968 continue;
718 /* 969 /*
719 * Only actually copy the entries that are different. 970 * Only actually copy the entries that are different.
@@ -721,19 +972,21 @@ xfs_dir2_leaf_compact(
721 if (from > to) { 972 if (from > to) {
722 if (loglow == -1) 973 if (loglow == -1)
723 loglow = to; 974 loglow = to;
724 leaf->ents[to] = leaf->ents[from]; 975 ents[to] = ents[from];
725 } 976 }
726 to++; 977 to++;
727 } 978 }
728 /* 979 /*
729 * Update and log the header, log the leaf entries. 980 * Update and log the header, log the leaf entries.
730 */ 981 */
731 ASSERT(be16_to_cpu(leaf->hdr.stale) == from - to); 982 ASSERT(leafhdr->stale == from - to);
732 be16_add_cpu(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale))); 983 leafhdr->count -= leafhdr->stale;
733 leaf->hdr.stale = 0; 984 leafhdr->stale = 0;
734 xfs_dir2_leaf_log_header(args->trans, bp); 985
986 xfs_dir3_leaf_hdr_to_disk(leaf, leafhdr);
987 xfs_dir3_leaf_log_header(args->trans, bp);
735 if (loglow != -1) 988 if (loglow != -1)
736 xfs_dir2_leaf_log_ents(args->trans, bp, loglow, to - 1); 989 xfs_dir3_leaf_log_ents(args->trans, bp, loglow, to - 1);
737} 990}
738 991
739/* 992/*
@@ -745,8 +998,9 @@ xfs_dir2_leaf_compact(
745 * and leaf logging indices. 998 * and leaf logging indices.
746 */ 999 */
747void 1000void
748xfs_dir2_leaf_compact_x1( 1001xfs_dir3_leaf_compact_x1(
749 struct xfs_buf *bp, /* leaf buffer */ 1002 struct xfs_dir3_icleaf_hdr *leafhdr,
1003 struct xfs_dir2_leaf_entry *ents,
750 int *indexp, /* insertion index */ 1004 int *indexp, /* insertion index */
751 int *lowstalep, /* out: stale entry before us */ 1005 int *lowstalep, /* out: stale entry before us */
752 int *highstalep, /* out: stale entry after us */ 1006 int *highstalep, /* out: stale entry after us */
@@ -757,22 +1011,20 @@ xfs_dir2_leaf_compact_x1(
757 int highstale; /* stale entry at/after index */ 1011 int highstale; /* stale entry at/after index */
758 int index; /* insertion index */ 1012 int index; /* insertion index */
759 int keepstale; /* source index of kept stale */ 1013 int keepstale; /* source index of kept stale */
760 xfs_dir2_leaf_t *leaf; /* leaf structure */
761 int lowstale; /* stale entry before index */ 1014 int lowstale; /* stale entry before index */
762 int newindex=0; /* new insertion index */ 1015 int newindex=0; /* new insertion index */
763 int to; /* destination copy index */ 1016 int to; /* destination copy index */
764 1017
765 leaf = bp->b_addr; 1018 ASSERT(leafhdr->stale > 1);
766 ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
767 index = *indexp; 1019 index = *indexp;
768 1020
769 xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale); 1021 xfs_dir3_leaf_find_stale(leafhdr, ents, index, &lowstale, &highstale);
770 1022
771 /* 1023 /*
772 * Pick the better of lowstale and highstale. 1024 * Pick the better of lowstale and highstale.
773 */ 1025 */
774 if (lowstale >= 0 && 1026 if (lowstale >= 0 &&
775 (highstale == be16_to_cpu(leaf->hdr.count) || 1027 (highstale == leafhdr->count ||
776 index - lowstale <= highstale - index)) 1028 index - lowstale <= highstale - index))
777 keepstale = lowstale; 1029 keepstale = lowstale;
778 else 1030 else
@@ -781,15 +1033,14 @@ xfs_dir2_leaf_compact_x1(
781 * Copy the entries in place, removing all the stale entries 1033 * Copy the entries in place, removing all the stale entries
782 * except keepstale. 1034 * except keepstale.
783 */ 1035 */
784 for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { 1036 for (from = to = 0; from < leafhdr->count; from++) {
785 /* 1037 /*
786 * Notice the new value of index. 1038 * Notice the new value of index.
787 */ 1039 */
788 if (index == from) 1040 if (index == from)
789 newindex = to; 1041 newindex = to;
790 if (from != keepstale && 1042 if (from != keepstale &&
791 leaf->ents[from].address == 1043 ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
792 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
793 if (from == to) 1044 if (from == to)
794 *lowlogp = to; 1045 *lowlogp = to;
795 continue; 1046 continue;
@@ -803,7 +1054,7 @@ xfs_dir2_leaf_compact_x1(
803 * Copy only the entries that have moved. 1054 * Copy only the entries that have moved.
804 */ 1055 */
805 if (from > to) 1056 if (from > to)
806 leaf->ents[to] = leaf->ents[from]; 1057 ents[to] = ents[from];
807 to++; 1058 to++;
808 } 1059 }
809 ASSERT(from > to); 1060 ASSERT(from > to);
@@ -817,8 +1068,8 @@ xfs_dir2_leaf_compact_x1(
817 /* 1068 /*
818 * Adjust the leaf header values. 1069 * Adjust the leaf header values.
819 */ 1070 */
820 be16_add_cpu(&leaf->hdr.count, -(from - to)); 1071 leafhdr->count -= from - to;
821 leaf->hdr.stale = cpu_to_be16(1); 1072 leafhdr->stale = 1;
822 /* 1073 /*
823 * Remember the low/high stale value only in the "right" 1074 * Remember the low/high stale value only in the "right"
824 * direction. 1075 * direction.
@@ -826,8 +1077,8 @@ xfs_dir2_leaf_compact_x1(
826 if (lowstale >= newindex) 1077 if (lowstale >= newindex)
827 lowstale = -1; 1078 lowstale = -1;
828 else 1079 else
829 highstale = be16_to_cpu(leaf->hdr.count); 1080 highstale = leafhdr->count;
830 *highlogp = be16_to_cpu(leaf->hdr.count) - 1; 1081 *highlogp = leafhdr->count - 1;
831 *lowstalep = lowstale; 1082 *lowstalep = lowstale;
832 *highstalep = highstale; 1083 *highstalep = highstale;
833} 1084}
@@ -965,7 +1216,7 @@ xfs_dir2_leaf_readbuf(
965 * Read the directory block starting at the first mapping. 1216 * Read the directory block starting at the first mapping.
966 */ 1217 */
967 mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff); 1218 mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
968 error = xfs_dir2_data_read(NULL, dp, map->br_startoff, 1219 error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
969 map->br_blockcount >= mp->m_dirblkfsbs ? 1220 map->br_blockcount >= mp->m_dirblkfsbs ?
970 XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp); 1221 XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp);
971 1222
@@ -994,7 +1245,7 @@ xfs_dir2_leaf_readbuf(
994 */ 1245 */
995 if (i > mip->ra_current && 1246 if (i > mip->ra_current &&
996 map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) { 1247 map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
997 xfs_dir2_data_readahead(NULL, dp, 1248 xfs_dir3_data_readahead(NULL, dp,
998 map[mip->ra_index].br_startoff + mip->ra_offset, 1249 map[mip->ra_index].br_startoff + mip->ra_offset,
999 XFS_FSB_TO_DADDR(mp, 1250 XFS_FSB_TO_DADDR(mp,
1000 map[mip->ra_index].br_startblock + 1251 map[mip->ra_index].br_startblock +
@@ -1007,7 +1258,7 @@ xfs_dir2_leaf_readbuf(
1007 * use our mapping, but this is a very rare case. 1258 * use our mapping, but this is a very rare case.
1008 */ 1259 */
1009 else if (i > mip->ra_current) { 1260 else if (i > mip->ra_current) {
1010 xfs_dir2_data_readahead(NULL, dp, 1261 xfs_dir3_data_readahead(NULL, dp,
1011 map[mip->ra_index].br_startoff + 1262 map[mip->ra_index].br_startoff +
1012 mip->ra_offset, -1); 1263 mip->ra_offset, -1);
1013 mip->ra_current = i; 1264 mip->ra_current = i;
@@ -1133,17 +1384,17 @@ xfs_dir2_leaf_getdents(
1133 ASSERT(xfs_dir2_byte_to_db(mp, curoff) == 1384 ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
1134 map_info->curdb); 1385 map_info->curdb);
1135 hdr = bp->b_addr; 1386 hdr = bp->b_addr;
1136 xfs_dir2_data_check(dp, bp); 1387 xfs_dir3_data_check(dp, bp);
1137 /* 1388 /*
1138 * Find our position in the block. 1389 * Find our position in the block.
1139 */ 1390 */
1140 ptr = (char *)(hdr + 1); 1391 ptr = (char *)xfs_dir3_data_entry_p(hdr);
1141 byteoff = xfs_dir2_byte_to_off(mp, curoff); 1392 byteoff = xfs_dir2_byte_to_off(mp, curoff);
1142 /* 1393 /*
1143 * Skip past the header. 1394 * Skip past the header.
1144 */ 1395 */
1145 if (byteoff == 0) 1396 if (byteoff == 0)
1146 curoff += (uint)sizeof(*hdr); 1397 curoff += xfs_dir3_data_entry_offset(hdr);
1147 /* 1398 /*
1148 * Skip past entries until we reach our offset. 1399 * Skip past entries until we reach our offset.
1149 */ 1400 */
@@ -1220,69 +1471,12 @@ xfs_dir2_leaf_getdents(
1220 return error; 1471 return error;
1221} 1472}
1222 1473
1223/*
1224 * Initialize a new leaf block, leaf1 or leafn magic accepted.
1225 */
1226int
1227xfs_dir2_leaf_init(
1228 xfs_da_args_t *args, /* operation arguments */
1229 xfs_dir2_db_t bno, /* directory block number */
1230 struct xfs_buf **bpp, /* out: leaf buffer */
1231 int magic) /* magic number for block */
1232{
1233 struct xfs_buf *bp; /* leaf buffer */
1234 xfs_inode_t *dp; /* incore directory inode */
1235 int error; /* error return code */
1236 xfs_dir2_leaf_t *leaf; /* leaf structure */
1237 xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
1238 xfs_mount_t *mp; /* filesystem mount point */
1239 xfs_trans_t *tp; /* transaction pointer */
1240
1241 dp = args->dp;
1242 ASSERT(dp != NULL);
1243 tp = args->trans;
1244 mp = dp->i_mount;
1245 ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
1246 bno < XFS_DIR2_FREE_FIRSTDB(mp));
1247 /*
1248 * Get the buffer for the block.
1249 */
1250 error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
1251 XFS_DATA_FORK);
1252 if (error)
1253 return error;
1254
1255 /*
1256 * Initialize the header.
1257 */
1258 leaf = bp->b_addr;
1259 leaf->hdr.info.magic = cpu_to_be16(magic);
1260 leaf->hdr.info.forw = 0;
1261 leaf->hdr.info.back = 0;
1262 leaf->hdr.count = 0;
1263 leaf->hdr.stale = 0;
1264 xfs_dir2_leaf_log_header(tp, bp);
1265 /*
1266 * If it's a leaf-format directory initialize the tail.
1267 * In this case our caller has the real bests table to copy into
1268 * the block.
1269 */
1270 if (magic == XFS_DIR2_LEAF1_MAGIC) {
1271 bp->b_ops = &xfs_dir2_leaf1_buf_ops;
1272 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1273 ltp->bestcount = 0;
1274 xfs_dir2_leaf_log_tail(tp, bp);
1275 } else
1276 bp->b_ops = &xfs_dir2_leafn_buf_ops;
1277 *bpp = bp;
1278 return 0;
1279}
1280 1474
1281/* 1475/*
1282 * Log the bests entries indicated from a leaf1 block. 1476 * Log the bests entries indicated from a leaf1 block.
1283 */ 1477 */
1284static void 1478static void
1285xfs_dir2_leaf_log_bests( 1479xfs_dir3_leaf_log_bests(
1286 xfs_trans_t *tp, /* transaction pointer */ 1480 xfs_trans_t *tp, /* transaction pointer */
1287 struct xfs_buf *bp, /* leaf buffer */ 1481 struct xfs_buf *bp, /* leaf buffer */
1288 int first, /* first entry to log */ 1482 int first, /* first entry to log */
@@ -1290,11 +1484,12 @@ xfs_dir2_leaf_log_bests(
1290{ 1484{
1291 __be16 *firstb; /* pointer to first entry */ 1485 __be16 *firstb; /* pointer to first entry */
1292 __be16 *lastb; /* pointer to last entry */ 1486 __be16 *lastb; /* pointer to last entry */
1293 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1487 struct xfs_dir2_leaf *leaf = bp->b_addr;
1294 xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ 1488 xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
1295 1489
1296 leaf = bp->b_addr; 1490 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1297 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); 1491 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC));
1492
1298 ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf); 1493 ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
1299 firstb = xfs_dir2_leaf_bests_p(ltp) + first; 1494 firstb = xfs_dir2_leaf_bests_p(ltp) + first;
1300 lastb = xfs_dir2_leaf_bests_p(ltp) + last; 1495 lastb = xfs_dir2_leaf_bests_p(ltp) + last;
@@ -1306,7 +1501,7 @@ xfs_dir2_leaf_log_bests(
1306 * Log the leaf entries indicated from a leaf1 or leafn block. 1501 * Log the leaf entries indicated from a leaf1 or leafn block.
1307 */ 1502 */
1308void 1503void
1309xfs_dir2_leaf_log_ents( 1504xfs_dir3_leaf_log_ents(
1310 xfs_trans_t *tp, /* transaction pointer */ 1505 xfs_trans_t *tp, /* transaction pointer */
1311 struct xfs_buf *bp, /* leaf buffer */ 1506 struct xfs_buf *bp, /* leaf buffer */
1312 int first, /* first entry to log */ 1507 int first, /* first entry to log */
@@ -1314,13 +1509,17 @@ xfs_dir2_leaf_log_ents(
1314{ 1509{
1315 xfs_dir2_leaf_entry_t *firstlep; /* pointer to first entry */ 1510 xfs_dir2_leaf_entry_t *firstlep; /* pointer to first entry */
1316 xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */ 1511 xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */
1317 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1512 struct xfs_dir2_leaf *leaf = bp->b_addr;
1513 struct xfs_dir2_leaf_entry *ents;
1318 1514
1319 leaf = bp->b_addr;
1320 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || 1515 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1321 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 1516 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
1322 firstlep = &leaf->ents[first]; 1517 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1323 lastlep = &leaf->ents[last]; 1518 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
1519
1520 ents = xfs_dir3_leaf_ents_p(leaf);
1521 firstlep = &ents[first];
1522 lastlep = &ents[last];
1324 xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), 1523 xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
1325 (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1)); 1524 (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
1326} 1525}
@@ -1329,34 +1528,38 @@ xfs_dir2_leaf_log_ents(
1329 * Log the header of the leaf1 or leafn block. 1528 * Log the header of the leaf1 or leafn block.
1330 */ 1529 */
1331void 1530void
1332xfs_dir2_leaf_log_header( 1531xfs_dir3_leaf_log_header(
1333 struct xfs_trans *tp, 1532 struct xfs_trans *tp,
1334 struct xfs_buf *bp) 1533 struct xfs_buf *bp)
1335{ 1534{
1336 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1535 struct xfs_dir2_leaf *leaf = bp->b_addr;
1337 1536
1338 leaf = bp->b_addr;
1339 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || 1537 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1340 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 1538 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
1539 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1540 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
1541
1341 xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), 1542 xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
1342 (uint)(sizeof(leaf->hdr) - 1)); 1543 xfs_dir3_leaf_hdr_size(leaf) - 1);
1343} 1544}
1344 1545
1345/* 1546/*
1346 * Log the tail of the leaf1 block. 1547 * Log the tail of the leaf1 block.
1347 */ 1548 */
1348STATIC void 1549STATIC void
1349xfs_dir2_leaf_log_tail( 1550xfs_dir3_leaf_log_tail(
1350 struct xfs_trans *tp, 1551 struct xfs_trans *tp,
1351 struct xfs_buf *bp) 1552 struct xfs_buf *bp)
1352{ 1553{
1353 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1554 struct xfs_dir2_leaf *leaf = bp->b_addr;
1354 xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ 1555 xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
1355 xfs_mount_t *mp; /* filesystem mount point */ 1556 struct xfs_mount *mp = tp->t_mountp;
1557
1558 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1559 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
1560 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1561 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
1356 1562
1357 mp = tp->t_mountp;
1358 leaf = bp->b_addr;
1359 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
1360 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 1563 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1361 xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), 1564 xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
1362 (uint)(mp->m_dirblksize - 1)); 1565 (uint)(mp->m_dirblksize - 1));
@@ -1380,6 +1583,7 @@ xfs_dir2_leaf_lookup(
1380 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1583 xfs_dir2_leaf_t *leaf; /* leaf structure */
1381 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 1584 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1382 xfs_trans_t *tp; /* transaction pointer */ 1585 xfs_trans_t *tp; /* transaction pointer */
1586 struct xfs_dir2_leaf_entry *ents;
1383 1587
1384 trace_xfs_dir2_leaf_lookup(args); 1588 trace_xfs_dir2_leaf_lookup(args);
1385 1589
@@ -1391,12 +1595,14 @@ xfs_dir2_leaf_lookup(
1391 } 1595 }
1392 tp = args->trans; 1596 tp = args->trans;
1393 dp = args->dp; 1597 dp = args->dp;
1394 xfs_dir2_leaf_check(dp, lbp); 1598 xfs_dir3_leaf_check(dp->i_mount, lbp);
1395 leaf = lbp->b_addr; 1599 leaf = lbp->b_addr;
1600 ents = xfs_dir3_leaf_ents_p(leaf);
1396 /* 1601 /*
1397 * Get to the leaf entry and contained data entry address. 1602 * Get to the leaf entry and contained data entry address.
1398 */ 1603 */
1399 lep = &leaf->ents[index]; 1604 lep = &ents[index];
1605
1400 /* 1606 /*
1401 * Point to the data entry. 1607 * Point to the data entry.
1402 */ 1608 */
@@ -1440,18 +1646,23 @@ xfs_dir2_leaf_lookup_int(
1440 xfs_trans_t *tp; /* transaction pointer */ 1646 xfs_trans_t *tp; /* transaction pointer */
1441 xfs_dir2_db_t cidb = -1; /* case match data block no. */ 1647 xfs_dir2_db_t cidb = -1; /* case match data block no. */
1442 enum xfs_dacmp cmp; /* name compare result */ 1648 enum xfs_dacmp cmp; /* name compare result */
1649 struct xfs_dir2_leaf_entry *ents;
1650 struct xfs_dir3_icleaf_hdr leafhdr;
1443 1651
1444 dp = args->dp; 1652 dp = args->dp;
1445 tp = args->trans; 1653 tp = args->trans;
1446 mp = dp->i_mount; 1654 mp = dp->i_mount;
1447 1655
1448 error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp); 1656 error = xfs_dir3_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
1449 if (error) 1657 if (error)
1450 return error; 1658 return error;
1451 1659
1452 *lbpp = lbp; 1660 *lbpp = lbp;
1453 leaf = lbp->b_addr; 1661 leaf = lbp->b_addr;
1454 xfs_dir2_leaf_check(dp, lbp); 1662 xfs_dir3_leaf_check(mp, lbp);
1663 ents = xfs_dir3_leaf_ents_p(leaf);
1664 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
1665
1455 /* 1666 /*
1456 * Look for the first leaf entry with our hash value. 1667 * Look for the first leaf entry with our hash value.
1457 */ 1668 */
@@ -1460,9 +1671,9 @@ xfs_dir2_leaf_lookup_int(
1460 * Loop over all the entries with the right hash value 1671 * Loop over all the entries with the right hash value
1461 * looking to match the name. 1672 * looking to match the name.
1462 */ 1673 */
1463 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) && 1674 for (lep = &ents[index];
1464 be32_to_cpu(lep->hashval) == args->hashval; 1675 index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
1465 lep++, index++) { 1676 lep++, index++) {
1466 /* 1677 /*
1467 * Skip over stale leaf entries. 1678 * Skip over stale leaf entries.
1468 */ 1679 */
@@ -1479,7 +1690,7 @@ xfs_dir2_leaf_lookup_int(
1479 if (newdb != curdb) { 1690 if (newdb != curdb) {
1480 if (dbp) 1691 if (dbp)
1481 xfs_trans_brelse(tp, dbp); 1692 xfs_trans_brelse(tp, dbp);
1482 error = xfs_dir2_data_read(tp, dp, 1693 error = xfs_dir3_data_read(tp, dp,
1483 xfs_dir2_db_to_da(mp, newdb), 1694 xfs_dir2_db_to_da(mp, newdb),
1484 -1, &dbp); 1695 -1, &dbp);
1485 if (error) { 1696 if (error) {
@@ -1520,7 +1731,7 @@ xfs_dir2_leaf_lookup_int(
1520 ASSERT(cidb != -1); 1731 ASSERT(cidb != -1);
1521 if (cidb != curdb) { 1732 if (cidb != curdb) {
1522 xfs_trans_brelse(tp, dbp); 1733 xfs_trans_brelse(tp, dbp);
1523 error = xfs_dir2_data_read(tp, dp, 1734 error = xfs_dir3_data_read(tp, dp,
1524 xfs_dir2_db_to_da(mp, cidb), 1735 xfs_dir2_db_to_da(mp, cidb),
1525 -1, &dbp); 1736 -1, &dbp);
1526 if (error) { 1737 if (error) {
@@ -1566,6 +1777,9 @@ xfs_dir2_leaf_removename(
1566 int needscan; /* need to rescan data frees */ 1777 int needscan; /* need to rescan data frees */
1567 xfs_dir2_data_off_t oldbest; /* old value of best free */ 1778 xfs_dir2_data_off_t oldbest; /* old value of best free */
1568 xfs_trans_t *tp; /* transaction pointer */ 1779 xfs_trans_t *tp; /* transaction pointer */
1780 struct xfs_dir2_data_free *bf; /* bestfree table */
1781 struct xfs_dir2_leaf_entry *ents;
1782 struct xfs_dir3_icleaf_hdr leafhdr;
1569 1783
1570 trace_xfs_dir2_leaf_removename(args); 1784 trace_xfs_dir2_leaf_removename(args);
1571 1785
@@ -1580,16 +1794,19 @@ xfs_dir2_leaf_removename(
1580 mp = dp->i_mount; 1794 mp = dp->i_mount;
1581 leaf = lbp->b_addr; 1795 leaf = lbp->b_addr;
1582 hdr = dbp->b_addr; 1796 hdr = dbp->b_addr;
1583 xfs_dir2_data_check(dp, dbp); 1797 xfs_dir3_data_check(dp, dbp);
1798 bf = xfs_dir3_data_bestfree_p(hdr);
1799 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
1800 ents = xfs_dir3_leaf_ents_p(leaf);
1584 /* 1801 /*
1585 * Point to the leaf entry, use that to point to the data entry. 1802 * Point to the leaf entry, use that to point to the data entry.
1586 */ 1803 */
1587 lep = &leaf->ents[index]; 1804 lep = &ents[index];
1588 db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); 1805 db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1589 dep = (xfs_dir2_data_entry_t *) 1806 dep = (xfs_dir2_data_entry_t *)
1590 ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); 1807 ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1591 needscan = needlog = 0; 1808 needscan = needlog = 0;
1592 oldbest = be16_to_cpu(hdr->bestfree[0].length); 1809 oldbest = be16_to_cpu(bf[0].length);
1593 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 1810 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1594 bestsp = xfs_dir2_leaf_bests_p(ltp); 1811 bestsp = xfs_dir2_leaf_bests_p(ltp);
1595 ASSERT(be16_to_cpu(bestsp[db]) == oldbest); 1812 ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
@@ -1602,10 +1819,13 @@ xfs_dir2_leaf_removename(
1602 /* 1819 /*
1603 * We just mark the leaf entry stale by putting a null in it. 1820 * We just mark the leaf entry stale by putting a null in it.
1604 */ 1821 */
1605 be16_add_cpu(&leaf->hdr.stale, 1); 1822 leafhdr.stale++;
1606 xfs_dir2_leaf_log_header(tp, lbp); 1823 xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
1824 xfs_dir3_leaf_log_header(tp, lbp);
1825
1607 lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); 1826 lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
1608 xfs_dir2_leaf_log_ents(tp, lbp, index, index); 1827 xfs_dir3_leaf_log_ents(tp, lbp, index, index);
1828
1609 /* 1829 /*
1610 * Scan the freespace in the data block again if necessary, 1830 * Scan the freespace in the data block again if necessary,
1611 * log the data block header if necessary. 1831 * log the data block header if necessary.
@@ -1618,16 +1838,16 @@ xfs_dir2_leaf_removename(
1618 * If the longest freespace in the data block has changed, 1838 * If the longest freespace in the data block has changed,
1619 * put the new value in the bests table and log that. 1839 * put the new value in the bests table and log that.
1620 */ 1840 */
1621 if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) { 1841 if (be16_to_cpu(bf[0].length) != oldbest) {
1622 bestsp[db] = hdr->bestfree[0].length; 1842 bestsp[db] = bf[0].length;
1623 xfs_dir2_leaf_log_bests(tp, lbp, db, db); 1843 xfs_dir3_leaf_log_bests(tp, lbp, db, db);
1624 } 1844 }
1625 xfs_dir2_data_check(dp, dbp); 1845 xfs_dir3_data_check(dp, dbp);
1626 /* 1846 /*
1627 * If the data block is now empty then get rid of the data block. 1847 * If the data block is now empty then get rid of the data block.
1628 */ 1848 */
1629 if (be16_to_cpu(hdr->bestfree[0].length) == 1849 if (be16_to_cpu(bf[0].length) ==
1630 mp->m_dirblksize - (uint)sizeof(*hdr)) { 1850 mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr)) {
1631 ASSERT(db != mp->m_dirdatablk); 1851 ASSERT(db != mp->m_dirdatablk);
1632 if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { 1852 if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1633 /* 1853 /*
@@ -1638,7 +1858,7 @@ xfs_dir2_leaf_removename(
1638 */ 1858 */
1639 if (error == ENOSPC && args->total == 0) 1859 if (error == ENOSPC && args->total == 0)
1640 error = 0; 1860 error = 0;
1641 xfs_dir2_leaf_check(dp, lbp); 1861 xfs_dir3_leaf_check(mp, lbp);
1642 return error; 1862 return error;
1643 } 1863 }
1644 dbp = NULL; 1864 dbp = NULL;
@@ -1661,8 +1881,8 @@ xfs_dir2_leaf_removename(
1661 memmove(&bestsp[db - i], bestsp, 1881 memmove(&bestsp[db - i], bestsp,
1662 (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp)); 1882 (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
1663 be32_add_cpu(&ltp->bestcount, -(db - i)); 1883 be32_add_cpu(&ltp->bestcount, -(db - i));
1664 xfs_dir2_leaf_log_tail(tp, lbp); 1884 xfs_dir3_leaf_log_tail(tp, lbp);
1665 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); 1885 xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1666 } else 1886 } else
1667 bestsp[db] = cpu_to_be16(NULLDATAOFF); 1887 bestsp[db] = cpu_to_be16(NULLDATAOFF);
1668 } 1888 }
@@ -1672,7 +1892,7 @@ xfs_dir2_leaf_removename(
1672 else if (db != mp->m_dirdatablk) 1892 else if (db != mp->m_dirdatablk)
1673 dbp = NULL; 1893 dbp = NULL;
1674 1894
1675 xfs_dir2_leaf_check(dp, lbp); 1895 xfs_dir3_leaf_check(mp, lbp);
1676 /* 1896 /*
1677 * See if we can convert to block form. 1897 * See if we can convert to block form.
1678 */ 1898 */
@@ -1695,6 +1915,7 @@ xfs_dir2_leaf_replace(
1695 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1915 xfs_dir2_leaf_t *leaf; /* leaf structure */
1696 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 1916 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1697 xfs_trans_t *tp; /* transaction pointer */ 1917 xfs_trans_t *tp; /* transaction pointer */
1918 struct xfs_dir2_leaf_entry *ents;
1698 1919
1699 trace_xfs_dir2_leaf_replace(args); 1920 trace_xfs_dir2_leaf_replace(args);
1700 1921
@@ -1706,10 +1927,11 @@ xfs_dir2_leaf_replace(
1706 } 1927 }
1707 dp = args->dp; 1928 dp = args->dp;
1708 leaf = lbp->b_addr; 1929 leaf = lbp->b_addr;
1930 ents = xfs_dir3_leaf_ents_p(leaf);
1709 /* 1931 /*
1710 * Point to the leaf entry, get data address from it. 1932 * Point to the leaf entry, get data address from it.
1711 */ 1933 */
1712 lep = &leaf->ents[index]; 1934 lep = &ents[index];
1713 /* 1935 /*
1714 * Point to the data entry. 1936 * Point to the data entry.
1715 */ 1937 */
@@ -1723,7 +1945,7 @@ xfs_dir2_leaf_replace(
1723 dep->inumber = cpu_to_be64(args->inumber); 1945 dep->inumber = cpu_to_be64(args->inumber);
1724 tp = args->trans; 1946 tp = args->trans;
1725 xfs_dir2_data_log_entry(tp, dbp, dep); 1947 xfs_dir2_data_log_entry(tp, dbp, dep);
1726 xfs_dir2_leaf_check(dp, lbp); 1948 xfs_dir3_leaf_check(dp->i_mount, lbp);
1727 xfs_trans_brelse(tp, lbp); 1949 xfs_trans_brelse(tp, lbp);
1728 return 0; 1950 return 0;
1729} 1951}
@@ -1745,17 +1967,22 @@ xfs_dir2_leaf_search_hash(
1745 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1967 xfs_dir2_leaf_t *leaf; /* leaf structure */
1746 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 1968 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1747 int mid=0; /* current leaf index */ 1969 int mid=0; /* current leaf index */
1970 struct xfs_dir2_leaf_entry *ents;
1971 struct xfs_dir3_icleaf_hdr leafhdr;
1748 1972
1749 leaf = lbp->b_addr; 1973 leaf = lbp->b_addr;
1974 ents = xfs_dir3_leaf_ents_p(leaf);
1975 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
1976
1750#ifndef __KERNEL__ 1977#ifndef __KERNEL__
1751 if (!leaf->hdr.count) 1978 if (!leafhdr.count)
1752 return 0; 1979 return 0;
1753#endif 1980#endif
1754 /* 1981 /*
1755 * Note, the table cannot be empty, so we have to go through the loop. 1982 * Note, the table cannot be empty, so we have to go through the loop.
1756 * Binary search the leaf entries looking for our hash value. 1983 * Binary search the leaf entries looking for our hash value.
1757 */ 1984 */
1758 for (lep = leaf->ents, low = 0, high = be16_to_cpu(leaf->hdr.count) - 1, 1985 for (lep = ents, low = 0, high = leafhdr.count - 1,
1759 hashwant = args->hashval; 1986 hashwant = args->hashval;
1760 low <= high; ) { 1987 low <= high; ) {
1761 mid = (low + high) >> 1; 1988 mid = (low + high) >> 1;
@@ -1807,7 +2034,7 @@ xfs_dir2_leaf_trim_data(
1807 /* 2034 /*
1808 * Read the offending data block. We need its buffer. 2035 * Read the offending data block. We need its buffer.
1809 */ 2036 */
1810 error = xfs_dir2_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp); 2037 error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp);
1811 if (error) 2038 if (error)
1812 return error; 2039 return error;
1813 2040
@@ -1817,10 +2044,12 @@ xfs_dir2_leaf_trim_data(
1817#ifdef DEBUG 2044#ifdef DEBUG
1818{ 2045{
1819 struct xfs_dir2_data_hdr *hdr = dbp->b_addr; 2046 struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
2047 struct xfs_dir2_data_free *bf = xfs_dir3_data_bestfree_p(hdr);
1820 2048
1821 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); 2049 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
1822 ASSERT(be16_to_cpu(hdr->bestfree[0].length) == 2050 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
1823 mp->m_dirblksize - (uint)sizeof(*hdr)); 2051 ASSERT(be16_to_cpu(bf[0].length) ==
2052 mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr));
1824 ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); 2053 ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
1825} 2054}
1826#endif 2055#endif
@@ -1839,23 +2068,29 @@ xfs_dir2_leaf_trim_data(
1839 bestsp = xfs_dir2_leaf_bests_p(ltp); 2068 bestsp = xfs_dir2_leaf_bests_p(ltp);
1840 be32_add_cpu(&ltp->bestcount, -1); 2069 be32_add_cpu(&ltp->bestcount, -1);
1841 memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp)); 2070 memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
1842 xfs_dir2_leaf_log_tail(tp, lbp); 2071 xfs_dir3_leaf_log_tail(tp, lbp);
1843 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); 2072 xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1844 return 0; 2073 return 0;
1845} 2074}
1846 2075
1847static inline size_t 2076static inline size_t
1848xfs_dir2_leaf_size( 2077xfs_dir3_leaf_size(
1849 struct xfs_dir2_leaf_hdr *hdr, 2078 struct xfs_dir3_icleaf_hdr *hdr,
1850 int counts) 2079 int counts)
1851{ 2080{
1852 int entries; 2081 int entries;
2082 int hdrsize;
2083
2084 entries = hdr->count - hdr->stale;
2085 if (hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
2086 hdr->magic == XFS_DIR2_LEAFN_MAGIC)
2087 hdrsize = sizeof(struct xfs_dir2_leaf_hdr);
2088 else
2089 hdrsize = sizeof(struct xfs_dir3_leaf_hdr);
1853 2090
1854 entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale); 2091 return hdrsize + entries * sizeof(xfs_dir2_leaf_entry_t)
1855 return sizeof(xfs_dir2_leaf_hdr_t) + 2092 + counts * sizeof(xfs_dir2_data_off_t)
1856 entries * sizeof(xfs_dir2_leaf_entry_t) + 2093 + sizeof(xfs_dir2_leaf_tail_t);
1857 counts * sizeof(xfs_dir2_data_off_t) +
1858 sizeof(xfs_dir2_leaf_tail_t);
1859} 2094}
1860 2095
1861/* 2096/*
@@ -1879,6 +2114,8 @@ xfs_dir2_node_to_leaf(
1879 xfs_mount_t *mp; /* filesystem mount point */ 2114 xfs_mount_t *mp; /* filesystem mount point */
1880 int rval; /* successful free trim? */ 2115 int rval; /* successful free trim? */
1881 xfs_trans_t *tp; /* transaction pointer */ 2116 xfs_trans_t *tp; /* transaction pointer */
2117 struct xfs_dir3_icleaf_hdr leafhdr;
2118 struct xfs_dir3_icfree_hdr freehdr;
1882 2119
1883 /* 2120 /*
1884 * There's more than a leaf level in the btree, so there must 2121 * There's more than a leaf level in the btree, so there must
@@ -1928,7 +2165,11 @@ xfs_dir2_node_to_leaf(
1928 return 0; 2165 return 0;
1929 lbp = state->path.blk[0].bp; 2166 lbp = state->path.blk[0].bp;
1930 leaf = lbp->b_addr; 2167 leaf = lbp->b_addr;
1931 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 2168 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
2169
2170 ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
2171 leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
2172
1932 /* 2173 /*
1933 * Read the freespace block. 2174 * Read the freespace block.
1934 */ 2175 */
@@ -1936,44 +2177,49 @@ xfs_dir2_node_to_leaf(
1936 if (error) 2177 if (error)
1937 return error; 2178 return error;
1938 free = fbp->b_addr; 2179 free = fbp->b_addr;
1939 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); 2180 xfs_dir3_free_hdr_from_disk(&freehdr, free);
1940 ASSERT(!free->hdr.firstdb); 2181
2182 ASSERT(!freehdr.firstdb);
1941 2183
1942 /* 2184 /*
1943 * Now see if the leafn and free data will fit in a leaf1. 2185 * Now see if the leafn and free data will fit in a leaf1.
1944 * If not, release the buffer and give up. 2186 * If not, release the buffer and give up.
1945 */ 2187 */
1946 if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) > 2188 if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > mp->m_dirblksize) {
1947 mp->m_dirblksize) {
1948 xfs_trans_brelse(tp, fbp); 2189 xfs_trans_brelse(tp, fbp);
1949 return 0; 2190 return 0;
1950 } 2191 }
1951 2192
1952 /* 2193 /*
1953 * If the leaf has any stale entries in it, compress them out. 2194 * If the leaf has any stale entries in it, compress them out.
1954 * The compact routine will log the header.
1955 */ 2195 */
1956 if (be16_to_cpu(leaf->hdr.stale)) 2196 if (leafhdr.stale)
1957 xfs_dir2_leaf_compact(args, lbp); 2197 xfs_dir3_leaf_compact(args, &leafhdr, lbp);
1958 else
1959 xfs_dir2_leaf_log_header(tp, lbp);
1960 2198
1961 lbp->b_ops = &xfs_dir2_leaf1_buf_ops; 2199 lbp->b_ops = &xfs_dir3_leaf1_buf_ops;
1962 leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAF1_MAGIC); 2200 xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAF1_BUF);
2201 leafhdr.magic = (leafhdr.magic == XFS_DIR2_LEAFN_MAGIC)
2202 ? XFS_DIR2_LEAF1_MAGIC
2203 : XFS_DIR3_LEAF1_MAGIC;
1963 2204
1964 /* 2205 /*
1965 * Set up the leaf tail from the freespace block. 2206 * Set up the leaf tail from the freespace block.
1966 */ 2207 */
1967 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 2208 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1968 ltp->bestcount = free->hdr.nvalid; 2209 ltp->bestcount = cpu_to_be32(freehdr.nvalid);
2210
1969 /* 2211 /*
1970 * Set up the leaf bests table. 2212 * Set up the leaf bests table.
1971 */ 2213 */
1972 memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests, 2214 memcpy(xfs_dir2_leaf_bests_p(ltp), xfs_dir3_free_bests_p(mp, free),
1973 be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t)); 2215 freehdr.nvalid * sizeof(xfs_dir2_data_off_t));
1974 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); 2216
1975 xfs_dir2_leaf_log_tail(tp, lbp); 2217 xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
1976 xfs_dir2_leaf_check(dp, lbp); 2218 xfs_dir3_leaf_log_header(tp, lbp);
2219 xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
2220 xfs_dir3_leaf_log_tail(tp, lbp);
2221 xfs_dir3_leaf_check(mp, lbp);
2222
1977 /* 2223 /*
1978 * Get rid of the freespace block. 2224 * Get rid of the freespace block.
1979 */ 2225 */
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 5980f9b7fa9b..ecc6c661064c 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -32,20 +33,14 @@
32#include "xfs_dir2_priv.h" 33#include "xfs_dir2_priv.h"
33#include "xfs_error.h" 34#include "xfs_error.h"
34#include "xfs_trace.h" 35#include "xfs_trace.h"
36#include "xfs_buf_item.h"
37#include "xfs_cksum.h"
35 38
36/* 39/*
37 * Function declarations. 40 * Function declarations.
38 */ 41 */
39static int xfs_dir2_leafn_add(struct xfs_buf *bp, xfs_da_args_t *args, 42static int xfs_dir2_leafn_add(struct xfs_buf *bp, xfs_da_args_t *args,
40 int index); 43 int index);
41#ifdef DEBUG
42static void xfs_dir2_leafn_check(struct xfs_inode *dp, struct xfs_buf *bp);
43#else
44#define xfs_dir2_leafn_check(dp, bp)
45#endif
46static void xfs_dir2_leafn_moveents(xfs_da_args_t *args, struct xfs_buf *bp_s,
47 int start_s, struct xfs_buf *bp_d,
48 int start_d, int count);
49static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state, 44static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state,
50 xfs_da_state_blk_t *blk1, 45 xfs_da_state_blk_t *blk1,
51 xfs_da_state_blk_t *blk2); 46 xfs_da_state_blk_t *blk2);
@@ -55,52 +50,126 @@ static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp,
55static int xfs_dir2_node_addname_int(xfs_da_args_t *args, 50static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
56 xfs_da_state_blk_t *fblk); 51 xfs_da_state_blk_t *fblk);
57 52
58static void 53/*
59xfs_dir2_free_verify( 54 * Check internal consistency of a leafn block.
55 */
56#ifdef DEBUG
57#define xfs_dir3_leaf_check(mp, bp) \
58do { \
59 if (!xfs_dir3_leafn_check((mp), (bp))) \
60 ASSERT(0); \
61} while (0);
62
63static bool
64xfs_dir3_leafn_check(
65 struct xfs_mount *mp,
66 struct xfs_buf *bp)
67{
68 struct xfs_dir2_leaf *leaf = bp->b_addr;
69 struct xfs_dir3_icleaf_hdr leafhdr;
70
71 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
72
73 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) {
74 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
75 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
76 return false;
77 } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC)
78 return false;
79
80 return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf);
81}
82#else
83#define xfs_dir3_leaf_check(mp, bp)
84#endif
85
86static bool
87xfs_dir3_free_verify(
60 struct xfs_buf *bp) 88 struct xfs_buf *bp)
61{ 89{
62 struct xfs_mount *mp = bp->b_target->bt_mount; 90 struct xfs_mount *mp = bp->b_target->bt_mount;
63 struct xfs_dir2_free_hdr *hdr = bp->b_addr; 91 struct xfs_dir2_free_hdr *hdr = bp->b_addr;
64 int block_ok = 0;
65 92
66 block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC); 93 if (xfs_sb_version_hascrc(&mp->m_sb)) {
67 if (!block_ok) { 94 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
68 XFS_CORRUPTION_ERROR("xfs_dir2_free_verify magic", 95
69 XFS_ERRLEVEL_LOW, mp, hdr); 96 if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC))
70 xfs_buf_ioerror(bp, EFSCORRUPTED); 97 return false;
98 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
99 return false;
100 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
101 return false;
102 } else {
103 if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC))
104 return false;
71 } 105 }
106
107 /* XXX: should bounds check the xfs_dir3_icfree_hdr here */
108
109 return true;
72} 110}
73 111
74static void 112static void
75xfs_dir2_free_read_verify( 113xfs_dir3_free_read_verify(
76 struct xfs_buf *bp) 114 struct xfs_buf *bp)
77{ 115{
78 xfs_dir2_free_verify(bp); 116 struct xfs_mount *mp = bp->b_target->bt_mount;
117
118 if ((xfs_sb_version_hascrc(&mp->m_sb) &&
119 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
120 XFS_DIR3_FREE_CRC_OFF)) ||
121 !xfs_dir3_free_verify(bp)) {
122 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
123 xfs_buf_ioerror(bp, EFSCORRUPTED);
124 }
79} 125}
80 126
81static void 127static void
82xfs_dir2_free_write_verify( 128xfs_dir3_free_write_verify(
83 struct xfs_buf *bp) 129 struct xfs_buf *bp)
84{ 130{
85 xfs_dir2_free_verify(bp); 131 struct xfs_mount *mp = bp->b_target->bt_mount;
132 struct xfs_buf_log_item *bip = bp->b_fspriv;
133 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
134
135 if (!xfs_dir3_free_verify(bp)) {
136 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
137 xfs_buf_ioerror(bp, EFSCORRUPTED);
138 return;
139 }
140
141 if (!xfs_sb_version_hascrc(&mp->m_sb))
142 return;
143
144 if (bip)
145 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
146
147 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF);
86} 148}
87 149
88static const struct xfs_buf_ops xfs_dir2_free_buf_ops = { 150const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
89 .verify_read = xfs_dir2_free_read_verify, 151 .verify_read = xfs_dir3_free_read_verify,
90 .verify_write = xfs_dir2_free_write_verify, 152 .verify_write = xfs_dir3_free_write_verify,
91}; 153};
92 154
93 155
94static int 156static int
95__xfs_dir2_free_read( 157__xfs_dir3_free_read(
96 struct xfs_trans *tp, 158 struct xfs_trans *tp,
97 struct xfs_inode *dp, 159 struct xfs_inode *dp,
98 xfs_dablk_t fbno, 160 xfs_dablk_t fbno,
99 xfs_daddr_t mappedbno, 161 xfs_daddr_t mappedbno,
100 struct xfs_buf **bpp) 162 struct xfs_buf **bpp)
101{ 163{
102 return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, 164 int err;
103 XFS_DATA_FORK, &xfs_dir2_free_buf_ops); 165
166 err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
167 XFS_DATA_FORK, &xfs_dir3_free_buf_ops);
168
169 /* try read returns without an error or *bpp if it lands in a hole */
170 if (!err && tp && *bpp)
171 xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF);
172 return err;
104} 173}
105 174
106int 175int
@@ -110,7 +179,7 @@ xfs_dir2_free_read(
110 xfs_dablk_t fbno, 179 xfs_dablk_t fbno,
111 struct xfs_buf **bpp) 180 struct xfs_buf **bpp)
112{ 181{
113 return __xfs_dir2_free_read(tp, dp, fbno, -1, bpp); 182 return __xfs_dir3_free_read(tp, dp, fbno, -1, bpp);
114} 183}
115 184
116static int 185static int
@@ -120,7 +189,95 @@ xfs_dir2_free_try_read(
120 xfs_dablk_t fbno, 189 xfs_dablk_t fbno,
121 struct xfs_buf **bpp) 190 struct xfs_buf **bpp)
122{ 191{
123 return __xfs_dir2_free_read(tp, dp, fbno, -2, bpp); 192 return __xfs_dir3_free_read(tp, dp, fbno, -2, bpp);
193}
194
195
196void
197xfs_dir3_free_hdr_from_disk(
198 struct xfs_dir3_icfree_hdr *to,
199 struct xfs_dir2_free *from)
200{
201 if (from->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)) {
202 to->magic = be32_to_cpu(from->hdr.magic);
203 to->firstdb = be32_to_cpu(from->hdr.firstdb);
204 to->nvalid = be32_to_cpu(from->hdr.nvalid);
205 to->nused = be32_to_cpu(from->hdr.nused);
206 } else {
207 struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from;
208
209 to->magic = be32_to_cpu(hdr3->hdr.magic);
210 to->firstdb = be32_to_cpu(hdr3->firstdb);
211 to->nvalid = be32_to_cpu(hdr3->nvalid);
212 to->nused = be32_to_cpu(hdr3->nused);
213 }
214
215 ASSERT(to->magic == XFS_DIR2_FREE_MAGIC ||
216 to->magic == XFS_DIR3_FREE_MAGIC);
217}
218
219static void
220xfs_dir3_free_hdr_to_disk(
221 struct xfs_dir2_free *to,
222 struct xfs_dir3_icfree_hdr *from)
223{
224 ASSERT(from->magic == XFS_DIR2_FREE_MAGIC ||
225 from->magic == XFS_DIR3_FREE_MAGIC);
226
227 if (from->magic == XFS_DIR2_FREE_MAGIC) {
228 to->hdr.magic = cpu_to_be32(from->magic);
229 to->hdr.firstdb = cpu_to_be32(from->firstdb);
230 to->hdr.nvalid = cpu_to_be32(from->nvalid);
231 to->hdr.nused = cpu_to_be32(from->nused);
232 } else {
233 struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to;
234
235 hdr3->hdr.magic = cpu_to_be32(from->magic);
236 hdr3->firstdb = cpu_to_be32(from->firstdb);
237 hdr3->nvalid = cpu_to_be32(from->nvalid);
238 hdr3->nused = cpu_to_be32(from->nused);
239 }
240}
241
242static int
243xfs_dir3_free_get_buf(
244 struct xfs_trans *tp,
245 struct xfs_inode *dp,
246 xfs_dir2_db_t fbno,
247 struct xfs_buf **bpp)
248{
249 struct xfs_mount *mp = dp->i_mount;
250 struct xfs_buf *bp;
251 int error;
252 struct xfs_dir3_icfree_hdr hdr;
253
254 error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fbno),
255 -1, &bp, XFS_DATA_FORK);
256 if (error)
257 return error;
258
259 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_FREE_BUF);
260 bp->b_ops = &xfs_dir3_free_buf_ops;
261
262 /*
263 * Initialize the new block to be empty, and remember
264 * its first slot as our empty slot.
265 */
266 hdr.magic = XFS_DIR2_FREE_MAGIC;
267 hdr.firstdb = 0;
268 hdr.nused = 0;
269 hdr.nvalid = 0;
270 if (xfs_sb_version_hascrc(&mp->m_sb)) {
271 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
272
273 hdr.magic = XFS_DIR3_FREE_MAGIC;
274 hdr3->hdr.blkno = cpu_to_be64(bp->b_bn);
275 hdr3->hdr.owner = cpu_to_be64(dp->i_ino);
276 uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid);
277 }
278 xfs_dir3_free_hdr_to_disk(bp->b_addr, &hdr);
279 *bpp = bp;
280 return 0;
124} 281}
125 282
126/* 283/*
@@ -134,13 +291,16 @@ xfs_dir2_free_log_bests(
134 int last) /* last entry to log */ 291 int last) /* last entry to log */
135{ 292{
136 xfs_dir2_free_t *free; /* freespace structure */ 293 xfs_dir2_free_t *free; /* freespace structure */
294 __be16 *bests;
137 295
138 free = bp->b_addr; 296 free = bp->b_addr;
139 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); 297 bests = xfs_dir3_free_bests_p(tp->t_mountp, free);
298 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
299 free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
140 xfs_trans_log_buf(tp, bp, 300 xfs_trans_log_buf(tp, bp,
141 (uint)((char *)&free->bests[first] - (char *)free), 301 (uint)((char *)&bests[first] - (char *)free),
142 (uint)((char *)&free->bests[last] - (char *)free + 302 (uint)((char *)&bests[last] - (char *)free +
143 sizeof(free->bests[0]) - 1)); 303 sizeof(bests[0]) - 1));
144} 304}
145 305
146/* 306/*
@@ -154,9 +314,9 @@ xfs_dir2_free_log_header(
154 xfs_dir2_free_t *free; /* freespace structure */ 314 xfs_dir2_free_t *free; /* freespace structure */
155 315
156 free = bp->b_addr; 316 free = bp->b_addr;
157 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); 317 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
158 xfs_trans_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free), 318 free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
159 (uint)(sizeof(xfs_dir2_free_hdr_t) - 1)); 319 xfs_trans_log_buf(tp, bp, 0, xfs_dir3_free_hdr_size(tp->t_mountp) - 1);
160} 320}
161 321
162/* 322/*
@@ -183,6 +343,7 @@ xfs_dir2_leaf_to_node(
183 xfs_dir2_data_off_t off; /* freespace entry value */ 343 xfs_dir2_data_off_t off; /* freespace entry value */
184 __be16 *to; /* pointer to freespace entry */ 344 __be16 *to; /* pointer to freespace entry */
185 xfs_trans_t *tp; /* transaction pointer */ 345 xfs_trans_t *tp; /* transaction pointer */
346 struct xfs_dir3_icfree_hdr freehdr;
186 347
187 trace_xfs_dir2_leaf_to_node(args); 348 trace_xfs_dir2_leaf_to_node(args);
188 349
@@ -199,44 +360,53 @@ xfs_dir2_leaf_to_node(
199 /* 360 /*
200 * Get the buffer for the new freespace block. 361 * Get the buffer for the new freespace block.
201 */ 362 */
202 error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp, 363 error = xfs_dir3_free_get_buf(tp, dp, fdb, &fbp);
203 XFS_DATA_FORK);
204 if (error) 364 if (error)
205 return error; 365 return error;
206 fbp->b_ops = &xfs_dir2_free_buf_ops;
207 366
208 free = fbp->b_addr; 367 free = fbp->b_addr;
368 xfs_dir3_free_hdr_from_disk(&freehdr, free);
209 leaf = lbp->b_addr; 369 leaf = lbp->b_addr;
210 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 370 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
211 /* 371 ASSERT(be32_to_cpu(ltp->bestcount) <=
212 * Initialize the freespace block header. 372 (uint)dp->i_d.di_size / mp->m_dirblksize);
213 */ 373
214 free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
215 free->hdr.firstdb = 0;
216 ASSERT(be32_to_cpu(ltp->bestcount) <= (uint)dp->i_d.di_size / mp->m_dirblksize);
217 free->hdr.nvalid = ltp->bestcount;
218 /* 374 /*
219 * Copy freespace entries from the leaf block to the new block. 375 * Copy freespace entries from the leaf block to the new block.
220 * Count active entries. 376 * Count active entries.
221 */ 377 */
222 for (i = n = 0, from = xfs_dir2_leaf_bests_p(ltp), to = free->bests; 378 from = xfs_dir2_leaf_bests_p(ltp);
223 i < be32_to_cpu(ltp->bestcount); i++, from++, to++) { 379 to = xfs_dir3_free_bests_p(mp, free);
380 for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++, to++) {
224 if ((off = be16_to_cpu(*from)) != NULLDATAOFF) 381 if ((off = be16_to_cpu(*from)) != NULLDATAOFF)
225 n++; 382 n++;
226 *to = cpu_to_be16(off); 383 *to = cpu_to_be16(off);
227 } 384 }
228 free->hdr.nused = cpu_to_be32(n);
229
230 lbp->b_ops = &xfs_dir2_leafn_buf_ops;
231 leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC);
232 385
233 /* 386 /*
234 * Log everything. 387 * Now initialize the freespace block header.
235 */ 388 */
236 xfs_dir2_leaf_log_header(tp, lbp); 389 freehdr.nused = n;
390 freehdr.nvalid = be32_to_cpu(ltp->bestcount);
391
392 xfs_dir3_free_hdr_to_disk(fbp->b_addr, &freehdr);
393 xfs_dir2_free_log_bests(tp, fbp, 0, freehdr.nvalid - 1);
237 xfs_dir2_free_log_header(tp, fbp); 394 xfs_dir2_free_log_header(tp, fbp);
238 xfs_dir2_free_log_bests(tp, fbp, 0, be32_to_cpu(free->hdr.nvalid) - 1); 395
239 xfs_dir2_leafn_check(dp, lbp); 396 /*
397 * Converting the leaf to a leafnode is just a matter of changing the
398 * magic number and the ops. Do the change directly to the buffer as
399 * it's less work (and less code) than decoding the header to host
400 * format and back again.
401 */
402 if (leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC))
403 leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC);
404 else
405 leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
406 lbp->b_ops = &xfs_dir3_leafn_buf_ops;
407 xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF);
408 xfs_dir3_leaf_log_header(tp, lbp);
409 xfs_dir3_leaf_check(mp, lbp);
240 return 0; 410 return 0;
241} 411}
242 412
@@ -260,6 +430,8 @@ xfs_dir2_leafn_add(
260 int lowstale; /* previous stale entry */ 430 int lowstale; /* previous stale entry */
261 xfs_mount_t *mp; /* filesystem mount point */ 431 xfs_mount_t *mp; /* filesystem mount point */
262 xfs_trans_t *tp; /* transaction pointer */ 432 xfs_trans_t *tp; /* transaction pointer */
433 struct xfs_dir3_icleaf_hdr leafhdr;
434 struct xfs_dir2_leaf_entry *ents;
263 435
264 trace_xfs_dir2_leafn_add(args, index); 436 trace_xfs_dir2_leafn_add(args, index);
265 437
@@ -267,6 +439,8 @@ xfs_dir2_leafn_add(
267 mp = dp->i_mount; 439 mp = dp->i_mount;
268 tp = args->trans; 440 tp = args->trans;
269 leaf = bp->b_addr; 441 leaf = bp->b_addr;
442 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
443 ents = xfs_dir3_leaf_ents_p(leaf);
270 444
271 /* 445 /*
272 * Quick check just to make sure we are not going to index 446 * Quick check just to make sure we are not going to index
@@ -282,15 +456,15 @@ xfs_dir2_leafn_add(
282 * a compact. 456 * a compact.
283 */ 457 */
284 458
285 if (be16_to_cpu(leaf->hdr.count) == xfs_dir2_max_leaf_ents(mp)) { 459 if (leafhdr.count == xfs_dir3_max_leaf_ents(mp, leaf)) {
286 if (!leaf->hdr.stale) 460 if (!leafhdr.stale)
287 return XFS_ERROR(ENOSPC); 461 return XFS_ERROR(ENOSPC);
288 compact = be16_to_cpu(leaf->hdr.stale) > 1; 462 compact = leafhdr.stale > 1;
289 } else 463 } else
290 compact = 0; 464 compact = 0;
291 ASSERT(index == 0 || be32_to_cpu(leaf->ents[index - 1].hashval) <= args->hashval); 465 ASSERT(index == 0 || be32_to_cpu(ents[index - 1].hashval) <= args->hashval);
292 ASSERT(index == be16_to_cpu(leaf->hdr.count) || 466 ASSERT(index == leafhdr.count ||
293 be32_to_cpu(leaf->ents[index].hashval) >= args->hashval); 467 be32_to_cpu(ents[index].hashval) >= args->hashval);
294 468
295 if (args->op_flags & XFS_DA_OP_JUSTCHECK) 469 if (args->op_flags & XFS_DA_OP_JUSTCHECK)
296 return 0; 470 return 0;
@@ -299,61 +473,51 @@ xfs_dir2_leafn_add(
299 * Compact out all but one stale leaf entry. Leaves behind 473 * Compact out all but one stale leaf entry. Leaves behind
300 * the entry closest to index. 474 * the entry closest to index.
301 */ 475 */
302 if (compact) { 476 if (compact)
303 xfs_dir2_leaf_compact_x1(bp, &index, &lowstale, &highstale, 477 xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale,
304 &lfloglow, &lfloghigh); 478 &highstale, &lfloglow, &lfloghigh);
305 } 479 else if (leafhdr.stale) {
306 /* 480 /*
307 * Set impossible logging indices for this case. 481 * Set impossible logging indices for this case.
308 */ 482 */
309 else if (leaf->hdr.stale) { 483 lfloglow = leafhdr.count;
310 lfloglow = be16_to_cpu(leaf->hdr.count);
311 lfloghigh = -1; 484 lfloghigh = -1;
312 } 485 }
313 486
314 /* 487 /*
315 * Insert the new entry, log everything. 488 * Insert the new entry, log everything.
316 */ 489 */
317 lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale, 490 lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale,
318 highstale, &lfloglow, &lfloghigh); 491 highstale, &lfloglow, &lfloghigh);
319 492
320 lep->hashval = cpu_to_be32(args->hashval); 493 lep->hashval = cpu_to_be32(args->hashval);
321 lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, 494 lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
322 args->blkno, args->index)); 495 args->blkno, args->index));
323 xfs_dir2_leaf_log_header(tp, bp); 496
324 xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh); 497 xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
325 xfs_dir2_leafn_check(dp, bp); 498 xfs_dir3_leaf_log_header(tp, bp);
499 xfs_dir3_leaf_log_ents(tp, bp, lfloglow, lfloghigh);
500 xfs_dir3_leaf_check(mp, bp);
326 return 0; 501 return 0;
327} 502}
328 503
329#ifdef DEBUG 504#ifdef DEBUG
330/* 505static void
331 * Check internal consistency of a leafn block. 506xfs_dir2_free_hdr_check(
332 */ 507 struct xfs_mount *mp,
333void 508 struct xfs_buf *bp,
334xfs_dir2_leafn_check( 509 xfs_dir2_db_t db)
335 struct xfs_inode *dp,
336 struct xfs_buf *bp)
337{ 510{
338 int i; /* leaf index */ 511 struct xfs_dir3_icfree_hdr hdr;
339 xfs_dir2_leaf_t *leaf; /* leaf structure */
340 xfs_mount_t *mp; /* filesystem mount point */
341 int stale; /* count of stale leaves */
342 512
343 leaf = bp->b_addr; 513 xfs_dir3_free_hdr_from_disk(&hdr, bp->b_addr);
344 mp = dp->i_mount; 514
345 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 515 ASSERT((hdr.firstdb % xfs_dir3_free_max_bests(mp)) == 0);
346 ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp)); 516 ASSERT(hdr.firstdb <= db);
347 for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { 517 ASSERT(db < hdr.firstdb + hdr.nvalid);
348 if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
349 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
350 be32_to_cpu(leaf->ents[i + 1].hashval));
351 }
352 if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
353 stale++;
354 }
355 ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
356} 518}
519#else
520#define xfs_dir2_free_hdr_check(mp, dp, db)
357#endif /* DEBUG */ 521#endif /* DEBUG */
358 522
359/* 523/*
@@ -365,15 +529,22 @@ xfs_dir2_leafn_lasthash(
365 struct xfs_buf *bp, /* leaf buffer */ 529 struct xfs_buf *bp, /* leaf buffer */
366 int *count) /* count of entries in leaf */ 530 int *count) /* count of entries in leaf */
367{ 531{
368 xfs_dir2_leaf_t *leaf; /* leaf structure */ 532 struct xfs_dir2_leaf *leaf = bp->b_addr;
533 struct xfs_dir2_leaf_entry *ents;
534 struct xfs_dir3_icleaf_hdr leafhdr;
535
536 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
537
538 ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
539 leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
369 540
370 leaf = bp->b_addr;
371 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
372 if (count) 541 if (count)
373 *count = be16_to_cpu(leaf->hdr.count); 542 *count = leafhdr.count;
374 if (!leaf->hdr.count) 543 if (!leafhdr.count)
375 return 0; 544 return 0;
376 return be32_to_cpu(leaf->ents[be16_to_cpu(leaf->hdr.count) - 1].hashval); 545
546 ents = xfs_dir3_leaf_ents_p(leaf);
547 return be32_to_cpu(ents[leafhdr.count - 1].hashval);
377} 548}
378 549
379/* 550/*
@@ -402,16 +573,19 @@ xfs_dir2_leafn_lookup_for_addname(
402 xfs_dir2_db_t newdb; /* new data block number */ 573 xfs_dir2_db_t newdb; /* new data block number */
403 xfs_dir2_db_t newfdb; /* new free block number */ 574 xfs_dir2_db_t newfdb; /* new free block number */
404 xfs_trans_t *tp; /* transaction pointer */ 575 xfs_trans_t *tp; /* transaction pointer */
576 struct xfs_dir2_leaf_entry *ents;
577 struct xfs_dir3_icleaf_hdr leafhdr;
405 578
406 dp = args->dp; 579 dp = args->dp;
407 tp = args->trans; 580 tp = args->trans;
408 mp = dp->i_mount; 581 mp = dp->i_mount;
409 leaf = bp->b_addr; 582 leaf = bp->b_addr;
410 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 583 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
411#ifdef __KERNEL__ 584 ents = xfs_dir3_leaf_ents_p(leaf);
412 ASSERT(be16_to_cpu(leaf->hdr.count) > 0); 585
413#endif 586 xfs_dir3_leaf_check(mp, bp);
414 xfs_dir2_leafn_check(dp, bp); 587 ASSERT(leafhdr.count > 0);
588
415 /* 589 /*
416 * Look up the hash value in the leaf entries. 590 * Look up the hash value in the leaf entries.
417 */ 591 */
@@ -424,15 +598,16 @@ xfs_dir2_leafn_lookup_for_addname(
424 curbp = state->extrablk.bp; 598 curbp = state->extrablk.bp;
425 curfdb = state->extrablk.blkno; 599 curfdb = state->extrablk.blkno;
426 free = curbp->b_addr; 600 free = curbp->b_addr;
427 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); 601 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
602 free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
428 } 603 }
429 length = xfs_dir2_data_entsize(args->namelen); 604 length = xfs_dir2_data_entsize(args->namelen);
430 /* 605 /*
431 * Loop over leaf entries with the right hash value. 606 * Loop over leaf entries with the right hash value.
432 */ 607 */
433 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) && 608 for (lep = &ents[index];
434 be32_to_cpu(lep->hashval) == args->hashval; 609 index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
435 lep++, index++) { 610 lep++, index++) {
436 /* 611 /*
437 * Skip stale leaf entries. 612 * Skip stale leaf entries.
438 */ 613 */
@@ -451,6 +626,8 @@ xfs_dir2_leafn_lookup_for_addname(
451 * in hand, take a look at it. 626 * in hand, take a look at it.
452 */ 627 */
453 if (newdb != curdb) { 628 if (newdb != curdb) {
629 __be16 *bests;
630
454 curdb = newdb; 631 curdb = newdb;
455 /* 632 /*
456 * Convert the data block to the free block 633 * Convert the data block to the free block
@@ -473,13 +650,8 @@ xfs_dir2_leafn_lookup_for_addname(
473 if (error) 650 if (error)
474 return error; 651 return error;
475 free = curbp->b_addr; 652 free = curbp->b_addr;
476 ASSERT(be32_to_cpu(free->hdr.magic) == 653
477 XFS_DIR2_FREE_MAGIC); 654 xfs_dir2_free_hdr_check(mp, curbp, curdb);
478 ASSERT((be32_to_cpu(free->hdr.firstdb) %
479 xfs_dir2_free_max_bests(mp)) == 0);
480 ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
481 ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
482 be32_to_cpu(free->hdr.nvalid));
483 } 655 }
484 /* 656 /*
485 * Get the index for our entry. 657 * Get the index for our entry.
@@ -488,8 +660,8 @@ xfs_dir2_leafn_lookup_for_addname(
488 /* 660 /*
489 * If it has room, return it. 661 * If it has room, return it.
490 */ 662 */
491 if (unlikely(free->bests[fi] == 663 bests = xfs_dir3_free_bests_p(mp, free);
492 cpu_to_be16(NULLDATAOFF))) { 664 if (unlikely(bests[fi] == cpu_to_be16(NULLDATAOFF))) {
493 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", 665 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
494 XFS_ERRLEVEL_LOW, mp); 666 XFS_ERRLEVEL_LOW, mp);
495 if (curfdb != newfdb) 667 if (curfdb != newfdb)
@@ -497,7 +669,7 @@ xfs_dir2_leafn_lookup_for_addname(
497 return XFS_ERROR(EFSCORRUPTED); 669 return XFS_ERROR(EFSCORRUPTED);
498 } 670 }
499 curfdb = newfdb; 671 curfdb = newfdb;
500 if (be16_to_cpu(free->bests[fi]) >= length) 672 if (be16_to_cpu(bests[fi]) >= length)
501 goto out; 673 goto out;
502 } 674 }
503 } 675 }
@@ -511,6 +683,12 @@ out:
511 state->extrablk.bp = curbp; 683 state->extrablk.bp = curbp;
512 state->extrablk.index = fi; 684 state->extrablk.index = fi;
513 state->extrablk.blkno = curfdb; 685 state->extrablk.blkno = curfdb;
686
687 /*
688 * Important: this magic number is not in the buffer - it's for
689 * buffer type information and therefore only the free/data type
690 * matters here, not whether CRCs are enabled or not.
691 */
514 state->extrablk.magic = XFS_DIR2_FREE_MAGIC; 692 state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
515 } else { 693 } else {
516 state->extravalid = 0; 694 state->extravalid = 0;
@@ -545,16 +723,19 @@ xfs_dir2_leafn_lookup_for_entry(
545 xfs_dir2_db_t newdb; /* new data block number */ 723 xfs_dir2_db_t newdb; /* new data block number */
546 xfs_trans_t *tp; /* transaction pointer */ 724 xfs_trans_t *tp; /* transaction pointer */
547 enum xfs_dacmp cmp; /* comparison result */ 725 enum xfs_dacmp cmp; /* comparison result */
726 struct xfs_dir2_leaf_entry *ents;
727 struct xfs_dir3_icleaf_hdr leafhdr;
548 728
549 dp = args->dp; 729 dp = args->dp;
550 tp = args->trans; 730 tp = args->trans;
551 mp = dp->i_mount; 731 mp = dp->i_mount;
552 leaf = bp->b_addr; 732 leaf = bp->b_addr;
553 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 733 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
554#ifdef __KERNEL__ 734 ents = xfs_dir3_leaf_ents_p(leaf);
555 ASSERT(be16_to_cpu(leaf->hdr.count) > 0); 735
556#endif 736 xfs_dir3_leaf_check(mp, bp);
557 xfs_dir2_leafn_check(dp, bp); 737 ASSERT(leafhdr.count > 0);
738
558 /* 739 /*
559 * Look up the hash value in the leaf entries. 740 * Look up the hash value in the leaf entries.
560 */ 741 */
@@ -569,9 +750,9 @@ xfs_dir2_leafn_lookup_for_entry(
569 /* 750 /*
570 * Loop over leaf entries with the right hash value. 751 * Loop over leaf entries with the right hash value.
571 */ 752 */
572 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) && 753 for (lep = &ents[index];
573 be32_to_cpu(lep->hashval) == args->hashval; 754 index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
574 lep++, index++) { 755 lep++, index++) {
575 /* 756 /*
576 * Skip stale leaf entries. 757 * Skip stale leaf entries.
577 */ 758 */
@@ -604,13 +785,13 @@ xfs_dir2_leafn_lookup_for_entry(
604 ASSERT(state->extravalid); 785 ASSERT(state->extravalid);
605 curbp = state->extrablk.bp; 786 curbp = state->extrablk.bp;
606 } else { 787 } else {
607 error = xfs_dir2_data_read(tp, dp, 788 error = xfs_dir3_data_read(tp, dp,
608 xfs_dir2_db_to_da(mp, newdb), 789 xfs_dir2_db_to_da(mp, newdb),
609 -1, &curbp); 790 -1, &curbp);
610 if (error) 791 if (error)
611 return error; 792 return error;
612 } 793 }
613 xfs_dir2_data_check(dp, curbp); 794 xfs_dir3_data_check(dp, curbp);
614 curdb = newdb; 795 curdb = newdb;
615 } 796 }
616 /* 797 /*
@@ -638,13 +819,13 @@ xfs_dir2_leafn_lookup_for_entry(
638 state->extrablk.index = (int)((char *)dep - 819 state->extrablk.index = (int)((char *)dep -
639 (char *)curbp->b_addr); 820 (char *)curbp->b_addr);
640 state->extrablk.magic = XFS_DIR2_DATA_MAGIC; 821 state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
641 curbp->b_ops = &xfs_dir2_data_buf_ops; 822 curbp->b_ops = &xfs_dir3_data_buf_ops;
823 xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF);
642 if (cmp == XFS_CMP_EXACT) 824 if (cmp == XFS_CMP_EXACT)
643 return XFS_ERROR(EEXIST); 825 return XFS_ERROR(EEXIST);
644 } 826 }
645 } 827 }
646 ASSERT(index == be16_to_cpu(leaf->hdr.count) || 828 ASSERT(index == leafhdr.count || (args->op_flags & XFS_DA_OP_OKNOENT));
647 (args->op_flags & XFS_DA_OP_OKNOENT));
648 if (curbp) { 829 if (curbp) {
649 if (args->cmpresult == XFS_CMP_DIFFERENT) { 830 if (args->cmpresult == XFS_CMP_DIFFERENT) {
650 /* Giving back last used data block. */ 831 /* Giving back last used data block. */
@@ -653,7 +834,8 @@ xfs_dir2_leafn_lookup_for_entry(
653 state->extrablk.index = -1; 834 state->extrablk.index = -1;
654 state->extrablk.blkno = curdb; 835 state->extrablk.blkno = curdb;
655 state->extrablk.magic = XFS_DIR2_DATA_MAGIC; 836 state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
656 curbp->b_ops = &xfs_dir2_data_buf_ops; 837 curbp->b_ops = &xfs_dir3_data_buf_ops;
838 xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF);
657 } else { 839 } else {
658 /* If the curbp is not the CI match block, drop it */ 840 /* If the curbp is not the CI match block, drop it */
659 if (state->extrablk.bp != curbp) 841 if (state->extrablk.bp != curbp)
@@ -689,52 +871,50 @@ xfs_dir2_leafn_lookup_int(
689 * Log entries and headers. Stale entries are preserved. 871 * Log entries and headers. Stale entries are preserved.
690 */ 872 */
691static void 873static void
692xfs_dir2_leafn_moveents( 874xfs_dir3_leafn_moveents(
693 xfs_da_args_t *args, /* operation arguments */ 875 xfs_da_args_t *args, /* operation arguments */
694 struct xfs_buf *bp_s, /* source leaf buffer */ 876 struct xfs_buf *bp_s, /* source */
695 int start_s, /* source leaf index */ 877 struct xfs_dir3_icleaf_hdr *shdr,
696 struct xfs_buf *bp_d, /* destination leaf buffer */ 878 struct xfs_dir2_leaf_entry *sents,
697 int start_d, /* destination leaf index */ 879 int start_s,/* source leaf index */
698 int count) /* count of leaves to copy */ 880 struct xfs_buf *bp_d, /* destination */
881 struct xfs_dir3_icleaf_hdr *dhdr,
882 struct xfs_dir2_leaf_entry *dents,
883 int start_d,/* destination leaf index */
884 int count) /* count of leaves to copy */
699{ 885{
700 xfs_dir2_leaf_t *leaf_d; /* destination leaf structure */ 886 struct xfs_trans *tp = args->trans;
701 xfs_dir2_leaf_t *leaf_s; /* source leaf structure */ 887 int stale; /* count stale leaves copied */
702 int stale; /* count stale leaves copied */
703 xfs_trans_t *tp; /* transaction pointer */
704 888
705 trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count); 889 trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count);
706 890
707 /* 891 /*
708 * Silently return if nothing to do. 892 * Silently return if nothing to do.
709 */ 893 */
710 if (count == 0) { 894 if (count == 0)
711 return; 895 return;
712 } 896
713 tp = args->trans;
714 leaf_s = bp_s->b_addr;
715 leaf_d = bp_d->b_addr;
716 /* 897 /*
717 * If the destination index is not the end of the current 898 * If the destination index is not the end of the current
718 * destination leaf entries, open up a hole in the destination 899 * destination leaf entries, open up a hole in the destination
719 * to hold the new entries. 900 * to hold the new entries.
720 */ 901 */
721 if (start_d < be16_to_cpu(leaf_d->hdr.count)) { 902 if (start_d < dhdr->count) {
722 memmove(&leaf_d->ents[start_d + count], &leaf_d->ents[start_d], 903 memmove(&dents[start_d + count], &dents[start_d],
723 (be16_to_cpu(leaf_d->hdr.count) - start_d) * 904 (dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t));
724 sizeof(xfs_dir2_leaf_entry_t)); 905 xfs_dir3_leaf_log_ents(tp, bp_d, start_d + count,
725 xfs_dir2_leaf_log_ents(tp, bp_d, start_d + count, 906 count + dhdr->count - 1);
726 count + be16_to_cpu(leaf_d->hdr.count) - 1);
727 } 907 }
728 /* 908 /*
729 * If the source has stale leaves, count the ones in the copy range 909 * If the source has stale leaves, count the ones in the copy range
730 * so we can update the header correctly. 910 * so we can update the header correctly.
731 */ 911 */
732 if (leaf_s->hdr.stale) { 912 if (shdr->stale) {
733 int i; /* temp leaf index */ 913 int i; /* temp leaf index */
734 914
735 for (i = start_s, stale = 0; i < start_s + count; i++) { 915 for (i = start_s, stale = 0; i < start_s + count; i++) {
736 if (leaf_s->ents[i].address == 916 if (sents[i].address ==
737 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 917 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
738 stale++; 918 stale++;
739 } 919 }
740 } else 920 } else
@@ -742,29 +922,27 @@ xfs_dir2_leafn_moveents(
742 /* 922 /*
743 * Copy the leaf entries from source to destination. 923 * Copy the leaf entries from source to destination.
744 */ 924 */
745 memcpy(&leaf_d->ents[start_d], &leaf_s->ents[start_s], 925 memcpy(&dents[start_d], &sents[start_s],
746 count * sizeof(xfs_dir2_leaf_entry_t)); 926 count * sizeof(xfs_dir2_leaf_entry_t));
747 xfs_dir2_leaf_log_ents(tp, bp_d, start_d, start_d + count - 1); 927 xfs_dir3_leaf_log_ents(tp, bp_d, start_d, start_d + count - 1);
928
748 /* 929 /*
749 * If there are source entries after the ones we copied, 930 * If there are source entries after the ones we copied,
750 * delete the ones we copied by sliding the next ones down. 931 * delete the ones we copied by sliding the next ones down.
751 */ 932 */
752 if (start_s + count < be16_to_cpu(leaf_s->hdr.count)) { 933 if (start_s + count < shdr->count) {
753 memmove(&leaf_s->ents[start_s], &leaf_s->ents[start_s + count], 934 memmove(&sents[start_s], &sents[start_s + count],
754 count * sizeof(xfs_dir2_leaf_entry_t)); 935 count * sizeof(xfs_dir2_leaf_entry_t));
755 xfs_dir2_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1); 936 xfs_dir3_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1);
756 } 937 }
938
757 /* 939 /*
758 * Update the headers and log them. 940 * Update the headers and log them.
759 */ 941 */
760 be16_add_cpu(&leaf_s->hdr.count, -(count)); 942 shdr->count -= count;
761 be16_add_cpu(&leaf_s->hdr.stale, -(stale)); 943 shdr->stale -= stale;
762 be16_add_cpu(&leaf_d->hdr.count, count); 944 dhdr->count += count;
763 be16_add_cpu(&leaf_d->hdr.stale, stale); 945 dhdr->stale += stale;
764 xfs_dir2_leaf_log_header(tp, bp_s);
765 xfs_dir2_leaf_log_header(tp, bp_d);
766 xfs_dir2_leafn_check(args->dp, bp_s);
767 xfs_dir2_leafn_check(args->dp, bp_d);
768} 946}
769 947
770/* 948/*
@@ -773,21 +951,25 @@ xfs_dir2_leafn_moveents(
773 */ 951 */
774int /* sort order */ 952int /* sort order */
775xfs_dir2_leafn_order( 953xfs_dir2_leafn_order(
776 struct xfs_buf *leaf1_bp, /* leaf1 buffer */ 954 struct xfs_buf *leaf1_bp, /* leaf1 buffer */
777 struct xfs_buf *leaf2_bp) /* leaf2 buffer */ 955 struct xfs_buf *leaf2_bp) /* leaf2 buffer */
778{ 956{
779 xfs_dir2_leaf_t *leaf1; /* leaf1 structure */ 957 struct xfs_dir2_leaf *leaf1 = leaf1_bp->b_addr;
780 xfs_dir2_leaf_t *leaf2; /* leaf2 structure */ 958 struct xfs_dir2_leaf *leaf2 = leaf2_bp->b_addr;
781 959 struct xfs_dir2_leaf_entry *ents1;
782 leaf1 = leaf1_bp->b_addr; 960 struct xfs_dir2_leaf_entry *ents2;
783 leaf2 = leaf2_bp->b_addr; 961 struct xfs_dir3_icleaf_hdr hdr1;
784 ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 962 struct xfs_dir3_icleaf_hdr hdr2;
785 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 963
786 if (be16_to_cpu(leaf1->hdr.count) > 0 && 964 xfs_dir3_leaf_hdr_from_disk(&hdr1, leaf1);
787 be16_to_cpu(leaf2->hdr.count) > 0 && 965 xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf2);
788 (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) || 966 ents1 = xfs_dir3_leaf_ents_p(leaf1);
789 be32_to_cpu(leaf2->ents[be16_to_cpu(leaf2->hdr.count) - 1].hashval) < 967 ents2 = xfs_dir3_leaf_ents_p(leaf2);
790 be32_to_cpu(leaf1->ents[be16_to_cpu(leaf1->hdr.count) - 1].hashval))) 968
969 if (hdr1.count > 0 && hdr2.count > 0 &&
970 (be32_to_cpu(ents2[0].hashval) < be32_to_cpu(ents1[0].hashval) ||
971 be32_to_cpu(ents2[hdr2.count - 1].hashval) <
972 be32_to_cpu(ents1[hdr1.count - 1].hashval)))
791 return 1; 973 return 1;
792 return 0; 974 return 0;
793} 975}
@@ -816,6 +998,10 @@ xfs_dir2_leafn_rebalance(
816#endif 998#endif
817 int oldsum; /* old total leaf count */ 999 int oldsum; /* old total leaf count */
818 int swap; /* swapped leaf blocks */ 1000 int swap; /* swapped leaf blocks */
1001 struct xfs_dir2_leaf_entry *ents1;
1002 struct xfs_dir2_leaf_entry *ents2;
1003 struct xfs_dir3_icleaf_hdr hdr1;
1004 struct xfs_dir3_icleaf_hdr hdr2;
819 1005
820 args = state->args; 1006 args = state->args;
821 /* 1007 /*
@@ -830,11 +1016,17 @@ xfs_dir2_leafn_rebalance(
830 } 1016 }
831 leaf1 = blk1->bp->b_addr; 1017 leaf1 = blk1->bp->b_addr;
832 leaf2 = blk2->bp->b_addr; 1018 leaf2 = blk2->bp->b_addr;
833 oldsum = be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count); 1019 xfs_dir3_leaf_hdr_from_disk(&hdr1, leaf1);
1020 xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf2);
1021 ents1 = xfs_dir3_leaf_ents_p(leaf1);
1022 ents2 = xfs_dir3_leaf_ents_p(leaf2);
1023
1024 oldsum = hdr1.count + hdr2.count;
834#ifdef DEBUG 1025#ifdef DEBUG
835 oldstale = be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale); 1026 oldstale = hdr1.stale + hdr2.stale;
836#endif 1027#endif
837 mid = oldsum >> 1; 1028 mid = oldsum >> 1;
1029
838 /* 1030 /*
839 * If the old leaf count was odd then the new one will be even, 1031 * If the old leaf count was odd then the new one will be even,
840 * so we need to divide the new count evenly. 1032 * so we need to divide the new count evenly.
@@ -842,10 +1034,10 @@ xfs_dir2_leafn_rebalance(
842 if (oldsum & 1) { 1034 if (oldsum & 1) {
843 xfs_dahash_t midhash; /* middle entry hash value */ 1035 xfs_dahash_t midhash; /* middle entry hash value */
844 1036
845 if (mid >= be16_to_cpu(leaf1->hdr.count)) 1037 if (mid >= hdr1.count)
846 midhash = be32_to_cpu(leaf2->ents[mid - be16_to_cpu(leaf1->hdr.count)].hashval); 1038 midhash = be32_to_cpu(ents2[mid - hdr1.count].hashval);
847 else 1039 else
848 midhash = be32_to_cpu(leaf1->ents[mid].hashval); 1040 midhash = be32_to_cpu(ents1[mid].hashval);
849 isleft = args->hashval <= midhash; 1041 isleft = args->hashval <= midhash;
850 } 1042 }
851 /* 1043 /*
@@ -859,30 +1051,42 @@ xfs_dir2_leafn_rebalance(
859 * Calculate moved entry count. Positive means left-to-right, 1051 * Calculate moved entry count. Positive means left-to-right,
860 * negative means right-to-left. Then move the entries. 1052 * negative means right-to-left. Then move the entries.
861 */ 1053 */
862 count = be16_to_cpu(leaf1->hdr.count) - mid + (isleft == 0); 1054 count = hdr1.count - mid + (isleft == 0);
863 if (count > 0) 1055 if (count > 0)
864 xfs_dir2_leafn_moveents(args, blk1->bp, 1056 xfs_dir3_leafn_moveents(args, blk1->bp, &hdr1, ents1,
865 be16_to_cpu(leaf1->hdr.count) - count, blk2->bp, 0, count); 1057 hdr1.count - count, blk2->bp,
1058 &hdr2, ents2, 0, count);
866 else if (count < 0) 1059 else if (count < 0)
867 xfs_dir2_leafn_moveents(args, blk2->bp, 0, blk1->bp, 1060 xfs_dir3_leafn_moveents(args, blk2->bp, &hdr2, ents2, 0,
868 be16_to_cpu(leaf1->hdr.count), count); 1061 blk1->bp, &hdr1, ents1,
869 ASSERT(be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count) == oldsum); 1062 hdr1.count, count);
870 ASSERT(be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale) == oldstale); 1063
1064 ASSERT(hdr1.count + hdr2.count == oldsum);
1065 ASSERT(hdr1.stale + hdr2.stale == oldstale);
1066
1067 /* log the changes made when moving the entries */
1068 xfs_dir3_leaf_hdr_to_disk(leaf1, &hdr1);
1069 xfs_dir3_leaf_hdr_to_disk(leaf2, &hdr2);
1070 xfs_dir3_leaf_log_header(args->trans, blk1->bp);
1071 xfs_dir3_leaf_log_header(args->trans, blk2->bp);
1072
1073 xfs_dir3_leaf_check(args->dp->i_mount, blk1->bp);
1074 xfs_dir3_leaf_check(args->dp->i_mount, blk2->bp);
1075
871 /* 1076 /*
872 * Mark whether we're inserting into the old or new leaf. 1077 * Mark whether we're inserting into the old or new leaf.
873 */ 1078 */
874 if (be16_to_cpu(leaf1->hdr.count) < be16_to_cpu(leaf2->hdr.count)) 1079 if (hdr1.count < hdr2.count)
875 state->inleaf = swap; 1080 state->inleaf = swap;
876 else if (be16_to_cpu(leaf1->hdr.count) > be16_to_cpu(leaf2->hdr.count)) 1081 else if (hdr1.count > hdr2.count)
877 state->inleaf = !swap; 1082 state->inleaf = !swap;
878 else 1083 else
879 state->inleaf = 1084 state->inleaf = swap ^ (blk1->index <= hdr1.count);
880 swap ^ (blk1->index <= be16_to_cpu(leaf1->hdr.count));
881 /* 1085 /*
882 * Adjust the expected index for insertion. 1086 * Adjust the expected index for insertion.
883 */ 1087 */
884 if (!state->inleaf) 1088 if (!state->inleaf)
885 blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count); 1089 blk2->index = blk1->index - hdr1.count;
886 1090
887 /* 1091 /*
888 * Finally sanity check just to make sure we are not returning a 1092 * Finally sanity check just to make sure we are not returning a
@@ -898,7 +1102,7 @@ xfs_dir2_leafn_rebalance(
898} 1102}
899 1103
900static int 1104static int
901xfs_dir2_data_block_free( 1105xfs_dir3_data_block_free(
902 xfs_da_args_t *args, 1106 xfs_da_args_t *args,
903 struct xfs_dir2_data_hdr *hdr, 1107 struct xfs_dir2_data_hdr *hdr,
904 struct xfs_dir2_free *free, 1108 struct xfs_dir2_free *free,
@@ -909,57 +1113,66 @@ xfs_dir2_data_block_free(
909{ 1113{
910 struct xfs_trans *tp = args->trans; 1114 struct xfs_trans *tp = args->trans;
911 int logfree = 0; 1115 int logfree = 0;
1116 __be16 *bests;
1117 struct xfs_dir3_icfree_hdr freehdr;
912 1118
913 if (!hdr) { 1119 xfs_dir3_free_hdr_from_disk(&freehdr, free);
914 /* One less used entry in the free table. */
915 be32_add_cpu(&free->hdr.nused, -1);
916 xfs_dir2_free_log_header(tp, fbp);
917 1120
1121 bests = xfs_dir3_free_bests_p(tp->t_mountp, free);
1122 if (hdr) {
918 /* 1123 /*
919 * If this was the last entry in the table, we can trim the 1124 * Data block is not empty, just set the free entry to the new
920 * table size back. There might be other entries at the end 1125 * value.
921 * referring to non-existent data blocks, get those too.
922 */ 1126 */
923 if (findex == be32_to_cpu(free->hdr.nvalid) - 1) { 1127 bests[findex] = cpu_to_be16(longest);
924 int i; /* free entry index */ 1128 xfs_dir2_free_log_bests(tp, fbp, findex, findex);
1129 return 0;
1130 }
925 1131
926 for (i = findex - 1; i >= 0; i--) { 1132 /* One less used entry in the free table. */
927 if (free->bests[i] != cpu_to_be16(NULLDATAOFF)) 1133 freehdr.nused--;
928 break;
929 }
930 free->hdr.nvalid = cpu_to_be32(i + 1);
931 logfree = 0;
932 } else {
933 /* Not the last entry, just punch it out. */
934 free->bests[findex] = cpu_to_be16(NULLDATAOFF);
935 logfree = 1;
936 }
937 /*
938 * If there are no useful entries left in the block,
939 * get rid of the block if we can.
940 */
941 if (!free->hdr.nused) {
942 int error;
943 1134
944 error = xfs_dir2_shrink_inode(args, fdb, fbp); 1135 /*
945 if (error == 0) { 1136 * If this was the last entry in the table, we can trim the table size
946 fbp = NULL; 1137 * back. There might be other entries at the end referring to
947 logfree = 0; 1138 * non-existent data blocks, get those too.
948 } else if (error != ENOSPC || args->total != 0) 1139 */
949 return error; 1140 if (findex == freehdr.nvalid - 1) {
950 /* 1141 int i; /* free entry index */
951 * It's possible to get ENOSPC if there is no 1142
952 * space reservation. In this case some one 1143 for (i = findex - 1; i >= 0; i--) {
953 * else will eventually get rid of this block. 1144 if (bests[i] != cpu_to_be16(NULLDATAOFF))
954 */ 1145 break;
955 } 1146 }
1147 freehdr.nvalid = i + 1;
1148 logfree = 0;
956 } else { 1149 } else {
1150 /* Not the last entry, just punch it out. */
1151 bests[findex] = cpu_to_be16(NULLDATAOFF);
1152 logfree = 1;
1153 }
1154
1155 xfs_dir3_free_hdr_to_disk(free, &freehdr);
1156 xfs_dir2_free_log_header(tp, fbp);
1157
1158 /*
1159 * If there are no useful entries left in the block, get rid of the
1160 * block if we can.
1161 */
1162 if (!freehdr.nused) {
1163 int error;
1164
1165 error = xfs_dir2_shrink_inode(args, fdb, fbp);
1166 if (error == 0) {
1167 fbp = NULL;
1168 logfree = 0;
1169 } else if (error != ENOSPC || args->total != 0)
1170 return error;
957 /* 1171 /*
958 * Data block is not empty, just set the free entry to the new 1172 * It's possible to get ENOSPC if there is no
959 * value. 1173 * space reservation. In this case some one
1174 * else will eventually get rid of this block.
960 */ 1175 */
961 free->bests[findex] = cpu_to_be16(longest);
962 logfree = 1;
963 } 1176 }
964 1177
965 /* Log the free entry that changed, unless we got rid of it. */ 1178 /* Log the free entry that changed, unless we got rid of it. */
@@ -994,6 +1207,9 @@ xfs_dir2_leafn_remove(
994 int needlog; /* need to log data header */ 1207 int needlog; /* need to log data header */
995 int needscan; /* need to rescan data frees */ 1208 int needscan; /* need to rescan data frees */
996 xfs_trans_t *tp; /* transaction pointer */ 1209 xfs_trans_t *tp; /* transaction pointer */
1210 struct xfs_dir2_data_free *bf; /* bestfree table */
1211 struct xfs_dir3_icleaf_hdr leafhdr;
1212 struct xfs_dir2_leaf_entry *ents;
997 1213
998 trace_xfs_dir2_leafn_remove(args, index); 1214 trace_xfs_dir2_leafn_remove(args, index);
999 1215
@@ -1001,11 +1217,14 @@ xfs_dir2_leafn_remove(
1001 tp = args->trans; 1217 tp = args->trans;
1002 mp = dp->i_mount; 1218 mp = dp->i_mount;
1003 leaf = bp->b_addr; 1219 leaf = bp->b_addr;
1004 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 1220 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
1221 ents = xfs_dir3_leaf_ents_p(leaf);
1222
1005 /* 1223 /*
1006 * Point to the entry we're removing. 1224 * Point to the entry we're removing.
1007 */ 1225 */
1008 lep = &leaf->ents[index]; 1226 lep = &ents[index];
1227
1009 /* 1228 /*
1010 * Extract the data block and offset from the entry. 1229 * Extract the data block and offset from the entry.
1011 */ 1230 */
@@ -1013,14 +1232,18 @@ xfs_dir2_leafn_remove(
1013 ASSERT(dblk->blkno == db); 1232 ASSERT(dblk->blkno == db);
1014 off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)); 1233 off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address));
1015 ASSERT(dblk->index == off); 1234 ASSERT(dblk->index == off);
1235
1016 /* 1236 /*
1017 * Kill the leaf entry by marking it stale. 1237 * Kill the leaf entry by marking it stale.
1018 * Log the leaf block changes. 1238 * Log the leaf block changes.
1019 */ 1239 */
1020 be16_add_cpu(&leaf->hdr.stale, 1); 1240 leafhdr.stale++;
1021 xfs_dir2_leaf_log_header(tp, bp); 1241 xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr);
1242 xfs_dir3_leaf_log_header(tp, bp);
1243
1022 lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); 1244 lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
1023 xfs_dir2_leaf_log_ents(tp, bp, index, index); 1245 xfs_dir3_leaf_log_ents(tp, bp, index, index);
1246
1024 /* 1247 /*
1025 * Make the data entry free. Keep track of the longest freespace 1248 * Make the data entry free. Keep track of the longest freespace
1026 * in the data block in case it changes. 1249 * in the data block in case it changes.
@@ -1028,7 +1251,8 @@ xfs_dir2_leafn_remove(
1028 dbp = dblk->bp; 1251 dbp = dblk->bp;
1029 hdr = dbp->b_addr; 1252 hdr = dbp->b_addr;
1030 dep = (xfs_dir2_data_entry_t *)((char *)hdr + off); 1253 dep = (xfs_dir2_data_entry_t *)((char *)hdr + off);
1031 longest = be16_to_cpu(hdr->bestfree[0].length); 1254 bf = xfs_dir3_data_bestfree_p(hdr);
1255 longest = be16_to_cpu(bf[0].length);
1032 needlog = needscan = 0; 1256 needlog = needscan = 0;
1033 xfs_dir2_data_make_free(tp, dbp, off, 1257 xfs_dir2_data_make_free(tp, dbp, off,
1034 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); 1258 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
@@ -1040,12 +1264,12 @@ xfs_dir2_leafn_remove(
1040 xfs_dir2_data_freescan(mp, hdr, &needlog); 1264 xfs_dir2_data_freescan(mp, hdr, &needlog);
1041 if (needlog) 1265 if (needlog)
1042 xfs_dir2_data_log_header(tp, dbp); 1266 xfs_dir2_data_log_header(tp, dbp);
1043 xfs_dir2_data_check(dp, dbp); 1267 xfs_dir3_data_check(dp, dbp);
1044 /* 1268 /*
1045 * If the longest data block freespace changes, need to update 1269 * If the longest data block freespace changes, need to update
1046 * the corresponding freeblock entry. 1270 * the corresponding freeblock entry.
1047 */ 1271 */
1048 if (longest < be16_to_cpu(hdr->bestfree[0].length)) { 1272 if (longest < be16_to_cpu(bf[0].length)) {
1049 int error; /* error return value */ 1273 int error; /* error return value */
1050 struct xfs_buf *fbp; /* freeblock buffer */ 1274 struct xfs_buf *fbp; /* freeblock buffer */
1051 xfs_dir2_db_t fdb; /* freeblock block number */ 1275 xfs_dir2_db_t fdb; /* freeblock block number */
@@ -1062,20 +1286,25 @@ xfs_dir2_leafn_remove(
1062 if (error) 1286 if (error)
1063 return error; 1287 return error;
1064 free = fbp->b_addr; 1288 free = fbp->b_addr;
1065 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); 1289#ifdef DEBUG
1066 ASSERT(be32_to_cpu(free->hdr.firstdb) == 1290 {
1067 xfs_dir2_free_max_bests(mp) * 1291 struct xfs_dir3_icfree_hdr freehdr;
1068 (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); 1292 xfs_dir3_free_hdr_from_disk(&freehdr, free);
1293 ASSERT(freehdr.firstdb == xfs_dir3_free_max_bests(mp) *
1294 (fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
1295 }
1296#endif
1069 /* 1297 /*
1070 * Calculate which entry we need to fix. 1298 * Calculate which entry we need to fix.
1071 */ 1299 */
1072 findex = xfs_dir2_db_to_fdindex(mp, db); 1300 findex = xfs_dir2_db_to_fdindex(mp, db);
1073 longest = be16_to_cpu(hdr->bestfree[0].length); 1301 longest = be16_to_cpu(bf[0].length);
1074 /* 1302 /*
1075 * If the data block is now empty we can get rid of it 1303 * If the data block is now empty we can get rid of it
1076 * (usually). 1304 * (usually).
1077 */ 1305 */
1078 if (longest == mp->m_dirblksize - (uint)sizeof(*hdr)) { 1306 if (longest == mp->m_dirblksize -
1307 xfs_dir3_data_entry_offset(hdr)) {
1079 /* 1308 /*
1080 * Try to punch out the data block. 1309 * Try to punch out the data block.
1081 */ 1310 */
@@ -1096,21 +1325,19 @@ xfs_dir2_leafn_remove(
1096 * If we got rid of the data block, we can eliminate that entry 1325 * If we got rid of the data block, we can eliminate that entry
1097 * in the free block. 1326 * in the free block.
1098 */ 1327 */
1099 error = xfs_dir2_data_block_free(args, hdr, free, 1328 error = xfs_dir3_data_block_free(args, hdr, free,
1100 fdb, findex, fbp, longest); 1329 fdb, findex, fbp, longest);
1101 if (error) 1330 if (error)
1102 return error; 1331 return error;
1103 } 1332 }
1104 1333
1105 xfs_dir2_leafn_check(dp, bp); 1334 xfs_dir3_leaf_check(mp, bp);
1106 /* 1335 /*
1107 * Return indication of whether this leaf block is empty enough 1336 * Return indication of whether this leaf block is empty enough
1108 * to justify trying to join it with a neighbor. 1337 * to justify trying to join it with a neighbor.
1109 */ 1338 */
1110 *rval = 1339 *rval = (xfs_dir3_leaf_hdr_size(leaf) +
1111 ((uint)sizeof(leaf->hdr) + 1340 (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) <
1112 (uint)sizeof(leaf->ents[0]) *
1113 (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale))) <
1114 mp->m_dir_magicpct; 1341 mp->m_dir_magicpct;
1115 return 0; 1342 return 0;
1116} 1343}
@@ -1143,11 +1370,11 @@ xfs_dir2_leafn_split(
1143 /* 1370 /*
1144 * Initialize the new leaf block. 1371 * Initialize the new leaf block.
1145 */ 1372 */
1146 error = xfs_dir2_leaf_init(args, xfs_dir2_da_to_db(mp, blkno), 1373 error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(mp, blkno),
1147 &newblk->bp, XFS_DIR2_LEAFN_MAGIC); 1374 &newblk->bp, XFS_DIR2_LEAFN_MAGIC);
1148 if (error) { 1375 if (error)
1149 return error; 1376 return error;
1150 } 1377
1151 newblk->blkno = blkno; 1378 newblk->blkno = blkno;
1152 newblk->magic = XFS_DIR2_LEAFN_MAGIC; 1379 newblk->magic = XFS_DIR2_LEAFN_MAGIC;
1153 /* 1380 /*
@@ -1155,7 +1382,7 @@ xfs_dir2_leafn_split(
1155 * block into the leaves. 1382 * block into the leaves.
1156 */ 1383 */
1157 xfs_dir2_leafn_rebalance(state, oldblk, newblk); 1384 xfs_dir2_leafn_rebalance(state, oldblk, newblk);
1158 error = xfs_da_blk_link(state, oldblk, newblk); 1385 error = xfs_da3_blk_link(state, oldblk, newblk);
1159 if (error) { 1386 if (error) {
1160 return error; 1387 return error;
1161 } 1388 }
@@ -1171,8 +1398,8 @@ xfs_dir2_leafn_split(
1171 */ 1398 */
1172 oldblk->hashval = xfs_dir2_leafn_lasthash(oldblk->bp, NULL); 1399 oldblk->hashval = xfs_dir2_leafn_lasthash(oldblk->bp, NULL);
1173 newblk->hashval = xfs_dir2_leafn_lasthash(newblk->bp, NULL); 1400 newblk->hashval = xfs_dir2_leafn_lasthash(newblk->bp, NULL);
1174 xfs_dir2_leafn_check(args->dp, oldblk->bp); 1401 xfs_dir3_leaf_check(mp, oldblk->bp);
1175 xfs_dir2_leafn_check(args->dp, newblk->bp); 1402 xfs_dir3_leaf_check(mp, newblk->bp);
1176 return error; 1403 return error;
1177} 1404}
1178 1405
@@ -1198,9 +1425,10 @@ xfs_dir2_leafn_toosmall(
1198 int error; /* error return value */ 1425 int error; /* error return value */
1199 int forward; /* sibling block direction */ 1426 int forward; /* sibling block direction */
1200 int i; /* sibling counter */ 1427 int i; /* sibling counter */
1201 xfs_da_blkinfo_t *info; /* leaf block header */
1202 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1428 xfs_dir2_leaf_t *leaf; /* leaf structure */
1203 int rval; /* result from path_shift */ 1429 int rval; /* result from path_shift */
1430 struct xfs_dir3_icleaf_hdr leafhdr;
1431 struct xfs_dir2_leaf_entry *ents;
1204 1432
1205 /* 1433 /*
1206 * Check for the degenerate case of the block being over 50% full. 1434 * Check for the degenerate case of the block being over 50% full.
@@ -1208,11 +1436,13 @@ xfs_dir2_leafn_toosmall(
1208 * to coalesce with a sibling. 1436 * to coalesce with a sibling.
1209 */ 1437 */
1210 blk = &state->path.blk[state->path.active - 1]; 1438 blk = &state->path.blk[state->path.active - 1];
1211 info = blk->bp->b_addr; 1439 leaf = blk->bp->b_addr;
1212 ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 1440 xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
1213 leaf = (xfs_dir2_leaf_t *)info; 1441 ents = xfs_dir3_leaf_ents_p(leaf);
1214 count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); 1442 xfs_dir3_leaf_check(state->args->dp->i_mount, blk->bp);
1215 bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]); 1443
1444 count = leafhdr.count - leafhdr.stale;
1445 bytes = xfs_dir3_leaf_hdr_size(leaf) + count * sizeof(ents[0]);
1216 if (bytes > (state->blocksize >> 1)) { 1446 if (bytes > (state->blocksize >> 1)) {
1217 /* 1447 /*
1218 * Blk over 50%, don't try to join. 1448 * Blk over 50%, don't try to join.
@@ -1231,9 +1461,9 @@ xfs_dir2_leafn_toosmall(
1231 * Make altpath point to the block we want to keep and 1461 * Make altpath point to the block we want to keep and
1232 * path point to the block we want to drop (this one). 1462 * path point to the block we want to drop (this one).
1233 */ 1463 */
1234 forward = (info->forw != 0); 1464 forward = (leafhdr.forw != 0);
1235 memcpy(&state->altpath, &state->path, sizeof(state->path)); 1465 memcpy(&state->altpath, &state->path, sizeof(state->path));
1236 error = xfs_da_path_shift(state, &state->altpath, forward, 0, 1466 error = xfs_da3_path_shift(state, &state->altpath, forward, 0,
1237 &rval); 1467 &rval);
1238 if (error) 1468 if (error)
1239 return error; 1469 return error;
@@ -1247,15 +1477,17 @@ xfs_dir2_leafn_toosmall(
1247 * We prefer coalescing with the lower numbered sibling so as 1477 * We prefer coalescing with the lower numbered sibling so as
1248 * to shrink a directory over time. 1478 * to shrink a directory over time.
1249 */ 1479 */
1250 forward = be32_to_cpu(info->forw) < be32_to_cpu(info->back); 1480 forward = leafhdr.forw < leafhdr.back;
1251 for (i = 0, bp = NULL; i < 2; forward = !forward, i++) { 1481 for (i = 0, bp = NULL; i < 2; forward = !forward, i++) {
1252 blkno = forward ? be32_to_cpu(info->forw) : be32_to_cpu(info->back); 1482 struct xfs_dir3_icleaf_hdr hdr2;
1483
1484 blkno = forward ? leafhdr.forw : leafhdr.back;
1253 if (blkno == 0) 1485 if (blkno == 0)
1254 continue; 1486 continue;
1255 /* 1487 /*
1256 * Read the sibling leaf block. 1488 * Read the sibling leaf block.
1257 */ 1489 */
1258 error = xfs_dir2_leafn_read(state->args->trans, state->args->dp, 1490 error = xfs_dir3_leafn_read(state->args->trans, state->args->dp,
1259 blkno, -1, &bp); 1491 blkno, -1, &bp);
1260 if (error) 1492 if (error)
1261 return error; 1493 return error;
@@ -1263,13 +1495,15 @@ xfs_dir2_leafn_toosmall(
1263 /* 1495 /*
1264 * Count bytes in the two blocks combined. 1496 * Count bytes in the two blocks combined.
1265 */ 1497 */
1266 leaf = (xfs_dir2_leaf_t *)info; 1498 count = leafhdr.count - leafhdr.stale;
1267 count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
1268 bytes = state->blocksize - (state->blocksize >> 2); 1499 bytes = state->blocksize - (state->blocksize >> 2);
1500
1269 leaf = bp->b_addr; 1501 leaf = bp->b_addr;
1270 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 1502 xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf);
1271 count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); 1503 ents = xfs_dir3_leaf_ents_p(leaf);
1272 bytes -= count * (uint)sizeof(leaf->ents[0]); 1504 count += hdr2.count - hdr2.stale;
1505 bytes -= count * sizeof(ents[0]);
1506
1273 /* 1507 /*
1274 * Fits with at least 25% to spare. 1508 * Fits with at least 25% to spare.
1275 */ 1509 */
@@ -1291,10 +1525,10 @@ xfs_dir2_leafn_toosmall(
1291 */ 1525 */
1292 memcpy(&state->altpath, &state->path, sizeof(state->path)); 1526 memcpy(&state->altpath, &state->path, sizeof(state->path));
1293 if (blkno < blk->blkno) 1527 if (blkno < blk->blkno)
1294 error = xfs_da_path_shift(state, &state->altpath, forward, 0, 1528 error = xfs_da3_path_shift(state, &state->altpath, forward, 0,
1295 &rval); 1529 &rval);
1296 else 1530 else
1297 error = xfs_da_path_shift(state, &state->path, forward, 0, 1531 error = xfs_da3_path_shift(state, &state->path, forward, 0,
1298 &rval); 1532 &rval);
1299 if (error) { 1533 if (error) {
1300 return error; 1534 return error;
@@ -1316,34 +1550,53 @@ xfs_dir2_leafn_unbalance(
1316 xfs_da_args_t *args; /* operation arguments */ 1550 xfs_da_args_t *args; /* operation arguments */
1317 xfs_dir2_leaf_t *drop_leaf; /* dead leaf structure */ 1551 xfs_dir2_leaf_t *drop_leaf; /* dead leaf structure */
1318 xfs_dir2_leaf_t *save_leaf; /* surviving leaf structure */ 1552 xfs_dir2_leaf_t *save_leaf; /* surviving leaf structure */
1553 struct xfs_dir3_icleaf_hdr savehdr;
1554 struct xfs_dir3_icleaf_hdr drophdr;
1555 struct xfs_dir2_leaf_entry *sents;
1556 struct xfs_dir2_leaf_entry *dents;
1319 1557
1320 args = state->args; 1558 args = state->args;
1321 ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); 1559 ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
1322 ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC); 1560 ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
1323 drop_leaf = drop_blk->bp->b_addr; 1561 drop_leaf = drop_blk->bp->b_addr;
1324 save_leaf = save_blk->bp->b_addr; 1562 save_leaf = save_blk->bp->b_addr;
1325 ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 1563
1326 ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); 1564 xfs_dir3_leaf_hdr_from_disk(&savehdr, save_leaf);
1565 xfs_dir3_leaf_hdr_from_disk(&drophdr, drop_leaf);
1566 sents = xfs_dir3_leaf_ents_p(save_leaf);
1567 dents = xfs_dir3_leaf_ents_p(drop_leaf);
1568
1327 /* 1569 /*
1328 * If there are any stale leaf entries, take this opportunity 1570 * If there are any stale leaf entries, take this opportunity
1329 * to purge them. 1571 * to purge them.
1330 */ 1572 */
1331 if (drop_leaf->hdr.stale) 1573 if (drophdr.stale)
1332 xfs_dir2_leaf_compact(args, drop_blk->bp); 1574 xfs_dir3_leaf_compact(args, &drophdr, drop_blk->bp);
1333 if (save_leaf->hdr.stale) 1575 if (savehdr.stale)
1334 xfs_dir2_leaf_compact(args, save_blk->bp); 1576 xfs_dir3_leaf_compact(args, &savehdr, save_blk->bp);
1577
1335 /* 1578 /*
1336 * Move the entries from drop to the appropriate end of save. 1579 * Move the entries from drop to the appropriate end of save.
1337 */ 1580 */
1338 drop_blk->hashval = be32_to_cpu(drop_leaf->ents[be16_to_cpu(drop_leaf->hdr.count) - 1].hashval); 1581 drop_blk->hashval = be32_to_cpu(dents[drophdr.count - 1].hashval);
1339 if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp)) 1582 if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp))
1340 xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, 0, 1583 xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0,
1341 be16_to_cpu(drop_leaf->hdr.count)); 1584 save_blk->bp, &savehdr, sents, 0,
1585 drophdr.count);
1342 else 1586 else
1343 xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, 1587 xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0,
1344 be16_to_cpu(save_leaf->hdr.count), be16_to_cpu(drop_leaf->hdr.count)); 1588 save_blk->bp, &savehdr, sents,
1345 save_blk->hashval = be32_to_cpu(save_leaf->ents[be16_to_cpu(save_leaf->hdr.count) - 1].hashval); 1589 savehdr.count, drophdr.count);
1346 xfs_dir2_leafn_check(args->dp, save_blk->bp); 1590 save_blk->hashval = be32_to_cpu(sents[savehdr.count - 1].hashval);
1591
1592 /* log the changes made when moving the entries */
1593 xfs_dir3_leaf_hdr_to_disk(save_leaf, &savehdr);
1594 xfs_dir3_leaf_hdr_to_disk(drop_leaf, &drophdr);
1595 xfs_dir3_leaf_log_header(args->trans, save_blk->bp);
1596 xfs_dir3_leaf_log_header(args->trans, drop_blk->bp);
1597
1598 xfs_dir3_leaf_check(args->dp->i_mount, save_blk->bp);
1599 xfs_dir3_leaf_check(args->dp->i_mount, drop_blk->bp);
1347} 1600}
1348 1601
1349/* 1602/*
@@ -1372,7 +1625,7 @@ xfs_dir2_node_addname(
1372 * Look up the name. We're not supposed to find it, but 1625 * Look up the name. We're not supposed to find it, but
1373 * this gives us the insertion point. 1626 * this gives us the insertion point.
1374 */ 1627 */
1375 error = xfs_da_node_lookup_int(state, &rval); 1628 error = xfs_da3_node_lookup_int(state, &rval);
1376 if (error) 1629 if (error)
1377 rval = error; 1630 rval = error;
1378 if (rval != ENOENT) { 1631 if (rval != ENOENT) {
@@ -1398,7 +1651,7 @@ xfs_dir2_node_addname(
1398 * It worked, fix the hash values up the btree. 1651 * It worked, fix the hash values up the btree.
1399 */ 1652 */
1400 if (!(args->op_flags & XFS_DA_OP_JUSTCHECK)) 1653 if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
1401 xfs_da_fixhashpath(state, &state->path); 1654 xfs_da3_fixhashpath(state, &state->path);
1402 } else { 1655 } else {
1403 /* 1656 /*
1404 * It didn't work, we need to split the leaf block. 1657 * It didn't work, we need to split the leaf block.
@@ -1410,7 +1663,7 @@ xfs_dir2_node_addname(
1410 /* 1663 /*
1411 * Split the leaf block and insert the new entry. 1664 * Split the leaf block and insert the new entry.
1412 */ 1665 */
1413 rval = xfs_da_split(state); 1666 rval = xfs_da3_split(state);
1414 } 1667 }
1415done: 1668done:
1416 xfs_da_state_free(state); 1669 xfs_da_state_free(state);
@@ -1447,6 +1700,9 @@ xfs_dir2_node_addname_int(
1447 int needscan; /* need to rescan data frees */ 1700 int needscan; /* need to rescan data frees */
1448 __be16 *tagp; /* data entry tag pointer */ 1701 __be16 *tagp; /* data entry tag pointer */
1449 xfs_trans_t *tp; /* transaction pointer */ 1702 xfs_trans_t *tp; /* transaction pointer */
1703 __be16 *bests;
1704 struct xfs_dir3_icfree_hdr freehdr;
1705 struct xfs_dir2_data_free *bf;
1450 1706
1451 dp = args->dp; 1707 dp = args->dp;
1452 mp = dp->i_mount; 1708 mp = dp->i_mount;
@@ -1464,36 +1720,37 @@ xfs_dir2_node_addname_int(
1464 */ 1720 */
1465 ifbno = fblk->blkno; 1721 ifbno = fblk->blkno;
1466 free = fbp->b_addr; 1722 free = fbp->b_addr;
1467 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1468 findex = fblk->index; 1723 findex = fblk->index;
1724 bests = xfs_dir3_free_bests_p(mp, free);
1725 xfs_dir3_free_hdr_from_disk(&freehdr, free);
1726
1469 /* 1727 /*
1470 * This means the free entry showed that the data block had 1728 * This means the free entry showed that the data block had
1471 * space for our entry, so we remembered it. 1729 * space for our entry, so we remembered it.
1472 * Use that data block. 1730 * Use that data block.
1473 */ 1731 */
1474 if (findex >= 0) { 1732 if (findex >= 0) {
1475 ASSERT(findex < be32_to_cpu(free->hdr.nvalid)); 1733 ASSERT(findex < freehdr.nvalid);
1476 ASSERT(be16_to_cpu(free->bests[findex]) != NULLDATAOFF); 1734 ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF);
1477 ASSERT(be16_to_cpu(free->bests[findex]) >= length); 1735 ASSERT(be16_to_cpu(bests[findex]) >= length);
1478 dbno = be32_to_cpu(free->hdr.firstdb) + findex; 1736 dbno = freehdr.firstdb + findex;
1479 } 1737 } else {
1480 /* 1738 /*
1481 * The data block looked at didn't have enough room. 1739 * The data block looked at didn't have enough room.
1482 * We'll start at the beginning of the freespace entries. 1740 * We'll start at the beginning of the freespace entries.
1483 */ 1741 */
1484 else {
1485 dbno = -1; 1742 dbno = -1;
1486 findex = 0; 1743 findex = 0;
1487 } 1744 }
1488 } 1745 } else {
1489 /* 1746 /*
1490 * Didn't come in with a freespace block, so don't have a data block. 1747 * Didn't come in with a freespace block, so no data block.
1491 */ 1748 */
1492 else {
1493 ifbno = dbno = -1; 1749 ifbno = dbno = -1;
1494 fbp = NULL; 1750 fbp = NULL;
1495 findex = 0; 1751 findex = 0;
1496 } 1752 }
1753
1497 /* 1754 /*
1498 * If we don't have a data block yet, we're going to scan the 1755 * If we don't have a data block yet, we're going to scan the
1499 * freespace blocks looking for one. Figure out what the 1756 * freespace blocks looking for one. Figure out what the
@@ -1547,20 +1804,26 @@ xfs_dir2_node_addname_int(
1547 if (!fbp) 1804 if (!fbp)
1548 continue; 1805 continue;
1549 free = fbp->b_addr; 1806 free = fbp->b_addr;
1550 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1551 findex = 0; 1807 findex = 0;
1552 } 1808 }
1553 /* 1809 /*
1554 * Look at the current free entry. Is it good enough? 1810 * Look at the current free entry. Is it good enough?
1811 *
1812 * The bests initialisation should be where the bufer is read in
1813 * the above branch. But gcc is too stupid to realise that bests
1814 * and the freehdr are actually initialised if they are placed
1815 * there, so we have to do it here to avoid warnings. Blech.
1555 */ 1816 */
1556 if (be16_to_cpu(free->bests[findex]) != NULLDATAOFF && 1817 bests = xfs_dir3_free_bests_p(mp, free);
1557 be16_to_cpu(free->bests[findex]) >= length) 1818 xfs_dir3_free_hdr_from_disk(&freehdr, free);
1558 dbno = be32_to_cpu(free->hdr.firstdb) + findex; 1819 if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
1820 be16_to_cpu(bests[findex]) >= length)
1821 dbno = freehdr.firstdb + findex;
1559 else { 1822 else {
1560 /* 1823 /*
1561 * Are we done with the freeblock? 1824 * Are we done with the freeblock?
1562 */ 1825 */
1563 if (++findex == be32_to_cpu(free->hdr.nvalid)) { 1826 if (++findex == freehdr.nvalid) {
1564 /* 1827 /*
1565 * Drop the block. 1828 * Drop the block.
1566 */ 1829 */
@@ -1588,7 +1851,7 @@ xfs_dir2_node_addname_int(
1588 if (unlikely((error = xfs_dir2_grow_inode(args, 1851 if (unlikely((error = xfs_dir2_grow_inode(args,
1589 XFS_DIR2_DATA_SPACE, 1852 XFS_DIR2_DATA_SPACE,
1590 &dbno)) || 1853 &dbno)) ||
1591 (error = xfs_dir2_data_init(args, dbno, &dbp)))) 1854 (error = xfs_dir3_data_init(args, dbno, &dbp))))
1592 return error; 1855 return error;
1593 1856
1594 /* 1857 /*
@@ -1614,11 +1877,11 @@ xfs_dir2_node_addname_int(
1614 * If there wasn't a freespace block, the read will 1877 * If there wasn't a freespace block, the read will
1615 * return a NULL fbp. Allocate and initialize a new one. 1878 * return a NULL fbp. Allocate and initialize a new one.
1616 */ 1879 */
1617 if( fbp == NULL ) { 1880 if (!fbp) {
1618 if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, 1881 error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE,
1619 &fbno))) { 1882 &fbno);
1883 if (error)
1620 return error; 1884 return error;
1621 }
1622 1885
1623 if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { 1886 if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) {
1624 xfs_alert(mp, 1887 xfs_alert(mp,
@@ -1646,27 +1909,24 @@ xfs_dir2_node_addname_int(
1646 /* 1909 /*
1647 * Get a buffer for the new block. 1910 * Get a buffer for the new block.
1648 */ 1911 */
1649 error = xfs_da_get_buf(tp, dp, 1912 error = xfs_dir3_free_get_buf(tp, dp, fbno, &fbp);
1650 xfs_dir2_db_to_da(mp, fbno),
1651 -1, &fbp, XFS_DATA_FORK);
1652 if (error) 1913 if (error)
1653 return error; 1914 return error;
1654 fbp->b_ops = &xfs_dir2_free_buf_ops; 1915 free = fbp->b_addr;
1916 bests = xfs_dir3_free_bests_p(mp, free);
1917 xfs_dir3_free_hdr_from_disk(&freehdr, free);
1655 1918
1656 /* 1919 /*
1657 * Initialize the new block to be empty, and remember 1920 * Remember the first slot as our empty slot.
1658 * its first slot as our empty slot.
1659 */ 1921 */
1660 free = fbp->b_addr; 1922 freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
1661 free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC); 1923 xfs_dir3_free_max_bests(mp);
1662 free->hdr.firstdb = cpu_to_be32(
1663 (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
1664 xfs_dir2_free_max_bests(mp));
1665 free->hdr.nvalid = 0; 1924 free->hdr.nvalid = 0;
1666 free->hdr.nused = 0; 1925 free->hdr.nused = 0;
1667 } else { 1926 } else {
1668 free = fbp->b_addr; 1927 free = fbp->b_addr;
1669 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); 1928 bests = xfs_dir3_free_bests_p(mp, free);
1929 xfs_dir3_free_hdr_from_disk(&freehdr, free);
1670 } 1930 }
1671 1931
1672 /* 1932 /*
@@ -1677,20 +1937,21 @@ xfs_dir2_node_addname_int(
1677 * If it's after the end of the current entries in the 1937 * If it's after the end of the current entries in the
1678 * freespace block, extend that table. 1938 * freespace block, extend that table.
1679 */ 1939 */
1680 if (findex >= be32_to_cpu(free->hdr.nvalid)) { 1940 if (findex >= freehdr.nvalid) {
1681 ASSERT(findex < xfs_dir2_free_max_bests(mp)); 1941 ASSERT(findex < xfs_dir3_free_max_bests(mp));
1682 free->hdr.nvalid = cpu_to_be32(findex + 1); 1942 freehdr.nvalid = findex + 1;
1683 /* 1943 /*
1684 * Tag new entry so nused will go up. 1944 * Tag new entry so nused will go up.
1685 */ 1945 */
1686 free->bests[findex] = cpu_to_be16(NULLDATAOFF); 1946 bests[findex] = cpu_to_be16(NULLDATAOFF);
1687 } 1947 }
1688 /* 1948 /*
1689 * If this entry was for an empty data block 1949 * If this entry was for an empty data block
1690 * (this should always be true) then update the header. 1950 * (this should always be true) then update the header.
1691 */ 1951 */
1692 if (free->bests[findex] == cpu_to_be16(NULLDATAOFF)) { 1952 if (bests[findex] == cpu_to_be16(NULLDATAOFF)) {
1693 be32_add_cpu(&free->hdr.nused, 1); 1953 freehdr.nused++;
1954 xfs_dir3_free_hdr_to_disk(fbp->b_addr, &freehdr);
1694 xfs_dir2_free_log_header(tp, fbp); 1955 xfs_dir2_free_log_header(tp, fbp);
1695 } 1956 }
1696 /* 1957 /*
@@ -1699,7 +1960,8 @@ xfs_dir2_node_addname_int(
1699 * change again. 1960 * change again.
1700 */ 1961 */
1701 hdr = dbp->b_addr; 1962 hdr = dbp->b_addr;
1702 free->bests[findex] = hdr->bestfree[0].length; 1963 bf = xfs_dir3_data_bestfree_p(hdr);
1964 bests[findex] = bf[0].length;
1703 logfree = 1; 1965 logfree = 1;
1704 } 1966 }
1705 /* 1967 /*
@@ -1715,19 +1977,20 @@ xfs_dir2_node_addname_int(
1715 /* 1977 /*
1716 * Read the data block in. 1978 * Read the data block in.
1717 */ 1979 */
1718 error = xfs_dir2_data_read(tp, dp, xfs_dir2_db_to_da(mp, dbno), 1980 error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, dbno),
1719 -1, &dbp); 1981 -1, &dbp);
1720 if (error) 1982 if (error)
1721 return error; 1983 return error;
1722 hdr = dbp->b_addr; 1984 hdr = dbp->b_addr;
1985 bf = xfs_dir3_data_bestfree_p(hdr);
1723 logfree = 0; 1986 logfree = 0;
1724 } 1987 }
1725 ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length); 1988 ASSERT(be16_to_cpu(bf[0].length) >= length);
1726 /* 1989 /*
1727 * Point to the existing unused space. 1990 * Point to the existing unused space.
1728 */ 1991 */
1729 dup = (xfs_dir2_data_unused_t *) 1992 dup = (xfs_dir2_data_unused_t *)
1730 ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset)); 1993 ((char *)hdr + be16_to_cpu(bf[0].offset));
1731 needscan = needlog = 0; 1994 needscan = needlog = 0;
1732 /* 1995 /*
1733 * Mark the first part of the unused space, inuse for us. 1996 * Mark the first part of the unused space, inuse for us.
@@ -1758,8 +2021,9 @@ xfs_dir2_node_addname_int(
1758 /* 2021 /*
1759 * If the freespace entry is now wrong, update it. 2022 * If the freespace entry is now wrong, update it.
1760 */ 2023 */
1761 if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(hdr->bestfree[0].length)) { 2024 bests = xfs_dir3_free_bests_p(mp, free); /* gcc is so stupid */
1762 free->bests[findex] = hdr->bestfree[0].length; 2025 if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) {
2026 bests[findex] = bf[0].length;
1763 logfree = 1; 2027 logfree = 1;
1764 } 2028 }
1765 /* 2029 /*
@@ -1777,7 +2041,7 @@ xfs_dir2_node_addname_int(
1777 2041
1778/* 2042/*
1779 * Lookup an entry in a node-format directory. 2043 * Lookup an entry in a node-format directory.
1780 * All the real work happens in xfs_da_node_lookup_int. 2044 * All the real work happens in xfs_da3_node_lookup_int.
1781 * The only real output is the inode number of the entry. 2045 * The only real output is the inode number of the entry.
1782 */ 2046 */
1783int /* error */ 2047int /* error */
@@ -1802,7 +2066,7 @@ xfs_dir2_node_lookup(
1802 /* 2066 /*
1803 * Fill in the path to the entry in the cursor. 2067 * Fill in the path to the entry in the cursor.
1804 */ 2068 */
1805 error = xfs_da_node_lookup_int(state, &rval); 2069 error = xfs_da3_node_lookup_int(state, &rval);
1806 if (error) 2070 if (error)
1807 rval = error; 2071 rval = error;
1808 else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) { 2072 else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) {
@@ -1857,7 +2121,7 @@ xfs_dir2_node_removename(
1857 /* 2121 /*
1858 * Look up the entry we're deleting, set up the cursor. 2122 * Look up the entry we're deleting, set up the cursor.
1859 */ 2123 */
1860 error = xfs_da_node_lookup_int(state, &rval); 2124 error = xfs_da3_node_lookup_int(state, &rval);
1861 if (error) 2125 if (error)
1862 rval = error; 2126 rval = error;
1863 /* 2127 /*
@@ -1881,12 +2145,12 @@ xfs_dir2_node_removename(
1881 /* 2145 /*
1882 * Fix the hash values up the btree. 2146 * Fix the hash values up the btree.
1883 */ 2147 */
1884 xfs_da_fixhashpath(state, &state->path); 2148 xfs_da3_fixhashpath(state, &state->path);
1885 /* 2149 /*
1886 * If we need to join leaf blocks, do it. 2150 * If we need to join leaf blocks, do it.
1887 */ 2151 */
1888 if (rval && state->path.active > 1) 2152 if (rval && state->path.active > 1)
1889 error = xfs_da_join(state); 2153 error = xfs_da3_join(state);
1890 /* 2154 /*
1891 * If no errors so far, try conversion to leaf format. 2155 * If no errors so far, try conversion to leaf format.
1892 */ 2156 */
@@ -1928,7 +2192,7 @@ xfs_dir2_node_replace(
1928 /* 2192 /*
1929 * Lookup the entry to change in the btree. 2193 * Lookup the entry to change in the btree.
1930 */ 2194 */
1931 error = xfs_da_node_lookup_int(state, &rval); 2195 error = xfs_da3_node_lookup_int(state, &rval);
1932 if (error) { 2196 if (error) {
1933 rval = error; 2197 rval = error;
1934 } 2198 }
@@ -1937,19 +2201,22 @@ xfs_dir2_node_replace(
1937 * and locked it. But paranoia is good. 2201 * and locked it. But paranoia is good.
1938 */ 2202 */
1939 if (rval == EEXIST) { 2203 if (rval == EEXIST) {
2204 struct xfs_dir2_leaf_entry *ents;
1940 /* 2205 /*
1941 * Find the leaf entry. 2206 * Find the leaf entry.
1942 */ 2207 */
1943 blk = &state->path.blk[state->path.active - 1]; 2208 blk = &state->path.blk[state->path.active - 1];
1944 ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); 2209 ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
1945 leaf = blk->bp->b_addr; 2210 leaf = blk->bp->b_addr;
1946 lep = &leaf->ents[blk->index]; 2211 ents = xfs_dir3_leaf_ents_p(leaf);
2212 lep = &ents[blk->index];
1947 ASSERT(state->extravalid); 2213 ASSERT(state->extravalid);
1948 /* 2214 /*
1949 * Point to the data entry. 2215 * Point to the data entry.
1950 */ 2216 */
1951 hdr = state->extrablk.bp->b_addr; 2217 hdr = state->extrablk.bp->b_addr;
1952 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); 2218 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
2219 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
1953 dep = (xfs_dir2_data_entry_t *) 2220 dep = (xfs_dir2_data_entry_t *)
1954 ((char *)hdr + 2221 ((char *)hdr +
1955 xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address))); 2222 xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
@@ -1995,6 +2262,7 @@ xfs_dir2_node_trim_free(
1995 xfs_dir2_free_t *free; /* freespace structure */ 2262 xfs_dir2_free_t *free; /* freespace structure */
1996 xfs_mount_t *mp; /* filesystem mount point */ 2263 xfs_mount_t *mp; /* filesystem mount point */
1997 xfs_trans_t *tp; /* transaction pointer */ 2264 xfs_trans_t *tp; /* transaction pointer */
2265 struct xfs_dir3_icfree_hdr freehdr;
1998 2266
1999 dp = args->dp; 2267 dp = args->dp;
2000 mp = dp->i_mount; 2268 mp = dp->i_mount;
@@ -2012,11 +2280,12 @@ xfs_dir2_node_trim_free(
2012 if (!bp) 2280 if (!bp)
2013 return 0; 2281 return 0;
2014 free = bp->b_addr; 2282 free = bp->b_addr;
2015 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); 2283 xfs_dir3_free_hdr_from_disk(&freehdr, free);
2284
2016 /* 2285 /*
2017 * If there are used entries, there's nothing to do. 2286 * If there are used entries, there's nothing to do.
2018 */ 2287 */
2019 if (be32_to_cpu(free->hdr.nused) > 0) { 2288 if (freehdr.nused > 0) {
2020 xfs_trans_brelse(tp, bp); 2289 xfs_trans_brelse(tp, bp);
2021 *rvalp = 0; 2290 *rvalp = 0;
2022 return 0; 2291 return 0;
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h
index 7da79f6515fd..7cf573c88aad 100644
--- a/fs/xfs/xfs_dir2_priv.h
+++ b/fs/xfs/xfs_dir2_priv.h
@@ -30,7 +30,7 @@ extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
30 const unsigned char *name, int len); 30 const unsigned char *name, int len);
31 31
32/* xfs_dir2_block.c */ 32/* xfs_dir2_block.c */
33extern const struct xfs_buf_ops xfs_dir2_block_buf_ops; 33extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;
34 34
35extern int xfs_dir2_block_addname(struct xfs_da_args *args); 35extern int xfs_dir2_block_addname(struct xfs_da_args *args);
36extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, 36extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
@@ -43,17 +43,18 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
43 43
44/* xfs_dir2_data.c */ 44/* xfs_dir2_data.c */
45#ifdef DEBUG 45#ifdef DEBUG
46#define xfs_dir2_data_check(dp,bp) __xfs_dir2_data_check(dp, bp); 46#define xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp);
47#else 47#else
48#define xfs_dir2_data_check(dp,bp) 48#define xfs_dir3_data_check(dp,bp)
49#endif 49#endif
50 50
51extern const struct xfs_buf_ops xfs_dir2_data_buf_ops; 51extern const struct xfs_buf_ops xfs_dir3_data_buf_ops;
52extern const struct xfs_buf_ops xfs_dir3_free_buf_ops;
52 53
53extern int __xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_buf *bp); 54extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
54extern int xfs_dir2_data_read(struct xfs_trans *tp, struct xfs_inode *dp, 55extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
55 xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); 56 xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
56extern int xfs_dir2_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp, 57extern int xfs_dir3_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp,
57 xfs_dablk_t bno, xfs_daddr_t mapped_bno); 58 xfs_dablk_t bno, xfs_daddr_t mapped_bno);
58 59
59extern struct xfs_dir2_data_free * 60extern struct xfs_dir2_data_free *
@@ -61,7 +62,7 @@ xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
61 struct xfs_dir2_data_unused *dup, int *loghead); 62 struct xfs_dir2_data_unused *dup, int *loghead);
62extern void xfs_dir2_data_freescan(struct xfs_mount *mp, 63extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
63 struct xfs_dir2_data_hdr *hdr, int *loghead); 64 struct xfs_dir2_data_hdr *hdr, int *loghead);
64extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, 65extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
65 struct xfs_buf **bpp); 66 struct xfs_buf **bpp);
66extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp, 67extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp,
67 struct xfs_dir2_data_entry *dep); 68 struct xfs_dir2_data_entry *dep);
@@ -77,24 +78,26 @@ extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp,
77 xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); 78 xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
78 79
79/* xfs_dir2_leaf.c */ 80/* xfs_dir2_leaf.c */
80extern const struct xfs_buf_ops xfs_dir2_leafn_buf_ops; 81extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops;
82extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops;
81 83
82extern int xfs_dir2_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp, 84extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
83 xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp); 85 xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
84extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, 86extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
85 struct xfs_buf *dbp); 87 struct xfs_buf *dbp);
86extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); 88extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
87extern void xfs_dir2_leaf_compact(struct xfs_da_args *args, 89extern void xfs_dir3_leaf_compact(struct xfs_da_args *args,
88 struct xfs_buf *bp); 90 struct xfs_dir3_icleaf_hdr *leafhdr, struct xfs_buf *bp);
89extern void xfs_dir2_leaf_compact_x1(struct xfs_buf *bp, int *indexp, 91extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
92 struct xfs_dir2_leaf_entry *ents, int *indexp,
90 int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); 93 int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
91extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent, 94extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
92 size_t bufsize, xfs_off_t *offset, filldir_t filldir); 95 size_t bufsize, xfs_off_t *offset, filldir_t filldir);
93extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno, 96extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
94 struct xfs_buf **bpp, int magic); 97 struct xfs_buf **bpp, __uint16_t magic);
95extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp, 98extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp,
96 int first, int last); 99 int first, int last);
97extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp, 100extern void xfs_dir3_leaf_log_header(struct xfs_trans *tp,
98 struct xfs_buf *bp); 101 struct xfs_buf *bp);
99extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); 102extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
100extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); 103extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
@@ -104,11 +107,18 @@ extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
104extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args, 107extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
105 struct xfs_buf *lbp, xfs_dir2_db_t db); 108 struct xfs_buf *lbp, xfs_dir2_db_t db);
106extern struct xfs_dir2_leaf_entry * 109extern struct xfs_dir2_leaf_entry *
107xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact, 110xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr,
108 int lowstale, int highstale, 111 struct xfs_dir2_leaf_entry *ents, int index, int compact,
109 int *lfloglow, int *lfloghigh); 112 int lowstale, int highstale, int *lfloglow, int *lfloghigh);
110extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); 113extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
111 114
115extern void xfs_dir3_leaf_hdr_from_disk(struct xfs_dir3_icleaf_hdr *to,
116 struct xfs_dir2_leaf *from);
117extern void xfs_dir3_leaf_hdr_to_disk(struct xfs_dir2_leaf *to,
118 struct xfs_dir3_icleaf_hdr *from);
119extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp,
120 struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf);
121
112/* xfs_dir2_node.c */ 122/* xfs_dir2_node.c */
113extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, 123extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
114 struct xfs_buf *lbp); 124 struct xfs_buf *lbp);
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 1b9fc3ec7e4b..6157424dbf8f 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -278,7 +278,7 @@ xfs_dir2_block_to_sf(
278 * Set up to loop over the block's entries. 278 * Set up to loop over the block's entries.
279 */ 279 */
280 btp = xfs_dir2_block_tail_p(mp, hdr); 280 btp = xfs_dir2_block_tail_p(mp, hdr);
281 ptr = (char *)(hdr + 1); 281 ptr = (char *)xfs_dir3_data_entry_p(hdr);
282 endptr = (char *)xfs_dir2_block_leaf_p(btp); 282 endptr = (char *)xfs_dir2_block_leaf_p(btp);
283 sfep = xfs_dir2_sf_firstentry(sfp); 283 sfep = xfs_dir2_sf_firstentry(sfp);
284 /* 284 /*
@@ -535,7 +535,7 @@ xfs_dir2_sf_addname_hard(
535 * to insert the new entry. 535 * to insert the new entry.
536 * If it's going to end up at the end then oldsfep will point there. 536 * If it's going to end up at the end then oldsfep will point there.
537 */ 537 */
538 for (offset = XFS_DIR2_DATA_FIRST_OFFSET, 538 for (offset = XFS_DIR3_DATA_FIRST_OFFSET(dp->i_mount),
539 oldsfep = xfs_dir2_sf_firstentry(oldsfp), 539 oldsfep = xfs_dir2_sf_firstentry(oldsfp),
540 add_datasize = xfs_dir2_data_entsize(args->namelen), 540 add_datasize = xfs_dir2_data_entsize(args->namelen),
541 eof = (char *)oldsfep == &buf[old_isize]; 541 eof = (char *)oldsfep == &buf[old_isize];
@@ -617,7 +617,7 @@ xfs_dir2_sf_addname_pick(
617 617
618 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; 618 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
619 size = xfs_dir2_data_entsize(args->namelen); 619 size = xfs_dir2_data_entsize(args->namelen);
620 offset = XFS_DIR2_DATA_FIRST_OFFSET; 620 offset = XFS_DIR3_DATA_FIRST_OFFSET(mp);
621 sfep = xfs_dir2_sf_firstentry(sfp); 621 sfep = xfs_dir2_sf_firstentry(sfp);
622 holefit = 0; 622 holefit = 0;
623 /* 623 /*
@@ -688,7 +688,7 @@ xfs_dir2_sf_check(
688 dp = args->dp; 688 dp = args->dp;
689 689
690 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; 690 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
691 offset = XFS_DIR2_DATA_FIRST_OFFSET; 691 offset = XFS_DIR3_DATA_FIRST_OFFSET(dp->i_mount);
692 ino = xfs_dir2_sf_get_parent_ino(sfp); 692 ino = xfs_dir2_sf_get_parent_ino(sfp);
693 i8count = ino > XFS_DIR2_MAX_SHORT_INUM; 693 i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
694 694
@@ -812,9 +812,9 @@ xfs_dir2_sf_getdents(
812 * mp->m_dirdatablk. 812 * mp->m_dirdatablk.
813 */ 813 */
814 dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 814 dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
815 XFS_DIR2_DATA_DOT_OFFSET); 815 XFS_DIR3_DATA_DOT_OFFSET(mp));
816 dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 816 dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
817 XFS_DIR2_DATA_DOTDOT_OFFSET); 817 XFS_DIR3_DATA_DOTDOT_OFFSET(mp));
818 818
819 /* 819 /*
820 * Put . entry unless we're starting past it. 820 * Put . entry unless we're starting past it.
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 8025eb23ad72..a41f8bf1da37 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -36,6 +36,7 @@
36#include "xfs_trans_space.h" 36#include "xfs_trans_space.h"
37#include "xfs_trans_priv.h" 37#include "xfs_trans_priv.h"
38#include "xfs_qm.h" 38#include "xfs_qm.h"
39#include "xfs_cksum.h"
39#include "xfs_trace.h" 40#include "xfs_trace.h"
40 41
41/* 42/*
@@ -85,17 +86,23 @@ xfs_qm_dqdestroy(
85 */ 86 */
86void 87void
87xfs_qm_adjust_dqlimits( 88xfs_qm_adjust_dqlimits(
88 xfs_mount_t *mp, 89 struct xfs_mount *mp,
89 xfs_disk_dquot_t *d) 90 struct xfs_dquot *dq)
90{ 91{
91 xfs_quotainfo_t *q = mp->m_quotainfo; 92 struct xfs_quotainfo *q = mp->m_quotainfo;
93 struct xfs_disk_dquot *d = &dq->q_core;
94 int prealloc = 0;
92 95
93 ASSERT(d->d_id); 96 ASSERT(d->d_id);
94 97
95 if (q->qi_bsoftlimit && !d->d_blk_softlimit) 98 if (q->qi_bsoftlimit && !d->d_blk_softlimit) {
96 d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit); 99 d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
97 if (q->qi_bhardlimit && !d->d_blk_hardlimit) 100 prealloc = 1;
101 }
102 if (q->qi_bhardlimit && !d->d_blk_hardlimit) {
98 d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit); 103 d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
104 prealloc = 1;
105 }
99 if (q->qi_isoftlimit && !d->d_ino_softlimit) 106 if (q->qi_isoftlimit && !d->d_ino_softlimit)
100 d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit); 107 d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
101 if (q->qi_ihardlimit && !d->d_ino_hardlimit) 108 if (q->qi_ihardlimit && !d->d_ino_hardlimit)
@@ -104,6 +111,9 @@ xfs_qm_adjust_dqlimits(
104 d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit); 111 d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
105 if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit) 112 if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
106 d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit); 113 d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
114
115 if (prealloc)
116 xfs_dquot_set_prealloc_limits(dq);
107} 117}
108 118
109/* 119/*
@@ -239,6 +249,8 @@ xfs_qm_init_dquot_blk(
239 d->dd_diskdq.d_version = XFS_DQUOT_VERSION; 249 d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
240 d->dd_diskdq.d_id = cpu_to_be32(curid); 250 d->dd_diskdq.d_id = cpu_to_be32(curid);
241 d->dd_diskdq.d_flags = type; 251 d->dd_diskdq.d_flags = type;
252 if (xfs_sb_version_hascrc(&mp->m_sb))
253 uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
242 } 254 }
243 255
244 xfs_trans_dquot_buf(tp, bp, 256 xfs_trans_dquot_buf(tp, bp,
@@ -248,25 +260,113 @@ xfs_qm_init_dquot_blk(
248 xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); 260 xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
249} 261}
250 262
251static void 263/*
264 * Initialize the dynamic speculative preallocation thresholds. The lo/hi
265 * watermarks correspond to the soft and hard limits by default. If a soft limit
266 * is not specified, we use 95% of the hard limit.
267 */
268void
269xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
270{
271 __uint64_t space;
272
273 dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
274 dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit);
275 if (!dqp->q_prealloc_lo_wmark) {
276 dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark;
277 do_div(dqp->q_prealloc_lo_wmark, 100);
278 dqp->q_prealloc_lo_wmark *= 95;
279 }
280
281 space = dqp->q_prealloc_hi_wmark;
282
283 do_div(space, 100);
284 dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space;
285 dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
286 dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
287}
288
289STATIC void
290xfs_dquot_buf_calc_crc(
291 struct xfs_mount *mp,
292 struct xfs_buf *bp)
293{
294 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
295 int i;
296
297 if (!xfs_sb_version_hascrc(&mp->m_sb))
298 return;
299
300 for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++, d++) {
301 xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
302 offsetof(struct xfs_dqblk, dd_crc));
303 }
304}
305
306STATIC bool
307xfs_dquot_buf_verify_crc(
308 struct xfs_mount *mp,
309 struct xfs_buf *bp)
310{
311 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
312 int ndquots;
313 int i;
314
315 if (!xfs_sb_version_hascrc(&mp->m_sb))
316 return true;
317
318 /*
319 * if we are in log recovery, the quota subsystem has not been
320 * initialised so we have no quotainfo structure. In that case, we need
321 * to manually calculate the number of dquots in the buffer.
322 */
323 if (mp->m_quotainfo)
324 ndquots = mp->m_quotainfo->qi_dqperchunk;
325 else
326 ndquots = xfs_qm_calc_dquots_per_chunk(mp,
327 XFS_BB_TO_FSB(mp, bp->b_length));
328
329 for (i = 0; i < ndquots; i++, d++) {
330 if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
331 offsetof(struct xfs_dqblk, dd_crc)))
332 return false;
333 if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
334 return false;
335 }
336
337 return true;
338}
339
340STATIC bool
252xfs_dquot_buf_verify( 341xfs_dquot_buf_verify(
342 struct xfs_mount *mp,
253 struct xfs_buf *bp) 343 struct xfs_buf *bp)
254{ 344{
255 struct xfs_mount *mp = bp->b_target->bt_mount;
256 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; 345 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
257 struct xfs_disk_dquot *ddq;
258 xfs_dqid_t id = 0; 346 xfs_dqid_t id = 0;
347 int ndquots;
259 int i; 348 int i;
260 349
261 /* 350 /*
351 * if we are in log recovery, the quota subsystem has not been
352 * initialised so we have no quotainfo structure. In that case, we need
353 * to manually calculate the number of dquots in the buffer.
354 */
355 if (mp->m_quotainfo)
356 ndquots = mp->m_quotainfo->qi_dqperchunk;
357 else
358 ndquots = xfs_qm_calc_dquots_per_chunk(mp, bp->b_length);
359
360 /*
262 * On the first read of the buffer, verify that each dquot is valid. 361 * On the first read of the buffer, verify that each dquot is valid.
263 * We don't know what the id of the dquot is supposed to be, just that 362 * We don't know what the id of the dquot is supposed to be, just that
264 * they should be increasing monotonically within the buffer. If the 363 * they should be increasing monotonically within the buffer. If the
265 * first id is corrupt, then it will fail on the second dquot in the 364 * first id is corrupt, then it will fail on the second dquot in the
266 * buffer so corruptions could point to the wrong dquot in this case. 365 * buffer so corruptions could point to the wrong dquot in this case.
267 */ 366 */
268 for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { 367 for (i = 0; i < ndquots; i++) {
269 int error; 368 struct xfs_disk_dquot *ddq;
369 int error;
270 370
271 ddq = &d[i].dd_diskdq; 371 ddq = &d[i].dd_diskdq;
272 372
@@ -274,27 +374,37 @@ xfs_dquot_buf_verify(
274 id = be32_to_cpu(ddq->d_id); 374 id = be32_to_cpu(ddq->d_id);
275 375
276 error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, 376 error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
277 "xfs_dquot_read_verify"); 377 "xfs_dquot_buf_verify");
278 if (error) { 378 if (error)
279 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, d); 379 return false;
280 xfs_buf_ioerror(bp, EFSCORRUPTED);
281 break;
282 }
283 } 380 }
381 return true;
284} 382}
285 383
286static void 384static void
287xfs_dquot_buf_read_verify( 385xfs_dquot_buf_read_verify(
288 struct xfs_buf *bp) 386 struct xfs_buf *bp)
289{ 387{
290 xfs_dquot_buf_verify(bp); 388 struct xfs_mount *mp = bp->b_target->bt_mount;
389
390 if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) {
391 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
392 xfs_buf_ioerror(bp, EFSCORRUPTED);
393 }
291} 394}
292 395
293void 396void
294xfs_dquot_buf_write_verify( 397xfs_dquot_buf_write_verify(
295 struct xfs_buf *bp) 398 struct xfs_buf *bp)
296{ 399{
297 xfs_dquot_buf_verify(bp); 400 struct xfs_mount *mp = bp->b_target->bt_mount;
401
402 if (!xfs_dquot_buf_verify(mp, bp)) {
403 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
404 xfs_buf_ioerror(bp, EFSCORRUPTED);
405 return;
406 }
407 xfs_dquot_buf_calc_crc(mp, bp);
298} 408}
299 409
300const struct xfs_buf_ops xfs_dquot_buf_ops = { 410const struct xfs_buf_ops xfs_dquot_buf_ops = {
@@ -648,6 +758,9 @@ xfs_qm_dqread(
648 dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); 758 dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
649 dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount); 759 dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
650 760
761 /* initialize the dquot speculative prealloc thresholds */
762 xfs_dquot_set_prealloc_limits(dqp);
763
651 /* Mark the buf so that this will stay incore a little longer */ 764 /* Mark the buf so that this will stay incore a little longer */
652 xfs_buf_set_ref(bp, XFS_DQUOT_REF); 765 xfs_buf_set_ref(bp, XFS_DQUOT_REF);
653 766
@@ -1035,6 +1148,17 @@ xfs_qm_dqflush(
1035 &dqp->q_logitem.qli_item.li_lsn); 1148 &dqp->q_logitem.qli_item.li_lsn);
1036 1149
1037 /* 1150 /*
1151 * copy the lsn into the on-disk dquot now while we have the in memory
1152 * dquot here. This can't be done later in the write verifier as we
1153 * can't get access to the log item at that point in time.
1154 */
1155 if (xfs_sb_version_hascrc(&mp->m_sb)) {
1156 struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp;
1157
1158 dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
1159 }
1160
1161 /*
1038 * Attach an iodone routine so that we can remove this dquot from the 1162 * Attach an iodone routine so that we can remove this dquot from the
1039 * AIL and release the flush lock once the dquot is synced to disk. 1163 * AIL and release the flush lock once the dquot is synced to disk.
1040 */ 1164 */
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index c694a8469c4a..4f0ebfc43cc9 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -32,6 +32,13 @@
32struct xfs_mount; 32struct xfs_mount;
33struct xfs_trans; 33struct xfs_trans;
34 34
35enum {
36 XFS_QLOWSP_1_PCNT = 0,
37 XFS_QLOWSP_3_PCNT,
38 XFS_QLOWSP_5_PCNT,
39 XFS_QLOWSP_MAX
40};
41
35/* 42/*
36 * The incore dquot structure 43 * The incore dquot structure
37 */ 44 */
@@ -51,6 +58,9 @@ typedef struct xfs_dquot {
51 xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ 58 xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */
52 xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ 59 xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */
53 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ 60 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
61 xfs_qcnt_t q_prealloc_lo_wmark;/* prealloc throttle wmark */
62 xfs_qcnt_t q_prealloc_hi_wmark;/* prealloc disabled wmark */
63 int64_t q_low_space[XFS_QLOWSP_MAX];
54 struct mutex q_qlock; /* quota lock */ 64 struct mutex q_qlock; /* quota lock */
55 struct completion q_flush; /* flush completion queue */ 65 struct completion q_flush; /* flush completion queue */
56 atomic_t q_pincount; /* dquot pin count */ 66 atomic_t q_pincount; /* dquot pin count */
@@ -145,14 +155,16 @@ extern int xfs_qm_dqflush(struct xfs_dquot *, struct xfs_buf **);
145extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); 155extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
146extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, 156extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
147 xfs_disk_dquot_t *); 157 xfs_disk_dquot_t *);
148extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, 158extern void xfs_qm_adjust_dqlimits(struct xfs_mount *,
149 xfs_disk_dquot_t *); 159 struct xfs_dquot *);
150extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, 160extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
151 xfs_dqid_t, uint, uint, xfs_dquot_t **); 161 xfs_dqid_t, uint, uint, xfs_dquot_t **);
152extern void xfs_qm_dqput(xfs_dquot_t *); 162extern void xfs_qm_dqput(xfs_dquot_t *);
153 163
154extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); 164extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
155 165
166extern void xfs_dquot_set_prealloc_limits(struct xfs_dquot *);
167
156static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) 168static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
157{ 169{
158 xfs_dqlock(dqp); 170 xfs_dqlock(dqp);
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 610456054dc2..35d3f5b041dd 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -66,7 +66,7 @@ xfs_error_test(int error_tag, int *fsidp, char *expression,
66 int i; 66 int i;
67 int64_t fsid; 67 int64_t fsid;
68 68
69 if (random32() % randfactor) 69 if (prandom_u32() % randfactor)
70 return 0; 70 return 0;
71 71
72 memcpy(&fsid, fsidp, sizeof(xfs_fsid_t)); 72 memcpy(&fsid, fsidp, sizeof(xfs_fsid_t));
@@ -178,7 +178,7 @@ xfs_corruption_error(
178 inst_t *ra) 178 inst_t *ra)
179{ 179{
180 if (level <= xfs_error_level) 180 if (level <= xfs_error_level)
181 xfs_hex_dump(p, 16); 181 xfs_hex_dump(p, 64);
182 xfs_error_report(tag, level, mp, filename, linenum, ra); 182 xfs_error_report(tag, level, mp, filename, linenum, ra);
183 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); 183 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
184} 184}
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index feb36d7551ae..c0f375087efc 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -50,9 +50,8 @@ xfs_efi_item_free(
50 * Freeing the efi requires that we remove it from the AIL if it has already 50 * Freeing the efi requires that we remove it from the AIL if it has already
51 * been placed there. However, the EFI may not yet have been placed in the AIL 51 * been placed there. However, the EFI may not yet have been placed in the AIL
52 * when called by xfs_efi_release() from EFD processing due to the ordering of 52 * when called by xfs_efi_release() from EFD processing due to the ordering of
53 * committed vs unpin operations in bulk insert operations. Hence the 53 * committed vs unpin operations in bulk insert operations. Hence the reference
54 * test_and_clear_bit(XFS_EFI_COMMITTED) to ensure only the last caller frees 54 * count to ensure only the last caller frees the EFI.
55 * the EFI.
56 */ 55 */
57STATIC void 56STATIC void
58__xfs_efi_release( 57__xfs_efi_release(
@@ -60,7 +59,7 @@ __xfs_efi_release(
60{ 59{
61 struct xfs_ail *ailp = efip->efi_item.li_ailp; 60 struct xfs_ail *ailp = efip->efi_item.li_ailp;
62 61
63 if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) { 62 if (atomic_dec_and_test(&efip->efi_refcount)) {
64 spin_lock(&ailp->xa_lock); 63 spin_lock(&ailp->xa_lock);
65 /* xfs_trans_ail_delete() drops the AIL lock. */ 64 /* xfs_trans_ail_delete() drops the AIL lock. */
66 xfs_trans_ail_delete(ailp, &efip->efi_item, 65 xfs_trans_ail_delete(ailp, &efip->efi_item,
@@ -126,8 +125,8 @@ xfs_efi_item_pin(
126 * which the EFI is manipulated during a transaction. If we are being asked to 125 * which the EFI is manipulated during a transaction. If we are being asked to
127 * remove the EFI it's because the transaction has been cancelled and by 126 * remove the EFI it's because the transaction has been cancelled and by
128 * definition that means the EFI cannot be in the AIL so remove it from the 127 * definition that means the EFI cannot be in the AIL so remove it from the
129 * transaction and free it. Otherwise coordinate with xfs_efi_release() (via 128 * transaction and free it. Otherwise coordinate with xfs_efi_release()
130 * XFS_EFI_COMMITTED) to determine who gets to free the EFI. 129 * to determine who gets to free the EFI.
131 */ 130 */
132STATIC void 131STATIC void
133xfs_efi_item_unpin( 132xfs_efi_item_unpin(
@@ -171,19 +170,13 @@ xfs_efi_item_unlock(
171 170
172/* 171/*
173 * The EFI is logged only once and cannot be moved in the log, so simply return 172 * The EFI is logged only once and cannot be moved in the log, so simply return
174 * the lsn at which it's been logged. For bulk transaction committed 173 * the lsn at which it's been logged.
175 * processing, the EFI may be processed but not yet unpinned prior to the EFD
176 * being processed. Set the XFS_EFI_COMMITTED flag so this case can be detected
177 * when processing the EFD.
178 */ 174 */
179STATIC xfs_lsn_t 175STATIC xfs_lsn_t
180xfs_efi_item_committed( 176xfs_efi_item_committed(
181 struct xfs_log_item *lip, 177 struct xfs_log_item *lip,
182 xfs_lsn_t lsn) 178 xfs_lsn_t lsn)
183{ 179{
184 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
185
186 set_bit(XFS_EFI_COMMITTED, &efip->efi_flags);
187 return lsn; 180 return lsn;
188} 181}
189 182
@@ -241,6 +234,7 @@ xfs_efi_init(
241 efip->efi_format.efi_nextents = nextents; 234 efip->efi_format.efi_nextents = nextents;
242 efip->efi_format.efi_id = (__psint_t)(void*)efip; 235 efip->efi_format.efi_id = (__psint_t)(void*)efip;
243 atomic_set(&efip->efi_next_extent, 0); 236 atomic_set(&efip->efi_next_extent, 0);
237 atomic_set(&efip->efi_refcount, 2);
244 238
245 return efip; 239 return efip;
246} 240}
@@ -310,8 +304,13 @@ xfs_efi_release(xfs_efi_log_item_t *efip,
310 uint nextents) 304 uint nextents)
311{ 305{
312 ASSERT(atomic_read(&efip->efi_next_extent) >= nextents); 306 ASSERT(atomic_read(&efip->efi_next_extent) >= nextents);
313 if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) 307 if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) {
314 __xfs_efi_release(efip); 308 __xfs_efi_release(efip);
309
310 /* recovery needs us to drop the EFI reference, too */
311 if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
312 __xfs_efi_release(efip);
313 }
315} 314}
316 315
317static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) 316static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 375f68e42531..432222418c56 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -114,16 +114,20 @@ typedef struct xfs_efd_log_format_64 {
114 * Define EFI flag bits. Manipulated by set/clear/test_bit operators. 114 * Define EFI flag bits. Manipulated by set/clear/test_bit operators.
115 */ 115 */
116#define XFS_EFI_RECOVERED 1 116#define XFS_EFI_RECOVERED 1
117#define XFS_EFI_COMMITTED 2
118 117
119/* 118/*
120 * This is the "extent free intention" log item. It is used 119 * This is the "extent free intention" log item. It is used to log the fact
121 * to log the fact that some extents need to be free. It is 120 * that some extents need to be free. It is used in conjunction with the
122 * used in conjunction with the "extent free done" log item 121 * "extent free done" log item described below.
123 * described below. 122 *
123 * The EFI is reference counted so that it is not freed prior to both the EFI
124 * and EFD being committed and unpinned. This ensures that when the last
125 * reference goes away the EFI will always be in the AIL as it has been
126 * unpinned, regardless of whether the EFD is processed before or after the EFI.
124 */ 127 */
125typedef struct xfs_efi_log_item { 128typedef struct xfs_efi_log_item {
126 xfs_log_item_t efi_item; 129 xfs_log_item_t efi_item;
130 atomic_t efi_refcount;
127 atomic_t efi_next_extent; 131 atomic_t efi_next_extent;
128 unsigned long efi_flags; /* misc flags */ 132 unsigned long efi_flags; /* misc flags */
129 xfs_efi_log_format_t efi_format; 133 xfs_efi_log_format_t efi_format;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 3800128d2171..054d60c0ac57 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -890,7 +890,7 @@ xfs_dir_open(
890 */ 890 */
891 mode = xfs_ilock_map_shared(ip); 891 mode = xfs_ilock_map_shared(ip);
892 if (ip->i_d.di_nextents > 0) 892 if (ip->i_d.di_nextents > 0)
893 xfs_dir2_data_readahead(NULL, ip, 0, -1); 893 xfs_dir3_data_readahead(NULL, ip, 0, -1);
894 xfs_iunlock(ip, mode); 894 xfs_iunlock(ip, mode);
895 return 0; 895 return 0;
896} 896}
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 2866b8c78b7a..87595b211da1 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -247,6 +247,9 @@ xfs_growfs_data_private(
247 tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp); 247 tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
248 agf->agf_freeblks = cpu_to_be32(tmpsize); 248 agf->agf_freeblks = cpu_to_be32(tmpsize);
249 agf->agf_longest = cpu_to_be32(tmpsize); 249 agf->agf_longest = cpu_to_be32(tmpsize);
250 if (xfs_sb_version_hascrc(&mp->m_sb))
251 uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_uuid);
252
250 error = xfs_bwrite(bp); 253 error = xfs_bwrite(bp);
251 xfs_buf_relse(bp); 254 xfs_buf_relse(bp);
252 if (error) 255 if (error)
@@ -265,6 +268,11 @@ xfs_growfs_data_private(
265 } 268 }
266 269
267 agfl = XFS_BUF_TO_AGFL(bp); 270 agfl = XFS_BUF_TO_AGFL(bp);
271 if (xfs_sb_version_hascrc(&mp->m_sb)) {
272 agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
273 agfl->agfl_seqno = cpu_to_be32(agno);
274 uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);
275 }
268 for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++) 276 for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
269 agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); 277 agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
270 278
@@ -296,8 +304,11 @@ xfs_growfs_data_private(
296 agi->agi_freecount = 0; 304 agi->agi_freecount = 0;
297 agi->agi_newino = cpu_to_be32(NULLAGINO); 305 agi->agi_newino = cpu_to_be32(NULLAGINO);
298 agi->agi_dirino = cpu_to_be32(NULLAGINO); 306 agi->agi_dirino = cpu_to_be32(NULLAGINO);
307 if (xfs_sb_version_hascrc(&mp->m_sb))
308 uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid);
299 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) 309 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
300 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); 310 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
311
301 error = xfs_bwrite(bp); 312 error = xfs_bwrite(bp);
302 xfs_buf_relse(bp); 313 xfs_buf_relse(bp);
303 if (error) 314 if (error)
@@ -316,7 +327,13 @@ xfs_growfs_data_private(
316 goto error0; 327 goto error0;
317 } 328 }
318 329
319 xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1, 0); 330 if (xfs_sb_version_hascrc(&mp->m_sb))
331 xfs_btree_init_block(mp, bp, XFS_ABTB_CRC_MAGIC, 0, 1,
332 agno, XFS_BTREE_CRC_BLOCKS);
333 else
334 xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1,
335 agno, 0);
336
320 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 337 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
321 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 338 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
322 arec->ar_blockcount = cpu_to_be32( 339 arec->ar_blockcount = cpu_to_be32(
@@ -339,7 +356,13 @@ xfs_growfs_data_private(
339 goto error0; 356 goto error0;
340 } 357 }
341 358
342 xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1, 0); 359 if (xfs_sb_version_hascrc(&mp->m_sb))
360 xfs_btree_init_block(mp, bp, XFS_ABTC_CRC_MAGIC, 0, 1,
361 agno, XFS_BTREE_CRC_BLOCKS);
362 else
363 xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1,
364 agno, 0);
365
343 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 366 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
344 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 367 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
345 arec->ar_blockcount = cpu_to_be32( 368 arec->ar_blockcount = cpu_to_be32(
@@ -363,7 +386,12 @@ xfs_growfs_data_private(
363 goto error0; 386 goto error0;
364 } 387 }
365 388
366 xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0, 0); 389 if (xfs_sb_version_hascrc(&mp->m_sb))
390 xfs_btree_init_block(mp, bp, XFS_IBT_CRC_MAGIC, 0, 0,
391 agno, XFS_BTREE_CRC_BLOCKS);
392 else
393 xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0,
394 agno, 0);
367 395
368 error = xfs_bwrite(bp); 396 error = xfs_bwrite(bp);
369 xfs_buf_relse(bp); 397 xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 515bf71ce01c..c8f5ae1debf2 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -36,6 +36,8 @@
36#include "xfs_rtalloc.h" 36#include "xfs_rtalloc.h"
37#include "xfs_error.h" 37#include "xfs_error.h"
38#include "xfs_bmap.h" 38#include "xfs_bmap.h"
39#include "xfs_cksum.h"
40#include "xfs_buf_item.h"
39 41
40 42
41/* 43/*
@@ -165,6 +167,7 @@ xfs_ialloc_inode_init(
165 int version; 167 int version;
166 int i, j; 168 int i, j;
167 xfs_daddr_t d; 169 xfs_daddr_t d;
170 xfs_ino_t ino = 0;
168 171
169 /* 172 /*
170 * Loop over the new block(s), filling in the inodes. 173 * Loop over the new block(s), filling in the inodes.
@@ -183,13 +186,29 @@ xfs_ialloc_inode_init(
183 } 186 }
184 187
185 /* 188 /*
186 * Figure out what version number to use in the inodes we create. 189 * Figure out what version number to use in the inodes we create. If
187 * If the superblock version has caught up to the one that supports 190 * the superblock version has caught up to the one that supports the new
188 * the new inode format, then use the new inode version. Otherwise 191 * inode format, then use the new inode version. Otherwise use the old
189 * use the old version so that old kernels will continue to be 192 * version so that old kernels will continue to be able to use the file
190 * able to use the file system. 193 * system.
194 *
195 * For v3 inodes, we also need to write the inode number into the inode,
196 * so calculate the first inode number of the chunk here as
197 * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not
198 * across multiple filesystem blocks (such as a cluster) and so cannot
199 * be used in the cluster buffer loop below.
200 *
201 * Further, because we are writing the inode directly into the buffer
202 * and calculating a CRC on the entire inode, we have ot log the entire
203 * inode so that the entire range the CRC covers is present in the log.
204 * That means for v3 inode we log the entire buffer rather than just the
205 * inode cores.
191 */ 206 */
192 if (xfs_sb_version_hasnlink(&mp->m_sb)) 207 if (xfs_sb_version_hascrc(&mp->m_sb)) {
208 version = 3;
209 ino = XFS_AGINO_TO_INO(mp, agno,
210 XFS_OFFBNO_TO_AGINO(mp, agbno, 0));
211 } else if (xfs_sb_version_hasnlink(&mp->m_sb))
193 version = 2; 212 version = 2;
194 else 213 else
195 version = 1; 214 version = 1;
@@ -212,17 +231,32 @@ xfs_ialloc_inode_init(
212 * individual transactions causing a lot of log traffic. 231 * individual transactions causing a lot of log traffic.
213 */ 232 */
214 fbuf->b_ops = &xfs_inode_buf_ops; 233 fbuf->b_ops = &xfs_inode_buf_ops;
215 xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); 234 xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
216 for (i = 0; i < ninodes; i++) { 235 for (i = 0; i < ninodes; i++) {
217 int ioffset = i << mp->m_sb.sb_inodelog; 236 int ioffset = i << mp->m_sb.sb_inodelog;
218 uint isize = sizeof(struct xfs_dinode); 237 uint isize = xfs_dinode_size(version);
219 238
220 free = xfs_make_iptr(mp, fbuf, i); 239 free = xfs_make_iptr(mp, fbuf, i);
221 free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 240 free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
222 free->di_version = version; 241 free->di_version = version;
223 free->di_gen = cpu_to_be32(gen); 242 free->di_gen = cpu_to_be32(gen);
224 free->di_next_unlinked = cpu_to_be32(NULLAGINO); 243 free->di_next_unlinked = cpu_to_be32(NULLAGINO);
225 xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); 244
245 if (version == 3) {
246 free->di_ino = cpu_to_be64(ino);
247 ino++;
248 uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
249 xfs_dinode_calc_crc(mp, free);
250 } else {
251 /* just log the inode core */
252 xfs_trans_log_buf(tp, fbuf, ioffset,
253 ioffset + isize - 1);
254 }
255 }
256 if (version == 3) {
257 /* need to log the entire buffer */
258 xfs_trans_log_buf(tp, fbuf, 0,
259 BBTOB(fbuf->b_length) - 1);
226 } 260 }
227 xfs_trans_inode_alloc_buf(tp, fbuf); 261 xfs_trans_inode_alloc_buf(tp, fbuf);
228 } 262 }
@@ -369,7 +403,7 @@ xfs_ialloc_ag_alloc(
369 * number from being easily guessable. 403 * number from being easily guessable.
370 */ 404 */
371 error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, 405 error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno,
372 args.len, random32()); 406 args.len, prandom_u32());
373 407
374 if (error) 408 if (error)
375 return error; 409 return error;
@@ -1453,6 +1487,7 @@ xfs_ialloc_log_agi(
1453 /* 1487 /*
1454 * Log the allocation group inode header buffer. 1488 * Log the allocation group inode header buffer.
1455 */ 1489 */
1490 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
1456 xfs_trans_log_buf(tp, bp, first, last); 1491 xfs_trans_log_buf(tp, bp, first, last);
1457} 1492}
1458 1493
@@ -1470,19 +1505,23 @@ xfs_check_agi_unlinked(
1470#define xfs_check_agi_unlinked(agi) 1505#define xfs_check_agi_unlinked(agi)
1471#endif 1506#endif
1472 1507
1473static void 1508static bool
1474xfs_agi_verify( 1509xfs_agi_verify(
1475 struct xfs_buf *bp) 1510 struct xfs_buf *bp)
1476{ 1511{
1477 struct xfs_mount *mp = bp->b_target->bt_mount; 1512 struct xfs_mount *mp = bp->b_target->bt_mount;
1478 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); 1513 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp);
1479 int agi_ok;
1480 1514
1515 if (xfs_sb_version_hascrc(&mp->m_sb) &&
1516 !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_uuid))
1517 return false;
1481 /* 1518 /*
1482 * Validate the magic number of the agi block. 1519 * Validate the magic number of the agi block.
1483 */ 1520 */
1484 agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) && 1521 if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC))
1485 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)); 1522 return false;
1523 if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
1524 return false;
1486 1525
1487 /* 1526 /*
1488 * during growfs operations, the perag is not fully initialised, 1527 * during growfs operations, the perag is not fully initialised,
@@ -1490,30 +1529,52 @@ xfs_agi_verify(
1490 * use it by using uncached buffers that don't have the perag attached 1529 * use it by using uncached buffers that don't have the perag attached
1491 * so we can detect and avoid this problem. 1530 * so we can detect and avoid this problem.
1492 */ 1531 */
1493 if (bp->b_pag) 1532 if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno)
1494 agi_ok = agi_ok && be32_to_cpu(agi->agi_seqno) == 1533 return false;
1495 bp->b_pag->pag_agno;
1496 1534
1497 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
1498 XFS_RANDOM_IALLOC_READ_AGI))) {
1499 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agi);
1500 xfs_buf_ioerror(bp, EFSCORRUPTED);
1501 }
1502 xfs_check_agi_unlinked(agi); 1535 xfs_check_agi_unlinked(agi);
1536 return true;
1503} 1537}
1504 1538
1505static void 1539static void
1506xfs_agi_read_verify( 1540xfs_agi_read_verify(
1507 struct xfs_buf *bp) 1541 struct xfs_buf *bp)
1508{ 1542{
1509 xfs_agi_verify(bp); 1543 struct xfs_mount *mp = bp->b_target->bt_mount;
1544 int agi_ok = 1;
1545
1546 if (xfs_sb_version_hascrc(&mp->m_sb))
1547 agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
1548 offsetof(struct xfs_agi, agi_crc));
1549 agi_ok = agi_ok && xfs_agi_verify(bp);
1550
1551 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
1552 XFS_RANDOM_IALLOC_READ_AGI))) {
1553 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
1554 xfs_buf_ioerror(bp, EFSCORRUPTED);
1555 }
1510} 1556}
1511 1557
1512static void 1558static void
1513xfs_agi_write_verify( 1559xfs_agi_write_verify(
1514 struct xfs_buf *bp) 1560 struct xfs_buf *bp)
1515{ 1561{
1516 xfs_agi_verify(bp); 1562 struct xfs_mount *mp = bp->b_target->bt_mount;
1563 struct xfs_buf_log_item *bip = bp->b_fspriv;
1564
1565 if (!xfs_agi_verify(bp)) {
1566 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
1567 xfs_buf_ioerror(bp, EFSCORRUPTED);
1568 return;
1569 }
1570
1571 if (!xfs_sb_version_hascrc(&mp->m_sb))
1572 return;
1573
1574 if (bip)
1575 XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
1576 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
1577 offsetof(struct xfs_agi, agi_crc));
1517} 1578}
1518 1579
1519const struct xfs_buf_ops xfs_agi_buf_ops = { 1580const struct xfs_buf_ops xfs_agi_buf_ops = {
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index bec344b36507..c82ac8867421 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -34,6 +34,7 @@
34#include "xfs_alloc.h" 34#include "xfs_alloc.h"
35#include "xfs_error.h" 35#include "xfs_error.h"
36#include "xfs_trace.h" 36#include "xfs_trace.h"
37#include "xfs_cksum.h"
37 38
38 39
39STATIC int 40STATIC int
@@ -182,52 +183,88 @@ xfs_inobt_key_diff(
182 cur->bc_rec.i.ir_startino; 183 cur->bc_rec.i.ir_startino;
183} 184}
184 185
185void 186static int
186xfs_inobt_verify( 187xfs_inobt_verify(
187 struct xfs_buf *bp) 188 struct xfs_buf *bp)
188{ 189{
189 struct xfs_mount *mp = bp->b_target->bt_mount; 190 struct xfs_mount *mp = bp->b_target->bt_mount;
190 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 191 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
192 struct xfs_perag *pag = bp->b_pag;
191 unsigned int level; 193 unsigned int level;
192 int sblock_ok; /* block passes checks */
193 194
194 /* magic number and level verification */ 195 /*
195 level = be16_to_cpu(block->bb_level); 196 * During growfs operations, we can't verify the exact owner as the
196 sblock_ok = block->bb_magic == cpu_to_be32(XFS_IBT_MAGIC) && 197 * perag is not fully initialised and hence not attached to the buffer.
197 level < mp->m_in_maxlevels; 198 *
199 * Similarly, during log recovery we will have a perag structure
200 * attached, but the agi information will not yet have been initialised
201 * from the on disk AGI. We don't currently use any of this information,
202 * but beware of the landmine (i.e. need to check pag->pagi_init) if we
203 * ever do.
204 */
205 switch (block->bb_magic) {
206 case cpu_to_be32(XFS_IBT_CRC_MAGIC):
207 if (!xfs_sb_version_hascrc(&mp->m_sb))
208 return false;
209 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
210 return false;
211 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
212 return false;
213 if (pag &&
214 be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
215 return false;
216 /* fall through */
217 case cpu_to_be32(XFS_IBT_MAGIC):
218 break;
219 default:
220 return 0;
221 }
198 222
199 /* numrecs verification */ 223 /* numrecs and level verification */
200 sblock_ok = sblock_ok && 224 level = be16_to_cpu(block->bb_level);
201 be16_to_cpu(block->bb_numrecs) <= mp->m_inobt_mxr[level != 0]; 225 if (level >= mp->m_in_maxlevels)
226 return false;
227 if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
228 return false;
202 229
203 /* sibling pointer verification */ 230 /* sibling pointer verification */
204 sblock_ok = sblock_ok && 231 if (!block->bb_u.s.bb_leftsib ||
205 (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || 232 (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
206 be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) && 233 block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
207 block->bb_u.s.bb_leftsib && 234 return false;
208 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || 235 if (!block->bb_u.s.bb_rightsib ||
209 be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) && 236 (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
210 block->bb_u.s.bb_rightsib; 237 block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
211 238 return false;
212 if (!sblock_ok) { 239
213 trace_xfs_btree_corrupt(bp, _RET_IP_); 240 return true;
214 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
215 xfs_buf_ioerror(bp, EFSCORRUPTED);
216 }
217} 241}
218 242
219static void 243static void
220xfs_inobt_read_verify( 244xfs_inobt_read_verify(
221 struct xfs_buf *bp) 245 struct xfs_buf *bp)
222{ 246{
223 xfs_inobt_verify(bp); 247 if (!(xfs_btree_sblock_verify_crc(bp) &&
248 xfs_inobt_verify(bp))) {
249 trace_xfs_btree_corrupt(bp, _RET_IP_);
250 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
251 bp->b_target->bt_mount, bp->b_addr);
252 xfs_buf_ioerror(bp, EFSCORRUPTED);
253 }
224} 254}
225 255
226static void 256static void
227xfs_inobt_write_verify( 257xfs_inobt_write_verify(
228 struct xfs_buf *bp) 258 struct xfs_buf *bp)
229{ 259{
230 xfs_inobt_verify(bp); 260 if (!xfs_inobt_verify(bp)) {
261 trace_xfs_btree_corrupt(bp, _RET_IP_);
262 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
263 bp->b_target->bt_mount, bp->b_addr);
264 xfs_buf_ioerror(bp, EFSCORRUPTED);
265 }
266 xfs_btree_sblock_calc_crc(bp);
267
231} 268}
232 269
233const struct xfs_buf_ops xfs_inobt_buf_ops = { 270const struct xfs_buf_ops xfs_inobt_buf_ops = {
@@ -301,6 +338,8 @@ xfs_inobt_init_cursor(
301 cur->bc_blocklog = mp->m_sb.sb_blocklog; 338 cur->bc_blocklog = mp->m_sb.sb_blocklog;
302 339
303 cur->bc_ops = &xfs_inobt_ops; 340 cur->bc_ops = &xfs_inobt_ops;
341 if (xfs_sb_version_hascrc(&mp->m_sb))
342 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
304 343
305 cur->bc_private.a.agbp = agbp; 344 cur->bc_private.a.agbp = agbp;
306 cur->bc_private.a.agno = agno; 345 cur->bc_private.a.agno = agno;
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index 25c0239a8eab..3ac36b7642e9 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -29,7 +29,8 @@ struct xfs_mount;
29/* 29/*
30 * There is a btree for the inode map per allocation group. 30 * There is a btree for the inode map per allocation group.
31 */ 31 */
32#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ 32#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */
33#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */
33 34
34typedef __uint64_t xfs_inofree_t; 35typedef __uint64_t xfs_inofree_t;
35#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) 36#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t))
@@ -76,10 +77,10 @@ typedef __be32 xfs_inobt_ptr_t;
76 77
77/* 78/*
78 * Btree block header size depends on a superblock flag. 79 * Btree block header size depends on a superblock flag.
79 *
80 * (not quite yet, but soon)
81 */ 80 */
82#define XFS_INOBT_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN 81#define XFS_INOBT_BLOCK_LEN(mp) \
82 (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
83 XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
83 84
84/* 85/*
85 * Record, key, and pointer address macros for btree blocks. 86 * Record, key, and pointer address macros for btree blocks.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 4f201656d2d9..558ef4947206 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -44,6 +44,7 @@
44#include "xfs_quota.h" 44#include "xfs_quota.h"
45#include "xfs_filestream.h" 45#include "xfs_filestream.h"
46#include "xfs_vnodeops.h" 46#include "xfs_vnodeops.h"
47#include "xfs_cksum.h"
47#include "xfs_trace.h" 48#include "xfs_trace.h"
48#include "xfs_icache.h" 49#include "xfs_icache.h"
49 50
@@ -786,6 +787,7 @@ xfs_iformat_btree(
786 xfs_dinode_t *dip, 787 xfs_dinode_t *dip,
787 int whichfork) 788 int whichfork)
788{ 789{
790 struct xfs_mount *mp = ip->i_mount;
789 xfs_bmdr_block_t *dfp; 791 xfs_bmdr_block_t *dfp;
790 xfs_ifork_t *ifp; 792 xfs_ifork_t *ifp;
791 /* REFERENCED */ 793 /* REFERENCED */
@@ -794,7 +796,7 @@ xfs_iformat_btree(
794 796
795 ifp = XFS_IFORK_PTR(ip, whichfork); 797 ifp = XFS_IFORK_PTR(ip, whichfork);
796 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); 798 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
797 size = XFS_BMAP_BROOT_SPACE(dfp); 799 size = XFS_BMAP_BROOT_SPACE(mp, dfp);
798 nrecs = be16_to_cpu(dfp->bb_numrecs); 800 nrecs = be16_to_cpu(dfp->bb_numrecs);
799 801
800 /* 802 /*
@@ -805,14 +807,14 @@ xfs_iformat_btree(
805 * blocks. 807 * blocks.
806 */ 808 */
807 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= 809 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
808 XFS_IFORK_MAXEXT(ip, whichfork) || 810 XFS_IFORK_MAXEXT(ip, whichfork) ||
809 XFS_BMDR_SPACE_CALC(nrecs) > 811 XFS_BMDR_SPACE_CALC(nrecs) >
810 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) || 812 XFS_DFORK_SIZE(dip, mp, whichfork) ||
811 XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { 813 XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
812 xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).", 814 xfs_warn(mp, "corrupt inode %Lu (btree).",
813 (unsigned long long) ip->i_ino); 815 (unsigned long long) ip->i_ino);
814 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, 816 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
815 ip->i_mount, dip); 817 mp, dip);
816 return XFS_ERROR(EFSCORRUPTED); 818 return XFS_ERROR(EFSCORRUPTED);
817 } 819 }
818 820
@@ -823,8 +825,7 @@ xfs_iformat_btree(
823 * Copy and convert from the on-disk structure 825 * Copy and convert from the on-disk structure
824 * to the in-memory structure. 826 * to the in-memory structure.
825 */ 827 */
826 xfs_bmdr_to_bmbt(ip->i_mount, dfp, 828 xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
827 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
828 ifp->if_broot, size); 829 ifp->if_broot, size);
829 ifp->if_flags &= ~XFS_IFEXTENTS; 830 ifp->if_flags &= ~XFS_IFEXTENTS;
830 ifp->if_flags |= XFS_IFBROOT; 831 ifp->if_flags |= XFS_IFBROOT;
@@ -866,6 +867,17 @@ xfs_dinode_from_disk(
866 to->di_dmstate = be16_to_cpu(from->di_dmstate); 867 to->di_dmstate = be16_to_cpu(from->di_dmstate);
867 to->di_flags = be16_to_cpu(from->di_flags); 868 to->di_flags = be16_to_cpu(from->di_flags);
868 to->di_gen = be32_to_cpu(from->di_gen); 869 to->di_gen = be32_to_cpu(from->di_gen);
870
871 if (to->di_version == 3) {
872 to->di_changecount = be64_to_cpu(from->di_changecount);
873 to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
874 to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
875 to->di_flags2 = be64_to_cpu(from->di_flags2);
876 to->di_ino = be64_to_cpu(from->di_ino);
877 to->di_lsn = be64_to_cpu(from->di_lsn);
878 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
879 uuid_copy(&to->di_uuid, &from->di_uuid);
880 }
869} 881}
870 882
871void 883void
@@ -902,6 +914,17 @@ xfs_dinode_to_disk(
902 to->di_dmstate = cpu_to_be16(from->di_dmstate); 914 to->di_dmstate = cpu_to_be16(from->di_dmstate);
903 to->di_flags = cpu_to_be16(from->di_flags); 915 to->di_flags = cpu_to_be16(from->di_flags);
904 to->di_gen = cpu_to_be32(from->di_gen); 916 to->di_gen = cpu_to_be32(from->di_gen);
917
918 if (from->di_version == 3) {
919 to->di_changecount = cpu_to_be64(from->di_changecount);
920 to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
921 to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
922 to->di_flags2 = cpu_to_be64(from->di_flags2);
923 to->di_ino = cpu_to_be64(from->di_ino);
924 to->di_lsn = cpu_to_be64(from->di_lsn);
925 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
926 uuid_copy(&to->di_uuid, &from->di_uuid);
927 }
905} 928}
906 929
907STATIC uint 930STATIC uint
@@ -962,6 +985,47 @@ xfs_dic2xflags(
962 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); 985 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
963} 986}
964 987
988static bool
989xfs_dinode_verify(
990 struct xfs_mount *mp,
991 struct xfs_inode *ip,
992 struct xfs_dinode *dip)
993{
994 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
995 return false;
996
997 /* only version 3 or greater inodes are extensively verified here */
998 if (dip->di_version < 3)
999 return true;
1000
1001 if (!xfs_sb_version_hascrc(&mp->m_sb))
1002 return false;
1003 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
1004 offsetof(struct xfs_dinode, di_crc)))
1005 return false;
1006 if (be64_to_cpu(dip->di_ino) != ip->i_ino)
1007 return false;
1008 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
1009 return false;
1010 return true;
1011}
1012
1013void
1014xfs_dinode_calc_crc(
1015 struct xfs_mount *mp,
1016 struct xfs_dinode *dip)
1017{
1018 __uint32_t crc;
1019
1020 if (dip->di_version < 3)
1021 return;
1022
1023 ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
1024 crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
1025 offsetof(struct xfs_dinode, di_crc));
1026 dip->di_crc = xfs_end_cksum(crc);
1027}
1028
965/* 1029/*
966 * Read the disk inode attributes into the in-core inode structure. 1030 * Read the disk inode attributes into the in-core inode structure.
967 */ 1031 */
@@ -990,17 +1054,13 @@ xfs_iread(
990 if (error) 1054 if (error)
991 return error; 1055 return error;
992 1056
993 /* 1057 /* even unallocated inodes are verified */
994 * If we got something that isn't an inode it means someone 1058 if (!xfs_dinode_verify(mp, ip, dip)) {
995 * (nfs or dmi) has a stale handle. 1059 xfs_alert(mp, "%s: validation failed for inode %lld failed",
996 */ 1060 __func__, ip->i_ino);
997 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) { 1061
998#ifdef DEBUG 1062 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
999 xfs_alert(mp, 1063 error = XFS_ERROR(EFSCORRUPTED);
1000 "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
1001 __func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC);
1002#endif /* DEBUG */
1003 error = XFS_ERROR(EINVAL);
1004 goto out_brelse; 1064 goto out_brelse;
1005 } 1065 }
1006 1066
@@ -1022,10 +1082,20 @@ xfs_iread(
1022 goto out_brelse; 1082 goto out_brelse;
1023 } 1083 }
1024 } else { 1084 } else {
1085 /*
1086 * Partial initialisation of the in-core inode. Just the bits
1087 * that xfs_ialloc won't overwrite or relies on being correct.
1088 */
1025 ip->i_d.di_magic = be16_to_cpu(dip->di_magic); 1089 ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
1026 ip->i_d.di_version = dip->di_version; 1090 ip->i_d.di_version = dip->di_version;
1027 ip->i_d.di_gen = be32_to_cpu(dip->di_gen); 1091 ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
1028 ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); 1092 ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
1093
1094 if (dip->di_version == 3) {
1095 ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
1096 uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
1097 }
1098
1029 /* 1099 /*
1030 * Make sure to pull in the mode here as well in 1100 * Make sure to pull in the mode here as well in
1031 * case the inode is released without being used. 1101 * case the inode is released without being used.
@@ -1161,6 +1231,7 @@ xfs_ialloc(
1161 xfs_buf_t **ialloc_context, 1231 xfs_buf_t **ialloc_context,
1162 xfs_inode_t **ipp) 1232 xfs_inode_t **ipp)
1163{ 1233{
1234 struct xfs_mount *mp = tp->t_mountp;
1164 xfs_ino_t ino; 1235 xfs_ino_t ino;
1165 xfs_inode_t *ip; 1236 xfs_inode_t *ip;
1166 uint flags; 1237 uint flags;
@@ -1187,7 +1258,7 @@ xfs_ialloc(
1187 * This is because we're setting fields here we need 1258 * This is because we're setting fields here we need
1188 * to prevent others from looking at until we're done. 1259 * to prevent others from looking at until we're done.
1189 */ 1260 */
1190 error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE, 1261 error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE,
1191 XFS_ILOCK_EXCL, &ip); 1262 XFS_ILOCK_EXCL, &ip);
1192 if (error) 1263 if (error)
1193 return error; 1264 return error;
@@ -1208,7 +1279,7 @@ xfs_ialloc(
1208 * the inode version number now. This way we only do the conversion 1279 * the inode version number now. This way we only do the conversion
1209 * here rather than here and in the flush/logging code. 1280 * here rather than here and in the flush/logging code.
1210 */ 1281 */
1211 if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) && 1282 if (xfs_sb_version_hasnlink(&mp->m_sb) &&
1212 ip->i_d.di_version == 1) { 1283 ip->i_d.di_version == 1) {
1213 ip->i_d.di_version = 2; 1284 ip->i_d.di_version = 2;
1214 /* 1285 /*
@@ -1258,6 +1329,19 @@ xfs_ialloc(
1258 ip->i_d.di_dmevmask = 0; 1329 ip->i_d.di_dmevmask = 0;
1259 ip->i_d.di_dmstate = 0; 1330 ip->i_d.di_dmstate = 0;
1260 ip->i_d.di_flags = 0; 1331 ip->i_d.di_flags = 0;
1332
1333 if (ip->i_d.di_version == 3) {
1334 ASSERT(ip->i_d.di_ino == ino);
1335 ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
1336 ip->i_d.di_crc = 0;
1337 ip->i_d.di_changecount = 1;
1338 ip->i_d.di_lsn = 0;
1339 ip->i_d.di_flags2 = 0;
1340 memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2));
1341 ip->i_d.di_crtime = ip->i_d.di_mtime;
1342 }
1343
1344
1261 flags = XFS_ILOG_CORE; 1345 flags = XFS_ILOG_CORE;
1262 switch (mode & S_IFMT) { 1346 switch (mode & S_IFMT) {
1263 case S_IFIFO: 1347 case S_IFIFO:
@@ -2037,7 +2121,7 @@ xfs_iroot_realloc(
2037 * allocate it now and get out. 2121 * allocate it now and get out.
2038 */ 2122 */
2039 if (ifp->if_broot_bytes == 0) { 2123 if (ifp->if_broot_bytes == 0) {
2040 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); 2124 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
2041 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); 2125 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
2042 ifp->if_broot_bytes = (int)new_size; 2126 ifp->if_broot_bytes = (int)new_size;
2043 return; 2127 return;
@@ -2051,9 +2135,9 @@ xfs_iroot_realloc(
2051 */ 2135 */
2052 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); 2136 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
2053 new_max = cur_max + rec_diff; 2137 new_max = cur_max + rec_diff;
2054 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 2138 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
2055 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, 2139 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
2056 (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ 2140 XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
2057 KM_SLEEP | KM_NOFS); 2141 KM_SLEEP | KM_NOFS);
2058 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 2142 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
2059 ifp->if_broot_bytes); 2143 ifp->if_broot_bytes);
@@ -2061,7 +2145,7 @@ xfs_iroot_realloc(
2061 (int)new_size); 2145 (int)new_size);
2062 ifp->if_broot_bytes = (int)new_size; 2146 ifp->if_broot_bytes = (int)new_size;
2063 ASSERT(ifp->if_broot_bytes <= 2147 ASSERT(ifp->if_broot_bytes <=
2064 XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 2148 XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
2065 memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); 2149 memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
2066 return; 2150 return;
2067 } 2151 }
@@ -2076,7 +2160,7 @@ xfs_iroot_realloc(
2076 new_max = cur_max + rec_diff; 2160 new_max = cur_max + rec_diff;
2077 ASSERT(new_max >= 0); 2161 ASSERT(new_max >= 0);
2078 if (new_max > 0) 2162 if (new_max > 0)
2079 new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 2163 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
2080 else 2164 else
2081 new_size = 0; 2165 new_size = 0;
2082 if (new_size > 0) { 2166 if (new_size > 0) {
@@ -2084,7 +2168,8 @@ xfs_iroot_realloc(
2084 /* 2168 /*
2085 * First copy over the btree block header. 2169 * First copy over the btree block header.
2086 */ 2170 */
2087 memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN); 2171 memcpy(new_broot, ifp->if_broot,
2172 XFS_BMBT_BLOCK_LEN(ip->i_mount));
2088 } else { 2173 } else {
2089 new_broot = NULL; 2174 new_broot = NULL;
2090 ifp->if_flags &= ~XFS_IFBROOT; 2175 ifp->if_flags &= ~XFS_IFBROOT;
@@ -2114,7 +2199,7 @@ xfs_iroot_realloc(
2114 ifp->if_broot = new_broot; 2199 ifp->if_broot = new_broot;
2115 ifp->if_broot_bytes = (int)new_size; 2200 ifp->if_broot_bytes = (int)new_size;
2116 ASSERT(ifp->if_broot_bytes <= 2201 ASSERT(ifp->if_broot_bytes <=
2117 XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 2202 XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
2118 return; 2203 return;
2119} 2204}
2120 2205
@@ -2427,7 +2512,7 @@ xfs_iflush_fork(
2427 ASSERT(ifp->if_broot != NULL); 2512 ASSERT(ifp->if_broot != NULL);
2428 ASSERT(ifp->if_broot_bytes <= 2513 ASSERT(ifp->if_broot_bytes <=
2429 (XFS_IFORK_SIZE(ip, whichfork) + 2514 (XFS_IFORK_SIZE(ip, whichfork) +
2430 XFS_BROOT_SIZE_ADJ)); 2515 XFS_BROOT_SIZE_ADJ(ip)));
2431 xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, 2516 xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
2432 (xfs_bmdr_block_t *)cp, 2517 (xfs_bmdr_block_t *)cp,
2433 XFS_DFORK_SIZE(dip, mp, whichfork)); 2518 XFS_DFORK_SIZE(dip, mp, whichfork));
@@ -2715,20 +2800,18 @@ abort_out:
2715 2800
2716STATIC int 2801STATIC int
2717xfs_iflush_int( 2802xfs_iflush_int(
2718 xfs_inode_t *ip, 2803 struct xfs_inode *ip,
2719 xfs_buf_t *bp) 2804 struct xfs_buf *bp)
2720{ 2805{
2721 xfs_inode_log_item_t *iip; 2806 struct xfs_inode_log_item *iip = ip->i_itemp;
2722 xfs_dinode_t *dip; 2807 struct xfs_dinode *dip;
2723 xfs_mount_t *mp; 2808 struct xfs_mount *mp = ip->i_mount;
2724 2809
2725 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2810 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2726 ASSERT(xfs_isiflocked(ip)); 2811 ASSERT(xfs_isiflocked(ip));
2727 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 2812 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
2728 ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 2813 ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
2729 2814 ASSERT(iip != NULL && iip->ili_fields != 0);
2730 iip = ip->i_itemp;
2731 mp = ip->i_mount;
2732 2815
2733 /* set *dip = inode's place in the buffer */ 2816 /* set *dip = inode's place in the buffer */
2734 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 2817 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
@@ -2789,9 +2872,9 @@ xfs_iflush_int(
2789 } 2872 }
2790 /* 2873 /*
2791 * bump the flush iteration count, used to detect flushes which 2874 * bump the flush iteration count, used to detect flushes which
2792 * postdate a log record during recovery. 2875 * postdate a log record during recovery. This is redundant as we now
2876 * log every change and hence this can't happen. Still, it doesn't hurt.
2793 */ 2877 */
2794
2795 ip->i_d.di_flushiter++; 2878 ip->i_d.di_flushiter++;
2796 2879
2797 /* 2880 /*
@@ -2867,41 +2950,30 @@ xfs_iflush_int(
2867 * need the AIL lock, because it is a 64 bit value that cannot be read 2950 * need the AIL lock, because it is a 64 bit value that cannot be read
2868 * atomically. 2951 * atomically.
2869 */ 2952 */
2870 if (iip != NULL && iip->ili_fields != 0) { 2953 iip->ili_last_fields = iip->ili_fields;
2871 iip->ili_last_fields = iip->ili_fields; 2954 iip->ili_fields = 0;
2872 iip->ili_fields = 0; 2955 iip->ili_logged = 1;
2873 iip->ili_logged = 1;
2874 2956
2875 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 2957 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
2876 &iip->ili_item.li_lsn); 2958 &iip->ili_item.li_lsn);
2877 2959
2878 /* 2960 /*
2879 * Attach the function xfs_iflush_done to the inode's 2961 * Attach the function xfs_iflush_done to the inode's
2880 * buffer. This will remove the inode from the AIL 2962 * buffer. This will remove the inode from the AIL
2881 * and unlock the inode's flush lock when the inode is 2963 * and unlock the inode's flush lock when the inode is
2882 * completely written to disk. 2964 * completely written to disk.
2883 */ 2965 */
2884 xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); 2966 xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
2885 2967
2886 ASSERT(bp->b_fspriv != NULL); 2968 /* update the lsn in the on disk inode if required */
2887 ASSERT(bp->b_iodone != NULL); 2969 if (ip->i_d.di_version == 3)
2888 } else { 2970 dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn);
2889 /* 2971
2890 * We're flushing an inode which is not in the AIL and has 2972 /* generate the checksum. */
2891 * not been logged. For this case we can immediately drop 2973 xfs_dinode_calc_crc(mp, dip);
2892 * the inode flush lock because we can avoid the whole
2893 * AIL state thing. It's OK to drop the flush lock now,
2894 * because we've already locked the buffer and to do anything
2895 * you really need both.
2896 */
2897 if (iip != NULL) {
2898 ASSERT(iip->ili_logged == 0);
2899 ASSERT(iip->ili_last_fields == 0);
2900 ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0);
2901 }
2902 xfs_ifunlock(ip);
2903 }
2904 2974
2975 ASSERT(bp->b_fspriv != NULL);
2976 ASSERT(bp->b_iodone != NULL);
2905 return 0; 2977 return 0;
2906 2978
2907corrupt_out: 2979corrupt_out:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 237e7f6f2ab3..91129794aaec 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -150,13 +150,38 @@ typedef struct xfs_icdinode {
150 __uint16_t di_dmstate; /* DMIG state info */ 150 __uint16_t di_dmstate; /* DMIG state info */
151 __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */ 151 __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
152 __uint32_t di_gen; /* generation number */ 152 __uint32_t di_gen; /* generation number */
153
154 /* di_next_unlinked is the only non-core field in the old dinode */
155 xfs_agino_t di_next_unlinked;/* agi unlinked list ptr */
156
157 /* start of the extended dinode, writable fields */
158 __uint32_t di_crc; /* CRC of the inode */
159 __uint64_t di_changecount; /* number of attribute changes */
160 xfs_lsn_t di_lsn; /* flush sequence */
161 __uint64_t di_flags2; /* more random flags */
162 __uint8_t di_pad2[16]; /* more padding for future expansion */
163
164 /* fields only written to during inode creation */
165 xfs_ictimestamp_t di_crtime; /* time created */
166 xfs_ino_t di_ino; /* inode number */
167 uuid_t di_uuid; /* UUID of the filesystem */
168
169 /* structure must be padded to 64 bit alignment */
153} xfs_icdinode_t; 170} xfs_icdinode_t;
154 171
172static inline uint xfs_icdinode_size(int version)
173{
174 if (version == 3)
175 return sizeof(struct xfs_icdinode);
176 return offsetof(struct xfs_icdinode, di_next_unlinked);
177}
178
155/* 179/*
156 * Flags for xfs_ichgtime(). 180 * Flags for xfs_ichgtime().
157 */ 181 */
158#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ 182#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
159#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ 183#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
184#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */
160 185
161/* 186/*
162 * Per-fork incore inode flags. 187 * Per-fork incore inode flags.
@@ -180,10 +205,11 @@ typedef struct xfs_icdinode {
180#define XFS_IFORK_DSIZE(ip) \ 205#define XFS_IFORK_DSIZE(ip) \
181 (XFS_IFORK_Q(ip) ? \ 206 (XFS_IFORK_Q(ip) ? \
182 XFS_IFORK_BOFF(ip) : \ 207 XFS_IFORK_BOFF(ip) : \
183 XFS_LITINO((ip)->i_mount)) 208 XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version))
184#define XFS_IFORK_ASIZE(ip) \ 209#define XFS_IFORK_ASIZE(ip) \
185 (XFS_IFORK_Q(ip) ? \ 210 (XFS_IFORK_Q(ip) ? \
186 XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \ 211 XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version) - \
212 XFS_IFORK_BOFF(ip) : \
187 0) 213 0)
188#define XFS_IFORK_SIZE(ip,w) \ 214#define XFS_IFORK_SIZE(ip,w) \
189 ((w) == XFS_DATA_FORK ? \ 215 ((w) == XFS_DATA_FORK ? \
@@ -555,6 +581,7 @@ int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
555 struct xfs_buf **, uint, uint); 581 struct xfs_buf **, uint, uint);
556int xfs_iread(struct xfs_mount *, struct xfs_trans *, 582int xfs_iread(struct xfs_mount *, struct xfs_trans *,
557 struct xfs_inode *, uint); 583 struct xfs_inode *, uint);
584void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
558void xfs_dinode_to_disk(struct xfs_dinode *, 585void xfs_dinode_to_disk(struct xfs_dinode *,
559 struct xfs_icdinode *); 586 struct xfs_icdinode *);
560void xfs_idestroy_fork(struct xfs_inode *, int); 587void xfs_idestroy_fork(struct xfs_inode *, int);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index f034bd1652f0..f76ff52e43c0 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -179,7 +179,7 @@ xfs_inode_item_format(
179 nvecs = 1; 179 nvecs = 1;
180 180
181 vecp->i_addr = &ip->i_d; 181 vecp->i_addr = &ip->i_d;
182 vecp->i_len = sizeof(struct xfs_icdinode); 182 vecp->i_len = xfs_icdinode_size(ip->i_d.di_version);
183 vecp->i_type = XLOG_REG_TYPE_ICORE; 183 vecp->i_type = XLOG_REG_TYPE_ICORE;
184 vecp++; 184 vecp++;
185 nvecs++; 185 nvecs++;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5a30dd899d2b..8f8aaee7f379 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -42,6 +42,8 @@
42#include "xfs_iomap.h" 42#include "xfs_iomap.h"
43#include "xfs_trace.h" 43#include "xfs_trace.h"
44#include "xfs_icache.h" 44#include "xfs_icache.h"
45#include "xfs_dquot_item.h"
46#include "xfs_dquot.h"
45 47
46 48
47#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ 49#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
@@ -362,10 +364,65 @@ xfs_iomap_eof_prealloc_initial_size(
362 if (imap[0].br_startblock == HOLESTARTBLOCK) 364 if (imap[0].br_startblock == HOLESTARTBLOCK)
363 return 0; 365 return 0;
364 if (imap[0].br_blockcount <= (MAXEXTLEN >> 1)) 366 if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
365 return imap[0].br_blockcount; 367 return imap[0].br_blockcount << 1;
366 return XFS_B_TO_FSB(mp, offset); 368 return XFS_B_TO_FSB(mp, offset);
367} 369}
368 370
371STATIC bool
372xfs_quota_need_throttle(
373 struct xfs_inode *ip,
374 int type,
375 xfs_fsblock_t alloc_blocks)
376{
377 struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
378
379 if (!dq || !xfs_this_quota_on(ip->i_mount, type))
380 return false;
381
382 /* no hi watermark, no throttle */
383 if (!dq->q_prealloc_hi_wmark)
384 return false;
385
386 /* under the lo watermark, no throttle */
387 if (dq->q_res_bcount + alloc_blocks < dq->q_prealloc_lo_wmark)
388 return false;
389
390 return true;
391}
392
393STATIC void
394xfs_quota_calc_throttle(
395 struct xfs_inode *ip,
396 int type,
397 xfs_fsblock_t *qblocks,
398 int *qshift)
399{
400 int64_t freesp;
401 int shift = 0;
402 struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
403
404 /* over hi wmark, squash the prealloc completely */
405 if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
406 *qblocks = 0;
407 return;
408 }
409
410 freesp = dq->q_prealloc_hi_wmark - dq->q_res_bcount;
411 if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) {
412 shift = 2;
413 if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT])
414 shift += 2;
415 if (freesp < dq->q_low_space[XFS_QLOWSP_1_PCNT])
416 shift += 2;
417 }
418
419 /* only overwrite the throttle values if we are more aggressive */
420 if ((freesp >> shift) < (*qblocks >> *qshift)) {
421 *qblocks = freesp;
422 *qshift = shift;
423 }
424}
425
369/* 426/*
370 * If we don't have a user specified preallocation size, dynamically increase 427 * If we don't have a user specified preallocation size, dynamically increase
371 * the preallocation size as the size of the file grows. Cap the maximum size 428 * the preallocation size as the size of the file grows. Cap the maximum size
@@ -381,45 +438,89 @@ xfs_iomap_prealloc_size(
381 int nimaps) 438 int nimaps)
382{ 439{
383 xfs_fsblock_t alloc_blocks = 0; 440 xfs_fsblock_t alloc_blocks = 0;
441 int shift = 0;
442 int64_t freesp;
443 xfs_fsblock_t qblocks;
444 int qshift = 0;
384 445
385 alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset, 446 alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset,
386 imap, nimaps); 447 imap, nimaps);
387 if (alloc_blocks > 0) { 448 if (!alloc_blocks)
388 int shift = 0; 449 goto check_writeio;
389 int64_t freesp; 450 qblocks = alloc_blocks;
390
391 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
392 rounddown_pow_of_two(alloc_blocks));
393
394 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
395 freesp = mp->m_sb.sb_fdblocks;
396 if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
397 shift = 2;
398 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
399 shift++;
400 if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
401 shift++;
402 if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
403 shift++;
404 if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
405 shift++;
406 }
407 if (shift)
408 alloc_blocks >>= shift;
409 451
410 /* 452 /*
411 * If we are still trying to allocate more space than is 453 * MAXEXTLEN is not a power of two value but we round the prealloc down
412 * available, squash the prealloc hard. This can happen if we 454 * to the nearest power of two value after throttling. To prevent the
413 * have a large file on a small filesystem and the above 455 * round down from unconditionally reducing the maximum supported prealloc
414 * lowspace thresholds are smaller than MAXEXTLEN. 456 * size, we round up first, apply appropriate throttling, round down and
415 */ 457 * cap the value to MAXEXTLEN.
416 while (alloc_blocks && alloc_blocks >= freesp) 458 */
417 alloc_blocks >>= 4; 459 alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
460 alloc_blocks);
461
462 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
463 freesp = mp->m_sb.sb_fdblocks;
464 if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
465 shift = 2;
466 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
467 shift++;
468 if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
469 shift++;
470 if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
471 shift++;
472 if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
473 shift++;
418 } 474 }
419 475
476 /*
477 * Check each quota to cap the prealloc size and provide a shift
478 * value to throttle with.
479 */
480 if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks))
481 xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift);
482 if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks))
483 xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift);
484 if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks))
485 xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift);
486
487 /*
488 * The final prealloc size is set to the minimum of free space available
489 * in each of the quotas and the overall filesystem.
490 *
491 * The shift throttle value is set to the maximum value as determined by
492 * the global low free space values and per-quota low free space values.
493 */
494 alloc_blocks = MIN(alloc_blocks, qblocks);
495 shift = MAX(shift, qshift);
496
497 if (shift)
498 alloc_blocks >>= shift;
499 /*
500 * rounddown_pow_of_two() returns an undefined result if we pass in
501 * alloc_blocks = 0.
502 */
503 if (alloc_blocks)
504 alloc_blocks = rounddown_pow_of_two(alloc_blocks);
505 if (alloc_blocks > MAXEXTLEN)
506 alloc_blocks = MAXEXTLEN;
507
508 /*
509 * If we are still trying to allocate more space than is
510 * available, squash the prealloc hard. This can happen if we
511 * have a large file on a small filesystem and the above
512 * lowspace thresholds are smaller than MAXEXTLEN.
513 */
514 while (alloc_blocks && alloc_blocks >= freesp)
515 alloc_blocks >>= 4;
516
517check_writeio:
420 if (alloc_blocks < mp->m_writeio_blocks) 518 if (alloc_blocks < mp->m_writeio_blocks)
421 alloc_blocks = mp->m_writeio_blocks; 519 alloc_blocks = mp->m_writeio_blocks;
422 520
521 trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift,
522 mp->m_writeio_blocks);
523
423 return alloc_blocks; 524 return alloc_blocks;
424} 525}
425 526
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index fe7e4df85a7b..14e59d953b7b 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -72,6 +72,7 @@
72#include <linux/kthread.h> 72#include <linux/kthread.h>
73#include <linux/freezer.h> 73#include <linux/freezer.h>
74#include <linux/list_sort.h> 74#include <linux/list_sort.h>
75#include <linux/ratelimit.h>
75 76
76#include <asm/page.h> 77#include <asm/page.h>
77#include <asm/div64.h> 78#include <asm/div64.h>
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index eec226f78a40..b345a7c85153 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3485,7 +3485,7 @@ xlog_ticket_alloc(
3485 tic->t_curr_res = unit_bytes; 3485 tic->t_curr_res = unit_bytes;
3486 tic->t_cnt = cnt; 3486 tic->t_cnt = cnt;
3487 tic->t_ocnt = cnt; 3487 tic->t_ocnt = cnt;
3488 tic->t_tid = random32(); 3488 tic->t_tid = prandom_u32();
3489 tic->t_clientid = client; 3489 tic->t_clientid = client;
3490 tic->t_flags = XLOG_TIC_INITED; 3490 tic->t_flags = XLOG_TIC_INITED;
3491 tic->t_trans_type = 0; 3491 tic->t_trans_type = 0;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index ddc4529d07d3..e3d0b85d852b 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -668,10 +668,6 @@ xlog_cil_push_foreground(
668 * transaction to the checkpoint context so we carry the busy extents through 668 * transaction to the checkpoint context so we carry the busy extents through
669 * to checkpoint completion, and then unlock all the items in the transaction. 669 * to checkpoint completion, and then unlock all the items in the transaction.
670 * 670 *
671 * For more specific information about the order of operations in
672 * xfs_log_commit_cil() please refer to the comments in
673 * xfs_trans_commit_iclog().
674 *
675 * Called with the context lock already held in read mode to lock out 671 * Called with the context lock already held in read mode to lock out
676 * background commit, returns without it held once background commits are 672 * background commit, returns without it held once background commits are
677 * allowed again. 673 * allowed again.
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 16d8d12ea3b4..b9ea262dd1c2 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -468,7 +468,6 @@ struct xfs_cil {
468 * threshold, yet give us plenty of space for aggregation on large logs. 468 * threshold, yet give us plenty of space for aggregation on large logs.
469 */ 469 */
470#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3) 470#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3)
471#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
472 471
473/* 472/*
474 * ticket grant locks, queues and accounting have their own cachlines 473 * ticket grant locks, queues and accounting have their own cachlines
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index d1dba7ce75ae..93f03ec17eec 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -29,6 +29,7 @@
29#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
32#include "xfs_btree.h"
32#include "xfs_dinode.h" 33#include "xfs_dinode.h"
33#include "xfs_inode.h" 34#include "xfs_inode.h"
34#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
@@ -45,6 +46,14 @@
45#include "xfs_trace.h" 46#include "xfs_trace.h"
46#include "xfs_icache.h" 47#include "xfs_icache.h"
47 48
49/* Need all the magic numbers and buffer ops structures from these headers */
50#include "xfs_symlink.h"
51#include "xfs_da_btree.h"
52#include "xfs_dir2_format.h"
53#include "xfs_dir2_priv.h"
54#include "xfs_attr_leaf.h"
55#include "xfs_attr_remote.h"
56
48STATIC int 57STATIC int
49xlog_find_zeroed( 58xlog_find_zeroed(
50 struct xlog *, 59 struct xlog *,
@@ -1785,6 +1794,7 @@ xlog_recover_do_inode_buffer(
1785 xfs_agino_t *buffer_nextp; 1794 xfs_agino_t *buffer_nextp;
1786 1795
1787 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); 1796 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
1797 bp->b_ops = &xfs_inode_buf_ops;
1788 1798
1789 inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; 1799 inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
1790 for (i = 0; i < inodes_per_buf; i++) { 1800 for (i = 0; i < inodes_per_buf; i++) {
@@ -1857,6 +1867,201 @@ xlog_recover_do_inode_buffer(
1857} 1867}
1858 1868
1859/* 1869/*
1870 * Validate the recovered buffer is of the correct type and attach the
1871 * appropriate buffer operations to them for writeback. Magic numbers are in a
1872 * few places:
1873 * the first 16 bits of the buffer (inode buffer, dquot buffer),
1874 * the first 32 bits of the buffer (most blocks),
1875 * inside a struct xfs_da_blkinfo at the start of the buffer.
1876 */
1877static void
1878xlog_recovery_validate_buf_type(
1879 struct xfs_mount *mp,
1880 struct xfs_buf *bp,
1881 xfs_buf_log_format_t *buf_f)
1882{
1883 struct xfs_da_blkinfo *info = bp->b_addr;
1884 __uint32_t magic32;
1885 __uint16_t magic16;
1886 __uint16_t magicda;
1887
1888 magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
1889 magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
1890 magicda = be16_to_cpu(info->magic);
1891 switch (xfs_blft_from_flags(buf_f)) {
1892 case XFS_BLFT_BTREE_BUF:
1893 switch (magic32) {
1894 case XFS_ABTB_CRC_MAGIC:
1895 case XFS_ABTC_CRC_MAGIC:
1896 case XFS_ABTB_MAGIC:
1897 case XFS_ABTC_MAGIC:
1898 bp->b_ops = &xfs_allocbt_buf_ops;
1899 break;
1900 case XFS_IBT_CRC_MAGIC:
1901 case XFS_IBT_MAGIC:
1902 bp->b_ops = &xfs_inobt_buf_ops;
1903 break;
1904 case XFS_BMAP_CRC_MAGIC:
1905 case XFS_BMAP_MAGIC:
1906 bp->b_ops = &xfs_bmbt_buf_ops;
1907 break;
1908 default:
1909 xfs_warn(mp, "Bad btree block magic!");
1910 ASSERT(0);
1911 break;
1912 }
1913 break;
1914 case XFS_BLFT_AGF_BUF:
1915 if (magic32 != XFS_AGF_MAGIC) {
1916 xfs_warn(mp, "Bad AGF block magic!");
1917 ASSERT(0);
1918 break;
1919 }
1920 bp->b_ops = &xfs_agf_buf_ops;
1921 break;
1922 case XFS_BLFT_AGFL_BUF:
1923 if (!xfs_sb_version_hascrc(&mp->m_sb))
1924 break;
1925 if (magic32 != XFS_AGFL_MAGIC) {
1926 xfs_warn(mp, "Bad AGFL block magic!");
1927 ASSERT(0);
1928 break;
1929 }
1930 bp->b_ops = &xfs_agfl_buf_ops;
1931 break;
1932 case XFS_BLFT_AGI_BUF:
1933 if (magic32 != XFS_AGI_MAGIC) {
1934 xfs_warn(mp, "Bad AGI block magic!");
1935 ASSERT(0);
1936 break;
1937 }
1938 bp->b_ops = &xfs_agi_buf_ops;
1939 break;
1940 case XFS_BLFT_UDQUOT_BUF:
1941 case XFS_BLFT_PDQUOT_BUF:
1942 case XFS_BLFT_GDQUOT_BUF:
1943#ifdef CONFIG_XFS_QUOTA
1944 if (magic16 != XFS_DQUOT_MAGIC) {
1945 xfs_warn(mp, "Bad DQUOT block magic!");
1946 ASSERT(0);
1947 break;
1948 }
1949 bp->b_ops = &xfs_dquot_buf_ops;
1950#else
1951 xfs_alert(mp,
1952 "Trying to recover dquots without QUOTA support built in!");
1953 ASSERT(0);
1954#endif
1955 break;
1956 case XFS_BLFT_DINO_BUF:
1957 /*
1958 * we get here with inode allocation buffers, not buffers that
1959 * track unlinked list changes.
1960 */
1961 if (magic16 != XFS_DINODE_MAGIC) {
1962 xfs_warn(mp, "Bad INODE block magic!");
1963 ASSERT(0);
1964 break;
1965 }
1966 bp->b_ops = &xfs_inode_buf_ops;
1967 break;
1968 case XFS_BLFT_SYMLINK_BUF:
1969 if (magic32 != XFS_SYMLINK_MAGIC) {
1970 xfs_warn(mp, "Bad symlink block magic!");
1971 ASSERT(0);
1972 break;
1973 }
1974 bp->b_ops = &xfs_symlink_buf_ops;
1975 break;
1976 case XFS_BLFT_DIR_BLOCK_BUF:
1977 if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
1978 magic32 != XFS_DIR3_BLOCK_MAGIC) {
1979 xfs_warn(mp, "Bad dir block magic!");
1980 ASSERT(0);
1981 break;
1982 }
1983 bp->b_ops = &xfs_dir3_block_buf_ops;
1984 break;
1985 case XFS_BLFT_DIR_DATA_BUF:
1986 if (magic32 != XFS_DIR2_DATA_MAGIC &&
1987 magic32 != XFS_DIR3_DATA_MAGIC) {
1988 xfs_warn(mp, "Bad dir data magic!");
1989 ASSERT(0);
1990 break;
1991 }
1992 bp->b_ops = &xfs_dir3_data_buf_ops;
1993 break;
1994 case XFS_BLFT_DIR_FREE_BUF:
1995 if (magic32 != XFS_DIR2_FREE_MAGIC &&
1996 magic32 != XFS_DIR3_FREE_MAGIC) {
1997 xfs_warn(mp, "Bad dir3 free magic!");
1998 ASSERT(0);
1999 break;
2000 }
2001 bp->b_ops = &xfs_dir3_free_buf_ops;
2002 break;
2003 case XFS_BLFT_DIR_LEAF1_BUF:
2004 if (magicda != XFS_DIR2_LEAF1_MAGIC &&
2005 magicda != XFS_DIR3_LEAF1_MAGIC) {
2006 xfs_warn(mp, "Bad dir leaf1 magic!");
2007 ASSERT(0);
2008 break;
2009 }
2010 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
2011 break;
2012 case XFS_BLFT_DIR_LEAFN_BUF:
2013 if (magicda != XFS_DIR2_LEAFN_MAGIC &&
2014 magicda != XFS_DIR3_LEAFN_MAGIC) {
2015 xfs_warn(mp, "Bad dir leafn magic!");
2016 ASSERT(0);
2017 break;
2018 }
2019 bp->b_ops = &xfs_dir3_leafn_buf_ops;
2020 break;
2021 case XFS_BLFT_DA_NODE_BUF:
2022 if (magicda != XFS_DA_NODE_MAGIC &&
2023 magicda != XFS_DA3_NODE_MAGIC) {
2024 xfs_warn(mp, "Bad da node magic!");
2025 ASSERT(0);
2026 break;
2027 }
2028 bp->b_ops = &xfs_da3_node_buf_ops;
2029 break;
2030 case XFS_BLFT_ATTR_LEAF_BUF:
2031 if (magicda != XFS_ATTR_LEAF_MAGIC &&
2032 magicda != XFS_ATTR3_LEAF_MAGIC) {
2033 xfs_warn(mp, "Bad attr leaf magic!");
2034 ASSERT(0);
2035 break;
2036 }
2037 bp->b_ops = &xfs_attr3_leaf_buf_ops;
2038 break;
2039 case XFS_BLFT_ATTR_RMT_BUF:
2040 if (!xfs_sb_version_hascrc(&mp->m_sb))
2041 break;
2042 if (magic32 != XFS_ATTR3_RMT_MAGIC) {
2043 xfs_warn(mp, "Bad attr remote magic!");
2044 ASSERT(0);
2045 break;
2046 }
2047 bp->b_ops = &xfs_attr3_rmt_buf_ops;
2048 break;
2049 case XFS_BLFT_SB_BUF:
2050 if (magic32 != XFS_SB_MAGIC) {
2051 xfs_warn(mp, "Bad SB block magic!");
2052 ASSERT(0);
2053 break;
2054 }
2055 bp->b_ops = &xfs_sb_buf_ops;
2056 break;
2057 default:
2058 xfs_warn(mp, "Unknown buffer type %d!",
2059 xfs_blft_from_flags(buf_f));
2060 break;
2061 }
2062}
2063
2064/*
1860 * Perform a 'normal' buffer recovery. Each logged region of the 2065 * Perform a 'normal' buffer recovery. Each logged region of the
1861 * buffer should be copied over the corresponding region in the 2066 * buffer should be copied over the corresponding region in the
1862 * given buffer. The bitmap in the buf log format structure indicates 2067 * given buffer. The bitmap in the buf log format structure indicates
@@ -1928,6 +2133,8 @@ xlog_recover_do_reg_buffer(
1928 2133
1929 /* Shouldn't be any more regions */ 2134 /* Shouldn't be any more regions */
1930 ASSERT(i == item->ri_total); 2135 ASSERT(i == item->ri_total);
2136
2137 xlog_recovery_validate_buf_type(mp, bp, buf_f);
1931} 2138}
1932 2139
1933/* 2140/*
@@ -2213,6 +2420,7 @@ xlog_recover_inode_pass2(
2213 int attr_index; 2420 int attr_index;
2214 uint fields; 2421 uint fields;
2215 xfs_icdinode_t *dicp; 2422 xfs_icdinode_t *dicp;
2423 uint isize;
2216 int need_free = 0; 2424 int need_free = 0;
2217 2425
2218 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { 2426 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
@@ -2238,7 +2446,7 @@ xlog_recover_inode_pass2(
2238 trace_xfs_log_recover_inode_recover(log, in_f); 2446 trace_xfs_log_recover_inode_recover(log, in_f);
2239 2447
2240 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0, 2448 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0,
2241 NULL); 2449 &xfs_inode_buf_ops);
2242 if (!bp) { 2450 if (!bp) {
2243 error = ENOMEM; 2451 error = ENOMEM;
2244 goto error; 2452 goto error;
@@ -2349,7 +2557,8 @@ xlog_recover_inode_pass2(
2349 error = EFSCORRUPTED; 2557 error = EFSCORRUPTED;
2350 goto error; 2558 goto error;
2351 } 2559 }
2352 if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { 2560 isize = xfs_icdinode_size(dicp->di_version);
2561 if (unlikely(item->ri_buf[1].i_len > isize)) {
2353 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", 2562 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
2354 XFS_ERRLEVEL_LOW, mp, dicp); 2563 XFS_ERRLEVEL_LOW, mp, dicp);
2355 xfs_buf_relse(bp); 2564 xfs_buf_relse(bp);
@@ -2361,13 +2570,13 @@ xlog_recover_inode_pass2(
2361 } 2570 }
2362 2571
2363 /* The core is in in-core format */ 2572 /* The core is in in-core format */
2364 xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr); 2573 xfs_dinode_to_disk(dip, dicp);
2365 2574
2366 /* the rest is in on-disk format */ 2575 /* the rest is in on-disk format */
2367 if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { 2576 if (item->ri_buf[1].i_len > isize) {
2368 memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode), 2577 memcpy((char *)dip + isize,
2369 item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode), 2578 item->ri_buf[1].i_addr + isize,
2370 item->ri_buf[1].i_len - sizeof(struct xfs_icdinode)); 2579 item->ri_buf[1].i_len - isize);
2371 } 2580 }
2372 2581
2373 fields = in_f->ilf_fields; 2582 fields = in_f->ilf_fields;
@@ -2451,6 +2660,9 @@ xlog_recover_inode_pass2(
2451 } 2660 }
2452 2661
2453write_inode_buffer: 2662write_inode_buffer:
2663 /* re-generate the checksum. */
2664 xfs_dinode_calc_crc(log->l_mp, dip);
2665
2454 ASSERT(bp->b_target->bt_mount == mp); 2666 ASSERT(bp->b_target->bt_mount == mp);
2455 bp->b_iodone = xlog_recover_iodone; 2667 bp->b_iodone = xlog_recover_iodone;
2456 xfs_buf_delwri_queue(bp, buffer_list); 2668 xfs_buf_delwri_queue(bp, buffer_list);
@@ -2948,6 +3160,7 @@ xlog_recover_process_efi(
2948 * This will pull the EFI from the AIL and 3160 * This will pull the EFI from the AIL and
2949 * free the memory associated with it. 3161 * free the memory associated with it.
2950 */ 3162 */
3163 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
2951 xfs_efi_release(efip, efip->efi_format.efi_nextents); 3164 xfs_efi_release(efip, efip->efi_format.efi_nextents);
2952 return XFS_ERROR(EIO); 3165 return XFS_ERROR(EIO);
2953 } 3166 }
@@ -3751,6 +3964,25 @@ xlog_recover(
3751 return error; 3964 return error;
3752 } 3965 }
3753 3966
3967 /*
3968 * Version 5 superblock log feature mask validation. We know the
3969 * log is dirty so check if there are any unknown log features
3970 * in what we need to recover. If there are unknown features
3971 * (e.g. unsupported transactions, then simply reject the
3972 * attempt at recovery before touching anything.
3973 */
3974 if (XFS_SB_VERSION_NUM(&log->l_mp->m_sb) == XFS_SB_VERSION_5 &&
3975 xfs_sb_has_incompat_log_feature(&log->l_mp->m_sb,
3976 XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) {
3977 xfs_warn(log->l_mp,
3978"Superblock has unknown incompatible log features (0x%x) enabled.\n"
3979"The log can not be fully and/or safely recovered by this kernel.\n"
3980"Please recover the log on a kernel that supports the unknown features.",
3981 (log->l_mp->m_sb.sb_features_log_incompat &
3982 XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
3983 return EINVAL;
3984 }
3985
3754 xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", 3986 xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
3755 log->l_mp->m_logname ? log->l_mp->m_logname 3987 log->l_mp->m_logname ? log->l_mp->m_logname
3756 : "internal"); 3988 : "internal");
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
index 56dc0c17f16a..76c81982f964 100644
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -30,6 +30,32 @@ void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
30} 30}
31#endif 31#endif
32 32
33#define xfs_printk_ratelimited(func, dev, fmt, ...) \
34do { \
35 static DEFINE_RATELIMIT_STATE(_rs, \
36 DEFAULT_RATELIMIT_INTERVAL, \
37 DEFAULT_RATELIMIT_BURST); \
38 if (__ratelimit(&_rs)) \
39 func(dev, fmt, ##__VA_ARGS__); \
40} while (0)
41
42#define xfs_emerg_ratelimited(dev, fmt, ...) \
43 xfs_printk_ratelimited(xfs_emerg, dev, fmt, ##__VA_ARGS__)
44#define xfs_alert_ratelimited(dev, fmt, ...) \
45 xfs_printk_ratelimited(xfs_alert, dev, fmt, ##__VA_ARGS__)
46#define xfs_crit_ratelimited(dev, fmt, ...) \
47 xfs_printk_ratelimited(xfs_crit, dev, fmt, ##__VA_ARGS__)
48#define xfs_err_ratelimited(dev, fmt, ...) \
49 xfs_printk_ratelimited(xfs_err, dev, fmt, ##__VA_ARGS__)
50#define xfs_warn_ratelimited(dev, fmt, ...) \
51 xfs_printk_ratelimited(xfs_warn, dev, fmt, ##__VA_ARGS__)
52#define xfs_notice_ratelimited(dev, fmt, ...) \
53 xfs_printk_ratelimited(xfs_notice, dev, fmt, ##__VA_ARGS__)
54#define xfs_info_ratelimited(dev, fmt, ...) \
55 xfs_printk_ratelimited(xfs_info, dev, fmt, ##__VA_ARGS__)
56#define xfs_debug_ratelimited(dev, fmt, ...) \
57 xfs_printk_ratelimited(xfs_debug, dev, fmt, ##__VA_ARGS__)
58
33extern void assfail(char *expr, char *f, int l); 59extern void assfail(char *expr, char *f, int l);
34 60
35extern void xfs_hex_dump(void *p, int length); 61extern void xfs_hex_dump(void *p, int length);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 3806088a8f77..f6bfbd734669 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -43,6 +43,8 @@
43#include "xfs_utils.h" 43#include "xfs_utils.h"
44#include "xfs_trace.h" 44#include "xfs_trace.h"
45#include "xfs_icache.h" 45#include "xfs_icache.h"
46#include "xfs_cksum.h"
47#include "xfs_buf_item.h"
46 48
47 49
48#ifdef HAVE_PERCPU_SB 50#ifdef HAVE_PERCPU_SB
@@ -109,6 +111,14 @@ static const struct {
109 { offsetof(xfs_sb_t, sb_logsunit), 0 }, 111 { offsetof(xfs_sb_t, sb_logsunit), 0 },
110 { offsetof(xfs_sb_t, sb_features2), 0 }, 112 { offsetof(xfs_sb_t, sb_features2), 0 },
111 { offsetof(xfs_sb_t, sb_bad_features2), 0 }, 113 { offsetof(xfs_sb_t, sb_bad_features2), 0 },
114 { offsetof(xfs_sb_t, sb_features_compat), 0 },
115 { offsetof(xfs_sb_t, sb_features_ro_compat), 0 },
116 { offsetof(xfs_sb_t, sb_features_incompat), 0 },
117 { offsetof(xfs_sb_t, sb_features_log_incompat), 0 },
118 { offsetof(xfs_sb_t, sb_crc), 0 },
119 { offsetof(xfs_sb_t, sb_pad), 0 },
120 { offsetof(xfs_sb_t, sb_pquotino), 0 },
121 { offsetof(xfs_sb_t, sb_lsn), 0 },
112 { sizeof(xfs_sb_t), 0 } 122 { sizeof(xfs_sb_t), 0 }
113}; 123};
114 124
@@ -319,11 +329,54 @@ xfs_mount_validate_sb(
319 return XFS_ERROR(EWRONGFS); 329 return XFS_ERROR(EWRONGFS);
320 } 330 }
321 331
332
322 if (!xfs_sb_good_version(sbp)) { 333 if (!xfs_sb_good_version(sbp)) {
323 xfs_warn(mp, "bad version"); 334 xfs_warn(mp, "bad version");
324 return XFS_ERROR(EWRONGFS); 335 return XFS_ERROR(EWRONGFS);
325 } 336 }
326 337
338 /*
339 * Version 5 superblock feature mask validation. Reject combinations the
340 * kernel cannot support up front before checking anything else.
341 */
342 if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
343 xfs_alert(mp,
344"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n"
345"Use of these features in this kernel is at your own risk!");
346
347 if (xfs_sb_has_compat_feature(sbp,
348 XFS_SB_FEAT_COMPAT_UNKNOWN)) {
349 xfs_warn(mp,
350"Superblock has unknown compatible features (0x%x) enabled.\n"
351"Using a more recent kernel is recommended.",
352 (sbp->sb_features_compat &
353 XFS_SB_FEAT_COMPAT_UNKNOWN));
354 }
355
356 if (xfs_sb_has_ro_compat_feature(sbp,
357 XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
358 xfs_alert(mp,
359"Superblock has unknown read-only compatible features (0x%x) enabled.",
360 (sbp->sb_features_ro_compat &
361 XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
362 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
363 xfs_warn(mp,
364"Attempted to mount read-only compatible filesystem read-write.\n"
365"Filesystem can only be safely mounted read only.");
366 return XFS_ERROR(EINVAL);
367 }
368 }
369 if (xfs_sb_has_incompat_feature(sbp,
370 XFS_SB_FEAT_INCOMPAT_UNKNOWN)) {
371 xfs_warn(mp,
372"Superblock has unknown incompatible features (0x%x) enabled.\n"
373"Filesystem can not be safely mounted by this kernel.",
374 (sbp->sb_features_incompat &
375 XFS_SB_FEAT_INCOMPAT_UNKNOWN));
376 return XFS_ERROR(EINVAL);
377 }
378 }
379
327 if (unlikely( 380 if (unlikely(
328 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { 381 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
329 xfs_warn(mp, 382 xfs_warn(mp,
@@ -557,6 +610,14 @@ xfs_sb_from_disk(
557 to->sb_logsunit = be32_to_cpu(from->sb_logsunit); 610 to->sb_logsunit = be32_to_cpu(from->sb_logsunit);
558 to->sb_features2 = be32_to_cpu(from->sb_features2); 611 to->sb_features2 = be32_to_cpu(from->sb_features2);
559 to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2); 612 to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2);
613 to->sb_features_compat = be32_to_cpu(from->sb_features_compat);
614 to->sb_features_ro_compat = be32_to_cpu(from->sb_features_ro_compat);
615 to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat);
616 to->sb_features_log_incompat =
617 be32_to_cpu(from->sb_features_log_incompat);
618 to->sb_pad = 0;
619 to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
620 to->sb_lsn = be64_to_cpu(from->sb_lsn);
560} 621}
561 622
562/* 623/*
@@ -612,13 +673,12 @@ xfs_sb_to_disk(
612 } 673 }
613} 674}
614 675
615static void 676static int
616xfs_sb_verify( 677xfs_sb_verify(
617 struct xfs_buf *bp) 678 struct xfs_buf *bp)
618{ 679{
619 struct xfs_mount *mp = bp->b_target->bt_mount; 680 struct xfs_mount *mp = bp->b_target->bt_mount;
620 struct xfs_sb sb; 681 struct xfs_sb sb;
621 int error;
622 682
623 xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp)); 683 xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
624 684
@@ -626,16 +686,46 @@ xfs_sb_verify(
626 * Only check the in progress field for the primary superblock as 686 * Only check the in progress field for the primary superblock as
627 * mkfs.xfs doesn't clear it from secondary superblocks. 687 * mkfs.xfs doesn't clear it from secondary superblocks.
628 */ 688 */
629 error = xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR); 689 return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR);
630 if (error)
631 xfs_buf_ioerror(bp, error);
632} 690}
633 691
692/*
693 * If the superblock has the CRC feature bit set or the CRC field is non-null,
694 * check that the CRC is valid. We check the CRC field is non-null because a
695 * single bit error could clear the feature bit and unused parts of the
696 * superblock are supposed to be zero. Hence a non-null crc field indicates that
697 * we've potentially lost a feature bit and we should check it anyway.
698 */
634static void 699static void
635xfs_sb_read_verify( 700xfs_sb_read_verify(
636 struct xfs_buf *bp) 701 struct xfs_buf *bp)
637{ 702{
638 xfs_sb_verify(bp); 703 struct xfs_mount *mp = bp->b_target->bt_mount;
704 struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp);
705 int error;
706
707 /*
708 * open code the version check to avoid needing to convert the entire
709 * superblock from disk order just to check the version number
710 */
711 if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC) &&
712 (((be16_to_cpu(dsb->sb_versionnum) & XFS_SB_VERSION_NUMBITS) ==
713 XFS_SB_VERSION_5) ||
714 dsb->sb_crc != 0)) {
715
716 if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize),
717 offsetof(struct xfs_sb, sb_crc))) {
718 error = EFSCORRUPTED;
719 goto out_error;
720 }
721 }
722 error = xfs_sb_verify(bp);
723
724out_error:
725 if (error) {
726 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
727 xfs_buf_ioerror(bp, error);
728 }
639} 729}
640 730
641/* 731/*
@@ -648,11 +738,10 @@ static void
648xfs_sb_quiet_read_verify( 738xfs_sb_quiet_read_verify(
649 struct xfs_buf *bp) 739 struct xfs_buf *bp)
650{ 740{
651 struct xfs_sb sb; 741 struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp);
652 742
653 xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
654 743
655 if (sb.sb_magicnum == XFS_SB_MAGIC) { 744 if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) {
656 /* XFS filesystem, verify noisily! */ 745 /* XFS filesystem, verify noisily! */
657 xfs_sb_read_verify(bp); 746 xfs_sb_read_verify(bp);
658 return; 747 return;
@@ -663,9 +752,27 @@ xfs_sb_quiet_read_verify(
663 752
664static void 753static void
665xfs_sb_write_verify( 754xfs_sb_write_verify(
666 struct xfs_buf *bp) 755 struct xfs_buf *bp)
667{ 756{
668 xfs_sb_verify(bp); 757 struct xfs_mount *mp = bp->b_target->bt_mount;
758 struct xfs_buf_log_item *bip = bp->b_fspriv;
759 int error;
760
761 error = xfs_sb_verify(bp);
762 if (error) {
763 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
764 xfs_buf_ioerror(bp, error);
765 return;
766 }
767
768 if (!xfs_sb_version_hascrc(&mp->m_sb))
769 return;
770
771 if (bip)
772 XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
773
774 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
775 offsetof(struct xfs_sb, sb_crc));
669} 776}
670 777
671const struct xfs_buf_ops xfs_sb_buf_ops = { 778const struct xfs_buf_ops xfs_sb_buf_ops = {
@@ -687,7 +794,8 @@ int
687xfs_readsb(xfs_mount_t *mp, int flags) 794xfs_readsb(xfs_mount_t *mp, int flags)
688{ 795{
689 unsigned int sector_size; 796 unsigned int sector_size;
690 xfs_buf_t *bp; 797 struct xfs_buf *bp;
798 struct xfs_sb *sbp = &mp->m_sb;
691 int error; 799 int error;
692 int loud = !(flags & XFS_MFSI_QUIET); 800 int loud = !(flags & XFS_MFSI_QUIET);
693 801
@@ -714,7 +822,7 @@ reread:
714 if (bp->b_error) { 822 if (bp->b_error) {
715 error = bp->b_error; 823 error = bp->b_error;
716 if (loud) 824 if (loud)
717 xfs_warn(mp, "SB validate failed"); 825 xfs_warn(mp, "SB validate failed with error %d.", error);
718 goto release_buf; 826 goto release_buf;
719 } 827 }
720 828
@@ -726,10 +834,10 @@ reread:
726 /* 834 /*
727 * We must be able to do sector-sized and sector-aligned IO. 835 * We must be able to do sector-sized and sector-aligned IO.
728 */ 836 */
729 if (sector_size > mp->m_sb.sb_sectsize) { 837 if (sector_size > sbp->sb_sectsize) {
730 if (loud) 838 if (loud)
731 xfs_warn(mp, "device supports %u byte sectors (not %u)", 839 xfs_warn(mp, "device supports %u byte sectors (not %u)",
732 sector_size, mp->m_sb.sb_sectsize); 840 sector_size, sbp->sb_sectsize);
733 error = ENOSYS; 841 error = ENOSYS;
734 goto release_buf; 842 goto release_buf;
735 } 843 }
@@ -738,15 +846,18 @@ reread:
738 * If device sector size is smaller than the superblock size, 846 * If device sector size is smaller than the superblock size,
739 * re-read the superblock so the buffer is correctly sized. 847 * re-read the superblock so the buffer is correctly sized.
740 */ 848 */
741 if (sector_size < mp->m_sb.sb_sectsize) { 849 if (sector_size < sbp->sb_sectsize) {
742 xfs_buf_relse(bp); 850 xfs_buf_relse(bp);
743 sector_size = mp->m_sb.sb_sectsize; 851 sector_size = sbp->sb_sectsize;
744 goto reread; 852 goto reread;
745 } 853 }
746 854
747 /* Initialize per-cpu counters */ 855 /* Initialize per-cpu counters */
748 xfs_icsb_reinit_counters(mp); 856 xfs_icsb_reinit_counters(mp);
749 857
858 /* no need to be quiet anymore, so reset the buf ops */
859 bp->b_ops = &xfs_sb_buf_ops;
860
750 mp->m_sb_bp = bp; 861 mp->m_sb_bp = bp;
751 xfs_buf_unlock(bp); 862 xfs_buf_unlock(bp);
752 return 0; 863 return 0;
@@ -1633,6 +1744,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1633 ASSERT((1LL << f) & XFS_SB_MOD_BITS); 1744 ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1634 first = xfs_sb_info[f].offset; 1745 first = xfs_sb_info[f].offset;
1635 1746
1747 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
1636 xfs_trans_log_buf(tp, bp, first, last); 1748 xfs_trans_log_buf(tp, bp, first, last);
1637} 1749}
1638 1750
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bc907061d392..b004cecdfb04 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -207,7 +207,6 @@ typedef struct xfs_mount {
207 trimming */ 207 trimming */
208 __int64_t m_update_flags; /* sb flags we need to update 208 __int64_t m_update_flags; /* sb flags we need to update
209 on the next remount,rw */ 209 on the next remount,rw */
210 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
211 int64_t m_low_space[XFS_LOWSP_MAX]; 210 int64_t m_low_space[XFS_LOWSP_MAX];
212 /* low free space thresholds */ 211 /* low free space thresholds */
213 212
@@ -392,6 +391,7 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *);
392 391
393#endif /* __KERNEL__ */ 392#endif /* __KERNEL__ */
394 393
394extern void xfs_sb_calc_crc(struct xfs_buf *);
395extern void xfs_mod_sb(struct xfs_trans *, __int64_t); 395extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
396extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t, 396extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
397 xfs_agnumber_t *); 397 xfs_agnumber_t *);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index e5b5cf973781..f41702b43003 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -617,6 +617,20 @@ xfs_qm_dqdetach(
617 } 617 }
618} 618}
619 619
620int
621xfs_qm_calc_dquots_per_chunk(
622 struct xfs_mount *mp,
623 unsigned int nbblks) /* basic block units */
624{
625 unsigned int ndquots;
626
627 ASSERT(nbblks > 0);
628 ndquots = BBTOB(nbblks);
629 do_div(ndquots, sizeof(xfs_dqblk_t));
630
631 return ndquots;
632}
633
620/* 634/*
621 * This initializes all the quota information that's kept in the 635 * This initializes all the quota information that's kept in the
622 * mount structure 636 * mount structure
@@ -656,9 +670,8 @@ xfs_qm_init_quotainfo(
656 670
657 /* Precalc some constants */ 671 /* Precalc some constants */
658 qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); 672 qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
659 ASSERT(qinf->qi_dqchunklen); 673 qinf->qi_dqperchunk = xfs_qm_calc_dquots_per_chunk(mp,
660 qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen); 674 qinf->qi_dqchunklen);
661 do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
662 675
663 mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); 676 mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
664 677
@@ -897,6 +910,10 @@ xfs_qm_dqiter_bufs(
897 if (error) 910 if (error)
898 break; 911 break;
899 912
913 /*
914 * XXX(hch): need to figure out if it makes sense to validate
915 * the CRC here.
916 */
900 xfs_qm_reset_dqcounts(mp, bp, firstid, type); 917 xfs_qm_reset_dqcounts(mp, bp, firstid, type);
901 xfs_buf_delwri_queue(bp, buffer_list); 918 xfs_buf_delwri_queue(bp, buffer_list);
902 xfs_buf_relse(bp); 919 xfs_buf_relse(bp);
@@ -1057,7 +1074,7 @@ xfs_qm_quotacheck_dqadjust(
1057 * There are no timers for the default values set in the root dquot. 1074 * There are no timers for the default values set in the root dquot.
1058 */ 1075 */
1059 if (dqp->q_core.d_id) { 1076 if (dqp->q_core.d_id) {
1060 xfs_qm_adjust_dqlimits(mp, &dqp->q_core); 1077 xfs_qm_adjust_dqlimits(mp, dqp);
1061 xfs_qm_adjust_dqtimers(mp, &dqp->q_core); 1078 xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
1062 } 1079 }
1063 1080
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 44b858b79d71..5d16a6e6900f 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -75,6 +75,8 @@ typedef struct xfs_quotainfo {
75 &((qi)->qi_gquota_tree)) 75 &((qi)->qi_gquota_tree))
76 76
77 77
78extern int xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp,
79 unsigned int nbblks);
78extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); 80extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
79extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, 81extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
80 xfs_dquot_t *, xfs_dquot_t *, long, long, uint); 82 xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
@@ -116,7 +118,7 @@ extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
116extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); 118extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
117extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, 119extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
118 fs_disk_quota_t *); 120 fs_disk_quota_t *);
119extern int xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint, 121extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
120 fs_disk_quota_t *); 122 fs_disk_quota_t *);
121extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); 123extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
122extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); 124extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index cf9a34051e07..c41190cad6e9 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -472,15 +472,15 @@ xfs_qm_scall_getqstat(
472 */ 472 */
473int 473int
474xfs_qm_scall_setqlim( 474xfs_qm_scall_setqlim(
475 xfs_mount_t *mp, 475 struct xfs_mount *mp,
476 xfs_dqid_t id, 476 xfs_dqid_t id,
477 uint type, 477 uint type,
478 fs_disk_quota_t *newlim) 478 fs_disk_quota_t *newlim)
479{ 479{
480 struct xfs_quotainfo *q = mp->m_quotainfo; 480 struct xfs_quotainfo *q = mp->m_quotainfo;
481 xfs_disk_dquot_t *ddq; 481 struct xfs_disk_dquot *ddq;
482 xfs_dquot_t *dqp; 482 struct xfs_dquot *dqp;
483 xfs_trans_t *tp; 483 struct xfs_trans *tp;
484 int error; 484 int error;
485 xfs_qcnt_t hard, soft; 485 xfs_qcnt_t hard, soft;
486 486
@@ -529,6 +529,7 @@ xfs_qm_scall_setqlim(
529 if (hard == 0 || hard >= soft) { 529 if (hard == 0 || hard >= soft) {
530 ddq->d_blk_hardlimit = cpu_to_be64(hard); 530 ddq->d_blk_hardlimit = cpu_to_be64(hard);
531 ddq->d_blk_softlimit = cpu_to_be64(soft); 531 ddq->d_blk_softlimit = cpu_to_be64(soft);
532 xfs_dquot_set_prealloc_limits(dqp);
532 if (id == 0) { 533 if (id == 0) {
533 q->qi_bhardlimit = hard; 534 q->qi_bhardlimit = hard;
534 q->qi_bsoftlimit = soft; 535 q->qi_bsoftlimit = soft;
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index b50ec5b95d5a..c61e31c7d997 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -77,7 +77,14 @@ typedef struct xfs_disk_dquot {
77 */ 77 */
78typedef struct xfs_dqblk { 78typedef struct xfs_dqblk {
79 xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */ 79 xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */
80 char dd_fill[32]; /* filling for posterity */ 80 char dd_fill[4]; /* filling for posterity */
81
82 /*
83 * These two are only present on filesystems with the CRC bits set.
84 */
85 __be32 dd_crc; /* checksum */
86 __be64 dd_lsn; /* last modification in log */
87 uuid_t dd_uuid; /* location information */
81} xfs_dqblk_t; 88} xfs_dqblk_t;
82 89
83/* 90/*
@@ -380,5 +387,7 @@ extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *,
380 xfs_dqid_t, uint, uint, char *); 387 xfs_dqid_t, uint, uint, char *);
381extern int xfs_mount_reset_sbqflags(struct xfs_mount *); 388extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
382 389
390extern const struct xfs_buf_ops xfs_dquot_buf_ops;
391
383#endif /* __KERNEL__ */ 392#endif /* __KERNEL__ */
384#endif /* __XFS_QUOTA_H__ */ 393#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index a05b45175fb0..2de58a85833c 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -32,6 +32,7 @@ struct xfs_mount;
32#define XFS_SB_VERSION_2 2 /* 6.2 - attributes */ 32#define XFS_SB_VERSION_2 2 /* 6.2 - attributes */
33#define XFS_SB_VERSION_3 3 /* 6.2 - new inode version */ 33#define XFS_SB_VERSION_3 3 /* 6.2 - new inode version */
34#define XFS_SB_VERSION_4 4 /* 6.2+ - bitmask version */ 34#define XFS_SB_VERSION_4 4 /* 6.2+ - bitmask version */
35#define XFS_SB_VERSION_5 5 /* CRC enabled filesystem */
35#define XFS_SB_VERSION_NUMBITS 0x000f 36#define XFS_SB_VERSION_NUMBITS 0x000f
36#define XFS_SB_VERSION_ALLFBITS 0xfff0 37#define XFS_SB_VERSION_ALLFBITS 0xfff0
37#define XFS_SB_VERSION_SASHFBITS 0xf000 38#define XFS_SB_VERSION_SASHFBITS 0xf000
@@ -161,6 +162,20 @@ typedef struct xfs_sb {
161 */ 162 */
162 __uint32_t sb_bad_features2; 163 __uint32_t sb_bad_features2;
163 164
165 /* version 5 superblock fields start here */
166
167 /* feature masks */
168 __uint32_t sb_features_compat;
169 __uint32_t sb_features_ro_compat;
170 __uint32_t sb_features_incompat;
171 __uint32_t sb_features_log_incompat;
172
173 __uint32_t sb_crc; /* superblock crc */
174 __uint32_t sb_pad;
175
176 xfs_ino_t sb_pquotino; /* project quota inode */
177 xfs_lsn_t sb_lsn; /* last write sequence */
178
164 /* must be padded to 64 bit alignment */ 179 /* must be padded to 64 bit alignment */
165} xfs_sb_t; 180} xfs_sb_t;
166 181
@@ -229,7 +244,21 @@ typedef struct xfs_dsb {
229 * for features2 bits. Easiest just to mark it bad and not use 244 * for features2 bits. Easiest just to mark it bad and not use
230 * it for anything else. 245 * it for anything else.
231 */ 246 */
232 __be32 sb_bad_features2; 247 __be32 sb_bad_features2;
248
249 /* version 5 superblock fields start here */
250
251 /* feature masks */
252 __be32 sb_features_compat;
253 __be32 sb_features_ro_compat;
254 __be32 sb_features_incompat;
255 __be32 sb_features_log_incompat;
256
257 __le32 sb_crc; /* superblock crc */
258 __be32 sb_pad;
259
260 __be64 sb_pquotino; /* project quota inode */
261 __be64 sb_lsn; /* last write sequence */
233 262
234 /* must be padded to 64 bit alignment */ 263 /* must be padded to 64 bit alignment */
235} xfs_dsb_t; 264} xfs_dsb_t;
@@ -250,7 +279,10 @@ typedef enum {
250 XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN, 279 XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
251 XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG, 280 XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
252 XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT, 281 XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
253 XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, 282 XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT,
283 XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT,
284 XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD,
285 XFS_SBS_PQUOTINO, XFS_SBS_LSN,
254 XFS_SBS_FIELDCOUNT 286 XFS_SBS_FIELDCOUNT
255} xfs_sb_field_t; 287} xfs_sb_field_t;
256 288
@@ -276,6 +308,12 @@ typedef enum {
276#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS) 308#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS)
277#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2) 309#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2)
278#define XFS_SB_BAD_FEATURES2 XFS_SB_MVAL(BAD_FEATURES2) 310#define XFS_SB_BAD_FEATURES2 XFS_SB_MVAL(BAD_FEATURES2)
311#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT)
312#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
313#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
314#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT)
315#define XFS_SB_CRC XFS_SB_MVAL(CRC)
316#define XFS_SB_PQUOTINO XFS_SB_MVAL(PQUOTINO)
279#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT) 317#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT)
280#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1) 318#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1)
281#define XFS_SB_MOD_BITS \ 319#define XFS_SB_MOD_BITS \
@@ -283,7 +321,9 @@ typedef enum {
283 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ 321 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
284 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ 322 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
285 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \ 323 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
286 XFS_SB_BAD_FEATURES2) 324 XFS_SB_BAD_FEATURES2 | XFS_SB_FEATURES_COMPAT | \
325 XFS_SB_FEATURES_RO_COMPAT | XFS_SB_FEATURES_INCOMPAT | \
326 XFS_SB_FEATURES_LOG_INCOMPAT | XFS_SB_PQUOTINO)
287 327
288 328
289/* 329/*
@@ -325,6 +365,8 @@ static inline int xfs_sb_good_version(xfs_sb_t *sbp)
325 365
326 return 1; 366 return 1;
327 } 367 }
368 if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5)
369 return 1;
328 370
329 return 0; 371 return 0;
330} 372}
@@ -365,7 +407,7 @@ static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp)
365{ 407{
366 return sbp->sb_versionnum == XFS_SB_VERSION_2 || 408 return sbp->sb_versionnum == XFS_SB_VERSION_2 ||
367 sbp->sb_versionnum == XFS_SB_VERSION_3 || 409 sbp->sb_versionnum == XFS_SB_VERSION_3 ||
368 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 410 (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
369 (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT)); 411 (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT));
370} 412}
371 413
@@ -373,7 +415,7 @@ static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
373{ 415{
374 if (sbp->sb_versionnum == XFS_SB_VERSION_1) 416 if (sbp->sb_versionnum == XFS_SB_VERSION_1)
375 sbp->sb_versionnum = XFS_SB_VERSION_2; 417 sbp->sb_versionnum = XFS_SB_VERSION_2;
376 else if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) 418 else if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4)
377 sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT; 419 sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT;
378 else 420 else
379 sbp->sb_versionnum = XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT; 421 sbp->sb_versionnum = XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT;
@@ -382,7 +424,7 @@ static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
382static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp) 424static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp)
383{ 425{
384 return sbp->sb_versionnum == XFS_SB_VERSION_3 || 426 return sbp->sb_versionnum == XFS_SB_VERSION_3 ||
385 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 427 (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
386 (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT)); 428 (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT));
387} 429}
388 430
@@ -396,13 +438,13 @@ static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp)
396 438
397static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp) 439static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp)
398{ 440{
399 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 441 return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
400 (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT); 442 (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
401} 443}
402 444
403static inline void xfs_sb_version_addquota(xfs_sb_t *sbp) 445static inline void xfs_sb_version_addquota(xfs_sb_t *sbp)
404{ 446{
405 if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) 447 if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4)
406 sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT; 448 sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT;
407 else 449 else
408 sbp->sb_versionnum = xfs_sb_version_tonew(sbp->sb_versionnum) | 450 sbp->sb_versionnum = xfs_sb_version_tonew(sbp->sb_versionnum) |
@@ -411,13 +453,14 @@ static inline void xfs_sb_version_addquota(xfs_sb_t *sbp)
411 453
412static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp) 454static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp)
413{ 455{
414 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 456 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
415 (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT); 457 (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
458 (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT));
416} 459}
417 460
418static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp) 461static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp)
419{ 462{
420 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 463 return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
421 (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); 464 (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
422} 465}
423 466
@@ -429,38 +472,42 @@ static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp)
429 472
430static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp) 473static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp)
431{ 474{
432 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 475 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
433 (sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT); 476 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
477 (sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT));
434} 478}
435 479
436static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp) 480static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp)
437{ 481{
438 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 482 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
439 (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); 483 (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
484 (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT));
440} 485}
441 486
442static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp) 487static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp)
443{ 488{
444 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 489 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
445 (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT); 490 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
491 (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT));
446} 492}
447 493
448static inline int xfs_sb_version_hassector(xfs_sb_t *sbp) 494static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
449{ 495{
450 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 496 return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
451 (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT); 497 (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
452} 498}
453 499
454static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp) 500static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp)
455{ 501{
456 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 502 return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
457 (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); 503 (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
458} 504}
459 505
460static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) 506static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
461{ 507{
462 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && 508 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
463 (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); 509 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
510 (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT));
464} 511}
465 512
466/* 513/*
@@ -475,14 +522,16 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
475 522
476static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp) 523static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
477{ 524{
478 return xfs_sb_version_hasmorebits(sbp) && 525 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
479 (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT); 526 (xfs_sb_version_hasmorebits(sbp) &&
527 (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
480} 528}
481 529
482static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp) 530static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
483{ 531{
484 return xfs_sb_version_hasmorebits(sbp) && 532 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
485 (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT); 533 (xfs_sb_version_hasmorebits(sbp) &&
534 (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT));
486} 535}
487 536
488static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp) 537static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
@@ -500,14 +549,73 @@ static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
500 549
501static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp) 550static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
502{ 551{
503 return xfs_sb_version_hasmorebits(sbp) && 552 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
504 (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT); 553 (xfs_sb_version_hasmorebits(sbp) &&
554 (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT));
505} 555}
506 556
507static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp) 557static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp)
508{ 558{
509 return (xfs_sb_version_hasmorebits(sbp) && 559 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
510 (sbp->sb_features2 & XFS_SB_VERSION2_CRCBIT)); 560}
561
562
563/*
564 * Extended v5 superblock feature masks. These are to be used for new v5
565 * superblock features only.
566 *
567 * Compat features are new features that old kernels will not notice or affect
568 * and so can mount read-write without issues.
569 *
570 * RO-Compat (read only) are features that old kernels can read but will break
571 * if they write. Hence only read-only mounts of such filesystems are allowed on
572 * kernels that don't support the feature bit.
573 *
574 * InCompat features are features which old kernels will not understand and so
575 * must not mount.
576 *
577 * Log-InCompat features are for changes to log formats or new transactions that
578 * can't be replayed on older kernels. The fields are set when the filesystem is
579 * mounted, and a clean unmount clears the fields.
580 */
581#define XFS_SB_FEAT_COMPAT_ALL 0
582#define XFS_SB_FEAT_COMPAT_UNKNOWN ~XFS_SB_FEAT_COMPAT_ALL
583static inline bool
584xfs_sb_has_compat_feature(
585 struct xfs_sb *sbp,
586 __uint32_t feature)
587{
588 return (sbp->sb_features_compat & feature) != 0;
589}
590
591#define XFS_SB_FEAT_RO_COMPAT_ALL 0
592#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
593static inline bool
594xfs_sb_has_ro_compat_feature(
595 struct xfs_sb *sbp,
596 __uint32_t feature)
597{
598 return (sbp->sb_features_ro_compat & feature) != 0;
599}
600
601#define XFS_SB_FEAT_INCOMPAT_ALL 0
602#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
603static inline bool
604xfs_sb_has_incompat_feature(
605 struct xfs_sb *sbp,
606 __uint32_t feature)
607{
608 return (sbp->sb_features_incompat & feature) != 0;
609}
610
611#define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0
612#define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL
613static inline bool
614xfs_sb_has_incompat_log_feature(
615 struct xfs_sb *sbp,
616 __uint32_t feature)
617{
618 return (sbp->sb_features_log_incompat & feature) != 0;
511} 619}
512 620
513/* 621/*
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
new file mode 100644
index 000000000000..5f234389327c
--- /dev/null
+++ b/fs/xfs/xfs_symlink.c
@@ -0,0 +1,730 @@
1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * Copyright (c) 2012-2013 Red Hat, Inc.
4 * All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it would be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19#include "xfs.h"
20#include "xfs_fs.h"
21#include "xfs_types.h"
22#include "xfs_bit.h"
23#include "xfs_log.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_mount.h"
29#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h"
31#include "xfs_ialloc_btree.h"
32#include "xfs_dinode.h"
33#include "xfs_inode.h"
34#include "xfs_inode_item.h"
35#include "xfs_itable.h"
36#include "xfs_ialloc.h"
37#include "xfs_alloc.h"
38#include "xfs_bmap.h"
39#include "xfs_error.h"
40#include "xfs_quota.h"
41#include "xfs_utils.h"
42#include "xfs_trans_space.h"
43#include "xfs_log_priv.h"
44#include "xfs_trace.h"
45#include "xfs_symlink.h"
46#include "xfs_cksum.h"
47#include "xfs_buf_item.h"
48
49
50/*
51 * Each contiguous block has a header, so it is not just a simple pathlen
52 * to FSB conversion.
53 */
54int
55xfs_symlink_blocks(
56 struct xfs_mount *mp,
57 int pathlen)
58{
59 int fsblocks = 0;
60 int len = pathlen;
61
62 do {
63 fsblocks++;
64 len -= XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
65 } while (len > 0);
66
67 ASSERT(fsblocks <= XFS_SYMLINK_MAPS);
68 return fsblocks;
69}
70
71static int
72xfs_symlink_hdr_set(
73 struct xfs_mount *mp,
74 xfs_ino_t ino,
75 uint32_t offset,
76 uint32_t size,
77 struct xfs_buf *bp)
78{
79 struct xfs_dsymlink_hdr *dsl = bp->b_addr;
80
81 if (!xfs_sb_version_hascrc(&mp->m_sb))
82 return 0;
83
84 dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
85 dsl->sl_offset = cpu_to_be32(offset);
86 dsl->sl_bytes = cpu_to_be32(size);
87 uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_uuid);
88 dsl->sl_owner = cpu_to_be64(ino);
89 dsl->sl_blkno = cpu_to_be64(bp->b_bn);
90 bp->b_ops = &xfs_symlink_buf_ops;
91
92 return sizeof(struct xfs_dsymlink_hdr);
93}
94
95/*
96 * Checking of the symlink header is split into two parts. the verifier does
97 * CRC, location and bounds checking, the unpacking function checks the path
98 * parameters and owner.
99 */
100bool
101xfs_symlink_hdr_ok(
102 struct xfs_mount *mp,
103 xfs_ino_t ino,
104 uint32_t offset,
105 uint32_t size,
106 struct xfs_buf *bp)
107{
108 struct xfs_dsymlink_hdr *dsl = bp->b_addr;
109
110 if (offset != be32_to_cpu(dsl->sl_offset))
111 return false;
112 if (size != be32_to_cpu(dsl->sl_bytes))
113 return false;
114 if (ino != be64_to_cpu(dsl->sl_owner))
115 return false;
116
117 /* ok */
118 return true;
119}
120
121static bool
122xfs_symlink_verify(
123 struct xfs_buf *bp)
124{
125 struct xfs_mount *mp = bp->b_target->bt_mount;
126 struct xfs_dsymlink_hdr *dsl = bp->b_addr;
127
128 if (!xfs_sb_version_hascrc(&mp->m_sb))
129 return false;
130 if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
131 return false;
132 if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_uuid))
133 return false;
134 if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
135 return false;
136 if (be32_to_cpu(dsl->sl_offset) +
137 be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN)
138 return false;
139 if (dsl->sl_owner == 0)
140 return false;
141
142 return true;
143}
144
/*
 * Read verifier for remote symlink buffers.
 *
 * On CRC filesystems, check the block checksum and the structural header
 * fields; on failure the buffer is marked EFSCORRUPTED so the reader sees
 * the error via bp->b_error.  Non-CRC buffers pass through unchecked —
 * they have no header to validate.
 */
static void
xfs_symlink_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;

	/* no verification of non-crc buffers */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
				    offsetof(struct xfs_dsymlink_hdr, sl_crc)) ||
	    !xfs_symlink_verify(bp)) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
		xfs_buf_ioerror(bp, EFSCORRUPTED);
	}
}
162
/*
 * Write verifier for remote symlink buffers.
 *
 * Validates the header, then — if the buffer is logged — stamps the log
 * item's LSN into the header BEFORE recomputing the CRC, so the checksum
 * covers the LSN.  On validation failure the buffer is marked
 * EFSCORRUPTED and no CRC is written.
 */
static void
xfs_symlink_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item	*bip = bp->b_fspriv;

	/* no verification of non-crc buffers */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (!xfs_symlink_verify(bp)) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
		xfs_buf_ioerror(bp, EFSCORRUPTED);
		return;
	}

	if (bip) {
		struct xfs_dsymlink_hdr *dsl = bp->b_addr;
		/* record the LSN of this write so recovery can order replays */
		dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
	}
	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
			 offsetof(struct xfs_dsymlink_hdr, sl_crc));
}
187
/* Buffer ops attached to remote symlink blocks on CRC filesystems. */
const struct xfs_buf_ops xfs_symlink_buf_ops = {
	.verify_read = xfs_symlink_read_verify,
	.verify_write = xfs_symlink_write_verify,
};
192
/*
 * Copy an inline (local-format) symlink target from the inode fork @ifp
 * into the remote buffer @bp.
 *
 * Non-CRC filesystems get the raw path bytes with no header and no
 * verifier attached.  CRC filesystems get a xfs_dsymlink_hdr written
 * first (via xfs_symlink_hdr_set) and the symlink buffer ops attached.
 *
 * NOTE(review): @tp is unused in this body; presumably kept to match a
 * fork-conversion callback signature — confirm against the caller.
 */
void
xfs_symlink_local_to_remote(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp,
	struct xfs_inode	*ip,
	struct xfs_ifork	*ifp)
{
	struct xfs_mount	*mp = ip->i_mount;
	char			*buf;

	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
		bp->b_ops = NULL;
		memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
		return;
	}

	/*
	 * As this symlink fits in an inode literal area, it must also fit in
	 * the smallest buffer the filesystem supports.
	 */
	ASSERT(BBTOB(bp->b_length) >=
			ifp->if_bytes + sizeof(struct xfs_dsymlink_hdr));

	bp->b_ops = &xfs_symlink_buf_ops;

	buf = bp->b_addr;
	/* hdr_set returns the header size, i.e. where the path data starts */
	buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
	memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes);
}
222
223/* ----- Kernel only functions below ----- */
224STATIC int
225xfs_readlink_bmap(
226 struct xfs_inode *ip,
227 char *link)
228{
229 struct xfs_mount *mp = ip->i_mount;
230 struct xfs_bmbt_irec mval[XFS_SYMLINK_MAPS];
231 struct xfs_buf *bp;
232 xfs_daddr_t d;
233 char *cur_chunk;
234 int pathlen = ip->i_d.di_size;
235 int nmaps = XFS_SYMLINK_MAPS;
236 int byte_cnt;
237 int n;
238 int error = 0;
239 int fsblocks = 0;
240 int offset;
241
242 fsblocks = xfs_symlink_blocks(mp, pathlen);
243 error = xfs_bmapi_read(ip, 0, fsblocks, mval, &nmaps, 0);
244 if (error)
245 goto out;
246
247 offset = 0;
248 for (n = 0; n < nmaps; n++) {
249 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
250 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
251
252 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0,
253 &xfs_symlink_buf_ops);
254 if (!bp)
255 return XFS_ERROR(ENOMEM);
256 error = bp->b_error;
257 if (error) {
258 xfs_buf_ioerror_alert(bp, __func__);
259 xfs_buf_relse(bp);
260 goto out;
261 }
262 byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
263 if (pathlen < byte_cnt)
264 byte_cnt = pathlen;
265
266 cur_chunk = bp->b_addr;
267 if (xfs_sb_version_hascrc(&mp->m_sb)) {
268 if (!xfs_symlink_hdr_ok(mp, ip->i_ino, offset,
269 byte_cnt, bp)) {
270 error = EFSCORRUPTED;
271 xfs_alert(mp,
272"symlink header does not match required off/len/owner (0x%x/Ox%x,0x%llx)",
273 offset, byte_cnt, ip->i_ino);
274 xfs_buf_relse(bp);
275 goto out;
276
277 }
278
279 cur_chunk += sizeof(struct xfs_dsymlink_hdr);
280 }
281
282 memcpy(link + offset, bp->b_addr, byte_cnt);
283
284 pathlen -= byte_cnt;
285 offset += byte_cnt;
286
287 xfs_buf_relse(bp);
288 }
289 ASSERT(pathlen == 0);
290
291 link[ip->i_d.di_size] = '\0';
292 error = 0;
293
294 out:
295 return error;
296}
297
/*
 * Copy the symlink target of @ip into @link.
 *
 * Takes the inode lock shared.  Inline (local-format) links are copied
 * straight out of the inode literal area; longer links are read via
 * xfs_readlink_bmap.  The caller must supply at least
 * ip->i_d.di_size + 1 bytes; the result is NUL-terminated for non-empty
 * paths.  Returns 0 or a positive XFS error code.
 */
int
xfs_readlink(
	struct xfs_inode *ip,
	char		*link)
{
	struct xfs_mount *mp = ip->i_mount;
	xfs_fsize_t	pathlen;
	int		error = 0;

	trace_xfs_readlink(ip);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	xfs_ilock(ip, XFS_ILOCK_SHARED);

	pathlen = ip->i_d.di_size;
	if (!pathlen)
		goto out;

	if (pathlen < 0 || pathlen > MAXPATHLEN) {
		/* on-disk size is corrupt; report and refuse the read */
		xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
			 __func__, (unsigned long long) ip->i_ino,
			 (long long) pathlen);
		ASSERT(0);
		error = XFS_ERROR(EFSCORRUPTED);
		goto out;
	}


	if (ip->i_df.if_flags & XFS_IFINLINE) {
		/* short target lives inline in the inode literal area */
		memcpy(link, ip->i_df.if_u1.if_data, pathlen);
		link[pathlen] = '\0';
	} else {
		/* target is stored in remote data-fork blocks */
		error = xfs_readlink_bmap(ip, link);
	}

 out:
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return error;
}
339
/*
 * Create a symlink named @link_name in directory @dp pointing at
 * @target_path, with permission bits from @mode.
 *
 * Short targets are stored inline in the new inode's data fork; longer
 * ones are written to freshly allocated remote blocks (with CRC headers
 * on filesystems where xfs_sb_version_hascrc() holds, via
 * xfs_symlink_hdr_set).  On success the new inode is returned in *ipp.
 * Returns 0 or a positive XFS error code.
 */
int
xfs_symlink(
	struct xfs_inode	*dp,
	struct xfs_name		*link_name,
	const char		*target_path,
	umode_t			mode,
	struct xfs_inode	**ipp)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_trans	*tp = NULL;
	struct xfs_inode	*ip = NULL;
	int			error = 0;
	int			pathlen;
	struct xfs_bmap_free	free_list;
	xfs_fsblock_t		first_block;
	bool                    unlock_dp_on_error = false;
	uint			cancel_flags;
	int			committed;
	xfs_fileoff_t		first_fsb;
	xfs_filblks_t		fs_blocks;
	int			nmaps;
	struct xfs_bmbt_irec	mval[XFS_SYMLINK_MAPS];
	xfs_daddr_t		d;
	const char		*cur_chunk;
	int			byte_cnt;
	int			n;
	xfs_buf_t		*bp;
	prid_t			prid;
	struct xfs_dquot	*udqp, *gdqp;
	uint			resblks;

	*ipp = NULL;

	trace_xfs_symlink(dp, link_name);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/*
	 * Check component lengths of the target path name.
	 */
	pathlen = strlen(target_path);
	if (pathlen >= MAXPATHLEN)      /* total string too long */
		return XFS_ERROR(ENAMETOOLONG);

	udqp = gdqp = NULL;
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = xfs_get_projid(dp);
	else
		prid = XFS_PROJID_DEFAULT;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
		XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		goto std_return;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	/*
	 * The symlink will fit into the inode data fork?
	 * There can't be any attributes so we get the whole variable part.
	 */
	if (pathlen <= XFS_LITINO(mp, dp->i_d.di_version))
		fs_blocks = 0;
	else
		fs_blocks = XFS_B_TO_FSB(mp, pathlen);
	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
	error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
	if (error == ENOSPC && fs_blocks == 0) {
		/* inline target: retry without any block reservation */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
	}
	if (error) {
		cancel_flags = 0;
		goto error_return;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
	unlock_dp_on_error = true;

	/*
	 * Check whether the directory allows new symlinks or not.
	 */
	if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
		error = XFS_ERROR(EPERM);
		goto error_return;
	}

	/*
	 * Reserve disk quota : blocks and inode.
	 */
	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto error_return;

	/*
	 * Check for ability to enter directory entry, if no space reserved.
	 */
	error = xfs_dir_canenter(tp, dp, link_name, resblks);
	if (error)
		goto error_return;
	/*
	 * Initialize the bmap freelist prior to calling either
	 * bmapi or the directory create code.
	 */
	xfs_bmap_init(&free_list, &first_block);

	/*
	 * Allocate an inode for the symlink.
	 */
	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
			       prid, resblks > 0, &ip, NULL);
	if (error) {
		if (error == ENOSPC)
			goto error_return;
		goto error1;
	}

	/*
	 * An error after we've joined dp to the transaction will result in the
	 * transaction cancel unlocking dp so don't do it explicitly in the
	 * error path.
	 */
	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	unlock_dp_on_error = false;

	/*
	 * Also attach the dquot(s) to it, if applicable.
	 */
	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);

	if (resblks)
		resblks -= XFS_IALLOC_SPACE_RES(mp);
	/*
	 * If the symlink will fit into the inode, write it inline.
	 */
	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
		ip->i_d.di_size = pathlen;

		/*
		 * The inode was initially created in extent format.
		 */
		ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
		ip->i_df.if_flags |= XFS_IFINLINE;

		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);

	} else {
		int	offset;

		first_fsb = 0;
		nmaps = XFS_SYMLINK_MAPS;

		error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
				  XFS_BMAPI_METADATA, &first_block, resblks,
				  mval, &nmaps, &free_list);
		if (error)
			goto error2;

		if (resblks)
			resblks -= fs_blocks;
		ip->i_d.di_size = pathlen;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

		/*
		 * Copy the path into the allocated blocks chunk by chunk,
		 * writing a CRC header first where the filesystem has one.
		 */
		cur_chunk = target_path;
		offset = 0;
		for (n = 0; n < nmaps; n++) {
			char	*buf;

			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
					       BTOBB(byte_cnt), 0);
			if (!bp) {
				error = ENOMEM;
				goto error2;
			}
			bp->b_ops = &xfs_symlink_buf_ops;

			byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
			if (pathlen < byte_cnt) {
				byte_cnt = pathlen;
			}

			buf = bp->b_addr;
			/* hdr_set returns the header size to skip over */
			buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset,
						   byte_cnt, bp);

			memcpy(buf, cur_chunk, byte_cnt);

			cur_chunk += byte_cnt;
			pathlen -= byte_cnt;
			offset += byte_cnt;

			xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) -
							(char *)bp->b_addr);
		}
	}

	/*
	 * Create the directory entry for the symlink.
	 */
	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
					&first_block, &free_list, resblks);
	if (error)
		goto error2;
	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	/*
	 * If this is a synchronous mount, make sure that the
	 * symlink transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error) {
		goto error2;
	}
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	*ipp = ip;
	return 0;

 error2:
	/*
	 * NOTE(review): ip is released here before the transaction is
	 * cancelled; this assumes the cancel path does not reference ip
	 * again — confirm against xfs_dir_ialloc()'s join behaviour.
	 */
	IRELE(ip);
 error1:
	xfs_bmap_cancel(&free_list);
	cancel_flags |= XFS_TRANS_ABORT;
 error_return:
	xfs_trans_cancel(tp, cancel_flags);
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	if (unlock_dp_on_error)
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
 std_return:
	return error;
}
592
/*
 * Free a symlink that has blocks associated with it.
 *
 * Called with an active, reserved transaction in *tpp.  The remote
 * symlink blocks are invalidated and unmapped, the freeing transaction
 * is committed (logging the EFI/EFD pair), and a fresh transaction with
 * an itruncate reservation is handed back to the caller in *tpp with the
 * inode joined.  Returns 0 or a positive error code; on error the caller
 * still owns the transaction in *tpp.
 */
int
xfs_inactive_symlink_rmt(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_buf_t	*bp;
	int		committed;
	int		done;
	int		error;
	xfs_fsblock_t	first_block;
	xfs_bmap_free_t	free_list;
	int		i;
	xfs_mount_t	*mp;
	xfs_bmbt_irec_t	mval[XFS_SYMLINK_MAPS];
	int		nmaps;
	xfs_trans_t	*ntp;
	int		size;
	xfs_trans_t	*tp;

	tp = *tpp;
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink that has some
	 * blocks allocated to it.  Free the
	 * blocks here.  We know that we've got
	 * either 1 or 2 extents and that we can
	 * free them all in one bunmapi call.
	 *
	 * NOTE(review): XFS_SYMLINK_MAPS allows up to 3 mappings on small
	 * block CRC filesystems — confirm the <= 2 bound below still holds.
	 */
	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);

	/*
	 * Lock the inode, fix the size, and join it to the transaction.
	 * Hold it so in the normal path, we still have it locked for
	 * the second transaction.  In the error paths we need it
	 * held so the cancel won't rele it, see below.
	 */
	size = (int)ip->i_d.di_size;
	ip->i_d.di_size = 0;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Find the block(s) so we can inval and unmap them.
	 */
	done = 0;
	xfs_bmap_init(&free_list, &first_block);
	nmaps = ARRAY_SIZE(mval);
	error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size),
				mval, &nmaps, 0);
	if (error)
		goto error0;
	/*
	 * Invalidate the block(s). No validation is done.
	 */
	for (i = 0; i < nmaps; i++) {
		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
		if (!bp) {
			error = ENOMEM;
			goto error1;
		}
		xfs_trans_binval(tp, bp);
	}
	/*
	 * Unmap the dead block(s) to the free_list.
	 */
	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
			&first_block, &free_list, &done)))
		goto error1;
	ASSERT(done);
	/*
	 * Commit the first transaction.  This logs the EFI and the inode.
	 */
	if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
		goto error1;
	/*
	 * The transaction must have been committed, since there were
	 * actually extents freed by xfs_bunmapi.  See xfs_bmap_finish.
	 * The new tp has the extent freeing and EFDs.
	 */
	ASSERT(committed);
	/*
	 * The first xact was committed, so add the inode to the new one.
	 * Mark it dirty so it will be logged and moved forward in the log as
	 * part of every commit.
	 */
	xfs_trans_ijoin(tp, ip, 0);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Get a new, empty transaction to return to our caller.
	 */
	ntp = xfs_trans_dup(tp);
	/*
	 * Commit the transaction containing extent freeing and EFDs.
	 * If we get an error on the commit here or on the reserve below,
	 * we need to unlock the inode since the new transaction doesn't
	 * have the inode attached.
	 */
	error = xfs_trans_commit(tp, 0);
	tp = ntp;
	if (error) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * transaction commit worked ok so we can drop the extra ticket
	 * reference that we gained in xfs_trans_dup()
	 */
	xfs_log_ticket_put(tp->t_ticket);

	/*
	 * Remove the memory for extent descriptions (just bookkeeping).
	 */
	if (ip->i_df.if_bytes)
		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
	ASSERT(ip->i_df.if_bytes == 0);
	/*
	 * Put an itruncate log reservation in the new transaction
	 * for our caller.
	 */
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}

	xfs_trans_ijoin(tp, ip, 0);
	*tpp = tp;
	return 0;

 error1:
	xfs_bmap_cancel(&free_list);
 error0:
	return error;
}
diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h
new file mode 100644
index 000000000000..b39398d2097c
--- /dev/null
+++ b/fs/xfs/xfs_symlink.h
@@ -0,0 +1,66 @@
1/*
2 * Copyright (c) 2012 Red Hat, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write the Free Software Foundation,
15 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17#ifndef __XFS_SYMLINK_H
18#define __XFS_SYMLINK_H 1
19
20struct xfs_mount;
21struct xfs_trans;
22struct xfs_inode;
23struct xfs_buf;
24struct xfs_ifork;
25struct xfs_name;
26
27#define XFS_SYMLINK_MAGIC 0x58534c4d /* XSLM */
28
/*
 * On-disk header at the start of each remote symlink block on CRC
 * (v5) filesystems.  Populated by xfs_symlink_hdr_set(); the CRC and
 * LSN are filled in by the write verifier.
 */
struct xfs_dsymlink_hdr {
	__be32	sl_magic;	/* XFS_SYMLINK_MAGIC */
	__be32	sl_offset;	/* byte offset of this chunk within the path */
	__be32	sl_bytes;	/* path bytes stored in this block */
	__be32	sl_crc;		/* CRC of the block, computed at write time */
	uuid_t	sl_uuid;	/* filesystem UUID (sb_uuid) */
	__be64	sl_owner;	/* inode number that owns this symlink */
	__be64	sl_blkno;	/* disk address of this block (self-describing) */
	__be64	sl_lsn;		/* LSN of last write, stamped by write verifier */
};
39
40/*
41 * The maximum pathlen is 1024 bytes. Since the minimum file system
42 * blocksize is 512 bytes, we can get a max of 3 extents back from
43 * bmapi when crc headers are taken into account.
44 */
45#define XFS_SYMLINK_MAPS 3
46
47#define XFS_SYMLINK_BUF_SPACE(mp, bufsize) \
48 ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
49 sizeof(struct xfs_dsymlink_hdr) : 0))
50
51int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
52
53void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
54 struct xfs_inode *ip, struct xfs_ifork *ifp);
55
56extern const struct xfs_buf_ops xfs_symlink_buf_ops;
57
58#ifdef __KERNEL__
59
60int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
61 const char *target_path, umode_t mode, struct xfs_inode **ipp);
62int xfs_readlink(struct xfs_inode *ip, char *link);
63int xfs_inactive_symlink_rmt(struct xfs_inode *ip, struct xfs_trans **tpp);
64
65#endif /* __KERNEL__ */
66#endif /* __XFS_SYMLINK_H */
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 624bedd81357..b6e3897c1d9f 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -22,7 +22,6 @@
22#include "xfs_trans.h" 22#include "xfs_trans.h"
23#include "xfs_sb.h" 23#include "xfs_sb.h"
24#include "xfs_ag.h" 24#include "xfs_ag.h"
25#include "xfs_da_btree.h"
26#include "xfs_bmap_btree.h" 25#include "xfs_bmap_btree.h"
27#include "xfs_alloc_btree.h" 26#include "xfs_alloc_btree.h"
28#include "xfs_ialloc_btree.h" 27#include "xfs_ialloc_btree.h"
@@ -30,6 +29,7 @@
30#include "xfs_inode.h" 29#include "xfs_inode.h"
31#include "xfs_btree.h" 30#include "xfs_btree.h"
32#include "xfs_mount.h" 31#include "xfs_mount.h"
32#include "xfs_da_btree.h"
33#include "xfs_ialloc.h" 33#include "xfs_ialloc.h"
34#include "xfs_itable.h" 34#include "xfs_itable.h"
35#include "xfs_alloc.h" 35#include "xfs_alloc.h"
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 16a812977eab..aa4db3307d36 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -619,6 +619,30 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
619 (char *)__entry->caller_ip) 619 (char *)__entry->caller_ip)
620) 620)
621 621
/*
 * Trace the speculative preallocation size chosen for a write: the block
 * count picked, the shift applied (presumably the quota/space throttling
 * factor — confirm against the iomap prealloc changes) and the mount's
 * writeio_blocks value.
 */
TRACE_EVENT(xfs_iomap_prealloc_size,
	TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t blocks, int shift,
		 unsigned int writeio_blocks),
	TP_ARGS(ip, blocks, shift, writeio_blocks),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(xfs_fsblock_t, blocks)
		__field(int, shift)
		__field(unsigned int, writeio_blocks)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->blocks = blocks;
		__entry->shift = shift;
		__entry->writeio_blocks = writeio_blocks;
	),
	TP_printk("dev %d:%d ino 0x%llx prealloc blocks %llu shift %d "
		  "m_writeio_blocks %u",
		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino,
		  __entry->blocks, __entry->shift, __entry->writeio_blocks)
)
645
622#define DEFINE_IREF_EVENT(name) \ 646#define DEFINE_IREF_EVENT(name) \
623DEFINE_EVENT(xfs_iref_class, name, \ 647DEFINE_EVENT(xfs_iref_class, name, \
624 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ 648 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 3edf5dbee001..73a5fa457e16 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -659,6 +659,7 @@ xfs_trans_binval(
659 ASSERT(XFS_BUF_ISSTALE(bp)); 659 ASSERT(XFS_BUF_ISSTALE(bp));
660 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); 660 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
661 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF)); 661 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
662 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLFT_MASK));
662 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); 663 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
663 ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY); 664 ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
664 ASSERT(tp->t_flags & XFS_TRANS_DIRTY); 665 ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
@@ -671,6 +672,7 @@ xfs_trans_binval(
671 bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); 672 bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
672 bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; 673 bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
673 bip->__bli_format.blf_flags |= XFS_BLF_CANCEL; 674 bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;
675 bip->__bli_format.blf_flags &= ~XFS_BLFT_MASK;
674 for (i = 0; i < bip->bli_format_count; i++) { 676 for (i = 0; i < bip->bli_format_count; i++) {
675 memset(bip->bli_formats[i].blf_data_map, 0, 677 memset(bip->bli_formats[i].blf_data_map, 0,
676 (bip->bli_formats[i].blf_map_size * sizeof(uint))); 678 (bip->bli_formats[i].blf_map_size * sizeof(uint)));
@@ -702,12 +704,13 @@ xfs_trans_inode_buf(
702 ASSERT(atomic_read(&bip->bli_refcount) > 0); 704 ASSERT(atomic_read(&bip->bli_refcount) > 0);
703 705
704 bip->bli_flags |= XFS_BLI_INODE_BUF; 706 bip->bli_flags |= XFS_BLI_INODE_BUF;
707 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
705} 708}
706 709
707/* 710/*
708 * This call is used to indicate that the buffer is going to 711 * This call is used to indicate that the buffer is going to
709 * be staled and was an inode buffer. This means it gets 712 * be staled and was an inode buffer. This means it gets
710 * special processing during unpin - where any inodes 713 * special processing during unpin - where any inodes
711 * associated with the buffer should be removed from ail. 714 * associated with the buffer should be removed from ail.
712 * There is also special processing during recovery, 715 * There is also special processing during recovery,
713 * any replay of the inodes in the buffer needs to be 716 * any replay of the inodes in the buffer needs to be
@@ -726,6 +729,7 @@ xfs_trans_stale_inode_buf(
726 729
727 bip->bli_flags |= XFS_BLI_STALE_INODE; 730 bip->bli_flags |= XFS_BLI_STALE_INODE;
728 bip->bli_item.li_cb = xfs_buf_iodone; 731 bip->bli_item.li_cb = xfs_buf_iodone;
732 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
729} 733}
730 734
731/* 735/*
@@ -749,8 +753,43 @@ xfs_trans_inode_alloc_buf(
749 ASSERT(atomic_read(&bip->bli_refcount) > 0); 753 ASSERT(atomic_read(&bip->bli_refcount) > 0);
750 754
751 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; 755 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
756 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
752} 757}
753 758
759/*
760 * Set the type of the buffer for log recovery so that it can correctly identify
761 * and hence attach the correct buffer ops to the buffer after replay.
762 */
763void
764xfs_trans_buf_set_type(
765 struct xfs_trans *tp,
766 struct xfs_buf *bp,
767 enum xfs_blft type)
768{
769 struct xfs_buf_log_item *bip = bp->b_fspriv;
770
771 if (!tp)
772 return;
773
774 ASSERT(bp->b_transp == tp);
775 ASSERT(bip != NULL);
776 ASSERT(atomic_read(&bip->bli_refcount) > 0);
777
778 xfs_blft_to_flags(&bip->__bli_format, type);
779}
780
781void
782xfs_trans_buf_copy_type(
783 struct xfs_buf *dst_bp,
784 struct xfs_buf *src_bp)
785{
786 struct xfs_buf_log_item *sbip = src_bp->b_fspriv;
787 struct xfs_buf_log_item *dbip = dst_bp->b_fspriv;
788 enum xfs_blft type;
789
790 type = xfs_blft_from_flags(&sbip->__bli_format);
791 xfs_blft_to_flags(&dbip->__bli_format, type);
792}
754 793
755/* 794/*
756 * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of 795 * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of
@@ -769,14 +808,28 @@ xfs_trans_dquot_buf(
769 xfs_buf_t *bp, 808 xfs_buf_t *bp,
770 uint type) 809 uint type)
771{ 810{
772 xfs_buf_log_item_t *bip = bp->b_fspriv; 811 struct xfs_buf_log_item *bip = bp->b_fspriv;
773 812
774 ASSERT(bp->b_transp == tp);
775 ASSERT(bip != NULL);
776 ASSERT(type == XFS_BLF_UDQUOT_BUF || 813 ASSERT(type == XFS_BLF_UDQUOT_BUF ||
777 type == XFS_BLF_PDQUOT_BUF || 814 type == XFS_BLF_PDQUOT_BUF ||
778 type == XFS_BLF_GDQUOT_BUF); 815 type == XFS_BLF_GDQUOT_BUF);
779 ASSERT(atomic_read(&bip->bli_refcount) > 0);
780 816
781 bip->__bli_format.blf_flags |= type; 817 bip->__bli_format.blf_flags |= type;
818
819 switch (type) {
820 case XFS_BLF_UDQUOT_BUF:
821 type = XFS_BLFT_UDQUOT_BUF;
822 break;
823 case XFS_BLF_PDQUOT_BUF:
824 type = XFS_BLFT_PDQUOT_BUF;
825 break;
826 case XFS_BLF_GDQUOT_BUF:
827 type = XFS_BLFT_GDQUOT_BUF;
828 break;
829 default:
830 type = XFS_BLFT_UNKNOWN_BUF;
831 break;
832 }
833
834 xfs_trans_buf_set_type(tp, bp, type);
782} 835}
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 642c2d6e1db1..fec75d023703 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -326,12 +326,12 @@ xfs_trans_dqlockedjoin(
326 */ 326 */
327void 327void
328xfs_trans_apply_dquot_deltas( 328xfs_trans_apply_dquot_deltas(
329 xfs_trans_t *tp) 329 struct xfs_trans *tp)
330{ 330{
331 int i, j; 331 int i, j;
332 xfs_dquot_t *dqp; 332 struct xfs_dquot *dqp;
333 xfs_dqtrx_t *qtrx, *qa; 333 struct xfs_dqtrx *qtrx, *qa;
334 xfs_disk_dquot_t *d; 334 struct xfs_disk_dquot *d;
335 long totalbdelta; 335 long totalbdelta;
336 long totalrtbdelta; 336 long totalrtbdelta;
337 337
@@ -412,7 +412,7 @@ xfs_trans_apply_dquot_deltas(
412 * Start/reset the timer(s) if needed. 412 * Start/reset the timer(s) if needed.
413 */ 413 */
414 if (d->d_id) { 414 if (d->d_id) {
415 xfs_qm_adjust_dqlimits(tp->t_mountp, d); 415 xfs_qm_adjust_dqlimits(tp->t_mountp, dqp);
416 xfs_qm_adjust_dqtimers(tp->t_mountp, d); 416 xfs_qm_adjust_dqtimers(tp->t_mountp, d);
417 } 417 }
418 418
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 77ad74834baa..1501f4fa51a6 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * Copyright (c) 2012 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -48,103 +49,8 @@
48#include "xfs_vnodeops.h" 49#include "xfs_vnodeops.h"
49#include "xfs_trace.h" 50#include "xfs_trace.h"
50#include "xfs_icache.h" 51#include "xfs_icache.h"
52#include "xfs_symlink.h"
51 53
52/*
53 * The maximum pathlen is 1024 bytes. Since the minimum file system
54 * blocksize is 512 bytes, we can get a max of 2 extents back from
55 * bmapi.
56 */
57#define SYMLINK_MAPS 2
58
59STATIC int
60xfs_readlink_bmap(
61 xfs_inode_t *ip,
62 char *link)
63{
64 xfs_mount_t *mp = ip->i_mount;
65 int pathlen = ip->i_d.di_size;
66 int nmaps = SYMLINK_MAPS;
67 xfs_bmbt_irec_t mval[SYMLINK_MAPS];
68 xfs_daddr_t d;
69 int byte_cnt;
70 int n;
71 xfs_buf_t *bp;
72 int error = 0;
73
74 error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, pathlen), mval, &nmaps,
75 0);
76 if (error)
77 goto out;
78
79 for (n = 0; n < nmaps; n++) {
80 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
81 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
82
83 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0, NULL);
84 if (!bp)
85 return XFS_ERROR(ENOMEM);
86 error = bp->b_error;
87 if (error) {
88 xfs_buf_ioerror_alert(bp, __func__);
89 xfs_buf_relse(bp);
90 goto out;
91 }
92 if (pathlen < byte_cnt)
93 byte_cnt = pathlen;
94 pathlen -= byte_cnt;
95
96 memcpy(link, bp->b_addr, byte_cnt);
97 xfs_buf_relse(bp);
98 }
99
100 link[ip->i_d.di_size] = '\0';
101 error = 0;
102
103 out:
104 return error;
105}
106
107int
108xfs_readlink(
109 xfs_inode_t *ip,
110 char *link)
111{
112 xfs_mount_t *mp = ip->i_mount;
113 xfs_fsize_t pathlen;
114 int error = 0;
115
116 trace_xfs_readlink(ip);
117
118 if (XFS_FORCED_SHUTDOWN(mp))
119 return XFS_ERROR(EIO);
120
121 xfs_ilock(ip, XFS_ILOCK_SHARED);
122
123 pathlen = ip->i_d.di_size;
124 if (!pathlen)
125 goto out;
126
127 if (pathlen < 0 || pathlen > MAXPATHLEN) {
128 xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
129 __func__, (unsigned long long) ip->i_ino,
130 (long long) pathlen);
131 ASSERT(0);
132 error = XFS_ERROR(EFSCORRUPTED);
133 goto out;
134 }
135
136
137 if (ip->i_df.if_flags & XFS_IFINLINE) {
138 memcpy(link, ip->i_df.if_u1.if_data, pathlen);
139 link[pathlen] = '\0';
140 } else {
141 error = xfs_readlink_bmap(ip, link);
142 }
143
144 out:
145 xfs_iunlock(ip, XFS_ILOCK_SHARED);
146 return error;
147}
148 54
149/* 55/*
150 * This is called by xfs_inactive to free any blocks beyond eof 56 * This is called by xfs_inactive to free any blocks beyond eof
@@ -249,145 +155,6 @@ xfs_free_eofblocks(
249 return error; 155 return error;
250} 156}
251 157
252/*
253 * Free a symlink that has blocks associated with it.
254 */
255STATIC int
256xfs_inactive_symlink_rmt(
257 xfs_inode_t *ip,
258 xfs_trans_t **tpp)
259{
260 xfs_buf_t *bp;
261 int committed;
262 int done;
263 int error;
264 xfs_fsblock_t first_block;
265 xfs_bmap_free_t free_list;
266 int i;
267 xfs_mount_t *mp;
268 xfs_bmbt_irec_t mval[SYMLINK_MAPS];
269 int nmaps;
270 xfs_trans_t *ntp;
271 int size;
272 xfs_trans_t *tp;
273
274 tp = *tpp;
275 mp = ip->i_mount;
276 ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
277 /*
278 * We're freeing a symlink that has some
279 * blocks allocated to it. Free the
280 * blocks here. We know that we've got
281 * either 1 or 2 extents and that we can
282 * free them all in one bunmapi call.
283 */
284 ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
285
286 /*
287 * Lock the inode, fix the size, and join it to the transaction.
288 * Hold it so in the normal path, we still have it locked for
289 * the second transaction. In the error paths we need it
290 * held so the cancel won't rele it, see below.
291 */
292 size = (int)ip->i_d.di_size;
293 ip->i_d.di_size = 0;
294 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
295 /*
296 * Find the block(s) so we can inval and unmap them.
297 */
298 done = 0;
299 xfs_bmap_init(&free_list, &first_block);
300 nmaps = ARRAY_SIZE(mval);
301 error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, size),
302 mval, &nmaps, 0);
303 if (error)
304 goto error0;
305 /*
306 * Invalidate the block(s).
307 */
308 for (i = 0; i < nmaps; i++) {
309 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
310 XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
311 XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
312 if (!bp) {
313 error = ENOMEM;
314 goto error1;
315 }
316 xfs_trans_binval(tp, bp);
317 }
318 /*
319 * Unmap the dead block(s) to the free_list.
320 */
321 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
322 &first_block, &free_list, &done)))
323 goto error1;
324 ASSERT(done);
325 /*
326 * Commit the first transaction. This logs the EFI and the inode.
327 */
328 if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
329 goto error1;
330 /*
331 * The transaction must have been committed, since there were
332 * actually extents freed by xfs_bunmapi. See xfs_bmap_finish.
333 * The new tp has the extent freeing and EFDs.
334 */
335 ASSERT(committed);
336 /*
337 * The first xact was committed, so add the inode to the new one.
338 * Mark it dirty so it will be logged and moved forward in the log as
339 * part of every commit.
340 */
341 xfs_trans_ijoin(tp, ip, 0);
342 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
343 /*
344 * Get a new, empty transaction to return to our caller.
345 */
346 ntp = xfs_trans_dup(tp);
347 /*
348 * Commit the transaction containing extent freeing and EFDs.
349 * If we get an error on the commit here or on the reserve below,
350 * we need to unlock the inode since the new transaction doesn't
351 * have the inode attached.
352 */
353 error = xfs_trans_commit(tp, 0);
354 tp = ntp;
355 if (error) {
356 ASSERT(XFS_FORCED_SHUTDOWN(mp));
357 goto error0;
358 }
359 /*
360 * transaction commit worked ok so we can drop the extra ticket
361 * reference that we gained in xfs_trans_dup()
362 */
363 xfs_log_ticket_put(tp->t_ticket);
364
365 /*
366 * Remove the memory for extent descriptions (just bookkeeping).
367 */
368 if (ip->i_df.if_bytes)
369 xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
370 ASSERT(ip->i_df.if_bytes == 0);
371 /*
372 * Put an itruncate log reservation in the new transaction
373 * for our caller.
374 */
375 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
376 XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
377 ASSERT(XFS_FORCED_SHUTDOWN(mp));
378 goto error0;
379 }
380
381 xfs_trans_ijoin(tp, ip, 0);
382 *tpp = tp;
383 return 0;
384
385 error1:
386 xfs_bmap_cancel(&free_list);
387 error0:
388 return error;
389}
390
391int 158int
392xfs_release( 159xfs_release(
393 xfs_inode_t *ip) 160 xfs_inode_t *ip)
@@ -1353,247 +1120,6 @@ xfs_link(
1353} 1120}
1354 1121
1355int 1122int
1356xfs_symlink(
1357 xfs_inode_t *dp,
1358 struct xfs_name *link_name,
1359 const char *target_path,
1360 umode_t mode,
1361 xfs_inode_t **ipp)
1362{
1363 xfs_mount_t *mp = dp->i_mount;
1364 xfs_trans_t *tp;
1365 xfs_inode_t *ip;
1366 int error;
1367 int pathlen;
1368 xfs_bmap_free_t free_list;
1369 xfs_fsblock_t first_block;
1370 bool unlock_dp_on_error = false;
1371 uint cancel_flags;
1372 int committed;
1373 xfs_fileoff_t first_fsb;
1374 xfs_filblks_t fs_blocks;
1375 int nmaps;
1376 xfs_bmbt_irec_t mval[SYMLINK_MAPS];
1377 xfs_daddr_t d;
1378 const char *cur_chunk;
1379 int byte_cnt;
1380 int n;
1381 xfs_buf_t *bp;
1382 prid_t prid;
1383 struct xfs_dquot *udqp, *gdqp;
1384 uint resblks;
1385
1386 *ipp = NULL;
1387 error = 0;
1388 ip = NULL;
1389 tp = NULL;
1390
1391 trace_xfs_symlink(dp, link_name);
1392
1393 if (XFS_FORCED_SHUTDOWN(mp))
1394 return XFS_ERROR(EIO);
1395
1396 /*
1397 * Check component lengths of the target path name.
1398 */
1399 pathlen = strlen(target_path);
1400 if (pathlen >= MAXPATHLEN) /* total string too long */
1401 return XFS_ERROR(ENAMETOOLONG);
1402
1403 udqp = gdqp = NULL;
1404 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1405 prid = xfs_get_projid(dp);
1406 else
1407 prid = XFS_PROJID_DEFAULT;
1408
1409 /*
1410 * Make sure that we have allocated dquot(s) on disk.
1411 */
1412 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
1413 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
1414 if (error)
1415 goto std_return;
1416
1417 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
1418 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1419 /*
1420 * The symlink will fit into the inode data fork?
1421 * There can't be any attributes so we get the whole variable part.
1422 */
1423 if (pathlen <= XFS_LITINO(mp))
1424 fs_blocks = 0;
1425 else
1426 fs_blocks = XFS_B_TO_FSB(mp, pathlen);
1427 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
1428 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
1429 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
1430 if (error == ENOSPC && fs_blocks == 0) {
1431 resblks = 0;
1432 error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
1433 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
1434 }
1435 if (error) {
1436 cancel_flags = 0;
1437 goto error_return;
1438 }
1439
1440 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1441 unlock_dp_on_error = true;
1442
1443 /*
1444 * Check whether the directory allows new symlinks or not.
1445 */
1446 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
1447 error = XFS_ERROR(EPERM);
1448 goto error_return;
1449 }
1450
1451 /*
1452 * Reserve disk quota : blocks and inode.
1453 */
1454 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
1455 if (error)
1456 goto error_return;
1457
1458 /*
1459 * Check for ability to enter directory entry, if no space reserved.
1460 */
1461 error = xfs_dir_canenter(tp, dp, link_name, resblks);
1462 if (error)
1463 goto error_return;
1464 /*
1465 * Initialize the bmap freelist prior to calling either
1466 * bmapi or the directory create code.
1467 */
1468 xfs_bmap_init(&free_list, &first_block);
1469
1470 /*
1471 * Allocate an inode for the symlink.
1472 */
1473 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
1474 prid, resblks > 0, &ip, NULL);
1475 if (error) {
1476 if (error == ENOSPC)
1477 goto error_return;
1478 goto error1;
1479 }
1480
1481 /*
1482 * An error after we've joined dp to the transaction will result in the
1483 * transaction cancel unlocking dp so don't do it explicitly in the
1484 * error path.
1485 */
1486 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1487 unlock_dp_on_error = false;
1488
1489 /*
1490 * Also attach the dquot(s) to it, if applicable.
1491 */
1492 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
1493
1494 if (resblks)
1495 resblks -= XFS_IALLOC_SPACE_RES(mp);
1496 /*
1497 * If the symlink will fit into the inode, write it inline.
1498 */
1499 if (pathlen <= XFS_IFORK_DSIZE(ip)) {
1500 xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
1501 memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
1502 ip->i_d.di_size = pathlen;
1503
1504 /*
1505 * The inode was initially created in extent format.
1506 */
1507 ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
1508 ip->i_df.if_flags |= XFS_IFINLINE;
1509
1510 ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
1511 xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
1512
1513 } else {
1514 first_fsb = 0;
1515 nmaps = SYMLINK_MAPS;
1516
1517 error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
1518 XFS_BMAPI_METADATA, &first_block, resblks,
1519 mval, &nmaps, &free_list);
1520 if (error)
1521 goto error2;
1522
1523 if (resblks)
1524 resblks -= fs_blocks;
1525 ip->i_d.di_size = pathlen;
1526 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1527
1528 cur_chunk = target_path;
1529 for (n = 0; n < nmaps; n++) {
1530 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
1531 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
1532 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
1533 BTOBB(byte_cnt), 0);
1534 if (!bp) {
1535 error = ENOMEM;
1536 goto error2;
1537 }
1538 if (pathlen < byte_cnt) {
1539 byte_cnt = pathlen;
1540 }
1541 pathlen -= byte_cnt;
1542
1543 memcpy(bp->b_addr, cur_chunk, byte_cnt);
1544 cur_chunk += byte_cnt;
1545
1546 xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
1547 }
1548 }
1549
1550 /*
1551 * Create the directory entry for the symlink.
1552 */
1553 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
1554 &first_block, &free_list, resblks);
1555 if (error)
1556 goto error2;
1557 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1558 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1559
1560 /*
1561 * If this is a synchronous mount, make sure that the
1562 * symlink transaction goes to disk before returning to
1563 * the user.
1564 */
1565 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
1566 xfs_trans_set_sync(tp);
1567 }
1568
1569 error = xfs_bmap_finish(&tp, &free_list, &committed);
1570 if (error) {
1571 goto error2;
1572 }
1573 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1574 xfs_qm_dqrele(udqp);
1575 xfs_qm_dqrele(gdqp);
1576
1577 *ipp = ip;
1578 return 0;
1579
1580 error2:
1581 IRELE(ip);
1582 error1:
1583 xfs_bmap_cancel(&free_list);
1584 cancel_flags |= XFS_TRANS_ABORT;
1585 error_return:
1586 xfs_trans_cancel(tp, cancel_flags);
1587 xfs_qm_dqrele(udqp);
1588 xfs_qm_dqrele(gdqp);
1589
1590 if (unlock_dp_on_error)
1591 xfs_iunlock(dp, XFS_ILOCK_EXCL);
1592 std_return:
1593 return error;
1594}
1595
1596int
1597xfs_set_dmattrs( 1123xfs_set_dmattrs(
1598 xfs_inode_t *ip, 1124 xfs_inode_t *ip,
1599 u_int evmask, 1125 u_int evmask,