diff options
author | Dave Chinner <david@fromorbit.com> | 2014-07-14 17:37:18 -0400 |
---|---|---|
committer | Dave Chinner <david@fromorbit.com> | 2014-07-14 17:37:18 -0400 |
commit | 7f8a058f6dc52219117bc2469b1fb816f7fa1a4b (patch) | |
tree | 43ce8eed4d26beb6f2acff2279c43eae7f79f83a /fs/xfs/libxfs | |
parent | 03e01349c654fbdea80d3d9b4ab599244eb55bb7 (diff) | |
parent | 2451337dd043901b5270b7586942abe564443e3d (diff) |
Merge branch 'xfs-libxfs-restructure' into for-next
Diffstat (limited to 'fs/xfs/libxfs')
55 files changed, 45286 insertions, 0 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h new file mode 100644 index 000000000000..6e247a99f5db --- /dev/null +++ b/fs/xfs/libxfs/xfs_ag.h | |||
@@ -0,0 +1,281 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_AG_H__ | ||
19 | #define __XFS_AG_H__ | ||
20 | |||
21 | /* | ||
22 | * Allocation group header | ||
23 | * This is divided into three structures, placed in sequential 512-byte | ||
24 | * buffers after a copy of the superblock (also in a 512-byte buffer). | ||
25 | */ | ||
26 | |||
27 | struct xfs_buf; | ||
28 | struct xfs_mount; | ||
29 | struct xfs_trans; | ||
30 | |||
31 | #define XFS_AGF_MAGIC 0x58414746 /* 'XAGF' */ | ||
32 | #define XFS_AGI_MAGIC 0x58414749 /* 'XAGI' */ | ||
33 | #define XFS_AGFL_MAGIC 0x5841464c /* 'XAFL' */ | ||
34 | #define XFS_AGF_VERSION 1 /* the only version XFS_AGF_GOOD_VERSION() accepts */ | ||
35 | #define XFS_AGI_VERSION 1 /* the only version XFS_AGI_GOOD_VERSION() accepts */ | ||
36 | |||
37 | #define XFS_AGF_GOOD_VERSION(v) ((v) == XFS_AGF_VERSION) | ||
38 | #define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION) | ||
39 | |||
40 | /* | ||
41 | * Btree number 0 is bno, 1 is cnt. This value gives the size of the | ||
42 | * agf_roots[] and agf_levels[] arrays in struct xfs_agf below. | ||
43 | */ | ||
44 | #define XFS_BTNUM_AGF ((int)XFS_BTNUM_CNTi + 1) | ||
45 | |||
46 | /* | ||
47 | * The second word of agf_levels in the first a.g. overlaps the EFS | ||
48 | * superblock's magic number. Since the magic numbers valid for EFS | ||
49 | * are > 64k, our value cannot be confused for an EFS superblock's. | ||
50 | */ | ||
51 | |||
52 | typedef struct xfs_agf { | ||
53 | /* | ||
54 | * Common allocation group header information | ||
55 | */ | ||
56 | __be32 agf_magicnum; /* magic number == XFS_AGF_MAGIC */ | ||
57 | __be32 agf_versionnum; /* header version == XFS_AGF_VERSION */ | ||
58 | __be32 agf_seqno; /* sequence # starting from 0 */ | ||
59 | __be32 agf_length; /* size in blocks of a.g. */ | ||
60 | /* | ||
61 | * Freespace information | ||
62 | */ | ||
63 | __be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */ | ||
64 | __be32 agf_spare0; /* spare field */ | ||
65 | __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels, indexed by XFS_BTNUM_BNOi / XFS_BTNUM_CNTi */ | ||
66 | __be32 agf_spare1; /* spare field */ | ||
67 | |||
68 | __be32 agf_flfirst; /* first freelist block's index */ | ||
69 | __be32 agf_fllast; /* last freelist block's index */ | ||
70 | __be32 agf_flcount; /* count of blocks in freelist */ | ||
71 | __be32 agf_freeblks; /* total free blocks */ | ||
72 | |||
73 | __be32 agf_longest; /* longest free space */ | ||
74 | __be32 agf_btreeblks; /* # of blocks held in AGF btrees */ | ||
75 | uuid_t agf_uuid; /* uuid of filesystem */ | ||
76 | |||
77 | /* | ||
78 | * reserve some contiguous space for future logged fields before we add | ||
79 | * the unlogged fields. This makes the range logging via flags and | ||
80 | * structure offsets much simpler. | ||
81 | */ | ||
82 | __be64 agf_spare64[16]; /* the reserved space described above */ | ||
83 | |||
84 | /* unlogged fields, written during buffer writeback. */ | ||
85 | __be64 agf_lsn; /* last write sequence */ | ||
86 | __be32 agf_crc; /* crc of agf sector */ | ||
87 | __be32 agf_spare2; /* spare field */ | ||
88 | |||
89 | /* structure must be padded to 64 bit alignment */ | ||
90 | } xfs_agf_t; | ||
91 | |||
92 | #define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc) /* byte offset of agf_crc */ | ||
93 | |||
94 | #define XFS_AGF_MAGICNUM 0x00000001 | ||
95 | #define XFS_AGF_VERSIONNUM 0x00000002 | ||
96 | #define XFS_AGF_SEQNO 0x00000004 | ||
97 | #define XFS_AGF_LENGTH 0x00000008 | ||
98 | #define XFS_AGF_ROOTS 0x00000010 | ||
99 | #define XFS_AGF_LEVELS 0x00000020 | ||
100 | #define XFS_AGF_FLFIRST 0x00000040 | ||
101 | #define XFS_AGF_FLLAST 0x00000080 | ||
102 | #define XFS_AGF_FLCOUNT 0x00000100 | ||
103 | #define XFS_AGF_FREEBLKS 0x00000200 | ||
104 | #define XFS_AGF_LONGEST 0x00000400 | ||
105 | #define XFS_AGF_BTREEBLKS 0x00000800 | ||
106 | #define XFS_AGF_UUID 0x00001000 | ||
107 | #define XFS_AGF_NUM_BITS 13 /* number of XFS_AGF_* flag bits defined above */ | ||
108 | #define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) /* mask with every one of those bits set */ | ||
109 | |||
110 | #define XFS_AGF_FLAGS /* { flag value, printable name } pairs for the XFS_AGF_* bits */ \ | ||
111 | { XFS_AGF_MAGICNUM, "MAGICNUM" }, \ | ||
112 | { XFS_AGF_VERSIONNUM, "VERSIONNUM" }, \ | ||
113 | { XFS_AGF_SEQNO, "SEQNO" }, \ | ||
114 | { XFS_AGF_LENGTH, "LENGTH" }, \ | ||
115 | { XFS_AGF_ROOTS, "ROOTS" }, \ | ||
116 | { XFS_AGF_LEVELS, "LEVELS" }, \ | ||
117 | { XFS_AGF_FLFIRST, "FLFIRST" }, \ | ||
118 | { XFS_AGF_FLLAST, "FLLAST" }, \ | ||
119 | { XFS_AGF_FLCOUNT, "FLCOUNT" }, \ | ||
120 | { XFS_AGF_FREEBLKS, "FREEBLKS" }, \ | ||
121 | { XFS_AGF_LONGEST, "LONGEST" }, \ | ||
122 | { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \ | ||
123 | { XFS_AGF_UUID, "UUID" } | ||
124 | |||
125 | /* disk block (xfs_daddr_t) of the AGF in the AG: 1 << m_sectbb_log */ | ||
126 | #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) | ||
127 | #define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) | ||
128 | #define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)((bp)->b_addr)) /* view a buffer's data as an AGF */ | ||
129 | |||
130 | extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, | ||
131 | xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); | ||
132 | |||
133 | /* | ||
134 | * Size of the unlinked inode hash table in the agi (length of agi_unlinked[]). | ||
135 | */ | ||
136 | #define XFS_AGI_UNLINKED_BUCKETS 64 | ||
137 | |||
138 | typedef struct xfs_agi { | ||
139 | /* | ||
140 | * Common allocation group header information | ||
141 | */ | ||
142 | __be32 agi_magicnum; /* magic number == XFS_AGI_MAGIC */ | ||
143 | __be32 agi_versionnum; /* header version == XFS_AGI_VERSION */ | ||
144 | __be32 agi_seqno; /* sequence # starting from 0 */ | ||
145 | __be32 agi_length; /* size in blocks of a.g. */ | ||
146 | /* | ||
147 | * Inode information | ||
148 | * Inodes are mapped by interpreting the inode number, so no | ||
149 | * mapping data is needed here. | ||
150 | */ | ||
151 | __be32 agi_count; /* count of allocated inodes */ | ||
152 | __be32 agi_root; /* root of inode btree */ | ||
153 | __be32 agi_level; /* levels in inode btree */ | ||
154 | __be32 agi_freecount; /* number of free inodes */ | ||
155 | |||
156 | __be32 agi_newino; /* new inode just allocated */ | ||
157 | __be32 agi_dirino; /* last directory inode chunk */ | ||
158 | /* | ||
159 | * Hash table of inodes which have been unlinked but are | ||
160 | * still being referenced. | ||
161 | */ | ||
162 | __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; | ||
163 | /* | ||
164 | * This marks the end of logging region 1 and start of logging region 2. | ||
165 | */ | ||
166 | uuid_t agi_uuid; /* uuid of filesystem */ | ||
167 | __be32 agi_crc; /* crc of agi sector */ | ||
168 | __be32 agi_pad32; /* 32-bit pad field */ | ||
169 | __be64 agi_lsn; /* last write sequence */ | ||
170 | |||
171 | __be32 agi_free_root; /* root of the free inode btree */ | ||
172 | __be32 agi_free_level;/* levels in free inode btree */ | ||
173 | |||
174 | /* structure must be padded to 64 bit alignment */ | ||
175 | } xfs_agi_t; | ||
176 | |||
177 | #define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) /* byte offset of agi_crc */ | ||
178 | |||
179 | #define XFS_AGI_MAGICNUM (1 << 0) | ||
180 | #define XFS_AGI_VERSIONNUM (1 << 1) | ||
181 | #define XFS_AGI_SEQNO (1 << 2) | ||
182 | #define XFS_AGI_LENGTH (1 << 3) | ||
183 | #define XFS_AGI_COUNT (1 << 4) | ||
184 | #define XFS_AGI_ROOT (1 << 5) | ||
185 | #define XFS_AGI_LEVEL (1 << 6) | ||
186 | #define XFS_AGI_FREECOUNT (1 << 7) | ||
187 | #define XFS_AGI_NEWINO (1 << 8) | ||
188 | #define XFS_AGI_DIRINO (1 << 9) | ||
189 | #define XFS_AGI_UNLINKED (1 << 10) | ||
190 | #define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */ | ||
191 | #define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1) /* mask of the 11 region-1 bits above */ | ||
192 | #define XFS_AGI_FREE_ROOT (1 << 11) | ||
193 | #define XFS_AGI_FREE_LEVEL (1 << 12) | ||
194 | #define XFS_AGI_NUM_BITS_R2 13 /* count of all flag bits defined above, regions 1 and 2 */ | ||
195 | |||
196 | /* disk block (xfs_daddr_t) of the AGI in the AG: 2 << m_sectbb_log */ | ||
197 | #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) | ||
198 | #define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) | ||
199 | #define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)((bp)->b_addr)) /* view a buffer's data as an AGI */ | ||
200 | |||
201 | extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, | ||
202 | xfs_agnumber_t agno, struct xfs_buf **bpp); | ||
203 | |||
204 | /* | ||
205 | * The third a.g. block contains the a.g. freelist, an array | ||
206 | * of block pointers to blocks owned by the allocation btree code. | ||
207 | */ | ||
208 | #define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) /* disk address within the AG */ | ||
209 | #define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp)) | ||
210 | #define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr)) /* view a buffer's data as an AGFL header */ | ||
211 | |||
212 | #define XFS_BUF_TO_AGFL_BNO(mp, bp) /* first free-list slot: agfl_bno[] after the header with CRCs, else the start of the buffer */ \ | ||
213 | (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ | ||
214 | &(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \ | ||
215 | (__be32 *)(bp)->b_addr) | ||
216 | |||
217 | /* | ||
218 | * Size of the AGFL, counted in xfs_agblock_t-sized slots of one sector | ||
219 | * (sb_sectsize). For CRC-enabled filesystems we steal sizeof(struct xfs_agfl) | ||
220 | * bytes at the beginning for a proper header with the location information and CRC. | ||
221 | */ | ||
222 | #define XFS_AGFL_SIZE(mp) \ | ||
223 | (((mp)->m_sb.sb_sectsize - \ | ||
224 | (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ | ||
225 | sizeof(struct xfs_agfl) : 0)) / \ | ||
226 | sizeof(xfs_agblock_t)) | ||
227 | |||
228 | typedef struct xfs_agfl { /* header used on CRC-enabled filesystems, see XFS_BUF_TO_AGFL_BNO() */ | ||
229 | __be32 agfl_magicnum; /* checked against XFS_AGFL_MAGIC by xfs_agfl_verify() */ | ||
230 | __be32 agfl_seqno; /* checked against the perag's pag_agno by xfs_agfl_verify() */ | ||
231 | uuid_t agfl_uuid; /* checked against the superblock's sb_uuid by xfs_agfl_verify() */ | ||
232 | __be64 agfl_lsn; /* set from the buffer log item's li_lsn by xfs_agfl_write_verify() */ | ||
233 | __be32 agfl_crc; /* checksum field, located via XFS_AGFL_CRC_OFF */ | ||
234 | __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ | ||
235 | } xfs_agfl_t; | ||
236 | |||
237 | #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) /* byte offset of agfl_crc */ | ||
238 | |||
239 | /* | ||
240 | * tags for inode radix tree (XFS_ICI_NO_TAG is a lookup flag, not a tag) | ||
241 | */ | ||
242 | #define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup | ||
243 | in xfs_inode_ag_iterator */ | ||
244 | #define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ | ||
245 | #define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */ | ||
246 | |||
247 | #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) /* per-mount cap on AG btree levels */ | ||
248 | #define XFS_MIN_FREELIST_RAW(bl,cl,mp) /* bl, cl: bno/cnt btree levels; per tree, levels + 1 capped at XFS_AG_MAXLEVELS; arguments are parenthesised so any expression is safe */ \ | ||
249 | (MIN((bl) + 1, XFS_AG_MAXLEVELS(mp)) + MIN((cl) + 1, XFS_AG_MAXLEVELS(mp))) | ||
250 | #define XFS_MIN_FREELIST(a,mp) /* same, with the levels read (big-endian) from AGF a */ \ | ||
251 | (XFS_MIN_FREELIST_RAW( \ | ||
252 | be32_to_cpu((a)->agf_levels[XFS_BTNUM_BNOi]), \ | ||
253 | be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), (mp))) | ||
254 | #define XFS_MIN_FREELIST_PAG(pag,mp) /* same, with the levels read from pag->pagf_levels[] */ \ | ||
255 | (XFS_MIN_FREELIST_RAW( \ | ||
256 | (unsigned int)(pag)->pagf_levels[XFS_BTNUM_BNOi], \ | ||
257 | (unsigned int)(pag)->pagf_levels[XFS_BTNUM_CNTi], (mp))) | ||
258 | |||
259 | #define XFS_AGB_TO_FSB(mp,agno,agbno) /* agno shifted above the low sb_agblklog bits, OR-ed with agbno */ \ | ||
260 | (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno)) | ||
261 | #define XFS_FSB_TO_AGNO(mp,fsbno) /* inverse of the shift above: the bits above sb_agblklog */ \ | ||
262 | ((xfs_agnumber_t)((fsbno) >> (mp)->m_sb.sb_agblklog)) | ||
263 | #define XFS_FSB_TO_AGBNO(mp,fsbno) /* fsbno masked by xfs_mask32lo(sb_agblklog); presumably the low sb_agblklog bits - confirm */ \ | ||
264 | ((xfs_agblock_t)((fsbno) & xfs_mask32lo((mp)->m_sb.sb_agblklog))) | ||
265 | #define XFS_AGB_TO_DADDR(mp,agno,agbno) /* linear block agno * sb_agblocks + agbno, converted by XFS_FSB_TO_BB() */ \ | ||
266 | ((xfs_daddr_t)XFS_FSB_TO_BB(mp, \ | ||
267 | (xfs_fsblock_t)(agno) * (mp)->m_sb.sb_agblocks + (agbno))) | ||
268 | #define XFS_AG_DADDR(mp,agno,d) (XFS_AGB_TO_DADDR(mp, agno, 0) + (d)) /* d offset from the address of AG agno's block 0 */ | ||
269 | |||
270 | /* | ||
271 | * For checking for bad ranges of xfs_daddr_t's, covering multiple | ||
272 | * allocation groups or a single xfs_daddr_t that's a superblock copy. Both arms are ASSERT()s: len == 1 checks d against XFS_SB_DADDR, otherwise the first and last address must map to the same AG. | ||
273 | */ | ||
274 | #define XFS_AG_CHECK_DADDR(mp,d,len) \ | ||
275 | ((len) == 1 ? \ | ||
276 | ASSERT((d) == XFS_SB_DADDR || \ | ||
277 | xfs_daddr_to_agbno(mp, d) != XFS_SB_DADDR) : \ | ||
278 | ASSERT(xfs_daddr_to_agno(mp, d) == \ | ||
279 | xfs_daddr_to_agno(mp, (d) + (len) - 1))) | ||
280 | |||
281 | #endif /* __XFS_AG_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c new file mode 100644 index 000000000000..4bffffe038a1 --- /dev/null +++ b/fs/xfs/libxfs/xfs_alloc.c | |||
@@ -0,0 +1,2630 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_format.h" | ||
21 | #include "xfs_log_format.h" | ||
22 | #include "xfs_shared.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_bit.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_inode.h" | ||
29 | #include "xfs_btree.h" | ||
30 | #include "xfs_alloc_btree.h" | ||
31 | #include "xfs_alloc.h" | ||
32 | #include "xfs_extent_busy.h" | ||
33 | #include "xfs_error.h" | ||
34 | #include "xfs_cksum.h" | ||
35 | #include "xfs_trace.h" | ||
36 | #include "xfs_trans.h" | ||
37 | #include "xfs_buf_item.h" | ||
38 | #include "xfs_log.h" | ||
39 | |||
40 | struct workqueue_struct *xfs_alloc_wq; /* NOTE(review): not referenced in the visible part of this file */ | ||
41 | |||
42 | #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) /* |a - b| without a negative intermediate; evaluates each argument twice */ | ||
43 | |||
44 | #define XFSA_FIXUP_BNO_OK 1 /* xfs_alloc_fixup_trees flag: by-block cursor already points at the record */ | ||
45 | #define XFSA_FIXUP_CNT_OK 2 /* xfs_alloc_fixup_trees flag: by-size cursor already points at the record */ | ||
46 | |||
47 | STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); | ||
48 | STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); | ||
49 | STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); | ||
50 | STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, | ||
51 | xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); | ||
52 | |||
53 | /* | ||
54 | * Lookup the record equal to [bno, len] in the btree given by cur. The key is staged in cur->bc_rec.a; the return value and *stat come from xfs_btree_lookup(). | ||
55 | */ | ||
56 | STATIC int /* error */ | ||
57 | xfs_alloc_lookup_eq( | ||
58 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
59 | xfs_agblock_t bno, /* starting block of extent */ | ||
60 | xfs_extlen_t len, /* length of extent */ | ||
61 | int *stat) /* success/failure; xfs_alloc_fixup_trees expects 1 for a match, 0 for none */ | ||
62 | { | ||
63 | cur->bc_rec.a.ar_startblock = bno; /* search key: extent start */ | ||
64 | cur->bc_rec.a.ar_blockcount = len; /* search key: extent length */ | ||
65 | return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * Lookup the first record greater than or equal to [bno, len] | ||
70 | * in the btree given by cur. The key is staged in cur->bc_rec.a; the return value and *stat come from xfs_btree_lookup(). | ||
71 | */ | ||
72 | int /* error */ | ||
73 | xfs_alloc_lookup_ge( | ||
74 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
75 | xfs_agblock_t bno, /* starting block of extent */ | ||
76 | xfs_extlen_t len, /* length of extent */ | ||
77 | int *stat) /* success/failure, as set by xfs_btree_lookup() */ | ||
78 | { | ||
79 | cur->bc_rec.a.ar_startblock = bno; /* search key: extent start */ | ||
80 | cur->bc_rec.a.ar_blockcount = len; /* search key: extent length */ | ||
81 | return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); | ||
82 | } | ||
83 | |||
84 | /* | ||
85 | * Lookup the first record less than or equal to [bno, len] | ||
86 | * in the btree given by cur. The key is staged in cur->bc_rec.a; the return value and *stat come from xfs_btree_lookup(). | ||
87 | */ | ||
88 | int /* error */ | ||
89 | xfs_alloc_lookup_le( | ||
90 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
91 | xfs_agblock_t bno, /* starting block of extent */ | ||
92 | xfs_extlen_t len, /* length of extent */ | ||
93 | int *stat) /* success/failure, as set by xfs_btree_lookup() */ | ||
94 | { | ||
95 | cur->bc_rec.a.ar_startblock = bno; /* search key: extent start */ | ||
96 | cur->bc_rec.a.ar_blockcount = len; /* search key: extent length */ | ||
97 | return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); | ||
98 | } | ||
99 | |||
100 | /* | ||
101 | * Update the record referred to by cur to the value given | ||
102 | * by [bno, len]. | ||
103 | * This either works (return 0) or gets an EFSCORRUPTED error. The return value is whatever xfs_btree_update() returns. | ||
104 | */ | ||
105 | STATIC int /* error */ | ||
106 | xfs_alloc_update( | ||
107 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
108 | xfs_agblock_t bno, /* starting block of extent */ | ||
109 | xfs_extlen_t len) /* length of extent */ | ||
110 | { | ||
111 | union xfs_btree_rec rec; /* new record, built in big-endian form */ | ||
112 | |||
113 | rec.alloc.ar_startblock = cpu_to_be32(bno); | ||
114 | rec.alloc.ar_blockcount = cpu_to_be32(len); | ||
115 | return xfs_btree_update(cur, &rec); | ||
116 | } | ||
117 | |||
118 | /* | ||
119 | * Get the data from the pointed-to record. *bno and *len are written only when the btree call succeeds with *stat == 1; otherwise they are left untouched. | ||
120 | */ | ||
121 | int /* error */ | ||
122 | xfs_alloc_get_rec( | ||
123 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
124 | xfs_agblock_t *bno, /* output: starting block of extent */ | ||
125 | xfs_extlen_t *len, /* output: length of extent */ | ||
126 | int *stat) /* output: success/failure */ | ||
127 | { | ||
128 | union xfs_btree_rec *rec; /* record pointer filled in by xfs_btree_get_rec() */ | ||
129 | int error; | ||
130 | |||
131 | error = xfs_btree_get_rec(cur, &rec, stat); | ||
132 | if (!error && *stat == 1) { | ||
133 | *bno = be32_to_cpu(rec->alloc.ar_startblock); /* record fields are big-endian */ | ||
134 | *len = be32_to_cpu(rec->alloc.ar_blockcount); | ||
135 | } | ||
136 | return error; | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Compute aligned version of the found extent. | ||
141 | * Takes alignment and min length into account. Busy ranges are trimmed first; *resbno and *reslen are always written, and *reslen may be 0. | ||
142 | */ | ||
143 | STATIC void | ||
144 | xfs_alloc_compute_aligned( | ||
145 | xfs_alloc_arg_t *args, /* allocation argument structure */ | ||
146 | xfs_agblock_t foundbno, /* starting block in found extent */ | ||
147 | xfs_extlen_t foundlen, /* length in found extent */ | ||
148 | xfs_agblock_t *resbno, /* result block number */ | ||
149 | xfs_extlen_t *reslen) /* result length */ | ||
150 | { | ||
151 | xfs_agblock_t bno; /* start after busy trimming */ | ||
152 | xfs_extlen_t len; /* length after busy trimming */ | ||
153 | |||
154 | /* Trim busy sections out of found extent */ | ||
155 | xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len); | ||
156 | |||
157 | if (args->alignment > 1 && len >= args->minlen) { /* only align extents that are still long enough */ | ||
158 | xfs_agblock_t aligned_bno = roundup(bno, args->alignment); | ||
159 | xfs_extlen_t diff = aligned_bno - bno; /* blocks skipped to reach the aligned start */ | ||
160 | |||
161 | *resbno = aligned_bno; | ||
162 | *reslen = diff >= len ? 0 : len - diff; /* 0 when the skipped blocks consume the whole extent */ | ||
163 | } else { | ||
164 | *resbno = bno; | ||
165 | *reslen = len; | ||
166 | } | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * Compute best start block and diff for "near" allocations. | ||
171 | * freelen >= wantlen already checked by caller. Stores the chosen start block (or NULLAGBLOCK when none fits) in *newbnop and returns its distance from wantbno. | ||
172 | */ | ||
173 | STATIC xfs_extlen_t /* difference value (absolute) */ | ||
174 | xfs_alloc_compute_diff( | ||
175 | xfs_agblock_t wantbno, /* target starting block */ | ||
176 | xfs_extlen_t wantlen, /* target length */ | ||
177 | xfs_extlen_t alignment, /* target alignment */ | ||
178 | char userdata, /* are we allocating data? */ | ||
179 | xfs_agblock_t freebno, /* freespace's starting block */ | ||
180 | xfs_extlen_t freelen, /* freespace's length */ | ||
181 | xfs_agblock_t *newbnop) /* result: best start block from free */ | ||
182 | { | ||
183 | xfs_agblock_t freeend; /* end of freespace extent */ | ||
184 | xfs_agblock_t newbno1; /* return block number */ | ||
185 | xfs_agblock_t newbno2; /* other new block number */ | ||
186 | xfs_extlen_t newlen1=0; /* length with newbno1 */ | ||
187 | xfs_extlen_t newlen2=0; /* length with newbno2 */ | ||
188 | xfs_agblock_t wantend; /* end of target extent */ | ||
189 | |||
190 | ASSERT(freelen >= wantlen); | ||
191 | freeend = freebno + freelen; /* one past the last free block */ | ||
192 | wantend = wantbno + wantlen; /* one past the last wanted block */ | ||
193 | /* | ||
194 | * We want to allocate from the start of a free extent if it is past | ||
195 | * the desired block or if we are allocating user data and the free | ||
196 | * extent is before desired block. The second case is there to allow | ||
197 | * for contiguous allocation from the remaining free space if the file | ||
198 | * grows in the short term. | ||
199 | */ | ||
200 | if (freebno >= wantbno || (userdata && freeend < wantend)) { | ||
201 | if ((newbno1 = roundup(freebno, alignment)) >= freeend) | ||
202 | newbno1 = NULLAGBLOCK; /* aligned start falls outside the free extent */ | ||
203 | } else if (freeend >= wantend && alignment > 1) { /* free extent covers the target: try the aligned blocks on either side of wantbno */ | ||
204 | newbno1 = roundup(wantbno, alignment); | ||
205 | newbno2 = newbno1 - alignment; | ||
206 | if (newbno1 >= freeend) | ||
207 | newbno1 = NULLAGBLOCK; | ||
208 | else | ||
209 | newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1); | ||
210 | if (newbno2 < freebno) | ||
211 | newbno2 = NULLAGBLOCK; | ||
212 | else | ||
213 | newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2); | ||
214 | if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) { /* both usable: prefer the longer, then the closer */ | ||
215 | if (newlen1 < newlen2 || | ||
216 | (newlen1 == newlen2 && | ||
217 | XFS_ABSDIFF(newbno1, wantbno) > | ||
218 | XFS_ABSDIFF(newbno2, wantbno))) | ||
219 | newbno1 = newbno2; | ||
220 | } else if (newbno2 != NULLAGBLOCK) | ||
221 | newbno1 = newbno2; | ||
222 | } else if (freeend >= wantend) { /* covers the target and no alignment needed */ | ||
223 | newbno1 = wantbno; | ||
224 | } else if (alignment > 1) { /* free extent ends before wantend: place at its tail, aligned */ | ||
225 | newbno1 = roundup(freeend - wantlen, alignment); | ||
226 | if (newbno1 > freeend - wantlen && | ||
227 | newbno1 - alignment >= freebno) | ||
228 | newbno1 -= alignment; | ||
229 | else if (newbno1 >= freeend) | ||
230 | newbno1 = NULLAGBLOCK; | ||
231 | } else | ||
232 | newbno1 = freeend - wantlen; /* unaligned: the last wantlen blocks of the free extent */ | ||
233 | *newbnop = newbno1; | ||
234 | return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno); /* 0 is also returned when nothing was found; check *newbnop */ | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * Fix up the length, based on mod and prod. | ||
239 | * len should be k * prod + mod for some k. | ||
240 | * If len is too small it is returned unchanged. | ||
241 | * If len hits maxlen it is left alone. The length is only ever rounded down, and args->len is rewritten only if the result is still >= minlen. | ||
242 | */ | ||
243 | STATIC void | ||
244 | xfs_alloc_fix_len( | ||
245 | xfs_alloc_arg_t *args) /* allocation argument structure */ | ||
246 | { | ||
247 | xfs_extlen_t k; /* current remainder, rlen % prod */ | ||
248 | xfs_extlen_t rlen; /* working copy of args->len */ | ||
249 | |||
250 | ASSERT(args->mod < args->prod); | ||
251 | rlen = args->len; | ||
252 | ASSERT(rlen >= args->minlen); | ||
253 | ASSERT(rlen <= args->maxlen); | ||
254 | if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen || | ||
255 | (args->mod == 0 && rlen < args->prod)) | ||
256 | return; /* nothing to adjust, or too small to adjust */ | ||
257 | k = rlen % args->prod; | ||
258 | if (k == args->mod) | ||
259 | return; /* already of the form k * prod + mod */ | ||
260 | if (k > args->mod) | ||
261 | rlen = rlen - (k - args->mod); /* drop the excess remainder */ | ||
262 | else | ||
263 | rlen = rlen - args->prod + (args->mod - k); /* step back into the previous multiple of prod */ | ||
264 | if ((int)rlen < (int)args->minlen) /* signed compare, so a wrapped-around rlen counts as too small */ | ||
265 | return; | ||
266 | ASSERT(rlen >= args->minlen && rlen <= args->maxlen); | ||
267 | ASSERT(rlen % args->prod == args->mod); | ||
268 | args->len = rlen; | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * Fix up length if there is too little space left in the a.g. | ||
273 | * Return 1 if ok, 0 if too little, should give up. On the 0 return args->agbno is set to NULLAGBLOCK and args->len has already been reduced. | ||
274 | */ | ||
275 | STATIC int | ||
276 | xfs_alloc_fix_minleft( | ||
277 | xfs_alloc_arg_t *args) /* allocation argument structure */ | ||
278 | { | ||
279 | xfs_agf_t *agf; /* a.g. freelist header */ | ||
280 | int diff; /* free space difference */ | ||
281 | |||
282 | if (args->minleft == 0) | ||
283 | return 1; /* no reservation requested */ | ||
284 | agf = XFS_BUF_TO_AGF(args->agbp); | ||
285 | diff = be32_to_cpu(agf->agf_freeblks) | ||
286 | - args->len - args->minleft; /* agf_freeblks minus len minus minleft; negative means a shortfall */ | ||
287 | if (diff >= 0) | ||
288 | return 1; | ||
289 | args->len += diff; /* shrink the allocated space */ | ||
290 | if (args->len >= args->minlen) /* the shrunken length is still acceptable */ | ||
291 | return 1; | ||
292 | args->agbno = NULLAGBLOCK; | ||
293 | return 0; | ||
294 | } | ||
295 | |||
296 | /* | ||
297 | * Update the two btrees, logically removing from freespace the extent | ||
298 | * starting at rbno, rlen blocks. The extent is contained within the | ||
299 | * actual (current) free extent fbno for flen blocks. | ||
300 | * Flags are passed in indicating whether the cursors are set to the | ||
301 | * relevant records (XFSA_FIXUP_CNT_OK / XFSA_FIXUP_BNO_OK); without a flag the record is looked up here. | ||
302 | */ | ||
303 | STATIC int /* error code */ | ||
304 | xfs_alloc_fixup_trees( | ||
305 | xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */ | ||
306 | xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */ | ||
307 | xfs_agblock_t fbno, /* starting block of free extent */ | ||
308 | xfs_extlen_t flen, /* length of free extent */ | ||
309 | xfs_agblock_t rbno, /* starting block of returned extent */ | ||
310 | xfs_extlen_t rlen, /* length of returned extent */ | ||
311 | int flags) /* flags, XFSA_FIXUP_... */ | ||
312 | { | ||
313 | int error; /* error code */ | ||
314 | int i; /* operation results */ | ||
315 | xfs_agblock_t nfbno1; /* first new free startblock */ | ||
316 | xfs_agblock_t nfbno2; /* second new free startblock */ | ||
317 | xfs_extlen_t nflen1=0; /* first new free length */ | ||
318 | xfs_extlen_t nflen2=0; /* second new free length */ | ||
319 | |||
320 | /* | ||
321 | * Look up the record in the by-size tree if necessary. When the caller says the cursor is already positioned, DEBUG builds check that it points at [fbno, flen]. | ||
322 | */ | ||
323 | if (flags & XFSA_FIXUP_CNT_OK) { | ||
324 | #ifdef DEBUG | ||
325 | if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) | ||
326 | return error; | ||
327 | XFS_WANT_CORRUPTED_RETURN( | ||
328 | i == 1 && nfbno1 == fbno && nflen1 == flen); | ||
329 | #endif | ||
330 | } else { | ||
331 | if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) | ||
332 | return error; | ||
333 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
334 | } | ||
335 | /* | ||
336 | * Look up the record in the by-block tree if necessary. Same scheme as for the by-size tree above. | ||
337 | */ | ||
338 | if (flags & XFSA_FIXUP_BNO_OK) { | ||
339 | #ifdef DEBUG | ||
340 | if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) | ||
341 | return error; | ||
342 | XFS_WANT_CORRUPTED_RETURN( | ||
343 | i == 1 && nfbno1 == fbno && nflen1 == flen); | ||
344 | #endif | ||
345 | } else { | ||
346 | if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) | ||
347 | return error; | ||
348 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
349 | } | ||
350 | |||
351 | #ifdef DEBUG | ||
352 | if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) { /* both trees are a single block: record counts must agree */ | ||
353 | struct xfs_btree_block *bnoblock; | ||
354 | struct xfs_btree_block *cntblock; | ||
355 | |||
356 | bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); | ||
357 | cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); | ||
358 | |||
359 | XFS_WANT_CORRUPTED_RETURN( | ||
360 | bnoblock->bb_numrecs == cntblock->bb_numrecs); | ||
361 | } | ||
362 | #endif | ||
363 | |||
364 | /* | ||
365 | * Deal with all four cases: the allocated record is contained | ||
366 | * within the freespace record, so we can have new freespace | ||
367 | * at either (or both) end, or no freespace remaining. | ||
368 | */ | ||
369 | if (rbno == fbno && rlen == flen) | ||
370 | nfbno1 = nfbno2 = NULLAGBLOCK; /* exact fit: nothing left over */ | ||
371 | else if (rbno == fbno) { /* taken from the front: one remainder at the tail */ | ||
372 | nfbno1 = rbno + rlen; | ||
373 | nflen1 = flen - rlen; | ||
374 | nfbno2 = NULLAGBLOCK; | ||
375 | } else if (rbno + rlen == fbno + flen) { /* taken from the back: one remainder at the head */ | ||
376 | nfbno1 = fbno; | ||
377 | nflen1 = flen - rlen; | ||
378 | nfbno2 = NULLAGBLOCK; | ||
379 | } else { /* taken from the middle: remainders on both sides */ | ||
380 | nfbno1 = fbno; | ||
381 | nflen1 = rbno - fbno; | ||
382 | nfbno2 = rbno + rlen; | ||
383 | nflen2 = (fbno + flen) - nfbno2; | ||
384 | } | ||
385 | /* | ||
386 | * Delete the entry from the by-size btree. | ||
387 | */ | ||
388 | if ((error = xfs_btree_delete(cnt_cur, &i))) | ||
389 | return error; | ||
390 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
391 | /* | ||
392 | * Add new by-size btree entry(s). Each lookup must miss (i == 0) before the insert. | ||
393 | */ | ||
394 | if (nfbno1 != NULLAGBLOCK) { | ||
395 | if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) | ||
396 | return error; | ||
397 | XFS_WANT_CORRUPTED_RETURN(i == 0); | ||
398 | if ((error = xfs_btree_insert(cnt_cur, &i))) | ||
399 | return error; | ||
400 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
401 | } | ||
402 | if (nfbno2 != NULLAGBLOCK) { | ||
403 | if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) | ||
404 | return error; | ||
405 | XFS_WANT_CORRUPTED_RETURN(i == 0); | ||
406 | if ((error = xfs_btree_insert(cnt_cur, &i))) | ||
407 | return error; | ||
408 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
409 | } | ||
410 | /* | ||
411 | * Fix up the by-block btree entry(s). | ||
412 | */ | ||
413 | if (nfbno1 == NULLAGBLOCK) { | ||
414 | /* | ||
415 | * No remaining freespace, just delete the by-block tree entry. | ||
416 | */ | ||
417 | if ((error = xfs_btree_delete(bno_cur, &i))) | ||
418 | return error; | ||
419 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
420 | } else { | ||
421 | /* | ||
422 | * Update the by-block entry to start later|be shorter. | ||
423 | */ | ||
424 | if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1))) | ||
425 | return error; | ||
426 | } | ||
427 | if (nfbno2 != NULLAGBLOCK) { | ||
428 | /* | ||
429 | * 2 resulting free entries, need to add one. | ||
430 | */ | ||
431 | if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) | ||
432 | return error; | ||
433 | XFS_WANT_CORRUPTED_RETURN(i == 0); | ||
434 | if ((error = xfs_btree_insert(bno_cur, &i))) | ||
435 | return error; | ||
436 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
437 | } | ||
438 | return 0; | ||
439 | } | ||
440 | |||
441 | static bool /* true if the AGFL header and every slot pass the checks below */ | ||
442 | xfs_agfl_verify( | ||
443 | struct xfs_buf *bp) /* buffer holding the AGFL sector */ | ||
444 | { | ||
445 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
446 | struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); | ||
447 | int i; | ||
448 | |||
449 | if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid)) /* must belong to this filesystem */ | ||
450 | return false; | ||
451 | if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) | ||
452 | return false; | ||
453 | /* | ||
454 | * during growfs operations, the perag is not fully initialised, | ||
455 | * so we can't use it for any useful checking. growfs ensures we can't | ||
456 | * use it by using uncached buffers that don't have the perag attached | ||
457 | * so we can detect and avoid this problem. | ||
458 | */ | ||
459 | if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) | ||
460 | return false; | ||
461 | |||
462 | for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { /* every slot is either NULLAGBLOCK or a block number inside the AG */ | ||
463 | if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && | ||
464 | be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) | ||
465 | return false; | ||
466 | } | ||
467 | return true; | ||
468 | } | ||
469 | |||
470 | static void /* read verifier: flags the buffer with -EFSBADCRC or -EFSCORRUPTED on failure */ | ||
471 | xfs_agfl_read_verify( | ||
472 | struct xfs_buf *bp) | ||
473 | { | ||
474 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
475 | |||
476 | /* | ||
477 | * There is no verification of non-crc AGFLs because mkfs does not | ||
478 | * initialise the AGFL to zero or NULL. Hence the only valid part of the | ||
479 | * AGFL is what the AGF says is active. We can't get to the AGF, so we | ||
480 | * can't verify just those entries are valid. | ||
481 | */ | ||
482 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
483 | return; | ||
484 | |||
485 | if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) /* checksum first; contents are checked only if it passes */ | ||
486 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
487 | else if (!xfs_agfl_verify(bp)) | ||
488 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
489 | |||
490 | if (bp->b_error) /* report whichever failure was recorded */ | ||
491 | xfs_verifier_error(bp); | ||
492 | } | ||
493 | |||
494 | static void | ||
495 | xfs_agfl_write_verify( | ||
496 | struct xfs_buf *bp) | ||
497 | { | ||
498 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
499 | struct xfs_buf_log_item *bip = bp->b_fspriv; | ||
500 | |||
501 | /* no verification of non-crc AGFLs */ | ||
502 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
503 | return; | ||
504 | |||
505 | if (!xfs_agfl_verify(bp)) { | ||
506 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
507 | xfs_verifier_error(bp); | ||
508 | return; | ||
509 | } | ||
510 | |||
511 | if (bip) | ||
512 | XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); | ||
513 | |||
514 | xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF); | ||
515 | } | ||
516 | |||
517 | const struct xfs_buf_ops xfs_agfl_buf_ops = { | ||
518 | .verify_read = xfs_agfl_read_verify, | ||
519 | .verify_write = xfs_agfl_write_verify, | ||
520 | }; | ||
521 | |||
522 | /* | ||
523 | * Read in the allocation group free block array. | ||
524 | */ | ||
525 | STATIC int /* error */ | ||
526 | xfs_alloc_read_agfl( | ||
527 | xfs_mount_t *mp, /* mount point structure */ | ||
528 | xfs_trans_t *tp, /* transaction pointer */ | ||
529 | xfs_agnumber_t agno, /* allocation group number */ | ||
530 | xfs_buf_t **bpp) /* buffer for the ag free block array */ | ||
531 | { | ||
532 | xfs_buf_t *bp; /* return value */ | ||
533 | int error; | ||
534 | |||
535 | ASSERT(agno != NULLAGNUMBER); | ||
536 | error = xfs_trans_read_buf( | ||
537 | mp, tp, mp->m_ddev_targp, | ||
538 | XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), | ||
539 | XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); | ||
540 | if (error) | ||
541 | return error; | ||
542 | xfs_buf_set_ref(bp, XFS_AGFL_REF); | ||
543 | *bpp = bp; | ||
544 | return 0; | ||
545 | } | ||
546 | |||
547 | STATIC int | ||
548 | xfs_alloc_update_counters( | ||
549 | struct xfs_trans *tp, | ||
550 | struct xfs_perag *pag, | ||
551 | struct xfs_buf *agbp, | ||
552 | long len) | ||
553 | { | ||
554 | struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); | ||
555 | |||
556 | pag->pagf_freeblks += len; | ||
557 | be32_add_cpu(&agf->agf_freeblks, len); | ||
558 | |||
559 | xfs_trans_agblocks_delta(tp, len); | ||
560 | if (unlikely(be32_to_cpu(agf->agf_freeblks) > | ||
561 | be32_to_cpu(agf->agf_length))) | ||
562 | return -EFSCORRUPTED; | ||
563 | |||
564 | xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); | ||
565 | return 0; | ||
566 | } | ||
567 | |||
568 | /* | ||
569 | * Allocation group level functions. | ||
570 | */ | ||
571 | |||
572 | /* | ||
573 | * Allocate a variable extent in the allocation group agno. | ||
574 | * Type and bno are used to determine where in the allocation group the | ||
575 | * extent will start. | ||
576 | * Extent's length (returned in *len) will be between minlen and maxlen, | ||
577 | * and of the form k * prod + mod unless there's nothing that large. | ||
578 | * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. | ||
579 | */ | ||
580 | STATIC int /* error */ | ||
581 | xfs_alloc_ag_vextent( | ||
582 | xfs_alloc_arg_t *args) /* argument structure for allocation */ | ||
583 | { | ||
584 | int error=0; | ||
585 | |||
586 | ASSERT(args->minlen > 0); | ||
587 | ASSERT(args->maxlen > 0); | ||
588 | ASSERT(args->minlen <= args->maxlen); | ||
589 | ASSERT(args->mod < args->prod); | ||
590 | ASSERT(args->alignment > 0); | ||
591 | /* | ||
592 | * Branch to correct routine based on the type. | ||
593 | */ | ||
594 | args->wasfromfl = 0; | ||
595 | switch (args->type) { | ||
596 | case XFS_ALLOCTYPE_THIS_AG: | ||
597 | error = xfs_alloc_ag_vextent_size(args); | ||
598 | break; | ||
599 | case XFS_ALLOCTYPE_NEAR_BNO: | ||
600 | error = xfs_alloc_ag_vextent_near(args); | ||
601 | break; | ||
602 | case XFS_ALLOCTYPE_THIS_BNO: | ||
603 | error = xfs_alloc_ag_vextent_exact(args); | ||
604 | break; | ||
605 | default: | ||
606 | ASSERT(0); | ||
607 | /* NOTREACHED */ | ||
608 | } | ||
609 | |||
610 | if (error || args->agbno == NULLAGBLOCK) | ||
611 | return error; | ||
612 | |||
613 | ASSERT(args->len >= args->minlen); | ||
614 | ASSERT(args->len <= args->maxlen); | ||
615 | ASSERT(!args->wasfromfl || !args->isfl); | ||
616 | ASSERT(args->agbno % args->alignment == 0); | ||
617 | |||
618 | if (!args->wasfromfl) { | ||
619 | error = xfs_alloc_update_counters(args->tp, args->pag, | ||
620 | args->agbp, | ||
621 | -((long)(args->len))); | ||
622 | if (error) | ||
623 | return error; | ||
624 | |||
625 | ASSERT(!xfs_extent_busy_search(args->mp, args->agno, | ||
626 | args->agbno, args->len)); | ||
627 | } | ||
628 | |||
629 | if (!args->isfl) { | ||
630 | xfs_trans_mod_sb(args->tp, args->wasdel ? | ||
631 | XFS_TRANS_SB_RES_FDBLOCKS : | ||
632 | XFS_TRANS_SB_FDBLOCKS, | ||
633 | -((long)(args->len))); | ||
634 | } | ||
635 | |||
636 | XFS_STATS_INC(xs_allocx); | ||
637 | XFS_STATS_ADD(xs_allocb, args->len); | ||
638 | return error; | ||
639 | } | ||
640 | |||
641 | /* | ||
642 | * Allocate a variable extent at exactly agno/bno. | ||
643 | * Extent's length (returned in *len) will be between minlen and maxlen, | ||
644 | * and of the form k * prod + mod unless there's nothing that large. | ||
645 | * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it. | ||
646 | */ | ||
647 | STATIC int /* error */ | ||
648 | xfs_alloc_ag_vextent_exact( | ||
649 | xfs_alloc_arg_t *args) /* allocation argument structure */ | ||
650 | { | ||
651 | xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ | ||
652 | xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ | ||
653 | int error; | ||
654 | xfs_agblock_t fbno; /* start block of found extent */ | ||
655 | xfs_extlen_t flen; /* length of found extent */ | ||
656 | xfs_agblock_t tbno; /* start block of trimmed extent */ | ||
657 | xfs_extlen_t tlen; /* length of trimmed extent */ | ||
658 | xfs_agblock_t tend; /* end block of trimmed extent */ | ||
659 | int i; /* success/failure of operation */ | ||
660 | |||
661 | ASSERT(args->alignment == 1); | ||
662 | |||
663 | /* | ||
664 | * Allocate/initialize a cursor for the by-number freespace btree. | ||
665 | */ | ||
666 | bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | ||
667 | args->agno, XFS_BTNUM_BNO); | ||
668 | |||
669 | /* | ||
670 | * Lookup bno and minlen in the btree (minlen is irrelevant, really). | ||
671 | * Look for the closest free block <= bno, it must contain bno | ||
672 | * if any free block does. | ||
673 | */ | ||
674 | error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i); | ||
675 | if (error) | ||
676 | goto error0; | ||
677 | if (!i) | ||
678 | goto not_found; | ||
679 | |||
680 | /* | ||
681 | * Grab the freespace record. | ||
682 | */ | ||
683 | error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); | ||
684 | if (error) | ||
685 | goto error0; | ||
686 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
687 | ASSERT(fbno <= args->agbno); | ||
688 | |||
689 | /* | ||
690 | * Check for overlapping busy extents. | ||
691 | */ | ||
692 | xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen); | ||
693 | |||
694 | /* | ||
695 | * Give up if the start of the extent is busy, or the freespace isn't | ||
696 | * long enough for the minimum request. | ||
697 | */ | ||
698 | if (tbno > args->agbno) | ||
699 | goto not_found; | ||
700 | if (tlen < args->minlen) | ||
701 | goto not_found; | ||
702 | tend = tbno + tlen; | ||
703 | if (tend < args->agbno + args->minlen) | ||
704 | goto not_found; | ||
705 | |||
706 | /* | ||
707 | * End of extent will be smaller of the freespace end and the | ||
708 | * maximal requested end. | ||
709 | * | ||
710 | * Fix the length according to mod and prod if given. | ||
711 | */ | ||
712 | args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) | ||
713 | - args->agbno; | ||
714 | xfs_alloc_fix_len(args); | ||
715 | if (!xfs_alloc_fix_minleft(args)) | ||
716 | goto not_found; | ||
717 | |||
718 | ASSERT(args->agbno + args->len <= tend); | ||
719 | |||
720 | /* | ||
721 | * We are allocating agbno for args->len | ||
722 | * Allocate/initialize a cursor for the by-size btree. | ||
723 | */ | ||
724 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | ||
725 | args->agno, XFS_BTNUM_CNT); | ||
726 | ASSERT(args->agbno + args->len <= | ||
727 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); | ||
728 | error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, | ||
729 | args->len, XFSA_FIXUP_BNO_OK); | ||
730 | if (error) { | ||
731 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); | ||
732 | goto error0; | ||
733 | } | ||
734 | |||
735 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | ||
736 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
737 | |||
738 | args->wasfromfl = 0; | ||
739 | trace_xfs_alloc_exact_done(args); | ||
740 | return 0; | ||
741 | |||
742 | not_found: | ||
743 | /* Didn't find it, return null. */ | ||
744 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | ||
745 | args->agbno = NULLAGBLOCK; | ||
746 | trace_xfs_alloc_exact_notfound(args); | ||
747 | return 0; | ||
748 | |||
749 | error0: | ||
750 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); | ||
751 | trace_xfs_alloc_exact_error(args); | ||
752 | return error; | ||
753 | } | ||
754 | |||
755 | /* | ||
756 | * Search the btree in a given direction via the search cursor and compare | ||
757 | * the records found against the good extent we've already found. | ||
758 | */ | ||
759 | STATIC int | ||
760 | xfs_alloc_find_best_extent( | ||
761 | struct xfs_alloc_arg *args, /* allocation argument structure */ | ||
762 | struct xfs_btree_cur **gcur, /* good cursor */ | ||
763 | struct xfs_btree_cur **scur, /* searching cursor */ | ||
764 | xfs_agblock_t gdiff, /* difference for search comparison */ | ||
765 | xfs_agblock_t *sbno, /* extent found by search */ | ||
766 | xfs_extlen_t *slen, /* extent length */ | ||
767 | xfs_agblock_t *sbnoa, /* aligned extent found by search */ | ||
768 | xfs_extlen_t *slena, /* aligned extent length */ | ||
769 | int dir) /* 0 = search right, 1 = search left */ | ||
770 | { | ||
771 | xfs_agblock_t new; | ||
772 | xfs_agblock_t sdiff; | ||
773 | int error; | ||
774 | int i; | ||
775 | |||
776 | /* The good extent is perfect, no need to search. */ | ||
777 | if (!gdiff) | ||
778 | goto out_use_good; | ||
779 | |||
780 | /* | ||
781 | * Look until we find a better one, run out of space or run off the end. | ||
782 | */ | ||
783 | do { | ||
784 | error = xfs_alloc_get_rec(*scur, sbno, slen, &i); | ||
785 | if (error) | ||
786 | goto error0; | ||
787 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
788 | xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); | ||
789 | |||
790 | /* | ||
791 | * The good extent is closer than this one. | ||
792 | */ | ||
793 | if (!dir) { | ||
794 | if (*sbnoa >= args->agbno + gdiff) | ||
795 | goto out_use_good; | ||
796 | } else { | ||
797 | if (*sbnoa <= args->agbno - gdiff) | ||
798 | goto out_use_good; | ||
799 | } | ||
800 | |||
801 | /* | ||
802 | * Same distance, compare length and pick the best. | ||
803 | */ | ||
804 | if (*slena >= args->minlen) { | ||
805 | args->len = XFS_EXTLEN_MIN(*slena, args->maxlen); | ||
806 | xfs_alloc_fix_len(args); | ||
807 | |||
808 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, | ||
809 | args->alignment, | ||
810 | args->userdata, *sbnoa, | ||
811 | *slena, &new); | ||
812 | |||
813 | /* | ||
814 | * Choose closer size and invalidate other cursor. | ||
815 | */ | ||
816 | if (sdiff < gdiff) | ||
817 | goto out_use_search; | ||
818 | goto out_use_good; | ||
819 | } | ||
820 | |||
821 | if (!dir) | ||
822 | error = xfs_btree_increment(*scur, 0, &i); | ||
823 | else | ||
824 | error = xfs_btree_decrement(*scur, 0, &i); | ||
825 | if (error) | ||
826 | goto error0; | ||
827 | } while (i); | ||
828 | |||
829 | out_use_good: | ||
830 | xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR); | ||
831 | *scur = NULL; | ||
832 | return 0; | ||
833 | |||
834 | out_use_search: | ||
835 | xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR); | ||
836 | *gcur = NULL; | ||
837 | return 0; | ||
838 | |||
839 | error0: | ||
840 | /* caller invalidates cursors */ | ||
841 | return error; | ||
842 | } | ||
843 | |||
844 | /* | ||
845 | * Allocate a variable extent near bno in the allocation group agno. | ||
846 | * Extent's length (returned in len) will be between minlen and maxlen, | ||
847 | * and of the form k * prod + mod unless there's nothing that large. | ||
848 | * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. | ||
849 | */ | ||
850 | STATIC int /* error */ | ||
851 | xfs_alloc_ag_vextent_near( | ||
852 | xfs_alloc_arg_t *args) /* allocation argument structure */ | ||
853 | { | ||
854 | xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */ | ||
855 | xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */ | ||
856 | xfs_btree_cur_t *cnt_cur; /* cursor for count btree */ | ||
857 | xfs_agblock_t gtbno; /* start bno of right side entry */ | ||
858 | xfs_agblock_t gtbnoa; /* aligned ... */ | ||
859 | xfs_extlen_t gtdiff; /* difference to right side entry */ | ||
860 | xfs_extlen_t gtlen; /* length of right side entry */ | ||
861 | xfs_extlen_t gtlena; /* aligned ... */ | ||
862 | xfs_agblock_t gtnew; /* useful start bno of right side */ | ||
863 | int error; /* error code */ | ||
864 | int i; /* result code, temporary */ | ||
865 | int j; /* result code, temporary */ | ||
866 | xfs_agblock_t ltbno; /* start bno of left side entry */ | ||
867 | xfs_agblock_t ltbnoa; /* aligned ... */ | ||
868 | xfs_extlen_t ltdiff; /* difference to left side entry */ | ||
869 | xfs_extlen_t ltlen; /* length of left side entry */ | ||
870 | xfs_extlen_t ltlena; /* aligned ... */ | ||
871 | xfs_agblock_t ltnew; /* useful start bno of left side */ | ||
872 | xfs_extlen_t rlen; /* length of returned extent */ | ||
873 | int forced = 0; | ||
874 | #ifdef DEBUG | ||
875 | /* | ||
876 | * Randomly don't execute the first algorithm. | ||
877 | */ | ||
878 | int dofirst; /* set to do first algorithm */ | ||
879 | |||
880 | dofirst = prandom_u32() & 1; | ||
881 | #endif | ||
882 | |||
883 | restart: | ||
884 | bno_cur_lt = NULL; | ||
885 | bno_cur_gt = NULL; | ||
886 | ltlen = 0; | ||
887 | gtlena = 0; | ||
888 | ltlena = 0; | ||
889 | |||
890 | /* | ||
891 | * Get a cursor for the by-size btree. | ||
892 | */ | ||
893 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | ||
894 | args->agno, XFS_BTNUM_CNT); | ||
895 | |||
896 | /* | ||
897 | * See if there are any free extents as big as maxlen. | ||
898 | */ | ||
899 | if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i))) | ||
900 | goto error0; | ||
901 | /* | ||
902 | * If none, then pick up the last entry in the tree unless the | ||
903 | * tree is empty. | ||
904 | */ | ||
905 | if (!i) { | ||
906 | if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, <bno, | ||
907 | <len, &i))) | ||
908 | goto error0; | ||
909 | if (i == 0 || ltlen == 0) { | ||
910 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
911 | trace_xfs_alloc_near_noentry(args); | ||
912 | return 0; | ||
913 | } | ||
914 | ASSERT(i == 1); | ||
915 | } | ||
916 | args->wasfromfl = 0; | ||
917 | |||
918 | /* | ||
919 | * First algorithm. | ||
920 | * If the requested extent is large wrt the freespaces available | ||
921 | * in this a.g., then the cursor will be pointing to a btree entry | ||
922 | * near the right edge of the tree. If it's in the last btree leaf | ||
923 | * block, then we just examine all the entries in that block | ||
924 | * that are big enough, and pick the best one. | ||
925 | * This is written as a while loop so we can break out of it, | ||
926 | * but we never loop back to the top. | ||
927 | */ | ||
928 | while (xfs_btree_islastblock(cnt_cur, 0)) { | ||
929 | xfs_extlen_t bdiff; | ||
930 | int besti=0; | ||
931 | xfs_extlen_t blen=0; | ||
932 | xfs_agblock_t bnew=0; | ||
933 | |||
934 | #ifdef DEBUG | ||
935 | if (dofirst) | ||
936 | break; | ||
937 | #endif | ||
938 | /* | ||
939 | * Start from the entry that lookup found, sequence through | ||
940 | * all larger free blocks. If we're actually pointing at a | ||
941 | * record smaller than maxlen, go to the start of this block, | ||
942 | * and skip all those smaller than minlen. | ||
943 | */ | ||
944 | if (ltlen || args->alignment > 1) { | ||
945 | cnt_cur->bc_ptrs[0] = 1; | ||
946 | do { | ||
947 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, | ||
948 | <len, &i))) | ||
949 | goto error0; | ||
950 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
951 | if (ltlen >= args->minlen) | ||
952 | break; | ||
953 | if ((error = xfs_btree_increment(cnt_cur, 0, &i))) | ||
954 | goto error0; | ||
955 | } while (i); | ||
956 | ASSERT(ltlen >= args->minlen); | ||
957 | if (!i) | ||
958 | break; | ||
959 | } | ||
960 | i = cnt_cur->bc_ptrs[0]; | ||
961 | for (j = 1, blen = 0, bdiff = 0; | ||
962 | !error && j && (blen < args->maxlen || bdiff > 0); | ||
963 | error = xfs_btree_increment(cnt_cur, 0, &j)) { | ||
964 | /* | ||
965 | * For each entry, decide if it's better than | ||
966 | * the previous best entry. | ||
967 | */ | ||
968 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) | ||
969 | goto error0; | ||
970 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
971 | xfs_alloc_compute_aligned(args, ltbno, ltlen, | ||
972 | <bnoa, <lena); | ||
973 | if (ltlena < args->minlen) | ||
974 | continue; | ||
975 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | ||
976 | xfs_alloc_fix_len(args); | ||
977 | ASSERT(args->len >= args->minlen); | ||
978 | if (args->len < blen) | ||
979 | continue; | ||
980 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, | ||
981 | args->alignment, args->userdata, ltbnoa, | ||
982 | ltlena, <new); | ||
983 | if (ltnew != NULLAGBLOCK && | ||
984 | (args->len > blen || ltdiff < bdiff)) { | ||
985 | bdiff = ltdiff; | ||
986 | bnew = ltnew; | ||
987 | blen = args->len; | ||
988 | besti = cnt_cur->bc_ptrs[0]; | ||
989 | } | ||
990 | } | ||
991 | /* | ||
992 | * It didn't work. We COULD be in a case where | ||
993 | * there's a good record somewhere, so try again. | ||
994 | */ | ||
995 | if (blen == 0) | ||
996 | break; | ||
997 | /* | ||
998 | * Point at the best entry, and retrieve it again. | ||
999 | */ | ||
1000 | cnt_cur->bc_ptrs[0] = besti; | ||
1001 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) | ||
1002 | goto error0; | ||
1003 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1004 | ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); | ||
1005 | args->len = blen; | ||
1006 | if (!xfs_alloc_fix_minleft(args)) { | ||
1007 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
1008 | trace_xfs_alloc_near_nominleft(args); | ||
1009 | return 0; | ||
1010 | } | ||
1011 | blen = args->len; | ||
1012 | /* | ||
1013 | * We are allocating starting at bnew for blen blocks. | ||
1014 | */ | ||
1015 | args->agbno = bnew; | ||
1016 | ASSERT(bnew >= ltbno); | ||
1017 | ASSERT(bnew + blen <= ltbno + ltlen); | ||
1018 | /* | ||
1019 | * Set up a cursor for the by-bno tree. | ||
1020 | */ | ||
1021 | bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, | ||
1022 | args->agbp, args->agno, XFS_BTNUM_BNO); | ||
1023 | /* | ||
1024 | * Fix up the btree entries. | ||
1025 | */ | ||
1026 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, | ||
1027 | ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))) | ||
1028 | goto error0; | ||
1029 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
1030 | xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); | ||
1031 | |||
1032 | trace_xfs_alloc_near_first(args); | ||
1033 | return 0; | ||
1034 | } | ||
1035 | /* | ||
1036 | * Second algorithm. | ||
1037 | * Search in the by-bno tree to the left and to the right | ||
1038 | * simultaneously, until in each case we find a space big enough, | ||
1039 | * or run into the edge of the tree. When we run into the edge, | ||
1040 | * we deallocate that cursor. | ||
1041 | * If both searches succeed, we compare the two spaces and pick | ||
1042 | * the better one. | ||
1043 | * With alignment, it's possible for both to fail; the upper | ||
1044 | * level algorithm that picks allocation groups for allocations | ||
1045 | * is not supposed to do this. | ||
1046 | */ | ||
1047 | /* | ||
1048 | * Allocate and initialize the cursor for the leftward search. | ||
1049 | */ | ||
1050 | bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | ||
1051 | args->agno, XFS_BTNUM_BNO); | ||
1052 | /* | ||
1053 | * Lookup <= bno to find the leftward search's starting point. | ||
1054 | */ | ||
1055 | if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i))) | ||
1056 | goto error0; | ||
1057 | if (!i) { | ||
1058 | /* | ||
1059 | * Didn't find anything; use this cursor for the rightward | ||
1060 | * search. | ||
1061 | */ | ||
1062 | bno_cur_gt = bno_cur_lt; | ||
1063 | bno_cur_lt = NULL; | ||
1064 | } | ||
1065 | /* | ||
1066 | * Found something. Duplicate the cursor for the rightward search. | ||
1067 | */ | ||
1068 | else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt))) | ||
1069 | goto error0; | ||
1070 | /* | ||
1071 | * Increment the cursor, so we will point at the entry just right | ||
1072 | * of the leftward entry if any, or to the leftmost entry. | ||
1073 | */ | ||
1074 | if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) | ||
1075 | goto error0; | ||
1076 | if (!i) { | ||
1077 | /* | ||
1078 | * It failed, there are no rightward entries. | ||
1079 | */ | ||
1080 | xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR); | ||
1081 | bno_cur_gt = NULL; | ||
1082 | } | ||
1083 | /* | ||
1084 | * Loop going left with the leftward cursor, right with the | ||
1085 | * rightward cursor, until either both directions give up or | ||
1086 | * we find an entry at least as big as minlen. | ||
1087 | */ | ||
1088 | do { | ||
1089 | if (bno_cur_lt) { | ||
1090 | if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) | ||
1091 | goto error0; | ||
1092 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1093 | xfs_alloc_compute_aligned(args, ltbno, ltlen, | ||
1094 | <bnoa, <lena); | ||
1095 | if (ltlena >= args->minlen) | ||
1096 | break; | ||
1097 | if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) | ||
1098 | goto error0; | ||
1099 | if (!i) { | ||
1100 | xfs_btree_del_cursor(bno_cur_lt, | ||
1101 | XFS_BTREE_NOERROR); | ||
1102 | bno_cur_lt = NULL; | ||
1103 | } | ||
1104 | } | ||
1105 | if (bno_cur_gt) { | ||
1106 | if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) | ||
1107 | goto error0; | ||
1108 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1109 | xfs_alloc_compute_aligned(args, gtbno, gtlen, | ||
1110 | >bnoa, >lena); | ||
1111 | if (gtlena >= args->minlen) | ||
1112 | break; | ||
1113 | if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) | ||
1114 | goto error0; | ||
1115 | if (!i) { | ||
1116 | xfs_btree_del_cursor(bno_cur_gt, | ||
1117 | XFS_BTREE_NOERROR); | ||
1118 | bno_cur_gt = NULL; | ||
1119 | } | ||
1120 | } | ||
1121 | } while (bno_cur_lt || bno_cur_gt); | ||
1122 | |||
1123 | /* | ||
1124 | * Got both cursors still active, need to find better entry. | ||
1125 | */ | ||
1126 | if (bno_cur_lt && bno_cur_gt) { | ||
1127 | if (ltlena >= args->minlen) { | ||
1128 | /* | ||
1129 | * Left side is good, look for a right side entry. | ||
1130 | */ | ||
1131 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | ||
1132 | xfs_alloc_fix_len(args); | ||
1133 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, | ||
1134 | args->alignment, args->userdata, ltbnoa, | ||
1135 | ltlena, <new); | ||
1136 | |||
1137 | error = xfs_alloc_find_best_extent(args, | ||
1138 | &bno_cur_lt, &bno_cur_gt, | ||
1139 | ltdiff, >bno, >len, | ||
1140 | >bnoa, >lena, | ||
1141 | 0 /* search right */); | ||
1142 | } else { | ||
1143 | ASSERT(gtlena >= args->minlen); | ||
1144 | |||
1145 | /* | ||
1146 | * Right side is good, look for a left side entry. | ||
1147 | */ | ||
1148 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); | ||
1149 | xfs_alloc_fix_len(args); | ||
1150 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, | ||
1151 | args->alignment, args->userdata, gtbnoa, | ||
1152 | gtlena, >new); | ||
1153 | |||
1154 | error = xfs_alloc_find_best_extent(args, | ||
1155 | &bno_cur_gt, &bno_cur_lt, | ||
1156 | gtdiff, <bno, <len, | ||
1157 | <bnoa, <lena, | ||
1158 | 1 /* search left */); | ||
1159 | } | ||
1160 | |||
1161 | if (error) | ||
1162 | goto error0; | ||
1163 | } | ||
1164 | |||
1165 | /* | ||
1166 | * If we couldn't get anything, give up. | ||
1167 | */ | ||
1168 | if (bno_cur_lt == NULL && bno_cur_gt == NULL) { | ||
1169 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
1170 | |||
1171 | if (!forced++) { | ||
1172 | trace_xfs_alloc_near_busy(args); | ||
1173 | xfs_log_force(args->mp, XFS_LOG_SYNC); | ||
1174 | goto restart; | ||
1175 | } | ||
1176 | trace_xfs_alloc_size_neither(args); | ||
1177 | args->agbno = NULLAGBLOCK; | ||
1178 | return 0; | ||
1179 | } | ||
1180 | |||
1181 | /* | ||
1182 | * At this point we have selected a freespace entry, either to the | ||
1183 | * left or to the right. If it's on the right, copy all the | ||
1184 | * useful variables to the "left" set so we only have one | ||
1185 | * copy of this code. | ||
1186 | */ | ||
1187 | if (bno_cur_gt) { | ||
1188 | bno_cur_lt = bno_cur_gt; | ||
1189 | bno_cur_gt = NULL; | ||
1190 | ltbno = gtbno; | ||
1191 | ltbnoa = gtbnoa; | ||
1192 | ltlen = gtlen; | ||
1193 | ltlena = gtlena; | ||
1194 | j = 1; | ||
1195 | } else | ||
1196 | j = 0; | ||
1197 | |||
1198 | /* | ||
1199 | * Fix up the length and compute the useful address. | ||
1200 | */ | ||
1201 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | ||
1202 | xfs_alloc_fix_len(args); | ||
1203 | if (!xfs_alloc_fix_minleft(args)) { | ||
1204 | trace_xfs_alloc_near_nominleft(args); | ||
1205 | xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); | ||
1206 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
1207 | return 0; | ||
1208 | } | ||
1209 | rlen = args->len; | ||
1210 | (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, | ||
1211 | args->userdata, ltbnoa, ltlena, <new); | ||
1212 | ASSERT(ltnew >= ltbno); | ||
1213 | ASSERT(ltnew + rlen <= ltbnoa + ltlena); | ||
1214 | ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); | ||
1215 | args->agbno = ltnew; | ||
1216 | |||
1217 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, | ||
1218 | ltnew, rlen, XFSA_FIXUP_BNO_OK))) | ||
1219 | goto error0; | ||
1220 | |||
1221 | if (j) | ||
1222 | trace_xfs_alloc_near_greater(args); | ||
1223 | else | ||
1224 | trace_xfs_alloc_near_lesser(args); | ||
1225 | |||
1226 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
1227 | xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); | ||
1228 | return 0; | ||
1229 | |||
1230 | error0: | ||
1231 | trace_xfs_alloc_near_error(args); | ||
1232 | if (cnt_cur != NULL) | ||
1233 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); | ||
1234 | if (bno_cur_lt != NULL) | ||
1235 | xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR); | ||
1236 | if (bno_cur_gt != NULL) | ||
1237 | xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR); | ||
1238 | return error; | ||
1239 | } | ||
1240 | |||
1241 | /* | ||
1242 | * Allocate a variable extent anywhere in the allocation group agno. | ||
1243 | * Extent's length (returned in len) will be between minlen and maxlen, | ||
1244 | * and of the form k * prod + mod unless there's nothing that large. | ||
1245 | * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. | ||
1246 | */ | ||
1247 | STATIC int /* error */ | ||
1248 | xfs_alloc_ag_vextent_size( | ||
1249 | xfs_alloc_arg_t *args) /* allocation argument structure */ | ||
1250 | { | ||
1251 | xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ | ||
1252 | xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ | ||
1253 | int error; /* error result */ | ||
1254 | xfs_agblock_t fbno; /* start of found freespace */ | ||
1255 | xfs_extlen_t flen; /* length of found freespace */ | ||
1256 | int i; /* temp status variable */ | ||
1257 | xfs_agblock_t rbno; /* returned block number */ | ||
1258 | xfs_extlen_t rlen; /* length of returned extent */ | ||
1259 | int forced = 0; | ||
1260 | |||
1261 | restart: | ||
1262 | /* | ||
1263 | * Allocate and initialize a cursor for the by-size btree. | ||
1264 | */ | ||
1265 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | ||
1266 | args->agno, XFS_BTNUM_CNT); | ||
1267 | bno_cur = NULL; | ||
1268 | |||
1269 | /* | ||
1270 | * Look for an entry >= maxlen+alignment-1 blocks. | ||
1271 | */ | ||
1272 | if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, | ||
1273 | args->maxlen + args->alignment - 1, &i))) | ||
1274 | goto error0; | ||
1275 | |||
1276 | /* | ||
1277 | * If none or we have busy extents that we cannot allocate from, then | ||
1278 | * we have to settle for a smaller extent. In the case that there are | ||
1279 | * no large extents, this will return the last entry in the tree unless | ||
1280 | * the tree is empty. In the case that there are only busy large | ||
1281 | * extents, this will return the largest small extent unless there | ||
1282 | * are no smaller extents available. | ||
1283 | */ | ||
1284 | if (!i || forced > 1) { | ||
1285 | error = xfs_alloc_ag_vextent_small(args, cnt_cur, | ||
1286 | &fbno, &flen, &i); | ||
1287 | if (error) | ||
1288 | goto error0; | ||
1289 | if (i == 0 || flen == 0) { | ||
1290 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
1291 | trace_xfs_alloc_size_noentry(args); | ||
1292 | return 0; | ||
1293 | } | ||
1294 | ASSERT(i == 1); | ||
1295 | xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); | ||
1296 | } else { | ||
1297 | /* | ||
1298 | * Search for a non-busy extent that is large enough. | ||
1299 | * If we are at low space, don't check, or if we fall of | ||
1300 | * the end of the btree, turn off the busy check and | ||
1301 | * restart. | ||
1302 | */ | ||
1303 | for (;;) { | ||
1304 | error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); | ||
1305 | if (error) | ||
1306 | goto error0; | ||
1307 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1308 | |||
1309 | xfs_alloc_compute_aligned(args, fbno, flen, | ||
1310 | &rbno, &rlen); | ||
1311 | |||
1312 | if (rlen >= args->maxlen) | ||
1313 | break; | ||
1314 | |||
1315 | error = xfs_btree_increment(cnt_cur, 0, &i); | ||
1316 | if (error) | ||
1317 | goto error0; | ||
1318 | if (i == 0) { | ||
1319 | /* | ||
1320 | * Our only valid extents must have been busy. | ||
1321 | * Make it unbusy by forcing the log out and | ||
1322 | * retrying. If we've been here before, forcing | ||
1323 | * the log isn't making the extents available, | ||
1324 | * which means they have probably been freed in | ||
1325 | * this transaction. In that case, we have to | ||
1326 | * give up on them and we'll attempt a minlen | ||
1327 | * allocation the next time around. | ||
1328 | */ | ||
1329 | xfs_btree_del_cursor(cnt_cur, | ||
1330 | XFS_BTREE_NOERROR); | ||
1331 | trace_xfs_alloc_size_busy(args); | ||
1332 | if (!forced++) | ||
1333 | xfs_log_force(args->mp, XFS_LOG_SYNC); | ||
1334 | goto restart; | ||
1335 | } | ||
1336 | } | ||
1337 | } | ||
1338 | |||
1339 | /* | ||
1340 | * In the first case above, we got the last entry in the | ||
1341 | * by-size btree. Now we check to see if the space hits maxlen | ||
1342 | * once aligned; if not, we search left for something better. | ||
1343 | * This can't happen in the second case above. | ||
1344 | */ | ||
1345 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); | ||
1346 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || | ||
1347 | (rlen <= flen && rbno + rlen <= fbno + flen), error0); | ||
1348 | if (rlen < args->maxlen) { | ||
1349 | xfs_agblock_t bestfbno; | ||
1350 | xfs_extlen_t bestflen; | ||
1351 | xfs_agblock_t bestrbno; | ||
1352 | xfs_extlen_t bestrlen; | ||
1353 | |||
1354 | bestrlen = rlen; | ||
1355 | bestrbno = rbno; | ||
1356 | bestflen = flen; | ||
1357 | bestfbno = fbno; | ||
1358 | for (;;) { | ||
1359 | if ((error = xfs_btree_decrement(cnt_cur, 0, &i))) | ||
1360 | goto error0; | ||
1361 | if (i == 0) | ||
1362 | break; | ||
1363 | if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, | ||
1364 | &i))) | ||
1365 | goto error0; | ||
1366 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1367 | if (flen < bestrlen) | ||
1368 | break; | ||
1369 | xfs_alloc_compute_aligned(args, fbno, flen, | ||
1370 | &rbno, &rlen); | ||
1371 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); | ||
1372 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || | ||
1373 | (rlen <= flen && rbno + rlen <= fbno + flen), | ||
1374 | error0); | ||
1375 | if (rlen > bestrlen) { | ||
1376 | bestrlen = rlen; | ||
1377 | bestrbno = rbno; | ||
1378 | bestflen = flen; | ||
1379 | bestfbno = fbno; | ||
1380 | if (rlen == args->maxlen) | ||
1381 | break; | ||
1382 | } | ||
1383 | } | ||
1384 | if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, | ||
1385 | &i))) | ||
1386 | goto error0; | ||
1387 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1388 | rlen = bestrlen; | ||
1389 | rbno = bestrbno; | ||
1390 | flen = bestflen; | ||
1391 | fbno = bestfbno; | ||
1392 | } | ||
1393 | args->wasfromfl = 0; | ||
1394 | /* | ||
1395 | * Fix up the length. | ||
1396 | */ | ||
1397 | args->len = rlen; | ||
1398 | if (rlen < args->minlen) { | ||
1399 | if (!forced++) { | ||
1400 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
1401 | trace_xfs_alloc_size_busy(args); | ||
1402 | xfs_log_force(args->mp, XFS_LOG_SYNC); | ||
1403 | goto restart; | ||
1404 | } | ||
1405 | goto out_nominleft; | ||
1406 | } | ||
1407 | xfs_alloc_fix_len(args); | ||
1408 | |||
1409 | if (!xfs_alloc_fix_minleft(args)) | ||
1410 | goto out_nominleft; | ||
1411 | rlen = args->len; | ||
1412 | XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); | ||
1413 | /* | ||
1414 | * Allocate and initialize a cursor for the by-block tree. | ||
1415 | */ | ||
1416 | bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | ||
1417 | args->agno, XFS_BTNUM_BNO); | ||
1418 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, | ||
1419 | rbno, rlen, XFSA_FIXUP_CNT_OK))) | ||
1420 | goto error0; | ||
1421 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
1422 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | ||
1423 | cnt_cur = bno_cur = NULL; | ||
1424 | args->len = rlen; | ||
1425 | args->agbno = rbno; | ||
1426 | XFS_WANT_CORRUPTED_GOTO( | ||
1427 | args->agbno + args->len <= | ||
1428 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), | ||
1429 | error0); | ||
1430 | trace_xfs_alloc_size_done(args); | ||
1431 | return 0; | ||
1432 | |||
1433 | error0: | ||
1434 | trace_xfs_alloc_size_error(args); | ||
1435 | if (cnt_cur) | ||
1436 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); | ||
1437 | if (bno_cur) | ||
1438 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); | ||
1439 | return error; | ||
1440 | |||
1441 | out_nominleft: | ||
1442 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
1443 | trace_xfs_alloc_size_nominleft(args); | ||
1444 | args->agbno = NULLAGBLOCK; | ||
1445 | return 0; | ||
1446 | } | ||
1447 | |||
1448 | /* | ||
1449 | * Deal with the case where only small freespaces remain. | ||
1450 | * Either return the contents of the last freespace record, | ||
1451 | * or allocate space from the freelist if there is nothing in the tree. | ||
1452 | */ | ||
1453 | STATIC int /* error */ | ||
1454 | xfs_alloc_ag_vextent_small( | ||
1455 | xfs_alloc_arg_t *args, /* allocation argument structure */ | ||
1456 | xfs_btree_cur_t *ccur, /* by-size cursor */ | ||
1457 | xfs_agblock_t *fbnop, /* result block number */ | ||
1458 | xfs_extlen_t *flenp, /* result length */ | ||
1459 | int *stat) /* status: 0-freelist, 1-normal/none */ | ||
1460 | { | ||
1461 | int error; | ||
1462 | xfs_agblock_t fbno; | ||
1463 | xfs_extlen_t flen; | ||
1464 | int i; | ||
1465 | |||
1466 | if ((error = xfs_btree_decrement(ccur, 0, &i))) | ||
1467 | goto error0; | ||
1468 | if (i) { | ||
1469 | if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) | ||
1470 | goto error0; | ||
1471 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1472 | } | ||
1473 | /* | ||
1474 | * Nothing in the btree, try the freelist. Make sure | ||
1475 | * to respect minleft even when pulling from the | ||
1476 | * freelist. | ||
1477 | */ | ||
1478 | else if (args->minlen == 1 && args->alignment == 1 && !args->isfl && | ||
1479 | (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) | ||
1480 | > args->minleft)) { | ||
1481 | error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); | ||
1482 | if (error) | ||
1483 | goto error0; | ||
1484 | if (fbno != NULLAGBLOCK) { | ||
1485 | xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, | ||
1486 | args->userdata); | ||
1487 | |||
1488 | if (args->userdata) { | ||
1489 | xfs_buf_t *bp; | ||
1490 | |||
1491 | bp = xfs_btree_get_bufs(args->mp, args->tp, | ||
1492 | args->agno, fbno, 0); | ||
1493 | xfs_trans_binval(args->tp, bp); | ||
1494 | } | ||
1495 | args->len = 1; | ||
1496 | args->agbno = fbno; | ||
1497 | XFS_WANT_CORRUPTED_GOTO( | ||
1498 | args->agbno + args->len <= | ||
1499 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), | ||
1500 | error0); | ||
1501 | args->wasfromfl = 1; | ||
1502 | trace_xfs_alloc_small_freelist(args); | ||
1503 | *stat = 0; | ||
1504 | return 0; | ||
1505 | } | ||
1506 | /* | ||
1507 | * Nothing in the freelist. | ||
1508 | */ | ||
1509 | else | ||
1510 | flen = 0; | ||
1511 | } | ||
1512 | /* | ||
1513 | * Can't allocate from the freelist for some reason. | ||
1514 | */ | ||
1515 | else { | ||
1516 | fbno = NULLAGBLOCK; | ||
1517 | flen = 0; | ||
1518 | } | ||
1519 | /* | ||
1520 | * Can't do the allocation, give up. | ||
1521 | */ | ||
1522 | if (flen < args->minlen) { | ||
1523 | args->agbno = NULLAGBLOCK; | ||
1524 | trace_xfs_alloc_small_notenough(args); | ||
1525 | flen = 0; | ||
1526 | } | ||
1527 | *fbnop = fbno; | ||
1528 | *flenp = flen; | ||
1529 | *stat = 1; | ||
1530 | trace_xfs_alloc_small_done(args); | ||
1531 | return 0; | ||
1532 | |||
1533 | error0: | ||
1534 | trace_xfs_alloc_small_error(args); | ||
1535 | return error; | ||
1536 | } | ||
1537 | |||
1538 | /* | ||
1539 | * Free the extent starting at agno/bno for length. | ||
1540 | */ | ||
1541 | STATIC int /* error */ | ||
1542 | xfs_free_ag_extent( | ||
1543 | xfs_trans_t *tp, /* transaction pointer */ | ||
1544 | xfs_buf_t *agbp, /* buffer for a.g. freelist header */ | ||
1545 | xfs_agnumber_t agno, /* allocation group number */ | ||
1546 | xfs_agblock_t bno, /* starting block number */ | ||
1547 | xfs_extlen_t len, /* length of extent */ | ||
1548 | int isfl) /* set if is freelist blocks - no sb acctg */ | ||
1549 | { | ||
1550 | xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ | ||
1551 | xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ | ||
1552 | int error; /* error return value */ | ||
1553 | xfs_agblock_t gtbno; /* start of right neighbor block */ | ||
1554 | xfs_extlen_t gtlen; /* length of right neighbor block */ | ||
1555 | int haveleft; /* have a left neighbor block */ | ||
1556 | int haveright; /* have a right neighbor block */ | ||
1557 | int i; /* temp, result code */ | ||
1558 | xfs_agblock_t ltbno; /* start of left neighbor block */ | ||
1559 | xfs_extlen_t ltlen; /* length of left neighbor block */ | ||
1560 | xfs_mount_t *mp; /* mount point struct for filesystem */ | ||
1561 | xfs_agblock_t nbno; /* new starting block of freespace */ | ||
1562 | xfs_extlen_t nlen; /* new length of freespace */ | ||
1563 | xfs_perag_t *pag; /* per allocation group data */ | ||
1564 | |||
1565 | mp = tp->t_mountp; | ||
1566 | /* | ||
1567 | * Allocate and initialize a cursor for the by-block btree. | ||
1568 | */ | ||
1569 | bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); | ||
1570 | cnt_cur = NULL; | ||
1571 | /* | ||
1572 | * Look for a neighboring block on the left (lower block numbers) | ||
1573 | * that is contiguous with this space. | ||
1574 | */ | ||
1575 | if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft))) | ||
1576 | goto error0; | ||
1577 | if (haveleft) { | ||
1578 | /* | ||
1579 | * There is a block to our left. | ||
1580 | */ | ||
1581 | if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i))) | ||
1582 | goto error0; | ||
1583 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1584 | /* | ||
1585 | * It's not contiguous, though. | ||
1586 | */ | ||
1587 | if (ltbno + ltlen < bno) | ||
1588 | haveleft = 0; | ||
1589 | else { | ||
1590 | /* | ||
1591 | * If this failure happens the request to free this | ||
1592 | * space was invalid, it's (partly) already free. | ||
1593 | * Very bad. | ||
1594 | */ | ||
1595 | XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0); | ||
1596 | } | ||
1597 | } | ||
1598 | /* | ||
1599 | * Look for a neighboring block on the right (higher block numbers) | ||
1600 | * that is contiguous with this space. | ||
1601 | */ | ||
1602 | if ((error = xfs_btree_increment(bno_cur, 0, &haveright))) | ||
1603 | goto error0; | ||
1604 | if (haveright) { | ||
1605 | /* | ||
1606 | * There is a block to our right. | ||
1607 | */ | ||
1608 | if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i))) | ||
1609 | goto error0; | ||
1610 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1611 | /* | ||
1612 | * It's not contiguous, though. | ||
1613 | */ | ||
1614 | if (bno + len < gtbno) | ||
1615 | haveright = 0; | ||
1616 | else { | ||
1617 | /* | ||
1618 | * If this failure happens the request to free this | ||
1619 | * space was invalid, it's (partly) already free. | ||
1620 | * Very bad. | ||
1621 | */ | ||
1622 | XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0); | ||
1623 | } | ||
1624 | } | ||
1625 | /* | ||
1626 | * Now allocate and initialize a cursor for the by-size tree. | ||
1627 | */ | ||
1628 | cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT); | ||
1629 | /* | ||
1630 | * Have both left and right contiguous neighbors. | ||
1631 | * Merge all three into a single free block. | ||
1632 | */ | ||
1633 | if (haveleft && haveright) { | ||
1634 | /* | ||
1635 | * Delete the old by-size entry on the left. | ||
1636 | */ | ||
1637 | if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) | ||
1638 | goto error0; | ||
1639 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1640 | if ((error = xfs_btree_delete(cnt_cur, &i))) | ||
1641 | goto error0; | ||
1642 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1643 | /* | ||
1644 | * Delete the old by-size entry on the right. | ||
1645 | */ | ||
1646 | if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) | ||
1647 | goto error0; | ||
1648 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1649 | if ((error = xfs_btree_delete(cnt_cur, &i))) | ||
1650 | goto error0; | ||
1651 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1652 | /* | ||
1653 | * Delete the old by-block entry for the right block. | ||
1654 | */ | ||
1655 | if ((error = xfs_btree_delete(bno_cur, &i))) | ||
1656 | goto error0; | ||
1657 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1658 | /* | ||
1659 | * Move the by-block cursor back to the left neighbor. | ||
1660 | */ | ||
1661 | if ((error = xfs_btree_decrement(bno_cur, 0, &i))) | ||
1662 | goto error0; | ||
1663 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1664 | #ifdef DEBUG | ||
1665 | /* | ||
1666 | * Check that this is the right record: delete didn't | ||
1667 | * mangle the cursor. | ||
1668 | */ | ||
1669 | { | ||
1670 | xfs_agblock_t xxbno; | ||
1671 | xfs_extlen_t xxlen; | ||
1672 | |||
1673 | if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, | ||
1674 | &i))) | ||
1675 | goto error0; | ||
1676 | XFS_WANT_CORRUPTED_GOTO( | ||
1677 | i == 1 && xxbno == ltbno && xxlen == ltlen, | ||
1678 | error0); | ||
1679 | } | ||
1680 | #endif | ||
1681 | /* | ||
1682 | * Update remaining by-block entry to the new, joined block. | ||
1683 | */ | ||
1684 | nbno = ltbno; | ||
1685 | nlen = len + ltlen + gtlen; | ||
1686 | if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) | ||
1687 | goto error0; | ||
1688 | } | ||
1689 | /* | ||
1690 | * Have only a left contiguous neighbor. | ||
1691 | * Merge it together with the new freespace. | ||
1692 | */ | ||
1693 | else if (haveleft) { | ||
1694 | /* | ||
1695 | * Delete the old by-size entry on the left. | ||
1696 | */ | ||
1697 | if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) | ||
1698 | goto error0; | ||
1699 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1700 | if ((error = xfs_btree_delete(cnt_cur, &i))) | ||
1701 | goto error0; | ||
1702 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1703 | /* | ||
1704 | * Back up the by-block cursor to the left neighbor, and | ||
1705 | * update its length. | ||
1706 | */ | ||
1707 | if ((error = xfs_btree_decrement(bno_cur, 0, &i))) | ||
1708 | goto error0; | ||
1709 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1710 | nbno = ltbno; | ||
1711 | nlen = len + ltlen; | ||
1712 | if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) | ||
1713 | goto error0; | ||
1714 | } | ||
1715 | /* | ||
1716 | * Have only a right contiguous neighbor. | ||
1717 | * Merge it together with the new freespace. | ||
1718 | */ | ||
1719 | else if (haveright) { | ||
1720 | /* | ||
1721 | * Delete the old by-size entry on the right. | ||
1722 | */ | ||
1723 | if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) | ||
1724 | goto error0; | ||
1725 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1726 | if ((error = xfs_btree_delete(cnt_cur, &i))) | ||
1727 | goto error0; | ||
1728 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1729 | /* | ||
1730 | * Update the starting block and length of the right | ||
1731 | * neighbor in the by-block tree. | ||
1732 | */ | ||
1733 | nbno = bno; | ||
1734 | nlen = len + gtlen; | ||
1735 | if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) | ||
1736 | goto error0; | ||
1737 | } | ||
1738 | /* | ||
1739 | * No contiguous neighbors. | ||
1740 | * Insert the new freespace into the by-block tree. | ||
1741 | */ | ||
1742 | else { | ||
1743 | nbno = bno; | ||
1744 | nlen = len; | ||
1745 | if ((error = xfs_btree_insert(bno_cur, &i))) | ||
1746 | goto error0; | ||
1747 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1748 | } | ||
1749 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | ||
1750 | bno_cur = NULL; | ||
1751 | /* | ||
1752 | * In all cases we need to insert the new freespace in the by-size tree. | ||
1753 | */ | ||
1754 | if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) | ||
1755 | goto error0; | ||
1756 | XFS_WANT_CORRUPTED_GOTO(i == 0, error0); | ||
1757 | if ((error = xfs_btree_insert(cnt_cur, &i))) | ||
1758 | goto error0; | ||
1759 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1760 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
1761 | cnt_cur = NULL; | ||
1762 | |||
1763 | /* | ||
1764 | * Update the freespace totals in the ag and superblock. | ||
1765 | */ | ||
1766 | pag = xfs_perag_get(mp, agno); | ||
1767 | error = xfs_alloc_update_counters(tp, pag, agbp, len); | ||
1768 | xfs_perag_put(pag); | ||
1769 | if (error) | ||
1770 | goto error0; | ||
1771 | |||
1772 | if (!isfl) | ||
1773 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); | ||
1774 | XFS_STATS_INC(xs_freex); | ||
1775 | XFS_STATS_ADD(xs_freeb, len); | ||
1776 | |||
1777 | trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); | ||
1778 | |||
1779 | return 0; | ||
1780 | |||
1781 | error0: | ||
1782 | trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1); | ||
1783 | if (bno_cur) | ||
1784 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); | ||
1785 | if (cnt_cur) | ||
1786 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); | ||
1787 | return error; | ||
1788 | } | ||
1789 | |||
1790 | /* | ||
1791 | * Visible (exported) allocation/free functions. | ||
1792 | * Some of these are used just by xfs_alloc_btree.c and this file. | ||
1793 | */ | ||
1794 | |||
1795 | /* | ||
1796 | * Compute and fill in value of m_ag_maxlevels. | ||
1797 | */ | ||
1798 | void | ||
1799 | xfs_alloc_compute_maxlevels( | ||
1800 | xfs_mount_t *mp) /* file system mount structure */ | ||
1801 | { | ||
1802 | int level; | ||
1803 | uint maxblocks; | ||
1804 | uint maxleafents; | ||
1805 | int minleafrecs; | ||
1806 | int minnoderecs; | ||
1807 | |||
1808 | maxleafents = (mp->m_sb.sb_agblocks + 1) / 2; | ||
1809 | minleafrecs = mp->m_alloc_mnr[0]; | ||
1810 | minnoderecs = mp->m_alloc_mnr[1]; | ||
1811 | maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; | ||
1812 | for (level = 1; maxblocks > 1; level++) | ||
1813 | maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; | ||
1814 | mp->m_ag_maxlevels = level; | ||
1815 | } | ||
1816 | |||
1817 | /* | ||
1818 | * Find the length of the longest extent in an AG. | ||
1819 | */ | ||
1820 | xfs_extlen_t | ||
1821 | xfs_alloc_longest_free_extent( | ||
1822 | struct xfs_mount *mp, | ||
1823 | struct xfs_perag *pag) | ||
1824 | { | ||
1825 | xfs_extlen_t need, delta = 0; | ||
1826 | |||
1827 | need = XFS_MIN_FREELIST_PAG(pag, mp); | ||
1828 | if (need > pag->pagf_flcount) | ||
1829 | delta = need - pag->pagf_flcount; | ||
1830 | |||
1831 | if (pag->pagf_longest > delta) | ||
1832 | return pag->pagf_longest - delta; | ||
1833 | return pag->pagf_flcount > 0 || pag->pagf_longest > 0; | ||
1834 | } | ||
1835 | |||
1836 | /* | ||
1837 | * Decide whether to use this allocation group for this allocation. | ||
1838 | * If so, fix up the btree freelist's size. | ||
1839 | */ | ||
1840 | STATIC int /* error */ | ||
1841 | xfs_alloc_fix_freelist( | ||
1842 | xfs_alloc_arg_t *args, /* allocation argument structure */ | ||
1843 | int flags) /* XFS_ALLOC_FLAG_... */ | ||
1844 | { | ||
1845 | xfs_buf_t *agbp; /* agf buffer pointer */ | ||
1846 | xfs_agf_t *agf; /* a.g. freespace structure pointer */ | ||
1847 | xfs_buf_t *agflbp;/* agfl buffer pointer */ | ||
1848 | xfs_agblock_t bno; /* freelist block */ | ||
1849 | xfs_extlen_t delta; /* new blocks needed in freelist */ | ||
1850 | int error; /* error result code */ | ||
1851 | xfs_extlen_t longest;/* longest extent in allocation group */ | ||
1852 | xfs_mount_t *mp; /* file system mount point structure */ | ||
1853 | xfs_extlen_t need; /* total blocks needed in freelist */ | ||
1854 | xfs_perag_t *pag; /* per-ag information structure */ | ||
1855 | xfs_alloc_arg_t targs; /* local allocation arguments */ | ||
1856 | xfs_trans_t *tp; /* transaction pointer */ | ||
1857 | |||
1858 | mp = args->mp; | ||
1859 | |||
1860 | pag = args->pag; | ||
1861 | tp = args->tp; | ||
1862 | if (!pag->pagf_init) { | ||
1863 | if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, | ||
1864 | &agbp))) | ||
1865 | return error; | ||
1866 | if (!pag->pagf_init) { | ||
1867 | ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); | ||
1868 | ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); | ||
1869 | args->agbp = NULL; | ||
1870 | return 0; | ||
1871 | } | ||
1872 | } else | ||
1873 | agbp = NULL; | ||
1874 | |||
1875 | /* | ||
1876 | * If this is a metadata preferred pag and we are user data | ||
1877 | * then try somewhere else if we are not being asked to | ||
1878 | * try harder at this point | ||
1879 | */ | ||
1880 | if (pag->pagf_metadata && args->userdata && | ||
1881 | (flags & XFS_ALLOC_FLAG_TRYLOCK)) { | ||
1882 | ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); | ||
1883 | args->agbp = NULL; | ||
1884 | return 0; | ||
1885 | } | ||
1886 | |||
1887 | if (!(flags & XFS_ALLOC_FLAG_FREEING)) { | ||
1888 | /* | ||
1889 | * If it looks like there isn't a long enough extent, or enough | ||
1890 | * total blocks, reject it. | ||
1891 | */ | ||
1892 | need = XFS_MIN_FREELIST_PAG(pag, mp); | ||
1893 | longest = xfs_alloc_longest_free_extent(mp, pag); | ||
1894 | if ((args->minlen + args->alignment + args->minalignslop - 1) > | ||
1895 | longest || | ||
1896 | ((int)(pag->pagf_freeblks + pag->pagf_flcount - | ||
1897 | need - args->total) < (int)args->minleft)) { | ||
1898 | if (agbp) | ||
1899 | xfs_trans_brelse(tp, agbp); | ||
1900 | args->agbp = NULL; | ||
1901 | return 0; | ||
1902 | } | ||
1903 | } | ||
1904 | |||
1905 | /* | ||
1906 | * Get the a.g. freespace buffer. | ||
1907 | * Can fail if we're not blocking on locks, and it's held. | ||
1908 | */ | ||
1909 | if (agbp == NULL) { | ||
1910 | if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, | ||
1911 | &agbp))) | ||
1912 | return error; | ||
1913 | if (agbp == NULL) { | ||
1914 | ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); | ||
1915 | ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); | ||
1916 | args->agbp = NULL; | ||
1917 | return 0; | ||
1918 | } | ||
1919 | } | ||
1920 | /* | ||
1921 | * Figure out how many blocks we should have in the freelist. | ||
1922 | */ | ||
1923 | agf = XFS_BUF_TO_AGF(agbp); | ||
1924 | need = XFS_MIN_FREELIST(agf, mp); | ||
1925 | /* | ||
1926 | * If there isn't enough total or single-extent, reject it. | ||
1927 | */ | ||
1928 | if (!(flags & XFS_ALLOC_FLAG_FREEING)) { | ||
1929 | delta = need > be32_to_cpu(agf->agf_flcount) ? | ||
1930 | (need - be32_to_cpu(agf->agf_flcount)) : 0; | ||
1931 | longest = be32_to_cpu(agf->agf_longest); | ||
1932 | longest = (longest > delta) ? (longest - delta) : | ||
1933 | (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); | ||
1934 | if ((args->minlen + args->alignment + args->minalignslop - 1) > | ||
1935 | longest || | ||
1936 | ((int)(be32_to_cpu(agf->agf_freeblks) + | ||
1937 | be32_to_cpu(agf->agf_flcount) - need - args->total) < | ||
1938 | (int)args->minleft)) { | ||
1939 | xfs_trans_brelse(tp, agbp); | ||
1940 | args->agbp = NULL; | ||
1941 | return 0; | ||
1942 | } | ||
1943 | } | ||
1944 | /* | ||
1945 | * Make the freelist shorter if it's too long. | ||
1946 | */ | ||
1947 | while (be32_to_cpu(agf->agf_flcount) > need) { | ||
1948 | xfs_buf_t *bp; | ||
1949 | |||
1950 | error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); | ||
1951 | if (error) | ||
1952 | return error; | ||
1953 | if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))) | ||
1954 | return error; | ||
1955 | bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); | ||
1956 | xfs_trans_binval(tp, bp); | ||
1957 | } | ||
1958 | /* | ||
1959 | * Initialize the args structure. | ||
1960 | */ | ||
1961 | memset(&targs, 0, sizeof(targs)); | ||
1962 | targs.tp = tp; | ||
1963 | targs.mp = mp; | ||
1964 | targs.agbp = agbp; | ||
1965 | targs.agno = args->agno; | ||
1966 | targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; | ||
1967 | targs.type = XFS_ALLOCTYPE_THIS_AG; | ||
1968 | targs.pag = pag; | ||
1969 | if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp))) | ||
1970 | return error; | ||
1971 | /* | ||
1972 | * Make the freelist longer if it's too short. | ||
1973 | */ | ||
1974 | while (be32_to_cpu(agf->agf_flcount) < need) { | ||
1975 | targs.agbno = 0; | ||
1976 | targs.maxlen = need - be32_to_cpu(agf->agf_flcount); | ||
1977 | /* | ||
1978 | * Allocate as many blocks as possible at once. | ||
1979 | */ | ||
1980 | if ((error = xfs_alloc_ag_vextent(&targs))) { | ||
1981 | xfs_trans_brelse(tp, agflbp); | ||
1982 | return error; | ||
1983 | } | ||
1984 | /* | ||
1985 | * Stop if we run out. Won't happen if callers are obeying | ||
1986 | * the restrictions correctly. Can happen for free calls | ||
1987 | * on a completely full ag. | ||
1988 | */ | ||
1989 | if (targs.agbno == NULLAGBLOCK) { | ||
1990 | if (flags & XFS_ALLOC_FLAG_FREEING) | ||
1991 | break; | ||
1992 | xfs_trans_brelse(tp, agflbp); | ||
1993 | args->agbp = NULL; | ||
1994 | return 0; | ||
1995 | } | ||
1996 | /* | ||
1997 | * Put each allocated block on the list. | ||
1998 | */ | ||
1999 | for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { | ||
2000 | error = xfs_alloc_put_freelist(tp, agbp, | ||
2001 | agflbp, bno, 0); | ||
2002 | if (error) | ||
2003 | return error; | ||
2004 | } | ||
2005 | } | ||
2006 | xfs_trans_brelse(tp, agflbp); | ||
2007 | args->agbp = agbp; | ||
2008 | return 0; | ||
2009 | } | ||
2010 | |||
2011 | /* | ||
2012 | * Get a block from the freelist. | ||
2013 | * Returns with the buffer for the block gotten. | ||
2014 | */ | ||
2015 | int /* error */ | ||
2016 | xfs_alloc_get_freelist( | ||
2017 | xfs_trans_t *tp, /* transaction pointer */ | ||
2018 | xfs_buf_t *agbp, /* buffer containing the agf structure */ | ||
2019 | xfs_agblock_t *bnop, /* block address retrieved from freelist */ | ||
2020 | int btreeblk) /* destination is a AGF btree */ | ||
2021 | { | ||
2022 | xfs_agf_t *agf; /* a.g. freespace structure */ | ||
2023 | xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ | ||
2024 | xfs_agblock_t bno; /* block number returned */ | ||
2025 | __be32 *agfl_bno; | ||
2026 | int error; | ||
2027 | int logflags; | ||
2028 | xfs_mount_t *mp = tp->t_mountp; | ||
2029 | xfs_perag_t *pag; /* per allocation group data */ | ||
2030 | |||
2031 | /* | ||
2032 | * Freelist is empty, give up. | ||
2033 | */ | ||
2034 | agf = XFS_BUF_TO_AGF(agbp); | ||
2035 | if (!agf->agf_flcount) { | ||
2036 | *bnop = NULLAGBLOCK; | ||
2037 | return 0; | ||
2038 | } | ||
2039 | /* | ||
2040 | * Read the array of free blocks. | ||
2041 | */ | ||
2042 | error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno), | ||
2043 | &agflbp); | ||
2044 | if (error) | ||
2045 | return error; | ||
2046 | |||
2047 | |||
2048 | /* | ||
2049 | * Get the block number and update the data structures. | ||
2050 | */ | ||
2051 | agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); | ||
2052 | bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); | ||
2053 | be32_add_cpu(&agf->agf_flfirst, 1); | ||
2054 | xfs_trans_brelse(tp, agflbp); | ||
2055 | if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) | ||
2056 | agf->agf_flfirst = 0; | ||
2057 | |||
2058 | pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); | ||
2059 | be32_add_cpu(&agf->agf_flcount, -1); | ||
2060 | xfs_trans_agflist_delta(tp, -1); | ||
2061 | pag->pagf_flcount--; | ||
2062 | xfs_perag_put(pag); | ||
2063 | |||
2064 | logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; | ||
2065 | if (btreeblk) { | ||
2066 | be32_add_cpu(&agf->agf_btreeblks, 1); | ||
2067 | pag->pagf_btreeblks++; | ||
2068 | logflags |= XFS_AGF_BTREEBLKS; | ||
2069 | } | ||
2070 | |||
2071 | xfs_alloc_log_agf(tp, agbp, logflags); | ||
2072 | *bnop = bno; | ||
2073 | |||
2074 | return 0; | ||
2075 | } | ||
2076 | |||
2077 | /* | ||
2078 | * Log the given fields from the agf structure. | ||
2079 | */ | ||
2080 | void | ||
2081 | xfs_alloc_log_agf( | ||
2082 | xfs_trans_t *tp, /* transaction pointer */ | ||
2083 | xfs_buf_t *bp, /* buffer for a.g. freelist header */ | ||
2084 | int fields) /* mask of fields to be logged (XFS_AGF_...) */ | ||
2085 | { | ||
2086 | int first; /* first byte offset */ | ||
2087 | int last; /* last byte offset */ | ||
2088 | static const short offsets[] = { | ||
2089 | offsetof(xfs_agf_t, agf_magicnum), | ||
2090 | offsetof(xfs_agf_t, agf_versionnum), | ||
2091 | offsetof(xfs_agf_t, agf_seqno), | ||
2092 | offsetof(xfs_agf_t, agf_length), | ||
2093 | offsetof(xfs_agf_t, agf_roots[0]), | ||
2094 | offsetof(xfs_agf_t, agf_levels[0]), | ||
2095 | offsetof(xfs_agf_t, agf_flfirst), | ||
2096 | offsetof(xfs_agf_t, agf_fllast), | ||
2097 | offsetof(xfs_agf_t, agf_flcount), | ||
2098 | offsetof(xfs_agf_t, agf_freeblks), | ||
2099 | offsetof(xfs_agf_t, agf_longest), | ||
2100 | offsetof(xfs_agf_t, agf_btreeblks), | ||
2101 | offsetof(xfs_agf_t, agf_uuid), | ||
2102 | sizeof(xfs_agf_t) | ||
2103 | }; | ||
2104 | |||
2105 | trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_); | ||
2106 | |||
2107 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF); | ||
2108 | |||
2109 | xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); | ||
2110 | xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); | ||
2111 | } | ||
2112 | |||
2113 | /* | ||
2114 | * Interface for inode allocation to force the pag data to be initialized. | ||
2115 | */ | ||
2116 | int /* error */ | ||
2117 | xfs_alloc_pagf_init( | ||
2118 | xfs_mount_t *mp, /* file system mount structure */ | ||
2119 | xfs_trans_t *tp, /* transaction pointer */ | ||
2120 | xfs_agnumber_t agno, /* allocation group number */ | ||
2121 | int flags) /* XFS_ALLOC_FLAGS_... */ | ||
2122 | { | ||
2123 | xfs_buf_t *bp; | ||
2124 | int error; | ||
2125 | |||
2126 | if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp))) | ||
2127 | return error; | ||
2128 | if (bp) | ||
2129 | xfs_trans_brelse(tp, bp); | ||
2130 | return 0; | ||
2131 | } | ||
2132 | |||
2133 | /* | ||
2134 | * Put the block on the freelist for the allocation group. | ||
2135 | */ | ||
2136 | int /* error */ | ||
2137 | xfs_alloc_put_freelist( | ||
2138 | xfs_trans_t *tp, /* transaction pointer */ | ||
2139 | xfs_buf_t *agbp, /* buffer for a.g. freelist header */ | ||
2140 | xfs_buf_t *agflbp,/* buffer for a.g. free block array */ | ||
2141 | xfs_agblock_t bno, /* block being freed */ | ||
2142 | int btreeblk) /* block came from a AGF btree */ | ||
2143 | { | ||
2144 | xfs_agf_t *agf; /* a.g. freespace structure */ | ||
2145 | __be32 *blockp;/* pointer to array entry */ | ||
2146 | int error; | ||
2147 | int logflags; | ||
2148 | xfs_mount_t *mp; /* mount structure */ | ||
2149 | xfs_perag_t *pag; /* per allocation group data */ | ||
2150 | __be32 *agfl_bno; | ||
2151 | int startoff; | ||
2152 | |||
2153 | agf = XFS_BUF_TO_AGF(agbp); | ||
2154 | mp = tp->t_mountp; | ||
2155 | |||
2156 | if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, | ||
2157 | be32_to_cpu(agf->agf_seqno), &agflbp))) | ||
2158 | return error; | ||
2159 | be32_add_cpu(&agf->agf_fllast, 1); | ||
2160 | if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) | ||
2161 | agf->agf_fllast = 0; | ||
2162 | |||
2163 | pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); | ||
2164 | be32_add_cpu(&agf->agf_flcount, 1); | ||
2165 | xfs_trans_agflist_delta(tp, 1); | ||
2166 | pag->pagf_flcount++; | ||
2167 | |||
2168 | logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT; | ||
2169 | if (btreeblk) { | ||
2170 | be32_add_cpu(&agf->agf_btreeblks, -1); | ||
2171 | pag->pagf_btreeblks--; | ||
2172 | logflags |= XFS_AGF_BTREEBLKS; | ||
2173 | } | ||
2174 | xfs_perag_put(pag); | ||
2175 | |||
2176 | xfs_alloc_log_agf(tp, agbp, logflags); | ||
2177 | |||
2178 | ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); | ||
2179 | |||
2180 | agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); | ||
2181 | blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; | ||
2182 | *blockp = cpu_to_be32(bno); | ||
2183 | startoff = (char *)blockp - (char *)agflbp->b_addr; | ||
2184 | |||
2185 | xfs_alloc_log_agf(tp, agbp, logflags); | ||
2186 | |||
2187 | xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF); | ||
2188 | xfs_trans_log_buf(tp, agflbp, startoff, | ||
2189 | startoff + sizeof(xfs_agblock_t) - 1); | ||
2190 | return 0; | ||
2191 | } | ||
2192 | |||
2193 | static bool | ||
2194 | xfs_agf_verify( | ||
2195 | struct xfs_mount *mp, | ||
2196 | struct xfs_buf *bp) | ||
2197 | { | ||
2198 | struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); | ||
2199 | |||
2200 | if (xfs_sb_version_hascrc(&mp->m_sb) && | ||
2201 | !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid)) | ||
2202 | return false; | ||
2203 | |||
2204 | if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && | ||
2205 | XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && | ||
2206 | be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && | ||
2207 | be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && | ||
2208 | be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && | ||
2209 | be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) | ||
2210 | return false; | ||
2211 | |||
2212 | /* | ||
2213 | * during growfs operations, the perag is not fully initialised, | ||
2214 | * so we can't use it for any useful checking. growfs ensures we can't | ||
2215 | * use it by using uncached buffers that don't have the perag attached | ||
2216 | * so we can detect and avoid this problem. | ||
2217 | */ | ||
2218 | if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) | ||
2219 | return false; | ||
2220 | |||
2221 | if (xfs_sb_version_haslazysbcount(&mp->m_sb) && | ||
2222 | be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) | ||
2223 | return false; | ||
2224 | |||
2225 | return true;; | ||
2226 | |||
2227 | } | ||
2228 | |||
2229 | static void | ||
2230 | xfs_agf_read_verify( | ||
2231 | struct xfs_buf *bp) | ||
2232 | { | ||
2233 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
2234 | |||
2235 | if (xfs_sb_version_hascrc(&mp->m_sb) && | ||
2236 | !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) | ||
2237 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
2238 | else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, | ||
2239 | XFS_ERRTAG_ALLOC_READ_AGF, | ||
2240 | XFS_RANDOM_ALLOC_READ_AGF)) | ||
2241 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
2242 | |||
2243 | if (bp->b_error) | ||
2244 | xfs_verifier_error(bp); | ||
2245 | } | ||
2246 | |||
2247 | static void | ||
2248 | xfs_agf_write_verify( | ||
2249 | struct xfs_buf *bp) | ||
2250 | { | ||
2251 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
2252 | struct xfs_buf_log_item *bip = bp->b_fspriv; | ||
2253 | |||
2254 | if (!xfs_agf_verify(mp, bp)) { | ||
2255 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
2256 | xfs_verifier_error(bp); | ||
2257 | return; | ||
2258 | } | ||
2259 | |||
2260 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
2261 | return; | ||
2262 | |||
2263 | if (bip) | ||
2264 | XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); | ||
2265 | |||
2266 | xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); | ||
2267 | } | ||
2268 | |||
/* Buffer ops for AGF buffers: wires up the read and write verifiers above. */
2269 | const struct xfs_buf_ops xfs_agf_buf_ops = { | ||
2270 | .verify_read = xfs_agf_read_verify, | ||
2271 | .verify_write = xfs_agf_write_verify, | ||
2272 | }; | ||
2273 | |||
2274 | /* | ||
2275 | * Read in the allocation group header (free/alloc section). | ||
2276 | */ | ||
2277 | int /* error */ | ||
2278 | xfs_read_agf( | ||
2279 | struct xfs_mount *mp, /* mount point structure */ | ||
2280 | struct xfs_trans *tp, /* transaction pointer */ | ||
2281 | xfs_agnumber_t agno, /* allocation group number */ | ||
2282 | int flags, /* XFS_BUF_ */ | ||
2283 | struct xfs_buf **bpp) /* buffer for the ag freelist header */ | ||
2284 | { | ||
2285 | int error; | ||
2286 | |||
2287 | trace_xfs_read_agf(mp, agno); | ||
2288 | |||
2289 | ASSERT(agno != NULLAGNUMBER); | ||
2290 | error = xfs_trans_read_buf( | ||
2291 | mp, tp, mp->m_ddev_targp, | ||
2292 | XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), | ||
2293 | XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops); | ||
2294 | if (error) | ||
2295 | return error; | ||
2296 | if (!*bpp) | ||
2297 | return 0; | ||
2298 | |||
2299 | ASSERT(!(*bpp)->b_error); | ||
2300 | xfs_buf_set_ref(*bpp, XFS_AGF_REF); | ||
2301 | return 0; | ||
2302 | } | ||
2303 | |||
2304 | /* | ||
2305 | * Read in the allocation group header (free/alloc section). | ||
2306 | */ | ||
2307 | int /* error */ | ||
2308 | xfs_alloc_read_agf( | ||
2309 | struct xfs_mount *mp, /* mount point structure */ | ||
2310 | struct xfs_trans *tp, /* transaction pointer */ | ||
2311 | xfs_agnumber_t agno, /* allocation group number */ | ||
2312 | int flags, /* XFS_ALLOC_FLAG_... */ | ||
2313 | struct xfs_buf **bpp) /* buffer for the ag freelist header */ | ||
2314 | { | ||
2315 | struct xfs_agf *agf; /* ag freelist header */ | ||
2316 | struct xfs_perag *pag; /* per allocation group data */ | ||
2317 | int error; | ||
2318 | |||
2319 | trace_xfs_alloc_read_agf(mp, agno); | ||
2320 | |||
2321 | ASSERT(agno != NULLAGNUMBER); | ||
2322 | error = xfs_read_agf(mp, tp, agno, | ||
2323 | (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, | ||
2324 | bpp); | ||
2325 | if (error) | ||
2326 | return error; | ||
2327 | if (!*bpp) | ||
2328 | return 0; | ||
2329 | ASSERT(!(*bpp)->b_error); | ||
2330 | |||
2331 | agf = XFS_BUF_TO_AGF(*bpp); | ||
2332 | pag = xfs_perag_get(mp, agno); | ||
2333 | if (!pag->pagf_init) { | ||
2334 | pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); | ||
2335 | pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); | ||
2336 | pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); | ||
2337 | pag->pagf_longest = be32_to_cpu(agf->agf_longest); | ||
2338 | pag->pagf_levels[XFS_BTNUM_BNOi] = | ||
2339 | be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); | ||
2340 | pag->pagf_levels[XFS_BTNUM_CNTi] = | ||
2341 | be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); | ||
2342 | spin_lock_init(&pag->pagb_lock); | ||
2343 | pag->pagb_count = 0; | ||
2344 | pag->pagb_tree = RB_ROOT; | ||
2345 | pag->pagf_init = 1; | ||
2346 | } | ||
2347 | #ifdef DEBUG | ||
2348 | else if (!XFS_FORCED_SHUTDOWN(mp)) { | ||
2349 | ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); | ||
2350 | ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); | ||
2351 | ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); | ||
2352 | ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); | ||
2353 | ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == | ||
2354 | be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi])); | ||
2355 | ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] == | ||
2356 | be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); | ||
2357 | } | ||
2358 | #endif | ||
2359 | xfs_perag_put(pag); | ||
2360 | return 0; | ||
2361 | } | ||
2362 | |||
2363 | /* | ||
2364 | * Allocate an extent (variable-size). | ||
2365 | * Depending on the allocation type, we either look in a single allocation | ||
2366 | * group or loop over the allocation groups to find the result. | ||
     | * | ||
     | * Returns 0 both when an extent was allocated and when none could be | ||
     | * found or the arguments failed validation; in the latter cases | ||
     | * args->fsbno is set to NULLFSBLOCK. A non-zero error is returned only | ||
     | * when xfs_alloc_fix_freelist() or xfs_alloc_ag_vextent() fails. On a | ||
     | * successful allocation args->fsbno is rebuilt from args->agno and | ||
     | * args->agbno. | ||
2367 | */ | ||
2368 | int /* error */ | ||
2369 | xfs_alloc_vextent( | ||
2370 | xfs_alloc_arg_t *args) /* allocation argument structure */ | ||
2371 | { | ||
2372 | xfs_agblock_t agsize; /* allocation group size */ | ||
2373 | int error; | ||
2374 | int flags; /* XFS_ALLOC_FLAG_... locking flags */ | ||
2375 | xfs_extlen_t minleft;/* minimum left value, temp copy */ | ||
2376 | xfs_mount_t *mp; /* mount structure pointer */ | ||
2377 | xfs_agnumber_t sagno; /* starting allocation group number */ | ||
2378 | xfs_alloctype_t type; /* input allocation type */ | ||
2379 | int bump_rotor = 0; | ||
2380 | int no_min = 0; | ||
2381 | xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ | ||
2382 | |||
2383 | mp = args->mp; | ||
2384 | type = args->otype = args->type; | ||
2385 | args->agbno = NULLAGBLOCK; | ||
2386 | /* | ||
2387 | * Just fix this up, for the case where the last a.g. is shorter | ||
2388 | * (or there's only one a.g.) and the caller couldn't easily figure | ||
2389 | * that out (xfs_bmap_alloc). | ||
2390 | */ | ||
2391 | agsize = mp->m_sb.sb_agblocks; | ||
2392 | if (args->maxlen > agsize) | ||
2393 | args->maxlen = agsize; | ||
2394 | if (args->alignment == 0) | ||
2395 | args->alignment = 1; | ||
2396 | ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount); | ||
2397 | ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize); | ||
2398 | ASSERT(args->minlen <= args->maxlen); | ||
2399 | ASSERT(args->minlen <= agsize); | ||
2400 | ASSERT(args->mod < args->prod); | ||
2401 | if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount || | ||
2402 | XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize || | ||
2403 | args->minlen > args->maxlen || args->minlen > agsize || | ||
2404 | args->mod >= args->prod) { | ||
2405 | args->fsbno = NULLFSBLOCK; | ||
2406 | trace_xfs_alloc_vextent_badargs(args); | ||
2407 | return 0; | ||
2408 | } | ||
2409 | minleft = args->minleft; | ||
2410 | |||
2411 | switch (type) { | ||
2412 | case XFS_ALLOCTYPE_THIS_AG: | ||
2413 | case XFS_ALLOCTYPE_NEAR_BNO: | ||
2414 | case XFS_ALLOCTYPE_THIS_BNO: | ||
2415 | /* | ||
2416 | * These three force us into a single a.g. | ||
2417 | */ | ||
2418 | args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); | ||
2419 | args->pag = xfs_perag_get(mp, args->agno); | ||
2420 | args->minleft = 0; | ||
2421 | error = xfs_alloc_fix_freelist(args, 0); | ||
2422 | args->minleft = minleft; | ||
2423 | if (error) { | ||
2424 | trace_xfs_alloc_vextent_nofix(args); | ||
2425 | goto error0; | ||
2426 | } | ||
2427 | if (!args->agbp) { | ||
2428 | trace_xfs_alloc_vextent_noagbp(args); | ||
2429 | break; | ||
2430 | } | ||
2431 | args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); | ||
2432 | if ((error = xfs_alloc_ag_vextent(args))) | ||
2433 | goto error0; | ||
2434 | break; | ||
2435 | case XFS_ALLOCTYPE_START_BNO: | ||
2436 | /* | ||
2437 | * Try near allocation first, then anywhere-in-ag after | ||
2438 | * the first a.g. fails. | ||
2439 | */ | ||
2440 | if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) && | ||
2441 | (mp->m_flags & XFS_MOUNT_32BITINODES)) { | ||
2442 | args->fsbno = XFS_AGB_TO_FSB(mp, | ||
2443 | ((mp->m_agfrotor / rotorstep) % | ||
2444 | mp->m_sb.sb_agcount), 0); | ||
2445 | bump_rotor = 1; | ||
2446 | } | ||
2447 | args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); | ||
2448 | args->type = XFS_ALLOCTYPE_NEAR_BNO; | ||
2449 | /* FALLTHROUGH */ | ||
2450 | case XFS_ALLOCTYPE_ANY_AG: | ||
2451 | case XFS_ALLOCTYPE_START_AG: | ||
2452 | case XFS_ALLOCTYPE_FIRST_AG: | ||
2453 | /* | ||
2454 | * Rotate through the allocation groups looking for a winner. | ||
2455 | */ | ||
2456 | if (type == XFS_ALLOCTYPE_ANY_AG) { | ||
2457 | /* | ||
2458 | * Start with the last place we left off. | ||
2459 | */ | ||
2460 | args->agno = sagno = (mp->m_agfrotor / rotorstep) % | ||
2461 | mp->m_sb.sb_agcount; | ||
2462 | args->type = XFS_ALLOCTYPE_THIS_AG; | ||
2463 | flags = XFS_ALLOC_FLAG_TRYLOCK; | ||
2464 | } else if (type == XFS_ALLOCTYPE_FIRST_AG) { | ||
2465 | /* | ||
2466 | * Start with allocation group given by bno. | ||
2467 | */ | ||
2468 | args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); | ||
2469 | args->type = XFS_ALLOCTYPE_THIS_AG; | ||
2470 | sagno = 0; | ||
2471 | flags = 0; | ||
2472 | } else { | ||
2473 | if (type == XFS_ALLOCTYPE_START_AG) | ||
2474 | args->type = XFS_ALLOCTYPE_THIS_AG; | ||
2475 | /* | ||
2476 | * Start with the given allocation group. | ||
2477 | */ | ||
2478 | args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno); | ||
2479 | flags = XFS_ALLOC_FLAG_TRYLOCK; | ||
2480 | } | ||
2481 | /* | ||
2482 | * Loop over allocation groups twice; first time with | ||
2483 | * trylock set, second time without. | ||
     | * If the pass without trylock also wraps back to sagno, one | ||
     | * more pass runs with no_min set (minleft forced to 0 for | ||
     | * xfs_alloc_fix_freelist) before giving up. | ||
2484 | */ | ||
2485 | for (;;) { | ||
2486 | args->pag = xfs_perag_get(mp, args->agno); | ||
2487 | if (no_min) args->minleft = 0; | ||
2488 | error = xfs_alloc_fix_freelist(args, flags); | ||
2489 | args->minleft = minleft; | ||
2490 | if (error) { | ||
2491 | trace_xfs_alloc_vextent_nofix(args); | ||
2492 | goto error0; | ||
2493 | } | ||
2494 | /* | ||
2495 | * If we get a buffer back then the allocation will fly. | ||
2496 | */ | ||
2497 | if (args->agbp) { | ||
2498 | if ((error = xfs_alloc_ag_vextent(args))) | ||
2499 | goto error0; | ||
2500 | break; | ||
2501 | } | ||
2502 | |||
2503 | trace_xfs_alloc_vextent_loopfailed(args); | ||
2504 | |||
2505 | /* | ||
2506 | * Didn't work, figure out the next iteration. | ||
2507 | */ | ||
2508 | if (args->agno == sagno && | ||
2509 | type == XFS_ALLOCTYPE_START_BNO) | ||
2510 | args->type = XFS_ALLOCTYPE_THIS_AG; | ||
2511 | /* | ||
2512 | * For the first allocation, we can try any AG to get | ||
2513 | * space. However, if we already have allocated a | ||
2514 | * block, we don't want to try AGs whose number is below | ||
2515 | * sagno. Otherwise, we may end up with out-of-order | ||
2516 | * locking of AGF, which might cause deadlock. | ||
2517 | */ | ||
2518 | if (++(args->agno) == mp->m_sb.sb_agcount) { | ||
2519 | if (args->firstblock != NULLFSBLOCK) | ||
2520 | args->agno = sagno; | ||
2521 | else | ||
2522 | args->agno = 0; | ||
2523 | } | ||
2524 | /* | ||
2525 | * Reached the starting a.g., must either be done | ||
2526 | * or switch to non-trylock mode. | ||
2527 | */ | ||
2528 | if (args->agno == sagno) { | ||
2529 | if (no_min == 1) { | ||
2530 | args->agbno = NULLAGBLOCK; | ||
2531 | trace_xfs_alloc_vextent_allfailed(args); | ||
2532 | break; | ||
2533 | } | ||
2534 | if (flags == 0) { | ||
2535 | no_min = 1; | ||
2536 | } else { | ||
2537 | flags = 0; | ||
2538 | if (type == XFS_ALLOCTYPE_START_BNO) { | ||
2539 | args->agbno = XFS_FSB_TO_AGBNO(mp, | ||
2540 | args->fsbno); | ||
2541 | args->type = XFS_ALLOCTYPE_NEAR_BNO; | ||
2542 | } | ||
2543 | } | ||
2544 | } | ||
2545 | xfs_perag_put(args->pag); | ||
2546 | } | ||
2547 | if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { | ||
2548 | if (args->agno == sagno) | ||
2549 | mp->m_agfrotor = (mp->m_agfrotor + 1) % | ||
2550 | (mp->m_sb.sb_agcount * rotorstep); | ||
2551 | else | ||
2552 | mp->m_agfrotor = (args->agno * rotorstep + 1) % | ||
2553 | (mp->m_sb.sb_agcount * rotorstep); | ||
2554 | } | ||
2555 | break; | ||
2556 | default: | ||
2557 | ASSERT(0); | ||
2558 | /* NOTREACHED */ | ||
2559 | } | ||
2560 | if (args->agbno == NULLAGBLOCK) | ||
2561 | args->fsbno = NULLFSBLOCK; | ||
2562 | else { | ||
2563 | args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno); | ||
2564 | #ifdef DEBUG | ||
2565 | ASSERT(args->len >= args->minlen); | ||
2566 | ASSERT(args->len <= args->maxlen); | ||
2567 | ASSERT(args->agbno % args->alignment == 0); | ||
2568 | XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), | ||
2569 | args->len); | ||
2570 | #endif | ||
2571 | } | ||
2572 | xfs_perag_put(args->pag); | ||
2573 | return 0; | ||
2574 | error0: | ||
2575 | xfs_perag_put(args->pag); | ||
2576 | return error; | ||
2577 | } | ||
2578 | |||
2579 | /* | ||
2580 | * Free an extent. | ||
2581 | * Just break up the extent address and hand off to xfs_free_ag_extent | ||
2582 | * after fixing up the freelist. | ||
2583 | */ | ||
2584 | int /* error */ | ||
2585 | xfs_free_extent( | ||
2586 | xfs_trans_t *tp, /* transaction pointer */ | ||
2587 | xfs_fsblock_t bno, /* starting block number of extent */ | ||
2588 | xfs_extlen_t len) /* length of extent */ | ||
2589 | { | ||
2590 | xfs_alloc_arg_t args; | ||
2591 | int error; | ||
2592 | |||
2593 | ASSERT(len != 0); | ||
2594 | memset(&args, 0, sizeof(xfs_alloc_arg_t)); | ||
2595 | args.tp = tp; | ||
2596 | args.mp = tp->t_mountp; | ||
2597 | |||
2598 | /* | ||
2599 | * validate that the block number is legal - the enables us to detect | ||
2600 | * and handle a silent filesystem corruption rather than crashing. | ||
2601 | */ | ||
2602 | args.agno = XFS_FSB_TO_AGNO(args.mp, bno); | ||
2603 | if (args.agno >= args.mp->m_sb.sb_agcount) | ||
2604 | return -EFSCORRUPTED; | ||
2605 | |||
2606 | args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); | ||
2607 | if (args.agbno >= args.mp->m_sb.sb_agblocks) | ||
2608 | return -EFSCORRUPTED; | ||
2609 | |||
2610 | args.pag = xfs_perag_get(args.mp, args.agno); | ||
2611 | ASSERT(args.pag); | ||
2612 | |||
2613 | error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); | ||
2614 | if (error) | ||
2615 | goto error0; | ||
2616 | |||
2617 | /* validate the extent size is legal now we have the agf locked */ | ||
2618 | if (args.agbno + len > | ||
2619 | be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { | ||
2620 | error = -EFSCORRUPTED; | ||
2621 | goto error0; | ||
2622 | } | ||
2623 | |||
2624 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); | ||
2625 | if (!error) | ||
2626 | xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0); | ||
2627 | error0: | ||
2628 | xfs_perag_put(args.pag); | ||
2629 | return error; | ||
2630 | } | ||
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h new file mode 100644 index 000000000000..feacb061bab7 --- /dev/null +++ b/fs/xfs/libxfs/xfs_alloc.h | |||
@@ -0,0 +1,234 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_ALLOC_H__ | ||
19 | #define __XFS_ALLOC_H__ | ||
20 | |||
21 | struct xfs_buf; | ||
22 | struct xfs_btree_cur; | ||
23 | struct xfs_mount; | ||
24 | struct xfs_perag; | ||
25 | struct xfs_trans; | ||
26 | |||
27 | extern struct workqueue_struct *xfs_alloc_wq; | ||
28 | |||
29 | /* | ||
30 | * Freespace allocation types. Argument to xfs_alloc_[v]extent. | ||
31 | */ | ||
32 | #define XFS_ALLOCTYPE_ANY_AG 0x01 /* allocate anywhere, use rotor */ | ||
33 | #define XFS_ALLOCTYPE_FIRST_AG 0x02 /* ... start at ag 0 */ | ||
34 | #define XFS_ALLOCTYPE_START_AG 0x04 /* anywhere, start in this a.g. */ | ||
35 | #define XFS_ALLOCTYPE_THIS_AG 0x08 /* anywhere in this a.g. */ | ||
36 | #define XFS_ALLOCTYPE_START_BNO 0x10 /* near this block else anywhere */ | ||
37 | #define XFS_ALLOCTYPE_NEAR_BNO 0x20 /* in this a.g. and near this block */ | ||
38 | #define XFS_ALLOCTYPE_THIS_BNO 0x40 /* at exactly this block */ | ||
39 | |||
40 | /* this should become an enum again when the tracing code is fixed */ | ||
41 | typedef unsigned int xfs_alloctype_t; | ||
42 | |||
43 | #define XFS_ALLOC_TYPES \ | ||
44 | { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \ | ||
45 | { XFS_ALLOCTYPE_FIRST_AG, "FIRST_AG" }, \ | ||
46 | { XFS_ALLOCTYPE_START_AG, "START_AG" }, \ | ||
47 | { XFS_ALLOCTYPE_THIS_AG, "THIS_AG" }, \ | ||
48 | { XFS_ALLOCTYPE_START_BNO, "START_BNO" }, \ | ||
49 | { XFS_ALLOCTYPE_NEAR_BNO, "NEAR_BNO" }, \ | ||
50 | { XFS_ALLOCTYPE_THIS_BNO, "THIS_BNO" } | ||
51 | |||
52 | /* | ||
53 | * Flags for xfs_alloc_fix_freelist. | ||
54 | */ | ||
55 | #define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ | ||
56 | #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ | ||
57 | |||
58 | /* | ||
59 | * In order to avoid ENOSPC-related deadlock caused by | ||
60 | * out-of-order locking of AGF buffer (PV 947395), we place | ||
61 | * constraints on the relationship among actual allocations for | ||
62 | * data blocks, freelist blocks, and potential file data bmap | ||
63 | * btree blocks. However, these restrictions may result in no | ||
64 | * actual space allocated for a delayed extent, for example, a data | ||
65 | * block in a certain AG is allocated but there is no additional | ||
66 | * block for the additional bmap btree block due to a split of the | ||
67 | * bmap btree of the file. The result of this may lead to an | ||
68 | * infinite loop in xfssyncd when the file gets flushed to disk and | ||
69 | * all delayed extents need to be actually allocated. To get around | ||
70 | * this, we explicitly set aside a few blocks which will not be | ||
71 | * reserved in delayed allocation. Considering the minimum number of | ||
72 | * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap | ||
73 | * btree requires 1 fsb, so we set the number of set-aside blocks | ||
74 | * to 4 + 4*agcount. | ||
75 | */ | ||
76 | #define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) | ||
77 | |||
78 | /* | ||
79 | * When deciding how much space to allocate out of an AG, we limit the | ||
80 | * allocation maximum size to the size of the AG. However, we cannot use all the | ||
81 | * blocks in the AG - some are permanently used by metadata. These | ||
82 | * blocks are generally: | ||
83 | * - the AG superblock, AGF, AGI and AGFL | ||
84 | * - the AGF (bno and cnt) and AGI btree root blocks | ||
85 | * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits | ||
86 | * | ||
87 | * The AG headers are sector sized, so the amount of space they take up is | ||
88 | * dependent on filesystem geometry. The others are all single blocks. | ||
89 | */ | ||
90 | #define XFS_ALLOC_AG_MAX_USABLE(mp) \ | ||
91 | ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7) | ||
92 | |||
93 | |||
94 | /* | ||
95 | * Argument structure for xfs_alloc routines. | ||
96 | * This is turned into a structure to avoid having 20 arguments passed | ||
97 | * down several levels of the stack. | ||
98 | */ | ||
99 | typedef struct xfs_alloc_arg { | ||
100 | struct xfs_trans *tp; /* transaction pointer */ | ||
101 | struct xfs_mount *mp; /* file system mount point */ | ||
102 | struct xfs_buf *agbp; /* buffer for a.g. freelist header */ | ||
103 | struct xfs_perag *pag; /* per-ag struct for this agno */ | ||
104 | xfs_fsblock_t fsbno; /* file system block number */ | ||
105 | xfs_agnumber_t agno; /* allocation group number */ | ||
106 | xfs_agblock_t agbno; /* allocation group-relative block # */ | ||
107 | xfs_extlen_t minlen; /* minimum size of extent */ | ||
108 | xfs_extlen_t maxlen; /* maximum size of extent */ | ||
109 | xfs_extlen_t mod; /* mod value for extent size */ | ||
110 | xfs_extlen_t prod; /* prod value for extent size */ | ||
111 | xfs_extlen_t minleft; /* min blocks must be left after us */ | ||
112 | xfs_extlen_t total; /* total blocks needed in xaction */ | ||
113 | xfs_extlen_t alignment; /* align answer to multiple of this */ | ||
114 | xfs_extlen_t minalignslop; /* slop for minlen+alignment calcs */ | ||
115 | xfs_extlen_t len; /* output: actual size of extent */ | ||
116 | xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */ | ||
117 | xfs_alloctype_t otype; /* original allocation type */ | ||
118 | char wasdel; /* set if allocation was prev delayed */ | ||
119 | char wasfromfl; /* set if allocation is from freelist */ | ||
120 | char isfl; /* set if is freelist blocks - !acctg */ | ||
121 | char userdata; /* set if this is user data */ | ||
122 | xfs_fsblock_t firstblock; /* io first block allocated */ | ||
123 | } xfs_alloc_arg_t; | ||
124 | |||
125 | /* | ||
126 | * Defines for userdata | ||
127 | */ | ||
128 | #define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/ | ||
129 | #define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */ | ||
130 | |||
131 | /* | ||
132 | * Find the length of the longest extent in an AG. | ||
133 | */ | ||
134 | xfs_extlen_t | ||
135 | xfs_alloc_longest_free_extent(struct xfs_mount *mp, | ||
136 | struct xfs_perag *pag); | ||
137 | |||
138 | /* | ||
139 | * Compute and fill in value of m_ag_maxlevels. | ||
140 | */ | ||
141 | void | ||
142 | xfs_alloc_compute_maxlevels( | ||
143 | struct xfs_mount *mp); /* file system mount structure */ | ||
144 | |||
145 | /* | ||
146 | * Get a block from the freelist. | ||
147 | * Returns with the buffer for the block gotten. | ||
148 | */ | ||
149 | int /* error */ | ||
150 | xfs_alloc_get_freelist( | ||
151 | struct xfs_trans *tp, /* transaction pointer */ | ||
152 | struct xfs_buf *agbp, /* buffer containing the agf structure */ | ||
153 | xfs_agblock_t *bnop, /* block address retrieved from freelist */ | ||
154 | int btreeblk); /* destination is a AGF btree */ | ||
155 | |||
156 | /* | ||
157 | * Log the given fields from the agf structure. | ||
158 | */ | ||
159 | void | ||
160 | xfs_alloc_log_agf( | ||
161 | struct xfs_trans *tp, /* transaction pointer */ | ||
162 | struct xfs_buf *bp, /* buffer for a.g. freelist header */ | ||
163 | int fields);/* mask of fields to be logged (XFS_AGF_...) */ | ||
164 | |||
165 | /* | ||
166 | * Interface for inode allocation to force the pag data to be initialized. | ||
167 | */ | ||
168 | int /* error */ | ||
169 | xfs_alloc_pagf_init( | ||
170 | struct xfs_mount *mp, /* file system mount structure */ | ||
171 | struct xfs_trans *tp, /* transaction pointer */ | ||
172 | xfs_agnumber_t agno, /* allocation group number */ | ||
173 | int flags); /* XFS_ALLOC_FLAG_... */ | ||
174 | |||
175 | /* | ||
176 | * Put the block on the freelist for the allocation group. | ||
177 | */ | ||
178 | int /* error */ | ||
179 | xfs_alloc_put_freelist( | ||
180 | struct xfs_trans *tp, /* transaction pointer */ | ||
181 | struct xfs_buf *agbp, /* buffer for a.g. freelist header */ | ||
182 | struct xfs_buf *agflbp,/* buffer for a.g. free block array */ | ||
183 | xfs_agblock_t bno, /* block being freed */ | ||
184 | int btreeblk); /* owner was a AGF btree */ | ||
185 | |||
186 | /* | ||
187 | * Read in the allocation group header (free/alloc section). | ||
188 | */ | ||
189 | int /* error */ | ||
190 | xfs_alloc_read_agf( | ||
191 | struct xfs_mount *mp, /* mount point structure */ | ||
192 | struct xfs_trans *tp, /* transaction pointer */ | ||
193 | xfs_agnumber_t agno, /* allocation group number */ | ||
194 | int flags, /* XFS_ALLOC_FLAG_... */ | ||
195 | struct xfs_buf **bpp); /* buffer for the ag freelist header */ | ||
196 | |||
197 | /* | ||
198 | * Allocate an extent (variable-size). | ||
199 | */ | ||
200 | int /* error */ | ||
201 | xfs_alloc_vextent( | ||
202 | xfs_alloc_arg_t *args); /* allocation argument structure */ | ||
203 | |||
204 | /* | ||
205 | * Free an extent. | ||
206 | */ | ||
207 | int /* error */ | ||
208 | xfs_free_extent( | ||
209 | struct xfs_trans *tp, /* transaction pointer */ | ||
210 | xfs_fsblock_t bno, /* starting block number of extent */ | ||
211 | xfs_extlen_t len); /* length of extent */ | ||
212 | |||
213 | int /* error */ | ||
214 | xfs_alloc_lookup_le( | ||
215 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
216 | xfs_agblock_t bno, /* starting block of extent */ | ||
217 | xfs_extlen_t len, /* length of extent */ | ||
218 | int *stat); /* success/failure */ | ||
219 | |||
220 | int /* error */ | ||
221 | xfs_alloc_lookup_ge( | ||
222 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
223 | xfs_agblock_t bno, /* starting block of extent */ | ||
224 | xfs_extlen_t len, /* length of extent */ | ||
225 | int *stat); /* success/failure */ | ||
226 | |||
227 | int /* error */ | ||
228 | xfs_alloc_get_rec( | ||
229 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
230 | xfs_agblock_t *bno, /* output: starting block of extent */ | ||
231 | xfs_extlen_t *len, /* output: length of extent */ | ||
232 | int *stat); /* output: success/failure */ | ||
233 | |||
234 | #endif /* __XFS_ALLOC_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c new file mode 100644 index 000000000000..e0e83e24d3ef --- /dev/null +++ b/fs/xfs/libxfs/xfs_alloc_btree.c | |||
@@ -0,0 +1,504 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_shared.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | ||
27 | #include "xfs_btree.h" | ||
28 | #include "xfs_alloc_btree.h" | ||
29 | #include "xfs_alloc.h" | ||
30 | #include "xfs_extent_busy.h" | ||
31 | #include "xfs_error.h" | ||
32 | #include "xfs_trace.h" | ||
33 | #include "xfs_cksum.h" | ||
34 | #include "xfs_trans.h" | ||
35 | |||
36 | |||
37 | STATIC struct xfs_btree_cur * | ||
38 | xfs_allocbt_dup_cursor( | ||
39 | struct xfs_btree_cur *cur) | ||
40 | { | ||
41 | return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp, | ||
42 | cur->bc_private.a.agbp, cur->bc_private.a.agno, | ||
43 | cur->bc_btnum); | ||
44 | } | ||
45 | |||
46 | STATIC void | ||
47 | xfs_allocbt_set_root( | ||
48 | struct xfs_btree_cur *cur, | ||
49 | union xfs_btree_ptr *ptr, | ||
50 | int inc) | ||
51 | { | ||
52 | struct xfs_buf *agbp = cur->bc_private.a.agbp; | ||
53 | struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); | ||
54 | xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); | ||
55 | int btnum = cur->bc_btnum; | ||
56 | struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno); | ||
57 | |||
58 | ASSERT(ptr->s != 0); | ||
59 | |||
60 | agf->agf_roots[btnum] = ptr->s; | ||
61 | be32_add_cpu(&agf->agf_levels[btnum], inc); | ||
62 | pag->pagf_levels[btnum] += inc; | ||
63 | xfs_perag_put(pag); | ||
64 | |||
65 | xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); | ||
66 | } | ||
67 | |||
68 | STATIC int | ||
69 | xfs_allocbt_alloc_block( | ||
70 | struct xfs_btree_cur *cur, | ||
71 | union xfs_btree_ptr *start, | ||
72 | union xfs_btree_ptr *new, | ||
73 | int *stat) | ||
74 | { | ||
75 | int error; | ||
76 | xfs_agblock_t bno; | ||
77 | |||
78 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
79 | |||
80 | /* Allocate the new block from the freelist. If we can't, give up. */ | ||
81 | error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, | ||
82 | &bno, 1); | ||
83 | if (error) { | ||
84 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
85 | return error; | ||
86 | } | ||
87 | |||
88 | if (bno == NULLAGBLOCK) { | ||
89 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
90 | *stat = 0; | ||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false); | ||
95 | |||
96 | xfs_trans_agbtree_delta(cur->bc_tp, 1); | ||
97 | new->s = cpu_to_be32(bno); | ||
98 | |||
99 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
100 | *stat = 1; | ||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | STATIC int | ||
105 | xfs_allocbt_free_block( | ||
106 | struct xfs_btree_cur *cur, | ||
107 | struct xfs_buf *bp) | ||
108 | { | ||
109 | struct xfs_buf *agbp = cur->bc_private.a.agbp; | ||
110 | struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); | ||
111 | xfs_agblock_t bno; | ||
112 | int error; | ||
113 | |||
114 | bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp)); | ||
115 | error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); | ||
116 | if (error) | ||
117 | return error; | ||
118 | |||
119 | xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, | ||
120 | XFS_EXTENT_BUSY_SKIP_DISCARD); | ||
121 | xfs_trans_agbtree_delta(cur->bc_tp, -1); | ||
122 | |||
123 | xfs_trans_binval(cur->bc_tp, bp); | ||
124 | return 0; | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * Update the longest extent in the AGF | ||
129 | */ | ||
130 | STATIC void | ||
131 | xfs_allocbt_update_lastrec( | ||
132 | struct xfs_btree_cur *cur, | ||
133 | struct xfs_btree_block *block, | ||
134 | union xfs_btree_rec *rec, | ||
135 | int ptr, | ||
136 | int reason) | ||
137 | { | ||
138 | struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); | ||
139 | xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); | ||
140 | struct xfs_perag *pag; | ||
141 | __be32 len; | ||
142 | int numrecs; | ||
143 | |||
144 | ASSERT(cur->bc_btnum == XFS_BTNUM_CNT); | ||
145 | |||
146 | switch (reason) { | ||
147 | case LASTREC_UPDATE: | ||
148 | /* | ||
149 | * If this is the last leaf block and it's the last record, | ||
150 | * then update the size of the longest extent in the AG. | ||
151 | */ | ||
152 | if (ptr != xfs_btree_get_numrecs(block)) | ||
153 | return; | ||
154 | len = rec->alloc.ar_blockcount; | ||
155 | break; | ||
156 | case LASTREC_INSREC: | ||
157 | if (be32_to_cpu(rec->alloc.ar_blockcount) <= | ||
158 | be32_to_cpu(agf->agf_longest)) | ||
159 | return; | ||
160 | len = rec->alloc.ar_blockcount; | ||
161 | break; | ||
162 | case LASTREC_DELREC: | ||
163 | numrecs = xfs_btree_get_numrecs(block); | ||
164 | if (ptr <= numrecs) | ||
165 | return; | ||
166 | ASSERT(ptr == numrecs + 1); | ||
167 | |||
168 | if (numrecs) { | ||
169 | xfs_alloc_rec_t *rrp; | ||
170 | |||
171 | rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs); | ||
172 | len = rrp->ar_blockcount; | ||
173 | } else { | ||
174 | len = 0; | ||
175 | } | ||
176 | |||
177 | break; | ||
178 | default: | ||
179 | ASSERT(0); | ||
180 | return; | ||
181 | } | ||
182 | |||
183 | agf->agf_longest = len; | ||
184 | pag = xfs_perag_get(cur->bc_mp, seqno); | ||
185 | pag->pagf_longest = be32_to_cpu(len); | ||
186 | xfs_perag_put(pag); | ||
187 | xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST); | ||
188 | } | ||
189 | |||
190 | STATIC int | ||
191 | xfs_allocbt_get_minrecs( | ||
192 | struct xfs_btree_cur *cur, | ||
193 | int level) | ||
194 | { | ||
195 | return cur->bc_mp->m_alloc_mnr[level != 0]; | ||
196 | } | ||
197 | |||
198 | STATIC int | ||
199 | xfs_allocbt_get_maxrecs( | ||
200 | struct xfs_btree_cur *cur, | ||
201 | int level) | ||
202 | { | ||
203 | return cur->bc_mp->m_alloc_mxr[level != 0]; | ||
204 | } | ||
205 | |||
206 | STATIC void | ||
207 | xfs_allocbt_init_key_from_rec( | ||
208 | union xfs_btree_key *key, | ||
209 | union xfs_btree_rec *rec) | ||
210 | { | ||
211 | ASSERT(rec->alloc.ar_startblock != 0); | ||
212 | |||
213 | key->alloc.ar_startblock = rec->alloc.ar_startblock; | ||
214 | key->alloc.ar_blockcount = rec->alloc.ar_blockcount; | ||
215 | } | ||
216 | |||
217 | STATIC void | ||
218 | xfs_allocbt_init_rec_from_key( | ||
219 | union xfs_btree_key *key, | ||
220 | union xfs_btree_rec *rec) | ||
221 | { | ||
222 | ASSERT(key->alloc.ar_startblock != 0); | ||
223 | |||
224 | rec->alloc.ar_startblock = key->alloc.ar_startblock; | ||
225 | rec->alloc.ar_blockcount = key->alloc.ar_blockcount; | ||
226 | } | ||
227 | |||
228 | STATIC void | ||
229 | xfs_allocbt_init_rec_from_cur( | ||
230 | struct xfs_btree_cur *cur, | ||
231 | union xfs_btree_rec *rec) | ||
232 | { | ||
233 | ASSERT(cur->bc_rec.a.ar_startblock != 0); | ||
234 | |||
235 | rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); | ||
236 | rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); | ||
237 | } | ||
238 | |||
239 | STATIC void | ||
240 | xfs_allocbt_init_ptr_from_cur( | ||
241 | struct xfs_btree_cur *cur, | ||
242 | union xfs_btree_ptr *ptr) | ||
243 | { | ||
244 | struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); | ||
245 | |||
246 | ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno)); | ||
247 | ASSERT(agf->agf_roots[cur->bc_btnum] != 0); | ||
248 | |||
249 | ptr->s = agf->agf_roots[cur->bc_btnum]; | ||
250 | } | ||
251 | |||
252 | STATIC __int64_t | ||
253 | xfs_allocbt_key_diff( | ||
254 | struct xfs_btree_cur *cur, | ||
255 | union xfs_btree_key *key) | ||
256 | { | ||
257 | xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a; | ||
258 | xfs_alloc_key_t *kp = &key->alloc; | ||
259 | __int64_t diff; | ||
260 | |||
261 | if (cur->bc_btnum == XFS_BTNUM_BNO) { | ||
262 | return (__int64_t)be32_to_cpu(kp->ar_startblock) - | ||
263 | rec->ar_startblock; | ||
264 | } | ||
265 | |||
266 | diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount; | ||
267 | if (diff) | ||
268 | return diff; | ||
269 | |||
270 | return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; | ||
271 | } | ||
272 | |||
273 | static bool | ||
274 | xfs_allocbt_verify( | ||
275 | struct xfs_buf *bp) | ||
276 | { | ||
277 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
278 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); | ||
279 | struct xfs_perag *pag = bp->b_pag; | ||
280 | unsigned int level; | ||
281 | |||
282 | /* | ||
283 | * magic number and level verification | ||
284 | * | ||
285 | * During growfs operations, we can't verify the exact level or owner as | ||
286 | * the perag is not fully initialised and hence not attached to the | ||
287 | * buffer. In this case, check against the maximum tree depth. | ||
288 | * | ||
289 | * Similarly, during log recovery we will have a perag structure | ||
290 | * attached, but the agf information will not yet have been initialised | ||
291 | * from the on disk AGF. Again, we can only check against maximum limits | ||
292 | * in this case. | ||
293 | */ | ||
294 | level = be16_to_cpu(block->bb_level); | ||
295 | switch (block->bb_magic) { | ||
296 | case cpu_to_be32(XFS_ABTB_CRC_MAGIC): | ||
297 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
298 | return false; | ||
299 | if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) | ||
300 | return false; | ||
301 | if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) | ||
302 | return false; | ||
303 | if (pag && | ||
304 | be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) | ||
305 | return false; | ||
306 | /* fall through */ | ||
307 | case cpu_to_be32(XFS_ABTB_MAGIC): | ||
308 | if (pag && pag->pagf_init) { | ||
309 | if (level >= pag->pagf_levels[XFS_BTNUM_BNOi]) | ||
310 | return false; | ||
311 | } else if (level >= mp->m_ag_maxlevels) | ||
312 | return false; | ||
313 | break; | ||
314 | case cpu_to_be32(XFS_ABTC_CRC_MAGIC): | ||
315 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
316 | return false; | ||
317 | if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) | ||
318 | return false; | ||
319 | if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) | ||
320 | return false; | ||
321 | if (pag && | ||
322 | be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) | ||
323 | return false; | ||
324 | /* fall through */ | ||
325 | case cpu_to_be32(XFS_ABTC_MAGIC): | ||
326 | if (pag && pag->pagf_init) { | ||
327 | if (level >= pag->pagf_levels[XFS_BTNUM_CNTi]) | ||
328 | return false; | ||
329 | } else if (level >= mp->m_ag_maxlevels) | ||
330 | return false; | ||
331 | break; | ||
332 | default: | ||
333 | return false; | ||
334 | } | ||
335 | |||
336 | /* numrecs verification */ | ||
337 | if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0]) | ||
338 | return false; | ||
339 | |||
340 | /* sibling pointer verification */ | ||
341 | if (!block->bb_u.s.bb_leftsib || | ||
342 | (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && | ||
343 | block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) | ||
344 | return false; | ||
345 | if (!block->bb_u.s.bb_rightsib || | ||
346 | (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && | ||
347 | block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) | ||
348 | return false; | ||
349 | |||
350 | return true; | ||
351 | } | ||
352 | |||
353 | static void | ||
354 | xfs_allocbt_read_verify( | ||
355 | struct xfs_buf *bp) | ||
356 | { | ||
357 | if (!xfs_btree_sblock_verify_crc(bp)) | ||
358 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
359 | else if (!xfs_allocbt_verify(bp)) | ||
360 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
361 | |||
362 | if (bp->b_error) { | ||
363 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
364 | xfs_verifier_error(bp); | ||
365 | } | ||
366 | } | ||
367 | |||
368 | static void | ||
369 | xfs_allocbt_write_verify( | ||
370 | struct xfs_buf *bp) | ||
371 | { | ||
372 | if (!xfs_allocbt_verify(bp)) { | ||
373 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
374 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
375 | xfs_verifier_error(bp); | ||
376 | return; | ||
377 | } | ||
378 | xfs_btree_sblock_calc_crc(bp); | ||
379 | |||
380 | } | ||
381 | |||
382 | const struct xfs_buf_ops xfs_allocbt_buf_ops = { | ||
383 | .verify_read = xfs_allocbt_read_verify, | ||
384 | .verify_write = xfs_allocbt_write_verify, | ||
385 | }; | ||
386 | |||
387 | |||
388 | #if defined(DEBUG) || defined(XFS_WARN) | ||
389 | STATIC int | ||
390 | xfs_allocbt_keys_inorder( | ||
391 | struct xfs_btree_cur *cur, | ||
392 | union xfs_btree_key *k1, | ||
393 | union xfs_btree_key *k2) | ||
394 | { | ||
395 | if (cur->bc_btnum == XFS_BTNUM_BNO) { | ||
396 | return be32_to_cpu(k1->alloc.ar_startblock) < | ||
397 | be32_to_cpu(k2->alloc.ar_startblock); | ||
398 | } else { | ||
399 | return be32_to_cpu(k1->alloc.ar_blockcount) < | ||
400 | be32_to_cpu(k2->alloc.ar_blockcount) || | ||
401 | (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount && | ||
402 | be32_to_cpu(k1->alloc.ar_startblock) < | ||
403 | be32_to_cpu(k2->alloc.ar_startblock)); | ||
404 | } | ||
405 | } | ||
406 | |||
407 | STATIC int | ||
408 | xfs_allocbt_recs_inorder( | ||
409 | struct xfs_btree_cur *cur, | ||
410 | union xfs_btree_rec *r1, | ||
411 | union xfs_btree_rec *r2) | ||
412 | { | ||
413 | if (cur->bc_btnum == XFS_BTNUM_BNO) { | ||
414 | return be32_to_cpu(r1->alloc.ar_startblock) + | ||
415 | be32_to_cpu(r1->alloc.ar_blockcount) <= | ||
416 | be32_to_cpu(r2->alloc.ar_startblock); | ||
417 | } else { | ||
418 | return be32_to_cpu(r1->alloc.ar_blockcount) < | ||
419 | be32_to_cpu(r2->alloc.ar_blockcount) || | ||
420 | (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount && | ||
421 | be32_to_cpu(r1->alloc.ar_startblock) < | ||
422 | be32_to_cpu(r2->alloc.ar_startblock)); | ||
423 | } | ||
424 | } | ||
425 | #endif /* DEBUG || XFS_WARN */ | ||
426 | |||
427 | static const struct xfs_btree_ops xfs_allocbt_ops = { | ||
428 | .rec_len = sizeof(xfs_alloc_rec_t), | ||
429 | .key_len = sizeof(xfs_alloc_key_t), | ||
430 | |||
431 | .dup_cursor = xfs_allocbt_dup_cursor, | ||
432 | .set_root = xfs_allocbt_set_root, | ||
433 | .alloc_block = xfs_allocbt_alloc_block, | ||
434 | .free_block = xfs_allocbt_free_block, | ||
435 | .update_lastrec = xfs_allocbt_update_lastrec, | ||
436 | .get_minrecs = xfs_allocbt_get_minrecs, | ||
437 | .get_maxrecs = xfs_allocbt_get_maxrecs, | ||
438 | .init_key_from_rec = xfs_allocbt_init_key_from_rec, | ||
439 | .init_rec_from_key = xfs_allocbt_init_rec_from_key, | ||
440 | .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, | ||
441 | .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, | ||
442 | .key_diff = xfs_allocbt_key_diff, | ||
443 | .buf_ops = &xfs_allocbt_buf_ops, | ||
444 | #if defined(DEBUG) || defined(XFS_WARN) | ||
445 | .keys_inorder = xfs_allocbt_keys_inorder, | ||
446 | .recs_inorder = xfs_allocbt_recs_inorder, | ||
447 | #endif | ||
448 | }; | ||
449 | |||
450 | /* | ||
451 | * Allocate a new allocation btree cursor. | ||
452 | */ | ||
453 | struct xfs_btree_cur * /* new alloc btree cursor */ | ||
454 | xfs_allocbt_init_cursor( | ||
455 | struct xfs_mount *mp, /* file system mount point */ | ||
456 | struct xfs_trans *tp, /* transaction pointer */ | ||
457 | struct xfs_buf *agbp, /* buffer for agf structure */ | ||
458 | xfs_agnumber_t agno, /* allocation group number */ | ||
459 | xfs_btnum_t btnum) /* btree identifier */ | ||
460 | { | ||
461 | struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); | ||
462 | struct xfs_btree_cur *cur; | ||
463 | |||
464 | ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); | ||
465 | |||
466 | cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); | ||
467 | |||
468 | cur->bc_tp = tp; | ||
469 | cur->bc_mp = mp; | ||
470 | cur->bc_btnum = btnum; | ||
471 | cur->bc_blocklog = mp->m_sb.sb_blocklog; | ||
472 | cur->bc_ops = &xfs_allocbt_ops; | ||
473 | |||
474 | if (btnum == XFS_BTNUM_CNT) { | ||
475 | cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); | ||
476 | cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; | ||
477 | } else { | ||
478 | cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); | ||
479 | } | ||
480 | |||
481 | cur->bc_private.a.agbp = agbp; | ||
482 | cur->bc_private.a.agno = agno; | ||
483 | |||
484 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
485 | cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; | ||
486 | |||
487 | return cur; | ||
488 | } | ||
489 | |||
490 | /* | ||
491 | * Calculate number of records in an alloc btree block. | ||
492 | */ | ||
493 | int | ||
494 | xfs_allocbt_maxrecs( | ||
495 | struct xfs_mount *mp, | ||
496 | int blocklen, | ||
497 | int leaf) | ||
498 | { | ||
499 | blocklen -= XFS_ALLOC_BLOCK_LEN(mp); | ||
500 | |||
501 | if (leaf) | ||
502 | return blocklen / sizeof(xfs_alloc_rec_t); | ||
503 | return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t)); | ||
504 | } | ||
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h new file mode 100644 index 000000000000..45e189e7e81c --- /dev/null +++ b/fs/xfs/libxfs/xfs_alloc_btree.h | |||
@@ -0,0 +1,65 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_ALLOC_BTREE_H__ | ||
19 | #define __XFS_ALLOC_BTREE_H__ | ||
20 | |||
21 | /* | ||
22 | * Freespace on-disk structures | ||
23 | */ | ||
24 | |||
25 | struct xfs_buf; | ||
26 | struct xfs_btree_cur; | ||
27 | struct xfs_mount; | ||
28 | |||
/*
 * Length of the btree block header.  The CRC-enabled header length is
 * used when the superblock reports CRC support, the plain one otherwise.
 */
#define XFS_ALLOC_BLOCK_LEN(mp) \
	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
		XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)

/*
 * Record, key, and pointer address macros for btree blocks.
 *
 * Each takes a 1-based index and yields the address of that entry,
 * counted from the end of the block header.  Pointers are laid out
 * after room for maxrecs keys.
 *
 * (note that some of these may appear unused, but they are used in userspace)
 */
#define XFS_ALLOC_REC_ADDR(mp, block, index) \
	((xfs_alloc_rec_t *) \
		((char *)(block) + \
		 XFS_ALLOC_BLOCK_LEN(mp) + \
		 (((index) - 1) * sizeof(xfs_alloc_rec_t))))

#define XFS_ALLOC_KEY_ADDR(mp, block, index) \
	((xfs_alloc_key_t *) \
		((char *)(block) + \
		 XFS_ALLOC_BLOCK_LEN(mp) + \
		 ((index) - 1) * sizeof(xfs_alloc_key_t)))

#define XFS_ALLOC_PTR_ADDR(mp, block, index, maxrecs) \
	((xfs_alloc_ptr_t *) \
		((char *)(block) + \
		 XFS_ALLOC_BLOCK_LEN(mp) + \
		 (maxrecs) * sizeof(xfs_alloc_key_t) + \
		 ((index) - 1) * sizeof(xfs_alloc_ptr_t)))
59 | |||
60 | extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, | ||
61 | struct xfs_trans *, struct xfs_buf *, | ||
62 | xfs_agnumber_t, xfs_btnum_t); | ||
63 | extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); | ||
64 | |||
65 | #endif /* __XFS_ALLOC_BTREE_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c new file mode 100644 index 000000000000..353fb425faef --- /dev/null +++ b/fs/xfs/libxfs/xfs_attr.c | |||
@@ -0,0 +1,1459 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_shared.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_bit.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_da_format.h" | ||
29 | #include "xfs_da_btree.h" | ||
30 | #include "xfs_attr_sf.h" | ||
31 | #include "xfs_inode.h" | ||
32 | #include "xfs_alloc.h" | ||
33 | #include "xfs_trans.h" | ||
34 | #include "xfs_inode_item.h" | ||
35 | #include "xfs_bmap.h" | ||
36 | #include "xfs_bmap_util.h" | ||
37 | #include "xfs_bmap_btree.h" | ||
38 | #include "xfs_attr.h" | ||
39 | #include "xfs_attr_leaf.h" | ||
40 | #include "xfs_attr_remote.h" | ||
41 | #include "xfs_error.h" | ||
42 | #include "xfs_quota.h" | ||
43 | #include "xfs_trans_space.h" | ||
44 | #include "xfs_trace.h" | ||
45 | #include "xfs_dinode.h" | ||
46 | |||
47 | /* | ||
48 | * xfs_attr.c | ||
49 | * | ||
50 | * Provide the external interfaces to manage attribute lists. | ||
51 | */ | ||
52 | |||
53 | /*======================================================================== | ||
54 | * Function prototypes for the kernel. | ||
55 | *========================================================================*/ | ||
56 | |||
57 | /* | ||
58 | * Internal routines when attribute list fits inside the inode. | ||
59 | */ | ||
60 | STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args); | ||
61 | |||
62 | /* | ||
63 | * Internal routines when attribute list is one block. | ||
64 | */ | ||
65 | STATIC int xfs_attr_leaf_get(xfs_da_args_t *args); | ||
66 | STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args); | ||
67 | STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); | ||
68 | |||
69 | /* | ||
70 | * Internal routines when attribute list is more than one block. | ||
71 | */ | ||
72 | STATIC int xfs_attr_node_get(xfs_da_args_t *args); | ||
73 | STATIC int xfs_attr_node_addname(xfs_da_args_t *args); | ||
74 | STATIC int xfs_attr_node_removename(xfs_da_args_t *args); | ||
75 | STATIC int xfs_attr_fillstate(xfs_da_state_t *state); | ||
76 | STATIC int xfs_attr_refillstate(xfs_da_state_t *state); | ||
77 | |||
78 | |||
79 | STATIC int | ||
80 | xfs_attr_args_init( | ||
81 | struct xfs_da_args *args, | ||
82 | struct xfs_inode *dp, | ||
83 | const unsigned char *name, | ||
84 | int flags) | ||
85 | { | ||
86 | |||
87 | if (!name) | ||
88 | return -EINVAL; | ||
89 | |||
90 | memset(args, 0, sizeof(*args)); | ||
91 | args->geo = dp->i_mount->m_attr_geo; | ||
92 | args->whichfork = XFS_ATTR_FORK; | ||
93 | args->dp = dp; | ||
94 | args->flags = flags; | ||
95 | args->name = name; | ||
96 | args->namelen = strlen((const char *)name); | ||
97 | if (args->namelen >= MAXNAMELEN) | ||
98 | return -EFAULT; /* match IRIX behaviour */ | ||
99 | |||
100 | args->hashval = xfs_da_hashname(args->name, args->namelen); | ||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | int | ||
105 | xfs_inode_hasattr( | ||
106 | struct xfs_inode *ip) | ||
107 | { | ||
108 | if (!XFS_IFORK_Q(ip) || | ||
109 | (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && | ||
110 | ip->i_d.di_anextents == 0)) | ||
111 | return 0; | ||
112 | return 1; | ||
113 | } | ||
114 | |||
115 | /*======================================================================== | ||
116 | * Overall external interface routines. | ||
117 | *========================================================================*/ | ||
118 | |||
119 | int | ||
120 | xfs_attr_get( | ||
121 | struct xfs_inode *ip, | ||
122 | const unsigned char *name, | ||
123 | unsigned char *value, | ||
124 | int *valuelenp, | ||
125 | int flags) | ||
126 | { | ||
127 | struct xfs_da_args args; | ||
128 | uint lock_mode; | ||
129 | int error; | ||
130 | |||
131 | XFS_STATS_INC(xs_attr_get); | ||
132 | |||
133 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | ||
134 | return -EIO; | ||
135 | |||
136 | if (!xfs_inode_hasattr(ip)) | ||
137 | return -ENOATTR; | ||
138 | |||
139 | error = xfs_attr_args_init(&args, ip, name, flags); | ||
140 | if (error) | ||
141 | return error; | ||
142 | |||
143 | args.value = value; | ||
144 | args.valuelen = *valuelenp; | ||
145 | |||
146 | lock_mode = xfs_ilock_attr_map_shared(ip); | ||
147 | if (!xfs_inode_hasattr(ip)) | ||
148 | error = -ENOATTR; | ||
149 | else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) | ||
150 | error = xfs_attr_shortform_getvalue(&args); | ||
151 | else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) | ||
152 | error = xfs_attr_leaf_get(&args); | ||
153 | else | ||
154 | error = xfs_attr_node_get(&args); | ||
155 | xfs_iunlock(ip, lock_mode); | ||
156 | |||
157 | *valuelenp = args.valuelen; | ||
158 | return error == -EEXIST ? 0 : error; | ||
159 | } | ||
160 | |||
161 | /* | ||
162 | * Calculate how many blocks we need for the new attribute, | ||
163 | */ | ||
164 | STATIC int | ||
165 | xfs_attr_calc_size( | ||
166 | struct xfs_da_args *args, | ||
167 | int *local) | ||
168 | { | ||
169 | struct xfs_mount *mp = args->dp->i_mount; | ||
170 | int size; | ||
171 | int nblks; | ||
172 | |||
173 | /* | ||
174 | * Determine space new attribute will use, and if it would be | ||
175 | * "local" or "remote" (note: local != inline). | ||
176 | */ | ||
177 | size = xfs_attr_leaf_newentsize(args, local); | ||
178 | nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); | ||
179 | if (*local) { | ||
180 | if (size > (args->geo->blksize / 2)) { | ||
181 | /* Double split possible */ | ||
182 | nblks *= 2; | ||
183 | } | ||
184 | } else { | ||
185 | /* | ||
186 | * Out of line attribute, cannot double split, but | ||
187 | * make room for the attribute value itself. | ||
188 | */ | ||
189 | uint dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen); | ||
190 | nblks += dblocks; | ||
191 | nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); | ||
192 | } | ||
193 | |||
194 | return nblks; | ||
195 | } | ||
196 | |||
197 | int | ||
198 | xfs_attr_set( | ||
199 | struct xfs_inode *dp, | ||
200 | const unsigned char *name, | ||
201 | unsigned char *value, | ||
202 | int valuelen, | ||
203 | int flags) | ||
204 | { | ||
205 | struct xfs_mount *mp = dp->i_mount; | ||
206 | struct xfs_da_args args; | ||
207 | struct xfs_bmap_free flist; | ||
208 | struct xfs_trans_res tres; | ||
209 | xfs_fsblock_t firstblock; | ||
210 | int rsvd = (flags & ATTR_ROOT) != 0; | ||
211 | int error, err2, committed, local; | ||
212 | |||
213 | XFS_STATS_INC(xs_attr_set); | ||
214 | |||
215 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) | ||
216 | return -EIO; | ||
217 | |||
218 | error = xfs_attr_args_init(&args, dp, name, flags); | ||
219 | if (error) | ||
220 | return error; | ||
221 | |||
222 | args.value = value; | ||
223 | args.valuelen = valuelen; | ||
224 | args.firstblock = &firstblock; | ||
225 | args.flist = &flist; | ||
226 | args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; | ||
227 | args.total = xfs_attr_calc_size(&args, &local); | ||
228 | |||
229 | error = xfs_qm_dqattach(dp, 0); | ||
230 | if (error) | ||
231 | return error; | ||
232 | |||
233 | /* | ||
234 | * If the inode doesn't have an attribute fork, add one. | ||
235 | * (inode must not be locked when we call this routine) | ||
236 | */ | ||
237 | if (XFS_IFORK_Q(dp) == 0) { | ||
238 | int sf_size = sizeof(xfs_attr_sf_hdr_t) + | ||
239 | XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen); | ||
240 | |||
241 | error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); | ||
242 | if (error) | ||
243 | return error; | ||
244 | } | ||
245 | |||
246 | /* | ||
247 | * Start our first transaction of the day. | ||
248 | * | ||
249 | * All future transactions during this code must be "chained" off | ||
250 | * this one via the trans_dup() call. All transactions will contain | ||
251 | * the inode, and the inode will always be marked with trans_ihold(). | ||
252 | * Since the inode will be locked in all transactions, we must log | ||
253 | * the inode in every transaction to let it float upward through | ||
254 | * the log. | ||
255 | */ | ||
256 | args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET); | ||
257 | |||
258 | /* | ||
259 | * Root fork attributes can use reserved data blocks for this | ||
260 | * operation if necessary | ||
261 | */ | ||
262 | |||
263 | if (rsvd) | ||
264 | args.trans->t_flags |= XFS_TRANS_RESERVE; | ||
265 | |||
266 | tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + | ||
267 | M_RES(mp)->tr_attrsetrt.tr_logres * args.total; | ||
268 | tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; | ||
269 | tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; | ||
270 | error = xfs_trans_reserve(args.trans, &tres, args.total, 0); | ||
271 | if (error) { | ||
272 | xfs_trans_cancel(args.trans, 0); | ||
273 | return error; | ||
274 | } | ||
275 | xfs_ilock(dp, XFS_ILOCK_EXCL); | ||
276 | |||
277 | error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, | ||
278 | rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : | ||
279 | XFS_QMOPT_RES_REGBLKS); | ||
280 | if (error) { | ||
281 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | ||
282 | xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); | ||
283 | return error; | ||
284 | } | ||
285 | |||
286 | xfs_trans_ijoin(args.trans, dp, 0); | ||
287 | |||
288 | /* | ||
289 | * If the attribute list is non-existent or a shortform list, | ||
290 | * upgrade it to a single-leaf-block attribute list. | ||
291 | */ | ||
292 | if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL || | ||
293 | (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && | ||
294 | dp->i_d.di_anextents == 0)) { | ||
295 | |||
296 | /* | ||
297 | * Build initial attribute list (if required). | ||
298 | */ | ||
299 | if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) | ||
300 | xfs_attr_shortform_create(&args); | ||
301 | |||
302 | /* | ||
303 | * Try to add the attr to the attribute list in | ||
304 | * the inode. | ||
305 | */ | ||
306 | error = xfs_attr_shortform_addname(&args); | ||
307 | if (error != -ENOSPC) { | ||
308 | /* | ||
309 | * Commit the shortform mods, and we're done. | ||
310 | * NOTE: this is also the error path (EEXIST, etc). | ||
311 | */ | ||
312 | ASSERT(args.trans != NULL); | ||
313 | |||
314 | /* | ||
315 | * If this is a synchronous mount, make sure that | ||
316 | * the transaction goes to disk before returning | ||
317 | * to the user. | ||
318 | */ | ||
319 | if (mp->m_flags & XFS_MOUNT_WSYNC) | ||
320 | xfs_trans_set_sync(args.trans); | ||
321 | |||
322 | if (!error && (flags & ATTR_KERNOTIME) == 0) { | ||
323 | xfs_trans_ichgtime(args.trans, dp, | ||
324 | XFS_ICHGTIME_CHG); | ||
325 | } | ||
326 | err2 = xfs_trans_commit(args.trans, | ||
327 | XFS_TRANS_RELEASE_LOG_RES); | ||
328 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | ||
329 | |||
330 | return error ? error : err2; | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * It won't fit in the shortform, transform to a leaf block. | ||
335 | * GROT: another possible req'mt for a double-split btree op. | ||
336 | */ | ||
337 | xfs_bmap_init(args.flist, args.firstblock); | ||
338 | error = xfs_attr_shortform_to_leaf(&args); | ||
339 | if (!error) { | ||
340 | error = xfs_bmap_finish(&args.trans, args.flist, | ||
341 | &committed); | ||
342 | } | ||
343 | if (error) { | ||
344 | ASSERT(committed); | ||
345 | args.trans = NULL; | ||
346 | xfs_bmap_cancel(&flist); | ||
347 | goto out; | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * bmap_finish() may have committed the last trans and started | ||
352 | * a new one. We need the inode to be in all transactions. | ||
353 | */ | ||
354 | if (committed) | ||
355 | xfs_trans_ijoin(args.trans, dp, 0); | ||
356 | |||
357 | /* | ||
358 | * Commit the leaf transformation. We'll need another (linked) | ||
359 | * transaction to add the new attribute to the leaf. | ||
360 | */ | ||
361 | |||
362 | error = xfs_trans_roll(&args.trans, dp); | ||
363 | if (error) | ||
364 | goto out; | ||
365 | |||
366 | } | ||
367 | |||
368 | if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) | ||
369 | error = xfs_attr_leaf_addname(&args); | ||
370 | else | ||
371 | error = xfs_attr_node_addname(&args); | ||
372 | if (error) | ||
373 | goto out; | ||
374 | |||
375 | /* | ||
376 | * If this is a synchronous mount, make sure that the | ||
377 | * transaction goes to disk before returning to the user. | ||
378 | */ | ||
379 | if (mp->m_flags & XFS_MOUNT_WSYNC) | ||
380 | xfs_trans_set_sync(args.trans); | ||
381 | |||
382 | if ((flags & ATTR_KERNOTIME) == 0) | ||
383 | xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); | ||
384 | |||
385 | /* | ||
386 | * Commit the last in the sequence of transactions. | ||
387 | */ | ||
388 | xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); | ||
389 | error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); | ||
390 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | ||
391 | |||
392 | return error; | ||
393 | |||
394 | out: | ||
395 | if (args.trans) { | ||
396 | xfs_trans_cancel(args.trans, | ||
397 | XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); | ||
398 | } | ||
399 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | ||
400 | return error; | ||
401 | } | ||
402 | |||
403 | /* | ||
404 | * Generic handler routine to remove a name from an attribute list. | ||
405 | * Transitions attribute list from Btree to shortform as necessary. | ||
406 | */ | ||
407 | int | ||
408 | xfs_attr_remove( | ||
409 | struct xfs_inode *dp, | ||
410 | const unsigned char *name, | ||
411 | int flags) | ||
412 | { | ||
413 | struct xfs_mount *mp = dp->i_mount; | ||
414 | struct xfs_da_args args; | ||
415 | struct xfs_bmap_free flist; | ||
416 | xfs_fsblock_t firstblock; | ||
417 | int error; | ||
418 | |||
419 | XFS_STATS_INC(xs_attr_remove); | ||
420 | |||
421 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) | ||
422 | return -EIO; | ||
423 | |||
424 | if (!xfs_inode_hasattr(dp)) | ||
425 | return -ENOATTR; | ||
426 | |||
427 | error = xfs_attr_args_init(&args, dp, name, flags); | ||
428 | if (error) | ||
429 | return error; | ||
430 | |||
431 | args.firstblock = &firstblock; | ||
432 | args.flist = &flist; | ||
433 | |||
434 | /* | ||
435 | * we have no control over the attribute names that userspace passes us | ||
436 | * to remove, so we have to allow the name lookup prior to attribute | ||
437 | * removal to fail. | ||
438 | */ | ||
439 | args.op_flags = XFS_DA_OP_OKNOENT; | ||
440 | |||
441 | error = xfs_qm_dqattach(dp, 0); | ||
442 | if (error) | ||
443 | return error; | ||
444 | |||
445 | /* | ||
446 | * Start our first transaction of the day. | ||
447 | * | ||
448 | * All future transactions during this code must be "chained" off | ||
449 | * this one via the trans_dup() call. All transactions will contain | ||
450 | * the inode, and the inode will always be marked with trans_ihold(). | ||
451 | * Since the inode will be locked in all transactions, we must log | ||
452 | * the inode in every transaction to let it float upward through | ||
453 | * the log. | ||
454 | */ | ||
455 | args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM); | ||
456 | |||
457 | /* | ||
458 | * Root fork attributes can use reserved data blocks for this | ||
459 | * operation if necessary | ||
460 | */ | ||
461 | |||
462 | if (flags & ATTR_ROOT) | ||
463 | args.trans->t_flags |= XFS_TRANS_RESERVE; | ||
464 | |||
465 | error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm, | ||
466 | XFS_ATTRRM_SPACE_RES(mp), 0); | ||
467 | if (error) { | ||
468 | xfs_trans_cancel(args.trans, 0); | ||
469 | return error; | ||
470 | } | ||
471 | |||
472 | xfs_ilock(dp, XFS_ILOCK_EXCL); | ||
473 | /* | ||
474 | * No need to make quota reservations here. We expect to release some | ||
475 | * blocks not allocate in the common case. | ||
476 | */ | ||
477 | xfs_trans_ijoin(args.trans, dp, 0); | ||
478 | |||
479 | if (!xfs_inode_hasattr(dp)) { | ||
480 | error = -ENOATTR; | ||
481 | } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { | ||
482 | ASSERT(dp->i_afp->if_flags & XFS_IFINLINE); | ||
483 | error = xfs_attr_shortform_remove(&args); | ||
484 | } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { | ||
485 | error = xfs_attr_leaf_removename(&args); | ||
486 | } else { | ||
487 | error = xfs_attr_node_removename(&args); | ||
488 | } | ||
489 | |||
490 | if (error) | ||
491 | goto out; | ||
492 | |||
493 | /* | ||
494 | * If this is a synchronous mount, make sure that the | ||
495 | * transaction goes to disk before returning to the user. | ||
496 | */ | ||
497 | if (mp->m_flags & XFS_MOUNT_WSYNC) | ||
498 | xfs_trans_set_sync(args.trans); | ||
499 | |||
500 | if ((flags & ATTR_KERNOTIME) == 0) | ||
501 | xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); | ||
502 | |||
503 | /* | ||
504 | * Commit the last in the sequence of transactions. | ||
505 | */ | ||
506 | xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); | ||
507 | error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); | ||
508 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | ||
509 | |||
510 | return error; | ||
511 | |||
512 | out: | ||
513 | if (args.trans) { | ||
514 | xfs_trans_cancel(args.trans, | ||
515 | XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); | ||
516 | } | ||
517 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | ||
518 | return error; | ||
519 | } | ||
520 | |||
521 | /*======================================================================== | ||
522 | * External routines when attribute list is inside the inode | ||
523 | *========================================================================*/ | ||
524 | |||
525 | /* | ||
526 | * Add a name to the shortform attribute list structure | ||
527 | * This is the external routine. | ||
528 | */ | ||
529 | STATIC int | ||
530 | xfs_attr_shortform_addname(xfs_da_args_t *args) | ||
531 | { | ||
532 | int newsize, forkoff, retval; | ||
533 | |||
534 | trace_xfs_attr_sf_addname(args); | ||
535 | |||
536 | retval = xfs_attr_shortform_lookup(args); | ||
537 | if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { | ||
538 | return retval; | ||
539 | } else if (retval == -EEXIST) { | ||
540 | if (args->flags & ATTR_CREATE) | ||
541 | return retval; | ||
542 | retval = xfs_attr_shortform_remove(args); | ||
543 | ASSERT(retval == 0); | ||
544 | } | ||
545 | |||
546 | if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX || | ||
547 | args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX) | ||
548 | return -ENOSPC; | ||
549 | |||
550 | newsize = XFS_ATTR_SF_TOTSIZE(args->dp); | ||
551 | newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen); | ||
552 | |||
553 | forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize); | ||
554 | if (!forkoff) | ||
555 | return -ENOSPC; | ||
556 | |||
557 | xfs_attr_shortform_add(args, forkoff); | ||
558 | return 0; | ||
559 | } | ||
560 | |||
561 | |||
562 | /*======================================================================== | ||
563 | * External routines when attribute list is one block | ||
564 | *========================================================================*/ | ||
565 | |||
566 | /* | ||
567 | * Add a name to the leaf attribute list structure | ||
568 | * | ||
569 | * This leaf block cannot have a "remote" value, we only call this routine | ||
570 | * if bmap_one_block() says there is only one block (ie: no remote blks). | ||
571 | */ | ||
572 | STATIC int | ||
573 | xfs_attr_leaf_addname(xfs_da_args_t *args) | ||
574 | { | ||
575 | xfs_inode_t *dp; | ||
576 | struct xfs_buf *bp; | ||
577 | int retval, error, committed, forkoff; | ||
578 | |||
579 | trace_xfs_attr_leaf_addname(args); | ||
580 | |||
581 | /* | ||
582 | * Read the (only) block in the attribute list in. | ||
583 | */ | ||
584 | dp = args->dp; | ||
585 | args->blkno = 0; | ||
586 | error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); | ||
587 | if (error) | ||
588 | return error; | ||
589 | |||
590 | /* | ||
591 | * Look up the given attribute in the leaf block. Figure out if | ||
592 | * the given flags produce an error or call for an atomic rename. | ||
593 | */ | ||
594 | retval = xfs_attr3_leaf_lookup_int(bp, args); | ||
595 | if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { | ||
596 | xfs_trans_brelse(args->trans, bp); | ||
597 | return retval; | ||
598 | } else if (retval == -EEXIST) { | ||
599 | if (args->flags & ATTR_CREATE) { /* pure create op */ | ||
600 | xfs_trans_brelse(args->trans, bp); | ||
601 | return retval; | ||
602 | } | ||
603 | |||
604 | trace_xfs_attr_leaf_replace(args); | ||
605 | |||
606 | /* save the attribute state for later removal*/ | ||
607 | args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ | ||
608 | args->blkno2 = args->blkno; /* set 2nd entry info*/ | ||
609 | args->index2 = args->index; | ||
610 | args->rmtblkno2 = args->rmtblkno; | ||
611 | args->rmtblkcnt2 = args->rmtblkcnt; | ||
612 | args->rmtvaluelen2 = args->rmtvaluelen; | ||
613 | |||
614 | /* | ||
615 | * clear the remote attr state now that it is saved so that the | ||
616 | * values reflect the state of the attribute we are about to | ||
617 | * add, not the attribute we just found and will remove later. | ||
618 | */ | ||
619 | args->rmtblkno = 0; | ||
620 | args->rmtblkcnt = 0; | ||
621 | args->rmtvaluelen = 0; | ||
622 | } | ||
623 | |||
624 | /* | ||
625 | * Add the attribute to the leaf block, transitioning to a Btree | ||
626 | * if required. | ||
627 | */ | ||
628 | retval = xfs_attr3_leaf_add(bp, args); | ||
629 | if (retval == -ENOSPC) { | ||
630 | /* | ||
631 | * Promote the attribute list to the Btree format, then | ||
632 | * Commit that transaction so that the node_addname() call | ||
633 | * can manage its own transactions. | ||
634 | */ | ||
635 | xfs_bmap_init(args->flist, args->firstblock); | ||
636 | error = xfs_attr3_leaf_to_node(args); | ||
637 | if (!error) { | ||
638 | error = xfs_bmap_finish(&args->trans, args->flist, | ||
639 | &committed); | ||
640 | } | ||
641 | if (error) { | ||
642 | ASSERT(committed); | ||
643 | args->trans = NULL; | ||
644 | xfs_bmap_cancel(args->flist); | ||
645 | return error; | ||
646 | } | ||
647 | |||
648 | /* | ||
649 | * bmap_finish() may have committed the last trans and started | ||
650 | * a new one. We need the inode to be in all transactions. | ||
651 | */ | ||
652 | if (committed) | ||
653 | xfs_trans_ijoin(args->trans, dp, 0); | ||
654 | |||
655 | /* | ||
656 | * Commit the current trans (including the inode) and start | ||
657 | * a new one. | ||
658 | */ | ||
659 | error = xfs_trans_roll(&args->trans, dp); | ||
660 | if (error) | ||
661 | return error; | ||
662 | |||
663 | /* | ||
664 | * Fob the whole rest of the problem off on the Btree code. | ||
665 | */ | ||
666 | error = xfs_attr_node_addname(args); | ||
667 | return error; | ||
668 | } | ||
669 | |||
670 | /* | ||
671 | * Commit the transaction that added the attr name so that | ||
672 | * later routines can manage their own transactions. | ||
673 | */ | ||
674 | error = xfs_trans_roll(&args->trans, dp); | ||
675 | if (error) | ||
676 | return error; | ||
677 | |||
678 | /* | ||
679 | * If there was an out-of-line value, allocate the blocks we | ||
680 | * identified for its storage and copy the value. This is done | ||
681 | * after we create the attribute so that we don't overflow the | ||
682 | * maximum size of a transaction and/or hit a deadlock. | ||
683 | */ | ||
684 | if (args->rmtblkno > 0) { | ||
685 | error = xfs_attr_rmtval_set(args); | ||
686 | if (error) | ||
687 | return error; | ||
688 | } | ||
689 | |||
690 | /* | ||
691 | * If this is an atomic rename operation, we must "flip" the | ||
692 | * incomplete flags on the "new" and "old" attribute/value pairs | ||
693 | * so that one disappears and one appears atomically. Then we | ||
694 | * must remove the "old" attribute/value pair. | ||
695 | */ | ||
696 | if (args->op_flags & XFS_DA_OP_RENAME) { | ||
697 | /* | ||
698 | * In a separate transaction, set the incomplete flag on the | ||
699 | * "old" attr and clear the incomplete flag on the "new" attr. | ||
700 | */ | ||
701 | error = xfs_attr3_leaf_flipflags(args); | ||
702 | if (error) | ||
703 | return error; | ||
704 | |||
705 | /* | ||
706 | * Dismantle the "old" attribute/value pair by removing | ||
707 | * a "remote" value (if it exists). | ||
708 | */ | ||
709 | args->index = args->index2; | ||
710 | args->blkno = args->blkno2; | ||
711 | args->rmtblkno = args->rmtblkno2; | ||
712 | args->rmtblkcnt = args->rmtblkcnt2; | ||
713 | args->rmtvaluelen = args->rmtvaluelen2; | ||
714 | if (args->rmtblkno) { | ||
715 | error = xfs_attr_rmtval_remove(args); | ||
716 | if (error) | ||
717 | return error; | ||
718 | } | ||
719 | |||
720 | /* | ||
721 | * Read in the block containing the "old" attr, then | ||
722 | * remove the "old" attr from that block (neat, huh!) | ||
723 | */ | ||
724 | error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, | ||
725 | -1, &bp); | ||
726 | if (error) | ||
727 | return error; | ||
728 | |||
729 | xfs_attr3_leaf_remove(bp, args); | ||
730 | |||
731 | /* | ||
732 | * If the result is small enough, shrink it all into the inode. | ||
733 | */ | ||
734 | if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { | ||
735 | xfs_bmap_init(args->flist, args->firstblock); | ||
736 | error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); | ||
737 | /* bp is gone due to xfs_da_shrink_inode */ | ||
738 | if (!error) { | ||
739 | error = xfs_bmap_finish(&args->trans, | ||
740 | args->flist, | ||
741 | &committed); | ||
742 | } | ||
743 | if (error) { | ||
744 | ASSERT(committed); | ||
745 | args->trans = NULL; | ||
746 | xfs_bmap_cancel(args->flist); | ||
747 | return error; | ||
748 | } | ||
749 | |||
750 | /* | ||
751 | * bmap_finish() may have committed the last trans | ||
752 | * and started a new one. We need the inode to be | ||
753 | * in all transactions. | ||
754 | */ | ||
755 | if (committed) | ||
756 | xfs_trans_ijoin(args->trans, dp, 0); | ||
757 | } | ||
758 | |||
759 | /* | ||
760 | * Commit the remove and start the next trans in series. | ||
761 | */ | ||
762 | error = xfs_trans_roll(&args->trans, dp); | ||
763 | |||
764 | } else if (args->rmtblkno > 0) { | ||
765 | /* | ||
766 | * Added a "remote" value, just clear the incomplete flag. | ||
767 | */ | ||
768 | error = xfs_attr3_leaf_clearflag(args); | ||
769 | } | ||
770 | return error; | ||
771 | } | ||
772 | |||
773 | /* | ||
774 | * Remove a name from the leaf attribute list structure | ||
775 | * | ||
776 | * This leaf block cannot have a "remote" value, we only call this routine | ||
777 | * if bmap_one_block() says there is only one block (ie: no remote blks). | ||
778 | */ | ||
779 | STATIC int | ||
780 | xfs_attr_leaf_removename(xfs_da_args_t *args) | ||
781 | { | ||
782 | xfs_inode_t *dp; | ||
783 | struct xfs_buf *bp; | ||
784 | int error, committed, forkoff; | ||
785 | |||
786 | trace_xfs_attr_leaf_removename(args); | ||
787 | |||
788 | /* | ||
789 | * Remove the attribute. | ||
790 | */ | ||
791 | dp = args->dp; | ||
792 | args->blkno = 0; | ||
793 | error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); | ||
794 | if (error) | ||
795 | return error; | ||
796 | |||
797 | error = xfs_attr3_leaf_lookup_int(bp, args); | ||
798 | if (error == -ENOATTR) { | ||
799 | xfs_trans_brelse(args->trans, bp); | ||
800 | return error; | ||
801 | } | ||
802 | |||
803 | xfs_attr3_leaf_remove(bp, args); | ||
804 | |||
805 | /* | ||
806 | * If the result is small enough, shrink it all into the inode. | ||
807 | */ | ||
808 | if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { | ||
809 | xfs_bmap_init(args->flist, args->firstblock); | ||
810 | error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); | ||
811 | /* bp is gone due to xfs_da_shrink_inode */ | ||
812 | if (!error) { | ||
813 | error = xfs_bmap_finish(&args->trans, args->flist, | ||
814 | &committed); | ||
815 | } | ||
816 | if (error) { | ||
817 | ASSERT(committed); | ||
818 | args->trans = NULL; | ||
819 | xfs_bmap_cancel(args->flist); | ||
820 | return error; | ||
821 | } | ||
822 | |||
823 | /* | ||
824 | * bmap_finish() may have committed the last trans and started | ||
825 | * a new one. We need the inode to be in all transactions. | ||
826 | */ | ||
827 | if (committed) | ||
828 | xfs_trans_ijoin(args->trans, dp, 0); | ||
829 | } | ||
830 | return 0; | ||
831 | } | ||
832 | |||
833 | /* | ||
834 | * Look up a name in a leaf attribute list structure. | ||
835 | * | ||
836 | * This leaf block cannot have a "remote" value, we only call this routine | ||
837 | * if bmap_one_block() says there is only one block (ie: no remote blks). | ||
838 | */ | ||
839 | STATIC int | ||
840 | xfs_attr_leaf_get(xfs_da_args_t *args) | ||
841 | { | ||
842 | struct xfs_buf *bp; | ||
843 | int error; | ||
844 | |||
845 | trace_xfs_attr_leaf_get(args); | ||
846 | |||
847 | args->blkno = 0; | ||
848 | error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); | ||
849 | if (error) | ||
850 | return error; | ||
851 | |||
852 | error = xfs_attr3_leaf_lookup_int(bp, args); | ||
853 | if (error != -EEXIST) { | ||
854 | xfs_trans_brelse(args->trans, bp); | ||
855 | return error; | ||
856 | } | ||
857 | error = xfs_attr3_leaf_getvalue(bp, args); | ||
858 | xfs_trans_brelse(args->trans, bp); | ||
859 | if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) { | ||
860 | error = xfs_attr_rmtval_get(args); | ||
861 | } | ||
862 | return error; | ||
863 | } | ||
864 | |||
865 | /*======================================================================== | ||
866 | * External routines when attribute list size > geo->blksize | ||
867 | *========================================================================*/ | ||
868 | |||
869 | /* | ||
870 | * Add a name to a Btree-format attribute list. | ||
871 | * | ||
872 | * This will involve walking down the Btree, and may involve splitting | ||
873 | * leaf nodes and even splitting intermediate nodes up to and including | ||
874 | * the root node (a special case of an intermediate node). | ||
875 | * | ||
876 | * "Remote" attribute values confuse the issue and atomic rename operations | ||
877 | * add a whole extra layer of confusion on top of that. | ||
878 | */ | ||
879 | STATIC int | ||
880 | xfs_attr_node_addname(xfs_da_args_t *args) | ||
881 | { | ||
882 | xfs_da_state_t *state; | ||
883 | xfs_da_state_blk_t *blk; | ||
884 | xfs_inode_t *dp; | ||
885 | xfs_mount_t *mp; | ||
886 | int committed, retval, error; | ||
887 | |||
888 | trace_xfs_attr_node_addname(args); | ||
889 | |||
890 | /* | ||
891 | * Fill in bucket of arguments/results/context to carry around. | ||
892 | */ | ||
893 | dp = args->dp; | ||
894 | mp = dp->i_mount; | ||
895 | restart: | ||
896 | state = xfs_da_state_alloc(); | ||
897 | state->args = args; | ||
898 | state->mp = mp; | ||
899 | |||
900 | /* | ||
901 | * Search to see if name already exists, and get back a pointer | ||
902 | * to where it should go. | ||
903 | */ | ||
904 | error = xfs_da3_node_lookup_int(state, &retval); | ||
905 | if (error) | ||
906 | goto out; | ||
907 | blk = &state->path.blk[ state->path.active-1 ]; | ||
908 | ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); | ||
909 | if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { | ||
910 | goto out; | ||
911 | } else if (retval == -EEXIST) { | ||
912 | if (args->flags & ATTR_CREATE) | ||
913 | goto out; | ||
914 | |||
915 | trace_xfs_attr_node_replace(args); | ||
916 | |||
917 | /* save the attribute state for later removal*/ | ||
918 | args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ | ||
919 | args->blkno2 = args->blkno; /* set 2nd entry info*/ | ||
920 | args->index2 = args->index; | ||
921 | args->rmtblkno2 = args->rmtblkno; | ||
922 | args->rmtblkcnt2 = args->rmtblkcnt; | ||
923 | args->rmtvaluelen2 = args->rmtvaluelen; | ||
924 | |||
925 | /* | ||
926 | * clear the remote attr state now that it is saved so that the | ||
927 | * values reflect the state of the attribute we are about to | ||
928 | * add, not the attribute we just found and will remove later. | ||
929 | */ | ||
930 | args->rmtblkno = 0; | ||
931 | args->rmtblkcnt = 0; | ||
932 | args->rmtvaluelen = 0; | ||
933 | } | ||
934 | |||
935 | retval = xfs_attr3_leaf_add(blk->bp, state->args); | ||
936 | if (retval == -ENOSPC) { | ||
937 | if (state->path.active == 1) { | ||
938 | /* | ||
939 | * Its really a single leaf node, but it had | ||
940 | * out-of-line values so it looked like it *might* | ||
941 | * have been a b-tree. | ||
942 | */ | ||
943 | xfs_da_state_free(state); | ||
944 | state = NULL; | ||
945 | xfs_bmap_init(args->flist, args->firstblock); | ||
946 | error = xfs_attr3_leaf_to_node(args); | ||
947 | if (!error) { | ||
948 | error = xfs_bmap_finish(&args->trans, | ||
949 | args->flist, | ||
950 | &committed); | ||
951 | } | ||
952 | if (error) { | ||
953 | ASSERT(committed); | ||
954 | args->trans = NULL; | ||
955 | xfs_bmap_cancel(args->flist); | ||
956 | goto out; | ||
957 | } | ||
958 | |||
959 | /* | ||
960 | * bmap_finish() may have committed the last trans | ||
961 | * and started a new one. We need the inode to be | ||
962 | * in all transactions. | ||
963 | */ | ||
964 | if (committed) | ||
965 | xfs_trans_ijoin(args->trans, dp, 0); | ||
966 | |||
967 | /* | ||
968 | * Commit the node conversion and start the next | ||
969 | * trans in the chain. | ||
970 | */ | ||
971 | error = xfs_trans_roll(&args->trans, dp); | ||
972 | if (error) | ||
973 | goto out; | ||
974 | |||
975 | goto restart; | ||
976 | } | ||
977 | |||
978 | /* | ||
979 | * Split as many Btree elements as required. | ||
980 | * This code tracks the new and old attr's location | ||
981 | * in the index/blkno/rmtblkno/rmtblkcnt fields and | ||
982 | * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields. | ||
983 | */ | ||
984 | xfs_bmap_init(args->flist, args->firstblock); | ||
985 | error = xfs_da3_split(state); | ||
986 | if (!error) { | ||
987 | error = xfs_bmap_finish(&args->trans, args->flist, | ||
988 | &committed); | ||
989 | } | ||
990 | if (error) { | ||
991 | ASSERT(committed); | ||
992 | args->trans = NULL; | ||
993 | xfs_bmap_cancel(args->flist); | ||
994 | goto out; | ||
995 | } | ||
996 | |||
997 | /* | ||
998 | * bmap_finish() may have committed the last trans and started | ||
999 | * a new one. We need the inode to be in all transactions. | ||
1000 | */ | ||
1001 | if (committed) | ||
1002 | xfs_trans_ijoin(args->trans, dp, 0); | ||
1003 | } else { | ||
1004 | /* | ||
1005 | * Addition succeeded, update Btree hashvals. | ||
1006 | */ | ||
1007 | xfs_da3_fixhashpath(state, &state->path); | ||
1008 | } | ||
1009 | |||
1010 | /* | ||
1011 | * Kill the state structure, we're done with it and need to | ||
1012 | * allow the buffers to come back later. | ||
1013 | */ | ||
1014 | xfs_da_state_free(state); | ||
1015 | state = NULL; | ||
1016 | |||
1017 | /* | ||
1018 | * Commit the leaf addition or btree split and start the next | ||
1019 | * trans in the chain. | ||
1020 | */ | ||
1021 | error = xfs_trans_roll(&args->trans, dp); | ||
1022 | if (error) | ||
1023 | goto out; | ||
1024 | |||
1025 | /* | ||
1026 | * If there was an out-of-line value, allocate the blocks we | ||
1027 | * identified for its storage and copy the value. This is done | ||
1028 | * after we create the attribute so that we don't overflow the | ||
1029 | * maximum size of a transaction and/or hit a deadlock. | ||
1030 | */ | ||
1031 | if (args->rmtblkno > 0) { | ||
1032 | error = xfs_attr_rmtval_set(args); | ||
1033 | if (error) | ||
1034 | return error; | ||
1035 | } | ||
1036 | |||
1037 | /* | ||
1038 | * If this is an atomic rename operation, we must "flip" the | ||
1039 | * incomplete flags on the "new" and "old" attribute/value pairs | ||
1040 | * so that one disappears and one appears atomically. Then we | ||
1041 | * must remove the "old" attribute/value pair. | ||
1042 | */ | ||
1043 | if (args->op_flags & XFS_DA_OP_RENAME) { | ||
1044 | /* | ||
1045 | * In a separate transaction, set the incomplete flag on the | ||
1046 | * "old" attr and clear the incomplete flag on the "new" attr. | ||
1047 | */ | ||
1048 | error = xfs_attr3_leaf_flipflags(args); | ||
1049 | if (error) | ||
1050 | goto out; | ||
1051 | |||
1052 | /* | ||
1053 | * Dismantle the "old" attribute/value pair by removing | ||
1054 | * a "remote" value (if it exists). | ||
1055 | */ | ||
1056 | args->index = args->index2; | ||
1057 | args->blkno = args->blkno2; | ||
1058 | args->rmtblkno = args->rmtblkno2; | ||
1059 | args->rmtblkcnt = args->rmtblkcnt2; | ||
1060 | args->rmtvaluelen = args->rmtvaluelen2; | ||
1061 | if (args->rmtblkno) { | ||
1062 | error = xfs_attr_rmtval_remove(args); | ||
1063 | if (error) | ||
1064 | return error; | ||
1065 | } | ||
1066 | |||
1067 | /* | ||
1068 | * Re-find the "old" attribute entry after any split ops. | ||
1069 | * The INCOMPLETE flag means that we will find the "old" | ||
1070 | * attr, not the "new" one. | ||
1071 | */ | ||
1072 | args->flags |= XFS_ATTR_INCOMPLETE; | ||
1073 | state = xfs_da_state_alloc(); | ||
1074 | state->args = args; | ||
1075 | state->mp = mp; | ||
1076 | state->inleaf = 0; | ||
1077 | error = xfs_da3_node_lookup_int(state, &retval); | ||
1078 | if (error) | ||
1079 | goto out; | ||
1080 | |||
1081 | /* | ||
1082 | * Remove the name and update the hashvals in the tree. | ||
1083 | */ | ||
1084 | blk = &state->path.blk[ state->path.active-1 ]; | ||
1085 | ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); | ||
1086 | error = xfs_attr3_leaf_remove(blk->bp, args); | ||
1087 | xfs_da3_fixhashpath(state, &state->path); | ||
1088 | |||
1089 | /* | ||
1090 | * Check to see if the tree needs to be collapsed. | ||
1091 | */ | ||
1092 | if (retval && (state->path.active > 1)) { | ||
1093 | xfs_bmap_init(args->flist, args->firstblock); | ||
1094 | error = xfs_da3_join(state); | ||
1095 | if (!error) { | ||
1096 | error = xfs_bmap_finish(&args->trans, | ||
1097 | args->flist, | ||
1098 | &committed); | ||
1099 | } | ||
1100 | if (error) { | ||
1101 | ASSERT(committed); | ||
1102 | args->trans = NULL; | ||
1103 | xfs_bmap_cancel(args->flist); | ||
1104 | goto out; | ||
1105 | } | ||
1106 | |||
1107 | /* | ||
1108 | * bmap_finish() may have committed the last trans | ||
1109 | * and started a new one. We need the inode to be | ||
1110 | * in all transactions. | ||
1111 | */ | ||
1112 | if (committed) | ||
1113 | xfs_trans_ijoin(args->trans, dp, 0); | ||
1114 | } | ||
1115 | |||
1116 | /* | ||
1117 | * Commit and start the next trans in the chain. | ||
1118 | */ | ||
1119 | error = xfs_trans_roll(&args->trans, dp); | ||
1120 | if (error) | ||
1121 | goto out; | ||
1122 | |||
1123 | } else if (args->rmtblkno > 0) { | ||
1124 | /* | ||
1125 | * Added a "remote" value, just clear the incomplete flag. | ||
1126 | */ | ||
1127 | error = xfs_attr3_leaf_clearflag(args); | ||
1128 | if (error) | ||
1129 | goto out; | ||
1130 | } | ||
1131 | retval = error = 0; | ||
1132 | |||
1133 | out: | ||
1134 | if (state) | ||
1135 | xfs_da_state_free(state); | ||
1136 | if (error) | ||
1137 | return error; | ||
1138 | return retval; | ||
1139 | } | ||
1140 | |||
1141 | /* | ||
1142 | * Remove a name from a B-tree attribute list. | ||
1143 | * | ||
1144 | * This will involve walking down the Btree, and may involve joining | ||
1145 | * leaf nodes and even joining intermediate nodes up to and including | ||
1146 | * the root node (a special case of an intermediate node). | ||
1147 | */ | ||
1148 | STATIC int | ||
1149 | xfs_attr_node_removename(xfs_da_args_t *args) | ||
1150 | { | ||
1151 | xfs_da_state_t *state; | ||
1152 | xfs_da_state_blk_t *blk; | ||
1153 | xfs_inode_t *dp; | ||
1154 | struct xfs_buf *bp; | ||
1155 | int retval, error, committed, forkoff; | ||
1156 | |||
1157 | trace_xfs_attr_node_removename(args); | ||
1158 | |||
1159 | /* | ||
1160 | * Tie a string around our finger to remind us where we are. | ||
1161 | */ | ||
1162 | dp = args->dp; | ||
1163 | state = xfs_da_state_alloc(); | ||
1164 | state->args = args; | ||
1165 | state->mp = dp->i_mount; | ||
1166 | |||
1167 | /* | ||
1168 | * Search to see if name exists, and get back a pointer to it. | ||
1169 | */ | ||
1170 | error = xfs_da3_node_lookup_int(state, &retval); | ||
1171 | if (error || (retval != -EEXIST)) { | ||
1172 | if (error == 0) | ||
1173 | error = retval; | ||
1174 | goto out; | ||
1175 | } | ||
1176 | |||
1177 | /* | ||
1178 | * If there is an out-of-line value, de-allocate the blocks. | ||
1179 | * This is done before we remove the attribute so that we don't | ||
1180 | * overflow the maximum size of a transaction and/or hit a deadlock. | ||
1181 | */ | ||
1182 | blk = &state->path.blk[ state->path.active-1 ]; | ||
1183 | ASSERT(blk->bp != NULL); | ||
1184 | ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); | ||
1185 | if (args->rmtblkno > 0) { | ||
1186 | /* | ||
1187 | * Fill in disk block numbers in the state structure | ||
1188 | * so that we can get the buffers back after we commit | ||
1189 | * several transactions in the following calls. | ||
1190 | */ | ||
1191 | error = xfs_attr_fillstate(state); | ||
1192 | if (error) | ||
1193 | goto out; | ||
1194 | |||
1195 | /* | ||
1196 | * Mark the attribute as INCOMPLETE, then bunmapi() the | ||
1197 | * remote value. | ||
1198 | */ | ||
1199 | error = xfs_attr3_leaf_setflag(args); | ||
1200 | if (error) | ||
1201 | goto out; | ||
1202 | error = xfs_attr_rmtval_remove(args); | ||
1203 | if (error) | ||
1204 | goto out; | ||
1205 | |||
1206 | /* | ||
1207 | * Refill the state structure with buffers, the prior calls | ||
1208 | * released our buffers. | ||
1209 | */ | ||
1210 | error = xfs_attr_refillstate(state); | ||
1211 | if (error) | ||
1212 | goto out; | ||
1213 | } | ||
1214 | |||
1215 | /* | ||
1216 | * Remove the name and update the hashvals in the tree. | ||
1217 | */ | ||
1218 | blk = &state->path.blk[ state->path.active-1 ]; | ||
1219 | ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); | ||
1220 | retval = xfs_attr3_leaf_remove(blk->bp, args); | ||
1221 | xfs_da3_fixhashpath(state, &state->path); | ||
1222 | |||
1223 | /* | ||
1224 | * Check to see if the tree needs to be collapsed. | ||
1225 | */ | ||
1226 | if (retval && (state->path.active > 1)) { | ||
1227 | xfs_bmap_init(args->flist, args->firstblock); | ||
1228 | error = xfs_da3_join(state); | ||
1229 | if (!error) { | ||
1230 | error = xfs_bmap_finish(&args->trans, args->flist, | ||
1231 | &committed); | ||
1232 | } | ||
1233 | if (error) { | ||
1234 | ASSERT(committed); | ||
1235 | args->trans = NULL; | ||
1236 | xfs_bmap_cancel(args->flist); | ||
1237 | goto out; | ||
1238 | } | ||
1239 | |||
1240 | /* | ||
1241 | * bmap_finish() may have committed the last trans and started | ||
1242 | * a new one. We need the inode to be in all transactions. | ||
1243 | */ | ||
1244 | if (committed) | ||
1245 | xfs_trans_ijoin(args->trans, dp, 0); | ||
1246 | |||
1247 | /* | ||
1248 | * Commit the Btree join operation and start a new trans. | ||
1249 | */ | ||
1250 | error = xfs_trans_roll(&args->trans, dp); | ||
1251 | if (error) | ||
1252 | goto out; | ||
1253 | } | ||
1254 | |||
1255 | /* | ||
1256 | * If the result is small enough, push it all into the inode. | ||
1257 | */ | ||
1258 | if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { | ||
1259 | /* | ||
1260 | * Have to get rid of the copy of this dabuf in the state. | ||
1261 | */ | ||
1262 | ASSERT(state->path.active == 1); | ||
1263 | ASSERT(state->path.blk[0].bp); | ||
1264 | state->path.blk[0].bp = NULL; | ||
1265 | |||
1266 | error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp); | ||
1267 | if (error) | ||
1268 | goto out; | ||
1269 | |||
1270 | if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { | ||
1271 | xfs_bmap_init(args->flist, args->firstblock); | ||
1272 | error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); | ||
1273 | /* bp is gone due to xfs_da_shrink_inode */ | ||
1274 | if (!error) { | ||
1275 | error = xfs_bmap_finish(&args->trans, | ||
1276 | args->flist, | ||
1277 | &committed); | ||
1278 | } | ||
1279 | if (error) { | ||
1280 | ASSERT(committed); | ||
1281 | args->trans = NULL; | ||
1282 | xfs_bmap_cancel(args->flist); | ||
1283 | goto out; | ||
1284 | } | ||
1285 | |||
1286 | /* | ||
1287 | * bmap_finish() may have committed the last trans | ||
1288 | * and started a new one. We need the inode to be | ||
1289 | * in all transactions. | ||
1290 | */ | ||
1291 | if (committed) | ||
1292 | xfs_trans_ijoin(args->trans, dp, 0); | ||
1293 | } else | ||
1294 | xfs_trans_brelse(args->trans, bp); | ||
1295 | } | ||
1296 | error = 0; | ||
1297 | |||
1298 | out: | ||
1299 | xfs_da_state_free(state); | ||
1300 | return error; | ||
1301 | } | ||
1302 | |||
1303 | /* | ||
1304 | * Fill in the disk block numbers in the state structure for the buffers | ||
1305 | * that are attached to the state structure. | ||
1306 | * This is done so that we can quickly reattach ourselves to those buffers | ||
1307 | * after some set of transaction commits have released these buffers. | ||
1308 | */ | ||
1309 | STATIC int | ||
1310 | xfs_attr_fillstate(xfs_da_state_t *state) | ||
1311 | { | ||
1312 | xfs_da_state_path_t *path; | ||
1313 | xfs_da_state_blk_t *blk; | ||
1314 | int level; | ||
1315 | |||
1316 | trace_xfs_attr_fillstate(state->args); | ||
1317 | |||
1318 | /* | ||
1319 | * Roll down the "path" in the state structure, storing the on-disk | ||
1320 | * block number for those buffers in the "path". | ||
1321 | */ | ||
1322 | path = &state->path; | ||
1323 | ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); | ||
1324 | for (blk = path->blk, level = 0; level < path->active; blk++, level++) { | ||
1325 | if (blk->bp) { | ||
1326 | blk->disk_blkno = XFS_BUF_ADDR(blk->bp); | ||
1327 | blk->bp = NULL; | ||
1328 | } else { | ||
1329 | blk->disk_blkno = 0; | ||
1330 | } | ||
1331 | } | ||
1332 | |||
1333 | /* | ||
1334 | * Roll down the "altpath" in the state structure, storing the on-disk | ||
1335 | * block number for those buffers in the "altpath". | ||
1336 | */ | ||
1337 | path = &state->altpath; | ||
1338 | ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); | ||
1339 | for (blk = path->blk, level = 0; level < path->active; blk++, level++) { | ||
1340 | if (blk->bp) { | ||
1341 | blk->disk_blkno = XFS_BUF_ADDR(blk->bp); | ||
1342 | blk->bp = NULL; | ||
1343 | } else { | ||
1344 | blk->disk_blkno = 0; | ||
1345 | } | ||
1346 | } | ||
1347 | |||
1348 | return 0; | ||
1349 | } | ||
1350 | |||
1351 | /* | ||
1352 | * Reattach the buffers to the state structure based on the disk block | ||
1353 | * numbers stored in the state structure. | ||
1354 | * This is done after some set of transaction commits have released those | ||
1355 | * buffers from our grip. | ||
1356 | */ | ||
1357 | STATIC int | ||
1358 | xfs_attr_refillstate(xfs_da_state_t *state) | ||
1359 | { | ||
1360 | xfs_da_state_path_t *path; | ||
1361 | xfs_da_state_blk_t *blk; | ||
1362 | int level, error; | ||
1363 | |||
1364 | trace_xfs_attr_refillstate(state->args); | ||
1365 | |||
1366 | /* | ||
1367 | * Roll down the "path" in the state structure, storing the on-disk | ||
1368 | * block number for those buffers in the "path". | ||
1369 | */ | ||
1370 | path = &state->path; | ||
1371 | ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); | ||
1372 | for (blk = path->blk, level = 0; level < path->active; blk++, level++) { | ||
1373 | if (blk->disk_blkno) { | ||
1374 | error = xfs_da3_node_read(state->args->trans, | ||
1375 | state->args->dp, | ||
1376 | blk->blkno, blk->disk_blkno, | ||
1377 | &blk->bp, XFS_ATTR_FORK); | ||
1378 | if (error) | ||
1379 | return error; | ||
1380 | } else { | ||
1381 | blk->bp = NULL; | ||
1382 | } | ||
1383 | } | ||
1384 | |||
1385 | /* | ||
1386 | * Roll down the "altpath" in the state structure, storing the on-disk | ||
1387 | * block number for those buffers in the "altpath". | ||
1388 | */ | ||
1389 | path = &state->altpath; | ||
1390 | ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); | ||
1391 | for (blk = path->blk, level = 0; level < path->active; blk++, level++) { | ||
1392 | if (blk->disk_blkno) { | ||
1393 | error = xfs_da3_node_read(state->args->trans, | ||
1394 | state->args->dp, | ||
1395 | blk->blkno, blk->disk_blkno, | ||
1396 | &blk->bp, XFS_ATTR_FORK); | ||
1397 | if (error) | ||
1398 | return error; | ||
1399 | } else { | ||
1400 | blk->bp = NULL; | ||
1401 | } | ||
1402 | } | ||
1403 | |||
1404 | return 0; | ||
1405 | } | ||
1406 | |||
1407 | /* | ||
1408 | * Look up a filename in a node attribute list. | ||
1409 | * | ||
1410 | * This routine gets called for any attribute fork that has more than one | ||
1411 | * block, ie: both true Btree attr lists and for single-leaf-blocks with | ||
1412 | * "remote" values taking up more blocks. | ||
1413 | */ | ||
1414 | STATIC int | ||
1415 | xfs_attr_node_get(xfs_da_args_t *args) | ||
1416 | { | ||
1417 | xfs_da_state_t *state; | ||
1418 | xfs_da_state_blk_t *blk; | ||
1419 | int error, retval; | ||
1420 | int i; | ||
1421 | |||
1422 | trace_xfs_attr_node_get(args); | ||
1423 | |||
1424 | state = xfs_da_state_alloc(); | ||
1425 | state->args = args; | ||
1426 | state->mp = args->dp->i_mount; | ||
1427 | |||
1428 | /* | ||
1429 | * Search to see if name exists, and get back a pointer to it. | ||
1430 | */ | ||
1431 | error = xfs_da3_node_lookup_int(state, &retval); | ||
1432 | if (error) { | ||
1433 | retval = error; | ||
1434 | } else if (retval == -EEXIST) { | ||
1435 | blk = &state->path.blk[ state->path.active-1 ]; | ||
1436 | ASSERT(blk->bp != NULL); | ||
1437 | ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); | ||
1438 | |||
1439 | /* | ||
1440 | * Get the value, local or "remote" | ||
1441 | */ | ||
1442 | retval = xfs_attr3_leaf_getvalue(blk->bp, args); | ||
1443 | if (!retval && (args->rmtblkno > 0) | ||
1444 | && !(args->flags & ATTR_KERNOVAL)) { | ||
1445 | retval = xfs_attr_rmtval_get(args); | ||
1446 | } | ||
1447 | } | ||
1448 | |||
1449 | /* | ||
1450 | * If not in a transaction, we have to release all the buffers. | ||
1451 | */ | ||
1452 | for (i = 0; i < state->path.active; i++) { | ||
1453 | xfs_trans_brelse(args->trans, state->path.blk[i].bp); | ||
1454 | state->path.blk[i].bp = NULL; | ||
1455 | } | ||
1456 | |||
1457 | xfs_da_state_free(state); | ||
1458 | return retval; | ||
1459 | } | ||
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c new file mode 100644 index 000000000000..b1f73dbbf3d8 --- /dev/null +++ b/fs/xfs/libxfs/xfs_attr_leaf.c | |||
@@ -0,0 +1,2697 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2013 Red Hat, Inc. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #include "xfs.h" | ||
20 | #include "xfs_fs.h" | ||
21 | #include "xfs_shared.h" | ||
22 | #include "xfs_format.h" | ||
23 | #include "xfs_log_format.h" | ||
24 | #include "xfs_trans_resv.h" | ||
25 | #include "xfs_bit.h" | ||
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | ||
29 | #include "xfs_da_format.h" | ||
30 | #include "xfs_da_btree.h" | ||
31 | #include "xfs_inode.h" | ||
32 | #include "xfs_trans.h" | ||
33 | #include "xfs_inode_item.h" | ||
34 | #include "xfs_bmap_btree.h" | ||
35 | #include "xfs_bmap.h" | ||
36 | #include "xfs_attr_sf.h" | ||
37 | #include "xfs_attr_remote.h" | ||
38 | #include "xfs_attr.h" | ||
39 | #include "xfs_attr_leaf.h" | ||
40 | #include "xfs_error.h" | ||
41 | #include "xfs_trace.h" | ||
42 | #include "xfs_buf_item.h" | ||
43 | #include "xfs_cksum.h" | ||
44 | #include "xfs_dinode.h" | ||
45 | #include "xfs_dir2.h" | ||
46 | |||
47 | |||
48 | /* | ||
49 | * xfs_attr_leaf.c | ||
50 | * | ||
51 | * Routines to implement leaf blocks of attributes as Btrees of hashed names. | ||
52 | */ | ||
53 | |||
54 | /*======================================================================== | ||
55 | * Function prototypes for the kernel. | ||
56 | *========================================================================*/ | ||
57 | |||
58 | /* | ||
59 | * Routines used for growing the Btree. | ||
60 | */ | ||
61 | STATIC int xfs_attr3_leaf_create(struct xfs_da_args *args, | ||
62 | xfs_dablk_t which_block, struct xfs_buf **bpp); | ||
63 | STATIC int xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer, | ||
64 | struct xfs_attr3_icleaf_hdr *ichdr, | ||
65 | struct xfs_da_args *args, int freemap_index); | ||
66 | STATIC void xfs_attr3_leaf_compact(struct xfs_da_args *args, | ||
67 | struct xfs_attr3_icleaf_hdr *ichdr, | ||
68 | struct xfs_buf *leaf_buffer); | ||
69 | STATIC void xfs_attr3_leaf_rebalance(xfs_da_state_t *state, | ||
70 | xfs_da_state_blk_t *blk1, | ||
71 | xfs_da_state_blk_t *blk2); | ||
72 | STATIC int xfs_attr3_leaf_figure_balance(xfs_da_state_t *state, | ||
73 | xfs_da_state_blk_t *leaf_blk_1, | ||
74 | struct xfs_attr3_icleaf_hdr *ichdr1, | ||
75 | xfs_da_state_blk_t *leaf_blk_2, | ||
76 | struct xfs_attr3_icleaf_hdr *ichdr2, | ||
77 | int *number_entries_in_blk1, | ||
78 | int *number_usedbytes_in_blk1); | ||
79 | |||
80 | /* | ||
81 | * Utility routines. | ||
82 | */ | ||
83 | STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args, | ||
84 | struct xfs_attr_leafblock *src_leaf, | ||
85 | struct xfs_attr3_icleaf_hdr *src_ichdr, int src_start, | ||
86 | struct xfs_attr_leafblock *dst_leaf, | ||
87 | struct xfs_attr3_icleaf_hdr *dst_ichdr, int dst_start, | ||
88 | int move_count); | ||
89 | STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); | ||
90 | |||
91 | void | ||
92 | xfs_attr3_leaf_hdr_from_disk( | ||
93 | struct xfs_attr3_icleaf_hdr *to, | ||
94 | struct xfs_attr_leafblock *from) | ||
95 | { | ||
96 | int i; | ||
97 | |||
98 | ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) || | ||
99 | from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)); | ||
100 | |||
101 | if (from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) { | ||
102 | struct xfs_attr3_leaf_hdr *hdr3 = (struct xfs_attr3_leaf_hdr *)from; | ||
103 | |||
104 | to->forw = be32_to_cpu(hdr3->info.hdr.forw); | ||
105 | to->back = be32_to_cpu(hdr3->info.hdr.back); | ||
106 | to->magic = be16_to_cpu(hdr3->info.hdr.magic); | ||
107 | to->count = be16_to_cpu(hdr3->count); | ||
108 | to->usedbytes = be16_to_cpu(hdr3->usedbytes); | ||
109 | to->firstused = be16_to_cpu(hdr3->firstused); | ||
110 | to->holes = hdr3->holes; | ||
111 | |||
112 | for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { | ||
113 | to->freemap[i].base = be16_to_cpu(hdr3->freemap[i].base); | ||
114 | to->freemap[i].size = be16_to_cpu(hdr3->freemap[i].size); | ||
115 | } | ||
116 | return; | ||
117 | } | ||
118 | to->forw = be32_to_cpu(from->hdr.info.forw); | ||
119 | to->back = be32_to_cpu(from->hdr.info.back); | ||
120 | to->magic = be16_to_cpu(from->hdr.info.magic); | ||
121 | to->count = be16_to_cpu(from->hdr.count); | ||
122 | to->usedbytes = be16_to_cpu(from->hdr.usedbytes); | ||
123 | to->firstused = be16_to_cpu(from->hdr.firstused); | ||
124 | to->holes = from->hdr.holes; | ||
125 | |||
126 | for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { | ||
127 | to->freemap[i].base = be16_to_cpu(from->hdr.freemap[i].base); | ||
128 | to->freemap[i].size = be16_to_cpu(from->hdr.freemap[i].size); | ||
129 | } | ||
130 | } | ||
131 | |||
132 | void | ||
133 | xfs_attr3_leaf_hdr_to_disk( | ||
134 | struct xfs_attr_leafblock *to, | ||
135 | struct xfs_attr3_icleaf_hdr *from) | ||
136 | { | ||
137 | int i; | ||
138 | |||
139 | ASSERT(from->magic == XFS_ATTR_LEAF_MAGIC || | ||
140 | from->magic == XFS_ATTR3_LEAF_MAGIC); | ||
141 | |||
142 | if (from->magic == XFS_ATTR3_LEAF_MAGIC) { | ||
143 | struct xfs_attr3_leaf_hdr *hdr3 = (struct xfs_attr3_leaf_hdr *)to; | ||
144 | |||
145 | hdr3->info.hdr.forw = cpu_to_be32(from->forw); | ||
146 | hdr3->info.hdr.back = cpu_to_be32(from->back); | ||
147 | hdr3->info.hdr.magic = cpu_to_be16(from->magic); | ||
148 | hdr3->count = cpu_to_be16(from->count); | ||
149 | hdr3->usedbytes = cpu_to_be16(from->usedbytes); | ||
150 | hdr3->firstused = cpu_to_be16(from->firstused); | ||
151 | hdr3->holes = from->holes; | ||
152 | hdr3->pad1 = 0; | ||
153 | |||
154 | for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { | ||
155 | hdr3->freemap[i].base = cpu_to_be16(from->freemap[i].base); | ||
156 | hdr3->freemap[i].size = cpu_to_be16(from->freemap[i].size); | ||
157 | } | ||
158 | return; | ||
159 | } | ||
160 | to->hdr.info.forw = cpu_to_be32(from->forw); | ||
161 | to->hdr.info.back = cpu_to_be32(from->back); | ||
162 | to->hdr.info.magic = cpu_to_be16(from->magic); | ||
163 | to->hdr.count = cpu_to_be16(from->count); | ||
164 | to->hdr.usedbytes = cpu_to_be16(from->usedbytes); | ||
165 | to->hdr.firstused = cpu_to_be16(from->firstused); | ||
166 | to->hdr.holes = from->holes; | ||
167 | to->hdr.pad1 = 0; | ||
168 | |||
169 | for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { | ||
170 | to->hdr.freemap[i].base = cpu_to_be16(from->freemap[i].base); | ||
171 | to->hdr.freemap[i].size = cpu_to_be16(from->freemap[i].size); | ||
172 | } | ||
173 | } | ||
174 | |||
175 | static bool | ||
176 | xfs_attr3_leaf_verify( | ||
177 | struct xfs_buf *bp) | ||
178 | { | ||
179 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
180 | struct xfs_attr_leafblock *leaf = bp->b_addr; | ||
181 | struct xfs_attr3_icleaf_hdr ichdr; | ||
182 | |||
183 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | ||
184 | |||
185 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
186 | struct xfs_da3_node_hdr *hdr3 = bp->b_addr; | ||
187 | |||
188 | if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC) | ||
189 | return false; | ||
190 | |||
191 | if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid)) | ||
192 | return false; | ||
193 | if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) | ||
194 | return false; | ||
195 | } else { | ||
196 | if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) | ||
197 | return false; | ||
198 | } | ||
199 | if (ichdr.count == 0) | ||
200 | return false; | ||
201 | |||
202 | /* XXX: need to range check rest of attr header values */ | ||
203 | /* XXX: hash order check? */ | ||
204 | |||
205 | return true; | ||
206 | } | ||
207 | |||
208 | static void | ||
209 | xfs_attr3_leaf_write_verify( | ||
210 | struct xfs_buf *bp) | ||
211 | { | ||
212 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
213 | struct xfs_buf_log_item *bip = bp->b_fspriv; | ||
214 | struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; | ||
215 | |||
216 | if (!xfs_attr3_leaf_verify(bp)) { | ||
217 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
218 | xfs_verifier_error(bp); | ||
219 | return; | ||
220 | } | ||
221 | |||
222 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
223 | return; | ||
224 | |||
225 | if (bip) | ||
226 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); | ||
227 | |||
228 | xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF); | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * leaf/node format detection on trees is sketchy, so a node read can be done on | ||
233 | * leaf level blocks when detection identifies the tree as a node format tree | ||
234 | * incorrectly. In this case, we need to swap the verifier to match the correct | ||
235 | * format of the block being read. | ||
236 | */ | ||
237 | static void | ||
238 | xfs_attr3_leaf_read_verify( | ||
239 | struct xfs_buf *bp) | ||
240 | { | ||
241 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
242 | |||
243 | if (xfs_sb_version_hascrc(&mp->m_sb) && | ||
244 | !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) | ||
245 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
246 | else if (!xfs_attr3_leaf_verify(bp)) | ||
247 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
248 | |||
249 | if (bp->b_error) | ||
250 | xfs_verifier_error(bp); | ||
251 | } | ||
252 | |||
253 | const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { | ||
254 | .verify_read = xfs_attr3_leaf_read_verify, | ||
255 | .verify_write = xfs_attr3_leaf_write_verify, | ||
256 | }; | ||
257 | |||
258 | int | ||
259 | xfs_attr3_leaf_read( | ||
260 | struct xfs_trans *tp, | ||
261 | struct xfs_inode *dp, | ||
262 | xfs_dablk_t bno, | ||
263 | xfs_daddr_t mappedbno, | ||
264 | struct xfs_buf **bpp) | ||
265 | { | ||
266 | int err; | ||
267 | |||
268 | err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, | ||
269 | XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops); | ||
270 | if (!err && tp) | ||
271 | xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF); | ||
272 | return err; | ||
273 | } | ||
274 | |||
275 | /*======================================================================== | ||
276 | * Namespace helper routines | ||
277 | *========================================================================*/ | ||
278 | |||
279 | /* | ||
280 | * If namespace bits don't match return 0. | ||
281 | * If all match then return 1. | ||
282 | */ | ||
283 | STATIC int | ||
284 | xfs_attr_namesp_match(int arg_flags, int ondisk_flags) | ||
285 | { | ||
286 | return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); | ||
287 | } | ||
288 | |||
289 | |||
290 | /*======================================================================== | ||
291 | * External routines when attribute fork size < XFS_LITINO(mp). | ||
292 | *========================================================================*/ | ||
293 | |||
294 | /* | ||
295 | * Query whether the requested number of additional bytes of extended | ||
296 | * attribute space will be able to fit inline. | ||
297 | * | ||
298 | * Returns zero if not, else the di_forkoff fork offset to be used in the | ||
299 | * literal area for attribute data once the new bytes have been added. | ||
300 | * | ||
301 | * di_forkoff must be 8 byte aligned, hence is stored as a >>3 value; | ||
302 | * special case for dev/uuid inodes, they have fixed size data forks. | ||
303 | */ | ||
304 | int | ||
305 | xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) | ||
306 | { | ||
307 | int offset; | ||
308 | int minforkoff; /* lower limit on valid forkoff locations */ | ||
309 | int maxforkoff; /* upper limit on valid forkoff locations */ | ||
310 | int dsize; | ||
311 | xfs_mount_t *mp = dp->i_mount; | ||
312 | |||
313 | /* rounded down */ | ||
314 | offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3; | ||
315 | |||
316 | switch (dp->i_d.di_format) { | ||
317 | case XFS_DINODE_FMT_DEV: | ||
318 | minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; | ||
319 | return (offset >= minforkoff) ? minforkoff : 0; | ||
320 | case XFS_DINODE_FMT_UUID: | ||
321 | minforkoff = roundup(sizeof(uuid_t), 8) >> 3; | ||
322 | return (offset >= minforkoff) ? minforkoff : 0; | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * If the requested numbers of bytes is smaller or equal to the | ||
327 | * current attribute fork size we can always proceed. | ||
328 | * | ||
329 | * Note that if_bytes in the data fork might actually be larger than | ||
330 | * the current data fork size is due to delalloc extents. In that | ||
331 | * case either the extent count will go down when they are converted | ||
332 | * to real extents, or the delalloc conversion will take care of the | ||
333 | * literal area rebalancing. | ||
334 | */ | ||
335 | if (bytes <= XFS_IFORK_ASIZE(dp)) | ||
336 | return dp->i_d.di_forkoff; | ||
337 | |||
338 | /* | ||
339 | * For attr2 we can try to move the forkoff if there is space in the | ||
340 | * literal area, but for the old format we are done if there is no | ||
341 | * space in the fixed attribute fork. | ||
342 | */ | ||
343 | if (!(mp->m_flags & XFS_MOUNT_ATTR2)) | ||
344 | return 0; | ||
345 | |||
346 | dsize = dp->i_df.if_bytes; | ||
347 | |||
348 | switch (dp->i_d.di_format) { | ||
349 | case XFS_DINODE_FMT_EXTENTS: | ||
350 | /* | ||
351 | * If there is no attr fork and the data fork is extents, | ||
352 | * determine if creating the default attr fork will result | ||
353 | * in the extents form migrating to btree. If so, the | ||
354 | * minimum offset only needs to be the space required for | ||
355 | * the btree root. | ||
356 | */ | ||
357 | if (!dp->i_d.di_forkoff && dp->i_df.if_bytes > | ||
358 | xfs_default_attroffset(dp)) | ||
359 | dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS); | ||
360 | break; | ||
361 | case XFS_DINODE_FMT_BTREE: | ||
362 | /* | ||
363 | * If we have a data btree then keep forkoff if we have one, | ||
364 | * otherwise we are adding a new attr, so then we set | ||
365 | * minforkoff to where the btree root can finish so we have | ||
366 | * plenty of room for attrs | ||
367 | */ | ||
368 | if (dp->i_d.di_forkoff) { | ||
369 | if (offset < dp->i_d.di_forkoff) | ||
370 | return 0; | ||
371 | return dp->i_d.di_forkoff; | ||
372 | } | ||
373 | dsize = XFS_BMAP_BROOT_SPACE(mp, dp->i_df.if_broot); | ||
374 | break; | ||
375 | } | ||
376 | |||
377 | /* | ||
378 | * A data fork btree root must have space for at least | ||
379 | * MINDBTPTRS key/ptr pairs if the data fork is small or empty. | ||
380 | */ | ||
381 | minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS)); | ||
382 | minforkoff = roundup(minforkoff, 8) >> 3; | ||
383 | |||
384 | /* attr fork btree root can have at least this many key/ptr pairs */ | ||
385 | maxforkoff = XFS_LITINO(mp, dp->i_d.di_version) - | ||
386 | XFS_BMDR_SPACE_CALC(MINABTPTRS); | ||
387 | maxforkoff = maxforkoff >> 3; /* rounded down */ | ||
388 | |||
389 | if (offset >= maxforkoff) | ||
390 | return maxforkoff; | ||
391 | if (offset >= minforkoff) | ||
392 | return offset; | ||
393 | return 0; | ||
394 | } | ||
395 | |||
396 | /* | ||
397 | * Switch on the ATTR2 superblock bit (implies also FEATURES2) | ||
398 | */ | ||
399 | STATIC void | ||
400 | xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp) | ||
401 | { | ||
402 | if ((mp->m_flags & XFS_MOUNT_ATTR2) && | ||
403 | !(xfs_sb_version_hasattr2(&mp->m_sb))) { | ||
404 | spin_lock(&mp->m_sb_lock); | ||
405 | if (!xfs_sb_version_hasattr2(&mp->m_sb)) { | ||
406 | xfs_sb_version_addattr2(&mp->m_sb); | ||
407 | spin_unlock(&mp->m_sb_lock); | ||
408 | xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); | ||
409 | } else | ||
410 | spin_unlock(&mp->m_sb_lock); | ||
411 | } | ||
412 | } | ||
413 | |||
414 | /* | ||
415 | * Create the initial contents of a shortform attribute list. | ||
416 | */ | ||
417 | void | ||
418 | xfs_attr_shortform_create(xfs_da_args_t *args) | ||
419 | { | ||
420 | xfs_attr_sf_hdr_t *hdr; | ||
421 | xfs_inode_t *dp; | ||
422 | xfs_ifork_t *ifp; | ||
423 | |||
424 | trace_xfs_attr_sf_create(args); | ||
425 | |||
426 | dp = args->dp; | ||
427 | ASSERT(dp != NULL); | ||
428 | ifp = dp->i_afp; | ||
429 | ASSERT(ifp != NULL); | ||
430 | ASSERT(ifp->if_bytes == 0); | ||
431 | if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) { | ||
432 | ifp->if_flags &= ~XFS_IFEXTENTS; /* just in case */ | ||
433 | dp->i_d.di_aformat = XFS_DINODE_FMT_LOCAL; | ||
434 | ifp->if_flags |= XFS_IFINLINE; | ||
435 | } else { | ||
436 | ASSERT(ifp->if_flags & XFS_IFINLINE); | ||
437 | } | ||
438 | xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK); | ||
439 | hdr = (xfs_attr_sf_hdr_t *)ifp->if_u1.if_data; | ||
440 | hdr->count = 0; | ||
441 | hdr->totsize = cpu_to_be16(sizeof(*hdr)); | ||
442 | xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); | ||
443 | } | ||
444 | |||
445 | /* | ||
446 | * Add a name/value pair to the shortform attribute list. | ||
447 | * Overflow from the inode has already been checked for. | ||
448 | */ | ||
449 | void | ||
450 | xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) | ||
451 | { | ||
452 | xfs_attr_shortform_t *sf; | ||
453 | xfs_attr_sf_entry_t *sfe; | ||
454 | int i, offset, size; | ||
455 | xfs_mount_t *mp; | ||
456 | xfs_inode_t *dp; | ||
457 | xfs_ifork_t *ifp; | ||
458 | |||
459 | trace_xfs_attr_sf_add(args); | ||
460 | |||
461 | dp = args->dp; | ||
462 | mp = dp->i_mount; | ||
463 | dp->i_d.di_forkoff = forkoff; | ||
464 | |||
465 | ifp = dp->i_afp; | ||
466 | ASSERT(ifp->if_flags & XFS_IFINLINE); | ||
467 | sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; | ||
468 | sfe = &sf->list[0]; | ||
469 | for (i = 0; i < sf->hdr.count; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { | ||
470 | #ifdef DEBUG | ||
471 | if (sfe->namelen != args->namelen) | ||
472 | continue; | ||
473 | if (memcmp(args->name, sfe->nameval, args->namelen) != 0) | ||
474 | continue; | ||
475 | if (!xfs_attr_namesp_match(args->flags, sfe->flags)) | ||
476 | continue; | ||
477 | ASSERT(0); | ||
478 | #endif | ||
479 | } | ||
480 | |||
481 | offset = (char *)sfe - (char *)sf; | ||
482 | size = XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen); | ||
483 | xfs_idata_realloc(dp, size, XFS_ATTR_FORK); | ||
484 | sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; | ||
485 | sfe = (xfs_attr_sf_entry_t *)((char *)sf + offset); | ||
486 | |||
487 | sfe->namelen = args->namelen; | ||
488 | sfe->valuelen = args->valuelen; | ||
489 | sfe->flags = XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); | ||
490 | memcpy(sfe->nameval, args->name, args->namelen); | ||
491 | memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen); | ||
492 | sf->hdr.count++; | ||
493 | be16_add_cpu(&sf->hdr.totsize, size); | ||
494 | xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); | ||
495 | |||
496 | xfs_sbversion_add_attr2(mp, args->trans); | ||
497 | } | ||
498 | |||
499 | /* | ||
500 | * After the last attribute is removed revert to original inode format, | ||
501 | * making all literal area available to the data fork once more. | ||
502 | */ | ||
503 | STATIC void | ||
504 | xfs_attr_fork_reset( | ||
505 | struct xfs_inode *ip, | ||
506 | struct xfs_trans *tp) | ||
507 | { | ||
508 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | ||
509 | ip->i_d.di_forkoff = 0; | ||
510 | ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; | ||
511 | |||
512 | ASSERT(ip->i_d.di_anextents == 0); | ||
513 | ASSERT(ip->i_afp == NULL); | ||
514 | |||
515 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
516 | } | ||
517 | |||
518 | /* | ||
519 | * Remove an attribute from the shortform attribute list structure. | ||
520 | */ | ||
521 | int | ||
522 | xfs_attr_shortform_remove(xfs_da_args_t *args) | ||
523 | { | ||
524 | xfs_attr_shortform_t *sf; | ||
525 | xfs_attr_sf_entry_t *sfe; | ||
526 | int base, size=0, end, totsize, i; | ||
527 | xfs_mount_t *mp; | ||
528 | xfs_inode_t *dp; | ||
529 | |||
530 | trace_xfs_attr_sf_remove(args); | ||
531 | |||
532 | dp = args->dp; | ||
533 | mp = dp->i_mount; | ||
534 | base = sizeof(xfs_attr_sf_hdr_t); | ||
535 | sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data; | ||
536 | sfe = &sf->list[0]; | ||
537 | end = sf->hdr.count; | ||
538 | for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), | ||
539 | base += size, i++) { | ||
540 | size = XFS_ATTR_SF_ENTSIZE(sfe); | ||
541 | if (sfe->namelen != args->namelen) | ||
542 | continue; | ||
543 | if (memcmp(sfe->nameval, args->name, args->namelen) != 0) | ||
544 | continue; | ||
545 | if (!xfs_attr_namesp_match(args->flags, sfe->flags)) | ||
546 | continue; | ||
547 | break; | ||
548 | } | ||
549 | if (i == end) | ||
550 | return -ENOATTR; | ||
551 | |||
552 | /* | ||
553 | * Fix up the attribute fork data, covering the hole | ||
554 | */ | ||
555 | end = base + size; | ||
556 | totsize = be16_to_cpu(sf->hdr.totsize); | ||
557 | if (end != totsize) | ||
558 | memmove(&((char *)sf)[base], &((char *)sf)[end], totsize - end); | ||
559 | sf->hdr.count--; | ||
560 | be16_add_cpu(&sf->hdr.totsize, -size); | ||
561 | |||
562 | /* | ||
563 | * Fix up the start offset of the attribute fork | ||
564 | */ | ||
565 | totsize -= size; | ||
566 | if (totsize == sizeof(xfs_attr_sf_hdr_t) && | ||
567 | (mp->m_flags & XFS_MOUNT_ATTR2) && | ||
568 | (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) && | ||
569 | !(args->op_flags & XFS_DA_OP_ADDNAME)) { | ||
570 | xfs_attr_fork_reset(dp, args->trans); | ||
571 | } else { | ||
572 | xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); | ||
573 | dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); | ||
574 | ASSERT(dp->i_d.di_forkoff); | ||
575 | ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || | ||
576 | (args->op_flags & XFS_DA_OP_ADDNAME) || | ||
577 | !(mp->m_flags & XFS_MOUNT_ATTR2) || | ||
578 | dp->i_d.di_format == XFS_DINODE_FMT_BTREE); | ||
579 | xfs_trans_log_inode(args->trans, dp, | ||
580 | XFS_ILOG_CORE | XFS_ILOG_ADATA); | ||
581 | } | ||
582 | |||
583 | xfs_sbversion_add_attr2(mp, args->trans); | ||
584 | |||
585 | return 0; | ||
586 | } | ||
587 | |||
588 | /* | ||
589 | * Look up a name in a shortform attribute list structure. | ||
590 | */ | ||
591 | /*ARGSUSED*/ | ||
592 | int | ||
593 | xfs_attr_shortform_lookup(xfs_da_args_t *args) | ||
594 | { | ||
595 | xfs_attr_shortform_t *sf; | ||
596 | xfs_attr_sf_entry_t *sfe; | ||
597 | int i; | ||
598 | xfs_ifork_t *ifp; | ||
599 | |||
600 | trace_xfs_attr_sf_lookup(args); | ||
601 | |||
602 | ifp = args->dp->i_afp; | ||
603 | ASSERT(ifp->if_flags & XFS_IFINLINE); | ||
604 | sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; | ||
605 | sfe = &sf->list[0]; | ||
606 | for (i = 0; i < sf->hdr.count; | ||
607 | sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { | ||
608 | if (sfe->namelen != args->namelen) | ||
609 | continue; | ||
610 | if (memcmp(args->name, sfe->nameval, args->namelen) != 0) | ||
611 | continue; | ||
612 | if (!xfs_attr_namesp_match(args->flags, sfe->flags)) | ||
613 | continue; | ||
614 | return -EEXIST; | ||
615 | } | ||
616 | return -ENOATTR; | ||
617 | } | ||
618 | |||
619 | /* | ||
620 | * Look up a name in a shortform attribute list structure. | ||
621 | */ | ||
622 | /*ARGSUSED*/ | ||
623 | int | ||
624 | xfs_attr_shortform_getvalue(xfs_da_args_t *args) | ||
625 | { | ||
626 | xfs_attr_shortform_t *sf; | ||
627 | xfs_attr_sf_entry_t *sfe; | ||
628 | int i; | ||
629 | |||
630 | ASSERT(args->dp->i_afp->if_flags == XFS_IFINLINE); | ||
631 | sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data; | ||
632 | sfe = &sf->list[0]; | ||
633 | for (i = 0; i < sf->hdr.count; | ||
634 | sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { | ||
635 | if (sfe->namelen != args->namelen) | ||
636 | continue; | ||
637 | if (memcmp(args->name, sfe->nameval, args->namelen) != 0) | ||
638 | continue; | ||
639 | if (!xfs_attr_namesp_match(args->flags, sfe->flags)) | ||
640 | continue; | ||
641 | if (args->flags & ATTR_KERNOVAL) { | ||
642 | args->valuelen = sfe->valuelen; | ||
643 | return -EEXIST; | ||
644 | } | ||
645 | if (args->valuelen < sfe->valuelen) { | ||
646 | args->valuelen = sfe->valuelen; | ||
647 | return -ERANGE; | ||
648 | } | ||
649 | args->valuelen = sfe->valuelen; | ||
650 | memcpy(args->value, &sfe->nameval[args->namelen], | ||
651 | args->valuelen); | ||
652 | return -EEXIST; | ||
653 | } | ||
654 | return -ENOATTR; | ||
655 | } | ||
656 | |||
657 | /* | ||
658 | * Convert from using the shortform to the leaf. | ||
659 | */ | ||
660 | int | ||
661 | xfs_attr_shortform_to_leaf(xfs_da_args_t *args) | ||
662 | { | ||
663 | xfs_inode_t *dp; | ||
664 | xfs_attr_shortform_t *sf; | ||
665 | xfs_attr_sf_entry_t *sfe; | ||
666 | xfs_da_args_t nargs; | ||
667 | char *tmpbuffer; | ||
668 | int error, i, size; | ||
669 | xfs_dablk_t blkno; | ||
670 | struct xfs_buf *bp; | ||
671 | xfs_ifork_t *ifp; | ||
672 | |||
673 | trace_xfs_attr_sf_to_leaf(args); | ||
674 | |||
675 | dp = args->dp; | ||
676 | ifp = dp->i_afp; | ||
677 | sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; | ||
678 | size = be16_to_cpu(sf->hdr.totsize); | ||
679 | tmpbuffer = kmem_alloc(size, KM_SLEEP); | ||
680 | ASSERT(tmpbuffer != NULL); | ||
681 | memcpy(tmpbuffer, ifp->if_u1.if_data, size); | ||
682 | sf = (xfs_attr_shortform_t *)tmpbuffer; | ||
683 | |||
684 | xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); | ||
685 | xfs_bmap_local_to_extents_empty(dp, XFS_ATTR_FORK); | ||
686 | |||
687 | bp = NULL; | ||
688 | error = xfs_da_grow_inode(args, &blkno); | ||
689 | if (error) { | ||
690 | /* | ||
691 | * If we hit an IO error middle of the transaction inside | ||
692 | * grow_inode(), we may have inconsistent data. Bail out. | ||
693 | */ | ||
694 | if (error == -EIO) | ||
695 | goto out; | ||
696 | xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ | ||
697 | memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ | ||
698 | goto out; | ||
699 | } | ||
700 | |||
701 | ASSERT(blkno == 0); | ||
702 | error = xfs_attr3_leaf_create(args, blkno, &bp); | ||
703 | if (error) { | ||
704 | error = xfs_da_shrink_inode(args, 0, bp); | ||
705 | bp = NULL; | ||
706 | if (error) | ||
707 | goto out; | ||
708 | xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ | ||
709 | memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ | ||
710 | goto out; | ||
711 | } | ||
712 | |||
713 | memset((char *)&nargs, 0, sizeof(nargs)); | ||
714 | nargs.dp = dp; | ||
715 | nargs.geo = args->geo; | ||
716 | nargs.firstblock = args->firstblock; | ||
717 | nargs.flist = args->flist; | ||
718 | nargs.total = args->total; | ||
719 | nargs.whichfork = XFS_ATTR_FORK; | ||
720 | nargs.trans = args->trans; | ||
721 | nargs.op_flags = XFS_DA_OP_OKNOENT; | ||
722 | |||
723 | sfe = &sf->list[0]; | ||
724 | for (i = 0; i < sf->hdr.count; i++) { | ||
725 | nargs.name = sfe->nameval; | ||
726 | nargs.namelen = sfe->namelen; | ||
727 | nargs.value = &sfe->nameval[nargs.namelen]; | ||
728 | nargs.valuelen = sfe->valuelen; | ||
729 | nargs.hashval = xfs_da_hashname(sfe->nameval, | ||
730 | sfe->namelen); | ||
731 | nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); | ||
732 | error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */ | ||
733 | ASSERT(error == -ENOATTR); | ||
734 | error = xfs_attr3_leaf_add(bp, &nargs); | ||
735 | ASSERT(error != -ENOSPC); | ||
736 | if (error) | ||
737 | goto out; | ||
738 | sfe = XFS_ATTR_SF_NEXTENTRY(sfe); | ||
739 | } | ||
740 | error = 0; | ||
741 | |||
742 | out: | ||
743 | kmem_free(tmpbuffer); | ||
744 | return error; | ||
745 | } | ||
746 | |||
747 | /* | ||
748 | * Check a leaf attribute block to see if all the entries would fit into | ||
749 | * a shortform attribute list. | ||
750 | */ | ||
751 | int | ||
752 | xfs_attr_shortform_allfit( | ||
753 | struct xfs_buf *bp, | ||
754 | struct xfs_inode *dp) | ||
755 | { | ||
756 | struct xfs_attr_leafblock *leaf; | ||
757 | struct xfs_attr_leaf_entry *entry; | ||
758 | xfs_attr_leaf_name_local_t *name_loc; | ||
759 | struct xfs_attr3_icleaf_hdr leafhdr; | ||
760 | int bytes; | ||
761 | int i; | ||
762 | |||
763 | leaf = bp->b_addr; | ||
764 | xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf); | ||
765 | entry = xfs_attr3_leaf_entryp(leaf); | ||
766 | |||
767 | bytes = sizeof(struct xfs_attr_sf_hdr); | ||
768 | for (i = 0; i < leafhdr.count; entry++, i++) { | ||
769 | if (entry->flags & XFS_ATTR_INCOMPLETE) | ||
770 | continue; /* don't copy partial entries */ | ||
771 | if (!(entry->flags & XFS_ATTR_LOCAL)) | ||
772 | return 0; | ||
773 | name_loc = xfs_attr3_leaf_name_local(leaf, i); | ||
774 | if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX) | ||
775 | return 0; | ||
776 | if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX) | ||
777 | return 0; | ||
778 | bytes += sizeof(struct xfs_attr_sf_entry) - 1 | ||
779 | + name_loc->namelen | ||
780 | + be16_to_cpu(name_loc->valuelen); | ||
781 | } | ||
782 | if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) && | ||
783 | (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) && | ||
784 | (bytes == sizeof(struct xfs_attr_sf_hdr))) | ||
785 | return -1; | ||
786 | return xfs_attr_shortform_bytesfit(dp, bytes); | ||
787 | } | ||
788 | |||
789 | /* | ||
790 | * Convert a leaf attribute list to shortform attribute list | ||
791 | */ | ||
792 | int | ||
793 | xfs_attr3_leaf_to_shortform( | ||
794 | struct xfs_buf *bp, | ||
795 | struct xfs_da_args *args, | ||
796 | int forkoff) | ||
797 | { | ||
798 | struct xfs_attr_leafblock *leaf; | ||
799 | struct xfs_attr3_icleaf_hdr ichdr; | ||
800 | struct xfs_attr_leaf_entry *entry; | ||
801 | struct xfs_attr_leaf_name_local *name_loc; | ||
802 | struct xfs_da_args nargs; | ||
803 | struct xfs_inode *dp = args->dp; | ||
804 | char *tmpbuffer; | ||
805 | int error; | ||
806 | int i; | ||
807 | |||
808 | trace_xfs_attr_leaf_to_sf(args); | ||
809 | |||
810 | tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); | ||
811 | if (!tmpbuffer) | ||
812 | return -ENOMEM; | ||
813 | |||
814 | memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); | ||
815 | |||
816 | leaf = (xfs_attr_leafblock_t *)tmpbuffer; | ||
817 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | ||
818 | entry = xfs_attr3_leaf_entryp(leaf); | ||
819 | |||
820 | /* XXX (dgc): buffer is about to be marked stale - why zero it? */ | ||
821 | memset(bp->b_addr, 0, args->geo->blksize); | ||
822 | |||
823 | /* | ||
824 | * Clean out the prior contents of the attribute list. | ||
825 | */ | ||
826 | error = xfs_da_shrink_inode(args, 0, bp); | ||
827 | if (error) | ||
828 | goto out; | ||
829 | |||
830 | if (forkoff == -1) { | ||
831 | ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2); | ||
832 | ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE); | ||
833 | xfs_attr_fork_reset(dp, args->trans); | ||
834 | goto out; | ||
835 | } | ||
836 | |||
837 | xfs_attr_shortform_create(args); | ||
838 | |||
839 | /* | ||
840 | * Copy the attributes | ||
841 | */ | ||
842 | memset((char *)&nargs, 0, sizeof(nargs)); | ||
843 | nargs.geo = args->geo; | ||
844 | nargs.dp = dp; | ||
845 | nargs.firstblock = args->firstblock; | ||
846 | nargs.flist = args->flist; | ||
847 | nargs.total = args->total; | ||
848 | nargs.whichfork = XFS_ATTR_FORK; | ||
849 | nargs.trans = args->trans; | ||
850 | nargs.op_flags = XFS_DA_OP_OKNOENT; | ||
851 | |||
852 | for (i = 0; i < ichdr.count; entry++, i++) { | ||
853 | if (entry->flags & XFS_ATTR_INCOMPLETE) | ||
854 | continue; /* don't copy partial entries */ | ||
855 | if (!entry->nameidx) | ||
856 | continue; | ||
857 | ASSERT(entry->flags & XFS_ATTR_LOCAL); | ||
858 | name_loc = xfs_attr3_leaf_name_local(leaf, i); | ||
859 | nargs.name = name_loc->nameval; | ||
860 | nargs.namelen = name_loc->namelen; | ||
861 | nargs.value = &name_loc->nameval[nargs.namelen]; | ||
862 | nargs.valuelen = be16_to_cpu(name_loc->valuelen); | ||
863 | nargs.hashval = be32_to_cpu(entry->hashval); | ||
864 | nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags); | ||
865 | xfs_attr_shortform_add(&nargs, forkoff); | ||
866 | } | ||
867 | error = 0; | ||
868 | |||
869 | out: | ||
870 | kmem_free(tmpbuffer); | ||
871 | return error; | ||
872 | } | ||
873 | |||
874 | /* | ||
875 | * Convert from using a single leaf to a root node and a leaf. | ||
876 | */ | ||
877 | int | ||
878 | xfs_attr3_leaf_to_node( | ||
879 | struct xfs_da_args *args) | ||
880 | { | ||
881 | struct xfs_attr_leafblock *leaf; | ||
882 | struct xfs_attr3_icleaf_hdr icleafhdr; | ||
883 | struct xfs_attr_leaf_entry *entries; | ||
884 | struct xfs_da_node_entry *btree; | ||
885 | struct xfs_da3_icnode_hdr icnodehdr; | ||
886 | struct xfs_da_intnode *node; | ||
887 | struct xfs_inode *dp = args->dp; | ||
888 | struct xfs_mount *mp = dp->i_mount; | ||
889 | struct xfs_buf *bp1 = NULL; | ||
890 | struct xfs_buf *bp2 = NULL; | ||
891 | xfs_dablk_t blkno; | ||
892 | int error; | ||
893 | |||
894 | trace_xfs_attr_leaf_to_node(args); | ||
895 | |||
896 | error = xfs_da_grow_inode(args, &blkno); | ||
897 | if (error) | ||
898 | goto out; | ||
899 | error = xfs_attr3_leaf_read(args->trans, dp, 0, -1, &bp1); | ||
900 | if (error) | ||
901 | goto out; | ||
902 | |||
903 | error = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp2, XFS_ATTR_FORK); | ||
904 | if (error) | ||
905 | goto out; | ||
906 | |||
907 | /* copy leaf to new buffer, update identifiers */ | ||
908 | xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF); | ||
909 | bp2->b_ops = bp1->b_ops; | ||
910 | memcpy(bp2->b_addr, bp1->b_addr, args->geo->blksize); | ||
911 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
912 | struct xfs_da3_blkinfo *hdr3 = bp2->b_addr; | ||
913 | hdr3->blkno = cpu_to_be64(bp2->b_bn); | ||
914 | } | ||
915 | xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1); | ||
916 | |||
917 | /* | ||
918 | * Set up the new root node. | ||
919 | */ | ||
920 | error = xfs_da3_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK); | ||
921 | if (error) | ||
922 | goto out; | ||
923 | node = bp1->b_addr; | ||
924 | dp->d_ops->node_hdr_from_disk(&icnodehdr, node); | ||
925 | btree = dp->d_ops->node_tree_p(node); | ||
926 | |||
927 | leaf = bp2->b_addr; | ||
928 | xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf); | ||
929 | entries = xfs_attr3_leaf_entryp(leaf); | ||
930 | |||
931 | /* both on-disk, don't endian-flip twice */ | ||
932 | btree[0].hashval = entries[icleafhdr.count - 1].hashval; | ||
933 | btree[0].before = cpu_to_be32(blkno); | ||
934 | icnodehdr.count = 1; | ||
935 | dp->d_ops->node_hdr_to_disk(node, &icnodehdr); | ||
936 | xfs_trans_log_buf(args->trans, bp1, 0, args->geo->blksize - 1); | ||
937 | error = 0; | ||
938 | out: | ||
939 | return error; | ||
940 | } | ||
941 | |||
942 | /*======================================================================== | ||
943 | * Routines used for growing the Btree. | ||
944 | *========================================================================*/ | ||
945 | |||
946 | /* | ||
947 | * Create the initial contents of a leaf attribute list | ||
948 | * or a leaf in a node attribute list. | ||
949 | */ | ||
950 | STATIC int | ||
951 | xfs_attr3_leaf_create( | ||
952 | struct xfs_da_args *args, | ||
953 | xfs_dablk_t blkno, | ||
954 | struct xfs_buf **bpp) | ||
955 | { | ||
956 | struct xfs_attr_leafblock *leaf; | ||
957 | struct xfs_attr3_icleaf_hdr ichdr; | ||
958 | struct xfs_inode *dp = args->dp; | ||
959 | struct xfs_mount *mp = dp->i_mount; | ||
960 | struct xfs_buf *bp; | ||
961 | int error; | ||
962 | |||
963 | trace_xfs_attr_leaf_create(args); | ||
964 | |||
965 | error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp, | ||
966 | XFS_ATTR_FORK); | ||
967 | if (error) | ||
968 | return error; | ||
969 | bp->b_ops = &xfs_attr3_leaf_buf_ops; | ||
970 | xfs_trans_buf_set_type(args->trans, bp, XFS_BLFT_ATTR_LEAF_BUF); | ||
971 | leaf = bp->b_addr; | ||
972 | memset(leaf, 0, args->geo->blksize); | ||
973 | |||
974 | memset(&ichdr, 0, sizeof(ichdr)); | ||
975 | ichdr.firstused = args->geo->blksize; | ||
976 | |||
977 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
978 | struct xfs_da3_blkinfo *hdr3 = bp->b_addr; | ||
979 | |||
980 | ichdr.magic = XFS_ATTR3_LEAF_MAGIC; | ||
981 | |||
982 | hdr3->blkno = cpu_to_be64(bp->b_bn); | ||
983 | hdr3->owner = cpu_to_be64(dp->i_ino); | ||
984 | uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid); | ||
985 | |||
986 | ichdr.freemap[0].base = sizeof(struct xfs_attr3_leaf_hdr); | ||
987 | } else { | ||
988 | ichdr.magic = XFS_ATTR_LEAF_MAGIC; | ||
989 | ichdr.freemap[0].base = sizeof(struct xfs_attr_leaf_hdr); | ||
990 | } | ||
991 | ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base; | ||
992 | |||
993 | xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); | ||
994 | xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1); | ||
995 | |||
996 | *bpp = bp; | ||
997 | return 0; | ||
998 | } | ||
999 | |||
1000 | /* | ||
1001 | * Split the leaf node, rebalance, then add the new entry. | ||
1002 | */ | ||
1003 | int | ||
1004 | xfs_attr3_leaf_split( | ||
1005 | struct xfs_da_state *state, | ||
1006 | struct xfs_da_state_blk *oldblk, | ||
1007 | struct xfs_da_state_blk *newblk) | ||
1008 | { | ||
1009 | xfs_dablk_t blkno; | ||
1010 | int error; | ||
1011 | |||
1012 | trace_xfs_attr_leaf_split(state->args); | ||
1013 | |||
1014 | /* | ||
1015 | * Allocate space for a new leaf node. | ||
1016 | */ | ||
1017 | ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC); | ||
1018 | error = xfs_da_grow_inode(state->args, &blkno); | ||
1019 | if (error) | ||
1020 | return error; | ||
1021 | error = xfs_attr3_leaf_create(state->args, blkno, &newblk->bp); | ||
1022 | if (error) | ||
1023 | return error; | ||
1024 | newblk->blkno = blkno; | ||
1025 | newblk->magic = XFS_ATTR_LEAF_MAGIC; | ||
1026 | |||
1027 | /* | ||
1028 | * Rebalance the entries across the two leaves. | ||
1029 | * NOTE: rebalance() currently depends on the 2nd block being empty. | ||
1030 | */ | ||
1031 | xfs_attr3_leaf_rebalance(state, oldblk, newblk); | ||
1032 | error = xfs_da3_blk_link(state, oldblk, newblk); | ||
1033 | if (error) | ||
1034 | return error; | ||
1035 | |||
1036 | /* | ||
1037 | * Save info on "old" attribute for "atomic rename" ops, leaf_add() | ||
1038 | * modifies the index/blkno/rmtblk/rmtblkcnt fields to show the | ||
1039 | * "new" attrs info. Will need the "old" info to remove it later. | ||
1040 | * | ||
1041 | * Insert the "new" entry in the correct block. | ||
1042 | */ | ||
1043 | if (state->inleaf) { | ||
1044 | trace_xfs_attr_leaf_add_old(state->args); | ||
1045 | error = xfs_attr3_leaf_add(oldblk->bp, state->args); | ||
1046 | } else { | ||
1047 | trace_xfs_attr_leaf_add_new(state->args); | ||
1048 | error = xfs_attr3_leaf_add(newblk->bp, state->args); | ||
1049 | } | ||
1050 | |||
1051 | /* | ||
1052 | * Update last hashval in each block since we added the name. | ||
1053 | */ | ||
1054 | oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL); | ||
1055 | newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL); | ||
1056 | return error; | ||
1057 | } | ||
1058 | |||
1059 | /* | ||
1060 | * Add a name to the leaf attribute list structure. | ||
1061 | */ | ||
1062 | int | ||
1063 | xfs_attr3_leaf_add( | ||
1064 | struct xfs_buf *bp, | ||
1065 | struct xfs_da_args *args) | ||
1066 | { | ||
1067 | struct xfs_attr_leafblock *leaf; | ||
1068 | struct xfs_attr3_icleaf_hdr ichdr; | ||
1069 | int tablesize; | ||
1070 | int entsize; | ||
1071 | int sum; | ||
1072 | int tmp; | ||
1073 | int i; | ||
1074 | |||
1075 | trace_xfs_attr_leaf_add(args); | ||
1076 | |||
1077 | leaf = bp->b_addr; | ||
1078 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | ||
1079 | ASSERT(args->index >= 0 && args->index <= ichdr.count); | ||
1080 | entsize = xfs_attr_leaf_newentsize(args, NULL); | ||
1081 | |||
1082 | /* | ||
1083 | * Search through freemap for first-fit on new name length. | ||
1084 | * (may need to figure in size of entry struct too) | ||
1085 | */ | ||
1086 | tablesize = (ichdr.count + 1) * sizeof(xfs_attr_leaf_entry_t) | ||
1087 | + xfs_attr3_leaf_hdr_size(leaf); | ||
1088 | for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE - 1; i >= 0; i--) { | ||
1089 | if (tablesize > ichdr.firstused) { | ||
1090 | sum += ichdr.freemap[i].size; | ||
1091 | continue; | ||
1092 | } | ||
1093 | if (!ichdr.freemap[i].size) | ||
1094 | continue; /* no space in this map */ | ||
1095 | tmp = entsize; | ||
1096 | if (ichdr.freemap[i].base < ichdr.firstused) | ||
1097 | tmp += sizeof(xfs_attr_leaf_entry_t); | ||
1098 | if (ichdr.freemap[i].size >= tmp) { | ||
1099 | tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, i); | ||
1100 | goto out_log_hdr; | ||
1101 | } | ||
1102 | sum += ichdr.freemap[i].size; | ||
1103 | } | ||
1104 | |||
1105 | /* | ||
1106 | * If there are no holes in the address space of the block, | ||
1107 | * and we don't have enough freespace, then compaction will do us | ||
1108 | * no good and we should just give up. | ||
1109 | */ | ||
1110 | if (!ichdr.holes && sum < entsize) | ||
1111 | return -ENOSPC; | ||
1112 | |||
1113 | /* | ||
1114 | * Compact the entries to coalesce free space. | ||
1115 | * This may change the hdr->count via dropping INCOMPLETE entries. | ||
1116 | */ | ||
1117 | xfs_attr3_leaf_compact(args, &ichdr, bp); | ||
1118 | |||
1119 | /* | ||
1120 | * After compaction, the block is guaranteed to have only one | ||
1121 | * free region, in freemap[0]. If it is not big enough, give up. | ||
1122 | */ | ||
1123 | if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) { | ||
1124 | tmp = -ENOSPC; | ||
1125 | goto out_log_hdr; | ||
1126 | } | ||
1127 | |||
1128 | tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0); | ||
1129 | |||
1130 | out_log_hdr: | ||
1131 | xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); | ||
1132 | xfs_trans_log_buf(args->trans, bp, | ||
1133 | XFS_DA_LOGRANGE(leaf, &leaf->hdr, | ||
1134 | xfs_attr3_leaf_hdr_size(leaf))); | ||
1135 | return tmp; | ||
1136 | } | ||
1137 | |||
1138 | /* | ||
1139 | * Add a name to a leaf attribute list structure. | ||
1140 | */ | ||
1141 | STATIC int | ||
1142 | xfs_attr3_leaf_add_work( | ||
1143 | struct xfs_buf *bp, | ||
1144 | struct xfs_attr3_icleaf_hdr *ichdr, | ||
1145 | struct xfs_da_args *args, | ||
1146 | int mapindex) | ||
1147 | { | ||
1148 | struct xfs_attr_leafblock *leaf; | ||
1149 | struct xfs_attr_leaf_entry *entry; | ||
1150 | struct xfs_attr_leaf_name_local *name_loc; | ||
1151 | struct xfs_attr_leaf_name_remote *name_rmt; | ||
1152 | struct xfs_mount *mp; | ||
1153 | int tmp; | ||
1154 | int i; | ||
1155 | |||
1156 | trace_xfs_attr_leaf_add_work(args); | ||
1157 | |||
1158 | leaf = bp->b_addr; | ||
1159 | ASSERT(mapindex >= 0 && mapindex < XFS_ATTR_LEAF_MAPSIZE); | ||
1160 | ASSERT(args->index >= 0 && args->index <= ichdr->count); | ||
1161 | |||
1162 | /* | ||
1163 | * Force open some space in the entry array and fill it in. | ||
1164 | */ | ||
1165 | entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; | ||
1166 | if (args->index < ichdr->count) { | ||
1167 | tmp = ichdr->count - args->index; | ||
1168 | tmp *= sizeof(xfs_attr_leaf_entry_t); | ||
1169 | memmove(entry + 1, entry, tmp); | ||
1170 | xfs_trans_log_buf(args->trans, bp, | ||
1171 | XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); | ||
1172 | } | ||
1173 | ichdr->count++; | ||
1174 | |||
1175 | /* | ||
1176 | * Allocate space for the new string (at the end of the run). | ||
1177 | */ | ||
1178 | mp = args->trans->t_mountp; | ||
1179 | ASSERT(ichdr->freemap[mapindex].base < args->geo->blksize); | ||
1180 | ASSERT((ichdr->freemap[mapindex].base & 0x3) == 0); | ||
1181 | ASSERT(ichdr->freemap[mapindex].size >= | ||
1182 | xfs_attr_leaf_newentsize(args, NULL)); | ||
1183 | ASSERT(ichdr->freemap[mapindex].size < args->geo->blksize); | ||
1184 | ASSERT((ichdr->freemap[mapindex].size & 0x3) == 0); | ||
1185 | |||
1186 | ichdr->freemap[mapindex].size -= xfs_attr_leaf_newentsize(args, &tmp); | ||
1187 | |||
1188 | entry->nameidx = cpu_to_be16(ichdr->freemap[mapindex].base + | ||
1189 | ichdr->freemap[mapindex].size); | ||
1190 | entry->hashval = cpu_to_be32(args->hashval); | ||
1191 | entry->flags = tmp ? XFS_ATTR_LOCAL : 0; | ||
1192 | entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); | ||
1193 | if (args->op_flags & XFS_DA_OP_RENAME) { | ||
1194 | entry->flags |= XFS_ATTR_INCOMPLETE; | ||
1195 | if ((args->blkno2 == args->blkno) && | ||
1196 | (args->index2 <= args->index)) { | ||
1197 | args->index2++; | ||
1198 | } | ||
1199 | } | ||
1200 | xfs_trans_log_buf(args->trans, bp, | ||
1201 | XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); | ||
1202 | ASSERT((args->index == 0) || | ||
1203 | (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval))); | ||
1204 | ASSERT((args->index == ichdr->count - 1) || | ||
1205 | (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval))); | ||
1206 | |||
1207 | /* | ||
1208 | * For "remote" attribute values, simply note that we need to | ||
1209 | * allocate space for the "remote" value. We can't actually | ||
1210 | * allocate the extents in this transaction, and we can't decide | ||
1211 | * which blocks they should be as we might allocate more blocks | ||
1212 | * as part of this transaction (a split operation for example). | ||
1213 | */ | ||
1214 | if (entry->flags & XFS_ATTR_LOCAL) { | ||
1215 | name_loc = xfs_attr3_leaf_name_local(leaf, args->index); | ||
1216 | name_loc->namelen = args->namelen; | ||
1217 | name_loc->valuelen = cpu_to_be16(args->valuelen); | ||
1218 | memcpy((char *)name_loc->nameval, args->name, args->namelen); | ||
1219 | memcpy((char *)&name_loc->nameval[args->namelen], args->value, | ||
1220 | be16_to_cpu(name_loc->valuelen)); | ||
1221 | } else { | ||
1222 | name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); | ||
1223 | name_rmt->namelen = args->namelen; | ||
1224 | memcpy((char *)name_rmt->name, args->name, args->namelen); | ||
1225 | entry->flags |= XFS_ATTR_INCOMPLETE; | ||
1226 | /* just in case */ | ||
1227 | name_rmt->valuelen = 0; | ||
1228 | name_rmt->valueblk = 0; | ||
1229 | args->rmtblkno = 1; | ||
1230 | args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen); | ||
1231 | args->rmtvaluelen = args->valuelen; | ||
1232 | } | ||
1233 | xfs_trans_log_buf(args->trans, bp, | ||
1234 | XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), | ||
1235 | xfs_attr_leaf_entsize(leaf, args->index))); | ||
1236 | |||
1237 | /* | ||
1238 | * Update the control info for this leaf node | ||
1239 | */ | ||
1240 | if (be16_to_cpu(entry->nameidx) < ichdr->firstused) | ||
1241 | ichdr->firstused = be16_to_cpu(entry->nameidx); | ||
1242 | |||
1243 | ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t) | ||
1244 | + xfs_attr3_leaf_hdr_size(leaf)); | ||
1245 | tmp = (ichdr->count - 1) * sizeof(xfs_attr_leaf_entry_t) | ||
1246 | + xfs_attr3_leaf_hdr_size(leaf); | ||
1247 | |||
1248 | for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { | ||
1249 | if (ichdr->freemap[i].base == tmp) { | ||
1250 | ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t); | ||
1251 | ichdr->freemap[i].size -= sizeof(xfs_attr_leaf_entry_t); | ||
1252 | } | ||
1253 | } | ||
1254 | ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index); | ||
1255 | return 0; | ||
1256 | } | ||
1257 | |||
1258 | /* | ||
1259 | * Garbage collect a leaf attribute list block by copying it to a new buffer. | ||
1260 | */ | ||
1261 | STATIC void | ||
1262 | xfs_attr3_leaf_compact( | ||
1263 | struct xfs_da_args *args, | ||
1264 | struct xfs_attr3_icleaf_hdr *ichdr_dst, | ||
1265 | struct xfs_buf *bp) | ||
1266 | { | ||
1267 | struct xfs_attr_leafblock *leaf_src; | ||
1268 | struct xfs_attr_leafblock *leaf_dst; | ||
1269 | struct xfs_attr3_icleaf_hdr ichdr_src; | ||
1270 | struct xfs_trans *trans = args->trans; | ||
1271 | char *tmpbuffer; | ||
1272 | |||
1273 | trace_xfs_attr_leaf_compact(args); | ||
1274 | |||
1275 | tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); | ||
1276 | memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); | ||
1277 | memset(bp->b_addr, 0, args->geo->blksize); | ||
1278 | leaf_src = (xfs_attr_leafblock_t *)tmpbuffer; | ||
1279 | leaf_dst = bp->b_addr; | ||
1280 | |||
1281 | /* | ||
1282 | * Copy the on-disk header back into the destination buffer to ensure | ||
1283 | * all the information in the header that is not part of the incore | ||
1284 | * header structure is preserved. | ||
1285 | */ | ||
1286 | memcpy(bp->b_addr, tmpbuffer, xfs_attr3_leaf_hdr_size(leaf_src)); | ||
1287 | |||
1288 | /* Initialise the incore headers */ | ||
1289 | ichdr_src = *ichdr_dst; /* struct copy */ | ||
1290 | ichdr_dst->firstused = args->geo->blksize; | ||
1291 | ichdr_dst->usedbytes = 0; | ||
1292 | ichdr_dst->count = 0; | ||
1293 | ichdr_dst->holes = 0; | ||
1294 | ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src); | ||
1295 | ichdr_dst->freemap[0].size = ichdr_dst->firstused - | ||
1296 | ichdr_dst->freemap[0].base; | ||
1297 | |||
1298 | /* write the header back to initialise the underlying buffer */ | ||
1299 | xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst); | ||
1300 | |||
1301 | /* | ||
1302 | * Copy all entry's in the same (sorted) order, | ||
1303 | * but allocate name/value pairs packed and in sequence. | ||
1304 | */ | ||
1305 | xfs_attr3_leaf_moveents(args, leaf_src, &ichdr_src, 0, | ||
1306 | leaf_dst, ichdr_dst, 0, ichdr_src.count); | ||
1307 | /* | ||
1308 | * this logs the entire buffer, but the caller must write the header | ||
1309 | * back to the buffer when it is finished modifying it. | ||
1310 | */ | ||
1311 | xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1); | ||
1312 | |||
1313 | kmem_free(tmpbuffer); | ||
1314 | } | ||
1315 | |||
1316 | /* | ||
1317 | * Compare two leaf blocks "order". | ||
1318 | * Return 0 unless leaf2 should go before leaf1. | ||
1319 | */ | ||
1320 | static int | ||
1321 | xfs_attr3_leaf_order( | ||
1322 | struct xfs_buf *leaf1_bp, | ||
1323 | struct xfs_attr3_icleaf_hdr *leaf1hdr, | ||
1324 | struct xfs_buf *leaf2_bp, | ||
1325 | struct xfs_attr3_icleaf_hdr *leaf2hdr) | ||
1326 | { | ||
1327 | struct xfs_attr_leaf_entry *entries1; | ||
1328 | struct xfs_attr_leaf_entry *entries2; | ||
1329 | |||
1330 | entries1 = xfs_attr3_leaf_entryp(leaf1_bp->b_addr); | ||
1331 | entries2 = xfs_attr3_leaf_entryp(leaf2_bp->b_addr); | ||
1332 | if (leaf1hdr->count > 0 && leaf2hdr->count > 0 && | ||
1333 | ((be32_to_cpu(entries2[0].hashval) < | ||
1334 | be32_to_cpu(entries1[0].hashval)) || | ||
1335 | (be32_to_cpu(entries2[leaf2hdr->count - 1].hashval) < | ||
1336 | be32_to_cpu(entries1[leaf1hdr->count - 1].hashval)))) { | ||
1337 | return 1; | ||
1338 | } | ||
1339 | return 0; | ||
1340 | } | ||
1341 | |||
1342 | int | ||
1343 | xfs_attr_leaf_order( | ||
1344 | struct xfs_buf *leaf1_bp, | ||
1345 | struct xfs_buf *leaf2_bp) | ||
1346 | { | ||
1347 | struct xfs_attr3_icleaf_hdr ichdr1; | ||
1348 | struct xfs_attr3_icleaf_hdr ichdr2; | ||
1349 | |||
1350 | xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1_bp->b_addr); | ||
1351 | xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2_bp->b_addr); | ||
1352 | return xfs_attr3_leaf_order(leaf1_bp, &ichdr1, leaf2_bp, &ichdr2); | ||
1353 | } | ||
1354 | |||
1355 | /* | ||
1356 | * Redistribute the attribute list entries between two leaf nodes, | ||
1357 | * taking into account the size of the new entry. | ||
1358 | * | ||
1359 | * NOTE: if new block is empty, then it will get the upper half of the | ||
1360 | * old block. At present, all (one) callers pass in an empty second block. | ||
1361 | * | ||
1362 | * This code adjusts the args->index/blkno and args->index2/blkno2 fields | ||
1363 | * to match what it is doing in splitting the attribute leaf block. Those | ||
1364 | * values are used in "atomic rename" operations on attributes. Note that | ||
1365 | * the "new" and "old" values can end up in different blocks. | ||
1366 | */ | ||
1367 | STATIC void | ||
1368 | xfs_attr3_leaf_rebalance( | ||
1369 | struct xfs_da_state *state, | ||
1370 | struct xfs_da_state_blk *blk1, | ||
1371 | struct xfs_da_state_blk *blk2) | ||
1372 | { | ||
1373 | struct xfs_da_args *args; | ||
1374 | struct xfs_attr_leafblock *leaf1; | ||
1375 | struct xfs_attr_leafblock *leaf2; | ||
1376 | struct xfs_attr3_icleaf_hdr ichdr1; | ||
1377 | struct xfs_attr3_icleaf_hdr ichdr2; | ||
1378 | struct xfs_attr_leaf_entry *entries1; | ||
1379 | struct xfs_attr_leaf_entry *entries2; | ||
1380 | int count; | ||
1381 | int totallen; | ||
1382 | int max; | ||
1383 | int space; | ||
1384 | int swap; | ||
1385 | |||
1386 | /* | ||
1387 | * Set up environment. | ||
1388 | */ | ||
1389 | ASSERT(blk1->magic == XFS_ATTR_LEAF_MAGIC); | ||
1390 | ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); | ||
1391 | leaf1 = blk1->bp->b_addr; | ||
1392 | leaf2 = blk2->bp->b_addr; | ||
1393 | xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1); | ||
1394 | xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2); | ||
1395 | ASSERT(ichdr2.count == 0); | ||
1396 | args = state->args; | ||
1397 | |||
1398 | trace_xfs_attr_leaf_rebalance(args); | ||
1399 | |||
1400 | /* | ||
1401 | * Check ordering of blocks, reverse if it makes things simpler. | ||
1402 | * | ||
1403 | * NOTE: Given that all (current) callers pass in an empty | ||
1404 | * second block, this code should never set "swap". | ||
1405 | */ | ||
1406 | swap = 0; | ||
1407 | if (xfs_attr3_leaf_order(blk1->bp, &ichdr1, blk2->bp, &ichdr2)) { | ||
1408 | struct xfs_da_state_blk *tmp_blk; | ||
1409 | struct xfs_attr3_icleaf_hdr tmp_ichdr; | ||
1410 | |||
1411 | tmp_blk = blk1; | ||
1412 | blk1 = blk2; | ||
1413 | blk2 = tmp_blk; | ||
1414 | |||
1415 | /* struct copies to swap them rather than reconverting */ | ||
1416 | tmp_ichdr = ichdr1; | ||
1417 | ichdr1 = ichdr2; | ||
1418 | ichdr2 = tmp_ichdr; | ||
1419 | |||
1420 | leaf1 = blk1->bp->b_addr; | ||
1421 | leaf2 = blk2->bp->b_addr; | ||
1422 | swap = 1; | ||
1423 | } | ||
1424 | |||
1425 | /* | ||
1426 | * Examine entries until we reduce the absolute difference in | ||
1427 | * byte usage between the two blocks to a minimum. Then get | ||
1428 | * the direction to copy and the number of elements to move. | ||
1429 | * | ||
1430 | * "inleaf" is true if the new entry should be inserted into blk1. | ||
1431 | * If "swap" is also true, then reverse the sense of "inleaf". | ||
1432 | */ | ||
1433 | state->inleaf = xfs_attr3_leaf_figure_balance(state, blk1, &ichdr1, | ||
1434 | blk2, &ichdr2, | ||
1435 | &count, &totallen); | ||
1436 | if (swap) | ||
1437 | state->inleaf = !state->inleaf; | ||
1438 | |||
1439 | /* | ||
1440 | * Move any entries required from leaf to leaf: | ||
1441 | */ | ||
1442 | if (count < ichdr1.count) { | ||
1443 | /* | ||
1444 | * Figure the total bytes to be added to the destination leaf. | ||
1445 | */ | ||
1446 | /* number entries being moved */ | ||
1447 | count = ichdr1.count - count; | ||
1448 | space = ichdr1.usedbytes - totallen; | ||
1449 | space += count * sizeof(xfs_attr_leaf_entry_t); | ||
1450 | |||
1451 | /* | ||
1452 | * leaf2 is the destination, compact it if it looks tight. | ||
1453 | */ | ||
1454 | max = ichdr2.firstused - xfs_attr3_leaf_hdr_size(leaf1); | ||
1455 | max -= ichdr2.count * sizeof(xfs_attr_leaf_entry_t); | ||
1456 | if (space > max) | ||
1457 | xfs_attr3_leaf_compact(args, &ichdr2, blk2->bp); | ||
1458 | |||
1459 | /* | ||
1460 | * Move high entries from leaf1 to low end of leaf2. | ||
1461 | */ | ||
1462 | xfs_attr3_leaf_moveents(args, leaf1, &ichdr1, | ||
1463 | ichdr1.count - count, leaf2, &ichdr2, 0, count); | ||
1464 | |||
1465 | } else if (count > ichdr1.count) { | ||
1466 | /* | ||
1467 | * I assert that since all callers pass in an empty | ||
1468 | * second buffer, this code should never execute. | ||
1469 | */ | ||
1470 | ASSERT(0); | ||
1471 | |||
1472 | /* | ||
1473 | * Figure the total bytes to be added to the destination leaf. | ||
1474 | */ | ||
1475 | /* number entries being moved */ | ||
1476 | count -= ichdr1.count; | ||
1477 | space = totallen - ichdr1.usedbytes; | ||
1478 | space += count * sizeof(xfs_attr_leaf_entry_t); | ||
1479 | |||
1480 | /* | ||
1481 | * leaf1 is the destination, compact it if it looks tight. | ||
1482 | */ | ||
1483 | max = ichdr1.firstused - xfs_attr3_leaf_hdr_size(leaf1); | ||
1484 | max -= ichdr1.count * sizeof(xfs_attr_leaf_entry_t); | ||
1485 | if (space > max) | ||
1486 | xfs_attr3_leaf_compact(args, &ichdr1, blk1->bp); | ||
1487 | |||
1488 | /* | ||
1489 | * Move low entries from leaf2 to high end of leaf1. | ||
1490 | */ | ||
1491 | xfs_attr3_leaf_moveents(args, leaf2, &ichdr2, 0, leaf1, &ichdr1, | ||
1492 | ichdr1.count, count); | ||
1493 | } | ||
1494 | |||
1495 | xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1); | ||
1496 | xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2); | ||
1497 | xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1); | ||
1498 | xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1); | ||
1499 | |||
1500 | /* | ||
1501 | * Copy out last hashval in each block for B-tree code. | ||
1502 | */ | ||
1503 | entries1 = xfs_attr3_leaf_entryp(leaf1); | ||
1504 | entries2 = xfs_attr3_leaf_entryp(leaf2); | ||
1505 | blk1->hashval = be32_to_cpu(entries1[ichdr1.count - 1].hashval); | ||
1506 | blk2->hashval = be32_to_cpu(entries2[ichdr2.count - 1].hashval); | ||
1507 | |||
1508 | /* | ||
1509 | * Adjust the expected index for insertion. | ||
1510 | * NOTE: this code depends on the (current) situation that the | ||
1511 | * second block was originally empty. | ||
1512 | * | ||
1513 | * If the insertion point moved to the 2nd block, we must adjust | ||
1514 | * the index. We must also track the entry just following the | ||
1515 | * new entry for use in an "atomic rename" operation, that entry | ||
1516 | * is always the "old" entry and the "new" entry is what we are | ||
1517 | * inserting. The index/blkno fields refer to the "old" entry, | ||
1518 | * while the index2/blkno2 fields refer to the "new" entry. | ||
1519 | */ | ||
1520 | if (blk1->index > ichdr1.count) { | ||
1521 | ASSERT(state->inleaf == 0); | ||
1522 | blk2->index = blk1->index - ichdr1.count; | ||
1523 | args->index = args->index2 = blk2->index; | ||
1524 | args->blkno = args->blkno2 = blk2->blkno; | ||
1525 | } else if (blk1->index == ichdr1.count) { | ||
1526 | if (state->inleaf) { | ||
1527 | args->index = blk1->index; | ||
1528 | args->blkno = blk1->blkno; | ||
1529 | args->index2 = 0; | ||
1530 | args->blkno2 = blk2->blkno; | ||
1531 | } else { | ||
1532 | /* | ||
1533 | * On a double leaf split, the original attr location | ||
1534 | * is already stored in blkno2/index2, so don't | ||
1535 | * overwrite it, otherwise we corrupt the tree. | ||
1536 | */ | ||
1537 | blk2->index = blk1->index - ichdr1.count; | ||
1538 | args->index = blk2->index; | ||
1539 | args->blkno = blk2->blkno; | ||
1540 | if (!state->extravalid) { | ||
1541 | /* | ||
1542 | * set the new attr location to match the old | ||
1543 | * one and let the higher level split code | ||
1544 | * decide where in the leaf to place it. | ||
1545 | */ | ||
1546 | args->index2 = blk2->index; | ||
1547 | args->blkno2 = blk2->blkno; | ||
1548 | } | ||
1549 | } | ||
1550 | } else { | ||
1551 | ASSERT(state->inleaf == 1); | ||
1552 | args->index = args->index2 = blk1->index; | ||
1553 | args->blkno = args->blkno2 = blk1->blkno; | ||
1554 | } | ||
1555 | } | ||
1556 | |||
1557 | /* | ||
1558 | * Examine entries until we reduce the absolute difference in | ||
1559 | * byte usage between the two blocks to a minimum. | ||
1560 | * GROT: Is this really necessary? With other than a 512 byte blocksize, | ||
1561 | * GROT: there will always be enough room in either block for a new entry. | ||
1562 | * GROT: Do a double-split for this case? | ||
1563 | */ | ||
1564 | STATIC int | ||
1565 | xfs_attr3_leaf_figure_balance( | ||
1566 | struct xfs_da_state *state, | ||
1567 | struct xfs_da_state_blk *blk1, | ||
1568 | struct xfs_attr3_icleaf_hdr *ichdr1, | ||
1569 | struct xfs_da_state_blk *blk2, | ||
1570 | struct xfs_attr3_icleaf_hdr *ichdr2, | ||
1571 | int *countarg, | ||
1572 | int *usedbytesarg) | ||
1573 | { | ||
1574 | struct xfs_attr_leafblock *leaf1 = blk1->bp->b_addr; | ||
1575 | struct xfs_attr_leafblock *leaf2 = blk2->bp->b_addr; | ||
1576 | struct xfs_attr_leaf_entry *entry; | ||
1577 | int count; | ||
1578 | int max; | ||
1579 | int index; | ||
1580 | int totallen = 0; | ||
1581 | int half; | ||
1582 | int lastdelta; | ||
1583 | int foundit = 0; | ||
1584 | int tmp; | ||
1585 | |||
1586 | /* | ||
1587 | * Examine entries until we reduce the absolute difference in | ||
1588 | * byte usage between the two blocks to a minimum. | ||
1589 | */ | ||
1590 | max = ichdr1->count + ichdr2->count; | ||
1591 | half = (max + 1) * sizeof(*entry); | ||
1592 | half += ichdr1->usedbytes + ichdr2->usedbytes + | ||
1593 | xfs_attr_leaf_newentsize(state->args, NULL); | ||
1594 | half /= 2; | ||
1595 | lastdelta = state->args->geo->blksize; | ||
1596 | entry = xfs_attr3_leaf_entryp(leaf1); | ||
1597 | for (count = index = 0; count < max; entry++, index++, count++) { | ||
1598 | |||
1599 | #define XFS_ATTR_ABS(A) (((A) < 0) ? -(A) : (A)) | ||
1600 | /* | ||
1601 | * The new entry is in the first block, account for it. | ||
1602 | */ | ||
1603 | if (count == blk1->index) { | ||
1604 | tmp = totallen + sizeof(*entry) + | ||
1605 | xfs_attr_leaf_newentsize(state->args, NULL); | ||
1606 | if (XFS_ATTR_ABS(half - tmp) > lastdelta) | ||
1607 | break; | ||
1608 | lastdelta = XFS_ATTR_ABS(half - tmp); | ||
1609 | totallen = tmp; | ||
1610 | foundit = 1; | ||
1611 | } | ||
1612 | |||
1613 | /* | ||
1614 | * Wrap around into the second block if necessary. | ||
1615 | */ | ||
1616 | if (count == ichdr1->count) { | ||
1617 | leaf1 = leaf2; | ||
1618 | entry = xfs_attr3_leaf_entryp(leaf1); | ||
1619 | index = 0; | ||
1620 | } | ||
1621 | |||
1622 | /* | ||
1623 | * Figure out if next leaf entry would be too much. | ||
1624 | */ | ||
1625 | tmp = totallen + sizeof(*entry) + xfs_attr_leaf_entsize(leaf1, | ||
1626 | index); | ||
1627 | if (XFS_ATTR_ABS(half - tmp) > lastdelta) | ||
1628 | break; | ||
1629 | lastdelta = XFS_ATTR_ABS(half - tmp); | ||
1630 | totallen = tmp; | ||
1631 | #undef XFS_ATTR_ABS | ||
1632 | } | ||
1633 | |||
1634 | /* | ||
1635 | * Calculate the number of usedbytes that will end up in lower block. | ||
1636 | * If new entry not in lower block, fix up the count. | ||
1637 | */ | ||
1638 | totallen -= count * sizeof(*entry); | ||
1639 | if (foundit) { | ||
1640 | totallen -= sizeof(*entry) + | ||
1641 | xfs_attr_leaf_newentsize(state->args, NULL); | ||
1642 | } | ||
1643 | |||
1644 | *countarg = count; | ||
1645 | *usedbytesarg = totallen; | ||
1646 | return foundit; | ||
1647 | } | ||
1648 | |||
1649 | /*======================================================================== | ||
1650 | * Routines used for shrinking the Btree. | ||
1651 | *========================================================================*/ | ||
1652 | |||
1653 | /* | ||
1654 | * Check a leaf block and its neighbors to see if the block should be | ||
1655 | * collapsed into one or the other neighbor. Always keep the block | ||
1656 | * with the smaller block number. | ||
1657 | * If the current block is over 50% full, don't try to join it, return 0. | ||
1658 | * If the block is empty, fill in the state structure and return 2. | ||
1659 | * If it can be collapsed, fill in the state structure and return 1. | ||
1660 | * If nothing can be done, return 0. | ||
1661 | * | ||
1662 | * GROT: allow for INCOMPLETE entries in calculation. | ||
1663 | */ | ||
1664 | int | ||
1665 | xfs_attr3_leaf_toosmall( | ||
1666 | struct xfs_da_state *state, | ||
1667 | int *action) | ||
1668 | { | ||
1669 | struct xfs_attr_leafblock *leaf; | ||
1670 | struct xfs_da_state_blk *blk; | ||
1671 | struct xfs_attr3_icleaf_hdr ichdr; | ||
1672 | struct xfs_buf *bp; | ||
1673 | xfs_dablk_t blkno; | ||
1674 | int bytes; | ||
1675 | int forward; | ||
1676 | int error; | ||
1677 | int retval; | ||
1678 | int i; | ||
1679 | |||
1680 | trace_xfs_attr_leaf_toosmall(state->args); | ||
1681 | |||
1682 | /* | ||
1683 | * Check for the degenerate case of the block being over 50% full. | ||
1684 | * If so, it's not worth even looking to see if we might be able | ||
1685 | * to coalesce with a sibling. | ||
1686 | */ | ||
1687 | blk = &state->path.blk[ state->path.active-1 ]; | ||
1688 | leaf = blk->bp->b_addr; | ||
1689 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | ||
1690 | bytes = xfs_attr3_leaf_hdr_size(leaf) + | ||
1691 | ichdr.count * sizeof(xfs_attr_leaf_entry_t) + | ||
1692 | ichdr.usedbytes; | ||
1693 | if (bytes > (state->args->geo->blksize >> 1)) { | ||
1694 | *action = 0; /* blk over 50%, don't try to join */ | ||
1695 | return 0; | ||
1696 | } | ||
1697 | |||
1698 | /* | ||
1699 | * Check for the degenerate case of the block being empty. | ||
1700 | * If the block is empty, we'll simply delete it, no need to | ||
1701 | * coalesce it with a sibling block. We choose (arbitrarily) | ||
1702 | * to merge with the forward block unless it is NULL. | ||
1703 | */ | ||
1704 | if (ichdr.count == 0) { | ||
1705 | /* | ||
1706 | * Make altpath point to the block we want to keep and | ||
1707 | * path point to the block we want to drop (this one). | ||
1708 | */ | ||
1709 | forward = (ichdr.forw != 0); | ||
1710 | memcpy(&state->altpath, &state->path, sizeof(state->path)); | ||
1711 | error = xfs_da3_path_shift(state, &state->altpath, forward, | ||
1712 | 0, &retval); | ||
1713 | if (error) | ||
1714 | return error; | ||
1715 | if (retval) { | ||
1716 | *action = 0; | ||
1717 | } else { | ||
1718 | *action = 2; | ||
1719 | } | ||
1720 | return 0; | ||
1721 | } | ||
1722 | |||
1723 | /* | ||
1724 | * Examine each sibling block to see if we can coalesce with | ||
1725 | * at least 25% free space to spare. We need to figure out | ||
1726 | * whether to merge with the forward or the backward block. | ||
1727 | * We prefer coalescing with the lower numbered sibling so as | ||
1728 | * to shrink an attribute list over time. | ||
1729 | */ | ||
1730 | /* start with smaller blk num */ | ||
1731 | forward = ichdr.forw < ichdr.back; | ||
1732 | for (i = 0; i < 2; forward = !forward, i++) { | ||
1733 | struct xfs_attr3_icleaf_hdr ichdr2; | ||
1734 | if (forward) | ||
1735 | blkno = ichdr.forw; | ||
1736 | else | ||
1737 | blkno = ichdr.back; | ||
1738 | if (blkno == 0) | ||
1739 | continue; | ||
1740 | error = xfs_attr3_leaf_read(state->args->trans, state->args->dp, | ||
1741 | blkno, -1, &bp); | ||
1742 | if (error) | ||
1743 | return error; | ||
1744 | |||
1745 | xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr); | ||
1746 | |||
1747 | bytes = state->args->geo->blksize - | ||
1748 | (state->args->geo->blksize >> 2) - | ||
1749 | ichdr.usedbytes - ichdr2.usedbytes - | ||
1750 | ((ichdr.count + ichdr2.count) * | ||
1751 | sizeof(xfs_attr_leaf_entry_t)) - | ||
1752 | xfs_attr3_leaf_hdr_size(leaf); | ||
1753 | |||
1754 | xfs_trans_brelse(state->args->trans, bp); | ||
1755 | if (bytes >= 0) | ||
1756 | break; /* fits with at least 25% to spare */ | ||
1757 | } | ||
1758 | if (i >= 2) { | ||
1759 | *action = 0; | ||
1760 | return 0; | ||
1761 | } | ||
1762 | |||
1763 | /* | ||
1764 | * Make altpath point to the block we want to keep (the lower | ||
1765 | * numbered block) and path point to the block we want to drop. | ||
1766 | */ | ||
1767 | memcpy(&state->altpath, &state->path, sizeof(state->path)); | ||
1768 | if (blkno < blk->blkno) { | ||
1769 | error = xfs_da3_path_shift(state, &state->altpath, forward, | ||
1770 | 0, &retval); | ||
1771 | } else { | ||
1772 | error = xfs_da3_path_shift(state, &state->path, forward, | ||
1773 | 0, &retval); | ||
1774 | } | ||
1775 | if (error) | ||
1776 | return error; | ||
1777 | if (retval) { | ||
1778 | *action = 0; | ||
1779 | } else { | ||
1780 | *action = 1; | ||
1781 | } | ||
1782 | return 0; | ||
1783 | } | ||
1784 | |||
1785 | /* | ||
1786 | * Remove a name from the leaf attribute list structure. | ||
1787 | * | ||
1788 | * Return 1 if leaf is less than 37% full, 0 if >= 37% full. | ||
1789 | * If two leaves are 37% full, when combined they will leave 25% free. | ||
1790 | */ | ||
1791 | int | ||
1792 | xfs_attr3_leaf_remove( | ||
1793 | struct xfs_buf *bp, | ||
1794 | struct xfs_da_args *args) | ||
1795 | { | ||
1796 | struct xfs_attr_leafblock *leaf; | ||
1797 | struct xfs_attr3_icleaf_hdr ichdr; | ||
1798 | struct xfs_attr_leaf_entry *entry; | ||
1799 | int before; | ||
1800 | int after; | ||
1801 | int smallest; | ||
1802 | int entsize; | ||
1803 | int tablesize; | ||
1804 | int tmp; | ||
1805 | int i; | ||
1806 | |||
1807 | trace_xfs_attr_leaf_remove(args); | ||
1808 | |||
1809 | leaf = bp->b_addr; | ||
1810 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | ||
1811 | |||
1812 | ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8); | ||
1813 | ASSERT(args->index >= 0 && args->index < ichdr.count); | ||
1814 | ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) + | ||
1815 | xfs_attr3_leaf_hdr_size(leaf)); | ||
1816 | |||
1817 | entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; | ||
1818 | |||
1819 | ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused); | ||
1820 | ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize); | ||
1821 | |||
1822 | /* | ||
1823 | * Scan through free region table: | ||
1824 | * check for adjacency of free'd entry with an existing one, | ||
1825 | * find smallest free region in case we need to replace it, | ||
1826 | * adjust any map that borders the entry table, | ||
1827 | */ | ||
1828 | tablesize = ichdr.count * sizeof(xfs_attr_leaf_entry_t) | ||
1829 | + xfs_attr3_leaf_hdr_size(leaf); | ||
1830 | tmp = ichdr.freemap[0].size; | ||
1831 | before = after = -1; | ||
1832 | smallest = XFS_ATTR_LEAF_MAPSIZE - 1; | ||
1833 | entsize = xfs_attr_leaf_entsize(leaf, args->index); | ||
1834 | for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { | ||
1835 | ASSERT(ichdr.freemap[i].base < args->geo->blksize); | ||
1836 | ASSERT(ichdr.freemap[i].size < args->geo->blksize); | ||
1837 | if (ichdr.freemap[i].base == tablesize) { | ||
1838 | ichdr.freemap[i].base -= sizeof(xfs_attr_leaf_entry_t); | ||
1839 | ichdr.freemap[i].size += sizeof(xfs_attr_leaf_entry_t); | ||
1840 | } | ||
1841 | |||
1842 | if (ichdr.freemap[i].base + ichdr.freemap[i].size == | ||
1843 | be16_to_cpu(entry->nameidx)) { | ||
1844 | before = i; | ||
1845 | } else if (ichdr.freemap[i].base == | ||
1846 | (be16_to_cpu(entry->nameidx) + entsize)) { | ||
1847 | after = i; | ||
1848 | } else if (ichdr.freemap[i].size < tmp) { | ||
1849 | tmp = ichdr.freemap[i].size; | ||
1850 | smallest = i; | ||
1851 | } | ||
1852 | } | ||
1853 | |||
1854 | /* | ||
1855 | * Coalesce adjacent freemap regions, | ||
1856 | * or replace the smallest region. | ||
1857 | */ | ||
1858 | if ((before >= 0) || (after >= 0)) { | ||
1859 | if ((before >= 0) && (after >= 0)) { | ||
1860 | ichdr.freemap[before].size += entsize; | ||
1861 | ichdr.freemap[before].size += ichdr.freemap[after].size; | ||
1862 | ichdr.freemap[after].base = 0; | ||
1863 | ichdr.freemap[after].size = 0; | ||
1864 | } else if (before >= 0) { | ||
1865 | ichdr.freemap[before].size += entsize; | ||
1866 | } else { | ||
1867 | ichdr.freemap[after].base = be16_to_cpu(entry->nameidx); | ||
1868 | ichdr.freemap[after].size += entsize; | ||
1869 | } | ||
1870 | } else { | ||
1871 | /* | ||
1872 | * Replace smallest region (if it is smaller than free'd entry) | ||
1873 | */ | ||
1874 | if (ichdr.freemap[smallest].size < entsize) { | ||
1875 | ichdr.freemap[smallest].base = be16_to_cpu(entry->nameidx); | ||
1876 | ichdr.freemap[smallest].size = entsize; | ||
1877 | } | ||
1878 | } | ||
1879 | |||
1880 | /* | ||
1881 | * Did we remove the first entry? | ||
1882 | */ | ||
1883 | if (be16_to_cpu(entry->nameidx) == ichdr.firstused) | ||
1884 | smallest = 1; | ||
1885 | else | ||
1886 | smallest = 0; | ||
1887 | |||
1888 | /* | ||
1889 | * Compress the remaining entries and zero out the removed stuff. | ||
1890 | */ | ||
1891 | memset(xfs_attr3_leaf_name(leaf, args->index), 0, entsize); | ||
1892 | ichdr.usedbytes -= entsize; | ||
1893 | xfs_trans_log_buf(args->trans, bp, | ||
1894 | XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), | ||
1895 | entsize)); | ||
1896 | |||
1897 | tmp = (ichdr.count - args->index) * sizeof(xfs_attr_leaf_entry_t); | ||
1898 | memmove(entry, entry + 1, tmp); | ||
1899 | ichdr.count--; | ||
1900 | xfs_trans_log_buf(args->trans, bp, | ||
1901 | XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(xfs_attr_leaf_entry_t))); | ||
1902 | |||
1903 | entry = &xfs_attr3_leaf_entryp(leaf)[ichdr.count]; | ||
1904 | memset(entry, 0, sizeof(xfs_attr_leaf_entry_t)); | ||
1905 | |||
1906 | /* | ||
1907 | * If we removed the first entry, re-find the first used byte | ||
1908 | * in the name area. Note that if the entry was the "firstused", | ||
1909 | * then we don't have a "hole" in our block resulting from | ||
1910 | * removing the name. | ||
1911 | */ | ||
1912 | if (smallest) { | ||
1913 | tmp = args->geo->blksize; | ||
1914 | entry = xfs_attr3_leaf_entryp(leaf); | ||
1915 | for (i = ichdr.count - 1; i >= 0; entry++, i--) { | ||
1916 | ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused); | ||
1917 | ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize); | ||
1918 | |||
1919 | if (be16_to_cpu(entry->nameidx) < tmp) | ||
1920 | tmp = be16_to_cpu(entry->nameidx); | ||
1921 | } | ||
1922 | ichdr.firstused = tmp; | ||
1923 | if (!ichdr.firstused) | ||
1924 | ichdr.firstused = tmp - XFS_ATTR_LEAF_NAME_ALIGN; | ||
1925 | } else { | ||
1926 | ichdr.holes = 1; /* mark as needing compaction */ | ||
1927 | } | ||
1928 | xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); | ||
1929 | xfs_trans_log_buf(args->trans, bp, | ||
1930 | XFS_DA_LOGRANGE(leaf, &leaf->hdr, | ||
1931 | xfs_attr3_leaf_hdr_size(leaf))); | ||
1932 | |||
1933 | /* | ||
1934 | * Check if leaf is less than 50% full, caller may want to | ||
1935 | * "join" the leaf with a sibling if so. | ||
1936 | */ | ||
1937 | tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) + | ||
1938 | ichdr.count * sizeof(xfs_attr_leaf_entry_t); | ||
1939 | |||
1940 | return tmp < args->geo->magicpct; /* leaf is < 37% full */ | ||
1941 | } | ||
1942 | |||
1943 | /* | ||
1944 | * Move all the attribute list entries from drop_leaf into save_leaf. | ||
1945 | */ | ||
1946 | void | ||
1947 | xfs_attr3_leaf_unbalance( | ||
1948 | struct xfs_da_state *state, | ||
1949 | struct xfs_da_state_blk *drop_blk, | ||
1950 | struct xfs_da_state_blk *save_blk) | ||
1951 | { | ||
1952 | struct xfs_attr_leafblock *drop_leaf = drop_blk->bp->b_addr; | ||
1953 | struct xfs_attr_leafblock *save_leaf = save_blk->bp->b_addr; | ||
1954 | struct xfs_attr3_icleaf_hdr drophdr; | ||
1955 | struct xfs_attr3_icleaf_hdr savehdr; | ||
1956 | struct xfs_attr_leaf_entry *entry; | ||
1957 | |||
1958 | trace_xfs_attr_leaf_unbalance(state->args); | ||
1959 | |||
1960 | drop_leaf = drop_blk->bp->b_addr; | ||
1961 | save_leaf = save_blk->bp->b_addr; | ||
1962 | xfs_attr3_leaf_hdr_from_disk(&drophdr, drop_leaf); | ||
1963 | xfs_attr3_leaf_hdr_from_disk(&savehdr, save_leaf); | ||
1964 | entry = xfs_attr3_leaf_entryp(drop_leaf); | ||
1965 | |||
1966 | /* | ||
1967 | * Save last hashval from dying block for later Btree fixup. | ||
1968 | */ | ||
1969 | drop_blk->hashval = be32_to_cpu(entry[drophdr.count - 1].hashval); | ||
1970 | |||
1971 | /* | ||
1972 | * Check if we need a temp buffer, or can we do it in place. | ||
1973 | * Note that we don't check "leaf" for holes because we will | ||
1974 | * always be dropping it, toosmall() decided that for us already. | ||
1975 | */ | ||
1976 | if (savehdr.holes == 0) { | ||
1977 | /* | ||
1978 | * dest leaf has no holes, so we add there. May need | ||
1979 | * to make some room in the entry array. | ||
1980 | */ | ||
1981 | if (xfs_attr3_leaf_order(save_blk->bp, &savehdr, | ||
1982 | drop_blk->bp, &drophdr)) { | ||
1983 | xfs_attr3_leaf_moveents(state->args, | ||
1984 | drop_leaf, &drophdr, 0, | ||
1985 | save_leaf, &savehdr, 0, | ||
1986 | drophdr.count); | ||
1987 | } else { | ||
1988 | xfs_attr3_leaf_moveents(state->args, | ||
1989 | drop_leaf, &drophdr, 0, | ||
1990 | save_leaf, &savehdr, | ||
1991 | savehdr.count, drophdr.count); | ||
1992 | } | ||
1993 | } else { | ||
1994 | /* | ||
1995 | * Destination has holes, so we make a temporary copy | ||
1996 | * of the leaf and add them both to that. | ||
1997 | */ | ||
1998 | struct xfs_attr_leafblock *tmp_leaf; | ||
1999 | struct xfs_attr3_icleaf_hdr tmphdr; | ||
2000 | |||
2001 | tmp_leaf = kmem_zalloc(state->args->geo->blksize, KM_SLEEP); | ||
2002 | |||
2003 | /* | ||
2004 | * Copy the header into the temp leaf so that all the stuff | ||
2005 | * not in the incore header is present and gets copied back in | ||
2006 | * once we've moved all the entries. | ||
2007 | */ | ||
2008 | memcpy(tmp_leaf, save_leaf, xfs_attr3_leaf_hdr_size(save_leaf)); | ||
2009 | |||
2010 | memset(&tmphdr, 0, sizeof(tmphdr)); | ||
2011 | tmphdr.magic = savehdr.magic; | ||
2012 | tmphdr.forw = savehdr.forw; | ||
2013 | tmphdr.back = savehdr.back; | ||
2014 | tmphdr.firstused = state->args->geo->blksize; | ||
2015 | |||
2016 | /* write the header to the temp buffer to initialise it */ | ||
2017 | xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr); | ||
2018 | |||
2019 | if (xfs_attr3_leaf_order(save_blk->bp, &savehdr, | ||
2020 | drop_blk->bp, &drophdr)) { | ||
2021 | xfs_attr3_leaf_moveents(state->args, | ||
2022 | drop_leaf, &drophdr, 0, | ||
2023 | tmp_leaf, &tmphdr, 0, | ||
2024 | drophdr.count); | ||
2025 | xfs_attr3_leaf_moveents(state->args, | ||
2026 | save_leaf, &savehdr, 0, | ||
2027 | tmp_leaf, &tmphdr, tmphdr.count, | ||
2028 | savehdr.count); | ||
2029 | } else { | ||
2030 | xfs_attr3_leaf_moveents(state->args, | ||
2031 | save_leaf, &savehdr, 0, | ||
2032 | tmp_leaf, &tmphdr, 0, | ||
2033 | savehdr.count); | ||
2034 | xfs_attr3_leaf_moveents(state->args, | ||
2035 | drop_leaf, &drophdr, 0, | ||
2036 | tmp_leaf, &tmphdr, tmphdr.count, | ||
2037 | drophdr.count); | ||
2038 | } | ||
2039 | memcpy(save_leaf, tmp_leaf, state->args->geo->blksize); | ||
2040 | savehdr = tmphdr; /* struct copy */ | ||
2041 | kmem_free(tmp_leaf); | ||
2042 | } | ||
2043 | |||
2044 | xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr); | ||
2045 | xfs_trans_log_buf(state->args->trans, save_blk->bp, 0, | ||
2046 | state->args->geo->blksize - 1); | ||
2047 | |||
2048 | /* | ||
2049 | * Copy out last hashval in each block for B-tree code. | ||
2050 | */ | ||
2051 | entry = xfs_attr3_leaf_entryp(save_leaf); | ||
2052 | save_blk->hashval = be32_to_cpu(entry[savehdr.count - 1].hashval); | ||
2053 | } | ||
2054 | |||
2055 | /*======================================================================== | ||
2056 | * Routines used for finding things in the Btree. | ||
2057 | *========================================================================*/ | ||
2058 | |||
2059 | /* | ||
2060 | * Look up a name in a leaf attribute list structure. | ||
2061 | * This is the internal routine, it uses the caller's buffer. | ||
2062 | * | ||
2063 | * Note that duplicate keys are allowed, but only check within the | ||
2064 | * current leaf node. The Btree code must check in adjacent leaf nodes. | ||
2065 | * | ||
2066 | * Return in args->index the index into the entry[] array of either | ||
2067 | * the found entry, or where the entry should have been (insert before | ||
2068 | * that entry). | ||
2069 | * | ||
2070 | * Don't change the args->value unless we find the attribute. | ||
2071 | */ | ||
2072 | int | ||
2073 | xfs_attr3_leaf_lookup_int( | ||
2074 | struct xfs_buf *bp, | ||
2075 | struct xfs_da_args *args) | ||
2076 | { | ||
2077 | struct xfs_attr_leafblock *leaf; | ||
2078 | struct xfs_attr3_icleaf_hdr ichdr; | ||
2079 | struct xfs_attr_leaf_entry *entry; | ||
2080 | struct xfs_attr_leaf_entry *entries; | ||
2081 | struct xfs_attr_leaf_name_local *name_loc; | ||
2082 | struct xfs_attr_leaf_name_remote *name_rmt; | ||
2083 | xfs_dahash_t hashval; | ||
2084 | int probe; | ||
2085 | int span; | ||
2086 | |||
2087 | trace_xfs_attr_leaf_lookup(args); | ||
2088 | |||
2089 | leaf = bp->b_addr; | ||
2090 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | ||
2091 | entries = xfs_attr3_leaf_entryp(leaf); | ||
2092 | ASSERT(ichdr.count < args->geo->blksize / 8); | ||
2093 | |||
2094 | /* | ||
2095 | * Binary search. (note: small blocks will skip this loop) | ||
2096 | */ | ||
2097 | hashval = args->hashval; | ||
2098 | probe = span = ichdr.count / 2; | ||
2099 | for (entry = &entries[probe]; span > 4; entry = &entries[probe]) { | ||
2100 | span /= 2; | ||
2101 | if (be32_to_cpu(entry->hashval) < hashval) | ||
2102 | probe += span; | ||
2103 | else if (be32_to_cpu(entry->hashval) > hashval) | ||
2104 | probe -= span; | ||
2105 | else | ||
2106 | break; | ||
2107 | } | ||
2108 | ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count)); | ||
2109 | ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval); | ||
2110 | |||
2111 | /* | ||
2112 | * Since we may have duplicate hashval's, find the first matching | ||
2113 | * hashval in the leaf. | ||
2114 | */ | ||
2115 | while (probe > 0 && be32_to_cpu(entry->hashval) >= hashval) { | ||
2116 | entry--; | ||
2117 | probe--; | ||
2118 | } | ||
2119 | while (probe < ichdr.count && | ||
2120 | be32_to_cpu(entry->hashval) < hashval) { | ||
2121 | entry++; | ||
2122 | probe++; | ||
2123 | } | ||
2124 | if (probe == ichdr.count || be32_to_cpu(entry->hashval) != hashval) { | ||
2125 | args->index = probe; | ||
2126 | return -ENOATTR; | ||
2127 | } | ||
2128 | |||
2129 | /* | ||
2130 | * Duplicate keys may be present, so search all of them for a match. | ||
2131 | */ | ||
2132 | for (; probe < ichdr.count && (be32_to_cpu(entry->hashval) == hashval); | ||
2133 | entry++, probe++) { | ||
2134 | /* | ||
2135 | * GROT: Add code to remove incomplete entries. | ||
2136 | */ | ||
2137 | /* | ||
2138 | * If we are looking for INCOMPLETE entries, show only those. | ||
2139 | * If we are looking for complete entries, show only those. | ||
2140 | */ | ||
2141 | if ((args->flags & XFS_ATTR_INCOMPLETE) != | ||
2142 | (entry->flags & XFS_ATTR_INCOMPLETE)) { | ||
2143 | continue; | ||
2144 | } | ||
2145 | if (entry->flags & XFS_ATTR_LOCAL) { | ||
2146 | name_loc = xfs_attr3_leaf_name_local(leaf, probe); | ||
2147 | if (name_loc->namelen != args->namelen) | ||
2148 | continue; | ||
2149 | if (memcmp(args->name, name_loc->nameval, | ||
2150 | args->namelen) != 0) | ||
2151 | continue; | ||
2152 | if (!xfs_attr_namesp_match(args->flags, entry->flags)) | ||
2153 | continue; | ||
2154 | args->index = probe; | ||
2155 | return -EEXIST; | ||
2156 | } else { | ||
2157 | name_rmt = xfs_attr3_leaf_name_remote(leaf, probe); | ||
2158 | if (name_rmt->namelen != args->namelen) | ||
2159 | continue; | ||
2160 | if (memcmp(args->name, name_rmt->name, | ||
2161 | args->namelen) != 0) | ||
2162 | continue; | ||
2163 | if (!xfs_attr_namesp_match(args->flags, entry->flags)) | ||
2164 | continue; | ||
2165 | args->index = probe; | ||
2166 | args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); | ||
2167 | args->rmtblkno = be32_to_cpu(name_rmt->valueblk); | ||
2168 | args->rmtblkcnt = xfs_attr3_rmt_blocks( | ||
2169 | args->dp->i_mount, | ||
2170 | args->rmtvaluelen); | ||
2171 | return -EEXIST; | ||
2172 | } | ||
2173 | } | ||
2174 | args->index = probe; | ||
2175 | return -ENOATTR; | ||
2176 | } | ||
2177 | |||
2178 | /* | ||
2179 | * Get the value associated with an attribute name from a leaf attribute | ||
2180 | * list structure. | ||
2181 | */ | ||
2182 | int | ||
2183 | xfs_attr3_leaf_getvalue( | ||
2184 | struct xfs_buf *bp, | ||
2185 | struct xfs_da_args *args) | ||
2186 | { | ||
2187 | struct xfs_attr_leafblock *leaf; | ||
2188 | struct xfs_attr3_icleaf_hdr ichdr; | ||
2189 | struct xfs_attr_leaf_entry *entry; | ||
2190 | struct xfs_attr_leaf_name_local *name_loc; | ||
2191 | struct xfs_attr_leaf_name_remote *name_rmt; | ||
2192 | int valuelen; | ||
2193 | |||
2194 | leaf = bp->b_addr; | ||
2195 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | ||
2196 | ASSERT(ichdr.count < args->geo->blksize / 8); | ||
2197 | ASSERT(args->index < ichdr.count); | ||
2198 | |||
2199 | entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; | ||
2200 | if (entry->flags & XFS_ATTR_LOCAL) { | ||
2201 | name_loc = xfs_attr3_leaf_name_local(leaf, args->index); | ||
2202 | ASSERT(name_loc->namelen == args->namelen); | ||
2203 | ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0); | ||
2204 | valuelen = be16_to_cpu(name_loc->valuelen); | ||
2205 | if (args->flags & ATTR_KERNOVAL) { | ||
2206 | args->valuelen = valuelen; | ||
2207 | return 0; | ||
2208 | } | ||
2209 | if (args->valuelen < valuelen) { | ||
2210 | args->valuelen = valuelen; | ||
2211 | return -ERANGE; | ||
2212 | } | ||
2213 | args->valuelen = valuelen; | ||
2214 | memcpy(args->value, &name_loc->nameval[args->namelen], valuelen); | ||
2215 | } else { | ||
2216 | name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); | ||
2217 | ASSERT(name_rmt->namelen == args->namelen); | ||
2218 | ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); | ||
2219 | args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); | ||
2220 | args->rmtblkno = be32_to_cpu(name_rmt->valueblk); | ||
2221 | args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount, | ||
2222 | args->rmtvaluelen); | ||
2223 | if (args->flags & ATTR_KERNOVAL) { | ||
2224 | args->valuelen = args->rmtvaluelen; | ||
2225 | return 0; | ||
2226 | } | ||
2227 | if (args->valuelen < args->rmtvaluelen) { | ||
2228 | args->valuelen = args->rmtvaluelen; | ||
2229 | return -ERANGE; | ||
2230 | } | ||
2231 | args->valuelen = args->rmtvaluelen; | ||
2232 | } | ||
2233 | return 0; | ||
2234 | } | ||
2235 | |||
2236 | /*======================================================================== | ||
2237 | * Utility routines. | ||
2238 | *========================================================================*/ | ||
2239 | |||
2240 | /* | ||
2241 | * Move the indicated entries from one leaf to another. | ||
2242 | * NOTE: this routine modifies both source and destination leaves. | ||
2243 | */ | ||
2244 | /*ARGSUSED*/ | ||
2245 | STATIC void | ||
2246 | xfs_attr3_leaf_moveents( | ||
2247 | struct xfs_da_args *args, | ||
2248 | struct xfs_attr_leafblock *leaf_s, | ||
2249 | struct xfs_attr3_icleaf_hdr *ichdr_s, | ||
2250 | int start_s, | ||
2251 | struct xfs_attr_leafblock *leaf_d, | ||
2252 | struct xfs_attr3_icleaf_hdr *ichdr_d, | ||
2253 | int start_d, | ||
2254 | int count) | ||
2255 | { | ||
2256 | struct xfs_attr_leaf_entry *entry_s; | ||
2257 | struct xfs_attr_leaf_entry *entry_d; | ||
2258 | int desti; | ||
2259 | int tmp; | ||
2260 | int i; | ||
2261 | |||
2262 | /* | ||
2263 | * Check for nothing to do. | ||
2264 | */ | ||
2265 | if (count == 0) | ||
2266 | return; | ||
2267 | |||
2268 | /* | ||
2269 | * Set up environment. | ||
2270 | */ | ||
2271 | ASSERT(ichdr_s->magic == XFS_ATTR_LEAF_MAGIC || | ||
2272 | ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC); | ||
2273 | ASSERT(ichdr_s->magic == ichdr_d->magic); | ||
2274 | ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8); | ||
2275 | ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s)) | ||
2276 | + xfs_attr3_leaf_hdr_size(leaf_s)); | ||
2277 | ASSERT(ichdr_d->count < args->geo->blksize / 8); | ||
2278 | ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d)) | ||
2279 | + xfs_attr3_leaf_hdr_size(leaf_d)); | ||
2280 | |||
2281 | ASSERT(start_s < ichdr_s->count); | ||
2282 | ASSERT(start_d <= ichdr_d->count); | ||
2283 | ASSERT(count <= ichdr_s->count); | ||
2284 | |||
2285 | |||
2286 | /* | ||
2287 | * Move the entries in the destination leaf up to make a hole? | ||
2288 | */ | ||
2289 | if (start_d < ichdr_d->count) { | ||
2290 | tmp = ichdr_d->count - start_d; | ||
2291 | tmp *= sizeof(xfs_attr_leaf_entry_t); | ||
2292 | entry_s = &xfs_attr3_leaf_entryp(leaf_d)[start_d]; | ||
2293 | entry_d = &xfs_attr3_leaf_entryp(leaf_d)[start_d + count]; | ||
2294 | memmove(entry_d, entry_s, tmp); | ||
2295 | } | ||
2296 | |||
2297 | /* | ||
2298 | * Copy all entry's in the same (sorted) order, | ||
2299 | * but allocate attribute info packed and in sequence. | ||
2300 | */ | ||
2301 | entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s]; | ||
2302 | entry_d = &xfs_attr3_leaf_entryp(leaf_d)[start_d]; | ||
2303 | desti = start_d; | ||
2304 | for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) { | ||
2305 | ASSERT(be16_to_cpu(entry_s->nameidx) >= ichdr_s->firstused); | ||
2306 | tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i); | ||
2307 | #ifdef GROT | ||
2308 | /* | ||
2309 | * Code to drop INCOMPLETE entries. Difficult to use as we | ||
2310 | * may also need to change the insertion index. Code turned | ||
2311 | * off for 6.2, should be revisited later. | ||
2312 | */ | ||
2313 | if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */ | ||
2314 | memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp); | ||
2315 | ichdr_s->usedbytes -= tmp; | ||
2316 | ichdr_s->count -= 1; | ||
2317 | entry_d--; /* to compensate for ++ in loop hdr */ | ||
2318 | desti--; | ||
2319 | if ((start_s + i) < offset) | ||
2320 | result++; /* insertion index adjustment */ | ||
2321 | } else { | ||
2322 | #endif /* GROT */ | ||
2323 | ichdr_d->firstused -= tmp; | ||
2324 | /* both on-disk, don't endian flip twice */ | ||
2325 | entry_d->hashval = entry_s->hashval; | ||
2326 | entry_d->nameidx = cpu_to_be16(ichdr_d->firstused); | ||
2327 | entry_d->flags = entry_s->flags; | ||
2328 | ASSERT(be16_to_cpu(entry_d->nameidx) + tmp | ||
2329 | <= args->geo->blksize); | ||
2330 | memmove(xfs_attr3_leaf_name(leaf_d, desti), | ||
2331 | xfs_attr3_leaf_name(leaf_s, start_s + i), tmp); | ||
2332 | ASSERT(be16_to_cpu(entry_s->nameidx) + tmp | ||
2333 | <= args->geo->blksize); | ||
2334 | memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp); | ||
2335 | ichdr_s->usedbytes -= tmp; | ||
2336 | ichdr_d->usedbytes += tmp; | ||
2337 | ichdr_s->count -= 1; | ||
2338 | ichdr_d->count += 1; | ||
2339 | tmp = ichdr_d->count * sizeof(xfs_attr_leaf_entry_t) | ||
2340 | + xfs_attr3_leaf_hdr_size(leaf_d); | ||
2341 | ASSERT(ichdr_d->firstused >= tmp); | ||
2342 | #ifdef GROT | ||
2343 | } | ||
2344 | #endif /* GROT */ | ||
2345 | } | ||
2346 | |||
2347 | /* | ||
2348 | * Zero out the entries we just copied. | ||
2349 | */ | ||
2350 | if (start_s == ichdr_s->count) { | ||
2351 | tmp = count * sizeof(xfs_attr_leaf_entry_t); | ||
2352 | entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s]; | ||
2353 | ASSERT(((char *)entry_s + tmp) <= | ||
2354 | ((char *)leaf_s + args->geo->blksize)); | ||
2355 | memset(entry_s, 0, tmp); | ||
2356 | } else { | ||
2357 | /* | ||
2358 | * Move the remaining entries down to fill the hole, | ||
2359 | * then zero the entries at the top. | ||
2360 | */ | ||
2361 | tmp = (ichdr_s->count - count) * sizeof(xfs_attr_leaf_entry_t); | ||
2362 | entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s + count]; | ||
2363 | entry_d = &xfs_attr3_leaf_entryp(leaf_s)[start_s]; | ||
2364 | memmove(entry_d, entry_s, tmp); | ||
2365 | |||
2366 | tmp = count * sizeof(xfs_attr_leaf_entry_t); | ||
2367 | entry_s = &xfs_attr3_leaf_entryp(leaf_s)[ichdr_s->count]; | ||
2368 | ASSERT(((char *)entry_s + tmp) <= | ||
2369 | ((char *)leaf_s + args->geo->blksize)); | ||
2370 | memset(entry_s, 0, tmp); | ||
2371 | } | ||
2372 | |||
2373 | /* | ||
2374 | * Fill in the freemap information | ||
2375 | */ | ||
2376 | ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_d); | ||
2377 | ichdr_d->freemap[0].base += ichdr_d->count * sizeof(xfs_attr_leaf_entry_t); | ||
2378 | ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base; | ||
2379 | ichdr_d->freemap[1].base = 0; | ||
2380 | ichdr_d->freemap[2].base = 0; | ||
2381 | ichdr_d->freemap[1].size = 0; | ||
2382 | ichdr_d->freemap[2].size = 0; | ||
2383 | ichdr_s->holes = 1; /* leaf may not be compact */ | ||
2384 | } | ||
2385 | |||
2386 | /* | ||
2387 | * Pick up the last hashvalue from a leaf block. | ||
2388 | */ | ||
2389 | xfs_dahash_t | ||
2390 | xfs_attr_leaf_lasthash( | ||
2391 | struct xfs_buf *bp, | ||
2392 | int *count) | ||
2393 | { | ||
2394 | struct xfs_attr3_icleaf_hdr ichdr; | ||
2395 | struct xfs_attr_leaf_entry *entries; | ||
2396 | |||
2397 | xfs_attr3_leaf_hdr_from_disk(&ichdr, bp->b_addr); | ||
2398 | entries = xfs_attr3_leaf_entryp(bp->b_addr); | ||
2399 | if (count) | ||
2400 | *count = ichdr.count; | ||
2401 | if (!ichdr.count) | ||
2402 | return 0; | ||
2403 | return be32_to_cpu(entries[ichdr.count - 1].hashval); | ||
2404 | } | ||
2405 | |||
2406 | /* | ||
2407 | * Calculate the number of bytes used to store the indicated attribute | ||
2408 | * (whether local or remote only calculate bytes in this block). | ||
2409 | */ | ||
2410 | STATIC int | ||
2411 | xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) | ||
2412 | { | ||
2413 | struct xfs_attr_leaf_entry *entries; | ||
2414 | xfs_attr_leaf_name_local_t *name_loc; | ||
2415 | xfs_attr_leaf_name_remote_t *name_rmt; | ||
2416 | int size; | ||
2417 | |||
2418 | entries = xfs_attr3_leaf_entryp(leaf); | ||
2419 | if (entries[index].flags & XFS_ATTR_LOCAL) { | ||
2420 | name_loc = xfs_attr3_leaf_name_local(leaf, index); | ||
2421 | size = xfs_attr_leaf_entsize_local(name_loc->namelen, | ||
2422 | be16_to_cpu(name_loc->valuelen)); | ||
2423 | } else { | ||
2424 | name_rmt = xfs_attr3_leaf_name_remote(leaf, index); | ||
2425 | size = xfs_attr_leaf_entsize_remote(name_rmt->namelen); | ||
2426 | } | ||
2427 | return size; | ||
2428 | } | ||
2429 | |||
2430 | /* | ||
2431 | * Calculate the number of bytes that would be required to store the new | ||
2432 | * attribute (whether local or remote only calculate bytes in this block). | ||
2433 | * This routine decides as a side effect whether the attribute will be | ||
2434 | * a "local" or a "remote" attribute. | ||
2435 | */ | ||
2436 | int | ||
2437 | xfs_attr_leaf_newentsize( | ||
2438 | struct xfs_da_args *args, | ||
2439 | int *local) | ||
2440 | { | ||
2441 | int size; | ||
2442 | |||
2443 | size = xfs_attr_leaf_entsize_local(args->namelen, args->valuelen); | ||
2444 | if (size < xfs_attr_leaf_entsize_local_max(args->geo->blksize)) { | ||
2445 | if (local) | ||
2446 | *local = 1; | ||
2447 | return size; | ||
2448 | } | ||
2449 | if (local) | ||
2450 | *local = 0; | ||
2451 | return xfs_attr_leaf_entsize_remote(args->namelen); | ||
2452 | } | ||
2453 | |||
2454 | |||
2455 | /*======================================================================== | ||
2456 | * Manage the INCOMPLETE flag in a leaf entry | ||
2457 | *========================================================================*/ | ||
2458 | |||
2459 | /* | ||
2460 | * Clear the INCOMPLETE flag on an entry in a leaf block. | ||
2461 | */ | ||
2462 | int | ||
2463 | xfs_attr3_leaf_clearflag( | ||
2464 | struct xfs_da_args *args) | ||
2465 | { | ||
2466 | struct xfs_attr_leafblock *leaf; | ||
2467 | struct xfs_attr_leaf_entry *entry; | ||
2468 | struct xfs_attr_leaf_name_remote *name_rmt; | ||
2469 | struct xfs_buf *bp; | ||
2470 | int error; | ||
2471 | #ifdef DEBUG | ||
2472 | struct xfs_attr3_icleaf_hdr ichdr; | ||
2473 | xfs_attr_leaf_name_local_t *name_loc; | ||
2474 | int namelen; | ||
2475 | char *name; | ||
2476 | #endif /* DEBUG */ | ||
2477 | |||
2478 | trace_xfs_attr_leaf_clearflag(args); | ||
2479 | /* | ||
2480 | * Set up the operation. | ||
2481 | */ | ||
2482 | error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); | ||
2483 | if (error) | ||
2484 | return error; | ||
2485 | |||
2486 | leaf = bp->b_addr; | ||
2487 | entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; | ||
2488 | ASSERT(entry->flags & XFS_ATTR_INCOMPLETE); | ||
2489 | |||
2490 | #ifdef DEBUG | ||
2491 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | ||
2492 | ASSERT(args->index < ichdr.count); | ||
2493 | ASSERT(args->index >= 0); | ||
2494 | |||
2495 | if (entry->flags & XFS_ATTR_LOCAL) { | ||
2496 | name_loc = xfs_attr3_leaf_name_local(leaf, args->index); | ||
2497 | namelen = name_loc->namelen; | ||
2498 | name = (char *)name_loc->nameval; | ||
2499 | } else { | ||
2500 | name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); | ||
2501 | namelen = name_rmt->namelen; | ||
2502 | name = (char *)name_rmt->name; | ||
2503 | } | ||
2504 | ASSERT(be32_to_cpu(entry->hashval) == args->hashval); | ||
2505 | ASSERT(namelen == args->namelen); | ||
2506 | ASSERT(memcmp(name, args->name, namelen) == 0); | ||
2507 | #endif /* DEBUG */ | ||
2508 | |||
2509 | entry->flags &= ~XFS_ATTR_INCOMPLETE; | ||
2510 | xfs_trans_log_buf(args->trans, bp, | ||
2511 | XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); | ||
2512 | |||
2513 | if (args->rmtblkno) { | ||
2514 | ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0); | ||
2515 | name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); | ||
2516 | name_rmt->valueblk = cpu_to_be32(args->rmtblkno); | ||
2517 | name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen); | ||
2518 | xfs_trans_log_buf(args->trans, bp, | ||
2519 | XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); | ||
2520 | } | ||
2521 | |||
2522 | /* | ||
2523 | * Commit the flag value change and start the next trans in series. | ||
2524 | */ | ||
2525 | return xfs_trans_roll(&args->trans, args->dp); | ||
2526 | } | ||
2527 | |||
2528 | /* | ||
2529 | * Set the INCOMPLETE flag on an entry in a leaf block. | ||
2530 | */ | ||
2531 | int | ||
2532 | xfs_attr3_leaf_setflag( | ||
2533 | struct xfs_da_args *args) | ||
2534 | { | ||
2535 | struct xfs_attr_leafblock *leaf; | ||
2536 | struct xfs_attr_leaf_entry *entry; | ||
2537 | struct xfs_attr_leaf_name_remote *name_rmt; | ||
2538 | struct xfs_buf *bp; | ||
2539 | int error; | ||
2540 | #ifdef DEBUG | ||
2541 | struct xfs_attr3_icleaf_hdr ichdr; | ||
2542 | #endif | ||
2543 | |||
2544 | trace_xfs_attr_leaf_setflag(args); | ||
2545 | |||
2546 | /* | ||
2547 | * Set up the operation. | ||
2548 | */ | ||
2549 | error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); | ||
2550 | if (error) | ||
2551 | return error; | ||
2552 | |||
2553 | leaf = bp->b_addr; | ||
2554 | #ifdef DEBUG | ||
2555 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | ||
2556 | ASSERT(args->index < ichdr.count); | ||
2557 | ASSERT(args->index >= 0); | ||
2558 | #endif | ||
2559 | entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; | ||
2560 | |||
2561 | ASSERT((entry->flags & XFS_ATTR_INCOMPLETE) == 0); | ||
2562 | entry->flags |= XFS_ATTR_INCOMPLETE; | ||
2563 | xfs_trans_log_buf(args->trans, bp, | ||
2564 | XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); | ||
2565 | if ((entry->flags & XFS_ATTR_LOCAL) == 0) { | ||
2566 | name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); | ||
2567 | name_rmt->valueblk = 0; | ||
2568 | name_rmt->valuelen = 0; | ||
2569 | xfs_trans_log_buf(args->trans, bp, | ||
2570 | XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); | ||
2571 | } | ||
2572 | |||
2573 | /* | ||
2574 | * Commit the flag value change and start the next trans in series. | ||
2575 | */ | ||
2576 | return xfs_trans_roll(&args->trans, args->dp); | ||
2577 | } | ||
2578 | |||
2579 | /* | ||
2580 | * In a single transaction, clear the INCOMPLETE flag on the leaf entry | ||
2581 | * given by args->blkno/index and set the INCOMPLETE flag on the leaf | ||
2582 | * entry given by args->blkno2/index2. | ||
2583 | * | ||
2584 | * Note that they could be in different blocks, or in the same block. | ||
2585 | */ | ||
2586 | int | ||
2587 | xfs_attr3_leaf_flipflags( | ||
2588 | struct xfs_da_args *args) | ||
2589 | { | ||
2590 | struct xfs_attr_leafblock *leaf1; | ||
2591 | struct xfs_attr_leafblock *leaf2; | ||
2592 | struct xfs_attr_leaf_entry *entry1; | ||
2593 | struct xfs_attr_leaf_entry *entry2; | ||
2594 | struct xfs_attr_leaf_name_remote *name_rmt; | ||
2595 | struct xfs_buf *bp1; | ||
2596 | struct xfs_buf *bp2; | ||
2597 | int error; | ||
2598 | #ifdef DEBUG | ||
2599 | struct xfs_attr3_icleaf_hdr ichdr1; | ||
2600 | struct xfs_attr3_icleaf_hdr ichdr2; | ||
2601 | xfs_attr_leaf_name_local_t *name_loc; | ||
2602 | int namelen1, namelen2; | ||
2603 | char *name1, *name2; | ||
2604 | #endif /* DEBUG */ | ||
2605 | |||
2606 | trace_xfs_attr_leaf_flipflags(args); | ||
2607 | |||
2608 | /* | ||
2609 | * Read the block containing the "old" attr | ||
2610 | */ | ||
2611 | error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1); | ||
2612 | if (error) | ||
2613 | return error; | ||
2614 | |||
2615 | /* | ||
2616 | * Read the block containing the "new" attr, if it is different | ||
2617 | */ | ||
2618 | if (args->blkno2 != args->blkno) { | ||
2619 | error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno2, | ||
2620 | -1, &bp2); | ||
2621 | if (error) | ||
2622 | return error; | ||
2623 | } else { | ||
2624 | bp2 = bp1; | ||
2625 | } | ||
2626 | |||
2627 | leaf1 = bp1->b_addr; | ||
2628 | entry1 = &xfs_attr3_leaf_entryp(leaf1)[args->index]; | ||
2629 | |||
2630 | leaf2 = bp2->b_addr; | ||
2631 | entry2 = &xfs_attr3_leaf_entryp(leaf2)[args->index2]; | ||
2632 | |||
2633 | #ifdef DEBUG | ||
2634 | xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1); | ||
2635 | ASSERT(args->index < ichdr1.count); | ||
2636 | ASSERT(args->index >= 0); | ||
2637 | |||
2638 | xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2); | ||
2639 | ASSERT(args->index2 < ichdr2.count); | ||
2640 | ASSERT(args->index2 >= 0); | ||
2641 | |||
2642 | if (entry1->flags & XFS_ATTR_LOCAL) { | ||
2643 | name_loc = xfs_attr3_leaf_name_local(leaf1, args->index); | ||
2644 | namelen1 = name_loc->namelen; | ||
2645 | name1 = (char *)name_loc->nameval; | ||
2646 | } else { | ||
2647 | name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index); | ||
2648 | namelen1 = name_rmt->namelen; | ||
2649 | name1 = (char *)name_rmt->name; | ||
2650 | } | ||
2651 | if (entry2->flags & XFS_ATTR_LOCAL) { | ||
2652 | name_loc = xfs_attr3_leaf_name_local(leaf2, args->index2); | ||
2653 | namelen2 = name_loc->namelen; | ||
2654 | name2 = (char *)name_loc->nameval; | ||
2655 | } else { | ||
2656 | name_rmt = xfs_attr3_leaf_name_remote(leaf2, args->index2); | ||
2657 | namelen2 = name_rmt->namelen; | ||
2658 | name2 = (char *)name_rmt->name; | ||
2659 | } | ||
2660 | ASSERT(be32_to_cpu(entry1->hashval) == be32_to_cpu(entry2->hashval)); | ||
2661 | ASSERT(namelen1 == namelen2); | ||
2662 | ASSERT(memcmp(name1, name2, namelen1) == 0); | ||
2663 | #endif /* DEBUG */ | ||
2664 | |||
2665 | ASSERT(entry1->flags & XFS_ATTR_INCOMPLETE); | ||
2666 | ASSERT((entry2->flags & XFS_ATTR_INCOMPLETE) == 0); | ||
2667 | |||
2668 | entry1->flags &= ~XFS_ATTR_INCOMPLETE; | ||
2669 | xfs_trans_log_buf(args->trans, bp1, | ||
2670 | XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1))); | ||
2671 | if (args->rmtblkno) { | ||
2672 | ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0); | ||
2673 | name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index); | ||
2674 | name_rmt->valueblk = cpu_to_be32(args->rmtblkno); | ||
2675 | name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen); | ||
2676 | xfs_trans_log_buf(args->trans, bp1, | ||
2677 | XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt))); | ||
2678 | } | ||
2679 | |||
2680 | entry2->flags |= XFS_ATTR_INCOMPLETE; | ||
2681 | xfs_trans_log_buf(args->trans, bp2, | ||
2682 | XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2))); | ||
2683 | if ((entry2->flags & XFS_ATTR_LOCAL) == 0) { | ||
2684 | name_rmt = xfs_attr3_leaf_name_remote(leaf2, args->index2); | ||
2685 | name_rmt->valueblk = 0; | ||
2686 | name_rmt->valuelen = 0; | ||
2687 | xfs_trans_log_buf(args->trans, bp2, | ||
2688 | XFS_DA_LOGRANGE(leaf2, name_rmt, sizeof(*name_rmt))); | ||
2689 | } | ||
2690 | |||
2691 | /* | ||
2692 | * Commit the flag value change and start the next trans in series. | ||
2693 | */ | ||
2694 | error = xfs_trans_roll(&args->trans, args->dp); | ||
2695 | |||
2696 | return error; | ||
2697 | } | ||
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h new file mode 100644 index 000000000000..e2929da7c3ba --- /dev/null +++ b/fs/xfs/libxfs/xfs_attr_leaf.h | |||
@@ -0,0 +1,108 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2013 Red Hat, Inc. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #ifndef __XFS_ATTR_LEAF_H__ | ||
20 | #define __XFS_ATTR_LEAF_H__ | ||
21 | |||
22 | struct attrlist; | ||
23 | struct attrlist_cursor_kern; | ||
24 | struct xfs_attr_list_context; | ||
25 | struct xfs_da_args; | ||
26 | struct xfs_da_state; | ||
27 | struct xfs_da_state_blk; | ||
28 | struct xfs_inode; | ||
29 | struct xfs_trans; | ||
30 | |||
31 | /* | ||
32 | * Used to keep a list of "remote value" extents when unlinking an inode. | ||
33 | */ | ||
34 | typedef struct xfs_attr_inactive_list { | ||
35 | xfs_dablk_t valueblk; /* block number of value bytes */ | ||
36 | int valuelen; /* number of bytes in value */ | ||
37 | } xfs_attr_inactive_list_t; | ||
38 | |||
39 | |||
40 | /*======================================================================== | ||
41 | * Function prototypes for the kernel. | ||
42 | *========================================================================*/ | ||
43 | |||
44 | /* | ||
45 | * Internal routines when attribute fork size < XFS_LITINO(mp). | ||
46 | */ | ||
47 | void xfs_attr_shortform_create(struct xfs_da_args *args); | ||
48 | void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); | ||
49 | int xfs_attr_shortform_lookup(struct xfs_da_args *args); | ||
50 | int xfs_attr_shortform_getvalue(struct xfs_da_args *args); | ||
51 | int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); | ||
52 | int xfs_attr_shortform_remove(struct xfs_da_args *args); | ||
53 | int xfs_attr_shortform_list(struct xfs_attr_list_context *context); | ||
54 | int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); | ||
55 | int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes); | ||
56 | |||
57 | |||
58 | /* | ||
59 | * Internal routines when attribute fork size == XFS_LBSIZE(mp). | ||
60 | */ | ||
61 | int xfs_attr3_leaf_to_node(struct xfs_da_args *args); | ||
62 | int xfs_attr3_leaf_to_shortform(struct xfs_buf *bp, | ||
63 | struct xfs_da_args *args, int forkoff); | ||
64 | int xfs_attr3_leaf_clearflag(struct xfs_da_args *args); | ||
65 | int xfs_attr3_leaf_setflag(struct xfs_da_args *args); | ||
66 | int xfs_attr3_leaf_flipflags(struct xfs_da_args *args); | ||
67 | |||
68 | /* | ||
69 | * Routines used for growing the Btree. | ||
70 | */ | ||
71 | int xfs_attr3_leaf_split(struct xfs_da_state *state, | ||
72 | struct xfs_da_state_blk *oldblk, | ||
73 | struct xfs_da_state_blk *newblk); | ||
74 | int xfs_attr3_leaf_lookup_int(struct xfs_buf *leaf, | ||
75 | struct xfs_da_args *args); | ||
76 | int xfs_attr3_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args); | ||
77 | int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer, | ||
78 | struct xfs_da_args *args); | ||
79 | int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer, | ||
80 | struct xfs_da_args *args); | ||
81 | int xfs_attr3_leaf_list_int(struct xfs_buf *bp, | ||
82 | struct xfs_attr_list_context *context); | ||
83 | |||
84 | /* | ||
85 | * Routines used for shrinking the Btree. | ||
86 | */ | ||
87 | int xfs_attr3_leaf_toosmall(struct xfs_da_state *state, int *retval); | ||
88 | void xfs_attr3_leaf_unbalance(struct xfs_da_state *state, | ||
89 | struct xfs_da_state_blk *drop_blk, | ||
90 | struct xfs_da_state_blk *save_blk); | ||
91 | int xfs_attr3_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp); | ||
92 | |||
93 | /* | ||
94 | * Utility routines. | ||
95 | */ | ||
96 | xfs_dahash_t xfs_attr_leaf_lasthash(struct xfs_buf *bp, int *count); | ||
97 | int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp, | ||
98 | struct xfs_buf *leaf2_bp); | ||
99 | int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local); | ||
100 | int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp, | ||
101 | xfs_dablk_t bno, xfs_daddr_t mappedbno, | ||
102 | struct xfs_buf **bpp); | ||
103 | void xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to, | ||
104 | struct xfs_attr_leafblock *from); | ||
105 | void xfs_attr3_leaf_hdr_to_disk(struct xfs_attr_leafblock *to, | ||
106 | struct xfs_attr3_icleaf_hdr *from); | ||
107 | |||
108 | #endif /* __XFS_ATTR_LEAF_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c new file mode 100644 index 000000000000..7510ab8058a4 --- /dev/null +++ b/fs/xfs/libxfs/xfs_attr_remote.c | |||
@@ -0,0 +1,628 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2013 Red Hat, Inc. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #include "xfs.h" | ||
20 | #include "xfs_fs.h" | ||
21 | #include "xfs_shared.h" | ||
22 | #include "xfs_format.h" | ||
23 | #include "xfs_log_format.h" | ||
24 | #include "xfs_trans_resv.h" | ||
25 | #include "xfs_bit.h" | ||
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | ||
29 | #include "xfs_da_format.h" | ||
30 | #include "xfs_da_btree.h" | ||
31 | #include "xfs_inode.h" | ||
32 | #include "xfs_alloc.h" | ||
33 | #include "xfs_trans.h" | ||
34 | #include "xfs_inode_item.h" | ||
35 | #include "xfs_bmap.h" | ||
36 | #include "xfs_bmap_util.h" | ||
37 | #include "xfs_attr.h" | ||
38 | #include "xfs_attr_leaf.h" | ||
39 | #include "xfs_attr_remote.h" | ||
40 | #include "xfs_trans_space.h" | ||
41 | #include "xfs_trace.h" | ||
42 | #include "xfs_cksum.h" | ||
43 | #include "xfs_buf_item.h" | ||
44 | #include "xfs_error.h" | ||
45 | |||
46 | #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ | ||
47 | |||
48 | /* | ||
49 | * Each contiguous block has a header, so it is not just a simple attribute | ||
50 | * length to FSB conversion. | ||
51 | */ | ||
52 | int | ||
53 | xfs_attr3_rmt_blocks( | ||
54 | struct xfs_mount *mp, | ||
55 | int attrlen) | ||
56 | { | ||
57 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
58 | int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize); | ||
59 | return (attrlen + buflen - 1) / buflen; | ||
60 | } | ||
61 | return XFS_B_TO_FSB(mp, attrlen); | ||
62 | } | ||
63 | |||
64 | /* | ||
65 | * Checking of the remote attribute header is split into two parts. The verifier | ||
66 | * does CRC, location and bounds checking, the unpacking function checks the | ||
67 | * attribute parameters and owner. | ||
68 | */ | ||
69 | static bool | ||
70 | xfs_attr3_rmt_hdr_ok( | ||
71 | void *ptr, | ||
72 | xfs_ino_t ino, | ||
73 | uint32_t offset, | ||
74 | uint32_t size, | ||
75 | xfs_daddr_t bno) | ||
76 | { | ||
77 | struct xfs_attr3_rmt_hdr *rmt = ptr; | ||
78 | |||
79 | if (bno != be64_to_cpu(rmt->rm_blkno)) | ||
80 | return false; | ||
81 | if (offset != be32_to_cpu(rmt->rm_offset)) | ||
82 | return false; | ||
83 | if (size != be32_to_cpu(rmt->rm_bytes)) | ||
84 | return false; | ||
85 | if (ino != be64_to_cpu(rmt->rm_owner)) | ||
86 | return false; | ||
87 | |||
88 | /* ok */ | ||
89 | return true; | ||
90 | } | ||
91 | |||
92 | static bool | ||
93 | xfs_attr3_rmt_verify( | ||
94 | struct xfs_mount *mp, | ||
95 | void *ptr, | ||
96 | int fsbsize, | ||
97 | xfs_daddr_t bno) | ||
98 | { | ||
99 | struct xfs_attr3_rmt_hdr *rmt = ptr; | ||
100 | |||
101 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
102 | return false; | ||
103 | if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC)) | ||
104 | return false; | ||
105 | if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid)) | ||
106 | return false; | ||
107 | if (be64_to_cpu(rmt->rm_blkno) != bno) | ||
108 | return false; | ||
109 | if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) | ||
110 | return false; | ||
111 | if (be32_to_cpu(rmt->rm_offset) + | ||
112 | be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX) | ||
113 | return false; | ||
114 | if (rmt->rm_owner == 0) | ||
115 | return false; | ||
116 | |||
117 | return true; | ||
118 | } | ||
119 | |||
120 | static void | ||
121 | xfs_attr3_rmt_read_verify( | ||
122 | struct xfs_buf *bp) | ||
123 | { | ||
124 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
125 | char *ptr; | ||
126 | int len; | ||
127 | xfs_daddr_t bno; | ||
128 | int blksize = mp->m_attr_geo->blksize; | ||
129 | |||
130 | /* no verification of non-crc buffers */ | ||
131 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
132 | return; | ||
133 | |||
134 | ptr = bp->b_addr; | ||
135 | bno = bp->b_bn; | ||
136 | len = BBTOB(bp->b_length); | ||
137 | ASSERT(len >= blksize); | ||
138 | |||
139 | while (len > 0) { | ||
140 | if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { | ||
141 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
142 | break; | ||
143 | } | ||
144 | if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { | ||
145 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
146 | break; | ||
147 | } | ||
148 | len -= blksize; | ||
149 | ptr += blksize; | ||
150 | bno += BTOBB(blksize); | ||
151 | } | ||
152 | |||
153 | if (bp->b_error) | ||
154 | xfs_verifier_error(bp); | ||
155 | else | ||
156 | ASSERT(len == 0); | ||
157 | } | ||
158 | |||
159 | static void | ||
160 | xfs_attr3_rmt_write_verify( | ||
161 | struct xfs_buf *bp) | ||
162 | { | ||
163 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
164 | struct xfs_buf_log_item *bip = bp->b_fspriv; | ||
165 | char *ptr; | ||
166 | int len; | ||
167 | xfs_daddr_t bno; | ||
168 | int blksize = mp->m_attr_geo->blksize; | ||
169 | |||
170 | /* no verification of non-crc buffers */ | ||
171 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
172 | return; | ||
173 | |||
174 | ptr = bp->b_addr; | ||
175 | bno = bp->b_bn; | ||
176 | len = BBTOB(bp->b_length); | ||
177 | ASSERT(len >= blksize); | ||
178 | |||
179 | while (len > 0) { | ||
180 | if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { | ||
181 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
182 | xfs_verifier_error(bp); | ||
183 | return; | ||
184 | } | ||
185 | if (bip) { | ||
186 | struct xfs_attr3_rmt_hdr *rmt; | ||
187 | |||
188 | rmt = (struct xfs_attr3_rmt_hdr *)ptr; | ||
189 | rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); | ||
190 | } | ||
191 | xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); | ||
192 | |||
193 | len -= blksize; | ||
194 | ptr += blksize; | ||
195 | bno += BTOBB(blksize); | ||
196 | } | ||
197 | ASSERT(len == 0); | ||
198 | } | ||
199 | |||
200 | const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { | ||
201 | .verify_read = xfs_attr3_rmt_read_verify, | ||
202 | .verify_write = xfs_attr3_rmt_write_verify, | ||
203 | }; | ||
204 | |||
205 | STATIC int | ||
206 | xfs_attr3_rmt_hdr_set( | ||
207 | struct xfs_mount *mp, | ||
208 | void *ptr, | ||
209 | xfs_ino_t ino, | ||
210 | uint32_t offset, | ||
211 | uint32_t size, | ||
212 | xfs_daddr_t bno) | ||
213 | { | ||
214 | struct xfs_attr3_rmt_hdr *rmt = ptr; | ||
215 | |||
216 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
217 | return 0; | ||
218 | |||
219 | rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC); | ||
220 | rmt->rm_offset = cpu_to_be32(offset); | ||
221 | rmt->rm_bytes = cpu_to_be32(size); | ||
222 | uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid); | ||
223 | rmt->rm_owner = cpu_to_be64(ino); | ||
224 | rmt->rm_blkno = cpu_to_be64(bno); | ||
225 | |||
226 | return sizeof(struct xfs_attr3_rmt_hdr); | ||
227 | } | ||
228 | |||
229 | /* | ||
230 | * Helper functions to copy attribute data in and out of the one disk extents | ||
231 | */ | ||
232 | STATIC int | ||
233 | xfs_attr_rmtval_copyout( | ||
234 | struct xfs_mount *mp, | ||
235 | struct xfs_buf *bp, | ||
236 | xfs_ino_t ino, | ||
237 | int *offset, | ||
238 | int *valuelen, | ||
239 | __uint8_t **dst) | ||
240 | { | ||
241 | char *src = bp->b_addr; | ||
242 | xfs_daddr_t bno = bp->b_bn; | ||
243 | int len = BBTOB(bp->b_length); | ||
244 | int blksize = mp->m_attr_geo->blksize; | ||
245 | |||
246 | ASSERT(len >= blksize); | ||
247 | |||
248 | while (len > 0 && *valuelen > 0) { | ||
249 | int hdr_size = 0; | ||
250 | int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); | ||
251 | |||
252 | byte_cnt = min(*valuelen, byte_cnt); | ||
253 | |||
254 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
255 | if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset, | ||
256 | byte_cnt, bno)) { | ||
257 | xfs_alert(mp, | ||
258 | "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", | ||
259 | bno, *offset, byte_cnt, ino); | ||
260 | return -EFSCORRUPTED; | ||
261 | } | ||
262 | hdr_size = sizeof(struct xfs_attr3_rmt_hdr); | ||
263 | } | ||
264 | |||
265 | memcpy(*dst, src + hdr_size, byte_cnt); | ||
266 | |||
267 | /* roll buffer forwards */ | ||
268 | len -= blksize; | ||
269 | src += blksize; | ||
270 | bno += BTOBB(blksize); | ||
271 | |||
272 | /* roll attribute data forwards */ | ||
273 | *valuelen -= byte_cnt; | ||
274 | *dst += byte_cnt; | ||
275 | *offset += byte_cnt; | ||
276 | } | ||
277 | return 0; | ||
278 | } | ||
279 | |||
280 | STATIC void | ||
281 | xfs_attr_rmtval_copyin( | ||
282 | struct xfs_mount *mp, | ||
283 | struct xfs_buf *bp, | ||
284 | xfs_ino_t ino, | ||
285 | int *offset, | ||
286 | int *valuelen, | ||
287 | __uint8_t **src) | ||
288 | { | ||
289 | char *dst = bp->b_addr; | ||
290 | xfs_daddr_t bno = bp->b_bn; | ||
291 | int len = BBTOB(bp->b_length); | ||
292 | int blksize = mp->m_attr_geo->blksize; | ||
293 | |||
294 | ASSERT(len >= blksize); | ||
295 | |||
296 | while (len > 0 && *valuelen > 0) { | ||
297 | int hdr_size; | ||
298 | int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); | ||
299 | |||
300 | byte_cnt = min(*valuelen, byte_cnt); | ||
301 | hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset, | ||
302 | byte_cnt, bno); | ||
303 | |||
304 | memcpy(dst + hdr_size, *src, byte_cnt); | ||
305 | |||
306 | /* | ||
307 | * If this is the last block, zero the remainder of it. | ||
308 | * Check that we are actually the last block, too. | ||
309 | */ | ||
310 | if (byte_cnt + hdr_size < blksize) { | ||
311 | ASSERT(*valuelen - byte_cnt == 0); | ||
312 | ASSERT(len == blksize); | ||
313 | memset(dst + hdr_size + byte_cnt, 0, | ||
314 | blksize - hdr_size - byte_cnt); | ||
315 | } | ||
316 | |||
317 | /* roll buffer forwards */ | ||
318 | len -= blksize; | ||
319 | dst += blksize; | ||
320 | bno += BTOBB(blksize); | ||
321 | |||
322 | /* roll attribute data forwards */ | ||
323 | *valuelen -= byte_cnt; | ||
324 | *src += byte_cnt; | ||
325 | *offset += byte_cnt; | ||
326 | } | ||
327 | } | ||
328 | |||
329 | /* | ||
330 | * Read the value associated with an attribute from the out-of-line buffer | ||
331 | * that we stored it in. | ||
332 | */ | ||
333 | int | ||
334 | xfs_attr_rmtval_get( | ||
335 | struct xfs_da_args *args) | ||
336 | { | ||
337 | struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE]; | ||
338 | struct xfs_mount *mp = args->dp->i_mount; | ||
339 | struct xfs_buf *bp; | ||
340 | xfs_dablk_t lblkno = args->rmtblkno; | ||
341 | __uint8_t *dst = args->value; | ||
342 | int valuelen; | ||
343 | int nmap; | ||
344 | int error; | ||
345 | int blkcnt = args->rmtblkcnt; | ||
346 | int i; | ||
347 | int offset = 0; | ||
348 | |||
349 | trace_xfs_attr_rmtval_get(args); | ||
350 | |||
351 | ASSERT(!(args->flags & ATTR_KERNOVAL)); | ||
352 | ASSERT(args->rmtvaluelen == args->valuelen); | ||
353 | |||
354 | valuelen = args->rmtvaluelen; | ||
355 | while (valuelen > 0) { | ||
356 | nmap = ATTR_RMTVALUE_MAPSIZE; | ||
357 | error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, | ||
358 | blkcnt, map, &nmap, | ||
359 | XFS_BMAPI_ATTRFORK); | ||
360 | if (error) | ||
361 | return error; | ||
362 | ASSERT(nmap >= 1); | ||
363 | |||
364 | for (i = 0; (i < nmap) && (valuelen > 0); i++) { | ||
365 | xfs_daddr_t dblkno; | ||
366 | int dblkcnt; | ||
367 | |||
368 | ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) && | ||
369 | (map[i].br_startblock != HOLESTARTBLOCK)); | ||
370 | dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); | ||
371 | dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); | ||
372 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, | ||
373 | dblkno, dblkcnt, 0, &bp, | ||
374 | &xfs_attr3_rmt_buf_ops); | ||
375 | if (error) | ||
376 | return error; | ||
377 | |||
378 | error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, | ||
379 | &offset, &valuelen, | ||
380 | &dst); | ||
381 | xfs_buf_relse(bp); | ||
382 | if (error) | ||
383 | return error; | ||
384 | |||
385 | /* roll attribute extent map forwards */ | ||
386 | lblkno += map[i].br_blockcount; | ||
387 | blkcnt -= map[i].br_blockcount; | ||
388 | } | ||
389 | } | ||
390 | ASSERT(valuelen == 0); | ||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * Write the value associated with an attribute into the out-of-line buffer | ||
396 | * that we have defined for it. | ||
397 | */ | ||
398 | int | ||
399 | xfs_attr_rmtval_set( | ||
400 | struct xfs_da_args *args) | ||
401 | { | ||
402 | struct xfs_inode *dp = args->dp; | ||
403 | struct xfs_mount *mp = dp->i_mount; | ||
404 | struct xfs_bmbt_irec map; | ||
405 | xfs_dablk_t lblkno; | ||
406 | xfs_fileoff_t lfileoff = 0; | ||
407 | __uint8_t *src = args->value; | ||
408 | int blkcnt; | ||
409 | int valuelen; | ||
410 | int nmap; | ||
411 | int error; | ||
412 | int offset = 0; | ||
413 | |||
414 | trace_xfs_attr_rmtval_set(args); | ||
415 | |||
416 | /* | ||
417 | * Find a "hole" in the attribute address space large enough for | ||
418 | * us to drop the new attribute's value into. Because CRC enable | ||
419 | * attributes have headers, we can't just do a straight byte to FSB | ||
420 | * conversion and have to take the header space into account. | ||
421 | */ | ||
422 | blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen); | ||
423 | error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, | ||
424 | XFS_ATTR_FORK); | ||
425 | if (error) | ||
426 | return error; | ||
427 | |||
428 | args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff; | ||
429 | args->rmtblkcnt = blkcnt; | ||
430 | |||
431 | /* | ||
432 | * Roll through the "value", allocating blocks on disk as required. | ||
433 | */ | ||
434 | while (blkcnt > 0) { | ||
435 | int committed; | ||
436 | |||
437 | /* | ||
438 | * Allocate a single extent, up to the size of the value. | ||
439 | */ | ||
440 | xfs_bmap_init(args->flist, args->firstblock); | ||
441 | nmap = 1; | ||
442 | error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, | ||
443 | blkcnt, | ||
444 | XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, | ||
445 | args->firstblock, args->total, &map, &nmap, | ||
446 | args->flist); | ||
447 | if (!error) { | ||
448 | error = xfs_bmap_finish(&args->trans, args->flist, | ||
449 | &committed); | ||
450 | } | ||
451 | if (error) { | ||
452 | ASSERT(committed); | ||
453 | args->trans = NULL; | ||
454 | xfs_bmap_cancel(args->flist); | ||
455 | return error; | ||
456 | } | ||
457 | |||
458 | /* | ||
459 | * bmap_finish() may have committed the last trans and started | ||
460 | * a new one. We need the inode to be in all transactions. | ||
461 | */ | ||
462 | if (committed) | ||
463 | xfs_trans_ijoin(args->trans, dp, 0); | ||
464 | |||
465 | ASSERT(nmap == 1); | ||
466 | ASSERT((map.br_startblock != DELAYSTARTBLOCK) && | ||
467 | (map.br_startblock != HOLESTARTBLOCK)); | ||
468 | lblkno += map.br_blockcount; | ||
469 | blkcnt -= map.br_blockcount; | ||
470 | |||
471 | /* | ||
472 | * Start the next trans in the chain. | ||
473 | */ | ||
474 | error = xfs_trans_roll(&args->trans, dp); | ||
475 | if (error) | ||
476 | return error; | ||
477 | } | ||
478 | |||
479 | /* | ||
480 | * Roll through the "value", copying the attribute value to the | ||
481 | * already-allocated blocks. Blocks are written synchronously | ||
482 | * so that we can know they are all on disk before we turn off | ||
483 | * the INCOMPLETE flag. | ||
484 | */ | ||
485 | lblkno = args->rmtblkno; | ||
486 | blkcnt = args->rmtblkcnt; | ||
487 | valuelen = args->rmtvaluelen; | ||
488 | while (valuelen > 0) { | ||
489 | struct xfs_buf *bp; | ||
490 | xfs_daddr_t dblkno; | ||
491 | int dblkcnt; | ||
492 | |||
493 | ASSERT(blkcnt > 0); | ||
494 | |||
495 | xfs_bmap_init(args->flist, args->firstblock); | ||
496 | nmap = 1; | ||
497 | error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, | ||
498 | blkcnt, &map, &nmap, | ||
499 | XFS_BMAPI_ATTRFORK); | ||
500 | if (error) | ||
501 | return error; | ||
502 | ASSERT(nmap == 1); | ||
503 | ASSERT((map.br_startblock != DELAYSTARTBLOCK) && | ||
504 | (map.br_startblock != HOLESTARTBLOCK)); | ||
505 | |||
506 | dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), | ||
507 | dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); | ||
508 | |||
509 | bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0); | ||
510 | if (!bp) | ||
511 | return -ENOMEM; | ||
512 | bp->b_ops = &xfs_attr3_rmt_buf_ops; | ||
513 | |||
514 | xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset, | ||
515 | &valuelen, &src); | ||
516 | |||
517 | error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ | ||
518 | xfs_buf_relse(bp); | ||
519 | if (error) | ||
520 | return error; | ||
521 | |||
522 | |||
523 | /* roll attribute extent map forwards */ | ||
524 | lblkno += map.br_blockcount; | ||
525 | blkcnt -= map.br_blockcount; | ||
526 | } | ||
527 | ASSERT(valuelen == 0); | ||
528 | return 0; | ||
529 | } | ||
530 | |||
531 | /* | ||
532 | * Remove the value associated with an attribute by deleting the | ||
533 | * out-of-line buffer that it is stored on. | ||
534 | */ | ||
535 | int | ||
536 | xfs_attr_rmtval_remove( | ||
537 | struct xfs_da_args *args) | ||
538 | { | ||
539 | struct xfs_mount *mp = args->dp->i_mount; | ||
540 | xfs_dablk_t lblkno; | ||
541 | int blkcnt; | ||
542 | int error; | ||
543 | int done; | ||
544 | |||
545 | trace_xfs_attr_rmtval_remove(args); | ||
546 | |||
547 | /* | ||
548 | * Roll through the "value", invalidating the attribute value's blocks. | ||
549 | */ | ||
550 | lblkno = args->rmtblkno; | ||
551 | blkcnt = args->rmtblkcnt; | ||
552 | while (blkcnt > 0) { | ||
553 | struct xfs_bmbt_irec map; | ||
554 | struct xfs_buf *bp; | ||
555 | xfs_daddr_t dblkno; | ||
556 | int dblkcnt; | ||
557 | int nmap; | ||
558 | |||
559 | /* | ||
560 | * Try to remember where we decided to put the value. | ||
561 | */ | ||
562 | nmap = 1; | ||
563 | error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, | ||
564 | blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); | ||
565 | if (error) | ||
566 | return error; | ||
567 | ASSERT(nmap == 1); | ||
568 | ASSERT((map.br_startblock != DELAYSTARTBLOCK) && | ||
569 | (map.br_startblock != HOLESTARTBLOCK)); | ||
570 | |||
571 | dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), | ||
572 | dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); | ||
573 | |||
574 | /* | ||
575 | * If the "remote" value is in the cache, remove it. | ||
576 | */ | ||
577 | bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK); | ||
578 | if (bp) { | ||
579 | xfs_buf_stale(bp); | ||
580 | xfs_buf_relse(bp); | ||
581 | bp = NULL; | ||
582 | } | ||
583 | |||
584 | lblkno += map.br_blockcount; | ||
585 | blkcnt -= map.br_blockcount; | ||
586 | } | ||
587 | |||
588 | /* | ||
589 | * Keep de-allocating extents until the remote-value region is gone. | ||
590 | */ | ||
591 | lblkno = args->rmtblkno; | ||
592 | blkcnt = args->rmtblkcnt; | ||
593 | done = 0; | ||
594 | while (!done) { | ||
595 | int committed; | ||
596 | |||
597 | xfs_bmap_init(args->flist, args->firstblock); | ||
598 | error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, | ||
599 | XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, | ||
600 | 1, args->firstblock, args->flist, | ||
601 | &done); | ||
602 | if (!error) { | ||
603 | error = xfs_bmap_finish(&args->trans, args->flist, | ||
604 | &committed); | ||
605 | } | ||
606 | if (error) { | ||
607 | ASSERT(committed); | ||
608 | args->trans = NULL; | ||
609 | xfs_bmap_cancel(args->flist); | ||
610 | return error; | ||
611 | } | ||
612 | |||
613 | /* | ||
614 | * bmap_finish() may have committed the last trans and started | ||
615 | * a new one. We need the inode to be in all transactions. | ||
616 | */ | ||
617 | if (committed) | ||
618 | xfs_trans_ijoin(args->trans, args->dp, 0); | ||
619 | |||
620 | /* | ||
621 | * Close out trans and start the next one in the chain. | ||
622 | */ | ||
623 | error = xfs_trans_roll(&args->trans, args->dp); | ||
624 | if (error) | ||
625 | return error; | ||
626 | } | ||
627 | return 0; | ||
628 | } | ||
diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h new file mode 100644 index 000000000000..5a9acfa156d7 --- /dev/null +++ b/fs/xfs/libxfs/xfs_attr_remote.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2013 Red Hat, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_ATTR_REMOTE_H__ | ||
19 | #define __XFS_ATTR_REMOTE_H__ | ||
20 | |||
/*
 * Block count for a remote attribute value of attrlen bytes.
 * NOTE(review): xfs_attr_rmtval_set() uses this instead of a plain
 * byte-to-block conversion because header space has to be accounted for;
 * confirm the exact units against the definition.
 */
int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);

/* Read an attribute value from its out-of-line buffer. */
int xfs_attr_rmtval_get(struct xfs_da_args *args);
/* Write an attribute value into its out-of-line buffer. */
int xfs_attr_rmtval_set(struct xfs_da_args *args);
/* Remove an attribute value by deleting its out-of-line buffer. */
int xfs_attr_rmtval_remove(struct xfs_da_args *args);
26 | |||
27 | #endif /* __XFS_ATTR_REMOTE_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h new file mode 100644 index 000000000000..919756e3ba53 --- /dev/null +++ b/fs/xfs/libxfs/xfs_attr_sf.h | |||
@@ -0,0 +1,70 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_ATTR_SF_H__ | ||
19 | #define __XFS_ATTR_SF_H__ | ||
20 | |||
21 | /* | ||
22 | * Attribute storage when stored inside the inode. | ||
23 | * | ||
24 | * Small attribute lists are packed as tightly as possible so as | ||
25 | * to fit into the literal area of the inode. | ||
26 | */ | ||
27 | |||
/*
 * Entries are packed toward the top as tight as possible.
 *
 * Layout: a fixed header followed by a run of variable-length entries.
 * The one-element arrays (list[1], nameval[1]) are placeholders for
 * variable-length data; the XFS_ATTR_SF_ENTSIZE* macros subtract that one
 * placeholder byte when sizing an entry.
 */
typedef struct xfs_attr_shortform {
	struct xfs_attr_sf_hdr {	/* constant-structure header block */
		__be16	totsize;	/* total bytes in shortform list */
		__u8	count;	/* count of active entries */
	} hdr;
	struct xfs_attr_sf_entry {
		__uint8_t namelen;	/* actual length of name (no NULL) */
		__uint8_t valuelen;	/* actual length of value (no NULL) */
		__uint8_t flags;	/* flags bits (see xfs_attr_leaf.h) */
		__uint8_t nameval[1];	/* name & value bytes concatenated */
	} list[1];			/* variable sized array */
} xfs_attr_shortform_t;
/* Convenience typedefs for the nested header and entry structures. */
typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t;
typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t;
45 | |||
/*
 * We generate this then sort it, attr_list() must return things in hash-order.
 *
 * One record per shortform entry: it carries the entry's lengths and flags,
 * its hash value, and a pointer to the name rather than a copy of it.
 */
typedef struct xfs_attr_sf_sort {
	__uint8_t	entno;		/* entry number in original list */
	__uint8_t	namelen;	/* length of name value (no null) */
	__uint8_t	valuelen;	/* length of value */
	__uint8_t	flags;		/* flags bits (see xfs_attr_leaf.h) */
	xfs_dahash_t	hash;		/* this entry's hash value */
	unsigned char	*name;		/* name value, pointer into buffer */
} xfs_attr_sf_sort_t;
57 | |||
/*
 * Size helpers for shortform attribute entries.  The "-1" in the entry size
 * macros removes the single placeholder byte of nameval[1] from
 * sizeof(xfs_attr_sf_entry_t) before the real name and value lengths are
 * added.
 */
#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen)	/* space name/value uses */ \
	(((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen)))
/* Largest value representable in a __uint8_t, computed from NBBY. */
#define XFS_ATTR_SF_ENTSIZE_MAX			/* max space for name&value */ \
	((1 << (NBBY*(int)sizeof(__uint8_t))) - 1)
#define XFS_ATTR_SF_ENTSIZE(sfep)		/* space an entry uses */ \
	((int)sizeof(xfs_attr_sf_entry_t)-1 + (sfep)->namelen+(sfep)->valuelen)
/* Entries are contiguous: the next one starts right after this one's bytes. */
#define XFS_ATTR_SF_NEXTENTRY(sfep)		/* next entry in struct */ \
	((xfs_attr_sf_entry_t *)((char *)(sfep) + XFS_ATTR_SF_ENTSIZE(sfep)))
/* Reads hdr.totsize (big-endian) from the inode's attribute fork data. */
#define XFS_ATTR_SF_TOTSIZE(dp)			/* total space in use */ \
	(be16_to_cpu(((xfs_attr_shortform_t *)	\
		((dp)->i_afp->if_u1.if_data))->hdr.totsize))
69 | |||
70 | #endif /* __XFS_ATTR_SF_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_bit.h b/fs/xfs/libxfs/xfs_bit.h new file mode 100644 index 000000000000..e1649c0d3e02 --- /dev/null +++ b/fs/xfs/libxfs/xfs_bit.h | |||
@@ -0,0 +1,87 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_BIT_H__ | ||
19 | #define __XFS_BIT_H__ | ||
20 | |||
21 | /* | ||
22 | * XFS bit manipulation routines. | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * masks with n high/low bits set, 64-bit values | ||
27 | */ | ||
/*
 * Return a 64-bit mask with the n most significant bits set.
 *
 * n is expected to be in the range 0..64.  n <= 0 returns 0 explicitly:
 * computing it with the shift below would shift a 64-bit value by 64 or
 * more, which is undefined behaviour in C.
 */
static inline __uint64_t xfs_mask64hi(int n)
{
	if (n <= 0)
		return 0;
	return (__uint64_t)-1 << (64 - (n));
}
/*
 * Return a 32-bit mask with the n least significant bits set.
 *
 * n is expected to be in the range 0..32.  n >= 32 returns all ones
 * explicitly: shifting a 32-bit value by its full width is undefined
 * behaviour in C.
 */
static inline __uint32_t xfs_mask32lo(int n)
{
	if (n >= 32)
		return (__uint32_t)-1;
	return ((__uint32_t)1 << (n)) - 1;
}
/*
 * Return a 64-bit mask with the n least significant bits set.
 *
 * n is expected to be in the range 0..64.  n >= 64 returns all ones
 * explicitly: shifting a 64-bit value by its full width is undefined
 * behaviour in C.
 */
static inline __uint64_t xfs_mask64lo(int n)
{
	if (n >= 64)
		return (__uint64_t)-1;
	return ((__uint64_t)1 << (n)) - 1;
}
40 | |||
/* Index of the highest set bit in a 32-bit value, or -1 when none is set. */
static inline int xfs_highbit32(__uint32_t v)
{
	int	pos = fls(v);

	return pos - 1;
}
46 | |||
/* Index of the highest set bit in a 64-bit value, or -1 when none is set. */
static inline int xfs_highbit64(__uint64_t v)
{
	int	pos = fls64(v);

	return pos - 1;
}
52 | |||
/* Index of the lowest set bit in a 32-bit value, or -1 when none is set. */
static inline int xfs_lowbit32(__uint32_t v)
{
	int	pos = ffs(v);

	return pos - 1;
}
58 | |||
/*
 * Index of the lowest set bit in a 64-bit value, or -1 when none is set.
 * The value is examined as two 32-bit halves so that ffs() can be used.
 */
static inline int xfs_lowbit64(__uint64_t v)
{
	__uint32_t	lo = (__uint32_t)v;
	__uint32_t	hi = (__uint32_t)(v >> 32);

	if (lo)			/* answer lies in the lower word */
		return ffs(lo) - 1;
	if (hi)			/* answer lies in the upper word */
		return ffs(hi) + 32 - 1;
	return -1;		/* no bit set at all */
}
77 | |||
/*
 * Bitmap helpers operating on an array of uint words; they are only
 * declared here.
 * NOTE(review): whether "size" counts words or bits is not visible from
 * this header -- confirm against the definitions.
 */

/* Return whether bitmap is empty (1 == empty) */
extern int xfs_bitmap_empty(uint *map, uint size);

/* Count continuous one bits in map starting with start_bit */
extern int xfs_contig_bits(uint *map, uint size, uint start_bit);

/* Find next set bit in map */
extern int xfs_next_bit(uint *map, uint size, uint start_bit);
86 | |||
87 | #endif /* __XFS_BIT_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c new file mode 100644 index 000000000000..94ac88306fa6 --- /dev/null +++ b/fs/xfs/libxfs/xfs_bmap.c | |||
@@ -0,0 +1,5606 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_shared.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_bit.h" | ||
25 | #include "xfs_inum.h" | ||
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | ||
29 | #include "xfs_da_format.h" | ||
30 | #include "xfs_da_btree.h" | ||
31 | #include "xfs_dir2.h" | ||
32 | #include "xfs_inode.h" | ||
33 | #include "xfs_btree.h" | ||
34 | #include "xfs_trans.h" | ||
35 | #include "xfs_inode_item.h" | ||
36 | #include "xfs_extfree_item.h" | ||
37 | #include "xfs_alloc.h" | ||
38 | #include "xfs_bmap.h" | ||
39 | #include "xfs_bmap_util.h" | ||
40 | #include "xfs_bmap_btree.h" | ||
41 | #include "xfs_rtalloc.h" | ||
42 | #include "xfs_error.h" | ||
43 | #include "xfs_quota.h" | ||
44 | #include "xfs_trans_space.h" | ||
45 | #include "xfs_buf_item.h" | ||
46 | #include "xfs_trace.h" | ||
47 | #include "xfs_symlink.h" | ||
48 | #include "xfs_attr_leaf.h" | ||
49 | #include "xfs_dinode.h" | ||
50 | #include "xfs_filestream.h" | ||
51 | |||
52 | |||
/* NOTE(review): presumably the allocation zone for bmap free-list items; set up elsewhere -- confirm. */
kmem_zone_t		*xfs_bmap_free_item_zone;
54 | |||
55 | /* | ||
56 | * Miscellaneous helper functions | ||
57 | */ | ||
58 | |||
59 | /* | ||
60 | * Compute and fill in the value of the maximum depth of a bmap btree | ||
61 | * in this filesystem. Done once, during mount. | ||
62 | */ | ||
63 | void | ||
64 | xfs_bmap_compute_maxlevels( | ||
65 | xfs_mount_t *mp, /* file system mount structure */ | ||
66 | int whichfork) /* data or attr fork */ | ||
67 | { | ||
68 | int level; /* btree level */ | ||
69 | uint maxblocks; /* max blocks at this level */ | ||
70 | uint maxleafents; /* max leaf entries possible */ | ||
71 | int maxrootrecs; /* max records in root block */ | ||
72 | int minleafrecs; /* min records in leaf block */ | ||
73 | int minnoderecs; /* min records in node block */ | ||
74 | int sz; /* root block size */ | ||
75 | |||
76 | /* | ||
77 | * The maximum number of extents in a file, hence the maximum | ||
78 | * number of leaf entries, is controlled by the type of di_nextents | ||
79 | * (a signed 32-bit number, xfs_extnum_t), or by di_anextents | ||
80 | * (a signed 16-bit number, xfs_aextnum_t). | ||
81 | * | ||
82 | * Note that we can no longer assume that if we are in ATTR1 that | ||
83 | * the fork offset of all the inodes will be | ||
84 | * (xfs_default_attroffset(ip) >> 3) because we could have mounted | ||
85 | * with ATTR2 and then mounted back with ATTR1, keeping the | ||
86 | * di_forkoff's fixed but probably at various positions. Therefore, | ||
87 | * for both ATTR1 and ATTR2 we have to assume the worst case scenario | ||
88 | * of a minimum size available. | ||
89 | */ | ||
90 | if (whichfork == XFS_DATA_FORK) { | ||
91 | maxleafents = MAXEXTNUM; | ||
92 | sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS); | ||
93 | } else { | ||
94 | maxleafents = MAXAEXTNUM; | ||
95 | sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); | ||
96 | } | ||
97 | maxrootrecs = xfs_bmdr_maxrecs(sz, 0); | ||
98 | minleafrecs = mp->m_bmap_dmnr[0]; | ||
99 | minnoderecs = mp->m_bmap_dmnr[1]; | ||
100 | maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; | ||
101 | for (level = 1; maxblocks > 1; level++) { | ||
102 | if (maxblocks <= maxrootrecs) | ||
103 | maxblocks = 1; | ||
104 | else | ||
105 | maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; | ||
106 | } | ||
107 | mp->m_bm_maxlevels[whichfork] = level; | ||
108 | } | ||
109 | |||
110 | STATIC int /* error */ | ||
111 | xfs_bmbt_lookup_eq( | ||
112 | struct xfs_btree_cur *cur, | ||
113 | xfs_fileoff_t off, | ||
114 | xfs_fsblock_t bno, | ||
115 | xfs_filblks_t len, | ||
116 | int *stat) /* success/failure */ | ||
117 | { | ||
118 | cur->bc_rec.b.br_startoff = off; | ||
119 | cur->bc_rec.b.br_startblock = bno; | ||
120 | cur->bc_rec.b.br_blockcount = len; | ||
121 | return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); | ||
122 | } | ||
123 | |||
124 | STATIC int /* error */ | ||
125 | xfs_bmbt_lookup_ge( | ||
126 | struct xfs_btree_cur *cur, | ||
127 | xfs_fileoff_t off, | ||
128 | xfs_fsblock_t bno, | ||
129 | xfs_filblks_t len, | ||
130 | int *stat) /* success/failure */ | ||
131 | { | ||
132 | cur->bc_rec.b.br_startoff = off; | ||
133 | cur->bc_rec.b.br_startblock = bno; | ||
134 | cur->bc_rec.b.br_blockcount = len; | ||
135 | return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | * Check if the inode needs to be converted to btree format. | ||
140 | */ | ||
141 | static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork) | ||
142 | { | ||
143 | return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && | ||
144 | XFS_IFORK_NEXTENTS(ip, whichfork) > | ||
145 | XFS_IFORK_MAXEXT(ip, whichfork); | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * Check if the inode should be converted to extent format. | ||
150 | */ | ||
151 | static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork) | ||
152 | { | ||
153 | return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && | ||
154 | XFS_IFORK_NEXTENTS(ip, whichfork) <= | ||
155 | XFS_IFORK_MAXEXT(ip, whichfork); | ||
156 | } | ||
157 | |||
158 | /* | ||
159 | * Update the record referred to by cur to the value given | ||
160 | * by [off, bno, len, state]. | ||
161 | * This either works (return 0) or gets an EFSCORRUPTED error. | ||
162 | */ | ||
163 | STATIC int | ||
164 | xfs_bmbt_update( | ||
165 | struct xfs_btree_cur *cur, | ||
166 | xfs_fileoff_t off, | ||
167 | xfs_fsblock_t bno, | ||
168 | xfs_filblks_t len, | ||
169 | xfs_exntst_t state) | ||
170 | { | ||
171 | union xfs_btree_rec rec; | ||
172 | |||
173 | xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state); | ||
174 | return xfs_btree_update(cur, &rec); | ||
175 | } | ||
176 | |||
177 | /* | ||
178 | * Compute the worst-case number of indirect blocks that will be used | ||
179 | * for ip's delayed extent of length "len". | ||
180 | */ | ||
181 | STATIC xfs_filblks_t | ||
182 | xfs_bmap_worst_indlen( | ||
183 | xfs_inode_t *ip, /* incore inode pointer */ | ||
184 | xfs_filblks_t len) /* delayed extent length */ | ||
185 | { | ||
186 | int level; /* btree level number */ | ||
187 | int maxrecs; /* maximum record count at this level */ | ||
188 | xfs_mount_t *mp; /* mount structure */ | ||
189 | xfs_filblks_t rval; /* return value */ | ||
190 | |||
191 | mp = ip->i_mount; | ||
192 | maxrecs = mp->m_bmap_dmxr[0]; | ||
193 | for (level = 0, rval = 0; | ||
194 | level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); | ||
195 | level++) { | ||
196 | len += maxrecs - 1; | ||
197 | do_div(len, maxrecs); | ||
198 | rval += len; | ||
199 | if (len == 1) | ||
200 | return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - | ||
201 | level - 1; | ||
202 | if (level == 0) | ||
203 | maxrecs = mp->m_bmap_dmxr[1]; | ||
204 | } | ||
205 | return rval; | ||
206 | } | ||
207 | |||
208 | /* | ||
209 | * Calculate the default attribute fork offset for newly created inodes. | ||
210 | */ | ||
211 | uint | ||
212 | xfs_default_attroffset( | ||
213 | struct xfs_inode *ip) | ||
214 | { | ||
215 | struct xfs_mount *mp = ip->i_mount; | ||
216 | uint offset; | ||
217 | |||
218 | if (mp->m_sb.sb_inodesize == 256) { | ||
219 | offset = XFS_LITINO(mp, ip->i_d.di_version) - | ||
220 | XFS_BMDR_SPACE_CALC(MINABTPTRS); | ||
221 | } else { | ||
222 | offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); | ||
223 | } | ||
224 | |||
225 | ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version)); | ||
226 | return offset; | ||
227 | } | ||
228 | |||
229 | /* | ||
230 | * Helper routine to reset inode di_forkoff field when switching | ||
231 | * attribute fork from local to extent format - we reset it where | ||
232 | * possible to make space available for inline data fork extents. | ||
233 | */ | ||
234 | STATIC void | ||
235 | xfs_bmap_forkoff_reset( | ||
236 | xfs_inode_t *ip, | ||
237 | int whichfork) | ||
238 | { | ||
239 | if (whichfork == XFS_ATTR_FORK && | ||
240 | ip->i_d.di_format != XFS_DINODE_FMT_DEV && | ||
241 | ip->i_d.di_format != XFS_DINODE_FMT_UUID && | ||
242 | ip->i_d.di_format != XFS_DINODE_FMT_BTREE) { | ||
243 | uint dfl_forkoff = xfs_default_attroffset(ip) >> 3; | ||
244 | |||
245 | if (dfl_forkoff > ip->i_d.di_forkoff) | ||
246 | ip->i_d.di_forkoff = dfl_forkoff; | ||
247 | } | ||
248 | } | ||
249 | |||
250 | /* | ||
251 | * Debug/sanity checking code | ||
252 | */ | ||
253 | |||
254 | STATIC int | ||
255 | xfs_bmap_sanity_check( | ||
256 | struct xfs_mount *mp, | ||
257 | struct xfs_buf *bp, | ||
258 | int level) | ||
259 | { | ||
260 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); | ||
261 | |||
262 | if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) && | ||
263 | block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC)) | ||
264 | return 0; | ||
265 | |||
266 | if (be16_to_cpu(block->bb_level) != level || | ||
267 | be16_to_cpu(block->bb_numrecs) == 0 || | ||
268 | be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) | ||
269 | return 0; | ||
270 | |||
271 | return 1; | ||
272 | } | ||
273 | |||
274 | #ifdef DEBUG | ||
275 | STATIC struct xfs_buf * | ||
276 | xfs_bmap_get_bp( | ||
277 | struct xfs_btree_cur *cur, | ||
278 | xfs_fsblock_t bno) | ||
279 | { | ||
280 | struct xfs_log_item_desc *lidp; | ||
281 | int i; | ||
282 | |||
283 | if (!cur) | ||
284 | return NULL; | ||
285 | |||
286 | for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) { | ||
287 | if (!cur->bc_bufs[i]) | ||
288 | break; | ||
289 | if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno) | ||
290 | return cur->bc_bufs[i]; | ||
291 | } | ||
292 | |||
293 | /* Chase down all the log items to see if the bp is there */ | ||
294 | list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) { | ||
295 | struct xfs_buf_log_item *bip; | ||
296 | bip = (struct xfs_buf_log_item *)lidp->lid_item; | ||
297 | if (bip->bli_item.li_type == XFS_LI_BUF && | ||
298 | XFS_BUF_ADDR(bip->bli_buf) == bno) | ||
299 | return bip->bli_buf; | ||
300 | } | ||
301 | |||
302 | return NULL; | ||
303 | } | ||
304 | |||
305 | STATIC void | ||
306 | xfs_check_block( | ||
307 | struct xfs_btree_block *block, | ||
308 | xfs_mount_t *mp, | ||
309 | int root, | ||
310 | short sz) | ||
311 | { | ||
312 | int i, j, dmxr; | ||
313 | __be64 *pp, *thispa; /* pointer to block address */ | ||
314 | xfs_bmbt_key_t *prevp, *keyp; | ||
315 | |||
316 | ASSERT(be16_to_cpu(block->bb_level) > 0); | ||
317 | |||
318 | prevp = NULL; | ||
319 | for( i = 1; i <= xfs_btree_get_numrecs(block); i++) { | ||
320 | dmxr = mp->m_bmap_dmxr[0]; | ||
321 | keyp = XFS_BMBT_KEY_ADDR(mp, block, i); | ||
322 | |||
323 | if (prevp) { | ||
324 | ASSERT(be64_to_cpu(prevp->br_startoff) < | ||
325 | be64_to_cpu(keyp->br_startoff)); | ||
326 | } | ||
327 | prevp = keyp; | ||
328 | |||
329 | /* | ||
330 | * Compare the block numbers to see if there are dups. | ||
331 | */ | ||
332 | if (root) | ||
333 | pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz); | ||
334 | else | ||
335 | pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr); | ||
336 | |||
337 | for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { | ||
338 | if (root) | ||
339 | thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz); | ||
340 | else | ||
341 | thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); | ||
342 | if (*thispa == *pp) { | ||
343 | xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld", | ||
344 | __func__, j, i, | ||
345 | (unsigned long long)be64_to_cpu(*thispa)); | ||
346 | panic("%s: ptrs are equal in node\n", | ||
347 | __func__); | ||
348 | } | ||
349 | } | ||
350 | } | ||
351 | } | ||
352 | |||
353 | /* | ||
354 | * Check that the extents for the inode ip are in the right order in all | ||
355 | * btree leaves. | ||
356 | */ | ||
357 | |||
358 | STATIC void | ||
359 | xfs_bmap_check_leaf_extents( | ||
360 | xfs_btree_cur_t *cur, /* btree cursor or null */ | ||
361 | xfs_inode_t *ip, /* incore inode pointer */ | ||
362 | int whichfork) /* data or attr fork */ | ||
363 | { | ||
364 | struct xfs_btree_block *block; /* current btree block */ | ||
365 | xfs_fsblock_t bno; /* block # of "block" */ | ||
366 | xfs_buf_t *bp; /* buffer for "block" */ | ||
367 | int error; /* error return value */ | ||
368 | xfs_extnum_t i=0, j; /* index into the extents list */ | ||
369 | xfs_ifork_t *ifp; /* fork structure */ | ||
370 | int level; /* btree level, for checking */ | ||
371 | xfs_mount_t *mp; /* file system mount structure */ | ||
372 | __be64 *pp; /* pointer to block address */ | ||
373 | xfs_bmbt_rec_t *ep; /* pointer to current extent */ | ||
374 | xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */ | ||
375 | xfs_bmbt_rec_t *nextp; /* pointer to next extent */ | ||
376 | int bp_release = 0; | ||
377 | |||
378 | if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) { | ||
379 | return; | ||
380 | } | ||
381 | |||
382 | bno = NULLFSBLOCK; | ||
383 | mp = ip->i_mount; | ||
384 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
385 | block = ifp->if_broot; | ||
386 | /* | ||
387 | * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. | ||
388 | */ | ||
389 | level = be16_to_cpu(block->bb_level); | ||
390 | ASSERT(level > 0); | ||
391 | xfs_check_block(block, mp, 1, ifp->if_broot_bytes); | ||
392 | pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); | ||
393 | bno = be64_to_cpu(*pp); | ||
394 | |||
395 | ASSERT(bno != NULLDFSBNO); | ||
396 | ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); | ||
397 | ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); | ||
398 | |||
399 | /* | ||
400 | * Go down the tree until leaf level is reached, following the first | ||
401 | * pointer (leftmost) at each level. | ||
402 | */ | ||
403 | while (level-- > 0) { | ||
404 | /* See if buf is in cur first */ | ||
405 | bp_release = 0; | ||
406 | bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); | ||
407 | if (!bp) { | ||
408 | bp_release = 1; | ||
409 | error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, | ||
410 | XFS_BMAP_BTREE_REF, | ||
411 | &xfs_bmbt_buf_ops); | ||
412 | if (error) | ||
413 | goto error_norelse; | ||
414 | } | ||
415 | block = XFS_BUF_TO_BLOCK(bp); | ||
416 | XFS_WANT_CORRUPTED_GOTO( | ||
417 | xfs_bmap_sanity_check(mp, bp, level), | ||
418 | error0); | ||
419 | if (level == 0) | ||
420 | break; | ||
421 | |||
422 | /* | ||
423 | * Check this block for basic sanity (increasing keys and | ||
424 | * no duplicate blocks). | ||
425 | */ | ||
426 | |||
427 | xfs_check_block(block, mp, 0, 0); | ||
428 | pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); | ||
429 | bno = be64_to_cpu(*pp); | ||
430 | XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); | ||
431 | if (bp_release) { | ||
432 | bp_release = 0; | ||
433 | xfs_trans_brelse(NULL, bp); | ||
434 | } | ||
435 | } | ||
436 | |||
437 | /* | ||
438 | * Here with bp and block set to the leftmost leaf node in the tree. | ||
439 | */ | ||
440 | i = 0; | ||
441 | |||
442 | /* | ||
443 | * Loop over all leaf nodes checking that all extents are in the right order. | ||
444 | */ | ||
445 | for (;;) { | ||
446 | xfs_fsblock_t nextbno; | ||
447 | xfs_extnum_t num_recs; | ||
448 | |||
449 | |||
450 | num_recs = xfs_btree_get_numrecs(block); | ||
451 | |||
452 | /* | ||
453 | * Read-ahead the next leaf block, if any. | ||
454 | */ | ||
455 | |||
456 | nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); | ||
457 | |||
458 | /* | ||
459 | * Check all the extents to make sure they are OK. | ||
460 | * If we had a previous block, the last entry should | ||
461 | * conform with the first entry in this one. | ||
462 | */ | ||
463 | |||
464 | ep = XFS_BMBT_REC_ADDR(mp, block, 1); | ||
465 | if (i) { | ||
466 | ASSERT(xfs_bmbt_disk_get_startoff(&last) + | ||
467 | xfs_bmbt_disk_get_blockcount(&last) <= | ||
468 | xfs_bmbt_disk_get_startoff(ep)); | ||
469 | } | ||
470 | for (j = 1; j < num_recs; j++) { | ||
471 | nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1); | ||
472 | ASSERT(xfs_bmbt_disk_get_startoff(ep) + | ||
473 | xfs_bmbt_disk_get_blockcount(ep) <= | ||
474 | xfs_bmbt_disk_get_startoff(nextp)); | ||
475 | ep = nextp; | ||
476 | } | ||
477 | |||
478 | last = *ep; | ||
479 | i += num_recs; | ||
480 | if (bp_release) { | ||
481 | bp_release = 0; | ||
482 | xfs_trans_brelse(NULL, bp); | ||
483 | } | ||
484 | bno = nextbno; | ||
485 | /* | ||
486 | * If we've reached the end, stop. | ||
487 | */ | ||
488 | if (bno == NULLFSBLOCK) | ||
489 | break; | ||
490 | |||
491 | bp_release = 0; | ||
492 | bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); | ||
493 | if (!bp) { | ||
494 | bp_release = 1; | ||
495 | error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, | ||
496 | XFS_BMAP_BTREE_REF, | ||
497 | &xfs_bmbt_buf_ops); | ||
498 | if (error) | ||
499 | goto error_norelse; | ||
500 | } | ||
501 | block = XFS_BUF_TO_BLOCK(bp); | ||
502 | } | ||
503 | if (bp_release) { | ||
504 | bp_release = 0; | ||
505 | xfs_trans_brelse(NULL, bp); | ||
506 | } | ||
507 | return; | ||
508 | |||
509 | error0: | ||
510 | xfs_warn(mp, "%s: at error0", __func__); | ||
511 | if (bp_release) | ||
512 | xfs_trans_brelse(NULL, bp); | ||
513 | error_norelse: | ||
514 | xfs_warn(mp, "%s: BAD after btree leaves for %d extents", | ||
515 | __func__, i); | ||
516 | panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); | ||
517 | return; | ||
518 | } | ||
519 | |||
520 | /* | ||
521 | * Add bmap trace insert entries for all the contents of the extent records. | ||
522 | */ | ||
523 | void | ||
524 | xfs_bmap_trace_exlist( | ||
525 | xfs_inode_t *ip, /* incore inode pointer */ | ||
526 | xfs_extnum_t cnt, /* count of entries in the list */ | ||
527 | int whichfork, /* data or attr fork */ | ||
528 | unsigned long caller_ip) | ||
529 | { | ||
530 | xfs_extnum_t idx; /* extent record index */ | ||
531 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
532 | int state = 0; | ||
533 | |||
534 | if (whichfork == XFS_ATTR_FORK) | ||
535 | state |= BMAP_ATTRFORK; | ||
536 | |||
537 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
538 | ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); | ||
539 | for (idx = 0; idx < cnt; idx++) | ||
540 | trace_xfs_extlist(ip, idx, whichfork, caller_ip); | ||
541 | } | ||
542 | |||
543 | /* | ||
544 | * Validate that the bmbt_irecs being returned from bmapi are valid | ||
545 | * given the caller's original parameters. Specifically check the | ||
546 | * ranges of the returned irecs to ensure that they only extend beyond | ||
547 | * the given parameters if the XFS_BMAPI_ENTIRE flag was set. | ||
548 | */ | ||
549 | STATIC void | ||
550 | xfs_bmap_validate_ret( | ||
551 | xfs_fileoff_t bno, | ||
552 | xfs_filblks_t len, | ||
553 | int flags, | ||
554 | xfs_bmbt_irec_t *mval, | ||
555 | int nmap, | ||
556 | int ret_nmap) | ||
557 | { | ||
558 | int i; /* index to map values */ | ||
559 | |||
560 | ASSERT(ret_nmap <= nmap); | ||
561 | |||
562 | for (i = 0; i < ret_nmap; i++) { | ||
563 | ASSERT(mval[i].br_blockcount > 0); | ||
564 | if (!(flags & XFS_BMAPI_ENTIRE)) { | ||
565 | ASSERT(mval[i].br_startoff >= bno); | ||
566 | ASSERT(mval[i].br_blockcount <= len); | ||
567 | ASSERT(mval[i].br_startoff + mval[i].br_blockcount <= | ||
568 | bno + len); | ||
569 | } else { | ||
570 | ASSERT(mval[i].br_startoff < bno + len); | ||
571 | ASSERT(mval[i].br_startoff + mval[i].br_blockcount > | ||
572 | bno); | ||
573 | } | ||
574 | ASSERT(i == 0 || | ||
575 | mval[i - 1].br_startoff + mval[i - 1].br_blockcount == | ||
576 | mval[i].br_startoff); | ||
577 | ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK && | ||
578 | mval[i].br_startblock != HOLESTARTBLOCK); | ||
579 | ASSERT(mval[i].br_state == XFS_EXT_NORM || | ||
580 | mval[i].br_state == XFS_EXT_UNWRITTEN); | ||
581 | } | ||
582 | } | ||
583 | |||
584 | #else | ||
/*
 * Non-DEBUG stubs for the consistency checkers.  Both expand to an empty
 * do/while so that each behaves as exactly one statement wherever it is
 * used (including as the sole body of an if/else) and does not leave a bare
 * ';' behind.
 */
#define	xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
#define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)	do { } while (0)
587 | #endif /* DEBUG */ | ||
588 | |||
589 | /* | ||
590 | * bmap free list manipulation functions | ||
591 | */ | ||
592 | |||
593 | /* | ||
594 | * Add the extent to the list of extents to be free at transaction end. | ||
595 | * The list is maintained sorted (by block number). | ||
596 | */ | ||
597 | void | ||
598 | xfs_bmap_add_free( | ||
599 | xfs_fsblock_t bno, /* fs block number of extent */ | ||
600 | xfs_filblks_t len, /* length of extent */ | ||
601 | xfs_bmap_free_t *flist, /* list of extents */ | ||
602 | xfs_mount_t *mp) /* mount point structure */ | ||
603 | { | ||
604 | xfs_bmap_free_item_t *cur; /* current (next) element */ | ||
605 | xfs_bmap_free_item_t *new; /* new element */ | ||
606 | xfs_bmap_free_item_t *prev; /* previous element */ | ||
607 | #ifdef DEBUG | ||
608 | xfs_agnumber_t agno; | ||
609 | xfs_agblock_t agbno; | ||
610 | |||
611 | ASSERT(bno != NULLFSBLOCK); | ||
612 | ASSERT(len > 0); | ||
613 | ASSERT(len <= MAXEXTLEN); | ||
614 | ASSERT(!isnullstartblock(bno)); | ||
615 | agno = XFS_FSB_TO_AGNO(mp, bno); | ||
616 | agbno = XFS_FSB_TO_AGBNO(mp, bno); | ||
617 | ASSERT(agno < mp->m_sb.sb_agcount); | ||
618 | ASSERT(agbno < mp->m_sb.sb_agblocks); | ||
619 | ASSERT(len < mp->m_sb.sb_agblocks); | ||
620 | ASSERT(agbno + len <= mp->m_sb.sb_agblocks); | ||
621 | #endif | ||
622 | ASSERT(xfs_bmap_free_item_zone != NULL); | ||
623 | new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); | ||
624 | new->xbfi_startblock = bno; | ||
625 | new->xbfi_blockcount = (xfs_extlen_t)len; | ||
626 | for (prev = NULL, cur = flist->xbf_first; | ||
627 | cur != NULL; | ||
628 | prev = cur, cur = cur->xbfi_next) { | ||
629 | if (cur->xbfi_startblock >= bno) | ||
630 | break; | ||
631 | } | ||
632 | if (prev) | ||
633 | prev->xbfi_next = new; | ||
634 | else | ||
635 | flist->xbf_first = new; | ||
636 | new->xbfi_next = cur; | ||
637 | flist->xbf_count++; | ||
638 | } | ||
639 | |||
640 | /* | ||
641 | * Remove the entry "free" from the free item list. Prev points to the | ||
642 | * previous entry, unless "free" is the head of the list. | ||
643 | */ | ||
644 | void | ||
645 | xfs_bmap_del_free( | ||
646 | xfs_bmap_free_t *flist, /* free item list header */ | ||
647 | xfs_bmap_free_item_t *prev, /* previous item on list, if any */ | ||
648 | xfs_bmap_free_item_t *free) /* list item to be freed */ | ||
649 | { | ||
650 | if (prev) | ||
651 | prev->xbfi_next = free->xbfi_next; | ||
652 | else | ||
653 | flist->xbf_first = free->xbfi_next; | ||
654 | flist->xbf_count--; | ||
655 | kmem_zone_free(xfs_bmap_free_item_zone, free); | ||
656 | } | ||
657 | |||
658 | /* | ||
659 | * Free up any items left in the list. | ||
660 | */ | ||
661 | void | ||
662 | xfs_bmap_cancel( | ||
663 | xfs_bmap_free_t *flist) /* list of bmap_free_items */ | ||
664 | { | ||
665 | xfs_bmap_free_item_t *free; /* free list item */ | ||
666 | xfs_bmap_free_item_t *next; | ||
667 | |||
668 | if (flist->xbf_count == 0) | ||
669 | return; | ||
670 | ASSERT(flist->xbf_first != NULL); | ||
671 | for (free = flist->xbf_first; free; free = next) { | ||
672 | next = free->xbfi_next; | ||
673 | xfs_bmap_del_free(flist, NULL, free); | ||
674 | } | ||
675 | ASSERT(flist->xbf_count == 0); | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * Inode fork format manipulation functions | ||
680 | */ | ||
681 | |||
682 | /* | ||
683 | * Transform a btree format file with only one leaf node, where the | ||
684 | * extents list will fit in the inode, into an extents format file. | ||
685 | * Since the file extents are already in-core, all we have to do is | ||
686 | * give up the space for the btree root and pitch the leaf block. | ||
687 | */ | ||
688 | STATIC int /* error */ | ||
689 | xfs_bmap_btree_to_extents( | ||
690 | xfs_trans_t *tp, /* transaction pointer */ | ||
691 | xfs_inode_t *ip, /* incore inode pointer */ | ||
692 | xfs_btree_cur_t *cur, /* btree cursor */ | ||
693 | int *logflagsp, /* inode logging flags */ | ||
694 | int whichfork) /* data or attr fork */ | ||
695 | { | ||
696 | /* REFERENCED */ | ||
697 | struct xfs_btree_block *cblock;/* child btree block */ | ||
698 | xfs_fsblock_t cbno; /* child block number */ | ||
699 | xfs_buf_t *cbp; /* child block's buffer */ | ||
700 | int error; /* error return value */ | ||
701 | xfs_ifork_t *ifp; /* inode fork data */ | ||
702 | xfs_mount_t *mp; /* mount point structure */ | ||
703 | __be64 *pp; /* ptr to block address */ | ||
704 | struct xfs_btree_block *rblock;/* root btree block */ | ||
705 | |||
706 | mp = ip->i_mount; | ||
707 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
708 | ASSERT(ifp->if_flags & XFS_IFEXTENTS); | ||
709 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); | ||
710 | rblock = ifp->if_broot; | ||
711 | ASSERT(be16_to_cpu(rblock->bb_level) == 1); | ||
712 | ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); | ||
713 | ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1); | ||
714 | pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes); | ||
715 | cbno = be64_to_cpu(*pp); | ||
716 | *logflagsp = 0; | ||
717 | #ifdef DEBUG | ||
718 | if ((error = xfs_btree_check_lptr(cur, cbno, 1))) | ||
719 | return error; | ||
720 | #endif | ||
721 | error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, | ||
722 | &xfs_bmbt_buf_ops); | ||
723 | if (error) | ||
724 | return error; | ||
725 | cblock = XFS_BUF_TO_BLOCK(cbp); | ||
726 | if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) | ||
727 | return error; | ||
728 | xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); | ||
729 | ip->i_d.di_nblocks--; | ||
730 | xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); | ||
731 | xfs_trans_binval(tp, cbp); | ||
732 | if (cur->bc_bufs[0] == cbp) | ||
733 | cur->bc_bufs[0] = NULL; | ||
734 | xfs_iroot_realloc(ip, -1, whichfork); | ||
735 | ASSERT(ifp->if_broot == NULL); | ||
736 | ASSERT((ifp->if_flags & XFS_IFBROOT) == 0); | ||
737 | XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); | ||
738 | *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); | ||
739 | return 0; | ||
740 | } | ||
741 | |||
742 | /* | ||
743 | * Convert an extents-format file into a btree-format file. | ||
744 | * The new file will have a root block (in the inode) and a single child block. | ||
745 | */ | ||
746 | STATIC int /* error */ | ||
747 | xfs_bmap_extents_to_btree( | ||
748 | xfs_trans_t *tp, /* transaction pointer */ | ||
749 | xfs_inode_t *ip, /* incore inode pointer */ | ||
750 | xfs_fsblock_t *firstblock, /* first-block-allocated */ | ||
751 | xfs_bmap_free_t *flist, /* blocks freed in xaction */ | ||
752 | xfs_btree_cur_t **curp, /* cursor returned to caller */ | ||
753 | int wasdel, /* converting a delayed alloc */ | ||
754 | int *logflagsp, /* inode logging flags */ | ||
755 | int whichfork) /* data or attr fork */ | ||
756 | { | ||
757 | struct xfs_btree_block *ablock; /* allocated (child) bt block */ | ||
758 | xfs_buf_t *abp; /* buffer for ablock */ | ||
759 | xfs_alloc_arg_t args; /* allocation arguments */ | ||
760 | xfs_bmbt_rec_t *arp; /* child record pointer */ | ||
761 | struct xfs_btree_block *block; /* btree root block */ | ||
762 | xfs_btree_cur_t *cur; /* bmap btree cursor */ | ||
763 | xfs_bmbt_rec_host_t *ep; /* extent record pointer */ | ||
764 | int error; /* error return value */ | ||
765 | xfs_extnum_t i, cnt; /* extent record index */ | ||
766 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
767 | xfs_bmbt_key_t *kp; /* root block key pointer */ | ||
768 | xfs_mount_t *mp; /* mount structure */ | ||
769 | xfs_extnum_t nextents; /* number of file extents */ | ||
770 | xfs_bmbt_ptr_t *pp; /* root block address pointer */ | ||
771 | |||
772 | mp = ip->i_mount; | ||
773 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
774 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); | ||
775 | |||
776 | /* | ||
777 | * Make space in the inode incore. | ||
778 | */ | ||
779 | xfs_iroot_realloc(ip, 1, whichfork); | ||
780 | ifp->if_flags |= XFS_IFBROOT; | ||
781 | |||
782 | /* | ||
783 | * Fill in the root. | ||
784 | */ | ||
785 | block = ifp->if_broot; | ||
786 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
787 | xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, | ||
788 | XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino, | ||
789 | XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); | ||
790 | else | ||
791 | xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, | ||
792 | XFS_BMAP_MAGIC, 1, 1, ip->i_ino, | ||
793 | XFS_BTREE_LONG_PTRS); | ||
794 | |||
795 | /* | ||
796 | * Need a cursor. Can't allocate until bb_level is filled in. | ||
797 | */ | ||
798 | cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); | ||
799 | cur->bc_private.b.firstblock = *firstblock; | ||
800 | cur->bc_private.b.flist = flist; | ||
801 | cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; | ||
802 | /* | ||
803 | * Convert to a btree with two levels, one record in root. | ||
804 | */ | ||
805 | XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); | ||
806 | memset(&args, 0, sizeof(args)); | ||
807 | args.tp = tp; | ||
808 | args.mp = mp; | ||
809 | args.firstblock = *firstblock; | ||
810 | if (*firstblock == NULLFSBLOCK) { | ||
811 | args.type = XFS_ALLOCTYPE_START_BNO; | ||
812 | args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); | ||
813 | } else if (flist->xbf_low) { | ||
814 | args.type = XFS_ALLOCTYPE_START_BNO; | ||
815 | args.fsbno = *firstblock; | ||
816 | } else { | ||
817 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | ||
818 | args.fsbno = *firstblock; | ||
819 | } | ||
820 | args.minlen = args.maxlen = args.prod = 1; | ||
821 | args.wasdel = wasdel; | ||
822 | *logflagsp = 0; | ||
823 | if ((error = xfs_alloc_vextent(&args))) { | ||
824 | xfs_iroot_realloc(ip, -1, whichfork); | ||
825 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
826 | return error; | ||
827 | } | ||
828 | /* | ||
829 | * Allocation can't fail, the space was reserved. | ||
830 | */ | ||
831 | ASSERT(args.fsbno != NULLFSBLOCK); | ||
832 | ASSERT(*firstblock == NULLFSBLOCK || | ||
833 | args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) || | ||
834 | (flist->xbf_low && | ||
835 | args.agno > XFS_FSB_TO_AGNO(mp, *firstblock))); | ||
836 | *firstblock = cur->bc_private.b.firstblock = args.fsbno; | ||
837 | cur->bc_private.b.allocated++; | ||
838 | ip->i_d.di_nblocks++; | ||
839 | xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); | ||
840 | abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); | ||
841 | /* | ||
842 | * Fill in the child block. | ||
843 | */ | ||
844 | abp->b_ops = &xfs_bmbt_buf_ops; | ||
845 | ablock = XFS_BUF_TO_BLOCK(abp); | ||
846 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
847 | xfs_btree_init_block_int(mp, ablock, abp->b_bn, | ||
848 | XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, | ||
849 | XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); | ||
850 | else | ||
851 | xfs_btree_init_block_int(mp, ablock, abp->b_bn, | ||
852 | XFS_BMAP_MAGIC, 0, 0, ip->i_ino, | ||
853 | XFS_BTREE_LONG_PTRS); | ||
854 | |||
855 | arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); | ||
856 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
857 | for (cnt = i = 0; i < nextents; i++) { | ||
858 | ep = xfs_iext_get_ext(ifp, i); | ||
859 | if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) { | ||
860 | arp->l0 = cpu_to_be64(ep->l0); | ||
861 | arp->l1 = cpu_to_be64(ep->l1); | ||
862 | arp++; cnt++; | ||
863 | } | ||
864 | } | ||
865 | ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork)); | ||
866 | xfs_btree_set_numrecs(ablock, cnt); | ||
867 | |||
868 | /* | ||
869 | * Fill in the root key and pointer. | ||
870 | */ | ||
871 | kp = XFS_BMBT_KEY_ADDR(mp, block, 1); | ||
872 | arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); | ||
873 | kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp)); | ||
874 | pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur, | ||
875 | be16_to_cpu(block->bb_level))); | ||
876 | *pp = cpu_to_be64(args.fsbno); | ||
877 | |||
878 | /* | ||
879 | * Do all this logging at the end so that | ||
880 | * the root is at the right level. | ||
881 | */ | ||
882 | xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS); | ||
883 | xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs)); | ||
884 | ASSERT(*curp == NULL); | ||
885 | *curp = cur; | ||
886 | *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork); | ||
887 | return 0; | ||
888 | } | ||
889 | |||
890 | /* | ||
891 | * Convert a local file to an extents file. | ||
892 | * This code is out of bounds for data forks of regular files, | ||
893 | * since the file data needs to get logged so things will stay consistent. | ||
894 | * (The bmap-level manipulations are ok, though). | ||
895 | */ | ||
896 | void | ||
897 | xfs_bmap_local_to_extents_empty( | ||
898 | struct xfs_inode *ip, | ||
899 | int whichfork) | ||
900 | { | ||
901 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); | ||
902 | |||
903 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); | ||
904 | ASSERT(ifp->if_bytes == 0); | ||
905 | ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); | ||
906 | |||
907 | xfs_bmap_forkoff_reset(ip, whichfork); | ||
908 | ifp->if_flags &= ~XFS_IFINLINE; | ||
909 | ifp->if_flags |= XFS_IFEXTENTS; | ||
910 | XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); | ||
911 | } | ||
912 | |||
913 | |||
914 | STATIC int /* error */ | ||
915 | xfs_bmap_local_to_extents( | ||
916 | xfs_trans_t *tp, /* transaction pointer */ | ||
917 | xfs_inode_t *ip, /* incore inode pointer */ | ||
918 | xfs_fsblock_t *firstblock, /* first block allocated in xaction */ | ||
919 | xfs_extlen_t total, /* total blocks needed by transaction */ | ||
920 | int *logflagsp, /* inode logging flags */ | ||
921 | int whichfork, | ||
922 | void (*init_fn)(struct xfs_trans *tp, | ||
923 | struct xfs_buf *bp, | ||
924 | struct xfs_inode *ip, | ||
925 | struct xfs_ifork *ifp)) | ||
926 | { | ||
927 | int error = 0; | ||
928 | int flags; /* logging flags returned */ | ||
929 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
930 | xfs_alloc_arg_t args; /* allocation arguments */ | ||
931 | xfs_buf_t *bp; /* buffer for extent block */ | ||
932 | xfs_bmbt_rec_host_t *ep; /* extent record pointer */ | ||
933 | |||
934 | /* | ||
935 | * We don't want to deal with the case of keeping inode data inline yet. | ||
936 | * So sending the data fork of a regular inode is invalid. | ||
937 | */ | ||
938 | ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK)); | ||
939 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
940 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); | ||
941 | |||
942 | if (!ifp->if_bytes) { | ||
943 | xfs_bmap_local_to_extents_empty(ip, whichfork); | ||
944 | flags = XFS_ILOG_CORE; | ||
945 | goto done; | ||
946 | } | ||
947 | |||
948 | flags = 0; | ||
949 | error = 0; | ||
950 | ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == | ||
951 | XFS_IFINLINE); | ||
952 | memset(&args, 0, sizeof(args)); | ||
953 | args.tp = tp; | ||
954 | args.mp = ip->i_mount; | ||
955 | args.firstblock = *firstblock; | ||
956 | /* | ||
957 | * Allocate a block. We know we need only one, since the | ||
958 | * file currently fits in an inode. | ||
959 | */ | ||
960 | if (*firstblock == NULLFSBLOCK) { | ||
961 | args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); | ||
962 | args.type = XFS_ALLOCTYPE_START_BNO; | ||
963 | } else { | ||
964 | args.fsbno = *firstblock; | ||
965 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | ||
966 | } | ||
967 | args.total = total; | ||
968 | args.minlen = args.maxlen = args.prod = 1; | ||
969 | error = xfs_alloc_vextent(&args); | ||
970 | if (error) | ||
971 | goto done; | ||
972 | |||
973 | /* Can't fail, the space was reserved. */ | ||
974 | ASSERT(args.fsbno != NULLFSBLOCK); | ||
975 | ASSERT(args.len == 1); | ||
976 | *firstblock = args.fsbno; | ||
977 | bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); | ||
978 | |||
979 | /* initialise the block and copy the data */ | ||
980 | init_fn(tp, bp, ip, ifp); | ||
981 | |||
982 | /* account for the change in fork size and log everything */ | ||
983 | xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); | ||
984 | xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); | ||
985 | xfs_bmap_local_to_extents_empty(ip, whichfork); | ||
986 | flags |= XFS_ILOG_CORE; | ||
987 | |||
988 | xfs_iext_add(ifp, 0, 1); | ||
989 | ep = xfs_iext_get_ext(ifp, 0); | ||
990 | xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); | ||
991 | trace_xfs_bmap_post_update(ip, 0, | ||
992 | whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0, | ||
993 | _THIS_IP_); | ||
994 | XFS_IFORK_NEXT_SET(ip, whichfork, 1); | ||
995 | ip->i_d.di_nblocks = 1; | ||
996 | xfs_trans_mod_dquot_byino(tp, ip, | ||
997 | XFS_TRANS_DQ_BCOUNT, 1L); | ||
998 | flags |= xfs_ilog_fext(whichfork); | ||
999 | |||
1000 | done: | ||
1001 | *logflagsp = flags; | ||
1002 | return error; | ||
1003 | } | ||
1004 | |||
1005 | /* | ||
1006 | * Called from xfs_bmap_add_attrfork to handle btree format files. | ||
1007 | */ | ||
1008 | STATIC int /* error */ | ||
1009 | xfs_bmap_add_attrfork_btree( | ||
1010 | xfs_trans_t *tp, /* transaction pointer */ | ||
1011 | xfs_inode_t *ip, /* incore inode pointer */ | ||
1012 | xfs_fsblock_t *firstblock, /* first block allocated */ | ||
1013 | xfs_bmap_free_t *flist, /* blocks to free at commit */ | ||
1014 | int *flags) /* inode logging flags */ | ||
1015 | { | ||
1016 | xfs_btree_cur_t *cur; /* btree cursor */ | ||
1017 | int error; /* error return value */ | ||
1018 | xfs_mount_t *mp; /* file system mount struct */ | ||
1019 | int stat; /* newroot status */ | ||
1020 | |||
1021 | mp = ip->i_mount; | ||
1022 | if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip)) | ||
1023 | *flags |= XFS_ILOG_DBROOT; | ||
1024 | else { | ||
1025 | cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); | ||
1026 | cur->bc_private.b.flist = flist; | ||
1027 | cur->bc_private.b.firstblock = *firstblock; | ||
1028 | if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) | ||
1029 | goto error0; | ||
1030 | /* must be at least one entry */ | ||
1031 | XFS_WANT_CORRUPTED_GOTO(stat == 1, error0); | ||
1032 | if ((error = xfs_btree_new_iroot(cur, flags, &stat))) | ||
1033 | goto error0; | ||
1034 | if (stat == 0) { | ||
1035 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
1036 | return -ENOSPC; | ||
1037 | } | ||
1038 | *firstblock = cur->bc_private.b.firstblock; | ||
1039 | cur->bc_private.b.allocated = 0; | ||
1040 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
1041 | } | ||
1042 | return 0; | ||
1043 | error0: | ||
1044 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
1045 | return error; | ||
1046 | } | ||
1047 | |||
1048 | /* | ||
1049 | * Called from xfs_bmap_add_attrfork to handle extents format files. | ||
1050 | */ | ||
1051 | STATIC int /* error */ | ||
1052 | xfs_bmap_add_attrfork_extents( | ||
1053 | xfs_trans_t *tp, /* transaction pointer */ | ||
1054 | xfs_inode_t *ip, /* incore inode pointer */ | ||
1055 | xfs_fsblock_t *firstblock, /* first block allocated */ | ||
1056 | xfs_bmap_free_t *flist, /* blocks to free at commit */ | ||
1057 | int *flags) /* inode logging flags */ | ||
1058 | { | ||
1059 | xfs_btree_cur_t *cur; /* bmap btree cursor */ | ||
1060 | int error; /* error return value */ | ||
1061 | |||
1062 | if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip)) | ||
1063 | return 0; | ||
1064 | cur = NULL; | ||
1065 | error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0, | ||
1066 | flags, XFS_DATA_FORK); | ||
1067 | if (cur) { | ||
1068 | cur->bc_private.b.allocated = 0; | ||
1069 | xfs_btree_del_cursor(cur, | ||
1070 | error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
1071 | } | ||
1072 | return error; | ||
1073 | } | ||
1074 | |||
1075 | /* | ||
1076 | * Called from xfs_bmap_add_attrfork to handle local format files. Each | ||
1077 | * different data fork content type needs a different callout to do the | ||
1078 | * conversion. Some are basic and only require special block initialisation | ||
1079 | * callouts for the data formating, others (directories) are so specialised they | ||
1080 | * handle everything themselves. | ||
1081 | * | ||
1082 | * XXX (dgc): investigate whether directory conversion can use the generic | ||
1083 | * formatting callout. It should be possible - it's just a very complex | ||
1084 | * formatter. | ||
1085 | */ | ||
1086 | STATIC int /* error */ | ||
1087 | xfs_bmap_add_attrfork_local( | ||
1088 | xfs_trans_t *tp, /* transaction pointer */ | ||
1089 | xfs_inode_t *ip, /* incore inode pointer */ | ||
1090 | xfs_fsblock_t *firstblock, /* first block allocated */ | ||
1091 | xfs_bmap_free_t *flist, /* blocks to free at commit */ | ||
1092 | int *flags) /* inode logging flags */ | ||
1093 | { | ||
1094 | xfs_da_args_t dargs; /* args for dir/attr code */ | ||
1095 | |||
1096 | if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) | ||
1097 | return 0; | ||
1098 | |||
1099 | if (S_ISDIR(ip->i_d.di_mode)) { | ||
1100 | memset(&dargs, 0, sizeof(dargs)); | ||
1101 | dargs.geo = ip->i_mount->m_dir_geo; | ||
1102 | dargs.dp = ip; | ||
1103 | dargs.firstblock = firstblock; | ||
1104 | dargs.flist = flist; | ||
1105 | dargs.total = dargs.geo->fsbcount; | ||
1106 | dargs.whichfork = XFS_DATA_FORK; | ||
1107 | dargs.trans = tp; | ||
1108 | return xfs_dir2_sf_to_block(&dargs); | ||
1109 | } | ||
1110 | |||
1111 | if (S_ISLNK(ip->i_d.di_mode)) | ||
1112 | return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, | ||
1113 | flags, XFS_DATA_FORK, | ||
1114 | xfs_symlink_local_to_remote); | ||
1115 | |||
1116 | /* should only be called for types that support local format data */ | ||
1117 | ASSERT(0); | ||
1118 | return -EFSCORRUPTED; | ||
1119 | } | ||
1120 | |||
1121 | /* | ||
1122 | * Convert inode from non-attributed to attributed. | ||
1123 | * Must not be in a transaction, ip must not be locked. | ||
1124 | */ | ||
1125 | int /* error code */ | ||
1126 | xfs_bmap_add_attrfork( | ||
1127 | xfs_inode_t *ip, /* incore inode pointer */ | ||
1128 | int size, /* space new attribute needs */ | ||
1129 | int rsvd) /* xact may use reserved blks */ | ||
1130 | { | ||
1131 | xfs_fsblock_t firstblock; /* 1st block/ag allocated */ | ||
1132 | xfs_bmap_free_t flist; /* freed extent records */ | ||
1133 | xfs_mount_t *mp; /* mount structure */ | ||
1134 | xfs_trans_t *tp; /* transaction pointer */ | ||
1135 | int blks; /* space reservation */ | ||
1136 | int version = 1; /* superblock attr version */ | ||
1137 | int committed; /* xaction was committed */ | ||
1138 | int logflags; /* logging flags */ | ||
1139 | int error; /* error return value */ | ||
1140 | int cancel_flags = 0; | ||
1141 | |||
1142 | ASSERT(XFS_IFORK_Q(ip) == 0); | ||
1143 | |||
1144 | mp = ip->i_mount; | ||
1145 | ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); | ||
1146 | tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK); | ||
1147 | blks = XFS_ADDAFORK_SPACE_RES(mp); | ||
1148 | if (rsvd) | ||
1149 | tp->t_flags |= XFS_TRANS_RESERVE; | ||
1150 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); | ||
1151 | if (error) { | ||
1152 | xfs_trans_cancel(tp, 0); | ||
1153 | return error; | ||
1154 | } | ||
1155 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | ||
1156 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
1157 | error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? | ||
1158 | XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : | ||
1159 | XFS_QMOPT_RES_REGBLKS); | ||
1160 | if (error) | ||
1161 | goto trans_cancel; | ||
1162 | cancel_flags |= XFS_TRANS_ABORT; | ||
1163 | if (XFS_IFORK_Q(ip)) | ||
1164 | goto trans_cancel; | ||
1165 | if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { | ||
1166 | /* | ||
1167 | * For inodes coming from pre-6.2 filesystems. | ||
1168 | */ | ||
1169 | ASSERT(ip->i_d.di_aformat == 0); | ||
1170 | ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; | ||
1171 | } | ||
1172 | ASSERT(ip->i_d.di_anextents == 0); | ||
1173 | |||
1174 | xfs_trans_ijoin(tp, ip, 0); | ||
1175 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
1176 | |||
1177 | switch (ip->i_d.di_format) { | ||
1178 | case XFS_DINODE_FMT_DEV: | ||
1179 | ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; | ||
1180 | break; | ||
1181 | case XFS_DINODE_FMT_UUID: | ||
1182 | ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3; | ||
1183 | break; | ||
1184 | case XFS_DINODE_FMT_LOCAL: | ||
1185 | case XFS_DINODE_FMT_EXTENTS: | ||
1186 | case XFS_DINODE_FMT_BTREE: | ||
1187 | ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size); | ||
1188 | if (!ip->i_d.di_forkoff) | ||
1189 | ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3; | ||
1190 | else if (mp->m_flags & XFS_MOUNT_ATTR2) | ||
1191 | version = 2; | ||
1192 | break; | ||
1193 | default: | ||
1194 | ASSERT(0); | ||
1195 | error = -EINVAL; | ||
1196 | goto trans_cancel; | ||
1197 | } | ||
1198 | |||
1199 | ASSERT(ip->i_afp == NULL); | ||
1200 | ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); | ||
1201 | ip->i_afp->if_flags = XFS_IFEXTENTS; | ||
1202 | logflags = 0; | ||
1203 | xfs_bmap_init(&flist, &firstblock); | ||
1204 | switch (ip->i_d.di_format) { | ||
1205 | case XFS_DINODE_FMT_LOCAL: | ||
1206 | error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist, | ||
1207 | &logflags); | ||
1208 | break; | ||
1209 | case XFS_DINODE_FMT_EXTENTS: | ||
1210 | error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock, | ||
1211 | &flist, &logflags); | ||
1212 | break; | ||
1213 | case XFS_DINODE_FMT_BTREE: | ||
1214 | error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist, | ||
1215 | &logflags); | ||
1216 | break; | ||
1217 | default: | ||
1218 | error = 0; | ||
1219 | break; | ||
1220 | } | ||
1221 | if (logflags) | ||
1222 | xfs_trans_log_inode(tp, ip, logflags); | ||
1223 | if (error) | ||
1224 | goto bmap_cancel; | ||
1225 | if (!xfs_sb_version_hasattr(&mp->m_sb) || | ||
1226 | (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { | ||
1227 | __int64_t sbfields = 0; | ||
1228 | |||
1229 | spin_lock(&mp->m_sb_lock); | ||
1230 | if (!xfs_sb_version_hasattr(&mp->m_sb)) { | ||
1231 | xfs_sb_version_addattr(&mp->m_sb); | ||
1232 | sbfields |= XFS_SB_VERSIONNUM; | ||
1233 | } | ||
1234 | if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) { | ||
1235 | xfs_sb_version_addattr2(&mp->m_sb); | ||
1236 | sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); | ||
1237 | } | ||
1238 | if (sbfields) { | ||
1239 | spin_unlock(&mp->m_sb_lock); | ||
1240 | xfs_mod_sb(tp, sbfields); | ||
1241 | } else | ||
1242 | spin_unlock(&mp->m_sb_lock); | ||
1243 | } | ||
1244 | |||
1245 | error = xfs_bmap_finish(&tp, &flist, &committed); | ||
1246 | if (error) | ||
1247 | goto bmap_cancel; | ||
1248 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
1249 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1250 | return error; | ||
1251 | |||
1252 | bmap_cancel: | ||
1253 | xfs_bmap_cancel(&flist); | ||
1254 | trans_cancel: | ||
1255 | xfs_trans_cancel(tp, cancel_flags); | ||
1256 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1257 | return error; | ||
1258 | } | ||
1259 | |||
1260 | /* | ||
1261 | * Internal and external extent tree search functions. | ||
1262 | */ | ||
1263 | |||
1264 | /* | ||
1265 | * Read in the extents to if_extents. | ||
1266 | * All inode fields are set up by caller, we just traverse the btree | ||
1267 | * and copy the records in. If the file system cannot contain unwritten | ||
1268 | * extents, the records are checked for no "state" flags. | ||
1269 | */ | ||
1270 | int /* error */ | ||
1271 | xfs_bmap_read_extents( | ||
1272 | xfs_trans_t *tp, /* transaction pointer */ | ||
1273 | xfs_inode_t *ip, /* incore inode */ | ||
1274 | int whichfork) /* data or attr fork */ | ||
1275 | { | ||
1276 | struct xfs_btree_block *block; /* current btree block */ | ||
1277 | xfs_fsblock_t bno; /* block # of "block" */ | ||
1278 | xfs_buf_t *bp; /* buffer for "block" */ | ||
1279 | int error; /* error return value */ | ||
1280 | xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */ | ||
1281 | xfs_extnum_t i, j; /* index into the extents list */ | ||
1282 | xfs_ifork_t *ifp; /* fork structure */ | ||
1283 | int level; /* btree level, for checking */ | ||
1284 | xfs_mount_t *mp; /* file system mount structure */ | ||
1285 | __be64 *pp; /* pointer to block address */ | ||
1286 | /* REFERENCED */ | ||
1287 | xfs_extnum_t room; /* number of entries there's room for */ | ||
1288 | |||
1289 | bno = NULLFSBLOCK; | ||
1290 | mp = ip->i_mount; | ||
1291 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
1292 | exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE : | ||
1293 | XFS_EXTFMT_INODE(ip); | ||
1294 | block = ifp->if_broot; | ||
1295 | /* | ||
1296 | * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. | ||
1297 | */ | ||
1298 | level = be16_to_cpu(block->bb_level); | ||
1299 | ASSERT(level > 0); | ||
1300 | pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); | ||
1301 | bno = be64_to_cpu(*pp); | ||
1302 | ASSERT(bno != NULLDFSBNO); | ||
1303 | ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); | ||
1304 | ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); | ||
1305 | /* | ||
1306 | * Go down the tree until leaf level is reached, following the first | ||
1307 | * pointer (leftmost) at each level. | ||
1308 | */ | ||
1309 | while (level-- > 0) { | ||
1310 | error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, | ||
1311 | XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); | ||
1312 | if (error) | ||
1313 | return error; | ||
1314 | block = XFS_BUF_TO_BLOCK(bp); | ||
1315 | XFS_WANT_CORRUPTED_GOTO( | ||
1316 | xfs_bmap_sanity_check(mp, bp, level), | ||
1317 | error0); | ||
1318 | if (level == 0) | ||
1319 | break; | ||
1320 | pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); | ||
1321 | bno = be64_to_cpu(*pp); | ||
1322 | XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); | ||
1323 | xfs_trans_brelse(tp, bp); | ||
1324 | } | ||
1325 | /* | ||
1326 | * Here with bp and block set to the leftmost leaf node in the tree. | ||
1327 | */ | ||
1328 | room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
1329 | i = 0; | ||
1330 | /* | ||
1331 | * Loop over all leaf nodes. Copy information to the extent records. | ||
1332 | */ | ||
1333 | for (;;) { | ||
1334 | xfs_bmbt_rec_t *frp; | ||
1335 | xfs_fsblock_t nextbno; | ||
1336 | xfs_extnum_t num_recs; | ||
1337 | xfs_extnum_t start; | ||
1338 | |||
1339 | num_recs = xfs_btree_get_numrecs(block); | ||
1340 | if (unlikely(i + num_recs > room)) { | ||
1341 | ASSERT(i + num_recs <= room); | ||
1342 | xfs_warn(ip->i_mount, | ||
1343 | "corrupt dinode %Lu, (btree extents).", | ||
1344 | (unsigned long long) ip->i_ino); | ||
1345 | XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)", | ||
1346 | XFS_ERRLEVEL_LOW, ip->i_mount, block); | ||
1347 | goto error0; | ||
1348 | } | ||
1349 | XFS_WANT_CORRUPTED_GOTO( | ||
1350 | xfs_bmap_sanity_check(mp, bp, 0), | ||
1351 | error0); | ||
1352 | /* | ||
1353 | * Read-ahead the next leaf block, if any. | ||
1354 | */ | ||
1355 | nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); | ||
1356 | if (nextbno != NULLFSBLOCK) | ||
1357 | xfs_btree_reada_bufl(mp, nextbno, 1, | ||
1358 | &xfs_bmbt_buf_ops); | ||
1359 | /* | ||
1360 | * Copy records into the extent records. | ||
1361 | */ | ||
1362 | frp = XFS_BMBT_REC_ADDR(mp, block, 1); | ||
1363 | start = i; | ||
1364 | for (j = 0; j < num_recs; j++, i++, frp++) { | ||
1365 | xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); | ||
1366 | trp->l0 = be64_to_cpu(frp->l0); | ||
1367 | trp->l1 = be64_to_cpu(frp->l1); | ||
1368 | } | ||
1369 | if (exntf == XFS_EXTFMT_NOSTATE) { | ||
1370 | /* | ||
1371 | * Check all attribute bmap btree records and | ||
1372 | * any "older" data bmap btree records for a | ||
1373 | * set bit in the "extent flag" position. | ||
1374 | */ | ||
1375 | if (unlikely(xfs_check_nostate_extents(ifp, | ||
1376 | start, num_recs))) { | ||
1377 | XFS_ERROR_REPORT("xfs_bmap_read_extents(2)", | ||
1378 | XFS_ERRLEVEL_LOW, | ||
1379 | ip->i_mount); | ||
1380 | goto error0; | ||
1381 | } | ||
1382 | } | ||
1383 | xfs_trans_brelse(tp, bp); | ||
1384 | bno = nextbno; | ||
1385 | /* | ||
1386 | * If we've reached the end, stop. | ||
1387 | */ | ||
1388 | if (bno == NULLFSBLOCK) | ||
1389 | break; | ||
1390 | error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, | ||
1391 | XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); | ||
1392 | if (error) | ||
1393 | return error; | ||
1394 | block = XFS_BUF_TO_BLOCK(bp); | ||
1395 | } | ||
1396 | ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); | ||
1397 | ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); | ||
1398 | XFS_BMAP_TRACE_EXLIST(ip, i, whichfork); | ||
1399 | return 0; | ||
1400 | error0: | ||
1401 | xfs_trans_brelse(tp, bp); | ||
1402 | return -EFSCORRUPTED; | ||
1403 | } | ||
1404 | |||
1405 | |||
1406 | /* | ||
1407 | * Search the extent records for the entry containing block bno. | ||
1408 | * If bno lies in a hole, point to the next entry. If bno lies | ||
1409 | * past eof, *eofp will be set, and *prevp will contain the last | ||
1410 | * entry (null if none). Else, *lastxp will be set to the index | ||
1411 | * of the found entry; *gotp will contain the entry. | ||
1412 | */ | ||
1413 | STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ | ||
1414 | xfs_bmap_search_multi_extents( | ||
1415 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1416 | xfs_fileoff_t bno, /* block number searched for */ | ||
1417 | int *eofp, /* out: end of file found */ | ||
1418 | xfs_extnum_t *lastxp, /* out: last extent index */ | ||
1419 | xfs_bmbt_irec_t *gotp, /* out: extent entry found */ | ||
1420 | xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ | ||
1421 | { | ||
1422 | xfs_bmbt_rec_host_t *ep; /* extent record pointer */ | ||
1423 | xfs_extnum_t lastx; /* last extent index */ | ||
1424 | |||
1425 | /* | ||
1426 | * Initialize the extent entry structure to catch access to | ||
1427 | * uninitialized br_startblock field. | ||
1428 | */ | ||
1429 | gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL; | ||
1430 | gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL; | ||
1431 | gotp->br_state = XFS_EXT_INVALID; | ||
1432 | #if XFS_BIG_BLKNOS | ||
1433 | gotp->br_startblock = 0xffffa5a5a5a5a5a5LL; | ||
1434 | #else | ||
1435 | gotp->br_startblock = 0xffffa5a5; | ||
1436 | #endif | ||
1437 | prevp->br_startoff = NULLFILEOFF; | ||
1438 | |||
1439 | ep = xfs_iext_bno_to_ext(ifp, bno, &lastx); | ||
1440 | if (lastx > 0) { | ||
1441 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp); | ||
1442 | } | ||
1443 | if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) { | ||
1444 | xfs_bmbt_get_all(ep, gotp); | ||
1445 | *eofp = 0; | ||
1446 | } else { | ||
1447 | if (lastx > 0) { | ||
1448 | *gotp = *prevp; | ||
1449 | } | ||
1450 | *eofp = 1; | ||
1451 | ep = NULL; | ||
1452 | } | ||
1453 | *lastxp = lastx; | ||
1454 | return ep; | ||
1455 | } | ||
1456 | |||
1457 | /* | ||
1458 | * Search the extents list for the inode, for the extent containing bno. | ||
1459 | * If bno lies in a hole, point to the next entry. If bno lies past eof, | ||
1460 | * *eofp will be set, and *prevp will contain the last entry (null if none). | ||
1461 | * Else, *lastxp will be set to the index of the found | ||
1462 | * entry; *gotp will contain the entry. | ||
1463 | */ | ||
1464 | STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ | ||
1465 | xfs_bmap_search_extents( | ||
1466 | xfs_inode_t *ip, /* incore inode pointer */ | ||
1467 | xfs_fileoff_t bno, /* block number searched for */ | ||
1468 | int fork, /* data or attr fork */ | ||
1469 | int *eofp, /* out: end of file found */ | ||
1470 | xfs_extnum_t *lastxp, /* out: last extent index */ | ||
1471 | xfs_bmbt_irec_t *gotp, /* out: extent entry found */ | ||
1472 | xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ | ||
1473 | { | ||
1474 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
1475 | xfs_bmbt_rec_host_t *ep; /* extent record pointer */ | ||
1476 | |||
1477 | XFS_STATS_INC(xs_look_exlist); | ||
1478 | ifp = XFS_IFORK_PTR(ip, fork); | ||
1479 | |||
1480 | ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); | ||
1481 | |||
1482 | if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && | ||
1483 | !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { | ||
1484 | xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, | ||
1485 | "Access to block zero in inode %llu " | ||
1486 | "start_block: %llx start_off: %llx " | ||
1487 | "blkcnt: %llx extent-state: %x lastx: %x", | ||
1488 | (unsigned long long)ip->i_ino, | ||
1489 | (unsigned long long)gotp->br_startblock, | ||
1490 | (unsigned long long)gotp->br_startoff, | ||
1491 | (unsigned long long)gotp->br_blockcount, | ||
1492 | gotp->br_state, *lastxp); | ||
1493 | *lastxp = NULLEXTNUM; | ||
1494 | *eofp = 1; | ||
1495 | return NULL; | ||
1496 | } | ||
1497 | return ep; | ||
1498 | } | ||
1499 | |||
1500 | /* | ||
1501 | * Returns the file-relative block number of the first unused block(s) | ||
1502 | * in the file with at least "len" logically contiguous blocks free. | ||
1503 | * This is the lowest-address hole if the file has holes, else the first block | ||
1504 | * past the end of file. | ||
1505 | * Return 0 if the file is currently local (in-inode). | ||
1506 | */ | ||
1507 | int /* error */ | ||
1508 | xfs_bmap_first_unused( | ||
1509 | xfs_trans_t *tp, /* transaction pointer */ | ||
1510 | xfs_inode_t *ip, /* incore inode */ | ||
1511 | xfs_extlen_t len, /* size of hole to find */ | ||
1512 | xfs_fileoff_t *first_unused, /* unused block */ | ||
1513 | int whichfork) /* data or attr fork */ | ||
1514 | { | ||
1515 | int error; /* error return value */ | ||
1516 | int idx; /* extent record index */ | ||
1517 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
1518 | xfs_fileoff_t lastaddr; /* last block number seen */ | ||
1519 | xfs_fileoff_t lowest; /* lowest useful block */ | ||
1520 | xfs_fileoff_t max; /* starting useful block */ | ||
1521 | xfs_fileoff_t off; /* offset for this block */ | ||
1522 | xfs_extnum_t nextents; /* number of extent entries */ | ||
1523 | |||
1524 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE || | ||
1525 | XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS || | ||
1526 | XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); | ||
1527 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { | ||
1528 | *first_unused = 0; | ||
1529 | return 0; | ||
1530 | } | ||
1531 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
1532 | if (!(ifp->if_flags & XFS_IFEXTENTS) && | ||
1533 | (error = xfs_iread_extents(tp, ip, whichfork))) | ||
1534 | return error; | ||
1535 | lowest = *first_unused; | ||
1536 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
1537 | for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) { | ||
1538 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); | ||
1539 | off = xfs_bmbt_get_startoff(ep); | ||
1540 | /* | ||
1541 | * See if the hole before this extent will work. | ||
1542 | */ | ||
1543 | if (off >= lowest + len && off - max >= len) { | ||
1544 | *first_unused = max; | ||
1545 | return 0; | ||
1546 | } | ||
1547 | lastaddr = off + xfs_bmbt_get_blockcount(ep); | ||
1548 | max = XFS_FILEOFF_MAX(lastaddr, lowest); | ||
1549 | } | ||
1550 | *first_unused = max; | ||
1551 | return 0; | ||
1552 | } | ||
1553 | |||
1554 | /* | ||
1555 | * Returns the file-relative block number of the last block - 1 before | ||
1556 | * last_block (input value) in the file. | ||
1557 | * This is not based on i_size, it is based on the extent records. | ||
1558 | * Returns 0 for local files, as they do not have extent records. | ||
1559 | */ | ||
1560 | int /* error */ | ||
1561 | xfs_bmap_last_before( | ||
1562 | xfs_trans_t *tp, /* transaction pointer */ | ||
1563 | xfs_inode_t *ip, /* incore inode */ | ||
1564 | xfs_fileoff_t *last_block, /* last block */ | ||
1565 | int whichfork) /* data or attr fork */ | ||
1566 | { | ||
1567 | xfs_fileoff_t bno; /* input file offset */ | ||
1568 | int eof; /* hit end of file */ | ||
1569 | xfs_bmbt_rec_host_t *ep; /* pointer to last extent */ | ||
1570 | int error; /* error return value */ | ||
1571 | xfs_bmbt_irec_t got; /* current extent value */ | ||
1572 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
1573 | xfs_extnum_t lastx; /* last extent used */ | ||
1574 | xfs_bmbt_irec_t prev; /* previous extent value */ | ||
1575 | |||
1576 | if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && | ||
1577 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | ||
1578 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) | ||
1579 | return -EIO; | ||
1580 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { | ||
1581 | *last_block = 0; | ||
1582 | return 0; | ||
1583 | } | ||
1584 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
1585 | if (!(ifp->if_flags & XFS_IFEXTENTS) && | ||
1586 | (error = xfs_iread_extents(tp, ip, whichfork))) | ||
1587 | return error; | ||
1588 | bno = *last_block - 1; | ||
1589 | ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, | ||
1590 | &prev); | ||
1591 | if (eof || xfs_bmbt_get_startoff(ep) > bno) { | ||
1592 | if (prev.br_startoff == NULLFILEOFF) | ||
1593 | *last_block = 0; | ||
1594 | else | ||
1595 | *last_block = prev.br_startoff + prev.br_blockcount; | ||
1596 | } | ||
1597 | /* | ||
1598 | * Otherwise *last_block is already the right answer. | ||
1599 | */ | ||
1600 | return 0; | ||
1601 | } | ||
1602 | |||
1603 | int | ||
1604 | xfs_bmap_last_extent( | ||
1605 | struct xfs_trans *tp, | ||
1606 | struct xfs_inode *ip, | ||
1607 | int whichfork, | ||
1608 | struct xfs_bmbt_irec *rec, | ||
1609 | int *is_empty) | ||
1610 | { | ||
1611 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); | ||
1612 | int error; | ||
1613 | int nextents; | ||
1614 | |||
1615 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | ||
1616 | error = xfs_iread_extents(tp, ip, whichfork); | ||
1617 | if (error) | ||
1618 | return error; | ||
1619 | } | ||
1620 | |||
1621 | nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); | ||
1622 | if (nextents == 0) { | ||
1623 | *is_empty = 1; | ||
1624 | return 0; | ||
1625 | } | ||
1626 | |||
1627 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec); | ||
1628 | *is_empty = 0; | ||
1629 | return 0; | ||
1630 | } | ||
1631 | |||
1632 | /* | ||
1633 | * Check the last inode extent to determine whether this allocation will result | ||
1634 | * in blocks being allocated at the end of the file. When we allocate new data | ||
1635 | * blocks at the end of the file which do not start at the previous data block, | ||
1636 | * we will try to align the new blocks at stripe unit boundaries. | ||
1637 | * | ||
1638 | * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be | ||
1639 | * at, or past the EOF. | ||
1640 | */ | ||
1641 | STATIC int | ||
1642 | xfs_bmap_isaeof( | ||
1643 | struct xfs_bmalloca *bma, | ||
1644 | int whichfork) | ||
1645 | { | ||
1646 | struct xfs_bmbt_irec rec; | ||
1647 | int is_empty; | ||
1648 | int error; | ||
1649 | |||
1650 | bma->aeof = 0; | ||
1651 | error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, | ||
1652 | &is_empty); | ||
1653 | if (error) | ||
1654 | return error; | ||
1655 | |||
1656 | if (is_empty) { | ||
1657 | bma->aeof = 1; | ||
1658 | return 0; | ||
1659 | } | ||
1660 | |||
1661 | /* | ||
1662 | * Check if we are allocation or past the last extent, or at least into | ||
1663 | * the last delayed allocated extent. | ||
1664 | */ | ||
1665 | bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount || | ||
1666 | (bma->offset >= rec.br_startoff && | ||
1667 | isnullstartblock(rec.br_startblock)); | ||
1668 | return 0; | ||
1669 | } | ||
1670 | |||
1671 | /* | ||
1672 | * Returns the file-relative block number of the first block past eof in | ||
1673 | * the file. This is not based on i_size, it is based on the extent records. | ||
1674 | * Returns 0 for local files, as they do not have extent records. | ||
1675 | */ | ||
1676 | int | ||
1677 | xfs_bmap_last_offset( | ||
1678 | struct xfs_inode *ip, | ||
1679 | xfs_fileoff_t *last_block, | ||
1680 | int whichfork) | ||
1681 | { | ||
1682 | struct xfs_bmbt_irec rec; | ||
1683 | int is_empty; | ||
1684 | int error; | ||
1685 | |||
1686 | *last_block = 0; | ||
1687 | |||
1688 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) | ||
1689 | return 0; | ||
1690 | |||
1691 | if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && | ||
1692 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) | ||
1693 | return -EIO; | ||
1694 | |||
1695 | error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); | ||
1696 | if (error || is_empty) | ||
1697 | return error; | ||
1698 | |||
1699 | *last_block = rec.br_startoff + rec.br_blockcount; | ||
1700 | return 0; | ||
1701 | } | ||
1702 | |||
1703 | /* | ||
1704 | * Returns whether the selected fork of the inode has exactly one | ||
1705 | * block or not. For the data fork we check this matches di_size, | ||
1706 | * implying the file's range is 0..bsize-1. | ||
1707 | */ | ||
1708 | int /* 1=>1 block, 0=>otherwise */ | ||
1709 | xfs_bmap_one_block( | ||
1710 | xfs_inode_t *ip, /* incore inode */ | ||
1711 | int whichfork) /* data or attr fork */ | ||
1712 | { | ||
1713 | xfs_bmbt_rec_host_t *ep; /* ptr to fork's extent */ | ||
1714 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
1715 | int rval; /* return value */ | ||
1716 | xfs_bmbt_irec_t s; /* internal version of extent */ | ||
1717 | |||
1718 | #ifndef DEBUG | ||
1719 | if (whichfork == XFS_DATA_FORK) | ||
1720 | return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize; | ||
1721 | #endif /* !DEBUG */ | ||
1722 | if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1) | ||
1723 | return 0; | ||
1724 | if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) | ||
1725 | return 0; | ||
1726 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
1727 | ASSERT(ifp->if_flags & XFS_IFEXTENTS); | ||
1728 | ep = xfs_iext_get_ext(ifp, 0); | ||
1729 | xfs_bmbt_get_all(ep, &s); | ||
1730 | rval = s.br_startoff == 0 && s.br_blockcount == 1; | ||
1731 | if (rval && whichfork == XFS_DATA_FORK) | ||
1732 | ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize); | ||
1733 | return rval; | ||
1734 | } | ||
1735 | |||
1736 | /* | ||
1737 | * Extent tree manipulation functions used during allocation. | ||
1738 | */ | ||
1739 | |||
1740 | /* | ||
1741 | * Convert a delayed allocation to a real allocation. | ||
1742 | */ | ||
1743 | STATIC int /* error */ | ||
1744 | xfs_bmap_add_extent_delay_real( | ||
1745 | struct xfs_bmalloca *bma) | ||
1746 | { | ||
1747 | struct xfs_bmbt_irec *new = &bma->got; | ||
1748 | int diff; /* temp value */ | ||
1749 | xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ | ||
1750 | int error; /* error return value */ | ||
1751 | int i; /* temp state */ | ||
1752 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
1753 | xfs_fileoff_t new_endoff; /* end offset of new entry */ | ||
1754 | xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ | ||
1755 | /* left is 0, right is 1, prev is 2 */ | ||
1756 | int rval=0; /* return value (logging flags) */ | ||
1757 | int state = 0;/* state bits, accessed thru macros */ | ||
1758 | xfs_filblks_t da_new; /* new count del alloc blocks used */ | ||
1759 | xfs_filblks_t da_old; /* old count del alloc blocks used */ | ||
1760 | xfs_filblks_t temp=0; /* value for da_new calculations */ | ||
1761 | xfs_filblks_t temp2=0;/* value for da_new calculations */ | ||
1762 | int tmp_rval; /* partial logging flags */ | ||
1763 | |||
1764 | ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK); | ||
1765 | |||
1766 | ASSERT(bma->idx >= 0); | ||
1767 | ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); | ||
1768 | ASSERT(!isnullstartblock(new->br_startblock)); | ||
1769 | ASSERT(!bma->cur || | ||
1770 | (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); | ||
1771 | |||
1772 | XFS_STATS_INC(xs_add_exlist); | ||
1773 | |||
1774 | #define LEFT r[0] | ||
1775 | #define RIGHT r[1] | ||
1776 | #define PREV r[2] | ||
1777 | |||
1778 | /* | ||
1779 | * Set up a bunch of variables to make the tests simpler. | ||
1780 | */ | ||
1781 | ep = xfs_iext_get_ext(ifp, bma->idx); | ||
1782 | xfs_bmbt_get_all(ep, &PREV); | ||
1783 | new_endoff = new->br_startoff + new->br_blockcount; | ||
1784 | ASSERT(PREV.br_startoff <= new->br_startoff); | ||
1785 | ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); | ||
1786 | |||
1787 | da_old = startblockval(PREV.br_startblock); | ||
1788 | da_new = 0; | ||
1789 | |||
1790 | /* | ||
1791 | * Set flags determining what part of the previous delayed allocation | ||
1792 | * extent is being replaced by a real allocation. | ||
1793 | */ | ||
1794 | if (PREV.br_startoff == new->br_startoff) | ||
1795 | state |= BMAP_LEFT_FILLING; | ||
1796 | if (PREV.br_startoff + PREV.br_blockcount == new_endoff) | ||
1797 | state |= BMAP_RIGHT_FILLING; | ||
1798 | |||
1799 | /* | ||
1800 | * Check and set flags if this segment has a left neighbor. | ||
1801 | * Don't set contiguous if the combined extent would be too large. | ||
1802 | */ | ||
1803 | if (bma->idx > 0) { | ||
1804 | state |= BMAP_LEFT_VALID; | ||
1805 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT); | ||
1806 | |||
1807 | if (isnullstartblock(LEFT.br_startblock)) | ||
1808 | state |= BMAP_LEFT_DELAY; | ||
1809 | } | ||
1810 | |||
1811 | if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && | ||
1812 | LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && | ||
1813 | LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && | ||
1814 | LEFT.br_state == new->br_state && | ||
1815 | LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) | ||
1816 | state |= BMAP_LEFT_CONTIG; | ||
1817 | |||
1818 | /* | ||
1819 | * Check and set flags if this segment has a right neighbor. | ||
1820 | * Don't set contiguous if the combined extent would be too large. | ||
1821 | * Also check for all-three-contiguous being too large. | ||
1822 | */ | ||
1823 | if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { | ||
1824 | state |= BMAP_RIGHT_VALID; | ||
1825 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT); | ||
1826 | |||
1827 | if (isnullstartblock(RIGHT.br_startblock)) | ||
1828 | state |= BMAP_RIGHT_DELAY; | ||
1829 | } | ||
1830 | |||
1831 | if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && | ||
1832 | new_endoff == RIGHT.br_startoff && | ||
1833 | new->br_startblock + new->br_blockcount == RIGHT.br_startblock && | ||
1834 | new->br_state == RIGHT.br_state && | ||
1835 | new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && | ||
1836 | ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | | ||
1837 | BMAP_RIGHT_FILLING)) != | ||
1838 | (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | | ||
1839 | BMAP_RIGHT_FILLING) || | ||
1840 | LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount | ||
1841 | <= MAXEXTLEN)) | ||
1842 | state |= BMAP_RIGHT_CONTIG; | ||
1843 | |||
1844 | error = 0; | ||
1845 | /* | ||
1846 | * Switch out based on the FILLING and CONTIG state bits. | ||
1847 | */ | ||
1848 | switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | | ||
1849 | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { | ||
1850 | case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | | ||
1851 | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: | ||
1852 | /* | ||
1853 | * Filling in all of a previously delayed allocation extent. | ||
1854 | * The left and right neighbors are both contiguous with new. | ||
1855 | */ | ||
1856 | bma->idx--; | ||
1857 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
1858 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), | ||
1859 | LEFT.br_blockcount + PREV.br_blockcount + | ||
1860 | RIGHT.br_blockcount); | ||
1861 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
1862 | |||
1863 | xfs_iext_remove(bma->ip, bma->idx + 1, 2, state); | ||
1864 | bma->ip->i_d.di_nextents--; | ||
1865 | if (bma->cur == NULL) | ||
1866 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
1867 | else { | ||
1868 | rval = XFS_ILOG_CORE; | ||
1869 | error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, | ||
1870 | RIGHT.br_startblock, | ||
1871 | RIGHT.br_blockcount, &i); | ||
1872 | if (error) | ||
1873 | goto done; | ||
1874 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
1875 | error = xfs_btree_delete(bma->cur, &i); | ||
1876 | if (error) | ||
1877 | goto done; | ||
1878 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
1879 | error = xfs_btree_decrement(bma->cur, 0, &i); | ||
1880 | if (error) | ||
1881 | goto done; | ||
1882 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
1883 | error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, | ||
1884 | LEFT.br_startblock, | ||
1885 | LEFT.br_blockcount + | ||
1886 | PREV.br_blockcount + | ||
1887 | RIGHT.br_blockcount, LEFT.br_state); | ||
1888 | if (error) | ||
1889 | goto done; | ||
1890 | } | ||
1891 | break; | ||
1892 | |||
1893 | case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: | ||
1894 | /* | ||
1895 | * Filling in all of a previously delayed allocation extent. | ||
1896 | * The left neighbor is contiguous, the right is not. | ||
1897 | */ | ||
1898 | bma->idx--; | ||
1899 | |||
1900 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
1901 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), | ||
1902 | LEFT.br_blockcount + PREV.br_blockcount); | ||
1903 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
1904 | |||
1905 | xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); | ||
1906 | if (bma->cur == NULL) | ||
1907 | rval = XFS_ILOG_DEXT; | ||
1908 | else { | ||
1909 | rval = 0; | ||
1910 | error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, | ||
1911 | LEFT.br_startblock, LEFT.br_blockcount, | ||
1912 | &i); | ||
1913 | if (error) | ||
1914 | goto done; | ||
1915 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
1916 | error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, | ||
1917 | LEFT.br_startblock, | ||
1918 | LEFT.br_blockcount + | ||
1919 | PREV.br_blockcount, LEFT.br_state); | ||
1920 | if (error) | ||
1921 | goto done; | ||
1922 | } | ||
1923 | break; | ||
1924 | |||
1925 | case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: | ||
1926 | /* | ||
1927 | * Filling in all of a previously delayed allocation extent. | ||
1928 | * The right neighbor is contiguous, the left is not. | ||
1929 | */ | ||
1930 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
1931 | xfs_bmbt_set_startblock(ep, new->br_startblock); | ||
1932 | xfs_bmbt_set_blockcount(ep, | ||
1933 | PREV.br_blockcount + RIGHT.br_blockcount); | ||
1934 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
1935 | |||
1936 | xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); | ||
1937 | if (bma->cur == NULL) | ||
1938 | rval = XFS_ILOG_DEXT; | ||
1939 | else { | ||
1940 | rval = 0; | ||
1941 | error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, | ||
1942 | RIGHT.br_startblock, | ||
1943 | RIGHT.br_blockcount, &i); | ||
1944 | if (error) | ||
1945 | goto done; | ||
1946 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
1947 | error = xfs_bmbt_update(bma->cur, PREV.br_startoff, | ||
1948 | new->br_startblock, | ||
1949 | PREV.br_blockcount + | ||
1950 | RIGHT.br_blockcount, PREV.br_state); | ||
1951 | if (error) | ||
1952 | goto done; | ||
1953 | } | ||
1954 | break; | ||
1955 | |||
1956 | case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: | ||
1957 | /* | ||
1958 | * Filling in all of a previously delayed allocation extent. | ||
1959 | * Neither the left nor right neighbors are contiguous with | ||
1960 | * the new one. | ||
1961 | */ | ||
1962 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
1963 | xfs_bmbt_set_startblock(ep, new->br_startblock); | ||
1964 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
1965 | |||
1966 | bma->ip->i_d.di_nextents++; | ||
1967 | if (bma->cur == NULL) | ||
1968 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
1969 | else { | ||
1970 | rval = XFS_ILOG_CORE; | ||
1971 | error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, | ||
1972 | new->br_startblock, new->br_blockcount, | ||
1973 | &i); | ||
1974 | if (error) | ||
1975 | goto done; | ||
1976 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | ||
1977 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; | ||
1978 | error = xfs_btree_insert(bma->cur, &i); | ||
1979 | if (error) | ||
1980 | goto done; | ||
1981 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
1982 | } | ||
1983 | break; | ||
1984 | |||
1985 | case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: | ||
1986 | /* | ||
1987 | * Filling in the first part of a previous delayed allocation. | ||
1988 | * The left neighbor is contiguous. | ||
1989 | */ | ||
1990 | trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_); | ||
1991 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1), | ||
1992 | LEFT.br_blockcount + new->br_blockcount); | ||
1993 | xfs_bmbt_set_startoff(ep, | ||
1994 | PREV.br_startoff + new->br_blockcount); | ||
1995 | trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_); | ||
1996 | |||
1997 | temp = PREV.br_blockcount - new->br_blockcount; | ||
1998 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
1999 | xfs_bmbt_set_blockcount(ep, temp); | ||
2000 | if (bma->cur == NULL) | ||
2001 | rval = XFS_ILOG_DEXT; | ||
2002 | else { | ||
2003 | rval = 0; | ||
2004 | error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff, | ||
2005 | LEFT.br_startblock, LEFT.br_blockcount, | ||
2006 | &i); | ||
2007 | if (error) | ||
2008 | goto done; | ||
2009 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2010 | error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, | ||
2011 | LEFT.br_startblock, | ||
2012 | LEFT.br_blockcount + | ||
2013 | new->br_blockcount, | ||
2014 | LEFT.br_state); | ||
2015 | if (error) | ||
2016 | goto done; | ||
2017 | } | ||
2018 | da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), | ||
2019 | startblockval(PREV.br_startblock)); | ||
2020 | xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); | ||
2021 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
2022 | |||
2023 | bma->idx--; | ||
2024 | break; | ||
2025 | |||
2026 | case BMAP_LEFT_FILLING: | ||
2027 | /* | ||
2028 | * Filling in the first part of a previous delayed allocation. | ||
2029 | * The left neighbor is not contiguous. | ||
2030 | */ | ||
2031 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
2032 | xfs_bmbt_set_startoff(ep, new_endoff); | ||
2033 | temp = PREV.br_blockcount - new->br_blockcount; | ||
2034 | xfs_bmbt_set_blockcount(ep, temp); | ||
2035 | xfs_iext_insert(bma->ip, bma->idx, 1, new, state); | ||
2036 | bma->ip->i_d.di_nextents++; | ||
2037 | if (bma->cur == NULL) | ||
2038 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2039 | else { | ||
2040 | rval = XFS_ILOG_CORE; | ||
2041 | error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, | ||
2042 | new->br_startblock, new->br_blockcount, | ||
2043 | &i); | ||
2044 | if (error) | ||
2045 | goto done; | ||
2046 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | ||
2047 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; | ||
2048 | error = xfs_btree_insert(bma->cur, &i); | ||
2049 | if (error) | ||
2050 | goto done; | ||
2051 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2052 | } | ||
2053 | |||
2054 | if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { | ||
2055 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, | ||
2056 | bma->firstblock, bma->flist, | ||
2057 | &bma->cur, 1, &tmp_rval, XFS_DATA_FORK); | ||
2058 | rval |= tmp_rval; | ||
2059 | if (error) | ||
2060 | goto done; | ||
2061 | } | ||
2062 | da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), | ||
2063 | startblockval(PREV.br_startblock) - | ||
2064 | (bma->cur ? bma->cur->bc_private.b.allocated : 0)); | ||
2065 | ep = xfs_iext_get_ext(ifp, bma->idx + 1); | ||
2066 | xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); | ||
2067 | trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); | ||
2068 | break; | ||
2069 | |||
2070 | case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: | ||
2071 | /* | ||
2072 | * Filling in the last part of a previous delayed allocation. | ||
2073 | * The right neighbor is contiguous with the new allocation. | ||
2074 | */ | ||
2075 | temp = PREV.br_blockcount - new->br_blockcount; | ||
2076 | trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_); | ||
2077 | xfs_bmbt_set_blockcount(ep, temp); | ||
2078 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1), | ||
2079 | new->br_startoff, new->br_startblock, | ||
2080 | new->br_blockcount + RIGHT.br_blockcount, | ||
2081 | RIGHT.br_state); | ||
2082 | trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_); | ||
2083 | if (bma->cur == NULL) | ||
2084 | rval = XFS_ILOG_DEXT; | ||
2085 | else { | ||
2086 | rval = 0; | ||
2087 | error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff, | ||
2088 | RIGHT.br_startblock, | ||
2089 | RIGHT.br_blockcount, &i); | ||
2090 | if (error) | ||
2091 | goto done; | ||
2092 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2093 | error = xfs_bmbt_update(bma->cur, new->br_startoff, | ||
2094 | new->br_startblock, | ||
2095 | new->br_blockcount + | ||
2096 | RIGHT.br_blockcount, | ||
2097 | RIGHT.br_state); | ||
2098 | if (error) | ||
2099 | goto done; | ||
2100 | } | ||
2101 | |||
2102 | da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), | ||
2103 | startblockval(PREV.br_startblock)); | ||
2104 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
2105 | xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); | ||
2106 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
2107 | |||
2108 | bma->idx++; | ||
2109 | break; | ||
2110 | |||
2111 | case BMAP_RIGHT_FILLING: | ||
2112 | /* | ||
2113 | * Filling in the last part of a previous delayed allocation. | ||
2114 | * The right neighbor is not contiguous. | ||
2115 | */ | ||
2116 | temp = PREV.br_blockcount - new->br_blockcount; | ||
2117 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
2118 | xfs_bmbt_set_blockcount(ep, temp); | ||
2119 | xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state); | ||
2120 | bma->ip->i_d.di_nextents++; | ||
2121 | if (bma->cur == NULL) | ||
2122 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2123 | else { | ||
2124 | rval = XFS_ILOG_CORE; | ||
2125 | error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, | ||
2126 | new->br_startblock, new->br_blockcount, | ||
2127 | &i); | ||
2128 | if (error) | ||
2129 | goto done; | ||
2130 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | ||
2131 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; | ||
2132 | error = xfs_btree_insert(bma->cur, &i); | ||
2133 | if (error) | ||
2134 | goto done; | ||
2135 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2136 | } | ||
2137 | |||
2138 | if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { | ||
2139 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, | ||
2140 | bma->firstblock, bma->flist, &bma->cur, 1, | ||
2141 | &tmp_rval, XFS_DATA_FORK); | ||
2142 | rval |= tmp_rval; | ||
2143 | if (error) | ||
2144 | goto done; | ||
2145 | } | ||
2146 | da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), | ||
2147 | startblockval(PREV.br_startblock) - | ||
2148 | (bma->cur ? bma->cur->bc_private.b.allocated : 0)); | ||
2149 | ep = xfs_iext_get_ext(ifp, bma->idx); | ||
2150 | xfs_bmbt_set_startblock(ep, nullstartblock(da_new)); | ||
2151 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
2152 | |||
2153 | bma->idx++; | ||
2154 | break; | ||
2155 | |||
2156 | case 0: | ||
2157 | /* | ||
2158 | * Filling in the middle part of a previous delayed allocation. | ||
2159 | * Contiguity is impossible here. | ||
2160 | * This case is avoided almost all the time. | ||
2161 | * | ||
2162 | * We start with a delayed allocation: | ||
2163 | * | ||
2164 | * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+ | ||
2165 | * PREV @ idx | ||
2166 | * | ||
2167 | * and we are allocating: | ||
2168 | * +rrrrrrrrrrrrrrrrr+ | ||
2169 | * new | ||
2170 | * | ||
2171 | * and we set it up for insertion as: | ||
2172 | * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+ | ||
2173 | * new | ||
2174 | * PREV @ idx LEFT RIGHT | ||
2175 | * inserted at idx + 1 | ||
2176 | */ | ||
2177 | temp = new->br_startoff - PREV.br_startoff; | ||
2178 | temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; | ||
2179 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_); | ||
2180 | xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ | ||
2181 | LEFT = *new; | ||
2182 | RIGHT.br_state = PREV.br_state; | ||
2183 | RIGHT.br_startblock = nullstartblock( | ||
2184 | (int)xfs_bmap_worst_indlen(bma->ip, temp2)); | ||
2185 | RIGHT.br_startoff = new_endoff; | ||
2186 | RIGHT.br_blockcount = temp2; | ||
2187 | /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ | ||
2188 | xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state); | ||
2189 | bma->ip->i_d.di_nextents++; | ||
2190 | if (bma->cur == NULL) | ||
2191 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2192 | else { | ||
2193 | rval = XFS_ILOG_CORE; | ||
2194 | error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff, | ||
2195 | new->br_startblock, new->br_blockcount, | ||
2196 | &i); | ||
2197 | if (error) | ||
2198 | goto done; | ||
2199 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | ||
2200 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; | ||
2201 | error = xfs_btree_insert(bma->cur, &i); | ||
2202 | if (error) | ||
2203 | goto done; | ||
2204 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2205 | } | ||
2206 | |||
2207 | if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { | ||
2208 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, | ||
2209 | bma->firstblock, bma->flist, &bma->cur, | ||
2210 | 1, &tmp_rval, XFS_DATA_FORK); | ||
2211 | rval |= tmp_rval; | ||
2212 | if (error) | ||
2213 | goto done; | ||
2214 | } | ||
2215 | temp = xfs_bmap_worst_indlen(bma->ip, temp); | ||
2216 | temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); | ||
2217 | diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - | ||
2218 | (bma->cur ? bma->cur->bc_private.b.allocated : 0)); | ||
2219 | if (diff > 0) { | ||
2220 | error = xfs_icsb_modify_counters(bma->ip->i_mount, | ||
2221 | XFS_SBS_FDBLOCKS, | ||
2222 | -((int64_t)diff), 0); | ||
2223 | ASSERT(!error); | ||
2224 | if (error) | ||
2225 | goto done; | ||
2226 | } | ||
2227 | |||
2228 | ep = xfs_iext_get_ext(ifp, bma->idx); | ||
2229 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | ||
2230 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
2231 | trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_); | ||
2232 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2), | ||
2233 | nullstartblock((int)temp2)); | ||
2234 | trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_); | ||
2235 | |||
2236 | bma->idx++; | ||
2237 | da_new = temp + temp2; | ||
2238 | break; | ||
2239 | |||
2240 | case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: | ||
2241 | case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: | ||
2242 | case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: | ||
2243 | case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: | ||
2244 | case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: | ||
2245 | case BMAP_LEFT_CONTIG: | ||
2246 | case BMAP_RIGHT_CONTIG: | ||
2247 | /* | ||
2248 | * These cases are all impossible. | ||
2249 | */ | ||
2250 | ASSERT(0); | ||
2251 | } | ||
2252 | |||
2253 | /* convert to a btree if necessary */ | ||
2254 | if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { | ||
2255 | int tmp_logflags; /* partial log flag return val */ | ||
2256 | |||
2257 | ASSERT(bma->cur == NULL); | ||
2258 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, | ||
2259 | bma->firstblock, bma->flist, &bma->cur, | ||
2260 | da_old > 0, &tmp_logflags, XFS_DATA_FORK); | ||
2261 | bma->logflags |= tmp_logflags; | ||
2262 | if (error) | ||
2263 | goto done; | ||
2264 | } | ||
2265 | |||
2266 | /* adjust for changes in reserved delayed indirect blocks */ | ||
2267 | if (da_old || da_new) { | ||
2268 | temp = da_new; | ||
2269 | if (bma->cur) | ||
2270 | temp += bma->cur->bc_private.b.allocated; | ||
2271 | ASSERT(temp <= da_old); | ||
2272 | if (temp < da_old) | ||
2273 | xfs_icsb_modify_counters(bma->ip->i_mount, | ||
2274 | XFS_SBS_FDBLOCKS, | ||
2275 | (int64_t)(da_old - temp), 0); | ||
2276 | } | ||
2277 | |||
2278 | /* clear out the allocated field, done with it now in any case. */ | ||
2279 | if (bma->cur) | ||
2280 | bma->cur->bc_private.b.allocated = 0; | ||
2281 | |||
2282 | xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK); | ||
2283 | done: | ||
2284 | bma->logflags |= rval; | ||
2285 | return error; | ||
2286 | #undef LEFT | ||
2287 | #undef RIGHT | ||
2288 | #undef PREV | ||
2289 | } | ||
2290 | |||
2291 | /* | ||
2292 | * Convert an unwritten allocation to a real allocation or vice versa. | ||
2293 | */ | ||
2294 | STATIC int /* error */ | ||
2295 | xfs_bmap_add_extent_unwritten_real( | ||
2296 | struct xfs_trans *tp, | ||
2297 | xfs_inode_t *ip, /* incore inode pointer */ | ||
2298 | xfs_extnum_t *idx, /* extent number to update/insert */ | ||
2299 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | ||
2300 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | ||
2301 | xfs_fsblock_t *first, /* pointer to firstblock variable */ | ||
2302 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | ||
2303 | int *logflagsp) /* inode logging flags */ | ||
2304 | { | ||
2305 | xfs_btree_cur_t *cur; /* btree cursor */ | ||
2306 | xfs_bmbt_rec_host_t *ep; /* extent entry for idx */ | ||
2307 | int error; /* error return value */ | ||
2308 | int i; /* temp state */ | ||
2309 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
2310 | xfs_fileoff_t new_endoff; /* end offset of new entry */ | ||
2311 | xfs_exntst_t newext; /* new extent state */ | ||
2312 | xfs_exntst_t oldext; /* old extent state */ | ||
2313 | xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ | ||
2314 | /* left is 0, right is 1, prev is 2 */ | ||
2315 | int rval=0; /* return value (logging flags) */ | ||
2316 | int state = 0;/* state bits, accessed thru macros */ | ||
2317 | |||
2318 | *logflagsp = 0; | ||
2319 | |||
2320 | cur = *curp; | ||
2321 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | ||
2322 | |||
2323 | ASSERT(*idx >= 0); | ||
2324 | ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); | ||
2325 | ASSERT(!isnullstartblock(new->br_startblock)); | ||
2326 | |||
2327 | XFS_STATS_INC(xs_add_exlist); | ||
2328 | |||
2329 | #define LEFT r[0] | ||
2330 | #define RIGHT r[1] | ||
2331 | #define PREV r[2] | ||
2332 | |||
2333 | /* | ||
2334 | * Set up a bunch of variables to make the tests simpler. | ||
2335 | */ | ||
2336 | error = 0; | ||
2337 | ep = xfs_iext_get_ext(ifp, *idx); | ||
2338 | xfs_bmbt_get_all(ep, &PREV); | ||
2339 | newext = new->br_state; | ||
2340 | oldext = (newext == XFS_EXT_UNWRITTEN) ? | ||
2341 | XFS_EXT_NORM : XFS_EXT_UNWRITTEN; | ||
2342 | ASSERT(PREV.br_state == oldext); | ||
2343 | new_endoff = new->br_startoff + new->br_blockcount; | ||
2344 | ASSERT(PREV.br_startoff <= new->br_startoff); | ||
2345 | ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); | ||
2346 | |||
2347 | /* | ||
2348 | * Set flags determining what part of the previous oldext allocation | ||
2349 | * extent is being replaced by a newext allocation. | ||
2350 | */ | ||
2351 | if (PREV.br_startoff == new->br_startoff) | ||
2352 | state |= BMAP_LEFT_FILLING; | ||
2353 | if (PREV.br_startoff + PREV.br_blockcount == new_endoff) | ||
2354 | state |= BMAP_RIGHT_FILLING; | ||
2355 | |||
2356 | /* | ||
2357 | * Check and set flags if this segment has a left neighbor. | ||
2358 | * Don't set contiguous if the combined extent would be too large. | ||
2359 | */ | ||
2360 | if (*idx > 0) { | ||
2361 | state |= BMAP_LEFT_VALID; | ||
2362 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); | ||
2363 | |||
2364 | if (isnullstartblock(LEFT.br_startblock)) | ||
2365 | state |= BMAP_LEFT_DELAY; | ||
2366 | } | ||
2367 | |||
2368 | if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && | ||
2369 | LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && | ||
2370 | LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && | ||
2371 | LEFT.br_state == newext && | ||
2372 | LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) | ||
2373 | state |= BMAP_LEFT_CONTIG; | ||
2374 | |||
2375 | /* | ||
2376 | * Check and set flags if this segment has a right neighbor. | ||
2377 | * Don't set contiguous if the combined extent would be too large. | ||
2378 | * Also check for all-three-contiguous being too large. | ||
2379 | */ | ||
2380 | if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { | ||
2381 | state |= BMAP_RIGHT_VALID; | ||
2382 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); | ||
2383 | if (isnullstartblock(RIGHT.br_startblock)) | ||
2384 | state |= BMAP_RIGHT_DELAY; | ||
2385 | } | ||
2386 | |||
2387 | if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && | ||
2388 | new_endoff == RIGHT.br_startoff && | ||
2389 | new->br_startblock + new->br_blockcount == RIGHT.br_startblock && | ||
2390 | newext == RIGHT.br_state && | ||
2391 | new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && | ||
2392 | ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | | ||
2393 | BMAP_RIGHT_FILLING)) != | ||
2394 | (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | | ||
2395 | BMAP_RIGHT_FILLING) || | ||
2396 | LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount | ||
2397 | <= MAXEXTLEN)) | ||
2398 | state |= BMAP_RIGHT_CONTIG; | ||
2399 | |||
2400 | /* | ||
2401 | * Switch out based on the FILLING and CONTIG state bits. | ||
2402 | */ | ||
2403 | switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | | ||
2404 | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { | ||
2405 | case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | | ||
2406 | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: | ||
2407 | /* | ||
2408 | * Setting all of a previous oldext extent to newext. | ||
2409 | * The left and right neighbors are both contiguous with new. | ||
2410 | */ | ||
2411 | --*idx; | ||
2412 | |||
2413 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
2414 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | ||
2415 | LEFT.br_blockcount + PREV.br_blockcount + | ||
2416 | RIGHT.br_blockcount); | ||
2417 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2418 | |||
2419 | xfs_iext_remove(ip, *idx + 1, 2, state); | ||
2420 | ip->i_d.di_nextents -= 2; | ||
2421 | if (cur == NULL) | ||
2422 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2423 | else { | ||
2424 | rval = XFS_ILOG_CORE; | ||
2425 | if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, | ||
2426 | RIGHT.br_startblock, | ||
2427 | RIGHT.br_blockcount, &i))) | ||
2428 | goto done; | ||
2429 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2430 | if ((error = xfs_btree_delete(cur, &i))) | ||
2431 | goto done; | ||
2432 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2433 | if ((error = xfs_btree_decrement(cur, 0, &i))) | ||
2434 | goto done; | ||
2435 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2436 | if ((error = xfs_btree_delete(cur, &i))) | ||
2437 | goto done; | ||
2438 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2439 | if ((error = xfs_btree_decrement(cur, 0, &i))) | ||
2440 | goto done; | ||
2441 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2442 | if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, | ||
2443 | LEFT.br_startblock, | ||
2444 | LEFT.br_blockcount + PREV.br_blockcount + | ||
2445 | RIGHT.br_blockcount, LEFT.br_state))) | ||
2446 | goto done; | ||
2447 | } | ||
2448 | break; | ||
2449 | |||
2450 | case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: | ||
2451 | /* | ||
2452 | * Setting all of a previous oldext extent to newext. | ||
2453 | * The left neighbor is contiguous, the right is not. | ||
2454 | */ | ||
2455 | --*idx; | ||
2456 | |||
2457 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
2458 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | ||
2459 | LEFT.br_blockcount + PREV.br_blockcount); | ||
2460 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2461 | |||
2462 | xfs_iext_remove(ip, *idx + 1, 1, state); | ||
2463 | ip->i_d.di_nextents--; | ||
2464 | if (cur == NULL) | ||
2465 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2466 | else { | ||
2467 | rval = XFS_ILOG_CORE; | ||
2468 | if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, | ||
2469 | PREV.br_startblock, PREV.br_blockcount, | ||
2470 | &i))) | ||
2471 | goto done; | ||
2472 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2473 | if ((error = xfs_btree_delete(cur, &i))) | ||
2474 | goto done; | ||
2475 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2476 | if ((error = xfs_btree_decrement(cur, 0, &i))) | ||
2477 | goto done; | ||
2478 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2479 | if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, | ||
2480 | LEFT.br_startblock, | ||
2481 | LEFT.br_blockcount + PREV.br_blockcount, | ||
2482 | LEFT.br_state))) | ||
2483 | goto done; | ||
2484 | } | ||
2485 | break; | ||
2486 | |||
2487 | case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: | ||
2488 | /* | ||
2489 | * Setting all of a previous oldext extent to newext. | ||
2490 | * The right neighbor is contiguous, the left is not. | ||
2491 | */ | ||
2492 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
2493 | xfs_bmbt_set_blockcount(ep, | ||
2494 | PREV.br_blockcount + RIGHT.br_blockcount); | ||
2495 | xfs_bmbt_set_state(ep, newext); | ||
2496 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2497 | xfs_iext_remove(ip, *idx + 1, 1, state); | ||
2498 | ip->i_d.di_nextents--; | ||
2499 | if (cur == NULL) | ||
2500 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2501 | else { | ||
2502 | rval = XFS_ILOG_CORE; | ||
2503 | if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, | ||
2504 | RIGHT.br_startblock, | ||
2505 | RIGHT.br_blockcount, &i))) | ||
2506 | goto done; | ||
2507 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2508 | if ((error = xfs_btree_delete(cur, &i))) | ||
2509 | goto done; | ||
2510 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2511 | if ((error = xfs_btree_decrement(cur, 0, &i))) | ||
2512 | goto done; | ||
2513 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2514 | if ((error = xfs_bmbt_update(cur, new->br_startoff, | ||
2515 | new->br_startblock, | ||
2516 | new->br_blockcount + RIGHT.br_blockcount, | ||
2517 | newext))) | ||
2518 | goto done; | ||
2519 | } | ||
2520 | break; | ||
2521 | |||
2522 | case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: | ||
2523 | /* | ||
2524 | * Setting all of a previous oldext extent to newext. | ||
2525 | * Neither the left nor right neighbors are contiguous with | ||
2526 | * the new one. | ||
2527 | */ | ||
2528 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
2529 | xfs_bmbt_set_state(ep, newext); | ||
2530 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2531 | |||
2532 | if (cur == NULL) | ||
2533 | rval = XFS_ILOG_DEXT; | ||
2534 | else { | ||
2535 | rval = 0; | ||
2536 | if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, | ||
2537 | new->br_startblock, new->br_blockcount, | ||
2538 | &i))) | ||
2539 | goto done; | ||
2540 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2541 | if ((error = xfs_bmbt_update(cur, new->br_startoff, | ||
2542 | new->br_startblock, new->br_blockcount, | ||
2543 | newext))) | ||
2544 | goto done; | ||
2545 | } | ||
2546 | break; | ||
2547 | |||
2548 | case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: | ||
2549 | /* | ||
2550 | * Setting the first part of a previous oldext extent to newext. | ||
2551 | * The left neighbor is contiguous. | ||
2552 | */ | ||
2553 | trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); | ||
2554 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), | ||
2555 | LEFT.br_blockcount + new->br_blockcount); | ||
2556 | xfs_bmbt_set_startoff(ep, | ||
2557 | PREV.br_startoff + new->br_blockcount); | ||
2558 | trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); | ||
2559 | |||
2560 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
2561 | xfs_bmbt_set_startblock(ep, | ||
2562 | new->br_startblock + new->br_blockcount); | ||
2563 | xfs_bmbt_set_blockcount(ep, | ||
2564 | PREV.br_blockcount - new->br_blockcount); | ||
2565 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2566 | |||
2567 | --*idx; | ||
2568 | |||
2569 | if (cur == NULL) | ||
2570 | rval = XFS_ILOG_DEXT; | ||
2571 | else { | ||
2572 | rval = 0; | ||
2573 | if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, | ||
2574 | PREV.br_startblock, PREV.br_blockcount, | ||
2575 | &i))) | ||
2576 | goto done; | ||
2577 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2578 | if ((error = xfs_bmbt_update(cur, | ||
2579 | PREV.br_startoff + new->br_blockcount, | ||
2580 | PREV.br_startblock + new->br_blockcount, | ||
2581 | PREV.br_blockcount - new->br_blockcount, | ||
2582 | oldext))) | ||
2583 | goto done; | ||
2584 | if ((error = xfs_btree_decrement(cur, 0, &i))) | ||
2585 | goto done; | ||
2586 | error = xfs_bmbt_update(cur, LEFT.br_startoff, | ||
2587 | LEFT.br_startblock, | ||
2588 | LEFT.br_blockcount + new->br_blockcount, | ||
2589 | LEFT.br_state); | ||
2590 | if (error) | ||
2591 | goto done; | ||
2592 | } | ||
2593 | break; | ||
2594 | |||
2595 | case BMAP_LEFT_FILLING: | ||
2596 | /* | ||
2597 | * Setting the first part of a previous oldext extent to newext. | ||
2598 | * The left neighbor is not contiguous. | ||
2599 | */ | ||
2600 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
2601 | ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); | ||
2602 | xfs_bmbt_set_startoff(ep, new_endoff); | ||
2603 | xfs_bmbt_set_blockcount(ep, | ||
2604 | PREV.br_blockcount - new->br_blockcount); | ||
2605 | xfs_bmbt_set_startblock(ep, | ||
2606 | new->br_startblock + new->br_blockcount); | ||
2607 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2608 | |||
2609 | xfs_iext_insert(ip, *idx, 1, new, state); | ||
2610 | ip->i_d.di_nextents++; | ||
2611 | if (cur == NULL) | ||
2612 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2613 | else { | ||
2614 | rval = XFS_ILOG_CORE; | ||
2615 | if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, | ||
2616 | PREV.br_startblock, PREV.br_blockcount, | ||
2617 | &i))) | ||
2618 | goto done; | ||
2619 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2620 | if ((error = xfs_bmbt_update(cur, | ||
2621 | PREV.br_startoff + new->br_blockcount, | ||
2622 | PREV.br_startblock + new->br_blockcount, | ||
2623 | PREV.br_blockcount - new->br_blockcount, | ||
2624 | oldext))) | ||
2625 | goto done; | ||
2626 | cur->bc_rec.b = *new; | ||
2627 | if ((error = xfs_btree_insert(cur, &i))) | ||
2628 | goto done; | ||
2629 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2630 | } | ||
2631 | break; | ||
2632 | |||
2633 | case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: | ||
2634 | /* | ||
2635 | * Setting the last part of a previous oldext extent to newext. | ||
2636 | * The right neighbor is contiguous with the new allocation. | ||
2637 | */ | ||
2638 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
2639 | xfs_bmbt_set_blockcount(ep, | ||
2640 | PREV.br_blockcount - new->br_blockcount); | ||
2641 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2642 | |||
2643 | ++*idx; | ||
2644 | |||
2645 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
2646 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), | ||
2647 | new->br_startoff, new->br_startblock, | ||
2648 | new->br_blockcount + RIGHT.br_blockcount, newext); | ||
2649 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2650 | |||
2651 | if (cur == NULL) | ||
2652 | rval = XFS_ILOG_DEXT; | ||
2653 | else { | ||
2654 | rval = 0; | ||
2655 | if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, | ||
2656 | PREV.br_startblock, | ||
2657 | PREV.br_blockcount, &i))) | ||
2658 | goto done; | ||
2659 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2660 | if ((error = xfs_bmbt_update(cur, PREV.br_startoff, | ||
2661 | PREV.br_startblock, | ||
2662 | PREV.br_blockcount - new->br_blockcount, | ||
2663 | oldext))) | ||
2664 | goto done; | ||
2665 | if ((error = xfs_btree_increment(cur, 0, &i))) | ||
2666 | goto done; | ||
2667 | if ((error = xfs_bmbt_update(cur, new->br_startoff, | ||
2668 | new->br_startblock, | ||
2669 | new->br_blockcount + RIGHT.br_blockcount, | ||
2670 | newext))) | ||
2671 | goto done; | ||
2672 | } | ||
2673 | break; | ||
2674 | |||
2675 | case BMAP_RIGHT_FILLING: | ||
2676 | /* | ||
2677 | * Setting the last part of a previous oldext extent to newext. | ||
2678 | * The right neighbor is not contiguous. | ||
2679 | */ | ||
2680 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
2681 | xfs_bmbt_set_blockcount(ep, | ||
2682 | PREV.br_blockcount - new->br_blockcount); | ||
2683 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2684 | |||
2685 | ++*idx; | ||
2686 | xfs_iext_insert(ip, *idx, 1, new, state); | ||
2687 | |||
2688 | ip->i_d.di_nextents++; | ||
2689 | if (cur == NULL) | ||
2690 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2691 | else { | ||
2692 | rval = XFS_ILOG_CORE; | ||
2693 | if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, | ||
2694 | PREV.br_startblock, PREV.br_blockcount, | ||
2695 | &i))) | ||
2696 | goto done; | ||
2697 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2698 | if ((error = xfs_bmbt_update(cur, PREV.br_startoff, | ||
2699 | PREV.br_startblock, | ||
2700 | PREV.br_blockcount - new->br_blockcount, | ||
2701 | oldext))) | ||
2702 | goto done; | ||
2703 | if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, | ||
2704 | new->br_startblock, new->br_blockcount, | ||
2705 | &i))) | ||
2706 | goto done; | ||
2707 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | ||
2708 | cur->bc_rec.b.br_state = XFS_EXT_NORM; | ||
2709 | if ((error = xfs_btree_insert(cur, &i))) | ||
2710 | goto done; | ||
2711 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2712 | } | ||
2713 | break; | ||
2714 | |||
2715 | case 0: | ||
2716 | /* | ||
2717 | * Setting the middle part of a previous oldext extent to | ||
2718 | * newext. Contiguity is impossible here. | ||
2719 | * One extent becomes three extents. | ||
2720 | */ | ||
2721 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
2722 | xfs_bmbt_set_blockcount(ep, | ||
2723 | new->br_startoff - PREV.br_startoff); | ||
2724 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2725 | |||
2726 | r[0] = *new; | ||
2727 | r[1].br_startoff = new_endoff; | ||
2728 | r[1].br_blockcount = | ||
2729 | PREV.br_startoff + PREV.br_blockcount - new_endoff; | ||
2730 | r[1].br_startblock = new->br_startblock + new->br_blockcount; | ||
2731 | r[1].br_state = oldext; | ||
2732 | |||
2733 | ++*idx; | ||
2734 | xfs_iext_insert(ip, *idx, 2, &r[0], state); | ||
2735 | |||
2736 | ip->i_d.di_nextents += 2; | ||
2737 | if (cur == NULL) | ||
2738 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | ||
2739 | else { | ||
2740 | rval = XFS_ILOG_CORE; | ||
2741 | if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff, | ||
2742 | PREV.br_startblock, PREV.br_blockcount, | ||
2743 | &i))) | ||
2744 | goto done; | ||
2745 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2746 | /* new right extent - oldext */ | ||
2747 | if ((error = xfs_bmbt_update(cur, r[1].br_startoff, | ||
2748 | r[1].br_startblock, r[1].br_blockcount, | ||
2749 | r[1].br_state))) | ||
2750 | goto done; | ||
2751 | /* new left extent - oldext */ | ||
2752 | cur->bc_rec.b = PREV; | ||
2753 | cur->bc_rec.b.br_blockcount = | ||
2754 | new->br_startoff - PREV.br_startoff; | ||
2755 | if ((error = xfs_btree_insert(cur, &i))) | ||
2756 | goto done; | ||
2757 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2758 | /* | ||
2759 | * Reset the cursor to the position of the new extent | ||
2760 | * we are about to insert as we can't trust it after | ||
2761 | * the previous insert. | ||
2762 | */ | ||
2763 | if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, | ||
2764 | new->br_startblock, new->br_blockcount, | ||
2765 | &i))) | ||
2766 | goto done; | ||
2767 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | ||
2768 | /* new middle extent - newext */ | ||
2769 | cur->bc_rec.b.br_state = new->br_state; | ||
2770 | if ((error = xfs_btree_insert(cur, &i))) | ||
2771 | goto done; | ||
2772 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
2773 | } | ||
2774 | break; | ||
2775 | |||
2776 | case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: | ||
2777 | case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: | ||
2778 | case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: | ||
2779 | case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: | ||
2780 | case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: | ||
2781 | case BMAP_LEFT_CONTIG: | ||
2782 | case BMAP_RIGHT_CONTIG: | ||
2783 | /* | ||
2784 | * These cases are all impossible. | ||
2785 | */ | ||
2786 | ASSERT(0); | ||
2787 | } | ||
2788 | |||
2789 | /* convert to a btree if necessary */ | ||
2790 | if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { | ||
2791 | int tmp_logflags; /* partial log flag return val */ | ||
2792 | |||
2793 | ASSERT(cur == NULL); | ||
2794 | error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur, | ||
2795 | 0, &tmp_logflags, XFS_DATA_FORK); | ||
2796 | *logflagsp |= tmp_logflags; | ||
2797 | if (error) | ||
2798 | goto done; | ||
2799 | } | ||
2800 | |||
2801 | /* clear out the allocated field, done with it now in any case. */ | ||
2802 | if (cur) { | ||
2803 | cur->bc_private.b.allocated = 0; | ||
2804 | *curp = cur; | ||
2805 | } | ||
2806 | |||
2807 | xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); | ||
2808 | done: | ||
2809 | *logflagsp |= rval; | ||
2810 | return error; | ||
2811 | #undef LEFT | ||
2812 | #undef RIGHT | ||
2813 | #undef PREV | ||
2814 | } | ||
2815 | |||
2816 | /* | ||
2817 | * Convert a hole to a delayed allocation. | ||
2818 | */ | ||
2819 | STATIC void | ||
2820 | xfs_bmap_add_extent_hole_delay( | ||
2821 | xfs_inode_t *ip, /* incore inode pointer */ | ||
2822 | xfs_extnum_t *idx, /* extent number to update/insert */ | ||
2823 | xfs_bmbt_irec_t *new) /* new data to add to file extents */ | ||
2824 | { | ||
2825 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
2826 | xfs_bmbt_irec_t left; /* left neighbor extent entry */ | ||
2827 | xfs_filblks_t newlen=0; /* new indirect size */ | ||
2828 | xfs_filblks_t oldlen=0; /* old indirect size */ | ||
2829 | xfs_bmbt_irec_t right; /* right neighbor extent entry */ | ||
2830 | int state; /* state bits, accessed thru macros */ | ||
2831 | xfs_filblks_t temp=0; /* temp for indirect calculations */ | ||
2832 | |||
2833 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | ||
2834 | state = 0; | ||
2835 | ASSERT(isnullstartblock(new->br_startblock)); | ||
2836 | |||
2837 | /* | ||
2838 | * Check and set flags if this segment has a left neighbor | ||
2839 | */ | ||
2840 | if (*idx > 0) { | ||
2841 | state |= BMAP_LEFT_VALID; | ||
2842 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); | ||
2843 | |||
2844 | if (isnullstartblock(left.br_startblock)) | ||
2845 | state |= BMAP_LEFT_DELAY; | ||
2846 | } | ||
2847 | |||
2848 | /* | ||
2849 | * Check and set flags if the current (right) segment exists. | ||
2850 | * If it doesn't exist, we're converting the hole at end-of-file. | ||
2851 | */ | ||
2852 | if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { | ||
2853 | state |= BMAP_RIGHT_VALID; | ||
2854 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); | ||
2855 | |||
2856 | if (isnullstartblock(right.br_startblock)) | ||
2857 | state |= BMAP_RIGHT_DELAY; | ||
2858 | } | ||
2859 | |||
2860 | /* | ||
2861 | * Set contiguity flags on the left and right neighbors. | ||
2862 | * Don't let extents get too large, even if the pieces are contiguous. | ||
2863 | */ | ||
2864 | if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) && | ||
2865 | left.br_startoff + left.br_blockcount == new->br_startoff && | ||
2866 | left.br_blockcount + new->br_blockcount <= MAXEXTLEN) | ||
2867 | state |= BMAP_LEFT_CONTIG; | ||
2868 | |||
2869 | if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) && | ||
2870 | new->br_startoff + new->br_blockcount == right.br_startoff && | ||
2871 | new->br_blockcount + right.br_blockcount <= MAXEXTLEN && | ||
2872 | (!(state & BMAP_LEFT_CONTIG) || | ||
2873 | (left.br_blockcount + new->br_blockcount + | ||
2874 | right.br_blockcount <= MAXEXTLEN))) | ||
2875 | state |= BMAP_RIGHT_CONTIG; | ||
2876 | |||
2877 | /* | ||
2878 | * Switch out based on the contiguity flags. | ||
2879 | */ | ||
2880 | switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { | ||
2881 | case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: | ||
2882 | /* | ||
2883 | * New allocation is contiguous with delayed allocations | ||
2884 | * on the left and on the right. | ||
2885 | * Merge all three into a single extent record. | ||
2886 | */ | ||
2887 | --*idx; | ||
2888 | temp = left.br_blockcount + new->br_blockcount + | ||
2889 | right.br_blockcount; | ||
2890 | |||
2891 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
2892 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); | ||
2893 | oldlen = startblockval(left.br_startblock) + | ||
2894 | startblockval(new->br_startblock) + | ||
2895 | startblockval(right.br_startblock); | ||
2896 | newlen = xfs_bmap_worst_indlen(ip, temp); | ||
2897 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), | ||
2898 | nullstartblock((int)newlen)); | ||
2899 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2900 | |||
2901 | xfs_iext_remove(ip, *idx + 1, 1, state); | ||
2902 | break; | ||
2903 | |||
2904 | case BMAP_LEFT_CONTIG: | ||
2905 | /* | ||
2906 | * New allocation is contiguous with a delayed allocation | ||
2907 | * on the left. | ||
2908 | * Merge the new allocation with the left neighbor. | ||
2909 | */ | ||
2910 | --*idx; | ||
2911 | temp = left.br_blockcount + new->br_blockcount; | ||
2912 | |||
2913 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
2914 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); | ||
2915 | oldlen = startblockval(left.br_startblock) + | ||
2916 | startblockval(new->br_startblock); | ||
2917 | newlen = xfs_bmap_worst_indlen(ip, temp); | ||
2918 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), | ||
2919 | nullstartblock((int)newlen)); | ||
2920 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2921 | break; | ||
2922 | |||
2923 | case BMAP_RIGHT_CONTIG: | ||
2924 | /* | ||
2925 | * New allocation is contiguous with a delayed allocation | ||
2926 | * on the right. | ||
2927 | * Merge the new allocation with the right neighbor. | ||
2928 | */ | ||
2929 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
2930 | temp = new->br_blockcount + right.br_blockcount; | ||
2931 | oldlen = startblockval(new->br_startblock) + | ||
2932 | startblockval(right.br_startblock); | ||
2933 | newlen = xfs_bmap_worst_indlen(ip, temp); | ||
2934 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), | ||
2935 | new->br_startoff, | ||
2936 | nullstartblock((int)newlen), temp, right.br_state); | ||
2937 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
2938 | break; | ||
2939 | |||
2940 | case 0: | ||
2941 | /* | ||
2942 | * New allocation is not contiguous with another | ||
2943 | * delayed allocation. | ||
2944 | * Insert a new entry. | ||
2945 | */ | ||
2946 | oldlen = newlen = 0; | ||
2947 | xfs_iext_insert(ip, *idx, 1, new, state); | ||
2948 | break; | ||
2949 | } | ||
2950 | if (oldlen != newlen) { | ||
2951 | ASSERT(oldlen > newlen); | ||
2952 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, | ||
2953 | (int64_t)(oldlen - newlen), 0); | ||
2954 | /* | ||
2955 | * Nothing to do for disk quota accounting here. | ||
2956 | */ | ||
2957 | } | ||
2958 | } | ||
2959 | |||
2960 | /* | ||
2961 | * Convert a hole to a real allocation. | ||
2962 | */ | ||
2963 | STATIC int /* error */ | ||
2964 | xfs_bmap_add_extent_hole_real( | ||
2965 | struct xfs_bmalloca *bma, | ||
2966 | int whichfork) | ||
2967 | { | ||
2968 | struct xfs_bmbt_irec *new = &bma->got; | ||
2969 | int error; /* error return value */ | ||
2970 | int i; /* temp state */ | ||
2971 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
2972 | xfs_bmbt_irec_t left; /* left neighbor extent entry */ | ||
2973 | xfs_bmbt_irec_t right; /* right neighbor extent entry */ | ||
2974 | int rval=0; /* return value (logging flags) */ | ||
2975 | int state; /* state bits, accessed thru macros */ | ||
2976 | |||
2977 | ifp = XFS_IFORK_PTR(bma->ip, whichfork); | ||
2978 | |||
2979 | ASSERT(bma->idx >= 0); | ||
2980 | ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); | ||
2981 | ASSERT(!isnullstartblock(new->br_startblock)); | ||
2982 | ASSERT(!bma->cur || | ||
2983 | !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); | ||
2984 | |||
2985 | XFS_STATS_INC(xs_add_exlist); | ||
2986 | |||
2987 | state = 0; | ||
2988 | if (whichfork == XFS_ATTR_FORK) | ||
2989 | state |= BMAP_ATTRFORK; | ||
2990 | |||
2991 | /* | ||
2992 | * Check and set flags if this segment has a left neighbor. | ||
2993 | */ | ||
2994 | if (bma->idx > 0) { | ||
2995 | state |= BMAP_LEFT_VALID; | ||
2996 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left); | ||
2997 | if (isnullstartblock(left.br_startblock)) | ||
2998 | state |= BMAP_LEFT_DELAY; | ||
2999 | } | ||
3000 | |||
3001 | /* | ||
3002 | * Check and set flags if this segment has a current value. | ||
3003 | * Not true if we're inserting into the "hole" at eof. | ||
3004 | */ | ||
3005 | if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { | ||
3006 | state |= BMAP_RIGHT_VALID; | ||
3007 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); | ||
3008 | if (isnullstartblock(right.br_startblock)) | ||
3009 | state |= BMAP_RIGHT_DELAY; | ||
3010 | } | ||
3011 | |||
3012 | /* | ||
3013 | * We're inserting a real allocation between "left" and "right". | ||
3014 | * Set the contiguity flags. Don't let extents get too large. | ||
3015 | */ | ||
3016 | if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && | ||
3017 | left.br_startoff + left.br_blockcount == new->br_startoff && | ||
3018 | left.br_startblock + left.br_blockcount == new->br_startblock && | ||
3019 | left.br_state == new->br_state && | ||
3020 | left.br_blockcount + new->br_blockcount <= MAXEXTLEN) | ||
3021 | state |= BMAP_LEFT_CONTIG; | ||
3022 | |||
3023 | if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && | ||
3024 | new->br_startoff + new->br_blockcount == right.br_startoff && | ||
3025 | new->br_startblock + new->br_blockcount == right.br_startblock && | ||
3026 | new->br_state == right.br_state && | ||
3027 | new->br_blockcount + right.br_blockcount <= MAXEXTLEN && | ||
3028 | (!(state & BMAP_LEFT_CONTIG) || | ||
3029 | left.br_blockcount + new->br_blockcount + | ||
3030 | right.br_blockcount <= MAXEXTLEN)) | ||
3031 | state |= BMAP_RIGHT_CONTIG; | ||
3032 | |||
3033 | error = 0; | ||
3034 | /* | ||
3035 | * Select which case we're in here, and implement it. | ||
3036 | */ | ||
3037 | switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { | ||
3038 | case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: | ||
3039 | /* | ||
3040 | * New allocation is contiguous with real allocations on the | ||
3041 | * left and on the right. | ||
3042 | * Merge all three into a single extent record. | ||
3043 | */ | ||
3044 | --bma->idx; | ||
3045 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
3046 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), | ||
3047 | left.br_blockcount + new->br_blockcount + | ||
3048 | right.br_blockcount); | ||
3049 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
3050 | |||
3051 | xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); | ||
3052 | |||
3053 | XFS_IFORK_NEXT_SET(bma->ip, whichfork, | ||
3054 | XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1); | ||
3055 | if (bma->cur == NULL) { | ||
3056 | rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); | ||
3057 | } else { | ||
3058 | rval = XFS_ILOG_CORE; | ||
3059 | error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff, | ||
3060 | right.br_startblock, right.br_blockcount, | ||
3061 | &i); | ||
3062 | if (error) | ||
3063 | goto done; | ||
3064 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
3065 | error = xfs_btree_delete(bma->cur, &i); | ||
3066 | if (error) | ||
3067 | goto done; | ||
3068 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
3069 | error = xfs_btree_decrement(bma->cur, 0, &i); | ||
3070 | if (error) | ||
3071 | goto done; | ||
3072 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
3073 | error = xfs_bmbt_update(bma->cur, left.br_startoff, | ||
3074 | left.br_startblock, | ||
3075 | left.br_blockcount + | ||
3076 | new->br_blockcount + | ||
3077 | right.br_blockcount, | ||
3078 | left.br_state); | ||
3079 | if (error) | ||
3080 | goto done; | ||
3081 | } | ||
3082 | break; | ||
3083 | |||
3084 | case BMAP_LEFT_CONTIG: | ||
3085 | /* | ||
3086 | * New allocation is contiguous with a real allocation | ||
3087 | * on the left. | ||
3088 | * Merge the new allocation with the left neighbor. | ||
3089 | */ | ||
3090 | --bma->idx; | ||
3091 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
3092 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), | ||
3093 | left.br_blockcount + new->br_blockcount); | ||
3094 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
3095 | |||
3096 | if (bma->cur == NULL) { | ||
3097 | rval = xfs_ilog_fext(whichfork); | ||
3098 | } else { | ||
3099 | rval = 0; | ||
3100 | error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff, | ||
3101 | left.br_startblock, left.br_blockcount, | ||
3102 | &i); | ||
3103 | if (error) | ||
3104 | goto done; | ||
3105 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
3106 | error = xfs_bmbt_update(bma->cur, left.br_startoff, | ||
3107 | left.br_startblock, | ||
3108 | left.br_blockcount + | ||
3109 | new->br_blockcount, | ||
3110 | left.br_state); | ||
3111 | if (error) | ||
3112 | goto done; | ||
3113 | } | ||
3114 | break; | ||
3115 | |||
3116 | case BMAP_RIGHT_CONTIG: | ||
3117 | /* | ||
3118 | * New allocation is contiguous with a real allocation | ||
3119 | * on the right. | ||
3120 | * Merge the new allocation with the right neighbor. | ||
3121 | */ | ||
3122 | trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
3123 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx), | ||
3124 | new->br_startoff, new->br_startblock, | ||
3125 | new->br_blockcount + right.br_blockcount, | ||
3126 | right.br_state); | ||
3127 | trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); | ||
3128 | |||
3129 | if (bma->cur == NULL) { | ||
3130 | rval = xfs_ilog_fext(whichfork); | ||
3131 | } else { | ||
3132 | rval = 0; | ||
3133 | error = xfs_bmbt_lookup_eq(bma->cur, | ||
3134 | right.br_startoff, | ||
3135 | right.br_startblock, | ||
3136 | right.br_blockcount, &i); | ||
3137 | if (error) | ||
3138 | goto done; | ||
3139 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
3140 | error = xfs_bmbt_update(bma->cur, new->br_startoff, | ||
3141 | new->br_startblock, | ||
3142 | new->br_blockcount + | ||
3143 | right.br_blockcount, | ||
3144 | right.br_state); | ||
3145 | if (error) | ||
3146 | goto done; | ||
3147 | } | ||
3148 | break; | ||
3149 | |||
3150 | case 0: | ||
3151 | /* | ||
3152 | * New allocation is not contiguous with another | ||
3153 | * real allocation. | ||
3154 | * Insert a new entry. | ||
3155 | */ | ||
3156 | xfs_iext_insert(bma->ip, bma->idx, 1, new, state); | ||
3157 | XFS_IFORK_NEXT_SET(bma->ip, whichfork, | ||
3158 | XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1); | ||
3159 | if (bma->cur == NULL) { | ||
3160 | rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); | ||
3161 | } else { | ||
3162 | rval = XFS_ILOG_CORE; | ||
3163 | error = xfs_bmbt_lookup_eq(bma->cur, | ||
3164 | new->br_startoff, | ||
3165 | new->br_startblock, | ||
3166 | new->br_blockcount, &i); | ||
3167 | if (error) | ||
3168 | goto done; | ||
3169 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | ||
3170 | bma->cur->bc_rec.b.br_state = new->br_state; | ||
3171 | error = xfs_btree_insert(bma->cur, &i); | ||
3172 | if (error) | ||
3173 | goto done; | ||
3174 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
3175 | } | ||
3176 | break; | ||
3177 | } | ||
3178 | |||
3179 | /* convert to a btree if necessary */ | ||
3180 | if (xfs_bmap_needs_btree(bma->ip, whichfork)) { | ||
3181 | int tmp_logflags; /* partial log flag return val */ | ||
3182 | |||
3183 | ASSERT(bma->cur == NULL); | ||
3184 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, | ||
3185 | bma->firstblock, bma->flist, &bma->cur, | ||
3186 | 0, &tmp_logflags, whichfork); | ||
3187 | bma->logflags |= tmp_logflags; | ||
3188 | if (error) | ||
3189 | goto done; | ||
3190 | } | ||
3191 | |||
3192 | /* clear out the allocated field, done with it now in any case. */ | ||
3193 | if (bma->cur) | ||
3194 | bma->cur->bc_private.b.allocated = 0; | ||
3195 | |||
3196 | xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); | ||
3197 | done: | ||
3198 | bma->logflags |= rval; | ||
3199 | return error; | ||
3200 | } | ||
3201 | |||
3202 | /* | ||
3203 | * Functions used in the extent read, allocate and remove paths | ||
3204 | */ | ||
3205 | |||
3206 | /* | ||
3207 | * Adjust the size of the new extent based on di_extsize and rt extsize. | ||
3208 | */ | ||
3209 | int | ||
3210 | xfs_bmap_extsize_align( | ||
3211 | xfs_mount_t *mp, | ||
3212 | xfs_bmbt_irec_t *gotp, /* next extent pointer */ | ||
3213 | xfs_bmbt_irec_t *prevp, /* previous extent pointer */ | ||
3214 | xfs_extlen_t extsz, /* align to this extent size */ | ||
3215 | int rt, /* is this a realtime inode? */ | ||
3216 | int eof, /* is extent at end-of-file? */ | ||
3217 | int delay, /* creating delalloc extent? */ | ||
3218 | int convert, /* overwriting unwritten extent? */ | ||
3219 | xfs_fileoff_t *offp, /* in/out: aligned offset */ | ||
3220 | xfs_extlen_t *lenp) /* in/out: aligned length */ | ||
3221 | { | ||
3222 | xfs_fileoff_t orig_off; /* original offset */ | ||
3223 | xfs_extlen_t orig_alen; /* original length */ | ||
3224 | xfs_fileoff_t orig_end; /* original off+len */ | ||
3225 | xfs_fileoff_t nexto; /* next file offset */ | ||
3226 | xfs_fileoff_t prevo; /* previous file offset */ | ||
3227 | xfs_fileoff_t align_off; /* temp for offset */ | ||
3228 | xfs_extlen_t align_alen; /* temp for length */ | ||
3229 | xfs_extlen_t temp; /* temp for calculations */ | ||
3230 | |||
3231 | if (convert) | ||
3232 | return 0; | ||
3233 | |||
3234 | orig_off = align_off = *offp; | ||
3235 | orig_alen = align_alen = *lenp; | ||
3236 | orig_end = orig_off + orig_alen; | ||
3237 | |||
3238 | /* | ||
3239 | * If this request overlaps an existing extent, then don't | ||
3240 | * attempt to perform any additional alignment. | ||
3241 | */ | ||
3242 | if (!delay && !eof && | ||
3243 | (orig_off >= gotp->br_startoff) && | ||
3244 | (orig_end <= gotp->br_startoff + gotp->br_blockcount)) { | ||
3245 | return 0; | ||
3246 | } | ||
3247 | |||
3248 | /* | ||
3249 | * If the file offset is unaligned vs. the extent size | ||
3250 | * we need to align it. This will be possible unless | ||
3251 | * the file was previously written with a kernel that didn't | ||
3252 | * perform this alignment, or if a truncate shot us in the | ||
3253 | * foot. | ||
3254 | */ | ||
3255 | temp = do_mod(orig_off, extsz); | ||
3256 | if (temp) { | ||
3257 | align_alen += temp; | ||
3258 | align_off -= temp; | ||
3259 | } | ||
3260 | /* | ||
3261 | * Same adjustment for the end of the requested area. | ||
3262 | */ | ||
3263 | if ((temp = (align_alen % extsz))) { | ||
3264 | align_alen += extsz - temp; | ||
3265 | } | ||
3266 | /* | ||
3267 | * If the previous block overlaps with this proposed allocation | ||
3268 | * then move the start forward without adjusting the length. | ||
3269 | */ | ||
3270 | if (prevp->br_startoff != NULLFILEOFF) { | ||
3271 | if (prevp->br_startblock == HOLESTARTBLOCK) | ||
3272 | prevo = prevp->br_startoff; | ||
3273 | else | ||
3274 | prevo = prevp->br_startoff + prevp->br_blockcount; | ||
3275 | } else | ||
3276 | prevo = 0; | ||
3277 | if (align_off != orig_off && align_off < prevo) | ||
3278 | align_off = prevo; | ||
3279 | /* | ||
3280 | * If the next block overlaps with this proposed allocation | ||
3281 | * then move the start back without adjusting the length, | ||
3282 | * but not before offset 0. | ||
3283 | * This may of course make the start overlap previous block, | ||
3284 | * and if we hit the offset 0 limit then the next block | ||
3285 | * can still overlap too. | ||
3286 | */ | ||
3287 | if (!eof && gotp->br_startoff != NULLFILEOFF) { | ||
3288 | if ((delay && gotp->br_startblock == HOLESTARTBLOCK) || | ||
3289 | (!delay && gotp->br_startblock == DELAYSTARTBLOCK)) | ||
3290 | nexto = gotp->br_startoff + gotp->br_blockcount; | ||
3291 | else | ||
3292 | nexto = gotp->br_startoff; | ||
3293 | } else | ||
3294 | nexto = NULLFILEOFF; | ||
3295 | if (!eof && | ||
3296 | align_off + align_alen != orig_end && | ||
3297 | align_off + align_alen > nexto) | ||
3298 | align_off = nexto > align_alen ? nexto - align_alen : 0; | ||
3299 | /* | ||
3300 | * If we're now overlapping the next or previous extent that | ||
3301 | * means we can't fit an extsz piece in this hole. Just move | ||
3302 | * the start forward to the first valid spot and set | ||
3303 | * the length so we hit the end. | ||
3304 | */ | ||
3305 | if (align_off != orig_off && align_off < prevo) | ||
3306 | align_off = prevo; | ||
3307 | if (align_off + align_alen != orig_end && | ||
3308 | align_off + align_alen > nexto && | ||
3309 | nexto != NULLFILEOFF) { | ||
3310 | ASSERT(nexto > prevo); | ||
3311 | align_alen = nexto - align_off; | ||
3312 | } | ||
3313 | |||
3314 | /* | ||
3315 | * If realtime, and the result isn't a multiple of the realtime | ||
3316 | * extent size we need to remove blocks until it is. | ||
3317 | */ | ||
3318 | if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) { | ||
3319 | /* | ||
3320 | * We're not covering the original request, or | ||
3321 | * we won't be able to once we fix the length. | ||
3322 | */ | ||
3323 | if (orig_off < align_off || | ||
3324 | orig_end > align_off + align_alen || | ||
3325 | align_alen - temp < orig_alen) | ||
3326 | return -EINVAL; | ||
3327 | /* | ||
3328 | * Try to fix it by moving the start up. | ||
3329 | */ | ||
3330 | if (align_off + temp <= orig_off) { | ||
3331 | align_alen -= temp; | ||
3332 | align_off += temp; | ||
3333 | } | ||
3334 | /* | ||
3335 | * Try to fix it by moving the end in. | ||
3336 | */ | ||
3337 | else if (align_off + align_alen - temp >= orig_end) | ||
3338 | align_alen -= temp; | ||
3339 | /* | ||
3340 | * Set the start to the minimum then trim the length. | ||
3341 | */ | ||
3342 | else { | ||
3343 | align_alen -= orig_off - align_off; | ||
3344 | align_off = orig_off; | ||
3345 | align_alen -= align_alen % mp->m_sb.sb_rextsize; | ||
3346 | } | ||
3347 | /* | ||
3348 | * Result doesn't cover the request, fail it. | ||
3349 | */ | ||
3350 | if (orig_off < align_off || orig_end > align_off + align_alen) | ||
3351 | return -EINVAL; | ||
3352 | } else { | ||
3353 | ASSERT(orig_off >= align_off); | ||
3354 | ASSERT(orig_end <= align_off + align_alen); | ||
3355 | } | ||
3356 | |||
3357 | #ifdef DEBUG | ||
3358 | if (!eof && gotp->br_startoff != NULLFILEOFF) | ||
3359 | ASSERT(align_off + align_alen <= gotp->br_startoff); | ||
3360 | if (prevp->br_startoff != NULLFILEOFF) | ||
3361 | ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount); | ||
3362 | #endif | ||
3363 | |||
3364 | *lenp = align_alen; | ||
3365 | *offp = align_off; | ||
3366 | return 0; | ||
3367 | } | ||
3368 | |||
3369 | #define XFS_ALLOC_GAP_UNITS 4 | ||
3370 | |||
3371 | void | ||
3372 | xfs_bmap_adjacent( | ||
3373 | struct xfs_bmalloca *ap) /* bmap alloc argument struct */ | ||
3374 | { | ||
3375 | xfs_fsblock_t adjust; /* adjustment to block numbers */ | ||
3376 | xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ | ||
3377 | xfs_mount_t *mp; /* mount point structure */ | ||
3378 | int nullfb; /* true if ap->firstblock isn't set */ | ||
3379 | int rt; /* true if inode is realtime */ | ||
3380 | |||
3381 | #define ISVALID(x,y) \ | ||
3382 | (rt ? \ | ||
3383 | (x) < mp->m_sb.sb_rblocks : \ | ||
3384 | XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \ | ||
3385 | XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \ | ||
3386 | XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) | ||
3387 | |||
3388 | mp = ap->ip->i_mount; | ||
3389 | nullfb = *ap->firstblock == NULLFSBLOCK; | ||
3390 | rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata; | ||
3391 | fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); | ||
3392 | /* | ||
3393 | * If allocating at eof, and there's a previous real block, | ||
3394 | * try to use its last block as our starting point. | ||
3395 | */ | ||
3396 | if (ap->eof && ap->prev.br_startoff != NULLFILEOFF && | ||
3397 | !isnullstartblock(ap->prev.br_startblock) && | ||
3398 | ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount, | ||
3399 | ap->prev.br_startblock)) { | ||
3400 | ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount; | ||
3401 | /* | ||
3402 | * Adjust for the gap between prevp and us. | ||
3403 | */ | ||
3404 | adjust = ap->offset - | ||
3405 | (ap->prev.br_startoff + ap->prev.br_blockcount); | ||
3406 | if (adjust && | ||
3407 | ISVALID(ap->blkno + adjust, ap->prev.br_startblock)) | ||
3408 | ap->blkno += adjust; | ||
3409 | } | ||
3410 | /* | ||
3411 | * If not at eof, then compare the two neighbor blocks. | ||
3412 | * Figure out whether either one gives us a good starting point, | ||
3413 | * and pick the better one. | ||
3414 | */ | ||
3415 | else if (!ap->eof) { | ||
3416 | xfs_fsblock_t gotbno; /* right side block number */ | ||
3417 | xfs_fsblock_t gotdiff=0; /* right side difference */ | ||
3418 | xfs_fsblock_t prevbno; /* left side block number */ | ||
3419 | xfs_fsblock_t prevdiff=0; /* left side difference */ | ||
3420 | |||
3421 | /* | ||
3422 | * If there's a previous (left) block, select a requested | ||
3423 | * start block based on it. | ||
3424 | */ | ||
3425 | if (ap->prev.br_startoff != NULLFILEOFF && | ||
3426 | !isnullstartblock(ap->prev.br_startblock) && | ||
3427 | (prevbno = ap->prev.br_startblock + | ||
3428 | ap->prev.br_blockcount) && | ||
3429 | ISVALID(prevbno, ap->prev.br_startblock)) { | ||
3430 | /* | ||
3431 | * Calculate gap to end of previous block. | ||
3432 | */ | ||
3433 | adjust = prevdiff = ap->offset - | ||
3434 | (ap->prev.br_startoff + | ||
3435 | ap->prev.br_blockcount); | ||
3436 | /* | ||
3437 | * Figure the startblock based on the previous block's | ||
3438 | * end and the gap size. | ||
3439 | * Heuristic! | ||
3440 | * If the gap is large relative to the piece we're | ||
3441 | * allocating, or using it gives us an invalid block | ||
3442 | * number, then just use the end of the previous block. | ||
3443 | */ | ||
3444 | if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length && | ||
3445 | ISVALID(prevbno + prevdiff, | ||
3446 | ap->prev.br_startblock)) | ||
3447 | prevbno += adjust; | ||
3448 | else | ||
3449 | prevdiff += adjust; | ||
3450 | /* | ||
3451 | * If the firstblock forbids it, can't use it, | ||
3452 | * must use default. | ||
3453 | */ | ||
3454 | if (!rt && !nullfb && | ||
3455 | XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno) | ||
3456 | prevbno = NULLFSBLOCK; | ||
3457 | } | ||
3458 | /* | ||
3459 | * No previous block or can't follow it, just default. | ||
3460 | */ | ||
3461 | else | ||
3462 | prevbno = NULLFSBLOCK; | ||
3463 | /* | ||
3464 | * If there's a following (right) block, select a requested | ||
3465 | * start block based on it. | ||
3466 | */ | ||
3467 | if (!isnullstartblock(ap->got.br_startblock)) { | ||
3468 | /* | ||
3469 | * Calculate gap to start of next block. | ||
3470 | */ | ||
3471 | adjust = gotdiff = ap->got.br_startoff - ap->offset; | ||
3472 | /* | ||
3473 | * Figure the startblock based on the next block's | ||
3474 | * start and the gap size. | ||
3475 | */ | ||
3476 | gotbno = ap->got.br_startblock; | ||
3477 | /* | ||
3478 | * Heuristic! | ||
3479 | * If the gap is large relative to the piece we're | ||
3480 | * allocating, or using it gives us an invalid block | ||
3481 | * number, then just use the start of the next block | ||
3482 | * offset by our length. | ||
3483 | */ | ||
3484 | if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length && | ||
3485 | ISVALID(gotbno - gotdiff, gotbno)) | ||
3486 | gotbno -= adjust; | ||
3487 | else if (ISVALID(gotbno - ap->length, gotbno)) { | ||
3488 | gotbno -= ap->length; | ||
3489 | gotdiff += adjust - ap->length; | ||
3490 | } else | ||
3491 | gotdiff += adjust; | ||
3492 | /* | ||
3493 | * If the firstblock forbids it, can't use it, | ||
3494 | * must use default. | ||
3495 | */ | ||
3496 | if (!rt && !nullfb && | ||
3497 | XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno) | ||
3498 | gotbno = NULLFSBLOCK; | ||
3499 | } | ||
3500 | /* | ||
3501 | * No next block, just default. | ||
3502 | */ | ||
3503 | else | ||
3504 | gotbno = NULLFSBLOCK; | ||
3505 | /* | ||
3506 | * If both valid, pick the better one, else the only good | ||
3507 | * one, else ap->blkno is already set (to 0 or the inode block). | ||
3508 | */ | ||
3509 | if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) | ||
3510 | ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno; | ||
3511 | else if (prevbno != NULLFSBLOCK) | ||
3512 | ap->blkno = prevbno; | ||
3513 | else if (gotbno != NULLFSBLOCK) | ||
3514 | ap->blkno = gotbno; | ||
3515 | } | ||
3516 | #undef ISVALID | ||
3517 | } | ||
3518 | |||
3519 | static int | ||
3520 | xfs_bmap_longest_free_extent( | ||
3521 | struct xfs_trans *tp, | ||
3522 | xfs_agnumber_t ag, | ||
3523 | xfs_extlen_t *blen, | ||
3524 | int *notinit) | ||
3525 | { | ||
3526 | struct xfs_mount *mp = tp->t_mountp; | ||
3527 | struct xfs_perag *pag; | ||
3528 | xfs_extlen_t longest; | ||
3529 | int error = 0; | ||
3530 | |||
3531 | pag = xfs_perag_get(mp, ag); | ||
3532 | if (!pag->pagf_init) { | ||
3533 | error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK); | ||
3534 | if (error) | ||
3535 | goto out; | ||
3536 | |||
3537 | if (!pag->pagf_init) { | ||
3538 | *notinit = 1; | ||
3539 | goto out; | ||
3540 | } | ||
3541 | } | ||
3542 | |||
3543 | longest = xfs_alloc_longest_free_extent(mp, pag); | ||
3544 | if (*blen < longest) | ||
3545 | *blen = longest; | ||
3546 | |||
3547 | out: | ||
3548 | xfs_perag_put(pag); | ||
3549 | return error; | ||
3550 | } | ||
3551 | |||
3552 | static void | ||
3553 | xfs_bmap_select_minlen( | ||
3554 | struct xfs_bmalloca *ap, | ||
3555 | struct xfs_alloc_arg *args, | ||
3556 | xfs_extlen_t *blen, | ||
3557 | int notinit) | ||
3558 | { | ||
3559 | if (notinit || *blen < ap->minlen) { | ||
3560 | /* | ||
3561 | * Since we did a BUF_TRYLOCK above, it is possible that | ||
3562 | * there is space for this request. | ||
3563 | */ | ||
3564 | args->minlen = ap->minlen; | ||
3565 | } else if (*blen < args->maxlen) { | ||
3566 | /* | ||
3567 | * If the best seen length is less than the request length, | ||
3568 | * use the best as the minimum. | ||
3569 | */ | ||
3570 | args->minlen = *blen; | ||
3571 | } else { | ||
3572 | /* | ||
3573 | * Otherwise we've seen an extent as big as maxlen, use that | ||
3574 | * as the minimum. | ||
3575 | */ | ||
3576 | args->minlen = args->maxlen; | ||
3577 | } | ||
3578 | } | ||
3579 | |||
3580 | STATIC int | ||
3581 | xfs_bmap_btalloc_nullfb( | ||
3582 | struct xfs_bmalloca *ap, | ||
3583 | struct xfs_alloc_arg *args, | ||
3584 | xfs_extlen_t *blen) | ||
3585 | { | ||
3586 | struct xfs_mount *mp = ap->ip->i_mount; | ||
3587 | xfs_agnumber_t ag, startag; | ||
3588 | int notinit = 0; | ||
3589 | int error; | ||
3590 | |||
3591 | args->type = XFS_ALLOCTYPE_START_BNO; | ||
3592 | args->total = ap->total; | ||
3593 | |||
3594 | startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); | ||
3595 | if (startag == NULLAGNUMBER) | ||
3596 | startag = ag = 0; | ||
3597 | |||
3598 | while (*blen < args->maxlen) { | ||
3599 | error = xfs_bmap_longest_free_extent(args->tp, ag, blen, | ||
3600 | ¬init); | ||
3601 | if (error) | ||
3602 | return error; | ||
3603 | |||
3604 | if (++ag == mp->m_sb.sb_agcount) | ||
3605 | ag = 0; | ||
3606 | if (ag == startag) | ||
3607 | break; | ||
3608 | } | ||
3609 | |||
3610 | xfs_bmap_select_minlen(ap, args, blen, notinit); | ||
3611 | return 0; | ||
3612 | } | ||
3613 | |||
3614 | STATIC int | ||
3615 | xfs_bmap_btalloc_filestreams( | ||
3616 | struct xfs_bmalloca *ap, | ||
3617 | struct xfs_alloc_arg *args, | ||
3618 | xfs_extlen_t *blen) | ||
3619 | { | ||
3620 | struct xfs_mount *mp = ap->ip->i_mount; | ||
3621 | xfs_agnumber_t ag; | ||
3622 | int notinit = 0; | ||
3623 | int error; | ||
3624 | |||
3625 | args->type = XFS_ALLOCTYPE_NEAR_BNO; | ||
3626 | args->total = ap->total; | ||
3627 | |||
3628 | ag = XFS_FSB_TO_AGNO(mp, args->fsbno); | ||
3629 | if (ag == NULLAGNUMBER) | ||
3630 | ag = 0; | ||
3631 | |||
3632 | error = xfs_bmap_longest_free_extent(args->tp, ag, blen, ¬init); | ||
3633 | if (error) | ||
3634 | return error; | ||
3635 | |||
3636 | if (*blen < args->maxlen) { | ||
3637 | error = xfs_filestream_new_ag(ap, &ag); | ||
3638 | if (error) | ||
3639 | return error; | ||
3640 | |||
3641 | error = xfs_bmap_longest_free_extent(args->tp, ag, blen, | ||
3642 | ¬init); | ||
3643 | if (error) | ||
3644 | return error; | ||
3645 | |||
3646 | } | ||
3647 | |||
3648 | xfs_bmap_select_minlen(ap, args, blen, notinit); | ||
3649 | |||
3650 | /* | ||
3651 | * Set the failure fallback case to look in the selected AG as stream | ||
3652 | * may have moved. | ||
3653 | */ | ||
3654 | ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); | ||
3655 | return 0; | ||
3656 | } | ||
3657 | |||
3658 | STATIC int | ||
3659 | xfs_bmap_btalloc( | ||
3660 | struct xfs_bmalloca *ap) /* bmap alloc argument struct */ | ||
3661 | { | ||
3662 | xfs_mount_t *mp; /* mount point structure */ | ||
3663 | xfs_alloctype_t atype = 0; /* type for allocation routines */ | ||
3664 | xfs_extlen_t align; /* minimum allocation alignment */ | ||
3665 | xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ | ||
3666 | xfs_agnumber_t ag; | ||
3667 | xfs_alloc_arg_t args; | ||
3668 | xfs_extlen_t blen; | ||
3669 | xfs_extlen_t nextminlen = 0; | ||
3670 | int nullfb; /* true if ap->firstblock isn't set */ | ||
3671 | int isaligned; | ||
3672 | int tryagain; | ||
3673 | int error; | ||
3674 | int stripe_align; | ||
3675 | |||
3676 | ASSERT(ap->length); | ||
3677 | |||
3678 | mp = ap->ip->i_mount; | ||
3679 | |||
3680 | /* stripe alignment for allocation is determined by mount parameters */ | ||
3681 | stripe_align = 0; | ||
3682 | if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC)) | ||
3683 | stripe_align = mp->m_swidth; | ||
3684 | else if (mp->m_dalign) | ||
3685 | stripe_align = mp->m_dalign; | ||
3686 | |||
3687 | align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; | ||
3688 | if (unlikely(align)) { | ||
3689 | error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, | ||
3690 | align, 0, ap->eof, 0, ap->conv, | ||
3691 | &ap->offset, &ap->length); | ||
3692 | ASSERT(!error); | ||
3693 | ASSERT(ap->length); | ||
3694 | } | ||
3695 | |||
3696 | |||
3697 | nullfb = *ap->firstblock == NULLFSBLOCK; | ||
3698 | fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); | ||
3699 | if (nullfb) { | ||
3700 | if (ap->userdata && xfs_inode_is_filestream(ap->ip)) { | ||
3701 | ag = xfs_filestream_lookup_ag(ap->ip); | ||
3702 | ag = (ag != NULLAGNUMBER) ? ag : 0; | ||
3703 | ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0); | ||
3704 | } else { | ||
3705 | ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino); | ||
3706 | } | ||
3707 | } else | ||
3708 | ap->blkno = *ap->firstblock; | ||
3709 | |||
3710 | xfs_bmap_adjacent(ap); | ||
3711 | |||
3712 | /* | ||
3713 | * If allowed, use ap->blkno; otherwise must use firstblock since | ||
3714 | * it's in the right allocation group. | ||
3715 | */ | ||
3716 | if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno) | ||
3717 | ; | ||
3718 | else | ||
3719 | ap->blkno = *ap->firstblock; | ||
3720 | /* | ||
3721 | * Normal allocation, done through xfs_alloc_vextent. | ||
3722 | */ | ||
3723 | tryagain = isaligned = 0; | ||
3724 | memset(&args, 0, sizeof(args)); | ||
3725 | args.tp = ap->tp; | ||
3726 | args.mp = mp; | ||
3727 | args.fsbno = ap->blkno; | ||
3728 | |||
3729 | /* Trim the allocation back to the maximum an AG can fit. */ | ||
3730 | args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp)); | ||
3731 | args.firstblock = *ap->firstblock; | ||
3732 | blen = 0; | ||
3733 | if (nullfb) { | ||
3734 | /* | ||
3735 | * Search for an allocation group with a single extent large | ||
3736 | * enough for the request. If one isn't found, then adjust | ||
3737 | * the minimum allocation size to the largest space found. | ||
3738 | */ | ||
3739 | if (ap->userdata && xfs_inode_is_filestream(ap->ip)) | ||
3740 | error = xfs_bmap_btalloc_filestreams(ap, &args, &blen); | ||
3741 | else | ||
3742 | error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); | ||
3743 | if (error) | ||
3744 | return error; | ||
3745 | } else if (ap->flist->xbf_low) { | ||
3746 | if (xfs_inode_is_filestream(ap->ip)) | ||
3747 | args.type = XFS_ALLOCTYPE_FIRST_AG; | ||
3748 | else | ||
3749 | args.type = XFS_ALLOCTYPE_START_BNO; | ||
3750 | args.total = args.minlen = ap->minlen; | ||
3751 | } else { | ||
3752 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | ||
3753 | args.total = ap->total; | ||
3754 | args.minlen = ap->minlen; | ||
3755 | } | ||
3756 | /* apply extent size hints if obtained earlier */ | ||
3757 | if (unlikely(align)) { | ||
3758 | args.prod = align; | ||
3759 | if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) | ||
3760 | args.mod = (xfs_extlen_t)(args.prod - args.mod); | ||
3761 | } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) { | ||
3762 | args.prod = 1; | ||
3763 | args.mod = 0; | ||
3764 | } else { | ||
3765 | args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog; | ||
3766 | if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod)))) | ||
3767 | args.mod = (xfs_extlen_t)(args.prod - args.mod); | ||
3768 | } | ||
3769 | /* | ||
3770 | * If we are not low on available data blocks, and the | ||
3771 | * underlying logical volume manager is a stripe, and | ||
3772 | * the file offset is zero then try to allocate data | ||
3773 | * blocks on stripe unit boundary. | ||
3774 | * NOTE: ap->aeof is only set if the allocation length | ||
3775 | * is >= the stripe unit and the allocation offset is | ||
3776 | * at the end of file. | ||
3777 | */ | ||
3778 | if (!ap->flist->xbf_low && ap->aeof) { | ||
3779 | if (!ap->offset) { | ||
3780 | args.alignment = stripe_align; | ||
3781 | atype = args.type; | ||
3782 | isaligned = 1; | ||
3783 | /* | ||
3784 | * Adjust for alignment | ||
3785 | */ | ||
3786 | if (blen > args.alignment && blen <= args.maxlen) | ||
3787 | args.minlen = blen - args.alignment; | ||
3788 | args.minalignslop = 0; | ||
3789 | } else { | ||
3790 | /* | ||
3791 | * First try an exact bno allocation. | ||
3792 | * If it fails then do a near or start bno | ||
3793 | * allocation with alignment turned on. | ||
3794 | */ | ||
3795 | atype = args.type; | ||
3796 | tryagain = 1; | ||
3797 | args.type = XFS_ALLOCTYPE_THIS_BNO; | ||
3798 | args.alignment = 1; | ||
3799 | /* | ||
3800 | * Compute the minlen+alignment for the | ||
3801 | * next case. Set slop so that the value | ||
3802 | * of minlen+alignment+slop doesn't go up | ||
3803 | * between the calls. | ||
3804 | */ | ||
3805 | if (blen > stripe_align && blen <= args.maxlen) | ||
3806 | nextminlen = blen - stripe_align; | ||
3807 | else | ||
3808 | nextminlen = args.minlen; | ||
3809 | if (nextminlen + stripe_align > args.minlen + 1) | ||
3810 | args.minalignslop = | ||
3811 | nextminlen + stripe_align - | ||
3812 | args.minlen - 1; | ||
3813 | else | ||
3814 | args.minalignslop = 0; | ||
3815 | } | ||
3816 | } else { | ||
3817 | args.alignment = 1; | ||
3818 | args.minalignslop = 0; | ||
3819 | } | ||
3820 | args.minleft = ap->minleft; | ||
3821 | args.wasdel = ap->wasdel; | ||
3822 | args.isfl = 0; | ||
3823 | args.userdata = ap->userdata; | ||
3824 | if ((error = xfs_alloc_vextent(&args))) | ||
3825 | return error; | ||
3826 | if (tryagain && args.fsbno == NULLFSBLOCK) { | ||
3827 | /* | ||
3828 | * Exact allocation failed. Now try with alignment | ||
3829 | * turned on. | ||
3830 | */ | ||
3831 | args.type = atype; | ||
3832 | args.fsbno = ap->blkno; | ||
3833 | args.alignment = stripe_align; | ||
3834 | args.minlen = nextminlen; | ||
3835 | args.minalignslop = 0; | ||
3836 | isaligned = 1; | ||
3837 | if ((error = xfs_alloc_vextent(&args))) | ||
3838 | return error; | ||
3839 | } | ||
3840 | if (isaligned && args.fsbno == NULLFSBLOCK) { | ||
3841 | /* | ||
3842 | * allocation failed, so turn off alignment and | ||
3843 | * try again. | ||
3844 | */ | ||
3845 | args.type = atype; | ||
3846 | args.fsbno = ap->blkno; | ||
3847 | args.alignment = 0; | ||
3848 | if ((error = xfs_alloc_vextent(&args))) | ||
3849 | return error; | ||
3850 | } | ||
3851 | if (args.fsbno == NULLFSBLOCK && nullfb && | ||
3852 | args.minlen > ap->minlen) { | ||
3853 | args.minlen = ap->minlen; | ||
3854 | args.type = XFS_ALLOCTYPE_START_BNO; | ||
3855 | args.fsbno = ap->blkno; | ||
3856 | if ((error = xfs_alloc_vextent(&args))) | ||
3857 | return error; | ||
3858 | } | ||
3859 | if (args.fsbno == NULLFSBLOCK && nullfb) { | ||
3860 | args.fsbno = 0; | ||
3861 | args.type = XFS_ALLOCTYPE_FIRST_AG; | ||
3862 | args.total = ap->minlen; | ||
3863 | args.minleft = 0; | ||
3864 | if ((error = xfs_alloc_vextent(&args))) | ||
3865 | return error; | ||
3866 | ap->flist->xbf_low = 1; | ||
3867 | } | ||
3868 | if (args.fsbno != NULLFSBLOCK) { | ||
3869 | /* | ||
3870 | * check the allocation happened at the same or higher AG than | ||
3871 | * the first block that was allocated. | ||
3872 | */ | ||
3873 | ASSERT(*ap->firstblock == NULLFSBLOCK || | ||
3874 | XFS_FSB_TO_AGNO(mp, *ap->firstblock) == | ||
3875 | XFS_FSB_TO_AGNO(mp, args.fsbno) || | ||
3876 | (ap->flist->xbf_low && | ||
3877 | XFS_FSB_TO_AGNO(mp, *ap->firstblock) < | ||
3878 | XFS_FSB_TO_AGNO(mp, args.fsbno))); | ||
3879 | |||
3880 | ap->blkno = args.fsbno; | ||
3881 | if (*ap->firstblock == NULLFSBLOCK) | ||
3882 | *ap->firstblock = args.fsbno; | ||
3883 | ASSERT(nullfb || fb_agno == args.agno || | ||
3884 | (ap->flist->xbf_low && fb_agno < args.agno)); | ||
3885 | ap->length = args.len; | ||
3886 | ap->ip->i_d.di_nblocks += args.len; | ||
3887 | xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); | ||
3888 | if (ap->wasdel) | ||
3889 | ap->ip->i_delayed_blks -= args.len; | ||
3890 | /* | ||
3891 | * Adjust the disk quota also. This was reserved | ||
3892 | * earlier. | ||
3893 | */ | ||
3894 | xfs_trans_mod_dquot_byino(ap->tp, ap->ip, | ||
3895 | ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : | ||
3896 | XFS_TRANS_DQ_BCOUNT, | ||
3897 | (long) args.len); | ||
3898 | } else { | ||
3899 | ap->blkno = NULLFSBLOCK; | ||
3900 | ap->length = 0; | ||
3901 | } | ||
3902 | return 0; | ||
3903 | } | ||
3904 | |||
3905 | /* | ||
3906 | * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. | ||
3907 | * It figures out where to ask the underlying allocator to put the new extent. | ||
3908 | */ | ||
3909 | STATIC int | ||
3910 | xfs_bmap_alloc( | ||
3911 | struct xfs_bmalloca *ap) /* bmap alloc argument struct */ | ||
3912 | { | ||
3913 | if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata) | ||
3914 | return xfs_bmap_rtalloc(ap); | ||
3915 | return xfs_bmap_btalloc(ap); | ||
3916 | } | ||
3917 | |||
3918 | /* | ||
3919 | * Trim the returned map to the required bounds | ||
3920 | */ | ||
3921 | STATIC void | ||
3922 | xfs_bmapi_trim_map( | ||
3923 | struct xfs_bmbt_irec *mval, | ||
3924 | struct xfs_bmbt_irec *got, | ||
3925 | xfs_fileoff_t *bno, | ||
3926 | xfs_filblks_t len, | ||
3927 | xfs_fileoff_t obno, | ||
3928 | xfs_fileoff_t end, | ||
3929 | int n, | ||
3930 | int flags) | ||
3931 | { | ||
3932 | if ((flags & XFS_BMAPI_ENTIRE) || | ||
3933 | got->br_startoff + got->br_blockcount <= obno) { | ||
3934 | *mval = *got; | ||
3935 | if (isnullstartblock(got->br_startblock)) | ||
3936 | mval->br_startblock = DELAYSTARTBLOCK; | ||
3937 | return; | ||
3938 | } | ||
3939 | |||
3940 | if (obno > *bno) | ||
3941 | *bno = obno; | ||
3942 | ASSERT((*bno >= obno) || (n == 0)); | ||
3943 | ASSERT(*bno < end); | ||
3944 | mval->br_startoff = *bno; | ||
3945 | if (isnullstartblock(got->br_startblock)) | ||
3946 | mval->br_startblock = DELAYSTARTBLOCK; | ||
3947 | else | ||
3948 | mval->br_startblock = got->br_startblock + | ||
3949 | (*bno - got->br_startoff); | ||
3950 | /* | ||
3951 | * Return the minimum of what we got and what we asked for for | ||
3952 | * the length. We can use the len variable here because it is | ||
3953 | * modified below and we could have been there before coming | ||
3954 | * here if the first part of the allocation didn't overlap what | ||
3955 | * was asked for. | ||
3956 | */ | ||
3957 | mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno, | ||
3958 | got->br_blockcount - (*bno - got->br_startoff)); | ||
3959 | mval->br_state = got->br_state; | ||
3960 | ASSERT(mval->br_blockcount <= len); | ||
3961 | return; | ||
3962 | } | ||
3963 | |||
3964 | /* | ||
3965 | * Update and validate the extent map to return | ||
3966 | */ | ||
3967 | STATIC void | ||
3968 | xfs_bmapi_update_map( | ||
3969 | struct xfs_bmbt_irec **map, | ||
3970 | xfs_fileoff_t *bno, | ||
3971 | xfs_filblks_t *len, | ||
3972 | xfs_fileoff_t obno, | ||
3973 | xfs_fileoff_t end, | ||
3974 | int *n, | ||
3975 | int flags) | ||
3976 | { | ||
3977 | xfs_bmbt_irec_t *mval = *map; | ||
3978 | |||
3979 | ASSERT((flags & XFS_BMAPI_ENTIRE) || | ||
3980 | ((mval->br_startoff + mval->br_blockcount) <= end)); | ||
3981 | ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) || | ||
3982 | (mval->br_startoff < obno)); | ||
3983 | |||
3984 | *bno = mval->br_startoff + mval->br_blockcount; | ||
3985 | *len = end - *bno; | ||
3986 | if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) { | ||
3987 | /* update previous map with new information */ | ||
3988 | ASSERT(mval->br_startblock == mval[-1].br_startblock); | ||
3989 | ASSERT(mval->br_blockcount > mval[-1].br_blockcount); | ||
3990 | ASSERT(mval->br_state == mval[-1].br_state); | ||
3991 | mval[-1].br_blockcount = mval->br_blockcount; | ||
3992 | mval[-1].br_state = mval->br_state; | ||
3993 | } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK && | ||
3994 | mval[-1].br_startblock != DELAYSTARTBLOCK && | ||
3995 | mval[-1].br_startblock != HOLESTARTBLOCK && | ||
3996 | mval->br_startblock == mval[-1].br_startblock + | ||
3997 | mval[-1].br_blockcount && | ||
3998 | ((flags & XFS_BMAPI_IGSTATE) || | ||
3999 | mval[-1].br_state == mval->br_state)) { | ||
4000 | ASSERT(mval->br_startoff == | ||
4001 | mval[-1].br_startoff + mval[-1].br_blockcount); | ||
4002 | mval[-1].br_blockcount += mval->br_blockcount; | ||
4003 | } else if (*n > 0 && | ||
4004 | mval->br_startblock == DELAYSTARTBLOCK && | ||
4005 | mval[-1].br_startblock == DELAYSTARTBLOCK && | ||
4006 | mval->br_startoff == | ||
4007 | mval[-1].br_startoff + mval[-1].br_blockcount) { | ||
4008 | mval[-1].br_blockcount += mval->br_blockcount; | ||
4009 | mval[-1].br_state = mval->br_state; | ||
4010 | } else if (!((*n == 0) && | ||
4011 | ((mval->br_startoff + mval->br_blockcount) <= | ||
4012 | obno))) { | ||
4013 | mval++; | ||
4014 | (*n)++; | ||
4015 | } | ||
4016 | *map = mval; | ||
4017 | } | ||
4018 | |||
4019 | /* | ||
4020 | * Map file blocks to filesystem blocks without allocation. | ||
4021 | */ | ||
4022 | int | ||
4023 | xfs_bmapi_read( | ||
4024 | struct xfs_inode *ip, | ||
4025 | xfs_fileoff_t bno, | ||
4026 | xfs_filblks_t len, | ||
4027 | struct xfs_bmbt_irec *mval, | ||
4028 | int *nmap, | ||
4029 | int flags) | ||
4030 | { | ||
4031 | struct xfs_mount *mp = ip->i_mount; | ||
4032 | struct xfs_ifork *ifp; | ||
4033 | struct xfs_bmbt_irec got; | ||
4034 | struct xfs_bmbt_irec prev; | ||
4035 | xfs_fileoff_t obno; | ||
4036 | xfs_fileoff_t end; | ||
4037 | xfs_extnum_t lastx; | ||
4038 | int error; | ||
4039 | int eof; | ||
4040 | int n = 0; | ||
4041 | int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? | ||
4042 | XFS_ATTR_FORK : XFS_DATA_FORK; | ||
4043 | |||
4044 | ASSERT(*nmap >= 1); | ||
4045 | ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE| | ||
4046 | XFS_BMAPI_IGSTATE))); | ||
4047 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)); | ||
4048 | |||
4049 | if (unlikely(XFS_TEST_ERROR( | ||
4050 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | ||
4051 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), | ||
4052 | mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { | ||
4053 | XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp); | ||
4054 | return -EFSCORRUPTED; | ||
4055 | } | ||
4056 | |||
4057 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
4058 | return -EIO; | ||
4059 | |||
4060 | XFS_STATS_INC(xs_blk_mapr); | ||
4061 | |||
4062 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
4063 | |||
4064 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | ||
4065 | error = xfs_iread_extents(NULL, ip, whichfork); | ||
4066 | if (error) | ||
4067 | return error; | ||
4068 | } | ||
4069 | |||
4070 | xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev); | ||
4071 | end = bno + len; | ||
4072 | obno = bno; | ||
4073 | |||
4074 | while (bno < end && n < *nmap) { | ||
4075 | /* Reading past eof, act as though there's a hole up to end. */ | ||
4076 | if (eof) | ||
4077 | got.br_startoff = end; | ||
4078 | if (got.br_startoff > bno) { | ||
4079 | /* Reading in a hole. */ | ||
4080 | mval->br_startoff = bno; | ||
4081 | mval->br_startblock = HOLESTARTBLOCK; | ||
4082 | mval->br_blockcount = | ||
4083 | XFS_FILBLKS_MIN(len, got.br_startoff - bno); | ||
4084 | mval->br_state = XFS_EXT_NORM; | ||
4085 | bno += mval->br_blockcount; | ||
4086 | len -= mval->br_blockcount; | ||
4087 | mval++; | ||
4088 | n++; | ||
4089 | continue; | ||
4090 | } | ||
4091 | |||
4092 | /* set up the extent map to return. */ | ||
4093 | xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags); | ||
4094 | xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); | ||
4095 | |||
4096 | /* If we're done, stop now. */ | ||
4097 | if (bno >= end || n >= *nmap) | ||
4098 | break; | ||
4099 | |||
4100 | /* Else go on to the next record. */ | ||
4101 | if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) | ||
4102 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); | ||
4103 | else | ||
4104 | eof = 1; | ||
4105 | } | ||
4106 | *nmap = n; | ||
4107 | return 0; | ||
4108 | } | ||
4109 | |||
4110 | STATIC int | ||
4111 | xfs_bmapi_reserve_delalloc( | ||
4112 | struct xfs_inode *ip, | ||
4113 | xfs_fileoff_t aoff, | ||
4114 | xfs_filblks_t len, | ||
4115 | struct xfs_bmbt_irec *got, | ||
4116 | struct xfs_bmbt_irec *prev, | ||
4117 | xfs_extnum_t *lastx, | ||
4118 | int eof) | ||
4119 | { | ||
4120 | struct xfs_mount *mp = ip->i_mount; | ||
4121 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | ||
4122 | xfs_extlen_t alen; | ||
4123 | xfs_extlen_t indlen; | ||
4124 | char rt = XFS_IS_REALTIME_INODE(ip); | ||
4125 | xfs_extlen_t extsz; | ||
4126 | int error; | ||
4127 | |||
4128 | alen = XFS_FILBLKS_MIN(len, MAXEXTLEN); | ||
4129 | if (!eof) | ||
4130 | alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); | ||
4131 | |||
4132 | /* Figure out the extent size, adjust alen */ | ||
4133 | extsz = xfs_get_extsz_hint(ip); | ||
4134 | if (extsz) { | ||
4135 | /* | ||
4136 | * Make sure we don't exceed a single extent length when we | ||
4137 | * align the extent by reducing length we are going to | ||
4138 | * allocate by the maximum amount extent size aligment may | ||
4139 | * require. | ||
4140 | */ | ||
4141 | alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1)); | ||
4142 | error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, | ||
4143 | 1, 0, &aoff, &alen); | ||
4144 | ASSERT(!error); | ||
4145 | } | ||
4146 | |||
4147 | if (rt) | ||
4148 | extsz = alen / mp->m_sb.sb_rextsize; | ||
4149 | |||
4150 | /* | ||
4151 | * Make a transaction-less quota reservation for delayed allocation | ||
4152 | * blocks. This number gets adjusted later. We return if we haven't | ||
4153 | * allocated blocks already inside this loop. | ||
4154 | */ | ||
4155 | error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0, | ||
4156 | rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); | ||
4157 | if (error) | ||
4158 | return error; | ||
4159 | |||
4160 | /* | ||
4161 | * Split changing sb for alen and indlen since they could be coming | ||
4162 | * from different places. | ||
4163 | */ | ||
4164 | indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); | ||
4165 | ASSERT(indlen > 0); | ||
4166 | |||
4167 | if (rt) { | ||
4168 | error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, | ||
4169 | -((int64_t)extsz), 0); | ||
4170 | } else { | ||
4171 | error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | ||
4172 | -((int64_t)alen), 0); | ||
4173 | } | ||
4174 | |||
4175 | if (error) | ||
4176 | goto out_unreserve_quota; | ||
4177 | |||
4178 | error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | ||
4179 | -((int64_t)indlen), 0); | ||
4180 | if (error) | ||
4181 | goto out_unreserve_blocks; | ||
4182 | |||
4183 | |||
4184 | ip->i_delayed_blks += alen; | ||
4185 | |||
4186 | got->br_startoff = aoff; | ||
4187 | got->br_startblock = nullstartblock(indlen); | ||
4188 | got->br_blockcount = alen; | ||
4189 | got->br_state = XFS_EXT_NORM; | ||
4190 | xfs_bmap_add_extent_hole_delay(ip, lastx, got); | ||
4191 | |||
4192 | /* | ||
4193 | * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay | ||
4194 | * might have merged it into one of the neighbouring ones. | ||
4195 | */ | ||
4196 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); | ||
4197 | |||
4198 | ASSERT(got->br_startoff <= aoff); | ||
4199 | ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); | ||
4200 | ASSERT(isnullstartblock(got->br_startblock)); | ||
4201 | ASSERT(got->br_state == XFS_EXT_NORM); | ||
4202 | return 0; | ||
4203 | |||
4204 | out_unreserve_blocks: | ||
4205 | if (rt) | ||
4206 | xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0); | ||
4207 | else | ||
4208 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0); | ||
4209 | out_unreserve_quota: | ||
4210 | if (XFS_IS_QUOTA_ON(mp)) | ||
4211 | xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? | ||
4212 | XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); | ||
4213 | return error; | ||
4214 | } | ||
4215 | |||
4216 | /* | ||
4217 | * Map file blocks to filesystem blocks, adding delayed allocations as needed. | ||
4218 | */ | ||
4219 | int | ||
4220 | xfs_bmapi_delay( | ||
4221 | struct xfs_inode *ip, /* incore inode */ | ||
4222 | xfs_fileoff_t bno, /* starting file offs. mapped */ | ||
4223 | xfs_filblks_t len, /* length to map in file */ | ||
4224 | struct xfs_bmbt_irec *mval, /* output: map values */ | ||
4225 | int *nmap, /* i/o: mval size/count */ | ||
4226 | int flags) /* XFS_BMAPI_... */ | ||
4227 | { | ||
4228 | struct xfs_mount *mp = ip->i_mount; | ||
4229 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | ||
4230 | struct xfs_bmbt_irec got; /* current file extent record */ | ||
4231 | struct xfs_bmbt_irec prev; /* previous file extent record */ | ||
4232 | xfs_fileoff_t obno; /* old block number (offset) */ | ||
4233 | xfs_fileoff_t end; /* end of mapped file region */ | ||
4234 | xfs_extnum_t lastx; /* last useful extent number */ | ||
4235 | int eof; /* we've hit the end of extents */ | ||
4236 | int n = 0; /* current extent index */ | ||
4237 | int error = 0; | ||
4238 | |||
4239 | ASSERT(*nmap >= 1); | ||
4240 | ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); | ||
4241 | ASSERT(!(flags & ~XFS_BMAPI_ENTIRE)); | ||
4242 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
4243 | |||
4244 | if (unlikely(XFS_TEST_ERROR( | ||
4245 | (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && | ||
4246 | XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), | ||
4247 | mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { | ||
4248 | XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp); | ||
4249 | return -EFSCORRUPTED; | ||
4250 | } | ||
4251 | |||
4252 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
4253 | return -EIO; | ||
4254 | |||
4255 | XFS_STATS_INC(xs_blk_mapw); | ||
4256 | |||
4257 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | ||
4258 | error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); | ||
4259 | if (error) | ||
4260 | return error; | ||
4261 | } | ||
4262 | |||
4263 | xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev); | ||
4264 | end = bno + len; | ||
4265 | obno = bno; | ||
4266 | |||
4267 | while (bno < end && n < *nmap) { | ||
4268 | if (eof || got.br_startoff > bno) { | ||
4269 | error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got, | ||
4270 | &prev, &lastx, eof); | ||
4271 | if (error) { | ||
4272 | if (n == 0) { | ||
4273 | *nmap = 0; | ||
4274 | return error; | ||
4275 | } | ||
4276 | break; | ||
4277 | } | ||
4278 | } | ||
4279 | |||
4280 | /* set up the extent map to return. */ | ||
4281 | xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags); | ||
4282 | xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); | ||
4283 | |||
4284 | /* If we're done, stop now. */ | ||
4285 | if (bno >= end || n >= *nmap) | ||
4286 | break; | ||
4287 | |||
4288 | /* Else go on to the next record. */ | ||
4289 | prev = got; | ||
4290 | if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) | ||
4291 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); | ||
4292 | else | ||
4293 | eof = 1; | ||
4294 | } | ||
4295 | |||
4296 | *nmap = n; | ||
4297 | return 0; | ||
4298 | } | ||
4299 | |||
4300 | |||
4301 | static int | ||
4302 | xfs_bmapi_allocate( | ||
4303 | struct xfs_bmalloca *bma) | ||
4304 | { | ||
4305 | struct xfs_mount *mp = bma->ip->i_mount; | ||
4306 | int whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ? | ||
4307 | XFS_ATTR_FORK : XFS_DATA_FORK; | ||
4308 | struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); | ||
4309 | int tmp_logflags = 0; | ||
4310 | int error; | ||
4311 | |||
4312 | ASSERT(bma->length > 0); | ||
4313 | |||
4314 | /* | ||
4315 | * For the wasdelay case, we could also just allocate the stuff asked | ||
4316 | * for in this bmap call but that wouldn't be as good. | ||
4317 | */ | ||
4318 | if (bma->wasdel) { | ||
4319 | bma->length = (xfs_extlen_t)bma->got.br_blockcount; | ||
4320 | bma->offset = bma->got.br_startoff; | ||
4321 | if (bma->idx != NULLEXTNUM && bma->idx) { | ||
4322 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), | ||
4323 | &bma->prev); | ||
4324 | } | ||
4325 | } else { | ||
4326 | bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN); | ||
4327 | if (!bma->eof) | ||
4328 | bma->length = XFS_FILBLKS_MIN(bma->length, | ||
4329 | bma->got.br_startoff - bma->offset); | ||
4330 | } | ||
4331 | |||
4332 | /* | ||
4333 | * Indicate if this is the first user data in the file, or just any | ||
4334 | * user data. | ||
4335 | */ | ||
4336 | if (!(bma->flags & XFS_BMAPI_METADATA)) { | ||
4337 | bma->userdata = (bma->offset == 0) ? | ||
4338 | XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; | ||
4339 | } | ||
4340 | |||
4341 | bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1; | ||
4342 | |||
4343 | /* | ||
4344 | * Only want to do the alignment at the eof if it is userdata and | ||
4345 | * allocation length is larger than a stripe unit. | ||
4346 | */ | ||
4347 | if (mp->m_dalign && bma->length >= mp->m_dalign && | ||
4348 | !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) { | ||
4349 | error = xfs_bmap_isaeof(bma, whichfork); | ||
4350 | if (error) | ||
4351 | return error; | ||
4352 | } | ||
4353 | |||
4354 | error = xfs_bmap_alloc(bma); | ||
4355 | if (error) | ||
4356 | return error; | ||
4357 | |||
4358 | if (bma->flist->xbf_low) | ||
4359 | bma->minleft = 0; | ||
4360 | if (bma->cur) | ||
4361 | bma->cur->bc_private.b.firstblock = *bma->firstblock; | ||
4362 | if (bma->blkno == NULLFSBLOCK) | ||
4363 | return 0; | ||
4364 | if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { | ||
4365 | bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork); | ||
4366 | bma->cur->bc_private.b.firstblock = *bma->firstblock; | ||
4367 | bma->cur->bc_private.b.flist = bma->flist; | ||
4368 | } | ||
4369 | /* | ||
4370 | * Bump the number of extents we've allocated | ||
4371 | * in this call. | ||
4372 | */ | ||
4373 | bma->nallocs++; | ||
4374 | |||
4375 | if (bma->cur) | ||
4376 | bma->cur->bc_private.b.flags = | ||
4377 | bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; | ||
4378 | |||
4379 | bma->got.br_startoff = bma->offset; | ||
4380 | bma->got.br_startblock = bma->blkno; | ||
4381 | bma->got.br_blockcount = bma->length; | ||
4382 | bma->got.br_state = XFS_EXT_NORM; | ||
4383 | |||
4384 | /* | ||
4385 | * A wasdelay extent has been initialized, so shouldn't be flagged | ||
4386 | * as unwritten. | ||
4387 | */ | ||
4388 | if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) && | ||
4389 | xfs_sb_version_hasextflgbit(&mp->m_sb)) | ||
4390 | bma->got.br_state = XFS_EXT_UNWRITTEN; | ||
4391 | |||
4392 | if (bma->wasdel) | ||
4393 | error = xfs_bmap_add_extent_delay_real(bma); | ||
4394 | else | ||
4395 | error = xfs_bmap_add_extent_hole_real(bma, whichfork); | ||
4396 | |||
4397 | bma->logflags |= tmp_logflags; | ||
4398 | if (error) | ||
4399 | return error; | ||
4400 | |||
4401 | /* | ||
4402 | * Update our extent pointer, given that xfs_bmap_add_extent_delay_real | ||
4403 | * or xfs_bmap_add_extent_hole_real might have merged it into one of | ||
4404 | * the neighbouring ones. | ||
4405 | */ | ||
4406 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got); | ||
4407 | |||
4408 | ASSERT(bma->got.br_startoff <= bma->offset); | ||
4409 | ASSERT(bma->got.br_startoff + bma->got.br_blockcount >= | ||
4410 | bma->offset + bma->length); | ||
4411 | ASSERT(bma->got.br_state == XFS_EXT_NORM || | ||
4412 | bma->got.br_state == XFS_EXT_UNWRITTEN); | ||
4413 | return 0; | ||
4414 | } | ||
4415 | |||
4416 | STATIC int | ||
4417 | xfs_bmapi_convert_unwritten( | ||
4418 | struct xfs_bmalloca *bma, | ||
4419 | struct xfs_bmbt_irec *mval, | ||
4420 | xfs_filblks_t len, | ||
4421 | int flags) | ||
4422 | { | ||
4423 | int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? | ||
4424 | XFS_ATTR_FORK : XFS_DATA_FORK; | ||
4425 | struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); | ||
4426 | int tmp_logflags = 0; | ||
4427 | int error; | ||
4428 | |||
4429 | /* check if we need to do unwritten->real conversion */ | ||
4430 | if (mval->br_state == XFS_EXT_UNWRITTEN && | ||
4431 | (flags & XFS_BMAPI_PREALLOC)) | ||
4432 | return 0; | ||
4433 | |||
4434 | /* check if we need to do real->unwritten conversion */ | ||
4435 | if (mval->br_state == XFS_EXT_NORM && | ||
4436 | (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) != | ||
4437 | (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) | ||
4438 | return 0; | ||
4439 | |||
4440 | /* | ||
4441 | * Modify (by adding) the state flag, if writing. | ||
4442 | */ | ||
4443 | ASSERT(mval->br_blockcount <= len); | ||
4444 | if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { | ||
4445 | bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp, | ||
4446 | bma->ip, whichfork); | ||
4447 | bma->cur->bc_private.b.firstblock = *bma->firstblock; | ||
4448 | bma->cur->bc_private.b.flist = bma->flist; | ||
4449 | } | ||
4450 | mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) | ||
4451 | ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; | ||
4452 | |||
4453 | error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, | ||
4454 | &bma->cur, mval, bma->firstblock, bma->flist, | ||
4455 | &tmp_logflags); | ||
4456 | bma->logflags |= tmp_logflags; | ||
4457 | if (error) | ||
4458 | return error; | ||
4459 | |||
4460 | /* | ||
4461 | * Update our extent pointer, given that | ||
4462 | * xfs_bmap_add_extent_unwritten_real might have merged it into one | ||
4463 | * of the neighbouring ones. | ||
4464 | */ | ||
4465 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got); | ||
4466 | |||
4467 | /* | ||
4468 | * We may have combined previously unwritten space with written space, | ||
4469 | * so generate another request. | ||
4470 | */ | ||
4471 | if (mval->br_blockcount < len) | ||
4472 | return -EAGAIN; | ||
4473 | return 0; | ||
4474 | } | ||
4475 | |||
4476 | /* | ||
4477 | * Map file blocks to filesystem blocks, and allocate blocks or convert the | ||
4478 | * extent state if necessary. Detailed behaviour is controlled by the flags | ||
4479 | * parameter. Only allocates blocks from a single allocation group, to avoid | ||
4480 | * locking problems. | ||
4481 | * | ||
4482 | * The returned value in "firstblock" from the first call in a transaction | ||
4483 | * must be remembered and presented to subsequent calls in "firstblock". | ||
4484 | * An upper bound for the number of blocks to be allocated is supplied to | ||
4485 | * the first call in "total"; if no allocation group has that many free | ||
4486 | * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). | ||
| * | ||
| * On entry *nmap is the number of entries available in mval (asserted to be | ||
| * between 1 and XFS_BMAP_MAX_NMAP); once the mapping loop has finished it is | ||
| * overwritten with the number of entries actually filled in. | ||
| * | ||
| * The caller must pass a transaction and hold the inode's ILOCK exclusively | ||
| * (both asserted), and the fork selected by XFS_BMAPI_ATTRFORK must not be | ||
| * in local format. | ||
| * | ||
| * Returns 0 on success, -EFSCORRUPTED if the fork fails the extents/btree | ||
| * format check, -EIO if the filesystem has been forcibly shut down, or the | ||
| * error returned by one of the helpers called below. Inode changes recorded | ||
| * in bma.logflags are logged and the btree cursor is released even when an | ||
| * error is returned. | ||
4487 | */ | ||
4488 | int | ||
4489 | xfs_bmapi_write( | ||
4490 | struct xfs_trans *tp, /* transaction pointer */ | ||
4491 | struct xfs_inode *ip, /* incore inode */ | ||
4492 | xfs_fileoff_t bno, /* starting file offs. mapped */ | ||
4493 | xfs_filblks_t len, /* length to map in file */ | ||
4494 | int flags, /* XFS_BMAPI_... */ | ||
4495 | xfs_fsblock_t *firstblock, /* first allocated block | ||
4496 | controls a.g. for allocs */ | ||
4497 | xfs_extlen_t total, /* total blocks needed */ | ||
4498 | struct xfs_bmbt_irec *mval, /* output: map values */ | ||
4499 | int *nmap, /* i/o: mval size/count */ | ||
4500 | struct xfs_bmap_free *flist) /* i/o: list extents to free */ | ||
4501 | { | ||
4502 | struct xfs_mount *mp = ip->i_mount; | ||
4503 | struct xfs_ifork *ifp; | ||
4504 | struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */ | ||
4505 | xfs_fileoff_t end; /* end of mapped file region */ | ||
4506 | int eof; /* after the end of extents */ | ||
4507 | int error; /* error return */ | ||
4508 | int n; /* number of mval entries filled so far */ | ||
4509 | xfs_fileoff_t obno; /* starting offset as passed in by the caller */ | ||
4510 | int whichfork; /* data or attr fork */ | ||
4511 | char inhole; /* current location is hole in file */ | ||
4512 | char wasdelay; /* old extent was delayed */ | ||
4513 | |||
4514 | #ifdef DEBUG | ||
4515 | xfs_fileoff_t orig_bno; /* original block number value */ | ||
4516 | int orig_flags; /* original flags arg value */ | ||
4517 | xfs_filblks_t orig_len; /* original value of len arg */ | ||
4518 | struct xfs_bmbt_irec *orig_mval; /* original value of mval */ | ||
4519 | int orig_nmap; /* original value of *nmap */ | ||
4520 | |||
4521 | orig_bno = bno; | ||
4522 | orig_len = len; | ||
4523 | orig_flags = flags; | ||
4524 | orig_mval = mval; | ||
4525 | orig_nmap = *nmap; | ||
4526 | #endif | ||
4527 | whichfork = (flags & XFS_BMAPI_ATTRFORK) ? | ||
4528 | XFS_ATTR_FORK : XFS_DATA_FORK; | ||
4529 | |||
4530 | ASSERT(*nmap >= 1); | ||
4531 | ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); | ||
4532 | ASSERT(!(flags & XFS_BMAPI_IGSTATE)); | ||
4533 | ASSERT(tp != NULL); | ||
4534 | ASSERT(len > 0); | ||
4535 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); | ||
4536 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
4537 | |||
4538 | if (unlikely(XFS_TEST_ERROR( | ||
4539 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | ||
4540 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), | ||
4541 | mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { | ||
4542 | XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); | ||
4543 | return -EFSCORRUPTED; | ||
4544 | } | ||
4545 | |||
4546 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
4547 | return -EIO; | ||
4548 | |||
4549 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
4550 | |||
4551 | XFS_STATS_INC(xs_blk_mapw); | ||
4552 | |||
4553 | if (*firstblock == NULLFSBLOCK) { | ||
4554 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) | ||
4555 | bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1; | ||
4556 | else | ||
4557 | bma.minleft = 1; | ||
4558 | } else { | ||
4559 | bma.minleft = 0; | ||
4560 | } | ||
4561 | |||
4562 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | ||
4563 | error = xfs_iread_extents(tp, ip, whichfork); | ||
4564 | if (error) | ||
4565 | goto error0; | ||
4566 | } | ||
4567 | |||
4568 | xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got, | ||
4569 | &bma.prev); | ||
4570 | n = 0; | ||
4571 | end = bno + len; | ||
4572 | obno = bno; | ||
4573 | |||
4574 | bma.tp = tp; | ||
4575 | bma.ip = ip; | ||
4576 | bma.total = total; | ||
4577 | bma.userdata = 0; | ||
4578 | bma.flist = flist; | ||
4579 | bma.firstblock = firstblock; | ||
4580 | |||
4581 | while (bno < end && n < *nmap) { | ||
4582 | inhole = eof || bma.got.br_startoff > bno; | ||
4583 | wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); | ||
4584 | |||
4585 | /* | ||
4586 | * If bno lies in a hole, or in an extent whose start block is a | ||
4587 | * null (delayed allocation) start block, allocate blocks for it first. | ||
4588 | */ | ||
4589 | if (inhole || wasdelay) { | ||
4590 | bma.eof = eof; | ||
4591 | bma.conv = !!(flags & XFS_BMAPI_CONVERT); | ||
4592 | bma.wasdel = wasdelay; | ||
4593 | bma.offset = bno; | ||
4594 | bma.flags = flags; | ||
4595 | |||
4596 | /* | ||
4597 | * There's a 32/64 bit type mismatch between the | ||
4598 | * allocation length request (which can be 64 bits in | ||
4599 | * length) and the bma length request, which is | ||
4600 | * xfs_extlen_t and therefore 32 bits. Hence we have to | ||
4601 | * check for 32-bit overflows and handle them here. | ||
4602 | */ | ||
4603 | if (len > (xfs_filblks_t)MAXEXTLEN) | ||
4604 | bma.length = MAXEXTLEN; | ||
4605 | else | ||
4606 | bma.length = len; | ||
4607 | |||
4608 | ASSERT(len > 0); | ||
4609 | ASSERT(bma.length > 0); | ||
4610 | error = xfs_bmapi_allocate(&bma); | ||
4611 | if (error) | ||
4612 | goto error0; | ||
4613 | if (bma.blkno == NULLFSBLOCK) | ||
4614 | break; | ||
4615 | } | ||
4616 | |||
4617 | /* Deal with the allocated space we found. */ | ||
4618 | xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno, | ||
4619 | end, n, flags); | ||
4620 | |||
4621 | /* | ||
| * Execute unwritten extent conversion if necessary. -EAGAIN means | ||
| * the helper wants this iteration repeated, so loop again without | ||
| * advancing to the next extent record. | ||
| */ | ||
4622 | error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags); | ||
4623 | if (error == -EAGAIN) | ||
4624 | continue; | ||
4625 | if (error) | ||
4626 | goto error0; | ||
4627 | |||
4628 | /* update the extent map to return */ | ||
4629 | xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); | ||
4630 | |||
4631 | /* | ||
4632 | * If we're done, stop now. Stop when we've allocated | ||
4633 | * XFS_BMAP_MAX_NMAP extents no matter what. Otherwise | ||
4634 | * the transaction may get too big. | ||
4635 | */ | ||
4636 | if (bno >= end || n >= *nmap || bma.nallocs >= *nmap) | ||
4637 | break; | ||
4638 | |||
4639 | /* Else go on to the next record. */ | ||
4640 | bma.prev = bma.got; | ||
4641 | if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) { | ||
4642 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx), | ||
4643 | &bma.got); | ||
4644 | } else | ||
4645 | eof = 1; | ||
4646 | } | ||
4647 | *nmap = n; | ||
4648 | |||
4649 | /* | ||
4650 | * If xfs_bmap_wants_extents() says so, transform the fork from btree | ||
4651 | * to extents format, handing the cursor to the conversion routine. | ||
4651 | */ | ||
4652 | if (xfs_bmap_wants_extents(ip, whichfork)) { | ||
4653 | int tmp_logflags = 0; | ||
4654 | |||
4655 | ASSERT(bma.cur); | ||
4656 | error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, | ||
4657 | &tmp_logflags, whichfork); | ||
4658 | bma.logflags |= tmp_logflags; | ||
4659 | if (error) | ||
4660 | goto error0; | ||
4661 | } | ||
4662 | |||
4663 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || | ||
4664 | XFS_IFORK_NEXTENTS(ip, whichfork) > | ||
4665 | XFS_IFORK_MAXEXT(ip, whichfork)); | ||
4666 | error = 0; | ||
4667 | error0: | ||
4668 | /* | ||
4669 | * Log everything. Do this after conversion, there's no point in | ||
4670 | * logging the extent records if we've converted to btree format. | ||
| * Hence drop the extent-list flag when the fork is not in extents | ||
| * format, otherwise drop the btree-root flag when it is not in btree | ||
| * format. | ||
4671 | */ | ||
4672 | if ((bma.logflags & xfs_ilog_fext(whichfork)) && | ||
4673 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) | ||
4674 | bma.logflags &= ~xfs_ilog_fext(whichfork); | ||
4675 | else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) && | ||
4676 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) | ||
4677 | bma.logflags &= ~xfs_ilog_fbroot(whichfork); | ||
4678 | /* | ||
4679 | * Log whatever the flags say, even if error. Otherwise we might miss | ||
4680 | * detecting a case where the data is changed, there's an error, | ||
4681 | * and it's not logged so we don't shutdown when we should. | ||
4682 | */ | ||
4683 | if (bma.logflags) | ||
4684 | xfs_trans_log_inode(tp, ip, bma.logflags); | ||
4685 | |||
4686 | if (bma.cur) { | ||
4687 | if (!error) { | ||
4688 | ASSERT(*firstblock == NULLFSBLOCK || | ||
4689 | XFS_FSB_TO_AGNO(mp, *firstblock) == | ||
4690 | XFS_FSB_TO_AGNO(mp, | ||
4691 | bma.cur->bc_private.b.firstblock) || | ||
4692 | (flist->xbf_low && | ||
4693 | XFS_FSB_TO_AGNO(mp, *firstblock) < | ||
4694 | XFS_FSB_TO_AGNO(mp, | ||
4695 | bma.cur->bc_private.b.firstblock))); | ||
4696 | *firstblock = bma.cur->bc_private.b.firstblock; | ||
4697 | } | ||
4698 | xfs_btree_del_cursor(bma.cur, | ||
4699 | error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
4700 | } | ||
4701 | if (!error) | ||
4702 | xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval, | ||
4703 | orig_nmap, *nmap); | ||
4704 | return error; | ||
4705 | } | ||
4706 | |||
4707 | /* | ||
4708 | * Called by xfs_bunmapi to update file extent records and the btree | ||
4709 | * after removing space (or undoing a delayed allocation). | ||
| * | ||
| * "del" must lie entirely within the extent record found at *idx (asserted | ||
| * below). Depending on how del overlaps that record, the record is removed, | ||
| * trimmed at its front, trimmed at its back, or split in two; *idx is | ||
| * decremented when the record is removed and incremented when a new record | ||
| * is inserted for the tail of a split. | ||
| * | ||
| * For real allocations, realtime data fork blocks are freed directly with | ||
| * xfs_rtfree_extent() while all others are added to flist; di_nblocks and | ||
| * the quota counters are reduced accordingly. For delayed allocations the | ||
| * indirect block reservation encoded in the start block is recomputed and | ||
| * any surplus is handed back through xfs_icsb_modify_counters(). | ||
| * | ||
| * *logflagsp is written on every return path. Returns 0 on success or a | ||
| * negative errno; -ENOSPC is returned from the split case when the btree | ||
| * insert reports it, after the incore and btree records have been restored. | ||
4710 | */ | ||
4711 | STATIC int /* error */ | ||
4712 | xfs_bmap_del_extent( | ||
4713 | xfs_inode_t *ip, /* incore inode pointer */ | ||
4714 | xfs_trans_t *tp, /* current transaction pointer */ | ||
4715 | xfs_extnum_t *idx, /* extent number to update/delete */ | ||
4716 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | ||
4717 | xfs_btree_cur_t *cur, /* if null, not a btree */ | ||
4718 | xfs_bmbt_irec_t *del, /* data to remove from extents */ | ||
4719 | int *logflagsp, /* inode logging flags */ | ||
4720 | int whichfork) /* data or attr fork */ | ||
4721 | { | ||
4722 | xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ | ||
4723 | xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ | ||
4724 | xfs_fsblock_t del_endblock=0; /* first block past del */ | ||
4725 | xfs_fileoff_t del_endoff; /* first offset past del */ | ||
4726 | int delay; /* current block is delayed allocated */ | ||
4727 | int do_fx; /* free extent at end of routine */ | ||
4728 | xfs_bmbt_rec_host_t *ep; /* current extent entry pointer */ | ||
4729 | int error; /* error return value */ | ||
4730 | int flags; /* inode logging flags */ | ||
4731 | xfs_bmbt_irec_t got; /* current extent entry */ | ||
4732 | xfs_fileoff_t got_endoff; /* first offset past got */ | ||
4733 | int i; /* temp state */ | ||
4734 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
4735 | xfs_mount_t *mp; /* mount structure */ | ||
4736 | xfs_filblks_t nblks; /* quota/sb block count */ | ||
4737 | xfs_bmbt_irec_t new; /* new record to be inserted */ | ||
4738 | /* REFERENCED */ | ||
4739 | uint qfield; /* quota field to update */ | ||
4740 | xfs_filblks_t temp; /* for indirect length calculations */ | ||
4741 | xfs_filblks_t temp2; /* for indirect length calculations */ | ||
4742 | int state = 0; | ||
4743 | |||
4744 | XFS_STATS_INC(xs_del_exlist); | ||
4745 | |||
4746 | if (whichfork == XFS_ATTR_FORK) | ||
4747 | state |= BMAP_ATTRFORK; | ||
4748 | |||
4749 | mp = ip->i_mount; | ||
4750 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
4751 | ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / | ||
4752 | (uint)sizeof(xfs_bmbt_rec_t))); | ||
4753 | ASSERT(del->br_blockcount > 0); | ||
4754 | ep = xfs_iext_get_ext(ifp, *idx); | ||
4755 | xfs_bmbt_get_all(ep, &got); | ||
4756 | ASSERT(got.br_startoff <= del->br_startoff); | ||
4757 | del_endoff = del->br_startoff + del->br_blockcount; | ||
4758 | got_endoff = got.br_startoff + got.br_blockcount; | ||
4759 | ASSERT(got_endoff >= del_endoff); | ||
4760 | delay = isnullstartblock(got.br_startblock); | ||
4761 | ASSERT(isnullstartblock(del->br_startblock) == delay); | ||
4762 | flags = 0; | ||
4763 | qfield = 0; | ||
4764 | error = 0; | ||
4765 | /* | ||
4766 | * If deleting a real allocation, must free up the disk space. | ||
4767 | */ | ||
4768 | if (!delay) { | ||
4769 | flags = XFS_ILOG_CORE; | ||
4770 | /* | ||
4771 | * Realtime allocation. Free it and record di_nblocks update. | ||
4772 | */ | ||
4773 | if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { | ||
4774 | xfs_fsblock_t bno; | ||
4775 | xfs_filblks_t len; | ||
4776 | |||
4777 | ASSERT(do_mod(del->br_blockcount, | ||
4778 | mp->m_sb.sb_rextsize) == 0); | ||
4779 | ASSERT(do_mod(del->br_startblock, | ||
4780 | mp->m_sb.sb_rextsize) == 0); | ||
4781 | bno = del->br_startblock; | ||
4782 | len = del->br_blockcount; | ||
4783 | do_div(bno, mp->m_sb.sb_rextsize); | ||
4784 | do_div(len, mp->m_sb.sb_rextsize); | ||
4785 | error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); | ||
4786 | if (error) | ||
4787 | goto done; | ||
4788 | do_fx = 0; | ||
4789 | nblks = len * mp->m_sb.sb_rextsize; | ||
4790 | qfield = XFS_TRANS_DQ_RTBCOUNT; | ||
4791 | } | ||
4792 | /* | ||
4793 | * Ordinary allocation. | ||
4794 | */ | ||
4795 | else { | ||
4796 | do_fx = 1; | ||
4797 | nblks = del->br_blockcount; | ||
4798 | qfield = XFS_TRANS_DQ_BCOUNT; | ||
4799 | } | ||
4800 | /* | ||
4801 | * Set up del_endblock and cur for later. | ||
4802 | */ | ||
4803 | del_endblock = del->br_startblock + del->br_blockcount; | ||
4804 | if (cur) { | ||
4805 | if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff, | ||
4806 | got.br_startblock, got.br_blockcount, | ||
4807 | &i))) | ||
4808 | goto done; | ||
4809 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
4810 | } | ||
4811 | da_old = da_new = 0; | ||
4812 | } else { | ||
4813 | da_old = startblockval(got.br_startblock); | ||
4814 | da_new = 0; | ||
4815 | nblks = 0; | ||
4816 | do_fx = 0; | ||
4817 | } | ||
4818 | /* | ||
4819 | * Classify the deletion for the switch statement below: value 2 is | ||
4820 | * set when del starts where got starts, value 1 when del ends where | ||
4820 | * got ends. | ||
4820 | */ | ||
4822 | switch (((got.br_startoff == del->br_startoff) << 1) | | ||
4823 | (got_endoff == del_endoff)) { | ||
4824 | case 3: | ||
4825 | /* | ||
4826 | * Matches the whole extent. Delete the entry. | ||
4827 | */ | ||
4828 | xfs_iext_remove(ip, *idx, 1, | ||
4829 | whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); | ||
4830 | --*idx; | ||
4831 | if (delay) | ||
4832 | break; | ||
4833 | |||
4834 | XFS_IFORK_NEXT_SET(ip, whichfork, | ||
4835 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); | ||
4836 | flags |= XFS_ILOG_CORE; | ||
4837 | if (!cur) { | ||
4838 | flags |= xfs_ilog_fext(whichfork); | ||
4839 | break; | ||
4840 | } | ||
4841 | if ((error = xfs_btree_delete(cur, &i))) | ||
4842 | goto done; | ||
4843 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
4844 | break; | ||
4845 | |||
4846 | case 2: | ||
4847 | /* | ||
4848 | * Deleting the first part of the extent. | ||
4849 | */ | ||
4850 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
4851 | xfs_bmbt_set_startoff(ep, del_endoff); | ||
4852 | temp = got.br_blockcount - del->br_blockcount; | ||
4853 | xfs_bmbt_set_blockcount(ep, temp); | ||
4854 | if (delay) { | ||
4855 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | ||
4856 | da_old); | ||
4857 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | ||
4858 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
4859 | da_new = temp; | ||
4860 | break; | ||
4861 | } | ||
4862 | xfs_bmbt_set_startblock(ep, del_endblock); | ||
4863 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
4864 | if (!cur) { | ||
4865 | flags |= xfs_ilog_fext(whichfork); | ||
4866 | break; | ||
4867 | } | ||
4868 | if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock, | ||
4869 | got.br_blockcount - del->br_blockcount, | ||
4870 | got.br_state))) | ||
4871 | goto done; | ||
4872 | break; | ||
4873 | |||
4874 | case 1: | ||
4875 | /* | ||
4876 | * Deleting the last part of the extent. | ||
4877 | */ | ||
4878 | temp = got.br_blockcount - del->br_blockcount; | ||
4879 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
4880 | xfs_bmbt_set_blockcount(ep, temp); | ||
4881 | if (delay) { | ||
4882 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | ||
4883 | da_old); | ||
4884 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | ||
4885 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
4886 | da_new = temp; | ||
4887 | break; | ||
4888 | } | ||
4889 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
4890 | if (!cur) { | ||
4891 | flags |= xfs_ilog_fext(whichfork); | ||
4892 | break; | ||
4893 | } | ||
4894 | if ((error = xfs_bmbt_update(cur, got.br_startoff, | ||
4895 | got.br_startblock, | ||
4896 | got.br_blockcount - del->br_blockcount, | ||
4897 | got.br_state))) | ||
4898 | goto done; | ||
4899 | break; | ||
4900 | |||
4901 | case 0: | ||
4902 | /* | ||
4903 | * Deleting the middle of the extent. The existing record is | ||
4903 | * shortened to the part before del, and "new" describes the part | ||
4903 | * after del, which is inserted as a separate record. | ||
4904 | */ | ||
4905 | temp = del->br_startoff - got.br_startoff; | ||
4906 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
4907 | xfs_bmbt_set_blockcount(ep, temp); | ||
4908 | new.br_startoff = del_endoff; | ||
4909 | temp2 = got_endoff - del_endoff; | ||
4910 | new.br_blockcount = temp2; | ||
4911 | new.br_state = got.br_state; | ||
4912 | if (!delay) { | ||
4913 | new.br_startblock = del_endblock; | ||
4914 | flags |= XFS_ILOG_CORE; | ||
4915 | if (cur) { | ||
4916 | if ((error = xfs_bmbt_update(cur, | ||
4917 | got.br_startoff, | ||
4918 | got.br_startblock, temp, | ||
4919 | got.br_state))) | ||
4920 | goto done; | ||
4921 | if ((error = xfs_btree_increment(cur, 0, &i))) | ||
4922 | goto done; | ||
4923 | cur->bc_rec.b = new; | ||
4924 | error = xfs_btree_insert(cur, &i); | ||
4925 | if (error && error != -ENOSPC) | ||
4926 | goto done; | ||
4927 | /* | ||
4928 | * If get no-space back from btree insert, | ||
4929 | * it tried a split, and we have a zero | ||
4930 | * block reservation. | ||
4931 | * Fix up our state and return the error. | ||
4932 | */ | ||
4933 | if (error == -ENOSPC) { | ||
4934 | /* | ||
4935 | * Reset the cursor, don't trust | ||
4936 | * it after any insert operation. | ||
4937 | */ | ||
4938 | if ((error = xfs_bmbt_lookup_eq(cur, | ||
4939 | got.br_startoff, | ||
4940 | got.br_startblock, | ||
4941 | temp, &i))) | ||
4942 | goto done; | ||
4943 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
4944 | /* | ||
4945 | * Update the btree record back | ||
4946 | * to the original value. | ||
4947 | */ | ||
4948 | if ((error = xfs_bmbt_update(cur, | ||
4949 | got.br_startoff, | ||
4950 | got.br_startblock, | ||
4951 | got.br_blockcount, | ||
4952 | got.br_state))) | ||
4953 | goto done; | ||
4954 | /* | ||
4955 | * Reset the extent record back | ||
4956 | * to the original value. | ||
4957 | */ | ||
4958 | xfs_bmbt_set_blockcount(ep, | ||
4959 | got.br_blockcount); | ||
4960 | flags = 0; | ||
4961 | error = -ENOSPC; | ||
4962 | goto done; | ||
4963 | } | ||
4964 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | ||
4965 | } else | ||
4966 | flags |= xfs_ilog_fext(whichfork); | ||
4967 | XFS_IFORK_NEXT_SET(ip, whichfork, | ||
4968 | XFS_IFORK_NEXTENTS(ip, whichfork) + 1); | ||
4969 | } else { | ||
4970 | ASSERT(whichfork == XFS_DATA_FORK); | ||
4971 | temp = xfs_bmap_worst_indlen(ip, temp); | ||
4972 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | ||
4973 | temp2 = xfs_bmap_worst_indlen(ip, temp2); | ||
4974 | new.br_startblock = nullstartblock((int)temp2); | ||
4975 | da_new = temp + temp2; | ||
4976 | while (da_new > da_old) { | ||
4977 | if (temp) { | ||
4978 | temp--; | ||
4979 | da_new--; | ||
4980 | xfs_bmbt_set_startblock(ep, | ||
4981 | nullstartblock((int)temp)); | ||
4982 | } | ||
4983 | if (da_new == da_old) | ||
4984 | break; | ||
4985 | if (temp2) { | ||
4986 | temp2--; | ||
4987 | da_new--; | ||
4988 | new.br_startblock = | ||
4989 | nullstartblock((int)temp2); | ||
4990 | } | ||
4991 | } | ||
4992 | } | ||
4993 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
4994 | xfs_iext_insert(ip, *idx + 1, 1, &new, state); | ||
4995 | ++*idx; | ||
4996 | break; | ||
4997 | } | ||
4998 | /* | ||
4999 | * If we need to, add to list of extents to delete. | ||
5000 | */ | ||
5001 | if (do_fx) | ||
5002 | xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist, | ||
5003 | mp); | ||
5004 | /* | ||
5005 | * Adjust inode # blocks in the file. | ||
5006 | */ | ||
5007 | if (nblks) | ||
5008 | ip->i_d.di_nblocks -= nblks; | ||
5009 | /* | ||
5010 | * Adjust quota data. | ||
5011 | */ | ||
5012 | if (qfield) | ||
5013 | xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); | ||
5014 | |||
5015 | /* | ||
5016 | * Account for change in delayed indirect blocks. | ||
5017 | * Nothing to do for disk quota accounting here. | ||
5018 | */ | ||
5019 | ASSERT(da_old >= da_new); | ||
5020 | if (da_old > da_new) { | ||
5021 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | ||
5022 | (int64_t)(da_old - da_new), 0); | ||
5023 | } | ||
5024 | done: | ||
5025 | *logflagsp = flags; | ||
5026 | return error; | ||
5027 | } | ||
5028 | |||
5029 | /* | ||
5030 | * Unmap (remove) blocks from a file. | ||
5031 | * If nexts is nonzero then the number of extents to remove is limited to | ||
5032 | * that value. If not all extents in the block range can be removed then | ||
5033 | * *done is set. | ||
5034 | */ | ||
5035 | int /* error */ | ||
5036 | xfs_bunmapi( | ||
5037 | xfs_trans_t *tp, /* transaction pointer */ | ||
5038 | struct xfs_inode *ip, /* incore inode */ | ||
5039 | xfs_fileoff_t bno, /* starting offset to unmap */ | ||
5040 | xfs_filblks_t len, /* length to unmap in file */ | ||
5041 | int flags, /* misc flags */ | ||
5042 | xfs_extnum_t nexts, /* number of extents max */ | ||
5043 | xfs_fsblock_t *firstblock, /* first allocated block | ||
5044 | controls a.g. for allocs */ | ||
5045 | xfs_bmap_free_t *flist, /* i/o: list extents to free */ | ||
5046 | int *done) /* set if not done yet */ | ||
5047 | { | ||
5048 | xfs_btree_cur_t *cur; /* bmap btree cursor */ | ||
5049 | xfs_bmbt_irec_t del; /* extent being deleted */ | ||
5050 | int eof; /* is deleting at eof */ | ||
5051 | xfs_bmbt_rec_host_t *ep; /* extent record pointer */ | ||
5052 | int error; /* error return value */ | ||
5053 | xfs_extnum_t extno; /* extent number in list */ | ||
5054 | xfs_bmbt_irec_t got; /* current extent record */ | ||
5055 | xfs_ifork_t *ifp; /* inode fork pointer */ | ||
5056 | int isrt; /* freeing in rt area */ | ||
5057 | xfs_extnum_t lastx; /* last extent index used */ | ||
5058 | int logflags; /* transaction logging flags */ | ||
5059 | xfs_extlen_t mod; /* rt extent offset */ | ||
5060 | xfs_mount_t *mp; /* mount structure */ | ||
5061 | xfs_extnum_t nextents; /* number of file extents */ | ||
5062 | xfs_bmbt_irec_t prev; /* previous extent record */ | ||
5063 | xfs_fileoff_t start; /* first file offset deleted */ | ||
5064 | int tmp_logflags; /* partial logging flags */ | ||
5065 | int wasdel; /* was a delayed alloc extent */ | ||
5066 | int whichfork; /* data or attribute fork */ | ||
5067 | xfs_fsblock_t sum; | ||
5068 | |||
5069 | trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); | ||
5070 | |||
5071 | whichfork = (flags & XFS_BMAPI_ATTRFORK) ? | ||
5072 | XFS_ATTR_FORK : XFS_DATA_FORK; | ||
5073 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
5074 | if (unlikely( | ||
5075 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | ||
5076 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { | ||
5077 | XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW, | ||
5078 | ip->i_mount); | ||
5079 | return -EFSCORRUPTED; | ||
5080 | } | ||
5081 | mp = ip->i_mount; | ||
5082 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
5083 | return -EIO; | ||
5084 | |||
5085 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
5086 | ASSERT(len > 0); | ||
5087 | ASSERT(nexts >= 0); | ||
5088 | |||
5089 | if (!(ifp->if_flags & XFS_IFEXTENTS) && | ||
5090 | (error = xfs_iread_extents(tp, ip, whichfork))) | ||
5091 | return error; | ||
5092 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
5093 | if (nextents == 0) { | ||
5094 | *done = 1; | ||
5095 | return 0; | ||
5096 | } | ||
5097 | XFS_STATS_INC(xs_blk_unmap); | ||
5098 | isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); | ||
5099 | start = bno; | ||
5100 | bno = start + len - 1; | ||
5101 | ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, | ||
5102 | &prev); | ||
5103 | |||
5104 | /* | ||
5105 | * Check to see if the given block number is past the end of the | ||
5106 | * file, back up to the last block if so... | ||
5107 | */ | ||
5108 | if (eof) { | ||
5109 | ep = xfs_iext_get_ext(ifp, --lastx); | ||
5110 | xfs_bmbt_get_all(ep, &got); | ||
5111 | bno = got.br_startoff + got.br_blockcount - 1; | ||
5112 | } | ||
5113 | logflags = 0; | ||
5114 | if (ifp->if_flags & XFS_IFBROOT) { | ||
5115 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); | ||
5116 | cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); | ||
5117 | cur->bc_private.b.firstblock = *firstblock; | ||
5118 | cur->bc_private.b.flist = flist; | ||
5119 | cur->bc_private.b.flags = 0; | ||
5120 | } else | ||
5121 | cur = NULL; | ||
5122 | |||
5123 | if (isrt) { | ||
5124 | /* | ||
5125 | * Synchronize by locking the bitmap inode. | ||
5126 | */ | ||
5127 | xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); | ||
5128 | xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); | ||
5129 | } | ||
5130 | |||
5131 | extno = 0; | ||
5132 | while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 && | ||
5133 | (nexts == 0 || extno < nexts)) { | ||
5134 | /* | ||
5135 | * Is the found extent after a hole in which bno lives? | ||
5136 | * Just back up to the previous extent, if so. | ||
5137 | */ | ||
5138 | if (got.br_startoff > bno) { | ||
5139 | if (--lastx < 0) | ||
5140 | break; | ||
5141 | ep = xfs_iext_get_ext(ifp, lastx); | ||
5142 | xfs_bmbt_get_all(ep, &got); | ||
5143 | } | ||
5144 | /* | ||
5145 | * Is the last block of this extent before the range | ||
5146 | * we're supposed to delete? If so, we're done. | ||
5147 | */ | ||
5148 | bno = XFS_FILEOFF_MIN(bno, | ||
5149 | got.br_startoff + got.br_blockcount - 1); | ||
5150 | if (bno < start) | ||
5151 | break; | ||
5152 | /* | ||
5153 | * Then deal with the (possibly delayed) allocated space | ||
5154 | * we found. | ||
5155 | */ | ||
5156 | ASSERT(ep != NULL); | ||
5157 | del = got; | ||
5158 | wasdel = isnullstartblock(del.br_startblock); | ||
5159 | if (got.br_startoff < start) { | ||
5160 | del.br_startoff = start; | ||
5161 | del.br_blockcount -= start - got.br_startoff; | ||
5162 | if (!wasdel) | ||
5163 | del.br_startblock += start - got.br_startoff; | ||
5164 | } | ||
5165 | if (del.br_startoff + del.br_blockcount > bno + 1) | ||
5166 | del.br_blockcount = bno + 1 - del.br_startoff; | ||
5167 | sum = del.br_startblock + del.br_blockcount; | ||
5168 | if (isrt && | ||
5169 | (mod = do_mod(sum, mp->m_sb.sb_rextsize))) { | ||
5170 | /* | ||
5171 | * Realtime extent not lined up at the end. | ||
5172 | * The extent could have been split into written | ||
5173 | * and unwritten pieces, or we could just be | ||
5174 | * unmapping part of it. But we can't really | ||
5175 | * get rid of part of a realtime extent. | ||
5176 | */ | ||
5177 | if (del.br_state == XFS_EXT_UNWRITTEN || | ||
5178 | !xfs_sb_version_hasextflgbit(&mp->m_sb)) { | ||
5179 | /* | ||
5180 | * This piece is unwritten, or we're not | ||
5181 | * using unwritten extents. Skip over it. | ||
5182 | */ | ||
5183 | ASSERT(bno >= mod); | ||
5184 | bno -= mod > del.br_blockcount ? | ||
5185 | del.br_blockcount : mod; | ||
5186 | if (bno < got.br_startoff) { | ||
5187 | if (--lastx >= 0) | ||
5188 | xfs_bmbt_get_all(xfs_iext_get_ext( | ||
5189 | ifp, lastx), &got); | ||
5190 | } | ||
5191 | continue; | ||
5192 | } | ||
5193 | /* | ||
5194 | * It's written, turn it unwritten. | ||
5195 | * This is better than zeroing it. | ||
5196 | */ | ||
5197 | ASSERT(del.br_state == XFS_EXT_NORM); | ||
5198 | ASSERT(xfs_trans_get_block_res(tp) > 0); | ||
5199 | /* | ||
5200 | * If this spans a realtime extent boundary, | ||
5201 | * chop it back to the start of the one we end at. | ||
5202 | */ | ||
5203 | if (del.br_blockcount > mod) { | ||
5204 | del.br_startoff += del.br_blockcount - mod; | ||
5205 | del.br_startblock += del.br_blockcount - mod; | ||
5206 | del.br_blockcount = mod; | ||
5207 | } | ||
5208 | del.br_state = XFS_EXT_UNWRITTEN; | ||
5209 | error = xfs_bmap_add_extent_unwritten_real(tp, ip, | ||
5210 | &lastx, &cur, &del, firstblock, flist, | ||
5211 | &logflags); | ||
5212 | if (error) | ||
5213 | goto error0; | ||
5214 | goto nodelete; | ||
5215 | } | ||
5216 | if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) { | ||
5217 | /* | ||
5218 | * Realtime extent is lined up at the end but not | ||
5219 | * at the front. We'll get rid of full extents if | ||
5220 | * we can. | ||
5221 | */ | ||
5222 | mod = mp->m_sb.sb_rextsize - mod; | ||
5223 | if (del.br_blockcount > mod) { | ||
5224 | del.br_blockcount -= mod; | ||
5225 | del.br_startoff += mod; | ||
5226 | del.br_startblock += mod; | ||
5227 | } else if ((del.br_startoff == start && | ||
5228 | (del.br_state == XFS_EXT_UNWRITTEN || | ||
5229 | xfs_trans_get_block_res(tp) == 0)) || | ||
5230 | !xfs_sb_version_hasextflgbit(&mp->m_sb)) { | ||
5231 | /* | ||
5232 | * Can't make it unwritten. There isn't | ||
5233 | * a full extent here so just skip it. | ||
5234 | */ | ||
5235 | ASSERT(bno >= del.br_blockcount); | ||
5236 | bno -= del.br_blockcount; | ||
5237 | if (got.br_startoff > bno) { | ||
5238 | if (--lastx >= 0) { | ||
5239 | ep = xfs_iext_get_ext(ifp, | ||
5240 | lastx); | ||
5241 | xfs_bmbt_get_all(ep, &got); | ||
5242 | } | ||
5243 | } | ||
5244 | continue; | ||
5245 | } else if (del.br_state == XFS_EXT_UNWRITTEN) { | ||
5246 | /* | ||
5247 | * This one is already unwritten. | ||
5248 | * It must have a written left neighbor. | ||
5249 | * Unwrite the killed part of that one and | ||
5250 | * try again. | ||
5251 | */ | ||
5252 | ASSERT(lastx > 0); | ||
5253 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, | ||
5254 | lastx - 1), &prev); | ||
5255 | ASSERT(prev.br_state == XFS_EXT_NORM); | ||
5256 | ASSERT(!isnullstartblock(prev.br_startblock)); | ||
5257 | ASSERT(del.br_startblock == | ||
5258 | prev.br_startblock + prev.br_blockcount); | ||
5259 | if (prev.br_startoff < start) { | ||
5260 | mod = start - prev.br_startoff; | ||
5261 | prev.br_blockcount -= mod; | ||
5262 | prev.br_startblock += mod; | ||
5263 | prev.br_startoff = start; | ||
5264 | } | ||
5265 | prev.br_state = XFS_EXT_UNWRITTEN; | ||
5266 | lastx--; | ||
5267 | error = xfs_bmap_add_extent_unwritten_real(tp, | ||
5268 | ip, &lastx, &cur, &prev, | ||
5269 | firstblock, flist, &logflags); | ||
5270 | if (error) | ||
5271 | goto error0; | ||
5272 | goto nodelete; | ||
5273 | } else { | ||
5274 | ASSERT(del.br_state == XFS_EXT_NORM); | ||
5275 | del.br_state = XFS_EXT_UNWRITTEN; | ||
5276 | error = xfs_bmap_add_extent_unwritten_real(tp, | ||
5277 | ip, &lastx, &cur, &del, | ||
5278 | firstblock, flist, &logflags); | ||
5279 | if (error) | ||
5280 | goto error0; | ||
5281 | goto nodelete; | ||
5282 | } | ||
5283 | } | ||
5284 | if (wasdel) { | ||
5285 | ASSERT(startblockval(del.br_startblock) > 0); | ||
5286 | /* Update realtime/data freespace, unreserve quota */ | ||
5287 | if (isrt) { | ||
5288 | xfs_filblks_t rtexts; | ||
5289 | |||
5290 | rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); | ||
5291 | do_div(rtexts, mp->m_sb.sb_rextsize); | ||
5292 | xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, | ||
5293 | (int64_t)rtexts, 0); | ||
5294 | (void)xfs_trans_reserve_quota_nblks(NULL, | ||
5295 | ip, -((long)del.br_blockcount), 0, | ||
5296 | XFS_QMOPT_RES_RTBLKS); | ||
5297 | } else { | ||
5298 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | ||
5299 | (int64_t)del.br_blockcount, 0); | ||
5300 | (void)xfs_trans_reserve_quota_nblks(NULL, | ||
5301 | ip, -((long)del.br_blockcount), 0, | ||
5302 | XFS_QMOPT_RES_REGBLKS); | ||
5303 | } | ||
5304 | ip->i_delayed_blks -= del.br_blockcount; | ||
5305 | if (cur) | ||
5306 | cur->bc_private.b.flags |= | ||
5307 | XFS_BTCUR_BPRV_WASDEL; | ||
5308 | } else if (cur) | ||
5309 | cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL; | ||
5310 | /* | ||
5311 | * If it's the case where the directory code is running | ||
5312 | * with no block reservation, and the deleted block is in | ||
5313 | * the middle of its extent, and the resulting insert | ||
5314 | * of an extent would cause transformation to btree format, | ||
5315 | * then reject it. The calling code will then swap | ||
5316 | * blocks around instead. | ||
5317 | * We have to do this now, rather than waiting for the | ||
5318 | * conversion to btree format, since the transaction | ||
5319 | * will be dirty. | ||
5320 | */ | ||
5321 | if (!wasdel && xfs_trans_get_block_res(tp) == 0 && | ||
5322 | XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && | ||
5323 | XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */ | ||
5324 | XFS_IFORK_MAXEXT(ip, whichfork) && | ||
5325 | del.br_startoff > got.br_startoff && | ||
5326 | del.br_startoff + del.br_blockcount < | ||
5327 | got.br_startoff + got.br_blockcount) { | ||
5328 | error = -ENOSPC; | ||
5329 | goto error0; | ||
5330 | } | ||
5331 | error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, | ||
5332 | &tmp_logflags, whichfork); | ||
5333 | logflags |= tmp_logflags; | ||
5334 | if (error) | ||
5335 | goto error0; | ||
5336 | bno = del.br_startoff - 1; | ||
5337 | nodelete: | ||
5338 | /* | ||
5339 | * If not done go on to the next (previous) record. | ||
5340 | */ | ||
5341 | if (bno != (xfs_fileoff_t)-1 && bno >= start) { | ||
5342 | if (lastx >= 0) { | ||
5343 | ep = xfs_iext_get_ext(ifp, lastx); | ||
5344 | if (xfs_bmbt_get_startoff(ep) > bno) { | ||
5345 | if (--lastx >= 0) | ||
5346 | ep = xfs_iext_get_ext(ifp, | ||
5347 | lastx); | ||
5348 | } | ||
5349 | xfs_bmbt_get_all(ep, &got); | ||
5350 | } | ||
5351 | extno++; | ||
5352 | } | ||
5353 | } | ||
5354 | *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; | ||
5355 | |||
5356 | /* | ||
5357 | * Convert to a btree if necessary. | ||
5358 | */ | ||
5359 | if (xfs_bmap_needs_btree(ip, whichfork)) { | ||
5360 | ASSERT(cur == NULL); | ||
5361 | error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, | ||
5362 | &cur, 0, &tmp_logflags, whichfork); | ||
5363 | logflags |= tmp_logflags; | ||
5364 | if (error) | ||
5365 | goto error0; | ||
5366 | } | ||
5367 | /* | ||
5368 | * transform from btree to extents, give it cur | ||
5369 | */ | ||
5370 | else if (xfs_bmap_wants_extents(ip, whichfork)) { | ||
5371 | ASSERT(cur != NULL); | ||
5372 | error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags, | ||
5373 | whichfork); | ||
5374 | logflags |= tmp_logflags; | ||
5375 | if (error) | ||
5376 | goto error0; | ||
5377 | } | ||
5378 | /* | ||
5379 | * transform from extents to local? | ||
5380 | */ | ||
5381 | error = 0; | ||
5382 | error0: | ||
5383 | /* | ||
5384 | * Log everything. Do this after conversion, there's no point in | ||
5385 | * logging the extent records if we've converted to btree format. | ||
5386 | */ | ||
5387 | if ((logflags & xfs_ilog_fext(whichfork)) && | ||
5388 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) | ||
5389 | logflags &= ~xfs_ilog_fext(whichfork); | ||
5390 | else if ((logflags & xfs_ilog_fbroot(whichfork)) && | ||
5391 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) | ||
5392 | logflags &= ~xfs_ilog_fbroot(whichfork); | ||
5393 | /* | ||
5394 | * Log inode even in the error case, if the transaction | ||
5395 | * is dirty we'll need to shut down the filesystem. | ||
5396 | */ | ||
5397 | if (logflags) | ||
5398 | xfs_trans_log_inode(tp, ip, logflags); | ||
5399 | if (cur) { | ||
5400 | if (!error) { | ||
5401 | *firstblock = cur->bc_private.b.firstblock; | ||
5402 | cur->bc_private.b.allocated = 0; | ||
5403 | } | ||
5404 | xfs_btree_del_cursor(cur, | ||
5405 | error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
5406 | } | ||
5407 | return error; | ||
5408 | } | ||
5409 | |||
5410 | /* | ||
5411 | * Shift extent records to the left to cover a hole. | ||
5412 | * | ||
5413 | * The maximum number of extents to be shifted in a single operation | ||
5414 | * is @num_exts, and @current_ext keeps track of the current extent | ||
5415 | * index we have shifted. @offset_shift_fsb is the length by which each | ||
5416 | * extent is shifted. If there is no hole to shift the extents | ||
5417 | * into, this will be considered invalid operation and we abort immediately. | ||
5418 | */ | ||
5419 | int | ||
5420 | xfs_bmap_shift_extents( | ||
5421 | struct xfs_trans *tp, | ||
5422 | struct xfs_inode *ip, | ||
5423 | int *done, | ||
5424 | xfs_fileoff_t start_fsb, | ||
5425 | xfs_fileoff_t offset_shift_fsb, | ||
5426 | xfs_extnum_t *current_ext, | ||
5427 | xfs_fsblock_t *firstblock, | ||
5428 | struct xfs_bmap_free *flist, | ||
5429 | int num_exts) | ||
5430 | { | ||
5431 | struct xfs_btree_cur *cur; | ||
5432 | struct xfs_bmbt_rec_host *gotp; | ||
5433 | struct xfs_bmbt_irec got; | ||
5434 | struct xfs_bmbt_irec left; | ||
5435 | struct xfs_mount *mp = ip->i_mount; | ||
5436 | struct xfs_ifork *ifp; | ||
5437 | xfs_extnum_t nexts = 0; | ||
5438 | xfs_fileoff_t startoff; | ||
5439 | int error = 0; | ||
5440 | int i; | ||
5441 | int whichfork = XFS_DATA_FORK; | ||
5442 | int logflags; | ||
5443 | xfs_filblks_t blockcount = 0; | ||
5444 | int total_extents; | ||
5445 | |||
5446 | if (unlikely(XFS_TEST_ERROR( | ||
5447 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | ||
5448 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), | ||
5449 | mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { | ||
5450 | XFS_ERROR_REPORT("xfs_bmap_shift_extents", | ||
5451 | XFS_ERRLEVEL_LOW, mp); | ||
5452 | return -EFSCORRUPTED; | ||
5453 | } | ||
5454 | |||
5455 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
5456 | return -EIO; | ||
5457 | |||
5458 | ASSERT(current_ext != NULL); | ||
5459 | |||
5460 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
5461 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | ||
5462 | /* Read in all the extents */ | ||
5463 | error = xfs_iread_extents(tp, ip, whichfork); | ||
5464 | if (error) | ||
5465 | return error; | ||
5466 | } | ||
5467 | |||
5468 | /* | ||
5469 | * If *current_ext is 0, we would need to lookup the extent | ||
5470 | * from where we would start shifting and store it in gotp. | ||
5471 | */ | ||
5472 | if (!*current_ext) { | ||
5473 | gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext); | ||
5474 | /* | ||
5475 | * gotp can be null in 2 cases: 1) if there are no extents | ||
5476 | * or 2) start_fsb lies in a hole beyond which there are | ||
5477 | * no extents. Either way, we are done. | ||
5478 | */ | ||
5479 | if (!gotp) { | ||
5480 | *done = 1; | ||
5481 | return 0; | ||
5482 | } | ||
5483 | } | ||
5484 | |||
5485 | /* We are going to change core inode */ | ||
5486 | logflags = XFS_ILOG_CORE; | ||
5487 | if (ifp->if_flags & XFS_IFBROOT) { | ||
5488 | cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); | ||
5489 | cur->bc_private.b.firstblock = *firstblock; | ||
5490 | cur->bc_private.b.flist = flist; | ||
5491 | cur->bc_private.b.flags = 0; | ||
5492 | } else { | ||
5493 | cur = NULL; | ||
5494 | logflags |= XFS_ILOG_DEXT; | ||
5495 | } | ||
5496 | |||
5497 | /* | ||
5498 | * There may be delalloc extents in the data fork before the range we | ||
5499 | * are collapsing out, so we cannot | ||
5500 | * use the count of real extents here. Instead we have to calculate it | ||
5501 | * from the incore fork. | ||
5502 | */ | ||
5503 | total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); | ||
5504 | while (nexts++ < num_exts && *current_ext < total_extents) { | ||
5505 | |||
5506 | gotp = xfs_iext_get_ext(ifp, *current_ext); | ||
5507 | xfs_bmbt_get_all(gotp, &got); | ||
5508 | startoff = got.br_startoff - offset_shift_fsb; | ||
5509 | |||
5510 | /* | ||
5511 | * Before shifting extent into hole, make sure that the hole | ||
5512 | * is large enough to accomodate the shift. | ||
5513 | */ | ||
5514 | if (*current_ext) { | ||
5515 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, | ||
5516 | *current_ext - 1), &left); | ||
5517 | |||
5518 | if (startoff < left.br_startoff + left.br_blockcount) | ||
5519 | error = -EINVAL; | ||
5520 | } else if (offset_shift_fsb > got.br_startoff) { | ||
5521 | /* | ||
5522 | * When first extent is shifted, offset_shift_fsb | ||
5523 | * should be less than the stating offset of | ||
5524 | * the first extent. | ||
5525 | */ | ||
5526 | error = -EINVAL; | ||
5527 | } | ||
5528 | |||
5529 | if (error) | ||
5530 | goto del_cursor; | ||
5531 | |||
5532 | if (cur) { | ||
5533 | error = xfs_bmbt_lookup_eq(cur, got.br_startoff, | ||
5534 | got.br_startblock, | ||
5535 | got.br_blockcount, | ||
5536 | &i); | ||
5537 | if (error) | ||
5538 | goto del_cursor; | ||
5539 | XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); | ||
5540 | } | ||
5541 | |||
5542 | /* Check if we can merge 2 adjacent extents */ | ||
5543 | if (*current_ext && | ||
5544 | left.br_startoff + left.br_blockcount == startoff && | ||
5545 | left.br_startblock + left.br_blockcount == | ||
5546 | got.br_startblock && | ||
5547 | left.br_state == got.br_state && | ||
5548 | left.br_blockcount + got.br_blockcount <= MAXEXTLEN) { | ||
5549 | blockcount = left.br_blockcount + | ||
5550 | got.br_blockcount; | ||
5551 | xfs_iext_remove(ip, *current_ext, 1, 0); | ||
5552 | if (cur) { | ||
5553 | error = xfs_btree_delete(cur, &i); | ||
5554 | if (error) | ||
5555 | goto del_cursor; | ||
5556 | XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); | ||
5557 | } | ||
5558 | XFS_IFORK_NEXT_SET(ip, whichfork, | ||
5559 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); | ||
5560 | gotp = xfs_iext_get_ext(ifp, --*current_ext); | ||
5561 | xfs_bmbt_get_all(gotp, &got); | ||
5562 | |||
5563 | /* Make cursor point to the extent we will update */ | ||
5564 | if (cur) { | ||
5565 | error = xfs_bmbt_lookup_eq(cur, got.br_startoff, | ||
5566 | got.br_startblock, | ||
5567 | got.br_blockcount, | ||
5568 | &i); | ||
5569 | if (error) | ||
5570 | goto del_cursor; | ||
5571 | XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); | ||
5572 | } | ||
5573 | |||
5574 | xfs_bmbt_set_blockcount(gotp, blockcount); | ||
5575 | got.br_blockcount = blockcount; | ||
5576 | } else { | ||
5577 | /* We have to update the startoff */ | ||
5578 | xfs_bmbt_set_startoff(gotp, startoff); | ||
5579 | got.br_startoff = startoff; | ||
5580 | } | ||
5581 | |||
5582 | if (cur) { | ||
5583 | error = xfs_bmbt_update(cur, got.br_startoff, | ||
5584 | got.br_startblock, | ||
5585 | got.br_blockcount, | ||
5586 | got.br_state); | ||
5587 | if (error) | ||
5588 | goto del_cursor; | ||
5589 | } | ||
5590 | |||
5591 | (*current_ext)++; | ||
5592 | total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); | ||
5593 | } | ||
5594 | |||
5595 | /* Check if we are done */ | ||
5596 | if (*current_ext == total_extents) | ||
5597 | *done = 1; | ||
5598 | |||
5599 | del_cursor: | ||
5600 | if (cur) | ||
5601 | xfs_btree_del_cursor(cur, | ||
5602 | error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
5603 | |||
5604 | xfs_trans_log_inode(tp, ip, logflags); | ||
5605 | return error; | ||
5606 | } | ||
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h new file mode 100644 index 000000000000..b879ca56a64c --- /dev/null +++ b/fs/xfs/libxfs/xfs_bmap.h | |||
@@ -0,0 +1,186 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_BMAP_H__ | ||
19 | #define __XFS_BMAP_H__ | ||
20 | |||
21 | struct getbmap; | ||
22 | struct xfs_bmbt_irec; | ||
23 | struct xfs_ifork; | ||
24 | struct xfs_inode; | ||
25 | struct xfs_mount; | ||
26 | struct xfs_trans; | ||
27 | |||
28 | extern kmem_zone_t *xfs_bmap_free_item_zone; | ||
29 | |||
30 | /* | ||
31 | * List of extents to be free "later". | ||
32 | * The list is kept sorted on xbf_startblock. | ||
33 | */ | ||
34 | typedef struct xfs_bmap_free_item | ||
35 | { | ||
36 | xfs_fsblock_t xbfi_startblock;/* starting fs block number */ | ||
37 | xfs_extlen_t xbfi_blockcount;/* number of blocks in extent */ | ||
38 | struct xfs_bmap_free_item *xbfi_next; /* link to next entry */ | ||
39 | } xfs_bmap_free_item_t; | ||
40 | |||
41 | /* | ||
42 | * Header for free extent list. | ||
43 | * | ||
44 | * xbf_low is used by the allocator to activate the lowspace algorithm - | ||
45 | * when free space is running low the extent allocator may choose to | ||
46 | * allocate an extent from an AG without leaving sufficient space for | ||
47 | * a btree split when inserting the new extent. In this case the allocator | ||
48 | * will enable the lowspace algorithm which is supposed to allow further | ||
49 | * allocations (such as btree splits and newroots) to allocate from | ||
50 | * sequential AGs. In order to avoid locking AGs out of order the lowspace | ||
51 | * algorithm will start searching for free space from AG 0. If the correct | ||
52 | * transaction reservations have been made then this algorithm will eventually | ||
53 | * find all the space it needs. | ||
54 | */ | ||
55 | typedef struct xfs_bmap_free | ||
56 | { | ||
57 | xfs_bmap_free_item_t *xbf_first; /* list of to-be-free extents */ | ||
58 | int xbf_count; /* count of items on list */ | ||
59 | int xbf_low; /* alloc in low mode */ | ||
60 | } xfs_bmap_free_t; | ||
61 | |||
#define XFS_BMAP_MAX_NMAP	4

/*
 * Flags for xfs_bmapi_*
 *
 * Each flag is a single bit.  XFS_BMAPI_FLAGS below pairs every flag
 * with a printable name and must be kept in sync with this list.
 */
#define XFS_BMAPI_ENTIRE	(1 << 0)	/* return entire extent, not trimmed */
#define XFS_BMAPI_METADATA	(1 << 1)	/* mapping metadata not user data */
#define XFS_BMAPI_ATTRFORK	(1 << 2)	/* use attribute fork not data */
#define XFS_BMAPI_PREALLOC	(1 << 3)	/* preallocation op: unwritten space */
#define XFS_BMAPI_IGSTATE	(1 << 4)	/* Ignore state - */
						/* combine contig. space */
#define XFS_BMAPI_CONTIG	(1 << 5)	/* must allocate only one extent */
/*
 * Unwritten extent conversion - this needs write cache flushing and no
 * additional allocation alignments.  When specified with XFS_BMAPI_PREALLOC
 * it converts from written to unwritten, otherwise it converts from
 * unwritten to written.
 */
#define XFS_BMAPI_CONVERT	(1 << 6)

#define XFS_BMAPI_FLAGS \
	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
	{ XFS_BMAPI_METADATA,	"METADATA" }, \
	{ XFS_BMAPI_ATTRFORK,	"ATTRFORK" }, \
	{ XFS_BMAPI_PREALLOC,	"PREALLOC" }, \
	{ XFS_BMAPI_IGSTATE,	"IGSTATE" }, \
	{ XFS_BMAPI_CONTIG,	"CONTIG" }, \
	{ XFS_BMAPI_CONVERT,	"CONVERT" }
89 | |||
90 | |||
91 | static inline int xfs_bmapi_aflag(int w) | ||
92 | { | ||
93 | return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0); | ||
94 | } | ||
95 | |||
/*
 * Special values for xfs_bmbt_irec_t br_startblock field.
 *
 * Both are taken from the very top of the xfs_fsblock_t value range:
 * -1 and -2 converted to xfs_fsblock_t.  Judging by the names they mark a
 * delayed-allocation mapping and a hole respectively - NOTE(review):
 * confirm against the users of these constants.
 */
#define DELAYSTARTBLOCK		((xfs_fsblock_t)-1LL)
#define HOLESTARTBLOCK		((xfs_fsblock_t)-2LL)
101 | |||
102 | static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp) | ||
103 | { | ||
104 | ((flp)->xbf_first = NULL, (flp)->xbf_count = 0, \ | ||
105 | (flp)->xbf_low = 0, *(fbp) = NULLFSBLOCK); | ||
106 | } | ||
107 | |||
/*
 * Flags for xfs_bmap_add_extent*.
 *
 * Each flag occupies its own bit (bits 0-8).
 */
#define BMAP_LEFT_CONTIG	(1 << 0)
#define BMAP_RIGHT_CONTIG	(1 << 1)
#define BMAP_LEFT_FILLING	(1 << 2)
#define BMAP_RIGHT_FILLING	(1 << 3)
#define BMAP_LEFT_DELAY		(1 << 4)
#define BMAP_RIGHT_DELAY	(1 << 5)
#define BMAP_LEFT_VALID		(1 << 6)
#define BMAP_RIGHT_VALID	(1 << 7)
#define BMAP_ATTRFORK		(1 << 8)

/*
 * Flag-to-name table.  Note that it only names a subset of the flags
 * above: the *_DELAY and *_VALID bits have no entry here.
 */
#define XFS_BMAP_EXT_FLAGS \
	{ BMAP_LEFT_CONTIG,	"LC" }, \
	{ BMAP_RIGHT_CONTIG,	"RC" }, \
	{ BMAP_LEFT_FILLING,	"LF" }, \
	{ BMAP_RIGHT_FILLING,	"RF" }, \
	{ BMAP_ATTRFORK,	"ATTR" }
127 | |||
128 | |||
/*
 * This macro determines how many extents are shifted in one write
 * transaction.  Shifting a single extent could already require two btree
 * splits - an extent move on the first and an extent merge on the second -
 * so only one extent is shifted per write transaction.
 */
#define XFS_BMAP_MAX_SHIFT_EXTENTS	1
137 | |||
/*
 * Extent list tracing hook.  On DEBUG builds XFS_BMAP_TRACE_EXLIST() calls
 * xfs_bmap_trace_exlist() and passes _THIS_IP_ as the caller address; on
 * all other builds the macro expands to nothing.
 */
#ifdef DEBUG
void	xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
		int whichfork, unsigned long caller_ip);
#define	XFS_BMAP_TRACE_EXLIST(ip,c,w)	\
	xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_)
#else
#define	XFS_BMAP_TRACE_EXLIST(ip,c,w)
#endif
146 | |||
147 | int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); | ||
148 | void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); | ||
149 | void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len, | ||
150 | struct xfs_bmap_free *flist, struct xfs_mount *mp); | ||
151 | void xfs_bmap_cancel(struct xfs_bmap_free *flist); | ||
152 | void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); | ||
153 | int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, | ||
154 | xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); | ||
155 | int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip, | ||
156 | xfs_fileoff_t *last_block, int whichfork); | ||
157 | int xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused, | ||
158 | int whichfork); | ||
159 | int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork); | ||
160 | int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip, | ||
161 | int whichfork); | ||
162 | int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno, | ||
163 | xfs_filblks_t len, struct xfs_bmbt_irec *mval, | ||
164 | int *nmap, int flags); | ||
165 | int xfs_bmapi_delay(struct xfs_inode *ip, xfs_fileoff_t bno, | ||
166 | xfs_filblks_t len, struct xfs_bmbt_irec *mval, | ||
167 | int *nmap, int flags); | ||
168 | int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip, | ||
169 | xfs_fileoff_t bno, xfs_filblks_t len, int flags, | ||
170 | xfs_fsblock_t *firstblock, xfs_extlen_t total, | ||
171 | struct xfs_bmbt_irec *mval, int *nmap, | ||
172 | struct xfs_bmap_free *flist); | ||
173 | int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, | ||
174 | xfs_fileoff_t bno, xfs_filblks_t len, int flags, | ||
175 | xfs_extnum_t nexts, xfs_fsblock_t *firstblock, | ||
176 | struct xfs_bmap_free *flist, int *done); | ||
177 | int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, | ||
178 | xfs_extnum_t num); | ||
179 | uint xfs_default_attroffset(struct xfs_inode *ip); | ||
180 | int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, | ||
181 | int *done, xfs_fileoff_t start_fsb, | ||
182 | xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext, | ||
183 | xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist, | ||
184 | int num_exts); | ||
185 | |||
186 | #endif /* __XFS_BMAP_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c new file mode 100644 index 000000000000..a388de4ceaa1 --- /dev/null +++ b/fs/xfs/libxfs/xfs_bmap_btree.c | |||
@@ -0,0 +1,967 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_shared.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_bit.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_inode.h" | ||
29 | #include "xfs_trans.h" | ||
30 | #include "xfs_inode_item.h" | ||
31 | #include "xfs_alloc.h" | ||
32 | #include "xfs_btree.h" | ||
33 | #include "xfs_bmap_btree.h" | ||
34 | #include "xfs_bmap.h" | ||
35 | #include "xfs_error.h" | ||
36 | #include "xfs_quota.h" | ||
37 | #include "xfs_trace.h" | ||
38 | #include "xfs_cksum.h" | ||
39 | #include "xfs_dinode.h" | ||
40 | |||
41 | /* | ||
42 | * Determine the extent state. | ||
43 | */ | ||
44 | /* ARGSUSED */ | ||
45 | STATIC xfs_exntst_t | ||
46 | xfs_extent_state( | ||
47 | xfs_filblks_t blks, | ||
48 | int extent_flag) | ||
49 | { | ||
50 | if (extent_flag) { | ||
51 | ASSERT(blks != 0); /* saved for DMIG */ | ||
52 | return XFS_EXT_UNWRITTEN; | ||
53 | } | ||
54 | return XFS_EXT_NORM; | ||
55 | } | ||
56 | |||
57 | /* | ||
58 | * Convert on-disk form of btree root to in-memory form. | ||
59 | */ | ||
60 | void | ||
61 | xfs_bmdr_to_bmbt( | ||
62 | struct xfs_inode *ip, | ||
63 | xfs_bmdr_block_t *dblock, | ||
64 | int dblocklen, | ||
65 | struct xfs_btree_block *rblock, | ||
66 | int rblocklen) | ||
67 | { | ||
68 | struct xfs_mount *mp = ip->i_mount; | ||
69 | int dmxr; | ||
70 | xfs_bmbt_key_t *fkp; | ||
71 | __be64 *fpp; | ||
72 | xfs_bmbt_key_t *tkp; | ||
73 | __be64 *tpp; | ||
74 | |||
75 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
76 | xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, | ||
77 | XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, | ||
78 | XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); | ||
79 | else | ||
80 | xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, | ||
81 | XFS_BMAP_MAGIC, 0, 0, ip->i_ino, | ||
82 | XFS_BTREE_LONG_PTRS); | ||
83 | |||
84 | rblock->bb_level = dblock->bb_level; | ||
85 | ASSERT(be16_to_cpu(rblock->bb_level) > 0); | ||
86 | rblock->bb_numrecs = dblock->bb_numrecs; | ||
87 | dmxr = xfs_bmdr_maxrecs(dblocklen, 0); | ||
88 | fkp = XFS_BMDR_KEY_ADDR(dblock, 1); | ||
89 | tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); | ||
90 | fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); | ||
91 | tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); | ||
92 | dmxr = be16_to_cpu(dblock->bb_numrecs); | ||
93 | memcpy(tkp, fkp, sizeof(*fkp) * dmxr); | ||
94 | memcpy(tpp, fpp, sizeof(*fpp) * dmxr); | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * Convert a compressed bmap extent record to an uncompressed form. | ||
99 | * This code must be in sync with the routines xfs_bmbt_get_startoff, | ||
100 | * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state. | ||
101 | */ | ||
102 | STATIC void | ||
103 | __xfs_bmbt_get_all( | ||
104 | __uint64_t l0, | ||
105 | __uint64_t l1, | ||
106 | xfs_bmbt_irec_t *s) | ||
107 | { | ||
108 | int ext_flag; | ||
109 | xfs_exntst_t st; | ||
110 | |||
111 | ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN)); | ||
112 | s->br_startoff = ((xfs_fileoff_t)l0 & | ||
113 | xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; | ||
114 | #if XFS_BIG_BLKNOS | ||
115 | s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) | | ||
116 | (((xfs_fsblock_t)l1) >> 21); | ||
117 | #else | ||
118 | #ifdef DEBUG | ||
119 | { | ||
120 | xfs_dfsbno_t b; | ||
121 | |||
122 | b = (((xfs_dfsbno_t)l0 & xfs_mask64lo(9)) << 43) | | ||
123 | (((xfs_dfsbno_t)l1) >> 21); | ||
124 | ASSERT((b >> 32) == 0 || isnulldstartblock(b)); | ||
125 | s->br_startblock = (xfs_fsblock_t)b; | ||
126 | } | ||
127 | #else /* !DEBUG */ | ||
128 | s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)l1) >> 21); | ||
129 | #endif /* DEBUG */ | ||
130 | #endif /* XFS_BIG_BLKNOS */ | ||
131 | s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21)); | ||
132 | /* This is xfs_extent_state() in-line */ | ||
133 | if (ext_flag) { | ||
134 | ASSERT(s->br_blockcount != 0); /* saved for DMIG */ | ||
135 | st = XFS_EXT_UNWRITTEN; | ||
136 | } else | ||
137 | st = XFS_EXT_NORM; | ||
138 | s->br_state = st; | ||
139 | } | ||
140 | |||
141 | void | ||
142 | xfs_bmbt_get_all( | ||
143 | xfs_bmbt_rec_host_t *r, | ||
144 | xfs_bmbt_irec_t *s) | ||
145 | { | ||
146 | __xfs_bmbt_get_all(r->l0, r->l1, s); | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Extract the blockcount field from an in memory bmap extent record. | ||
151 | */ | ||
152 | xfs_filblks_t | ||
153 | xfs_bmbt_get_blockcount( | ||
154 | xfs_bmbt_rec_host_t *r) | ||
155 | { | ||
156 | return (xfs_filblks_t)(r->l1 & xfs_mask64lo(21)); | ||
157 | } | ||
158 | |||
159 | /* | ||
160 | * Extract the startblock field from an in memory bmap extent record. | ||
161 | */ | ||
162 | xfs_fsblock_t | ||
163 | xfs_bmbt_get_startblock( | ||
164 | xfs_bmbt_rec_host_t *r) | ||
165 | { | ||
166 | #if XFS_BIG_BLKNOS | ||
167 | return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) | | ||
168 | (((xfs_fsblock_t)r->l1) >> 21); | ||
169 | #else | ||
170 | #ifdef DEBUG | ||
171 | xfs_dfsbno_t b; | ||
172 | |||
173 | b = (((xfs_dfsbno_t)r->l0 & xfs_mask64lo(9)) << 43) | | ||
174 | (((xfs_dfsbno_t)r->l1) >> 21); | ||
175 | ASSERT((b >> 32) == 0 || isnulldstartblock(b)); | ||
176 | return (xfs_fsblock_t)b; | ||
177 | #else /* !DEBUG */ | ||
178 | return (xfs_fsblock_t)(((xfs_dfsbno_t)r->l1) >> 21); | ||
179 | #endif /* DEBUG */ | ||
180 | #endif /* XFS_BIG_BLKNOS */ | ||
181 | } | ||
182 | |||
183 | /* | ||
184 | * Extract the startoff field from an in memory bmap extent record. | ||
185 | */ | ||
186 | xfs_fileoff_t | ||
187 | xfs_bmbt_get_startoff( | ||
188 | xfs_bmbt_rec_host_t *r) | ||
189 | { | ||
190 | return ((xfs_fileoff_t)r->l0 & | ||
191 | xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; | ||
192 | } | ||
193 | |||
194 | xfs_exntst_t | ||
195 | xfs_bmbt_get_state( | ||
196 | xfs_bmbt_rec_host_t *r) | ||
197 | { | ||
198 | int ext_flag; | ||
199 | |||
200 | ext_flag = (int)((r->l0) >> (64 - BMBT_EXNTFLAG_BITLEN)); | ||
201 | return xfs_extent_state(xfs_bmbt_get_blockcount(r), | ||
202 | ext_flag); | ||
203 | } | ||
204 | |||
205 | /* | ||
206 | * Extract the blockcount field from an on disk bmap extent record. | ||
207 | */ | ||
208 | xfs_filblks_t | ||
209 | xfs_bmbt_disk_get_blockcount( | ||
210 | xfs_bmbt_rec_t *r) | ||
211 | { | ||
212 | return (xfs_filblks_t)(be64_to_cpu(r->l1) & xfs_mask64lo(21)); | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * Extract the startoff field from a disk format bmap extent record. | ||
217 | */ | ||
218 | xfs_fileoff_t | ||
219 | xfs_bmbt_disk_get_startoff( | ||
220 | xfs_bmbt_rec_t *r) | ||
221 | { | ||
222 | return ((xfs_fileoff_t)be64_to_cpu(r->l0) & | ||
223 | xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; | ||
224 | } | ||
225 | |||
226 | |||
227 | /* | ||
228 | * Set all the fields in a bmap extent record from the arguments. | ||
229 | */ | ||
230 | void | ||
231 | xfs_bmbt_set_allf( | ||
232 | xfs_bmbt_rec_host_t *r, | ||
233 | xfs_fileoff_t startoff, | ||
234 | xfs_fsblock_t startblock, | ||
235 | xfs_filblks_t blockcount, | ||
236 | xfs_exntst_t state) | ||
237 | { | ||
238 | int extent_flag = (state == XFS_EXT_NORM) ? 0 : 1; | ||
239 | |||
240 | ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN); | ||
241 | ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); | ||
242 | ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); | ||
243 | |||
244 | #if XFS_BIG_BLKNOS | ||
245 | ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); | ||
246 | |||
247 | r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | | ||
248 | ((xfs_bmbt_rec_base_t)startoff << 9) | | ||
249 | ((xfs_bmbt_rec_base_t)startblock >> 43); | ||
250 | r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) | | ||
251 | ((xfs_bmbt_rec_base_t)blockcount & | ||
252 | (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); | ||
253 | #else /* !XFS_BIG_BLKNOS */ | ||
254 | if (isnullstartblock(startblock)) { | ||
255 | r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | | ||
256 | ((xfs_bmbt_rec_base_t)startoff << 9) | | ||
257 | (xfs_bmbt_rec_base_t)xfs_mask64lo(9); | ||
258 | r->l1 = xfs_mask64hi(11) | | ||
259 | ((xfs_bmbt_rec_base_t)startblock << 21) | | ||
260 | ((xfs_bmbt_rec_base_t)blockcount & | ||
261 | (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); | ||
262 | } else { | ||
263 | r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | | ||
264 | ((xfs_bmbt_rec_base_t)startoff << 9); | ||
265 | r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) | | ||
266 | ((xfs_bmbt_rec_base_t)blockcount & | ||
267 | (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); | ||
268 | } | ||
269 | #endif /* XFS_BIG_BLKNOS */ | ||
270 | } | ||
271 | |||
272 | /* | ||
273 | * Set all the fields in a bmap extent record from the uncompressed form. | ||
274 | */ | ||
275 | void | ||
276 | xfs_bmbt_set_all( | ||
277 | xfs_bmbt_rec_host_t *r, | ||
278 | xfs_bmbt_irec_t *s) | ||
279 | { | ||
280 | xfs_bmbt_set_allf(r, s->br_startoff, s->br_startblock, | ||
281 | s->br_blockcount, s->br_state); | ||
282 | } | ||
283 | |||
284 | |||
285 | /* | ||
286 | * Set all the fields in a disk format bmap extent record from the arguments. Both words of the record are stored through cpu_to_be64(). The extent flag goes in bit 63 of l0, startoff is shifted left by 9 in l0, startblock is shifted left by 21 in l1 and blockcount is masked with xfs_mask64lo(21) into l1 (assuming xfs_mask64hi(n)/xfs_mask64lo(n) are masks of the high/low n bits - TODO confirm). | ||
287 | */ | ||
288 | void | ||
289 | xfs_bmbt_disk_set_allf( | ||
290 | xfs_bmbt_rec_t *r, | ||
291 | xfs_fileoff_t startoff, | ||
292 | xfs_fsblock_t startblock, | ||
293 | xfs_filblks_t blockcount, | ||
294 | xfs_exntst_t state) | ||
295 | { | ||
296 | int extent_flag = (state == XFS_EXT_NORM) ? 0 : 1; /* flag bit is set for any state other than XFS_EXT_NORM */ | ||
297 | |||
298 | ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN); | ||
299 | ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); /* startoff must not use bits beyond BMBT_STARTOFF_BITLEN */ | ||
300 | ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); /* blockcount must not use bits beyond BMBT_BLOCKCOUNT_BITLEN */ | ||
301 | |||
302 | #if XFS_BIG_BLKNOS | ||
303 | ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); | ||
304 | |||
305 | r->l0 = cpu_to_be64( | ||
306 | ((xfs_bmbt_rec_base_t)extent_flag << 63) | | ||
307 | ((xfs_bmbt_rec_base_t)startoff << 9) | | ||
308 | ((xfs_bmbt_rec_base_t)startblock >> 43)); /* startblock bits 43 and up go in the low bits of l0 */ | ||
309 | r->l1 = cpu_to_be64( | ||
310 | ((xfs_bmbt_rec_base_t)startblock << 21) | | ||
311 | ((xfs_bmbt_rec_base_t)blockcount & | ||
312 | (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); | ||
313 | #else /* !XFS_BIG_BLKNOS */ | ||
314 | if (isnullstartblock(startblock)) { /* null start block: also OR xfs_mask64lo(9) into l0 and xfs_mask64hi(11) into l1 */ | ||
315 | r->l0 = cpu_to_be64( | ||
316 | ((xfs_bmbt_rec_base_t)extent_flag << 63) | | ||
317 | ((xfs_bmbt_rec_base_t)startoff << 9) | | ||
318 | (xfs_bmbt_rec_base_t)xfs_mask64lo(9)); | ||
319 | r->l1 = cpu_to_be64(xfs_mask64hi(11) | | ||
320 | ((xfs_bmbt_rec_base_t)startblock << 21) | | ||
321 | ((xfs_bmbt_rec_base_t)blockcount & | ||
322 | (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); | ||
323 | } else { /* otherwise l0 carries only the flag and startoff */ | ||
324 | r->l0 = cpu_to_be64( | ||
325 | ((xfs_bmbt_rec_base_t)extent_flag << 63) | | ||
326 | ((xfs_bmbt_rec_base_t)startoff << 9)); | ||
327 | r->l1 = cpu_to_be64( | ||
328 | ((xfs_bmbt_rec_base_t)startblock << 21) | | ||
329 | ((xfs_bmbt_rec_base_t)blockcount & | ||
330 | (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); | ||
331 | } | ||
332 | #endif /* XFS_BIG_BLKNOS */ | ||
333 | } | ||
334 | |||
335 | /* | ||
336 | * Set all the fields in a disk format bmap extent record (r) from the uncompressed form (s), by passing s's startoff, startblock, blockcount and state to xfs_bmbt_disk_set_allf(). | ||
337 | */ | ||
338 | STATIC void | ||
339 | xfs_bmbt_disk_set_all( | ||
340 | xfs_bmbt_rec_t *r, | ||
341 | xfs_bmbt_irec_t *s) | ||
342 | { | ||
343 | xfs_bmbt_disk_set_allf(r, s->br_startoff, s->br_startblock, | ||
344 | s->br_blockcount, s->br_state); | ||
345 | } | ||
346 | |||
347 | /* | ||
348 | * Set the blockcount field in a bmap extent record: l1 keeps the bits selected by xfs_mask64hi(43) and takes v masked with xfs_mask64lo(21) (assuming these are masks of the high/low n bits - TODO confirm). | ||
349 | */ | ||
350 | void | ||
351 | xfs_bmbt_set_blockcount( | ||
352 | xfs_bmbt_rec_host_t *r, | ||
353 | xfs_filblks_t v) | ||
354 | { | ||
355 | ASSERT((v & xfs_mask64hi(43)) == 0); /* v must not overlap the bits of l1 that are preserved below */ | ||
356 | r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64hi(43)) | | ||
357 | (xfs_bmbt_rec_base_t)(v & xfs_mask64lo(21)); | ||
358 | } | ||
359 | |||
360 | /* | ||
361 | * Set the startblock field in a bmap extent record. The value is split across both words: its bits from 43 upward go into the low end of l0 and the rest is shifted left by 21 into l1; the other bits of l0 and the low 21 bits of l1 are preserved. | ||
362 | */ | ||
363 | void | ||
364 | xfs_bmbt_set_startblock( | ||
365 | xfs_bmbt_rec_host_t *r, | ||
366 | xfs_fsblock_t v) | ||
367 | { | ||
368 | #if XFS_BIG_BLKNOS | ||
369 | ASSERT((v & xfs_mask64hi(12)) == 0); /* v must not use the bits selected by xfs_mask64hi(12) */ | ||
370 | r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) | | ||
371 | (xfs_bmbt_rec_base_t)(v >> 43); | ||
372 | r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) | | ||
373 | (xfs_bmbt_rec_base_t)(v << 21); | ||
374 | #else /* !XFS_BIG_BLKNOS */ | ||
375 | if (isnullstartblock(v)) { /* null start block: set the xfs_mask64lo(9) bits of l0 and the xfs_mask64hi(11) bits of l1 */ | ||
376 | r->l0 |= (xfs_bmbt_rec_base_t)xfs_mask64lo(9); | ||
377 | r->l1 = (xfs_bmbt_rec_base_t)xfs_mask64hi(11) | | ||
378 | ((xfs_bmbt_rec_base_t)v << 21) | | ||
379 | (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); | ||
380 | } else { /* otherwise clear the xfs_mask64lo(9) bits of l0 */ | ||
381 | r->l0 &= ~(xfs_bmbt_rec_base_t)xfs_mask64lo(9); | ||
382 | r->l1 = ((xfs_bmbt_rec_base_t)v << 21) | | ||
383 | (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); | ||
384 | } | ||
385 | #endif /* XFS_BIG_BLKNOS */ | ||
386 | } | ||
387 | |||
388 | /* | ||
389 | * Set the startoff field in a bmap extent record: l0 keeps its xfs_mask64hi(1) and xfs_mask64lo(9) bits and takes v shifted left by 9 in between. NOTE(review): the assert below masks only 9 high bits of v, so a v with bit 54 set passes and (v << 9) then lands on bit 63; xfs_bmbt_disk_set_allf() masks 64-BMBT_STARTOFF_BITLEN bits instead - confirm which mask is intended. | ||
390 | */ | ||
391 | void | ||
392 | xfs_bmbt_set_startoff( | ||
393 | xfs_bmbt_rec_host_t *r, | ||
394 | xfs_fileoff_t v) | ||
395 | { | ||
396 | ASSERT((v & xfs_mask64hi(9)) == 0); | ||
397 | r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) xfs_mask64hi(1)) | | ||
398 | ((xfs_bmbt_rec_base_t)v << 9) | | ||
399 | (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64lo(9)); | ||
400 | } | ||
401 | |||
402 | /* | ||
403 | * Set the extent state field in a bmap extent record. XFS_EXT_NORM masks l0 down to xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN); any other state ORs xfs_mask64hi(BMBT_EXNTFLAG_BITLEN) into l0. | ||
404 | */ | ||
405 | void | ||
406 | xfs_bmbt_set_state( | ||
407 | xfs_bmbt_rec_host_t *r, | ||
408 | xfs_exntst_t v) | ||
409 | { | ||
410 | ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN); | ||
411 | if (v == XFS_EXT_NORM) | ||
412 | r->l0 &= xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN); /* clear the flag bits, keep everything else */ | ||
413 | else | ||
414 | r->l0 |= xfs_mask64hi(BMBT_EXNTFLAG_BITLEN); /* set the flag bits */ | ||
415 | } | ||
416 | |||
417 | /* | ||
418 | * Convert in-memory form of btree root to on-disk form: copy bb_level and bb_numrecs from rblock to dblock, then copy bb_numrecs keys and bb_numrecs pointers. rblocklen and dblocklen are used only to locate the pointer arrays in the source and destination blocks. | ||
419 | */ | ||
420 | void | ||
421 | xfs_bmbt_to_bmdr( | ||
422 | struct xfs_mount *mp, | ||
423 | struct xfs_btree_block *rblock, | ||
424 | int rblocklen, | ||
425 | xfs_bmdr_block_t *dblock, | ||
426 | int dblocklen) | ||
427 | { | ||
428 | int dmxr; /* first the max record count for dblocklen, later the actual record count */ | ||
429 | xfs_bmbt_key_t *fkp; /* first key in rblock (copy source) */ | ||
430 | __be64 *fpp; /* first pointer in rblock (copy source) */ | ||
431 | xfs_bmbt_key_t *tkp; /* first key in dblock (copy destination) */ | ||
432 | __be64 *tpp; /* first pointer in dblock (copy destination) */ | ||
433 | |||
434 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
435 | ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC)); | ||
436 | ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid)); | ||
437 | ASSERT(rblock->bb_u.l.bb_blkno == | ||
438 | cpu_to_be64(XFS_BUF_DADDR_NULL)); | ||
439 | } else /* this else covers only the single ASSERT on the next line */ | ||
440 | ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC)); | ||
441 | ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)); | ||
442 | ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)); | ||
443 | ASSERT(rblock->bb_level != 0); | ||
444 | dblock->bb_level = rblock->bb_level; | ||
445 | dblock->bb_numrecs = rblock->bb_numrecs; | ||
446 | dmxr = xfs_bmdr_maxrecs(dblocklen, 0); /* max records, needed to find where dblock's pointers start */ | ||
447 | fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); | ||
448 | tkp = XFS_BMDR_KEY_ADDR(dblock, 1); | ||
449 | fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); | ||
450 | tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); | ||
451 | dmxr = be16_to_cpu(dblock->bb_numrecs); /* dmxr is reused: from here on it is the number of records to copy */ | ||
452 | memcpy(tkp, fkp, sizeof(*fkp) * dmxr); | ||
453 | memcpy(tpp, fpp, sizeof(*fpp) * dmxr); | ||
454 | } | ||
455 | |||
456 | /* | ||
457 | * Check extent records, which have just been read, for | ||
458 | * any bit in the extent flag field. ASSERT on debug | ||
459 | * kernels, as this condition should not occur. | ||
460 | * Return an error condition (1) if any flags found, | ||
461 | * otherwise return 0. Checks num records of ifp starting at index idx. | ||
462 | */ | ||
463 | |||
464 | int | ||
465 | xfs_check_nostate_extents( | ||
466 | xfs_ifork_t *ifp, | ||
467 | xfs_extnum_t idx, | ||
468 | xfs_extnum_t num) | ||
469 | { | ||
470 | for (; num > 0; num--, idx++) { | ||
471 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); | ||
472 | if ((ep->l0 >> | ||
473 | (64 - BMBT_EXNTFLAG_BITLEN)) != 0) { /* any of the top BMBT_EXNTFLAG_BITLEN bits of l0 set */ | ||
474 | ASSERT(0); | ||
475 | return 1; | ||
476 | } | ||
477 | } | ||
478 | return 0; | ||
479 | } | ||
480 | |||
481 | |||
482 | STATIC struct xfs_btree_cur * /* returns a new cursor built for the same mount, transaction, inode and fork as cur */ | ||
483 | xfs_bmbt_dup_cursor( | ||
484 | struct xfs_btree_cur *cur) | ||
485 | { | ||
486 | struct xfs_btree_cur *new; | ||
487 | |||
488 | new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp, | ||
489 | cur->bc_private.b.ip, cur->bc_private.b.whichfork); | ||
490 | |||
491 | /* | ||
492 | * Copy the firstblock, flist, and flags values from the original cursor, | ||
493 | * since init cursor doesn't get them. | ||
494 | */ | ||
495 | new->bc_private.b.firstblock = cur->bc_private.b.firstblock; | ||
496 | new->bc_private.b.flist = cur->bc_private.b.flist; | ||
497 | new->bc_private.b.flags = cur->bc_private.b.flags; | ||
498 | |||
499 | return new; | ||
500 | } | ||
501 | |||
502 | STATIC void /* fold src's allocation state into dst: add the allocated count, copy firstblock, then zero src's count */ | ||
503 | xfs_bmbt_update_cursor( | ||
504 | struct xfs_btree_cur *src, | ||
505 | struct xfs_btree_cur *dst) | ||
506 | { | ||
507 | ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) || | ||
508 | (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME)); | ||
509 | ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist); /* both cursors must share one free list */ | ||
510 | |||
511 | dst->bc_private.b.allocated += src->bc_private.b.allocated; | ||
512 | dst->bc_private.b.firstblock = src->bc_private.b.firstblock; | ||
513 | |||
514 | src->bc_private.b.allocated = 0; /* the count now lives in dst only */ | ||
515 | } | ||
516 | |||
517 | STATIC int /* returns 0 with *stat = 1 and new->l set when a block was allocated, 0 with *stat = 0 when none was found, or an error code */ | ||
518 | xfs_bmbt_alloc_block( | ||
519 | struct xfs_btree_cur *cur, | ||
520 | union xfs_btree_ptr *start, | ||
521 | union xfs_btree_ptr *new, | ||
522 | int *stat) | ||
523 | { | ||
524 | xfs_alloc_arg_t args; /* block allocation args */ | ||
525 | int error; /* error return value */ | ||
526 | |||
527 | memset(&args, 0, sizeof(args)); | ||
528 | args.tp = cur->bc_tp; | ||
529 | args.mp = cur->bc_mp; | ||
530 | args.fsbno = cur->bc_private.b.firstblock; | ||
531 | args.firstblock = args.fsbno; | ||
532 | |||
533 | if (args.fsbno == NULLFSBLOCK) { /* cursor has no firstblock yet: start the search at the block named by start */ | ||
534 | args.fsbno = be64_to_cpu(start->l); | ||
535 | args.type = XFS_ALLOCTYPE_START_BNO; | ||
536 | /* | ||
537 | * Make sure there is sufficient room left in the AG to | ||
538 | * complete a full tree split for an extent insert. If | ||
539 | * we are converting the middle part of an extent then | ||
540 | * we may need space for two tree splits. | ||
541 | * | ||
542 | * We are relying on the caller to make the correct block | ||
543 | * reservation for this operation to succeed. If the | ||
544 | * reservation amount is insufficient then we may fail a | ||
545 | * block allocation here and corrupt the filesystem. | ||
546 | */ | ||
547 | args.minleft = xfs_trans_get_block_res(args.tp); | ||
548 | } else if (cur->bc_private.b.flist->xbf_low) { | ||
549 | args.type = XFS_ALLOCTYPE_START_BNO; | ||
550 | } else { | ||
551 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | ||
552 | } | ||
553 | |||
554 | args.minlen = args.maxlen = args.prod = 1; /* exactly one block is requested */ | ||
555 | args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; | ||
556 | if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) { /* not a wasdel allocation and the transaction has no block reservation */ | ||
557 | error = -ENOSPC; | ||
558 | goto error0; | ||
559 | } | ||
560 | error = xfs_alloc_vextent(&args); | ||
561 | if (error) | ||
562 | goto error0; | ||
563 | |||
564 | if (args.fsbno == NULLFSBLOCK && args.minleft) { | ||
565 | /* | ||
566 | * Could not find an AG with enough free space to satisfy | ||
567 | * a full btree split. Try again without minleft and if | ||
568 | * successful activate the lowspace algorithm. | ||
569 | */ | ||
570 | args.fsbno = 0; | ||
571 | args.type = XFS_ALLOCTYPE_FIRST_AG; | ||
572 | args.minleft = 0; | ||
573 | error = xfs_alloc_vextent(&args); | ||
574 | if (error) | ||
575 | goto error0; | ||
576 | cur->bc_private.b.flist->xbf_low = 1; /* NOTE(review): set whenever the retry returns no error, even if args.fsbno is still NULLFSBLOCK */ | ||
577 | } | ||
578 | if (args.fsbno == NULLFSBLOCK) { /* nothing allocated: report via *stat = 0, not as an error */ | ||
579 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
580 | *stat = 0; | ||
581 | return 0; | ||
582 | } | ||
583 | ASSERT(args.len == 1); | ||
584 | cur->bc_private.b.firstblock = args.fsbno; | ||
585 | cur->bc_private.b.allocated++; | ||
586 | cur->bc_private.b.ip->i_d.di_nblocks++; /* account the new block to the inode, then log the inode core and the quota change */ | ||
587 | xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); | ||
588 | xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip, | ||
589 | XFS_TRANS_DQ_BCOUNT, 1L); | ||
590 | |||
591 | new->l = cpu_to_be64(args.fsbno); | ||
592 | |||
593 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
594 | *stat = 1; | ||
595 | return 0; | ||
596 | |||
597 | error0: | ||
598 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
599 | return error; | ||
600 | } | ||
601 | |||
602 | STATIC int /* always returns 0 */ | ||
603 | xfs_bmbt_free_block( | ||
604 | struct xfs_btree_cur *cur, | ||
605 | struct xfs_buf *bp) | ||
606 | { | ||
607 | struct xfs_mount *mp = cur->bc_mp; | ||
608 | struct xfs_inode *ip = cur->bc_private.b.ip; | ||
609 | struct xfs_trans *tp = cur->bc_tp; | ||
610 | xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); /* filesystem block number derived from the buffer's address */ | ||
611 | |||
612 | xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp); /* add the single block to the cursor's free list */ | ||
613 | ip->i_d.di_nblocks--; | ||
614 | |||
615 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
616 | xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); /* mirror of the +1 adjustment made in xfs_bmbt_alloc_block() */ | ||
617 | xfs_trans_binval(tp, bp); | ||
618 | return 0; | ||
619 | } | ||
620 | |||
621 | STATIC int /* minimum record count for a block at the given level */ | ||
622 | xfs_bmbt_get_minrecs( | ||
623 | struct xfs_btree_cur *cur, | ||
624 | int level) | ||
625 | { | ||
626 | if (level == cur->bc_nlevels - 1) { /* top level: sized from the fork's if_broot_bytes rather than a per-mount table */ | ||
627 | struct xfs_ifork *ifp; | ||
628 | |||
629 | ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, | ||
630 | cur->bc_private.b.whichfork); | ||
631 | |||
632 | return xfs_bmbt_maxrecs(cur->bc_mp, | ||
633 | ifp->if_broot_bytes, level == 0) / 2; /* half of the maximum */ | ||
634 | } | ||
635 | |||
636 | return cur->bc_mp->m_bmap_dmnr[level != 0]; /* index 0 for level 0, index 1 for all other levels */ | ||
637 | } | ||
638 | |||
639 | int /* maximum record count for a block at the given level */ | ||
640 | xfs_bmbt_get_maxrecs( | ||
641 | struct xfs_btree_cur *cur, | ||
642 | int level) | ||
643 | { | ||
644 | if (level == cur->bc_nlevels - 1) { /* top level: sized from the fork's if_broot_bytes rather than a per-mount table */ | ||
645 | struct xfs_ifork *ifp; | ||
646 | |||
647 | ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, | ||
648 | cur->bc_private.b.whichfork); | ||
649 | |||
650 | return xfs_bmbt_maxrecs(cur->bc_mp, | ||
651 | ifp->if_broot_bytes, level == 0); | ||
652 | } | ||
653 | |||
654 | return cur->bc_mp->m_bmap_dmxr[level != 0]; /* index 0 for level 0, index 1 for all other levels */ | ||
655 | |||
656 | } | ||
657 | |||
658 | /* | ||
659 | * Get the maximum records we could store in the on-disk format. | ||
660 | * | ||
661 | * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but | ||
662 | * for the root node this checks the available space in the dinode fork | ||
663 | * (the cursor's forksize) so that we can resize the in-memory buffer to | ||
664 | * match it. After a resize to the maximum size this function returns the | ||
665 | * same value as xfs_bmbt_get_maxrecs for the root node, too. | ||
666 | */ | ||
667 | STATIC int | ||
668 | xfs_bmbt_get_dmaxrecs( | ||
669 | struct xfs_btree_cur *cur, | ||
670 | int level) | ||
671 | { | ||
672 | if (level != cur->bc_nlevels - 1) | ||
673 | return cur->bc_mp->m_bmap_dmxr[level != 0]; | ||
674 | return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0); | ||
675 | } | ||
676 | |||
677 | STATIC void /* the key of a record is its startoff, stored big-endian */ | ||
678 | xfs_bmbt_init_key_from_rec( | ||
679 | union xfs_btree_key *key, | ||
680 | union xfs_btree_rec *rec) | ||
681 | { | ||
682 | key->bmbt.br_startoff = | ||
683 | cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt)); | ||
684 | } | ||
685 | |||
686 | STATIC void /* build a record holding only the key's startoff: startblock 0, blockcount 0, state XFS_EXT_NORM */ | ||
687 | xfs_bmbt_init_rec_from_key( | ||
688 | union xfs_btree_key *key, | ||
689 | union xfs_btree_rec *rec) | ||
690 | { | ||
691 | ASSERT(key->bmbt.br_startoff != 0); | ||
692 | |||
693 | xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff), | ||
694 | 0, 0, XFS_EXT_NORM); | ||
695 | } | ||
696 | |||
697 | STATIC void /* pack the cursor's bc_rec.b record into rec in disk format */ | ||
698 | xfs_bmbt_init_rec_from_cur( | ||
699 | struct xfs_btree_cur *cur, | ||
700 | union xfs_btree_rec *rec) | ||
701 | { | ||
702 | xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b); | ||
703 | } | ||
704 | |||
705 | STATIC void /* always yields a zero long pointer; cur is not consulted */ | ||
706 | xfs_bmbt_init_ptr_from_cur( | ||
707 | struct xfs_btree_cur *cur, | ||
708 | union xfs_btree_ptr *ptr) | ||
709 | { | ||
710 | ptr->l = 0; | ||
711 | } | ||
712 | |||
713 | STATIC __int64_t /* key's startoff minus the startoff in the cursor's bc_rec.b */ | ||
714 | xfs_bmbt_key_diff( | ||
715 | struct xfs_btree_cur *cur, | ||
716 | union xfs_btree_key *key) | ||
717 | { | ||
718 | return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) - | ||
719 | cur->bc_rec.b.br_startoff; | ||
720 | } | ||
721 | |||
722 | static bool /* true when the block header in bp passes the magic, level, numrecs and sibling checks below */ | ||
723 | xfs_bmbt_verify( | ||
724 | struct xfs_buf *bp) | ||
725 | { | ||
726 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
727 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); | ||
728 | unsigned int level; | ||
729 | |||
730 | switch (block->bb_magic) { /* compared in on-disk byte order against cpu_to_be32() constants */ | ||
731 | case cpu_to_be32(XFS_BMAP_CRC_MAGIC): | ||
732 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
733 | return false; | ||
734 | if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid)) | ||
735 | return false; | ||
736 | if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn) /* the block must record the address it was read from */ | ||
737 | return false; | ||
738 | /* | ||
739 | * XXX: need a better way of verifying the owner here. Right now | ||
740 | * just make sure there has been one set. | ||
741 | */ | ||
742 | if (be64_to_cpu(block->bb_u.l.bb_owner) == 0) | ||
743 | return false; | ||
744 | /* fall through */ | ||
745 | case cpu_to_be32(XFS_BMAP_MAGIC): | ||
746 | break; | ||
747 | default: | ||
748 | return false; | ||
749 | } | ||
750 | |||
751 | /* | ||
752 | * numrecs and level verification. | ||
753 | * | ||
754 | * We don't know what fork we belong to, so just verify that the level | ||
755 | * is no greater than the maximum of the two. Later checks will be more | ||
756 | * precise. | ||
757 | */ | ||
758 | level = be16_to_cpu(block->bb_level); | ||
759 | if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1])) | ||
760 | return false; | ||
761 | if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) | ||
762 | return false; | ||
763 | |||
764 | /* sibling pointer verification: each sibling must be nonzero and either NULLDFSBNO or pass XFS_FSB_SANITY_CHECK */ | ||
765 | if (!block->bb_u.l.bb_leftsib || | ||
766 | (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) && | ||
767 | !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) | ||
768 | return false; | ||
769 | if (!block->bb_u.l.bb_rightsib || | ||
770 | (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) && | ||
771 | !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) | ||
772 | return false; | ||
773 | |||
774 | return true; | ||
775 | } | ||
776 | |||
777 | static void /* read-side verifier: CRC check first, then structure check; failures are recorded on the buffer */ | ||
778 | xfs_bmbt_read_verify( | ||
779 | struct xfs_buf *bp) | ||
780 | { | ||
781 | if (!xfs_btree_lblock_verify_crc(bp)) | ||
782 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
783 | else if (!xfs_bmbt_verify(bp)) /* only reached when the CRC check passed */ | ||
784 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
785 | |||
786 | if (bp->b_error) { | ||
787 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
788 | xfs_verifier_error(bp); | ||
789 | } | ||
790 | } | ||
791 | |||
792 | static void /* write-side verifier: structure check, then CRC calculation only if the check passed */ | ||
793 | xfs_bmbt_write_verify( | ||
794 | struct xfs_buf *bp) | ||
795 | { | ||
796 | if (!xfs_bmbt_verify(bp)) { | ||
797 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
798 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
799 | xfs_verifier_error(bp); | ||
800 | return; /* the CRC is not calculated for a block that failed verification */ | ||
801 | } | ||
802 | xfs_btree_lblock_calc_crc(bp); | ||
803 | } | ||
804 | |||
805 | const struct xfs_buf_ops xfs_bmbt_buf_ops = { /* buffer verifier pair, referenced by the .buf_ops member of xfs_bmbt_ops */ | ||
806 | .verify_read = xfs_bmbt_read_verify, | ||
807 | .verify_write = xfs_bmbt_write_verify, | ||
808 | }; | ||
809 | |||
810 | |||
811 | #if defined(DEBUG) || defined(XFS_WARN) | ||
812 | STATIC int /* nonzero when k1's startoff is strictly less than k2's; cur is unused */ | ||
813 | xfs_bmbt_keys_inorder( | ||
814 | struct xfs_btree_cur *cur, | ||
815 | union xfs_btree_key *k1, | ||
816 | union xfs_btree_key *k2) | ||
817 | { | ||
818 | return be64_to_cpu(k1->bmbt.br_startoff) < | ||
819 | be64_to_cpu(k2->bmbt.br_startoff); | ||
820 | } | ||
821 | |||
822 | STATIC int /* nonzero when r1 ends (startoff + blockcount) at or before r2's startoff; cur is unused */ | ||
823 | xfs_bmbt_recs_inorder( | ||
824 | struct xfs_btree_cur *cur, | ||
825 | union xfs_btree_rec *r1, | ||
826 | union xfs_btree_rec *r2) | ||
827 | { | ||
828 | return xfs_bmbt_disk_get_startoff(&r1->bmbt) + | ||
829 | xfs_bmbt_disk_get_blockcount(&r1->bmbt) <= | ||
830 | xfs_bmbt_disk_get_startoff(&r2->bmbt); | ||
831 | } | ||
832 | #endif /* DEBUG || XFS_WARN */ | ||
833 | |||
834 | static const struct xfs_btree_ops xfs_bmbt_ops = { /* operations table installed as bc_ops by xfs_bmbt_init_cursor() */ | ||
835 | .rec_len = sizeof(xfs_bmbt_rec_t), | ||
836 | .key_len = sizeof(xfs_bmbt_key_t), | ||
837 | |||
838 | .dup_cursor = xfs_bmbt_dup_cursor, | ||
839 | .update_cursor = xfs_bmbt_update_cursor, | ||
840 | .alloc_block = xfs_bmbt_alloc_block, | ||
841 | .free_block = xfs_bmbt_free_block, | ||
842 | .get_maxrecs = xfs_bmbt_get_maxrecs, | ||
843 | .get_minrecs = xfs_bmbt_get_minrecs, | ||
844 | .get_dmaxrecs = xfs_bmbt_get_dmaxrecs, | ||
845 | .init_key_from_rec = xfs_bmbt_init_key_from_rec, | ||
846 | .init_rec_from_key = xfs_bmbt_init_rec_from_key, | ||
847 | .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, | ||
848 | .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, | ||
849 | .key_diff = xfs_bmbt_key_diff, | ||
850 | .buf_ops = &xfs_bmbt_buf_ops, | ||
851 | #if defined(DEBUG) || defined(XFS_WARN) | ||
852 | .keys_inorder = xfs_bmbt_keys_inorder, /* ordering checks exist only in DEBUG or XFS_WARN builds */ | ||
853 | .recs_inorder = xfs_bmbt_recs_inorder, | ||
854 | #endif | ||
855 | }; | ||
856 | |||
857 | /* | ||
858 | * Allocate a new bmap btree cursor from xfs_btree_cur_zone (zeroed, KM_SLEEP) and fill it in for the given inode fork. firstblock starts as NULLFSBLOCK and flist as NULL; callers that need them set them afterwards, as xfs_bmbt_dup_cursor() does. | ||
859 | */ | ||
860 | struct xfs_btree_cur * /* new bmap btree cursor */ | ||
861 | xfs_bmbt_init_cursor( | ||
862 | struct xfs_mount *mp, /* file system mount point */ | ||
863 | struct xfs_trans *tp, /* transaction pointer */ | ||
864 | struct xfs_inode *ip, /* inode owning the btree */ | ||
865 | int whichfork) /* data or attr fork */ | ||
866 | { | ||
867 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); | ||
868 | struct xfs_btree_cur *cur; | ||
869 | |||
870 | cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); | ||
871 | |||
872 | cur->bc_tp = tp; | ||
873 | cur->bc_mp = mp; | ||
874 | cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; /* level count is the root block's level plus one */ | ||
875 | cur->bc_btnum = XFS_BTNUM_BMAP; | ||
876 | cur->bc_blocklog = mp->m_sb.sb_blocklog; | ||
877 | |||
878 | cur->bc_ops = &xfs_bmbt_ops; | ||
879 | cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE; | ||
880 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
881 | cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; | ||
882 | |||
883 | cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); /* read back by xfs_bmbt_get_dmaxrecs() */ | ||
884 | cur->bc_private.b.ip = ip; | ||
885 | cur->bc_private.b.firstblock = NULLFSBLOCK; | ||
886 | cur->bc_private.b.flist = NULL; | ||
887 | cur->bc_private.b.allocated = 0; | ||
888 | cur->bc_private.b.flags = 0; | ||
889 | cur->bc_private.b.whichfork = whichfork; | ||
890 | |||
891 | return cur; | ||
892 | } | ||
893 | |||
894 | /* | ||
895 | * Calculate number of records in a bmap btree block of blocklen bytes: subtract the header size XFS_BMBT_BLOCK_LEN(mp), then divide by the record size (leaf) or by the combined key and pointer size (non-leaf). | ||
896 | */ | ||
897 | int | ||
898 | xfs_bmbt_maxrecs( | ||
899 | struct xfs_mount *mp, | ||
900 | int blocklen, | ||
901 | int leaf) | ||
902 | { | ||
903 | blocklen -= XFS_BMBT_BLOCK_LEN(mp); | ||
904 | |||
905 | if (leaf) | ||
906 | return blocklen / sizeof(xfs_bmbt_rec_t); | ||
907 | return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)); | ||
908 | } | ||
909 | |||
910 | /* | ||
911 | * Calculate number of records in a bmap btree inode root of blocklen bytes: subtract sizeof(xfs_bmdr_block_t), then divide by the record size (leaf) or by the combined key and pointer size (non-leaf). | ||
912 | */ | ||
913 | int | ||
914 | xfs_bmdr_maxrecs( | ||
915 | int blocklen, | ||
916 | int leaf) | ||
917 | { | ||
918 | blocklen -= sizeof(xfs_bmdr_block_t); | ||
919 | |||
920 | if (leaf) | ||
921 | return blocklen / sizeof(xfs_bmdr_rec_t); | ||
922 | return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t)); | ||
923 | } | ||
924 | |||
925 | /* | ||
926 | * Change the owner of a btree format fork of the inode passed in. Change it to | ||
927 | * the owner that is passed in so that we can change owners before or after | ||
928 | * we switch forks between inodes. The operation that the caller is doing will | ||
929 | * determine whether it needs to change owner before or after the switch. | ||
930 | * | ||
931 | * For demand paged transactional modification, the fork switch should be done | ||
932 | * after reading in all the blocks, modifying them and pinning them in the | ||
933 | * transaction. For modification when the buffers are already pinned in memory, | ||
934 | * the fork switch can be done before changing the owner as we won't need to | ||
935 | * validate the owner until the btree buffers are unpinned and writes can occur | ||
936 | * again. | ||
937 | * | ||
938 | * For recovery based ownership change, there is no transactional context and | ||
939 | * so a buffer list must be supplied so that we can record the buffers that we | ||
940 | * modified for the caller to issue IO on. | ||
941 | */ | ||
942 | int | ||
943 | xfs_bmbt_change_owner( | ||
944 | struct xfs_trans *tp, | ||
945 | struct xfs_inode *ip, | ||
946 | int whichfork, | ||
947 | xfs_ino_t new_owner, | ||
948 | struct list_head *buffer_list) | ||
949 | { | ||
950 | struct xfs_btree_cur *cur; | ||
951 | int error; | ||
952 | |||
953 | ASSERT(tp || buffer_list); /* exactly one of tp and buffer_list must be supplied */ | ||
954 | ASSERT(!(tp && buffer_list)); | ||
955 | if (whichfork == XFS_DATA_FORK) | ||
956 | ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE); | ||
957 | else | ||
958 | ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE); | ||
959 | |||
960 | cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); | ||
961 | if (!cur) | ||
962 | return -ENOMEM; | ||
963 | |||
964 | error = xfs_btree_change_owner(cur, new_owner, buffer_list); | ||
965 | xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); /* the cursor is deleted on both the success and the error path */ | ||
966 | return error; | ||
967 | } | ||
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h new file mode 100644 index 000000000000..819a8a4dee95 --- /dev/null +++ b/fs/xfs/libxfs/xfs_bmap_btree.h | |||
@@ -0,0 +1,143 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000,2002-2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_BMAP_BTREE_H__ | ||
19 | #define __XFS_BMAP_BTREE_H__ | ||
20 | |||
21 | struct xfs_btree_cur; | ||
22 | struct xfs_btree_block; | ||
23 | struct xfs_mount; | ||
24 | struct xfs_inode; | ||
25 | struct xfs_trans; | ||
26 | |||
27 | /* | ||
28 | * Extent state and extent format macros. XFS_EXTFMT_INODE(x) yields XFS_EXTFMT_HASSTATE when xfs_sb_version_hasextflgbit() is true for the superblock of x's mount, else XFS_EXTFMT_NOSTATE. ISUNWRITTEN(x) tests x->br_state against XFS_EXT_UNWRITTEN. | ||
29 | */ | ||
30 | #define XFS_EXTFMT_INODE(x) \ | ||
31 | (xfs_sb_version_hasextflgbit(&((x)->i_mount->m_sb)) ? \ | ||
32 | XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE) | ||
33 | #define ISUNWRITTEN(x) ((x)->br_state == XFS_EXT_UNWRITTEN) | ||
34 | |||
35 | /* | ||
36 | * Btree block header size depends on a superblock flag: XFS_BTREE_LBLOCK_CRC_LEN when xfs_sb_version_hascrc() is true for the mount, XFS_BTREE_LBLOCK_LEN otherwise. | ||
37 | */ | ||
38 | #define XFS_BMBT_BLOCK_LEN(mp) \ | ||
39 | (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ | ||
40 | XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN) | ||
41 | |||
42 | #define XFS_BMBT_REC_ADDR(mp, block, index) \ | ||
43 | ((xfs_bmbt_rec_t *) \ | ||
44 | ((char *)(block) + \ | ||
45 | XFS_BMBT_BLOCK_LEN(mp) + \ | ||
46 | ((index) - 1) * sizeof(xfs_bmbt_rec_t))) /* address of record number index (1-based) after the block header */ | ||
47 | |||
48 | #define XFS_BMBT_KEY_ADDR(mp, block, index) \ | ||
49 | ((xfs_bmbt_key_t *) \ | ||
50 | ((char *)(block) + \ | ||
51 | XFS_BMBT_BLOCK_LEN(mp) + \ | ||
52 | ((index) - 1) * sizeof(xfs_bmbt_key_t))) /* address of key number index (1-based) after the block header */ | ||
53 | |||
54 | #define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \ | ||
55 | ((xfs_bmbt_ptr_t *) \ | ||
56 | ((char *)(block) + \ | ||
57 | XFS_BMBT_BLOCK_LEN(mp) + \ | ||
58 | (maxrecs) * sizeof(xfs_bmbt_key_t) + \ | ||
59 | ((index) - 1) * sizeof(xfs_bmbt_ptr_t))) /* pointers start after space for maxrecs keys, so maxrecs must match the block's size */ | ||
60 | |||
61 | #define XFS_BMDR_REC_ADDR(block, index) \ | ||
62 | ((xfs_bmdr_rec_t *) \ | ||
63 | ((char *)(block) + \ | ||
64 | sizeof(struct xfs_bmdr_block) + \ | ||
65 | ((index) - 1) * sizeof(xfs_bmdr_rec_t))) /* same as XFS_BMBT_REC_ADDR but with the xfs_bmdr_block header size */ | ||
66 | |||
67 | #define XFS_BMDR_KEY_ADDR(block, index) \ | ||
68 | ((xfs_bmdr_key_t *) \ | ||
69 | ((char *)(block) + \ | ||
70 | sizeof(struct xfs_bmdr_block) + \ | ||
71 | ((index) - 1) * sizeof(xfs_bmdr_key_t))) /* same as XFS_BMBT_KEY_ADDR but with the xfs_bmdr_block header size */ | ||
72 | |||
73 | #define XFS_BMDR_PTR_ADDR(block, index, maxrecs) \ | ||
74 | ((xfs_bmdr_ptr_t *) \ | ||
75 | ((char *)(block) + \ | ||
76 | sizeof(struct xfs_bmdr_block) + \ | ||
77 | (maxrecs) * sizeof(xfs_bmdr_key_t) + \ | ||
78 | ((index) - 1) * sizeof(xfs_bmdr_ptr_t))) /* same as XFS_BMBT_PTR_ADDR but with the xfs_bmdr_block header size */ | ||
79 | |||
80 | /* | ||
81 | * These are to be used when we know the size of the block and | ||
82 | * we don't have a cursor. XFS_BMAP_BROOT_PTR_ADDR derives maxrecs from the block size sz; the *_SPACE* macros give the bytes needed for a header plus nrecs key/pointer pairs. NOTE(review): XFS_BMDR_SPACE_CALC uses the xfs_bmbt_ key and ptr types while xfs_bmdr_maxrecs() uses the xfs_bmdr_ types - presumably the same size, confirm. | ||
83 | */ | ||
84 | #define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \ | ||
85 | XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0)) | ||
86 | |||
87 | #define XFS_BMAP_BROOT_SPACE_CALC(mp, nrecs) \ | ||
88 | (int)(XFS_BMBT_BLOCK_LEN(mp) + \ | ||
89 | ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) | ||
90 | |||
91 | #define XFS_BMAP_BROOT_SPACE(mp, bb) \ | ||
92 | (XFS_BMAP_BROOT_SPACE_CALC(mp, be16_to_cpu((bb)->bb_numrecs))) | ||
93 | #define XFS_BMDR_SPACE_CALC(nrecs) \ | ||
94 | (int)(sizeof(xfs_bmdr_block_t) + \ | ||
95 | ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) | ||
96 | #define XFS_BMAP_BMDR_SPACE(bb) \ | ||
97 | (XFS_BMDR_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs))) | ||
98 | |||
99 | /* | ||
100 | * Maximum number of bmap btree levels, read from the mount's m_bm_maxlevels array indexed by w. | ||
101 | */ | ||
102 | #define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)]) | ||
103 | |||
104 | /* | ||
105 | * Prototypes for xfs_bmap.c to call: root format conversion, getters and setters for the fields of host and disk format extent records, record count calculations, owner change, and cursor allocation. | ||
106 | */ | ||
107 | extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int, | ||
108 | struct xfs_btree_block *, int); | ||
109 | extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); | ||
110 | extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); | ||
111 | extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); | ||
112 | extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); | ||
113 | extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r); | ||
114 | |||
115 | extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); | ||
116 | extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); | ||
117 | |||
118 | extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); | ||
119 | extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o, | ||
120 | xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); /* o = startoff, b = startblock, c = blockcount, v = state */ | ||
121 | extern void xfs_bmbt_set_blockcount(xfs_bmbt_rec_host_t *r, xfs_filblks_t v); | ||
122 | extern void xfs_bmbt_set_startblock(xfs_bmbt_rec_host_t *r, xfs_fsblock_t v); | ||
123 | extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v); | ||
124 | extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v); | ||
125 | |||
126 | extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, | ||
127 | xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); /* same argument order as xfs_bmbt_set_allf, for a disk format record */ | ||
128 | |||
129 | extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int, | ||
130 | xfs_bmdr_block_t *, int); | ||
131 | |||
132 | extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level); | ||
133 | extern int xfs_bmdr_maxrecs(int blocklen, int leaf); | ||
134 | extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf); | ||
135 | |||
136 | extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, | ||
137 | int whichfork, xfs_ino_t new_owner, | ||
138 | struct list_head *buffer_list); | ||
139 | |||
140 | extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, | ||
141 | struct xfs_trans *, struct xfs_inode *, int); | ||
142 | |||
143 | #endif /* __XFS_BMAP_BTREE_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c new file mode 100644 index 000000000000..ba35c9ccb8f9 --- /dev/null +++ b/fs/xfs/libxfs/xfs_btree.c | |||
@@ -0,0 +1,4069 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_shared.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_bit.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_inode.h" | ||
29 | #include "xfs_trans.h" | ||
30 | #include "xfs_inode_item.h" | ||
31 | #include "xfs_buf_item.h" | ||
32 | #include "xfs_btree.h" | ||
33 | #include "xfs_error.h" | ||
34 | #include "xfs_trace.h" | ||
35 | #include "xfs_cksum.h" | ||
36 | #include "xfs_alloc.h" | ||
37 | |||
/*
 * Cursor allocation zone.
 *
 * xfs_btree_del_cursor() hands cursors back to this zone with
 * kmem_zone_free() when they are torn down.
 */
kmem_zone_t *xfs_btree_cur_zone;
42 | |||
/*
 * Btree magic numbers.
 *
 * Indexed as xfs_magics[crc][btnum]: row 0 is used when the cursor does not
 * have XFS_BTREE_CRC_BLOCKS set, row 1 (the *_CRC_MAGIC values) when it
 * does.  The column is selected by the cursor's bc_btnum; see the
 * xfs_btree_magic() macro.
 */
static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
	  XFS_FIBT_MAGIC },
	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
};
/*
 * Look up the expected magic number for the btree a cursor refers to.
 *
 * The first index is 1 when XFS_BTREE_CRC_BLOCKS is set in the cursor flags
 * and 0 otherwise; the second index is the cursor's bc_btnum.  Every use of
 * the argument is parenthesised so that any pointer expression (for example
 * "curs + 1") can be passed safely.  Note that the argument is evaluated
 * twice, so it must not have side effects.
 */
#define xfs_btree_magic(cur) \
	xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][(cur)->bc_btnum]
54 | |||
55 | |||
56 | STATIC int /* error (0 or EFSCORRUPTED) */ | ||
57 | xfs_btree_check_lblock( | ||
58 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
59 | struct xfs_btree_block *block, /* btree long form block pointer */ | ||
60 | int level, /* level of the btree block */ | ||
61 | struct xfs_buf *bp) /* buffer for block, if any */ | ||
62 | { | ||
63 | int lblock_ok = 1; /* block passes checks */ | ||
64 | struct xfs_mount *mp; /* file system mount point */ | ||
65 | |||
66 | mp = cur->bc_mp; | ||
67 | |||
68 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
69 | lblock_ok = lblock_ok && | ||
70 | uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) && | ||
71 | block->bb_u.l.bb_blkno == cpu_to_be64( | ||
72 | bp ? bp->b_bn : XFS_BUF_DADDR_NULL); | ||
73 | } | ||
74 | |||
75 | lblock_ok = lblock_ok && | ||
76 | be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && | ||
77 | be16_to_cpu(block->bb_level) == level && | ||
78 | be16_to_cpu(block->bb_numrecs) <= | ||
79 | cur->bc_ops->get_maxrecs(cur, level) && | ||
80 | block->bb_u.l.bb_leftsib && | ||
81 | (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || | ||
82 | XFS_FSB_SANITY_CHECK(mp, | ||
83 | be64_to_cpu(block->bb_u.l.bb_leftsib))) && | ||
84 | block->bb_u.l.bb_rightsib && | ||
85 | (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || | ||
86 | XFS_FSB_SANITY_CHECK(mp, | ||
87 | be64_to_cpu(block->bb_u.l.bb_rightsib))); | ||
88 | |||
89 | if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, | ||
90 | XFS_ERRTAG_BTREE_CHECK_LBLOCK, | ||
91 | XFS_RANDOM_BTREE_CHECK_LBLOCK))) { | ||
92 | if (bp) | ||
93 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
94 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); | ||
95 | return -EFSCORRUPTED; | ||
96 | } | ||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | STATIC int /* error (0 or EFSCORRUPTED) */ | ||
101 | xfs_btree_check_sblock( | ||
102 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
103 | struct xfs_btree_block *block, /* btree short form block pointer */ | ||
104 | int level, /* level of the btree block */ | ||
105 | struct xfs_buf *bp) /* buffer containing block */ | ||
106 | { | ||
107 | struct xfs_mount *mp; /* file system mount point */ | ||
108 | struct xfs_buf *agbp; /* buffer for ag. freespace struct */ | ||
109 | struct xfs_agf *agf; /* ag. freespace structure */ | ||
110 | xfs_agblock_t agflen; /* native ag. freespace length */ | ||
111 | int sblock_ok = 1; /* block passes checks */ | ||
112 | |||
113 | mp = cur->bc_mp; | ||
114 | agbp = cur->bc_private.a.agbp; | ||
115 | agf = XFS_BUF_TO_AGF(agbp); | ||
116 | agflen = be32_to_cpu(agf->agf_length); | ||
117 | |||
118 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
119 | sblock_ok = sblock_ok && | ||
120 | uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) && | ||
121 | block->bb_u.s.bb_blkno == cpu_to_be64( | ||
122 | bp ? bp->b_bn : XFS_BUF_DADDR_NULL); | ||
123 | } | ||
124 | |||
125 | sblock_ok = sblock_ok && | ||
126 | be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && | ||
127 | be16_to_cpu(block->bb_level) == level && | ||
128 | be16_to_cpu(block->bb_numrecs) <= | ||
129 | cur->bc_ops->get_maxrecs(cur, level) && | ||
130 | (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || | ||
131 | be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && | ||
132 | block->bb_u.s.bb_leftsib && | ||
133 | (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || | ||
134 | be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && | ||
135 | block->bb_u.s.bb_rightsib; | ||
136 | |||
137 | if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp, | ||
138 | XFS_ERRTAG_BTREE_CHECK_SBLOCK, | ||
139 | XFS_RANDOM_BTREE_CHECK_SBLOCK))) { | ||
140 | if (bp) | ||
141 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
142 | XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); | ||
143 | return -EFSCORRUPTED; | ||
144 | } | ||
145 | return 0; | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * Debug routine: check that block header is ok. | ||
150 | */ | ||
151 | int | ||
152 | xfs_btree_check_block( | ||
153 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
154 | struct xfs_btree_block *block, /* generic btree block pointer */ | ||
155 | int level, /* level of the btree block */ | ||
156 | struct xfs_buf *bp) /* buffer containing block, if any */ | ||
157 | { | ||
158 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) | ||
159 | return xfs_btree_check_lblock(cur, block, level, bp); | ||
160 | else | ||
161 | return xfs_btree_check_sblock(cur, block, level, bp); | ||
162 | } | ||
163 | |||
/*
 * Check that (long) pointer is ok.
 *
 * A long-form pointer is accepted when the level is above the leaves
 * (level > 0), the block number is not the null value NULLDFSBNO, and it
 * passes XFS_FSB_SANITY_CHECK for this mount.
 *
 * Returns 0 when the pointer is acceptable.  Otherwise
 * XFS_WANT_CORRUPTED_RETURN returns from this function on our behalf; the
 * macro is defined outside this file -- presumably it yields the
 * EFSCORRUPTED error, TODO confirm sign and reporting behaviour.
 */
int					/* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_dfsbno_t		bno,	/* btree block disk address */
	int			level)	/* btree block level */
{
	XFS_WANT_CORRUPTED_RETURN(
		level > 0 &&
		bno != NULLDFSBNO &&
		XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
	return 0;
}
179 | |||
180 | #ifdef DEBUG | ||
/*
 * Check that (short) pointer is ok.
 *
 * A short-form pointer is accepted when the level is above the leaves
 * (level > 0) and the AG block number is neither NULLAGBLOCK nor zero and
 * lies below the superblock's sb_agblocks.
 *
 * Returns 0 when the pointer is acceptable.  Otherwise
 * XFS_WANT_CORRUPTED_RETURN returns from this function on our behalf; the
 * macro is defined outside this file -- presumably it yields the
 * EFSCORRUPTED error, TODO confirm sign and reporting behaviour.
 */
STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_sptr(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* btree block disk address */
	int			level)	/* btree block level */
{
	/* upper bound for any block number inside an allocation group */
	xfs_agblock_t		agblocks = cur->bc_mp->m_sb.sb_agblocks;

	XFS_WANT_CORRUPTED_RETURN(
		level > 0 &&
		bno != NULLAGBLOCK &&
		bno != 0 &&
		bno < agblocks);
	return 0;
}
199 | |||
200 | /* | ||
201 | * Check that block ptr is ok. | ||
202 | */ | ||
203 | STATIC int /* error (0 or EFSCORRUPTED) */ | ||
204 | xfs_btree_check_ptr( | ||
205 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
206 | union xfs_btree_ptr *ptr, /* btree block disk address */ | ||
207 | int index, /* offset from ptr to check */ | ||
208 | int level) /* btree block level */ | ||
209 | { | ||
210 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { | ||
211 | return xfs_btree_check_lptr(cur, | ||
212 | be64_to_cpu((&ptr->l)[index]), level); | ||
213 | } else { | ||
214 | return xfs_btree_check_sptr(cur, | ||
215 | be32_to_cpu((&ptr->s)[index]), level); | ||
216 | } | ||
217 | } | ||
218 | #endif | ||
219 | |||
220 | /* | ||
221 | * Calculate CRC on the whole btree block and stuff it into the | ||
222 | * long-form btree header. | ||
223 | * | ||
224 | * Prior to calculting the CRC, pull the LSN out of the buffer log item and put | ||
225 | * it into the buffer so recovery knows what the last modifcation was that made | ||
226 | * it to disk. | ||
227 | */ | ||
228 | void | ||
229 | xfs_btree_lblock_calc_crc( | ||
230 | struct xfs_buf *bp) | ||
231 | { | ||
232 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); | ||
233 | struct xfs_buf_log_item *bip = bp->b_fspriv; | ||
234 | |||
235 | if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) | ||
236 | return; | ||
237 | if (bip) | ||
238 | block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); | ||
239 | xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); | ||
240 | } | ||
241 | |||
242 | bool | ||
243 | xfs_btree_lblock_verify_crc( | ||
244 | struct xfs_buf *bp) | ||
245 | { | ||
246 | if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) | ||
247 | return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); | ||
248 | |||
249 | return true; | ||
250 | } | ||
251 | |||
252 | /* | ||
253 | * Calculate CRC on the whole btree block and stuff it into the | ||
254 | * short-form btree header. | ||
255 | * | ||
256 | * Prior to calculting the CRC, pull the LSN out of the buffer log item and put | ||
257 | * it into the buffer so recovery knows what the last modifcation was that made | ||
258 | * it to disk. | ||
259 | */ | ||
260 | void | ||
261 | xfs_btree_sblock_calc_crc( | ||
262 | struct xfs_buf *bp) | ||
263 | { | ||
264 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); | ||
265 | struct xfs_buf_log_item *bip = bp->b_fspriv; | ||
266 | |||
267 | if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) | ||
268 | return; | ||
269 | if (bip) | ||
270 | block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); | ||
271 | xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); | ||
272 | } | ||
273 | |||
274 | bool | ||
275 | xfs_btree_sblock_verify_crc( | ||
276 | struct xfs_buf *bp) | ||
277 | { | ||
278 | if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) | ||
279 | return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); | ||
280 | |||
281 | return true; | ||
282 | } | ||
283 | |||
284 | /* | ||
285 | * Delete the btree cursor. | ||
286 | */ | ||
287 | void | ||
288 | xfs_btree_del_cursor( | ||
289 | xfs_btree_cur_t *cur, /* btree cursor */ | ||
290 | int error) /* del because of error */ | ||
291 | { | ||
292 | int i; /* btree level */ | ||
293 | |||
294 | /* | ||
295 | * Clear the buffer pointers, and release the buffers. | ||
296 | * If we're doing this in the face of an error, we | ||
297 | * need to make sure to inspect all of the entries | ||
298 | * in the bc_bufs array for buffers to be unlocked. | ||
299 | * This is because some of the btree code works from | ||
300 | * level n down to 0, and if we get an error along | ||
301 | * the way we won't have initialized all the entries | ||
302 | * down to 0. | ||
303 | */ | ||
304 | for (i = 0; i < cur->bc_nlevels; i++) { | ||
305 | if (cur->bc_bufs[i]) | ||
306 | xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); | ||
307 | else if (!error) | ||
308 | break; | ||
309 | } | ||
310 | /* | ||
311 | * Can't free a bmap cursor without having dealt with the | ||
312 | * allocated indirect blocks' accounting. | ||
313 | */ | ||
314 | ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || | ||
315 | cur->bc_private.b.allocated == 0); | ||
316 | /* | ||
317 | * Free the cursor. | ||
318 | */ | ||
319 | kmem_zone_free(xfs_btree_cur_zone, cur); | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * Duplicate the btree cursor. | ||
324 | * Allocate a new one, copy the record, re-get the buffers. | ||
325 | */ | ||
326 | int /* error */ | ||
327 | xfs_btree_dup_cursor( | ||
328 | xfs_btree_cur_t *cur, /* input cursor */ | ||
329 | xfs_btree_cur_t **ncur) /* output cursor */ | ||
330 | { | ||
331 | xfs_buf_t *bp; /* btree block's buffer pointer */ | ||
332 | int error; /* error return value */ | ||
333 | int i; /* level number of btree block */ | ||
334 | xfs_mount_t *mp; /* mount structure for filesystem */ | ||
335 | xfs_btree_cur_t *new; /* new cursor value */ | ||
336 | xfs_trans_t *tp; /* transaction pointer, can be NULL */ | ||
337 | |||
338 | tp = cur->bc_tp; | ||
339 | mp = cur->bc_mp; | ||
340 | |||
341 | /* | ||
342 | * Allocate a new cursor like the old one. | ||
343 | */ | ||
344 | new = cur->bc_ops->dup_cursor(cur); | ||
345 | |||
346 | /* | ||
347 | * Copy the record currently in the cursor. | ||
348 | */ | ||
349 | new->bc_rec = cur->bc_rec; | ||
350 | |||
351 | /* | ||
352 | * For each level current, re-get the buffer and copy the ptr value. | ||
353 | */ | ||
354 | for (i = 0; i < new->bc_nlevels; i++) { | ||
355 | new->bc_ptrs[i] = cur->bc_ptrs[i]; | ||
356 | new->bc_ra[i] = cur->bc_ra[i]; | ||
357 | bp = cur->bc_bufs[i]; | ||
358 | if (bp) { | ||
359 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, | ||
360 | XFS_BUF_ADDR(bp), mp->m_bsize, | ||
361 | 0, &bp, | ||
362 | cur->bc_ops->buf_ops); | ||
363 | if (error) { | ||
364 | xfs_btree_del_cursor(new, error); | ||
365 | *ncur = NULL; | ||
366 | return error; | ||
367 | } | ||
368 | } | ||
369 | new->bc_bufs[i] = bp; | ||
370 | } | ||
371 | *ncur = new; | ||
372 | return 0; | ||
373 | } | ||
374 | |||
375 | /* | ||
376 | * XFS btree block layout and addressing: | ||
377 | * | ||
378 | * There are two types of blocks in the btree: leaf and non-leaf blocks. | ||
379 | * | ||
380 | * A leaf block starts with a header, followed by records containing | ||
381 | * the values. A non-leaf block also starts with the same header, which | ||
382 | * is followed first by lookup keys and then by an equal number of pointers | ||
383 | * to the btree blocks at the previous level. | ||
384 | * | ||
385 | * +--------+-------+-------+-------+-------+-------+-------+ | ||
386 | * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N | | ||
387 | * +--------+-------+-------+-------+-------+-------+-------+ | ||
388 | * | ||
389 | * +--------+-------+-------+-------+-------+-------+-------+ | ||
390 | * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N | | ||
391 | * +--------+-------+-------+-------+-------+-------+-------+ | ||
392 | * | ||
393 | * The header is called struct xfs_btree_block for reasons better left unknown | ||
394 | * and comes in different versions for short (32bit) and long (64bit) block | ||
395 | * pointers. The record and key structures are defined by the btree instances | ||
396 | * and opaque to the btree core. The block pointers are simple disk endian | ||
397 | * integers, available in a short (32bit) and long (64bit) variant. | ||
398 | * | ||
399 | * The helpers below calculate the offset of a given record, key or pointer | ||
400 | * into a btree block (xfs_btree_*_offset) or return a pointer to the given | ||
401 | * record, key or pointer (xfs_btree_*_addr). Note that all addressing | ||
402 | * inside the btree block is done using indices starting at one, not zero! | ||
403 | */ | ||
404 | |||
405 | /* | ||
406 | * Return size of the btree block header for this btree instance. | ||
407 | */ | ||
408 | static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) | ||
409 | { | ||
410 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { | ||
411 | if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) | ||
412 | return XFS_BTREE_LBLOCK_CRC_LEN; | ||
413 | return XFS_BTREE_LBLOCK_LEN; | ||
414 | } | ||
415 | if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) | ||
416 | return XFS_BTREE_SBLOCK_CRC_LEN; | ||
417 | return XFS_BTREE_SBLOCK_LEN; | ||
418 | } | ||
419 | |||
420 | /* | ||
421 | * Return size of btree block pointers for this btree instance. | ||
422 | */ | ||
423 | static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur) | ||
424 | { | ||
425 | return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? | ||
426 | sizeof(__be64) : sizeof(__be32); | ||
427 | } | ||
428 | |||
429 | /* | ||
430 | * Calculate offset of the n-th record in a btree block. | ||
431 | */ | ||
432 | STATIC size_t | ||
433 | xfs_btree_rec_offset( | ||
434 | struct xfs_btree_cur *cur, | ||
435 | int n) | ||
436 | { | ||
437 | return xfs_btree_block_len(cur) + | ||
438 | (n - 1) * cur->bc_ops->rec_len; | ||
439 | } | ||
440 | |||
441 | /* | ||
442 | * Calculate offset of the n-th key in a btree block. | ||
443 | */ | ||
444 | STATIC size_t | ||
445 | xfs_btree_key_offset( | ||
446 | struct xfs_btree_cur *cur, | ||
447 | int n) | ||
448 | { | ||
449 | return xfs_btree_block_len(cur) + | ||
450 | (n - 1) * cur->bc_ops->key_len; | ||
451 | } | ||
452 | |||
453 | /* | ||
454 | * Calculate offset of the n-th block pointer in a btree block. | ||
455 | */ | ||
456 | STATIC size_t | ||
457 | xfs_btree_ptr_offset( | ||
458 | struct xfs_btree_cur *cur, | ||
459 | int n, | ||
460 | int level) | ||
461 | { | ||
462 | return xfs_btree_block_len(cur) + | ||
463 | cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + | ||
464 | (n - 1) * xfs_btree_ptr_len(cur); | ||
465 | } | ||
466 | |||
467 | /* | ||
468 | * Return a pointer to the n-th record in the btree block. | ||
469 | */ | ||
470 | STATIC union xfs_btree_rec * | ||
471 | xfs_btree_rec_addr( | ||
472 | struct xfs_btree_cur *cur, | ||
473 | int n, | ||
474 | struct xfs_btree_block *block) | ||
475 | { | ||
476 | return (union xfs_btree_rec *) | ||
477 | ((char *)block + xfs_btree_rec_offset(cur, n)); | ||
478 | } | ||
479 | |||
480 | /* | ||
481 | * Return a pointer to the n-th key in the btree block. | ||
482 | */ | ||
483 | STATIC union xfs_btree_key * | ||
484 | xfs_btree_key_addr( | ||
485 | struct xfs_btree_cur *cur, | ||
486 | int n, | ||
487 | struct xfs_btree_block *block) | ||
488 | { | ||
489 | return (union xfs_btree_key *) | ||
490 | ((char *)block + xfs_btree_key_offset(cur, n)); | ||
491 | } | ||
492 | |||
493 | /* | ||
494 | * Return a pointer to the n-th block pointer in the btree block. | ||
495 | */ | ||
496 | STATIC union xfs_btree_ptr * | ||
497 | xfs_btree_ptr_addr( | ||
498 | struct xfs_btree_cur *cur, | ||
499 | int n, | ||
500 | struct xfs_btree_block *block) | ||
501 | { | ||
502 | int level = xfs_btree_get_level(block); | ||
503 | |||
504 | ASSERT(block->bb_level != 0); | ||
505 | |||
506 | return (union xfs_btree_ptr *) | ||
507 | ((char *)block + xfs_btree_ptr_offset(cur, n, level)); | ||
508 | } | ||
509 | |||
510 | /* | ||
511 | * Get the root block which is stored in the inode. | ||
512 | * | ||
513 | * For now this btree implementation assumes the btree root is always | ||
514 | * stored in the if_broot field of an inode fork. | ||
515 | */ | ||
516 | STATIC struct xfs_btree_block * | ||
517 | xfs_btree_get_iroot( | ||
518 | struct xfs_btree_cur *cur) | ||
519 | { | ||
520 | struct xfs_ifork *ifp; | ||
521 | |||
522 | ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork); | ||
523 | return (struct xfs_btree_block *)ifp->if_broot; | ||
524 | } | ||
525 | |||
526 | /* | ||
527 | * Retrieve the block pointer from the cursor at the given level. | ||
528 | * This may be an inode btree root or from a buffer. | ||
529 | */ | ||
530 | STATIC struct xfs_btree_block * /* generic btree block pointer */ | ||
531 | xfs_btree_get_block( | ||
532 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
533 | int level, /* level in btree */ | ||
534 | struct xfs_buf **bpp) /* buffer containing the block */ | ||
535 | { | ||
536 | if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && | ||
537 | (level == cur->bc_nlevels - 1)) { | ||
538 | *bpp = NULL; | ||
539 | return xfs_btree_get_iroot(cur); | ||
540 | } | ||
541 | |||
542 | *bpp = cur->bc_bufs[level]; | ||
543 | return XFS_BUF_TO_BLOCK(*bpp); | ||
544 | } | ||
545 | |||
546 | /* | ||
547 | * Get a buffer for the block, return it with no data read. | ||
548 | * Long-form addressing. | ||
549 | */ | ||
550 | xfs_buf_t * /* buffer for fsbno */ | ||
551 | xfs_btree_get_bufl( | ||
552 | xfs_mount_t *mp, /* file system mount point */ | ||
553 | xfs_trans_t *tp, /* transaction pointer */ | ||
554 | xfs_fsblock_t fsbno, /* file system block number */ | ||
555 | uint lock) /* lock flags for get_buf */ | ||
556 | { | ||
557 | xfs_daddr_t d; /* real disk block address */ | ||
558 | |||
559 | ASSERT(fsbno != NULLFSBLOCK); | ||
560 | d = XFS_FSB_TO_DADDR(mp, fsbno); | ||
561 | return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); | ||
562 | } | ||
563 | |||
564 | /* | ||
565 | * Get a buffer for the block, return it with no data read. | ||
566 | * Short-form addressing. | ||
567 | */ | ||
568 | xfs_buf_t * /* buffer for agno/agbno */ | ||
569 | xfs_btree_get_bufs( | ||
570 | xfs_mount_t *mp, /* file system mount point */ | ||
571 | xfs_trans_t *tp, /* transaction pointer */ | ||
572 | xfs_agnumber_t agno, /* allocation group number */ | ||
573 | xfs_agblock_t agbno, /* allocation group block number */ | ||
574 | uint lock) /* lock flags for get_buf */ | ||
575 | { | ||
576 | xfs_daddr_t d; /* real disk block address */ | ||
577 | |||
578 | ASSERT(agno != NULLAGNUMBER); | ||
579 | ASSERT(agbno != NULLAGBLOCK); | ||
580 | d = XFS_AGB_TO_DADDR(mp, agno, agbno); | ||
581 | return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); | ||
582 | } | ||
583 | |||
584 | /* | ||
585 | * Check for the cursor referring to the last block at the given level. | ||
586 | */ | ||
587 | int /* 1=is last block, 0=not last block */ | ||
588 | xfs_btree_islastblock( | ||
589 | xfs_btree_cur_t *cur, /* btree cursor */ | ||
590 | int level) /* level to check */ | ||
591 | { | ||
592 | struct xfs_btree_block *block; /* generic btree block pointer */ | ||
593 | xfs_buf_t *bp; /* buffer containing block */ | ||
594 | |||
595 | block = xfs_btree_get_block(cur, level, &bp); | ||
596 | xfs_btree_check_block(cur, block, level, bp); | ||
597 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) | ||
598 | return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO); | ||
599 | else | ||
600 | return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); | ||
601 | } | ||
602 | |||
603 | /* | ||
604 | * Change the cursor to point to the first record at the given level. | ||
605 | * Other levels are unaffected. | ||
606 | */ | ||
607 | STATIC int /* success=1, failure=0 */ | ||
608 | xfs_btree_firstrec( | ||
609 | xfs_btree_cur_t *cur, /* btree cursor */ | ||
610 | int level) /* level to change */ | ||
611 | { | ||
612 | struct xfs_btree_block *block; /* generic btree block pointer */ | ||
613 | xfs_buf_t *bp; /* buffer containing block */ | ||
614 | |||
615 | /* | ||
616 | * Get the block pointer for this level. | ||
617 | */ | ||
618 | block = xfs_btree_get_block(cur, level, &bp); | ||
619 | xfs_btree_check_block(cur, block, level, bp); | ||
620 | /* | ||
621 | * It's empty, there is no such record. | ||
622 | */ | ||
623 | if (!block->bb_numrecs) | ||
624 | return 0; | ||
625 | /* | ||
626 | * Set the ptr value to 1, that's the first record/key. | ||
627 | */ | ||
628 | cur->bc_ptrs[level] = 1; | ||
629 | return 1; | ||
630 | } | ||
631 | |||
632 | /* | ||
633 | * Change the cursor to point to the last record in the current block | ||
634 | * at the given level. Other levels are unaffected. | ||
635 | */ | ||
636 | STATIC int /* success=1, failure=0 */ | ||
637 | xfs_btree_lastrec( | ||
638 | xfs_btree_cur_t *cur, /* btree cursor */ | ||
639 | int level) /* level to change */ | ||
640 | { | ||
641 | struct xfs_btree_block *block; /* generic btree block pointer */ | ||
642 | xfs_buf_t *bp; /* buffer containing block */ | ||
643 | |||
644 | /* | ||
645 | * Get the block pointer for this level. | ||
646 | */ | ||
647 | block = xfs_btree_get_block(cur, level, &bp); | ||
648 | xfs_btree_check_block(cur, block, level, bp); | ||
649 | /* | ||
650 | * It's empty, there is no such record. | ||
651 | */ | ||
652 | if (!block->bb_numrecs) | ||
653 | return 0; | ||
654 | /* | ||
655 | * Set the ptr value to numrecs, that's the last record/key. | ||
656 | */ | ||
657 | cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs); | ||
658 | return 1; | ||
659 | } | ||
660 | |||
/*
 * Compute the first and last byte offsets covered by a set of fields.
 *
 * @offsets is a table of structure field offsets: entry i is the offset of
 * the field for bit i, so entry i + 1 marks the end of that field.  *first
 * receives the offset of the lowest field set in @fields, *last the final
 * byte of the highest field set below bit @nbits.
 *
 * @fields must have at least one bit set (asserted); the scans below do not
 * terminate otherwise.
 */
void
xfs_btree_offsets(
	__int64_t	fields,		/* bitmask of fields */
	const short	*offsets,	/* table of field offsets */
	int		nbits,		/* number of bits to inspect */
	int		*first,		/* output: first byte offset */
	int		*last)		/* output: last byte offset */
{
	int		bit;		/* current bit number */
	__int64_t	mask;		/* mask for current bit number */

	ASSERT(fields != 0);

	/* Scan upwards for the lowest set bit: the first byte offset. */
	bit = 0;
	mask = 1LL;
	while (!(mask & fields)) {
		bit++;
		mask <<= 1;
	}
	*first = offsets[bit];

	/* Scan downwards for the highest set bit: the last byte offset. */
	bit = nbits - 1;
	mask = 1LL << bit;
	while (!(mask & fields)) {
		bit--;
		mask >>= 1;
	}
	*last = offsets[bit + 1] - 1;
}
696 | |||
697 | /* | ||
698 | * Get a buffer for the block, return it read in. | ||
699 | * Long-form addressing. | ||
700 | */ | ||
701 | int | ||
702 | xfs_btree_read_bufl( | ||
703 | struct xfs_mount *mp, /* file system mount point */ | ||
704 | struct xfs_trans *tp, /* transaction pointer */ | ||
705 | xfs_fsblock_t fsbno, /* file system block number */ | ||
706 | uint lock, /* lock flags for read_buf */ | ||
707 | struct xfs_buf **bpp, /* buffer for fsbno */ | ||
708 | int refval, /* ref count value for buffer */ | ||
709 | const struct xfs_buf_ops *ops) | ||
710 | { | ||
711 | struct xfs_buf *bp; /* return value */ | ||
712 | xfs_daddr_t d; /* real disk block address */ | ||
713 | int error; | ||
714 | |||
715 | ASSERT(fsbno != NULLFSBLOCK); | ||
716 | d = XFS_FSB_TO_DADDR(mp, fsbno); | ||
717 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, | ||
718 | mp->m_bsize, lock, &bp, ops); | ||
719 | if (error) | ||
720 | return error; | ||
721 | if (bp) | ||
722 | xfs_buf_set_ref(bp, refval); | ||
723 | *bpp = bp; | ||
724 | return 0; | ||
725 | } | ||
726 | |||
727 | /* | ||
728 | * Read-ahead the block, don't wait for it, don't return a buffer. | ||
729 | * Long-form addressing. | ||
730 | */ | ||
731 | /* ARGSUSED */ | ||
732 | void | ||
733 | xfs_btree_reada_bufl( | ||
734 | struct xfs_mount *mp, /* file system mount point */ | ||
735 | xfs_fsblock_t fsbno, /* file system block number */ | ||
736 | xfs_extlen_t count, /* count of filesystem blocks */ | ||
737 | const struct xfs_buf_ops *ops) | ||
738 | { | ||
739 | xfs_daddr_t d; | ||
740 | |||
741 | ASSERT(fsbno != NULLFSBLOCK); | ||
742 | d = XFS_FSB_TO_DADDR(mp, fsbno); | ||
743 | xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); | ||
744 | } | ||
745 | |||
746 | /* | ||
747 | * Read-ahead the block, don't wait for it, don't return a buffer. | ||
748 | * Short-form addressing. | ||
749 | */ | ||
750 | /* ARGSUSED */ | ||
751 | void | ||
752 | xfs_btree_reada_bufs( | ||
753 | struct xfs_mount *mp, /* file system mount point */ | ||
754 | xfs_agnumber_t agno, /* allocation group number */ | ||
755 | xfs_agblock_t agbno, /* allocation group block number */ | ||
756 | xfs_extlen_t count, /* count of filesystem blocks */ | ||
757 | const struct xfs_buf_ops *ops) | ||
758 | { | ||
759 | xfs_daddr_t d; | ||
760 | |||
761 | ASSERT(agno != NULLAGNUMBER); | ||
762 | ASSERT(agbno != NULLAGBLOCK); | ||
763 | d = XFS_AGB_TO_DADDR(mp, agno, agbno); | ||
764 | xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); | ||
765 | } | ||
766 | |||
767 | STATIC int | ||
768 | xfs_btree_readahead_lblock( | ||
769 | struct xfs_btree_cur *cur, | ||
770 | int lr, | ||
771 | struct xfs_btree_block *block) | ||
772 | { | ||
773 | int rval = 0; | ||
774 | xfs_dfsbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); | ||
775 | xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); | ||
776 | |||
777 | if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) { | ||
778 | xfs_btree_reada_bufl(cur->bc_mp, left, 1, | ||
779 | cur->bc_ops->buf_ops); | ||
780 | rval++; | ||
781 | } | ||
782 | |||
783 | if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) { | ||
784 | xfs_btree_reada_bufl(cur->bc_mp, right, 1, | ||
785 | cur->bc_ops->buf_ops); | ||
786 | rval++; | ||
787 | } | ||
788 | |||
789 | return rval; | ||
790 | } | ||
791 | |||
792 | STATIC int | ||
793 | xfs_btree_readahead_sblock( | ||
794 | struct xfs_btree_cur *cur, | ||
795 | int lr, | ||
796 | struct xfs_btree_block *block) | ||
797 | { | ||
798 | int rval = 0; | ||
799 | xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); | ||
800 | xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); | ||
801 | |||
802 | |||
803 | if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { | ||
804 | xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, | ||
805 | left, 1, cur->bc_ops->buf_ops); | ||
806 | rval++; | ||
807 | } | ||
808 | |||
809 | if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { | ||
810 | xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, | ||
811 | right, 1, cur->bc_ops->buf_ops); | ||
812 | rval++; | ||
813 | } | ||
814 | |||
815 | return rval; | ||
816 | } | ||
817 | |||
818 | /* | ||
819 | * Read-ahead btree blocks, at the given level. | ||
820 | * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. | ||
821 | */ | ||
822 | STATIC int | ||
823 | xfs_btree_readahead( | ||
824 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
825 | int lev, /* level in btree */ | ||
826 | int lr) /* left/right bits */ | ||
827 | { | ||
828 | struct xfs_btree_block *block; | ||
829 | |||
830 | /* | ||
831 | * No readahead needed if we are at the root level and the | ||
832 | * btree root is stored in the inode. | ||
833 | */ | ||
834 | if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && | ||
835 | (lev == cur->bc_nlevels - 1)) | ||
836 | return 0; | ||
837 | |||
838 | if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) | ||
839 | return 0; | ||
840 | |||
841 | cur->bc_ra[lev] |= lr; | ||
842 | block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]); | ||
843 | |||
844 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) | ||
845 | return xfs_btree_readahead_lblock(cur, lr, block); | ||
846 | return xfs_btree_readahead_sblock(cur, lr, block); | ||
847 | } | ||
848 | |||
849 | STATIC xfs_daddr_t | ||
850 | xfs_btree_ptr_to_daddr( | ||
851 | struct xfs_btree_cur *cur, | ||
852 | union xfs_btree_ptr *ptr) | ||
853 | { | ||
854 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { | ||
855 | ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO)); | ||
856 | |||
857 | return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); | ||
858 | } else { | ||
859 | ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); | ||
860 | ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK)); | ||
861 | |||
862 | return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, | ||
863 | be32_to_cpu(ptr->s)); | ||
864 | } | ||
865 | } | ||
866 | |||
867 | /* | ||
868 | * Readahead @count btree blocks at the given @ptr location. | ||
869 | * | ||
870 | * We don't need to care about long or short form btrees here as we have a | ||
871 | * method of converting the ptr directly to a daddr available to us. | ||
872 | */ | ||
873 | STATIC void | ||
874 | xfs_btree_readahead_ptr( | ||
875 | struct xfs_btree_cur *cur, | ||
876 | union xfs_btree_ptr *ptr, | ||
877 | xfs_extlen_t count) | ||
878 | { | ||
879 | xfs_buf_readahead(cur->bc_mp->m_ddev_targp, | ||
880 | xfs_btree_ptr_to_daddr(cur, ptr), | ||
881 | cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops); | ||
882 | } | ||
883 | |||
884 | /* | ||
885 | * Set the buffer for level "lev" in the cursor to bp, releasing | ||
886 | * any previous buffer. | ||
887 | */ | ||
888 | STATIC void | ||
889 | xfs_btree_setbuf( | ||
890 | xfs_btree_cur_t *cur, /* btree cursor */ | ||
891 | int lev, /* level in btree */ | ||
892 | xfs_buf_t *bp) /* new buffer to set */ | ||
893 | { | ||
894 | struct xfs_btree_block *b; /* btree block */ | ||
895 | |||
896 | if (cur->bc_bufs[lev]) | ||
897 | xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); | ||
898 | cur->bc_bufs[lev] = bp; | ||
899 | cur->bc_ra[lev] = 0; | ||
900 | |||
901 | b = XFS_BUF_TO_BLOCK(bp); | ||
902 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { | ||
903 | if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)) | ||
904 | cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; | ||
905 | if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)) | ||
906 | cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; | ||
907 | } else { | ||
908 | if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) | ||
909 | cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; | ||
910 | if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) | ||
911 | cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; | ||
912 | } | ||
913 | } | ||
914 | |||
915 | STATIC int | ||
916 | xfs_btree_ptr_is_null( | ||
917 | struct xfs_btree_cur *cur, | ||
918 | union xfs_btree_ptr *ptr) | ||
919 | { | ||
920 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) | ||
921 | return ptr->l == cpu_to_be64(NULLDFSBNO); | ||
922 | else | ||
923 | return ptr->s == cpu_to_be32(NULLAGBLOCK); | ||
924 | } | ||
925 | |||
926 | STATIC void | ||
927 | xfs_btree_set_ptr_null( | ||
928 | struct xfs_btree_cur *cur, | ||
929 | union xfs_btree_ptr *ptr) | ||
930 | { | ||
931 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) | ||
932 | ptr->l = cpu_to_be64(NULLDFSBNO); | ||
933 | else | ||
934 | ptr->s = cpu_to_be32(NULLAGBLOCK); | ||
935 | } | ||
936 | |||
937 | /* | ||
938 | * Get/set/init sibling pointers | ||
939 | */ | ||
940 | STATIC void | ||
941 | xfs_btree_get_sibling( | ||
942 | struct xfs_btree_cur *cur, | ||
943 | struct xfs_btree_block *block, | ||
944 | union xfs_btree_ptr *ptr, | ||
945 | int lr) | ||
946 | { | ||
947 | ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); | ||
948 | |||
949 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { | ||
950 | if (lr == XFS_BB_RIGHTSIB) | ||
951 | ptr->l = block->bb_u.l.bb_rightsib; | ||
952 | else | ||
953 | ptr->l = block->bb_u.l.bb_leftsib; | ||
954 | } else { | ||
955 | if (lr == XFS_BB_RIGHTSIB) | ||
956 | ptr->s = block->bb_u.s.bb_rightsib; | ||
957 | else | ||
958 | ptr->s = block->bb_u.s.bb_leftsib; | ||
959 | } | ||
960 | } | ||
961 | |||
962 | STATIC void | ||
963 | xfs_btree_set_sibling( | ||
964 | struct xfs_btree_cur *cur, | ||
965 | struct xfs_btree_block *block, | ||
966 | union xfs_btree_ptr *ptr, | ||
967 | int lr) | ||
968 | { | ||
969 | ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); | ||
970 | |||
971 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { | ||
972 | if (lr == XFS_BB_RIGHTSIB) | ||
973 | block->bb_u.l.bb_rightsib = ptr->l; | ||
974 | else | ||
975 | block->bb_u.l.bb_leftsib = ptr->l; | ||
976 | } else { | ||
977 | if (lr == XFS_BB_RIGHTSIB) | ||
978 | block->bb_u.s.bb_rightsib = ptr->s; | ||
979 | else | ||
980 | block->bb_u.s.bb_leftsib = ptr->s; | ||
981 | } | ||
982 | } | ||
983 | |||
984 | void | ||
985 | xfs_btree_init_block_int( | ||
986 | struct xfs_mount *mp, | ||
987 | struct xfs_btree_block *buf, | ||
988 | xfs_daddr_t blkno, | ||
989 | __u32 magic, | ||
990 | __u16 level, | ||
991 | __u16 numrecs, | ||
992 | __u64 owner, | ||
993 | unsigned int flags) | ||
994 | { | ||
995 | buf->bb_magic = cpu_to_be32(magic); | ||
996 | buf->bb_level = cpu_to_be16(level); | ||
997 | buf->bb_numrecs = cpu_to_be16(numrecs); | ||
998 | |||
999 | if (flags & XFS_BTREE_LONG_PTRS) { | ||
1000 | buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); | ||
1001 | buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); | ||
1002 | if (flags & XFS_BTREE_CRC_BLOCKS) { | ||
1003 | buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); | ||
1004 | buf->bb_u.l.bb_owner = cpu_to_be64(owner); | ||
1005 | uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid); | ||
1006 | buf->bb_u.l.bb_pad = 0; | ||
1007 | buf->bb_u.l.bb_lsn = 0; | ||
1008 | } | ||
1009 | } else { | ||
1010 | /* owner is a 32 bit value on short blocks */ | ||
1011 | __u32 __owner = (__u32)owner; | ||
1012 | |||
1013 | buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); | ||
1014 | buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); | ||
1015 | if (flags & XFS_BTREE_CRC_BLOCKS) { | ||
1016 | buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); | ||
1017 | buf->bb_u.s.bb_owner = cpu_to_be32(__owner); | ||
1018 | uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid); | ||
1019 | buf->bb_u.s.bb_lsn = 0; | ||
1020 | } | ||
1021 | } | ||
1022 | } | ||
1023 | |||
1024 | void | ||
1025 | xfs_btree_init_block( | ||
1026 | struct xfs_mount *mp, | ||
1027 | struct xfs_buf *bp, | ||
1028 | __u32 magic, | ||
1029 | __u16 level, | ||
1030 | __u16 numrecs, | ||
1031 | __u64 owner, | ||
1032 | unsigned int flags) | ||
1033 | { | ||
1034 | xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, | ||
1035 | magic, level, numrecs, owner, flags); | ||
1036 | } | ||
1037 | |||
1038 | STATIC void | ||
1039 | xfs_btree_init_block_cur( | ||
1040 | struct xfs_btree_cur *cur, | ||
1041 | struct xfs_buf *bp, | ||
1042 | int level, | ||
1043 | int numrecs) | ||
1044 | { | ||
1045 | __u64 owner; | ||
1046 | |||
1047 | /* | ||
1048 | * we can pull the owner from the cursor right now as the different | ||
1049 | * owners align directly with the pointer size of the btree. This may | ||
1050 | * change in future, but is safe for current users of the generic btree | ||
1051 | * code. | ||
1052 | */ | ||
1053 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) | ||
1054 | owner = cur->bc_private.b.ip->i_ino; | ||
1055 | else | ||
1056 | owner = cur->bc_private.a.agno; | ||
1057 | |||
1058 | xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, | ||
1059 | xfs_btree_magic(cur), level, numrecs, | ||
1060 | owner, cur->bc_flags); | ||
1061 | } | ||
1062 | |||
1063 | /* | ||
1064 | * Return true if ptr is the last record in the btree and | ||
1065 | * we need to track updates to this record. The decision | ||
1066 | * will be further refined in the update_lastrec method. | ||
1067 | */ | ||
1068 | STATIC int | ||
1069 | xfs_btree_is_lastrec( | ||
1070 | struct xfs_btree_cur *cur, | ||
1071 | struct xfs_btree_block *block, | ||
1072 | int level) | ||
1073 | { | ||
1074 | union xfs_btree_ptr ptr; | ||
1075 | |||
1076 | if (level > 0) | ||
1077 | return 0; | ||
1078 | if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE)) | ||
1079 | return 0; | ||
1080 | |||
1081 | xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); | ||
1082 | if (!xfs_btree_ptr_is_null(cur, &ptr)) | ||
1083 | return 0; | ||
1084 | return 1; | ||
1085 | } | ||
1086 | |||
1087 | STATIC void | ||
1088 | xfs_btree_buf_to_ptr( | ||
1089 | struct xfs_btree_cur *cur, | ||
1090 | struct xfs_buf *bp, | ||
1091 | union xfs_btree_ptr *ptr) | ||
1092 | { | ||
1093 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) | ||
1094 | ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, | ||
1095 | XFS_BUF_ADDR(bp))); | ||
1096 | else { | ||
1097 | ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp, | ||
1098 | XFS_BUF_ADDR(bp))); | ||
1099 | } | ||
1100 | } | ||
1101 | |||
1102 | STATIC void | ||
1103 | xfs_btree_set_refs( | ||
1104 | struct xfs_btree_cur *cur, | ||
1105 | struct xfs_buf *bp) | ||
1106 | { | ||
1107 | switch (cur->bc_btnum) { | ||
1108 | case XFS_BTNUM_BNO: | ||
1109 | case XFS_BTNUM_CNT: | ||
1110 | xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); | ||
1111 | break; | ||
1112 | case XFS_BTNUM_INO: | ||
1113 | case XFS_BTNUM_FINO: | ||
1114 | xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); | ||
1115 | break; | ||
1116 | case XFS_BTNUM_BMAP: | ||
1117 | xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); | ||
1118 | break; | ||
1119 | default: | ||
1120 | ASSERT(0); | ||
1121 | } | ||
1122 | } | ||
1123 | |||
1124 | STATIC int | ||
1125 | xfs_btree_get_buf_block( | ||
1126 | struct xfs_btree_cur *cur, | ||
1127 | union xfs_btree_ptr *ptr, | ||
1128 | int flags, | ||
1129 | struct xfs_btree_block **block, | ||
1130 | struct xfs_buf **bpp) | ||
1131 | { | ||
1132 | struct xfs_mount *mp = cur->bc_mp; | ||
1133 | xfs_daddr_t d; | ||
1134 | |||
1135 | /* need to sort out how callers deal with failures first */ | ||
1136 | ASSERT(!(flags & XBF_TRYLOCK)); | ||
1137 | |||
1138 | d = xfs_btree_ptr_to_daddr(cur, ptr); | ||
1139 | *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, | ||
1140 | mp->m_bsize, flags); | ||
1141 | |||
1142 | if (!*bpp) | ||
1143 | return -ENOMEM; | ||
1144 | |||
1145 | (*bpp)->b_ops = cur->bc_ops->buf_ops; | ||
1146 | *block = XFS_BUF_TO_BLOCK(*bpp); | ||
1147 | return 0; | ||
1148 | } | ||
1149 | |||
1150 | /* | ||
1151 | * Read in the buffer at the given ptr and return the buffer and | ||
1152 | * the block pointer within the buffer. | ||
1153 | */ | ||
1154 | STATIC int | ||
1155 | xfs_btree_read_buf_block( | ||
1156 | struct xfs_btree_cur *cur, | ||
1157 | union xfs_btree_ptr *ptr, | ||
1158 | int flags, | ||
1159 | struct xfs_btree_block **block, | ||
1160 | struct xfs_buf **bpp) | ||
1161 | { | ||
1162 | struct xfs_mount *mp = cur->bc_mp; | ||
1163 | xfs_daddr_t d; | ||
1164 | int error; | ||
1165 | |||
1166 | /* need to sort out how callers deal with failures first */ | ||
1167 | ASSERT(!(flags & XBF_TRYLOCK)); | ||
1168 | |||
1169 | d = xfs_btree_ptr_to_daddr(cur, ptr); | ||
1170 | error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, | ||
1171 | mp->m_bsize, flags, bpp, | ||
1172 | cur->bc_ops->buf_ops); | ||
1173 | if (error) | ||
1174 | return error; | ||
1175 | |||
1176 | xfs_btree_set_refs(cur, *bpp); | ||
1177 | *block = XFS_BUF_TO_BLOCK(*bpp); | ||
1178 | return 0; | ||
1179 | } | ||
1180 | |||
1181 | /* | ||
1182 | * Copy keys from one btree block to another. | ||
1183 | */ | ||
1184 | STATIC void | ||
1185 | xfs_btree_copy_keys( | ||
1186 | struct xfs_btree_cur *cur, | ||
1187 | union xfs_btree_key *dst_key, | ||
1188 | union xfs_btree_key *src_key, | ||
1189 | int numkeys) | ||
1190 | { | ||
1191 | ASSERT(numkeys >= 0); | ||
1192 | memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len); | ||
1193 | } | ||
1194 | |||
1195 | /* | ||
1196 | * Copy records from one btree block to another. | ||
1197 | */ | ||
1198 | STATIC void | ||
1199 | xfs_btree_copy_recs( | ||
1200 | struct xfs_btree_cur *cur, | ||
1201 | union xfs_btree_rec *dst_rec, | ||
1202 | union xfs_btree_rec *src_rec, | ||
1203 | int numrecs) | ||
1204 | { | ||
1205 | ASSERT(numrecs >= 0); | ||
1206 | memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len); | ||
1207 | } | ||
1208 | |||
1209 | /* | ||
1210 | * Copy block pointers from one btree block to another. | ||
1211 | */ | ||
1212 | STATIC void | ||
1213 | xfs_btree_copy_ptrs( | ||
1214 | struct xfs_btree_cur *cur, | ||
1215 | union xfs_btree_ptr *dst_ptr, | ||
1216 | union xfs_btree_ptr *src_ptr, | ||
1217 | int numptrs) | ||
1218 | { | ||
1219 | ASSERT(numptrs >= 0); | ||
1220 | memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur)); | ||
1221 | } | ||
1222 | |||
1223 | /* | ||
1224 | * Shift keys one index left/right inside a single btree block. | ||
1225 | */ | ||
1226 | STATIC void | ||
1227 | xfs_btree_shift_keys( | ||
1228 | struct xfs_btree_cur *cur, | ||
1229 | union xfs_btree_key *key, | ||
1230 | int dir, | ||
1231 | int numkeys) | ||
1232 | { | ||
1233 | char *dst_key; | ||
1234 | |||
1235 | ASSERT(numkeys >= 0); | ||
1236 | ASSERT(dir == 1 || dir == -1); | ||
1237 | |||
1238 | dst_key = (char *)key + (dir * cur->bc_ops->key_len); | ||
1239 | memmove(dst_key, key, numkeys * cur->bc_ops->key_len); | ||
1240 | } | ||
1241 | |||
1242 | /* | ||
1243 | * Shift records one index left/right inside a single btree block. | ||
1244 | */ | ||
1245 | STATIC void | ||
1246 | xfs_btree_shift_recs( | ||
1247 | struct xfs_btree_cur *cur, | ||
1248 | union xfs_btree_rec *rec, | ||
1249 | int dir, | ||
1250 | int numrecs) | ||
1251 | { | ||
1252 | char *dst_rec; | ||
1253 | |||
1254 | ASSERT(numrecs >= 0); | ||
1255 | ASSERT(dir == 1 || dir == -1); | ||
1256 | |||
1257 | dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len); | ||
1258 | memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len); | ||
1259 | } | ||
1260 | |||
1261 | /* | ||
1262 | * Shift block pointers one index left/right inside a single btree block. | ||
1263 | */ | ||
1264 | STATIC void | ||
1265 | xfs_btree_shift_ptrs( | ||
1266 | struct xfs_btree_cur *cur, | ||
1267 | union xfs_btree_ptr *ptr, | ||
1268 | int dir, | ||
1269 | int numptrs) | ||
1270 | { | ||
1271 | char *dst_ptr; | ||
1272 | |||
1273 | ASSERT(numptrs >= 0); | ||
1274 | ASSERT(dir == 1 || dir == -1); | ||
1275 | |||
1276 | dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur)); | ||
1277 | memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur)); | ||
1278 | } | ||
1279 | |||
1280 | /* | ||
1281 | * Log key values from the btree block. | ||
1282 | */ | ||
1283 | STATIC void | ||
1284 | xfs_btree_log_keys( | ||
1285 | struct xfs_btree_cur *cur, | ||
1286 | struct xfs_buf *bp, | ||
1287 | int first, | ||
1288 | int last) | ||
1289 | { | ||
1290 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
1291 | XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); | ||
1292 | |||
1293 | if (bp) { | ||
1294 | xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); | ||
1295 | xfs_trans_log_buf(cur->bc_tp, bp, | ||
1296 | xfs_btree_key_offset(cur, first), | ||
1297 | xfs_btree_key_offset(cur, last + 1) - 1); | ||
1298 | } else { | ||
1299 | xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, | ||
1300 | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); | ||
1301 | } | ||
1302 | |||
1303 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
1304 | } | ||
1305 | |||
1306 | /* | ||
1307 | * Log record values from the btree block. | ||
1308 | */ | ||
1309 | void | ||
1310 | xfs_btree_log_recs( | ||
1311 | struct xfs_btree_cur *cur, | ||
1312 | struct xfs_buf *bp, | ||
1313 | int first, | ||
1314 | int last) | ||
1315 | { | ||
1316 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
1317 | XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); | ||
1318 | |||
1319 | xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); | ||
1320 | xfs_trans_log_buf(cur->bc_tp, bp, | ||
1321 | xfs_btree_rec_offset(cur, first), | ||
1322 | xfs_btree_rec_offset(cur, last + 1) - 1); | ||
1323 | |||
1324 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
1325 | } | ||
1326 | |||
1327 | /* | ||
1328 | * Log block pointer fields from a btree block (nonleaf). | ||
1329 | */ | ||
1330 | STATIC void | ||
1331 | xfs_btree_log_ptrs( | ||
1332 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
1333 | struct xfs_buf *bp, /* buffer containing btree block */ | ||
1334 | int first, /* index of first pointer to log */ | ||
1335 | int last) /* index of last pointer to log */ | ||
1336 | { | ||
1337 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
1338 | XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); | ||
1339 | |||
1340 | if (bp) { | ||
1341 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); | ||
1342 | int level = xfs_btree_get_level(block); | ||
1343 | |||
1344 | xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); | ||
1345 | xfs_trans_log_buf(cur->bc_tp, bp, | ||
1346 | xfs_btree_ptr_offset(cur, first, level), | ||
1347 | xfs_btree_ptr_offset(cur, last + 1, level) - 1); | ||
1348 | } else { | ||
1349 | xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, | ||
1350 | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); | ||
1351 | } | ||
1352 | |||
1353 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
1354 | } | ||
1355 | |||
1356 | /* | ||
1357 | * Log fields from a btree block header. | ||
1358 | */ | ||
1359 | void | ||
1360 | xfs_btree_log_block( | ||
1361 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
1362 | struct xfs_buf *bp, /* buffer containing btree block */ | ||
1363 | int fields) /* mask of fields: XFS_BB_... */ | ||
1364 | { | ||
1365 | int first; /* first byte offset logged */ | ||
1366 | int last; /* last byte offset logged */ | ||
1367 | static const short soffsets[] = { /* table of offsets (short) */ | ||
1368 | offsetof(struct xfs_btree_block, bb_magic), | ||
1369 | offsetof(struct xfs_btree_block, bb_level), | ||
1370 | offsetof(struct xfs_btree_block, bb_numrecs), | ||
1371 | offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), | ||
1372 | offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), | ||
1373 | offsetof(struct xfs_btree_block, bb_u.s.bb_blkno), | ||
1374 | offsetof(struct xfs_btree_block, bb_u.s.bb_lsn), | ||
1375 | offsetof(struct xfs_btree_block, bb_u.s.bb_uuid), | ||
1376 | offsetof(struct xfs_btree_block, bb_u.s.bb_owner), | ||
1377 | offsetof(struct xfs_btree_block, bb_u.s.bb_crc), | ||
1378 | XFS_BTREE_SBLOCK_CRC_LEN | ||
1379 | }; | ||
1380 | static const short loffsets[] = { /* table of offsets (long) */ | ||
1381 | offsetof(struct xfs_btree_block, bb_magic), | ||
1382 | offsetof(struct xfs_btree_block, bb_level), | ||
1383 | offsetof(struct xfs_btree_block, bb_numrecs), | ||
1384 | offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), | ||
1385 | offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), | ||
1386 | offsetof(struct xfs_btree_block, bb_u.l.bb_blkno), | ||
1387 | offsetof(struct xfs_btree_block, bb_u.l.bb_lsn), | ||
1388 | offsetof(struct xfs_btree_block, bb_u.l.bb_uuid), | ||
1389 | offsetof(struct xfs_btree_block, bb_u.l.bb_owner), | ||
1390 | offsetof(struct xfs_btree_block, bb_u.l.bb_crc), | ||
1391 | offsetof(struct xfs_btree_block, bb_u.l.bb_pad), | ||
1392 | XFS_BTREE_LBLOCK_CRC_LEN | ||
1393 | }; | ||
1394 | |||
1395 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
1396 | XFS_BTREE_TRACE_ARGBI(cur, bp, fields); | ||
1397 | |||
1398 | if (bp) { | ||
1399 | int nbits; | ||
1400 | |||
1401 | if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { | ||
1402 | /* | ||
1403 | * We don't log the CRC when updating a btree | ||
1404 | * block but instead recreate it during log | ||
1405 | * recovery. As the log buffers have checksums | ||
1406 | * of their own this is safe and avoids logging a crc | ||
1407 | * update in a lot of places. | ||
1408 | */ | ||
1409 | if (fields == XFS_BB_ALL_BITS) | ||
1410 | fields = XFS_BB_ALL_BITS_CRC; | ||
1411 | nbits = XFS_BB_NUM_BITS_CRC; | ||
1412 | } else { | ||
1413 | nbits = XFS_BB_NUM_BITS; | ||
1414 | } | ||
1415 | xfs_btree_offsets(fields, | ||
1416 | (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? | ||
1417 | loffsets : soffsets, | ||
1418 | nbits, &first, &last); | ||
1419 | xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); | ||
1420 | xfs_trans_log_buf(cur->bc_tp, bp, first, last); | ||
1421 | } else { | ||
1422 | xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, | ||
1423 | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); | ||
1424 | } | ||
1425 | |||
1426 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
1427 | } | ||
1428 | |||
1429 | /* | ||
1430 | * Increment cursor by one record at the level. | ||
1431 | * For nonzero levels the leaf-ward information is untouched. | ||
1432 | */ | ||
1433 | int /* error */ | ||
1434 | xfs_btree_increment( | ||
1435 | struct xfs_btree_cur *cur, | ||
1436 | int level, | ||
1437 | int *stat) /* success/failure */ | ||
1438 | { | ||
1439 | struct xfs_btree_block *block; | ||
1440 | union xfs_btree_ptr ptr; | ||
1441 | struct xfs_buf *bp; | ||
1442 | int error; /* error return value */ | ||
1443 | int lev; | ||
1444 | |||
1445 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
1446 | XFS_BTREE_TRACE_ARGI(cur, level); | ||
1447 | |||
1448 | ASSERT(level < cur->bc_nlevels); | ||
1449 | |||
1450 | /* Read-ahead to the right at this level. */ | ||
1451 | xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); | ||
1452 | |||
1453 | /* Get a pointer to the btree block. */ | ||
1454 | block = xfs_btree_get_block(cur, level, &bp); | ||
1455 | |||
1456 | #ifdef DEBUG | ||
1457 | error = xfs_btree_check_block(cur, block, level, bp); | ||
1458 | if (error) | ||
1459 | goto error0; | ||
1460 | #endif | ||
1461 | |||
1462 | /* We're done if we remain in the block after the increment. */ | ||
1463 | if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block)) | ||
1464 | goto out1; | ||
1465 | |||
1466 | /* Fail if we just went off the right edge of the tree. */ | ||
1467 | xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); | ||
1468 | if (xfs_btree_ptr_is_null(cur, &ptr)) | ||
1469 | goto out0; | ||
1470 | |||
1471 | XFS_BTREE_STATS_INC(cur, increment); | ||
1472 | |||
1473 | /* | ||
1474 | * March up the tree incrementing pointers. | ||
1475 | * Stop when we don't go off the right edge of a block. | ||
1476 | */ | ||
1477 | for (lev = level + 1; lev < cur->bc_nlevels; lev++) { | ||
1478 | block = xfs_btree_get_block(cur, lev, &bp); | ||
1479 | |||
1480 | #ifdef DEBUG | ||
1481 | error = xfs_btree_check_block(cur, block, lev, bp); | ||
1482 | if (error) | ||
1483 | goto error0; | ||
1484 | #endif | ||
1485 | |||
1486 | if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block)) | ||
1487 | break; | ||
1488 | |||
1489 | /* Read-ahead the right block for the next loop. */ | ||
1490 | xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); | ||
1491 | } | ||
1492 | |||
1493 | /* | ||
1494 | * If we went off the root then we are either seriously | ||
1495 | * confused or have the tree root in an inode. | ||
1496 | */ | ||
1497 | if (lev == cur->bc_nlevels) { | ||
1498 | if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) | ||
1499 | goto out0; | ||
1500 | ASSERT(0); | ||
1501 | error = -EFSCORRUPTED; | ||
1502 | goto error0; | ||
1503 | } | ||
1504 | ASSERT(lev < cur->bc_nlevels); | ||
1505 | |||
1506 | /* | ||
1507 | * Now walk back down the tree, fixing up the cursor's buffer | ||
1508 | * pointers and key numbers. | ||
1509 | */ | ||
1510 | for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { | ||
1511 | union xfs_btree_ptr *ptrp; | ||
1512 | |||
1513 | ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); | ||
1514 | --lev; | ||
1515 | error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); | ||
1516 | if (error) | ||
1517 | goto error0; | ||
1518 | |||
1519 | xfs_btree_setbuf(cur, lev, bp); | ||
1520 | cur->bc_ptrs[lev] = 1; | ||
1521 | } | ||
1522 | out1: | ||
1523 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
1524 | *stat = 1; | ||
1525 | return 0; | ||
1526 | |||
1527 | out0: | ||
1528 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
1529 | *stat = 0; | ||
1530 | return 0; | ||
1531 | |||
1532 | error0: | ||
1533 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
1534 | return error; | ||
1535 | } | ||
1536 | |||
1537 | /* | ||
1538 | * Decrement cursor by one record at the level. | ||
1539 | * For nonzero levels the leaf-ward information is untouched. | ||
1540 | */ | ||
1541 | int /* error */ | ||
1542 | xfs_btree_decrement( | ||
1543 | struct xfs_btree_cur *cur, | ||
1544 | int level, | ||
1545 | int *stat) /* success/failure */ | ||
1546 | { | ||
1547 | struct xfs_btree_block *block; | ||
1548 | xfs_buf_t *bp; | ||
1549 | int error; /* error return value */ | ||
1550 | int lev; | ||
1551 | union xfs_btree_ptr ptr; | ||
1552 | |||
1553 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
1554 | XFS_BTREE_TRACE_ARGI(cur, level); | ||
1555 | |||
1556 | ASSERT(level < cur->bc_nlevels); | ||
1557 | |||
1558 | /* Read-ahead to the left at this level. */ | ||
1559 | xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); | ||
1560 | |||
1561 | /* We're done if we remain in the block after the decrement. */ | ||
1562 | if (--cur->bc_ptrs[level] > 0) | ||
1563 | goto out1; | ||
1564 | |||
1565 | /* Get a pointer to the btree block. */ | ||
1566 | block = xfs_btree_get_block(cur, level, &bp); | ||
1567 | |||
1568 | #ifdef DEBUG | ||
1569 | error = xfs_btree_check_block(cur, block, level, bp); | ||
1570 | if (error) | ||
1571 | goto error0; | ||
1572 | #endif | ||
1573 | |||
1574 | /* Fail if we just went off the left edge of the tree. */ | ||
1575 | xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); | ||
1576 | if (xfs_btree_ptr_is_null(cur, &ptr)) | ||
1577 | goto out0; | ||
1578 | |||
1579 | XFS_BTREE_STATS_INC(cur, decrement); | ||
1580 | |||
1581 | /* | ||
1582 | * March up the tree decrementing pointers. | ||
1583 | * Stop when we don't go off the left edge of a block. | ||
1584 | */ | ||
1585 | for (lev = level + 1; lev < cur->bc_nlevels; lev++) { | ||
1586 | if (--cur->bc_ptrs[lev] > 0) | ||
1587 | break; | ||
1588 | /* Read-ahead the left block for the next loop. */ | ||
1589 | xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); | ||
1590 | } | ||
1591 | |||
1592 | /* | ||
1593 | * If we went off the root then we are seriously confused. | ||
1594 | * or the root of the tree is in an inode. | ||
1595 | */ | ||
1596 | if (lev == cur->bc_nlevels) { | ||
1597 | if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) | ||
1598 | goto out0; | ||
1599 | ASSERT(0); | ||
1600 | error = -EFSCORRUPTED; | ||
1601 | goto error0; | ||
1602 | } | ||
1603 | ASSERT(lev < cur->bc_nlevels); | ||
1604 | |||
1605 | /* | ||
1606 | * Now walk back down the tree, fixing up the cursor's buffer | ||
1607 | * pointers and key numbers. | ||
1608 | */ | ||
1609 | for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { | ||
1610 | union xfs_btree_ptr *ptrp; | ||
1611 | |||
1612 | ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); | ||
1613 | --lev; | ||
1614 | error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); | ||
1615 | if (error) | ||
1616 | goto error0; | ||
1617 | xfs_btree_setbuf(cur, lev, bp); | ||
1618 | cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); | ||
1619 | } | ||
1620 | out1: | ||
1621 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
1622 | *stat = 1; | ||
1623 | return 0; | ||
1624 | |||
1625 | out0: | ||
1626 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
1627 | *stat = 0; | ||
1628 | return 0; | ||
1629 | |||
1630 | error0: | ||
1631 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
1632 | return error; | ||
1633 | } | ||
1634 | |||
1635 | STATIC int | ||
1636 | xfs_btree_lookup_get_block( | ||
1637 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
1638 | int level, /* level in the btree */ | ||
1639 | union xfs_btree_ptr *pp, /* ptr to btree block */ | ||
1640 | struct xfs_btree_block **blkp) /* return btree block */ | ||
1641 | { | ||
1642 | struct xfs_buf *bp; /* buffer pointer for btree block */ | ||
1643 | int error = 0; | ||
1644 | |||
1645 | /* special case the root block if in an inode */ | ||
1646 | if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && | ||
1647 | (level == cur->bc_nlevels - 1)) { | ||
1648 | *blkp = xfs_btree_get_iroot(cur); | ||
1649 | return 0; | ||
1650 | } | ||
1651 | |||
1652 | /* | ||
1653 | * If the old buffer at this level for the disk address we are | ||
1654 | * looking for re-use it. | ||
1655 | * | ||
1656 | * Otherwise throw it away and get a new one. | ||
1657 | */ | ||
1658 | bp = cur->bc_bufs[level]; | ||
1659 | if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) { | ||
1660 | *blkp = XFS_BUF_TO_BLOCK(bp); | ||
1661 | return 0; | ||
1662 | } | ||
1663 | |||
1664 | error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp); | ||
1665 | if (error) | ||
1666 | return error; | ||
1667 | |||
1668 | xfs_btree_setbuf(cur, level, bp); | ||
1669 | return 0; | ||
1670 | } | ||
1671 | |||
1672 | /* | ||
1673 | * Get current search key. For level 0 we don't actually have a key | ||
1674 | * structure so we make one up from the record. For all other levels | ||
1675 | * we just return the right key. | ||
1676 | */ | ||
1677 | STATIC union xfs_btree_key * | ||
1678 | xfs_lookup_get_search_key( | ||
1679 | struct xfs_btree_cur *cur, | ||
1680 | int level, | ||
1681 | int keyno, | ||
1682 | struct xfs_btree_block *block, | ||
1683 | union xfs_btree_key *kp) | ||
1684 | { | ||
1685 | if (level == 0) { | ||
1686 | cur->bc_ops->init_key_from_rec(kp, | ||
1687 | xfs_btree_rec_addr(cur, keyno, block)); | ||
1688 | return kp; | ||
1689 | } | ||
1690 | |||
1691 | return xfs_btree_key_addr(cur, keyno, block); | ||
1692 | } | ||
1693 | |||
1694 | /* | ||
1695 | * Lookup the record. The cursor is made to point to it, based on dir. | ||
1696 | * stat is set to 0 if can't find any such record, 1 for success. | ||
1697 | */ | ||
1698 | int /* error */ | ||
1699 | xfs_btree_lookup( | ||
1700 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
1701 | xfs_lookup_t dir, /* <=, ==, or >= */ | ||
1702 | int *stat) /* success/failure */ | ||
1703 | { | ||
1704 | struct xfs_btree_block *block; /* current btree block */ | ||
1705 | __int64_t diff; /* difference for the current key */ | ||
1706 | int error; /* error return value */ | ||
1707 | int keyno; /* current key number */ | ||
1708 | int level; /* level in the btree */ | ||
1709 | union xfs_btree_ptr *pp; /* ptr to btree block */ | ||
1710 | union xfs_btree_ptr ptr; /* ptr to btree block */ | ||
1711 | |||
1712 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
1713 | XFS_BTREE_TRACE_ARGI(cur, dir); | ||
1714 | |||
1715 | XFS_BTREE_STATS_INC(cur, lookup); | ||
1716 | |||
1717 | block = NULL; | ||
1718 | keyno = 0; | ||
1719 | |||
1720 | /* initialise start pointer from cursor */ | ||
1721 | cur->bc_ops->init_ptr_from_cur(cur, &ptr); | ||
1722 | pp = &ptr; | ||
1723 | |||
1724 | /* | ||
1725 | * Iterate over each level in the btree, starting at the root. | ||
1726 | * For each level above the leaves, find the key we need, based | ||
1727 | * on the lookup record, then follow the corresponding block | ||
1728 | * pointer down to the next level. | ||
1729 | */ | ||
1730 | for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { | ||
1731 | /* Get the block we need to do the lookup on. */ | ||
1732 | error = xfs_btree_lookup_get_block(cur, level, pp, &block); | ||
1733 | if (error) | ||
1734 | goto error0; | ||
1735 | |||
1736 | if (diff == 0) { | ||
1737 | /* | ||
1738 | * If we already had a key match at a higher level, we | ||
1739 | * know we need to use the first entry in this block. | ||
1740 | */ | ||
1741 | keyno = 1; | ||
1742 | } else { | ||
1743 | /* Otherwise search this block. Do a binary search. */ | ||
1744 | |||
1745 | int high; /* high entry number */ | ||
1746 | int low; /* low entry number */ | ||
1747 | |||
1748 | /* Set low and high entry numbers, 1-based. */ | ||
1749 | low = 1; | ||
1750 | high = xfs_btree_get_numrecs(block); | ||
1751 | if (!high) { | ||
1752 | /* Block is empty, must be an empty leaf. */ | ||
1753 | ASSERT(level == 0 && cur->bc_nlevels == 1); | ||
1754 | |||
1755 | cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; | ||
1756 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
1757 | *stat = 0; | ||
1758 | return 0; | ||
1759 | } | ||
1760 | |||
1761 | /* Binary search the block. */ | ||
1762 | while (low <= high) { | ||
1763 | union xfs_btree_key key; | ||
1764 | union xfs_btree_key *kp; | ||
1765 | |||
1766 | XFS_BTREE_STATS_INC(cur, compare); | ||
1767 | |||
1768 | /* keyno is average of low and high. */ | ||
1769 | keyno = (low + high) >> 1; | ||
1770 | |||
1771 | /* Get current search key */ | ||
1772 | kp = xfs_lookup_get_search_key(cur, level, | ||
1773 | keyno, block, &key); | ||
1774 | |||
1775 | /* | ||
1776 | * Compute difference to get next direction: | ||
1777 | * - less than, move right | ||
1778 | * - greater than, move left | ||
1779 | * - equal, we're done | ||
1780 | */ | ||
1781 | diff = cur->bc_ops->key_diff(cur, kp); | ||
1782 | if (diff < 0) | ||
1783 | low = keyno + 1; | ||
1784 | else if (diff > 0) | ||
1785 | high = keyno - 1; | ||
1786 | else | ||
1787 | break; | ||
1788 | } | ||
1789 | } | ||
1790 | |||
1791 | /* | ||
1792 | * If there are more levels, set up for the next level | ||
1793 | * by getting the block number and filling in the cursor. | ||
1794 | */ | ||
1795 | if (level > 0) { | ||
1796 | /* | ||
1797 | * If we moved left, need the previous key number, | ||
1798 | * unless there isn't one. | ||
1799 | */ | ||
1800 | if (diff > 0 && --keyno < 1) | ||
1801 | keyno = 1; | ||
1802 | pp = xfs_btree_ptr_addr(cur, keyno, block); | ||
1803 | |||
1804 | #ifdef DEBUG | ||
1805 | error = xfs_btree_check_ptr(cur, pp, 0, level); | ||
1806 | if (error) | ||
1807 | goto error0; | ||
1808 | #endif | ||
1809 | cur->bc_ptrs[level] = keyno; | ||
1810 | } | ||
1811 | } | ||
1812 | |||
1813 | /* Done with the search. See if we need to adjust the results. */ | ||
1814 | if (dir != XFS_LOOKUP_LE && diff < 0) { | ||
1815 | keyno++; | ||
1816 | /* | ||
1817 | * If ge search and we went off the end of the block, but it's | ||
1818 | * not the last block, we're in the wrong block. | ||
1819 | */ | ||
1820 | xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); | ||
1821 | if (dir == XFS_LOOKUP_GE && | ||
1822 | keyno > xfs_btree_get_numrecs(block) && | ||
1823 | !xfs_btree_ptr_is_null(cur, &ptr)) { | ||
1824 | int i; | ||
1825 | |||
1826 | cur->bc_ptrs[0] = keyno; | ||
1827 | error = xfs_btree_increment(cur, 0, &i); | ||
1828 | if (error) | ||
1829 | goto error0; | ||
1830 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1831 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
1832 | *stat = 1; | ||
1833 | return 0; | ||
1834 | } | ||
1835 | } else if (dir == XFS_LOOKUP_LE && diff > 0) | ||
1836 | keyno--; | ||
1837 | cur->bc_ptrs[0] = keyno; | ||
1838 | |||
1839 | /* Return if we succeeded or not. */ | ||
1840 | if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) | ||
1841 | *stat = 0; | ||
1842 | else if (dir != XFS_LOOKUP_EQ || diff == 0) | ||
1843 | *stat = 1; | ||
1844 | else | ||
1845 | *stat = 0; | ||
1846 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
1847 | return 0; | ||
1848 | |||
1849 | error0: | ||
1850 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
1851 | return error; | ||
1852 | } | ||
1853 | |||
1854 | /* | ||
1855 | * Update keys at all levels from here to the root along the cursor's path. | ||
1856 | */ | ||
1857 | STATIC int | ||
1858 | xfs_btree_updkey( | ||
1859 | struct xfs_btree_cur *cur, | ||
1860 | union xfs_btree_key *keyp, | ||
1861 | int level) | ||
1862 | { | ||
1863 | struct xfs_btree_block *block; | ||
1864 | struct xfs_buf *bp; | ||
1865 | union xfs_btree_key *kp; | ||
1866 | int ptr; | ||
1867 | |||
1868 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
1869 | XFS_BTREE_TRACE_ARGIK(cur, level, keyp); | ||
1870 | |||
1871 | ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1); | ||
1872 | |||
1873 | /* | ||
1874 | * Go up the tree from this level toward the root. | ||
1875 | * At each level, update the key value to the value input. | ||
1876 | * Stop when we reach a level where the cursor isn't pointing | ||
1877 | * at the first entry in the block. | ||
1878 | */ | ||
1879 | for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { | ||
1880 | #ifdef DEBUG | ||
1881 | int error; | ||
1882 | #endif | ||
1883 | block = xfs_btree_get_block(cur, level, &bp); | ||
1884 | #ifdef DEBUG | ||
1885 | error = xfs_btree_check_block(cur, block, level, bp); | ||
1886 | if (error) { | ||
1887 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
1888 | return error; | ||
1889 | } | ||
1890 | #endif | ||
1891 | ptr = cur->bc_ptrs[level]; | ||
1892 | kp = xfs_btree_key_addr(cur, ptr, block); | ||
1893 | xfs_btree_copy_keys(cur, kp, keyp, 1); | ||
1894 | xfs_btree_log_keys(cur, bp, ptr, ptr); | ||
1895 | } | ||
1896 | |||
1897 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
1898 | return 0; | ||
1899 | } | ||
1900 | |||
1901 | /* | ||
1902 | * Update the record referred to by cur to the value in the | ||
1903 | * given record. This either works (return 0) or gets an | ||
1904 | * EFSCORRUPTED error. | ||
1905 | */ | ||
1906 | int | ||
1907 | xfs_btree_update( | ||
1908 | struct xfs_btree_cur *cur, | ||
1909 | union xfs_btree_rec *rec) | ||
1910 | { | ||
1911 | struct xfs_btree_block *block; | ||
1912 | struct xfs_buf *bp; | ||
1913 | int error; | ||
1914 | int ptr; | ||
1915 | union xfs_btree_rec *rp; | ||
1916 | |||
1917 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
1918 | XFS_BTREE_TRACE_ARGR(cur, rec); | ||
1919 | |||
1920 | /* Pick up the current block. */ | ||
1921 | block = xfs_btree_get_block(cur, 0, &bp); | ||
1922 | |||
1923 | #ifdef DEBUG | ||
1924 | error = xfs_btree_check_block(cur, block, 0, bp); | ||
1925 | if (error) | ||
1926 | goto error0; | ||
1927 | #endif | ||
1928 | /* Get the address of the rec to be updated. */ | ||
1929 | ptr = cur->bc_ptrs[0]; | ||
1930 | rp = xfs_btree_rec_addr(cur, ptr, block); | ||
1931 | |||
1932 | /* Fill in the new contents and log them. */ | ||
1933 | xfs_btree_copy_recs(cur, rp, rec, 1); | ||
1934 | xfs_btree_log_recs(cur, bp, ptr, ptr); | ||
1935 | |||
1936 | /* | ||
1937 | * If we are tracking the last record in the tree and | ||
1938 | * we are at the far right edge of the tree, update it. | ||
1939 | */ | ||
1940 | if (xfs_btree_is_lastrec(cur, block, 0)) { | ||
1941 | cur->bc_ops->update_lastrec(cur, block, rec, | ||
1942 | ptr, LASTREC_UPDATE); | ||
1943 | } | ||
1944 | |||
1945 | /* Updating first rec in leaf. Pass new key value up to our parent. */ | ||
1946 | if (ptr == 1) { | ||
1947 | union xfs_btree_key key; | ||
1948 | |||
1949 | cur->bc_ops->init_key_from_rec(&key, rec); | ||
1950 | error = xfs_btree_updkey(cur, &key, 1); | ||
1951 | if (error) | ||
1952 | goto error0; | ||
1953 | } | ||
1954 | |||
1955 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
1956 | return 0; | ||
1957 | |||
1958 | error0: | ||
1959 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
1960 | return error; | ||
1961 | } | ||
1962 | |||
1963 | /* | ||
1964 | * Move 1 record left from cur/level if possible. | ||
1965 | * Update cur to reflect the new path. | ||
1966 | */ | ||
1967 | STATIC int /* error */ | ||
1968 | xfs_btree_lshift( | ||
1969 | struct xfs_btree_cur *cur, | ||
1970 | int level, | ||
1971 | int *stat) /* success/failure */ | ||
1972 | { | ||
1973 | union xfs_btree_key key; /* btree key */ | ||
1974 | struct xfs_buf *lbp; /* left buffer pointer */ | ||
1975 | struct xfs_btree_block *left; /* left btree block */ | ||
1976 | int lrecs; /* left record count */ | ||
1977 | struct xfs_buf *rbp; /* right buffer pointer */ | ||
1978 | struct xfs_btree_block *right; /* right btree block */ | ||
1979 | int rrecs; /* right record count */ | ||
1980 | union xfs_btree_ptr lptr; /* left btree pointer */ | ||
1981 | union xfs_btree_key *rkp = NULL; /* right btree key */ | ||
1982 | union xfs_btree_ptr *rpp = NULL; /* right address pointer */ | ||
1983 | union xfs_btree_rec *rrp = NULL; /* right record pointer */ | ||
1984 | int error; /* error return value */ | ||
1985 | |||
1986 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
1987 | XFS_BTREE_TRACE_ARGI(cur, level); | ||
1988 | |||
1989 | if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && | ||
1990 | level == cur->bc_nlevels - 1) | ||
1991 | goto out0; | ||
1992 | |||
1993 | /* Set up variables for this block as "right". */ | ||
1994 | right = xfs_btree_get_block(cur, level, &rbp); | ||
1995 | |||
1996 | #ifdef DEBUG | ||
1997 | error = xfs_btree_check_block(cur, right, level, rbp); | ||
1998 | if (error) | ||
1999 | goto error0; | ||
2000 | #endif | ||
2001 | |||
2002 | /* If we've got no left sibling then we can't shift an entry left. */ | ||
2003 | xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); | ||
2004 | if (xfs_btree_ptr_is_null(cur, &lptr)) | ||
2005 | goto out0; | ||
2006 | |||
2007 | /* | ||
2008 | * If the cursor entry is the one that would be moved, don't | ||
2009 | * do it... it's too complicated. | ||
2010 | */ | ||
2011 | if (cur->bc_ptrs[level] <= 1) | ||
2012 | goto out0; | ||
2013 | |||
2014 | /* Set up the left neighbor as "left". */ | ||
2015 | error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); | ||
2016 | if (error) | ||
2017 | goto error0; | ||
2018 | |||
2019 | /* If it's full, it can't take another entry. */ | ||
2020 | lrecs = xfs_btree_get_numrecs(left); | ||
2021 | if (lrecs == cur->bc_ops->get_maxrecs(cur, level)) | ||
2022 | goto out0; | ||
2023 | |||
2024 | rrecs = xfs_btree_get_numrecs(right); | ||
2025 | |||
2026 | /* | ||
2027 | * We add one entry to the left side and remove one for the right side. | ||
2028 | * Account for it here, the changes will be updated on disk and logged | ||
2029 | * later. | ||
2030 | */ | ||
2031 | lrecs++; | ||
2032 | rrecs--; | ||
2033 | |||
2034 | XFS_BTREE_STATS_INC(cur, lshift); | ||
2035 | XFS_BTREE_STATS_ADD(cur, moves, 1); | ||
2036 | |||
2037 | /* | ||
2038 | * If non-leaf, copy a key and a ptr to the left block. | ||
2039 | * Log the changes to the left block. | ||
2040 | */ | ||
2041 | if (level > 0) { | ||
2042 | /* It's a non-leaf. Move keys and pointers. */ | ||
2043 | union xfs_btree_key *lkp; /* left btree key */ | ||
2044 | union xfs_btree_ptr *lpp; /* left address pointer */ | ||
2045 | |||
2046 | lkp = xfs_btree_key_addr(cur, lrecs, left); | ||
2047 | rkp = xfs_btree_key_addr(cur, 1, right); | ||
2048 | |||
2049 | lpp = xfs_btree_ptr_addr(cur, lrecs, left); | ||
2050 | rpp = xfs_btree_ptr_addr(cur, 1, right); | ||
2051 | #ifdef DEBUG | ||
2052 | error = xfs_btree_check_ptr(cur, rpp, 0, level); | ||
2053 | if (error) | ||
2054 | goto error0; | ||
2055 | #endif | ||
2056 | xfs_btree_copy_keys(cur, lkp, rkp, 1); | ||
2057 | xfs_btree_copy_ptrs(cur, lpp, rpp, 1); | ||
2058 | |||
2059 | xfs_btree_log_keys(cur, lbp, lrecs, lrecs); | ||
2060 | xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); | ||
2061 | |||
2062 | ASSERT(cur->bc_ops->keys_inorder(cur, | ||
2063 | xfs_btree_key_addr(cur, lrecs - 1, left), lkp)); | ||
2064 | } else { | ||
2065 | /* It's a leaf. Move records. */ | ||
2066 | union xfs_btree_rec *lrp; /* left record pointer */ | ||
2067 | |||
2068 | lrp = xfs_btree_rec_addr(cur, lrecs, left); | ||
2069 | rrp = xfs_btree_rec_addr(cur, 1, right); | ||
2070 | |||
2071 | xfs_btree_copy_recs(cur, lrp, rrp, 1); | ||
2072 | xfs_btree_log_recs(cur, lbp, lrecs, lrecs); | ||
2073 | |||
2074 | ASSERT(cur->bc_ops->recs_inorder(cur, | ||
2075 | xfs_btree_rec_addr(cur, lrecs - 1, left), lrp)); | ||
2076 | } | ||
2077 | |||
2078 | xfs_btree_set_numrecs(left, lrecs); | ||
2079 | xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); | ||
2080 | |||
2081 | xfs_btree_set_numrecs(right, rrecs); | ||
2082 | xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); | ||
2083 | |||
2084 | /* | ||
2085 | * Slide the contents of right down one entry. | ||
2086 | */ | ||
2087 | XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1); | ||
2088 | if (level > 0) { | ||
2089 | /* It's a nonleaf. operate on keys and ptrs */ | ||
2090 | #ifdef DEBUG | ||
2091 | int i; /* loop index */ | ||
2092 | |||
2093 | for (i = 0; i < rrecs; i++) { | ||
2094 | error = xfs_btree_check_ptr(cur, rpp, i + 1, level); | ||
2095 | if (error) | ||
2096 | goto error0; | ||
2097 | } | ||
2098 | #endif | ||
2099 | xfs_btree_shift_keys(cur, | ||
2100 | xfs_btree_key_addr(cur, 2, right), | ||
2101 | -1, rrecs); | ||
2102 | xfs_btree_shift_ptrs(cur, | ||
2103 | xfs_btree_ptr_addr(cur, 2, right), | ||
2104 | -1, rrecs); | ||
2105 | |||
2106 | xfs_btree_log_keys(cur, rbp, 1, rrecs); | ||
2107 | xfs_btree_log_ptrs(cur, rbp, 1, rrecs); | ||
2108 | } else { | ||
2109 | /* It's a leaf. operate on records */ | ||
2110 | xfs_btree_shift_recs(cur, | ||
2111 | xfs_btree_rec_addr(cur, 2, right), | ||
2112 | -1, rrecs); | ||
2113 | xfs_btree_log_recs(cur, rbp, 1, rrecs); | ||
2114 | |||
2115 | /* | ||
2116 | * If it's the first record in the block, we'll need a key | ||
2117 | * structure to pass up to the next level (updkey). | ||
2118 | */ | ||
2119 | cur->bc_ops->init_key_from_rec(&key, | ||
2120 | xfs_btree_rec_addr(cur, 1, right)); | ||
2121 | rkp = &key; | ||
2122 | } | ||
2123 | |||
2124 | /* Update the parent key values of right. */ | ||
2125 | error = xfs_btree_updkey(cur, rkp, level + 1); | ||
2126 | if (error) | ||
2127 | goto error0; | ||
2128 | |||
2129 | /* Slide the cursor value left one. */ | ||
2130 | cur->bc_ptrs[level]--; | ||
2131 | |||
2132 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
2133 | *stat = 1; | ||
2134 | return 0; | ||
2135 | |||
2136 | out0: | ||
2137 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
2138 | *stat = 0; | ||
2139 | return 0; | ||
2140 | |||
2141 | error0: | ||
2142 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
2143 | return error; | ||
2144 | } | ||
2145 | |||
2146 | /* | ||
2147 | * Move 1 record right from cur/level if possible. | ||
2148 | * Update cur to reflect the new path. | ||
2149 | */ | ||
2150 | STATIC int /* error */ | ||
2151 | xfs_btree_rshift( | ||
2152 | struct xfs_btree_cur *cur, | ||
2153 | int level, | ||
2154 | int *stat) /* success/failure */ | ||
2155 | { | ||
2156 | union xfs_btree_key key; /* btree key */ | ||
2157 | struct xfs_buf *lbp; /* left buffer pointer */ | ||
2158 | struct xfs_btree_block *left; /* left btree block */ | ||
2159 | struct xfs_buf *rbp; /* right buffer pointer */ | ||
2160 | struct xfs_btree_block *right; /* right btree block */ | ||
2161 | struct xfs_btree_cur *tcur; /* temporary btree cursor */ | ||
2162 | union xfs_btree_ptr rptr; /* right block pointer */ | ||
2163 | union xfs_btree_key *rkp; /* right btree key */ | ||
2164 | int rrecs; /* right record count */ | ||
2165 | int lrecs; /* left record count */ | ||
2166 | int error; /* error return value */ | ||
2167 | int i; /* loop counter */ | ||
2168 | |||
2169 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
2170 | XFS_BTREE_TRACE_ARGI(cur, level); | ||
2171 | |||
2172 | if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && | ||
2173 | (level == cur->bc_nlevels - 1)) | ||
2174 | goto out0; | ||
2175 | |||
2176 | /* Set up variables for this block as "left". */ | ||
2177 | left = xfs_btree_get_block(cur, level, &lbp); | ||
2178 | |||
2179 | #ifdef DEBUG | ||
2180 | error = xfs_btree_check_block(cur, left, level, lbp); | ||
2181 | if (error) | ||
2182 | goto error0; | ||
2183 | #endif | ||
2184 | |||
2185 | /* If we've got no right sibling then we can't shift an entry right. */ | ||
2186 | xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); | ||
2187 | if (xfs_btree_ptr_is_null(cur, &rptr)) | ||
2188 | goto out0; | ||
2189 | |||
2190 | /* | ||
2191 | * If the cursor entry is the one that would be moved, don't | ||
2192 | * do it... it's too complicated. | ||
2193 | */ | ||
2194 | lrecs = xfs_btree_get_numrecs(left); | ||
2195 | if (cur->bc_ptrs[level] >= lrecs) | ||
2196 | goto out0; | ||
2197 | |||
2198 | /* Set up the right neighbor as "right". */ | ||
2199 | error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); | ||
2200 | if (error) | ||
2201 | goto error0; | ||
2202 | |||
2203 | /* If it's full, it can't take another entry. */ | ||
2204 | rrecs = xfs_btree_get_numrecs(right); | ||
2205 | if (rrecs == cur->bc_ops->get_maxrecs(cur, level)) | ||
2206 | goto out0; | ||
2207 | |||
2208 | XFS_BTREE_STATS_INC(cur, rshift); | ||
2209 | XFS_BTREE_STATS_ADD(cur, moves, rrecs); | ||
2210 | |||
2211 | /* | ||
2212 | * Make a hole at the start of the right neighbor block, then | ||
2213 | * copy the last left block entry to the hole. | ||
2214 | */ | ||
2215 | if (level > 0) { | ||
2216 | /* It's a nonleaf. make a hole in the keys and ptrs */ | ||
2217 | union xfs_btree_key *lkp; | ||
2218 | union xfs_btree_ptr *lpp; | ||
2219 | union xfs_btree_ptr *rpp; | ||
2220 | |||
2221 | lkp = xfs_btree_key_addr(cur, lrecs, left); | ||
2222 | lpp = xfs_btree_ptr_addr(cur, lrecs, left); | ||
2223 | rkp = xfs_btree_key_addr(cur, 1, right); | ||
2224 | rpp = xfs_btree_ptr_addr(cur, 1, right); | ||
2225 | |||
2226 | #ifdef DEBUG | ||
2227 | for (i = rrecs - 1; i >= 0; i--) { | ||
2228 | error = xfs_btree_check_ptr(cur, rpp, i, level); | ||
2229 | if (error) | ||
2230 | goto error0; | ||
2231 | } | ||
2232 | #endif | ||
2233 | |||
2234 | xfs_btree_shift_keys(cur, rkp, 1, rrecs); | ||
2235 | xfs_btree_shift_ptrs(cur, rpp, 1, rrecs); | ||
2236 | |||
2237 | #ifdef DEBUG | ||
2238 | error = xfs_btree_check_ptr(cur, lpp, 0, level); | ||
2239 | if (error) | ||
2240 | goto error0; | ||
2241 | #endif | ||
2242 | |||
2243 | /* Now put the new data in, and log it. */ | ||
2244 | xfs_btree_copy_keys(cur, rkp, lkp, 1); | ||
2245 | xfs_btree_copy_ptrs(cur, rpp, lpp, 1); | ||
2246 | |||
2247 | xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); | ||
2248 | xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); | ||
2249 | |||
2250 | ASSERT(cur->bc_ops->keys_inorder(cur, rkp, | ||
2251 | xfs_btree_key_addr(cur, 2, right))); | ||
2252 | } else { | ||
2253 | /* It's a leaf. make a hole in the records */ | ||
2254 | union xfs_btree_rec *lrp; | ||
2255 | union xfs_btree_rec *rrp; | ||
2256 | |||
2257 | lrp = xfs_btree_rec_addr(cur, lrecs, left); | ||
2258 | rrp = xfs_btree_rec_addr(cur, 1, right); | ||
2259 | |||
2260 | xfs_btree_shift_recs(cur, rrp, 1, rrecs); | ||
2261 | |||
2262 | /* Now put the new data in, and log it. */ | ||
2263 | xfs_btree_copy_recs(cur, rrp, lrp, 1); | ||
2264 | xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); | ||
2265 | |||
2266 | cur->bc_ops->init_key_from_rec(&key, rrp); | ||
2267 | rkp = &key; | ||
2268 | |||
2269 | ASSERT(cur->bc_ops->recs_inorder(cur, rrp, | ||
2270 | xfs_btree_rec_addr(cur, 2, right))); | ||
2271 | } | ||
2272 | |||
2273 | /* | ||
2274 | * Decrement and log left's numrecs, bump and log right's numrecs. | ||
2275 | */ | ||
2276 | xfs_btree_set_numrecs(left, --lrecs); | ||
2277 | xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); | ||
2278 | |||
2279 | xfs_btree_set_numrecs(right, ++rrecs); | ||
2280 | xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); | ||
2281 | |||
2282 | /* | ||
2283 | * Using a temporary cursor, update the parent key values of the | ||
2284 | * block on the right. | ||
2285 | */ | ||
2286 | error = xfs_btree_dup_cursor(cur, &tcur); | ||
2287 | if (error) | ||
2288 | goto error0; | ||
2289 | i = xfs_btree_lastrec(tcur, level); | ||
2290 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
2291 | |||
2292 | error = xfs_btree_increment(tcur, level, &i); | ||
2293 | if (error) | ||
2294 | goto error1; | ||
2295 | |||
2296 | error = xfs_btree_updkey(tcur, rkp, level + 1); | ||
2297 | if (error) | ||
2298 | goto error1; | ||
2299 | |||
2300 | xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); | ||
2301 | |||
2302 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
2303 | *stat = 1; | ||
2304 | return 0; | ||
2305 | |||
2306 | out0: | ||
2307 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
2308 | *stat = 0; | ||
2309 | return 0; | ||
2310 | |||
2311 | error0: | ||
2312 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
2313 | return error; | ||
2314 | |||
2315 | error1: | ||
2316 | XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR); | ||
2317 | xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); | ||
2318 | return error; | ||
2319 | } | ||
2320 | |||
2321 | /* | ||
2322 | * Split cur/level block in half. | ||
2323 | * Return new block number and the key to its first | ||
2324 | * record (to be inserted into parent). | ||
2325 | */ | ||
2326 | STATIC int /* error */ | ||
2327 | __xfs_btree_split( | ||
2328 | struct xfs_btree_cur *cur, | ||
2329 | int level, | ||
2330 | union xfs_btree_ptr *ptrp, | ||
2331 | union xfs_btree_key *key, | ||
2332 | struct xfs_btree_cur **curp, | ||
2333 | int *stat) /* success/failure */ | ||
2334 | { | ||
2335 | union xfs_btree_ptr lptr; /* left sibling block ptr */ | ||
2336 | struct xfs_buf *lbp; /* left buffer pointer */ | ||
2337 | struct xfs_btree_block *left; /* left btree block */ | ||
2338 | union xfs_btree_ptr rptr; /* right sibling block ptr */ | ||
2339 | struct xfs_buf *rbp; /* right buffer pointer */ | ||
2340 | struct xfs_btree_block *right; /* right btree block */ | ||
2341 | union xfs_btree_ptr rrptr; /* right-right sibling ptr */ | ||
2342 | struct xfs_buf *rrbp; /* right-right buffer pointer */ | ||
2343 | struct xfs_btree_block *rrblock; /* right-right btree block */ | ||
2344 | int lrecs; | ||
2345 | int rrecs; | ||
2346 | int src_index; | ||
2347 | int error; /* error return value */ | ||
2348 | #ifdef DEBUG | ||
2349 | int i; | ||
2350 | #endif | ||
2351 | |||
2352 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
2353 | XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key); | ||
2354 | |||
2355 | XFS_BTREE_STATS_INC(cur, split); | ||
2356 | |||
2357 | /* Set up left block (current one). */ | ||
2358 | left = xfs_btree_get_block(cur, level, &lbp); | ||
2359 | |||
2360 | #ifdef DEBUG | ||
2361 | error = xfs_btree_check_block(cur, left, level, lbp); | ||
2362 | if (error) | ||
2363 | goto error0; | ||
2364 | #endif | ||
2365 | |||
2366 | xfs_btree_buf_to_ptr(cur, lbp, &lptr); | ||
2367 | |||
2368 | /* Allocate the new block. If we can't do it, we're toast. Give up. */ | ||
2369 | error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat); | ||
2370 | if (error) | ||
2371 | goto error0; | ||
2372 | if (*stat == 0) | ||
2373 | goto out0; | ||
2374 | XFS_BTREE_STATS_INC(cur, alloc); | ||
2375 | |||
2376 | /* Set up the new block as "right". */ | ||
2377 | error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp); | ||
2378 | if (error) | ||
2379 | goto error0; | ||
2380 | |||
2381 | /* Fill in the btree header for the new right block. */ | ||
2382 | xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0); | ||
2383 | |||
2384 | /* | ||
2385 | * Split the entries between the old and the new block evenly. | ||
2386 | * Make sure that if there's an odd number of entries now, that | ||
2387 | * each new block will have the same number of entries. | ||
2388 | */ | ||
2389 | lrecs = xfs_btree_get_numrecs(left); | ||
2390 | rrecs = lrecs / 2; | ||
2391 | if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1) | ||
2392 | rrecs++; | ||
2393 | src_index = (lrecs - rrecs + 1); | ||
2394 | |||
2395 | XFS_BTREE_STATS_ADD(cur, moves, rrecs); | ||
2396 | |||
2397 | /* | ||
2398 | * Copy btree block entries from the left block over to the | ||
2399 | * new block, the right. Update the right block and log the | ||
2400 | * changes. | ||
2401 | */ | ||
2402 | if (level > 0) { | ||
2403 | /* It's a non-leaf. Move keys and pointers. */ | ||
2404 | union xfs_btree_key *lkp; /* left btree key */ | ||
2405 | union xfs_btree_ptr *lpp; /* left address pointer */ | ||
2406 | union xfs_btree_key *rkp; /* right btree key */ | ||
2407 | union xfs_btree_ptr *rpp; /* right address pointer */ | ||
2408 | |||
2409 | lkp = xfs_btree_key_addr(cur, src_index, left); | ||
2410 | lpp = xfs_btree_ptr_addr(cur, src_index, left); | ||
2411 | rkp = xfs_btree_key_addr(cur, 1, right); | ||
2412 | rpp = xfs_btree_ptr_addr(cur, 1, right); | ||
2413 | |||
2414 | #ifdef DEBUG | ||
2415 | for (i = src_index; i < rrecs; i++) { | ||
2416 | error = xfs_btree_check_ptr(cur, lpp, i, level); | ||
2417 | if (error) | ||
2418 | goto error0; | ||
2419 | } | ||
2420 | #endif | ||
2421 | |||
2422 | xfs_btree_copy_keys(cur, rkp, lkp, rrecs); | ||
2423 | xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs); | ||
2424 | |||
2425 | xfs_btree_log_keys(cur, rbp, 1, rrecs); | ||
2426 | xfs_btree_log_ptrs(cur, rbp, 1, rrecs); | ||
2427 | |||
2428 | /* Grab the keys to the entries moved to the right block */ | ||
2429 | xfs_btree_copy_keys(cur, key, rkp, 1); | ||
2430 | } else { | ||
2431 | /* It's a leaf. Move records. */ | ||
2432 | union xfs_btree_rec *lrp; /* left record pointer */ | ||
2433 | union xfs_btree_rec *rrp; /* right record pointer */ | ||
2434 | |||
2435 | lrp = xfs_btree_rec_addr(cur, src_index, left); | ||
2436 | rrp = xfs_btree_rec_addr(cur, 1, right); | ||
2437 | |||
2438 | xfs_btree_copy_recs(cur, rrp, lrp, rrecs); | ||
2439 | xfs_btree_log_recs(cur, rbp, 1, rrecs); | ||
2440 | |||
2441 | cur->bc_ops->init_key_from_rec(key, | ||
2442 | xfs_btree_rec_addr(cur, 1, right)); | ||
2443 | } | ||
2444 | |||
2445 | |||
2446 | /* | ||
2447 | * Find the left block number by looking in the buffer. | ||
2448 | * Adjust numrecs, sibling pointers. | ||
2449 | */ | ||
2450 | xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB); | ||
2451 | xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB); | ||
2452 | xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); | ||
2453 | xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); | ||
2454 | |||
2455 | lrecs -= rrecs; | ||
2456 | xfs_btree_set_numrecs(left, lrecs); | ||
2457 | xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs); | ||
2458 | |||
2459 | xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); | ||
2460 | xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); | ||
2461 | |||
2462 | /* | ||
2463 | * If there's a block to the new block's right, make that block | ||
2464 | * point back to right instead of to left. | ||
2465 | */ | ||
2466 | if (!xfs_btree_ptr_is_null(cur, &rrptr)) { | ||
2467 | error = xfs_btree_read_buf_block(cur, &rrptr, | ||
2468 | 0, &rrblock, &rrbp); | ||
2469 | if (error) | ||
2470 | goto error0; | ||
2471 | xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB); | ||
2472 | xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); | ||
2473 | } | ||
2474 | /* | ||
2475 | * If the cursor is really in the right block, move it there. | ||
2476 | * If it's just pointing past the last entry in left, then we'll | ||
2477 | * insert there, so don't change anything in that case. | ||
2478 | */ | ||
2479 | if (cur->bc_ptrs[level] > lrecs + 1) { | ||
2480 | xfs_btree_setbuf(cur, level, rbp); | ||
2481 | cur->bc_ptrs[level] -= lrecs; | ||
2482 | } | ||
2483 | /* | ||
2484 | * If there are more levels, we'll need another cursor which refers | ||
2485 | * the right block, no matter where this cursor was. | ||
2486 | */ | ||
2487 | if (level + 1 < cur->bc_nlevels) { | ||
2488 | error = xfs_btree_dup_cursor(cur, curp); | ||
2489 | if (error) | ||
2490 | goto error0; | ||
2491 | (*curp)->bc_ptrs[level + 1]++; | ||
2492 | } | ||
2493 | *ptrp = rptr; | ||
2494 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
2495 | *stat = 1; | ||
2496 | return 0; | ||
2497 | out0: | ||
2498 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
2499 | *stat = 0; | ||
2500 | return 0; | ||
2501 | |||
2502 | error0: | ||
2503 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
2504 | return error; | ||
2505 | } | ||
2506 | |||
2507 | struct xfs_btree_split_args { | ||
2508 | struct xfs_btree_cur *cur; | ||
2509 | int level; | ||
2510 | union xfs_btree_ptr *ptrp; | ||
2511 | union xfs_btree_key *key; | ||
2512 | struct xfs_btree_cur **curp; | ||
2513 | int *stat; /* success/failure */ | ||
2514 | int result; | ||
2515 | bool kswapd; /* allocation in kswapd context */ | ||
2516 | struct completion *done; | ||
2517 | struct work_struct work; | ||
2518 | }; | ||
2519 | |||
2520 | /* | ||
2521 | * Stack switching interfaces for allocation | ||
2522 | */ | ||
2523 | static void | ||
2524 | xfs_btree_split_worker( | ||
2525 | struct work_struct *work) | ||
2526 | { | ||
2527 | struct xfs_btree_split_args *args = container_of(work, | ||
2528 | struct xfs_btree_split_args, work); | ||
2529 | unsigned long pflags; | ||
2530 | unsigned long new_pflags = PF_FSTRANS; | ||
2531 | |||
2532 | /* | ||
2533 | * we are in a transaction context here, but may also be doing work | ||
2534 | * in kswapd context, and hence we may need to inherit that state | ||
2535 | * temporarily to ensure that we don't block waiting for memory reclaim | ||
2536 | * in any way. | ||
2537 | */ | ||
2538 | if (args->kswapd) | ||
2539 | new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; | ||
2540 | |||
2541 | current_set_flags_nested(&pflags, new_pflags); | ||
2542 | |||
2543 | args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, | ||
2544 | args->key, args->curp, args->stat); | ||
2545 | complete(args->done); | ||
2546 | |||
2547 | current_restore_flags_nested(&pflags, new_pflags); | ||
2548 | } | ||
2549 | |||
2550 | /* | ||
2551 | * BMBT split requests often come in with little stack to work on. Push | ||
2552 | * them off to a worker thread so there is lots of stack to use. For the other | ||
2553 | * btree types, just call directly to avoid the context switch overhead here. | ||
2554 | */ | ||
2555 | STATIC int /* error */ | ||
2556 | xfs_btree_split( | ||
2557 | struct xfs_btree_cur *cur, | ||
2558 | int level, | ||
2559 | union xfs_btree_ptr *ptrp, | ||
2560 | union xfs_btree_key *key, | ||
2561 | struct xfs_btree_cur **curp, | ||
2562 | int *stat) /* success/failure */ | ||
2563 | { | ||
2564 | struct xfs_btree_split_args args; | ||
2565 | DECLARE_COMPLETION_ONSTACK(done); | ||
2566 | |||
2567 | if (cur->bc_btnum != XFS_BTNUM_BMAP) | ||
2568 | return __xfs_btree_split(cur, level, ptrp, key, curp, stat); | ||
2569 | |||
2570 | args.cur = cur; | ||
2571 | args.level = level; | ||
2572 | args.ptrp = ptrp; | ||
2573 | args.key = key; | ||
2574 | args.curp = curp; | ||
2575 | args.stat = stat; | ||
2576 | args.done = &done; | ||
2577 | args.kswapd = current_is_kswapd(); | ||
2578 | INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); | ||
2579 | queue_work(xfs_alloc_wq, &args.work); | ||
2580 | wait_for_completion(&done); | ||
2581 | destroy_work_on_stack(&args.work); | ||
2582 | return args.result; | ||
2583 | } | ||
2584 | |||
2585 | |||
2586 | /* | ||
2587 | * Copy the old inode root contents into a real block and make the | ||
2588 | * broot point to it. | ||
2589 | */ | ||
2590 | int /* error */ | ||
2591 | xfs_btree_new_iroot( | ||
2592 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
2593 | int *logflags, /* logging flags for inode */ | ||
2594 | int *stat) /* return status - 0 fail */ | ||
2595 | { | ||
2596 | struct xfs_buf *cbp; /* buffer for cblock */ | ||
2597 | struct xfs_btree_block *block; /* btree block */ | ||
2598 | struct xfs_btree_block *cblock; /* child btree block */ | ||
2599 | union xfs_btree_key *ckp; /* child key pointer */ | ||
2600 | union xfs_btree_ptr *cpp; /* child ptr pointer */ | ||
2601 | union xfs_btree_key *kp; /* pointer to btree key */ | ||
2602 | union xfs_btree_ptr *pp; /* pointer to block addr */ | ||
2603 | union xfs_btree_ptr nptr; /* new block addr */ | ||
2604 | int level; /* btree level */ | ||
2605 | int error; /* error return code */ | ||
2606 | #ifdef DEBUG | ||
2607 | int i; /* loop counter */ | ||
2608 | #endif | ||
2609 | |||
2610 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
2611 | XFS_BTREE_STATS_INC(cur, newroot); | ||
2612 | |||
2613 | ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); | ||
2614 | |||
2615 | level = cur->bc_nlevels - 1; | ||
2616 | |||
2617 | block = xfs_btree_get_iroot(cur); | ||
2618 | pp = xfs_btree_ptr_addr(cur, 1, block); | ||
2619 | |||
2620 | /* Allocate the new block. If we can't do it, we're toast. Give up. */ | ||
2621 | error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); | ||
2622 | if (error) | ||
2623 | goto error0; | ||
2624 | if (*stat == 0) { | ||
2625 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
2626 | return 0; | ||
2627 | } | ||
2628 | XFS_BTREE_STATS_INC(cur, alloc); | ||
2629 | |||
2630 | /* Copy the root into a real block. */ | ||
2631 | error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp); | ||
2632 | if (error) | ||
2633 | goto error0; | ||
2634 | |||
2635 | /* | ||
2636 | * we can't just memcpy() the root in for CRC enabled btree blocks. | ||
2637 | * In that case have to also ensure the blkno remains correct | ||
2638 | */ | ||
2639 | memcpy(cblock, block, xfs_btree_block_len(cur)); | ||
2640 | if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { | ||
2641 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) | ||
2642 | cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn); | ||
2643 | else | ||
2644 | cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn); | ||
2645 | } | ||
2646 | |||
2647 | be16_add_cpu(&block->bb_level, 1); | ||
2648 | xfs_btree_set_numrecs(block, 1); | ||
2649 | cur->bc_nlevels++; | ||
2650 | cur->bc_ptrs[level + 1] = 1; | ||
2651 | |||
2652 | kp = xfs_btree_key_addr(cur, 1, block); | ||
2653 | ckp = xfs_btree_key_addr(cur, 1, cblock); | ||
2654 | xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock)); | ||
2655 | |||
2656 | cpp = xfs_btree_ptr_addr(cur, 1, cblock); | ||
2657 | #ifdef DEBUG | ||
2658 | for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { | ||
2659 | error = xfs_btree_check_ptr(cur, pp, i, level); | ||
2660 | if (error) | ||
2661 | goto error0; | ||
2662 | } | ||
2663 | #endif | ||
2664 | xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock)); | ||
2665 | |||
2666 | #ifdef DEBUG | ||
2667 | error = xfs_btree_check_ptr(cur, &nptr, 0, level); | ||
2668 | if (error) | ||
2669 | goto error0; | ||
2670 | #endif | ||
2671 | xfs_btree_copy_ptrs(cur, pp, &nptr, 1); | ||
2672 | |||
2673 | xfs_iroot_realloc(cur->bc_private.b.ip, | ||
2674 | 1 - xfs_btree_get_numrecs(cblock), | ||
2675 | cur->bc_private.b.whichfork); | ||
2676 | |||
2677 | xfs_btree_setbuf(cur, level, cbp); | ||
2678 | |||
2679 | /* | ||
2680 | * Do all this logging at the end so that | ||
2681 | * the root is at the right level. | ||
2682 | */ | ||
2683 | xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS); | ||
2684 | xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); | ||
2685 | xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); | ||
2686 | |||
2687 | *logflags |= | ||
2688 | XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork); | ||
2689 | *stat = 1; | ||
2690 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
2691 | return 0; | ||
2692 | error0: | ||
2693 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
2694 | return error; | ||
2695 | } | ||
2696 | |||
2697 | /* | ||
2698 | * Allocate a new root block, fill it in. | ||
2699 | */ | ||
2700 | STATIC int /* error */ | ||
2701 | xfs_btree_new_root( | ||
2702 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
2703 | int *stat) /* success/failure */ | ||
2704 | { | ||
2705 | struct xfs_btree_block *block; /* one half of the old root block */ | ||
2706 | struct xfs_buf *bp; /* buffer containing block */ | ||
2707 | int error; /* error return value */ | ||
2708 | struct xfs_buf *lbp; /* left buffer pointer */ | ||
2709 | struct xfs_btree_block *left; /* left btree block */ | ||
2710 | struct xfs_buf *nbp; /* new (root) buffer */ | ||
2711 | struct xfs_btree_block *new; /* new (root) btree block */ | ||
2712 | int nptr; /* new value for key index, 1 or 2 */ | ||
2713 | struct xfs_buf *rbp; /* right buffer pointer */ | ||
2714 | struct xfs_btree_block *right; /* right btree block */ | ||
2715 | union xfs_btree_ptr rptr; | ||
2716 | union xfs_btree_ptr lptr; | ||
2717 | |||
2718 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
2719 | XFS_BTREE_STATS_INC(cur, newroot); | ||
2720 | |||
2721 | /* initialise our start point from the cursor */ | ||
2722 | cur->bc_ops->init_ptr_from_cur(cur, &rptr); | ||
2723 | |||
2724 | /* Allocate the new block. If we can't do it, we're toast. Give up. */ | ||
2725 | error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat); | ||
2726 | if (error) | ||
2727 | goto error0; | ||
2728 | if (*stat == 0) | ||
2729 | goto out0; | ||
2730 | XFS_BTREE_STATS_INC(cur, alloc); | ||
2731 | |||
2732 | /* Set up the new block. */ | ||
2733 | error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp); | ||
2734 | if (error) | ||
2735 | goto error0; | ||
2736 | |||
2737 | /* Set the root in the holding structure increasing the level by 1. */ | ||
2738 | cur->bc_ops->set_root(cur, &lptr, 1); | ||
2739 | |||
2740 | /* | ||
2741 | * At the previous root level there are now two blocks: the old root, | ||
2742 | * and the new block generated when it was split. We don't know which | ||
2743 | * one the cursor is pointing at, so we set up variables "left" and | ||
2744 | * "right" for each case. | ||
2745 | */ | ||
2746 | block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp); | ||
2747 | |||
2748 | #ifdef DEBUG | ||
2749 | error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp); | ||
2750 | if (error) | ||
2751 | goto error0; | ||
2752 | #endif | ||
2753 | |||
2754 | xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); | ||
2755 | if (!xfs_btree_ptr_is_null(cur, &rptr)) { | ||
2756 | /* Our block is left, pick up the right block. */ | ||
2757 | lbp = bp; | ||
2758 | xfs_btree_buf_to_ptr(cur, lbp, &lptr); | ||
2759 | left = block; | ||
2760 | error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); | ||
2761 | if (error) | ||
2762 | goto error0; | ||
2763 | bp = rbp; | ||
2764 | nptr = 1; | ||
2765 | } else { | ||
2766 | /* Our block is right, pick up the left block. */ | ||
2767 | rbp = bp; | ||
2768 | xfs_btree_buf_to_ptr(cur, rbp, &rptr); | ||
2769 | right = block; | ||
2770 | xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); | ||
2771 | error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); | ||
2772 | if (error) | ||
2773 | goto error0; | ||
2774 | bp = lbp; | ||
2775 | nptr = 2; | ||
2776 | } | ||
2777 | /* Fill in the new block's btree header and log it. */ | ||
2778 | xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2); | ||
2779 | xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); | ||
2780 | ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && | ||
2781 | !xfs_btree_ptr_is_null(cur, &rptr)); | ||
2782 | |||
2783 | /* Fill in the key data in the new root. */ | ||
2784 | if (xfs_btree_get_level(left) > 0) { | ||
2785 | xfs_btree_copy_keys(cur, | ||
2786 | xfs_btree_key_addr(cur, 1, new), | ||
2787 | xfs_btree_key_addr(cur, 1, left), 1); | ||
2788 | xfs_btree_copy_keys(cur, | ||
2789 | xfs_btree_key_addr(cur, 2, new), | ||
2790 | xfs_btree_key_addr(cur, 1, right), 1); | ||
2791 | } else { | ||
2792 | cur->bc_ops->init_key_from_rec( | ||
2793 | xfs_btree_key_addr(cur, 1, new), | ||
2794 | xfs_btree_rec_addr(cur, 1, left)); | ||
2795 | cur->bc_ops->init_key_from_rec( | ||
2796 | xfs_btree_key_addr(cur, 2, new), | ||
2797 | xfs_btree_rec_addr(cur, 1, right)); | ||
2798 | } | ||
2799 | xfs_btree_log_keys(cur, nbp, 1, 2); | ||
2800 | |||
2801 | /* Fill in the pointer data in the new root. */ | ||
2802 | xfs_btree_copy_ptrs(cur, | ||
2803 | xfs_btree_ptr_addr(cur, 1, new), &lptr, 1); | ||
2804 | xfs_btree_copy_ptrs(cur, | ||
2805 | xfs_btree_ptr_addr(cur, 2, new), &rptr, 1); | ||
2806 | xfs_btree_log_ptrs(cur, nbp, 1, 2); | ||
2807 | |||
2808 | /* Fix up the cursor. */ | ||
2809 | xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); | ||
2810 | cur->bc_ptrs[cur->bc_nlevels] = nptr; | ||
2811 | cur->bc_nlevels++; | ||
2812 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
2813 | *stat = 1; | ||
2814 | return 0; | ||
2815 | error0: | ||
2816 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
2817 | return error; | ||
2818 | out0: | ||
2819 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
2820 | *stat = 0; | ||
2821 | return 0; | ||
2822 | } | ||
2823 | |||
2824 | STATIC int | ||
2825 | xfs_btree_make_block_unfull( | ||
2826 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
2827 | int level, /* btree level */ | ||
2828 | int numrecs,/* # of recs in block */ | ||
2829 | int *oindex,/* old tree index */ | ||
2830 | int *index, /* new tree index */ | ||
2831 | union xfs_btree_ptr *nptr, /* new btree ptr */ | ||
2832 | struct xfs_btree_cur **ncur, /* new btree cursor */ | ||
2833 | union xfs_btree_rec *nrec, /* new record */ | ||
2834 | int *stat) | ||
2835 | { | ||
2836 | union xfs_btree_key key; /* new btree key value */ | ||
2837 | int error = 0; | ||
2838 | |||
2839 | if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && | ||
2840 | level == cur->bc_nlevels - 1) { | ||
2841 | struct xfs_inode *ip = cur->bc_private.b.ip; | ||
2842 | |||
2843 | if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { | ||
2844 | /* A root block that can be made bigger. */ | ||
2845 | xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); | ||
2846 | } else { | ||
2847 | /* A root block that needs replacing */ | ||
2848 | int logflags = 0; | ||
2849 | |||
2850 | error = xfs_btree_new_iroot(cur, &logflags, stat); | ||
2851 | if (error || *stat == 0) | ||
2852 | return error; | ||
2853 | |||
2854 | xfs_trans_log_inode(cur->bc_tp, ip, logflags); | ||
2855 | } | ||
2856 | |||
2857 | return 0; | ||
2858 | } | ||
2859 | |||
2860 | /* First, try shifting an entry to the right neighbor. */ | ||
2861 | error = xfs_btree_rshift(cur, level, stat); | ||
2862 | if (error || *stat) | ||
2863 | return error; | ||
2864 | |||
2865 | /* Next, try shifting an entry to the left neighbor. */ | ||
2866 | error = xfs_btree_lshift(cur, level, stat); | ||
2867 | if (error) | ||
2868 | return error; | ||
2869 | |||
2870 | if (*stat) { | ||
2871 | *oindex = *index = cur->bc_ptrs[level]; | ||
2872 | return 0; | ||
2873 | } | ||
2874 | |||
2875 | /* | ||
2876 | * Next, try splitting the current block in half. | ||
2877 | * | ||
2878 | * If this works we have to re-set our variables because we | ||
2879 | * could be in a different block now. | ||
2880 | */ | ||
2881 | error = xfs_btree_split(cur, level, nptr, &key, ncur, stat); | ||
2882 | if (error || *stat == 0) | ||
2883 | return error; | ||
2884 | |||
2885 | |||
2886 | *index = cur->bc_ptrs[level]; | ||
2887 | cur->bc_ops->init_rec_from_key(&key, nrec); | ||
2888 | return 0; | ||
2889 | } | ||
2890 | |||
2891 | /* | ||
2892 | * Insert one record/level. Return information to the caller | ||
2893 | * allowing the next level up to proceed if necessary. | ||
2894 | */ | ||
2895 | STATIC int | ||
2896 | xfs_btree_insrec( | ||
2897 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
2898 | int level, /* level to insert record at */ | ||
2899 | union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ | ||
2900 | union xfs_btree_rec *recp, /* i/o: record data inserted */ | ||
2901 | struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ | ||
2902 | int *stat) /* success/failure */ | ||
2903 | { | ||
2904 | struct xfs_btree_block *block; /* btree block */ | ||
2905 | struct xfs_buf *bp; /* buffer for block */ | ||
2906 | union xfs_btree_key key; /* btree key */ | ||
2907 | union xfs_btree_ptr nptr; /* new block ptr */ | ||
2908 | struct xfs_btree_cur *ncur; /* new btree cursor */ | ||
2909 | union xfs_btree_rec nrec; /* new record count */ | ||
2910 | int optr; /* old key/record index */ | ||
2911 | int ptr; /* key/record index */ | ||
2912 | int numrecs;/* number of records */ | ||
2913 | int error; /* error return value */ | ||
2914 | #ifdef DEBUG | ||
2915 | int i; | ||
2916 | #endif | ||
2917 | |||
2918 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
2919 | XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp); | ||
2920 | |||
2921 | ncur = NULL; | ||
2922 | |||
2923 | /* | ||
2924 | * If we have an external root pointer, and we've made it to the | ||
2925 | * root level, allocate a new root block and we're done. | ||
2926 | */ | ||
2927 | if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && | ||
2928 | (level >= cur->bc_nlevels)) { | ||
2929 | error = xfs_btree_new_root(cur, stat); | ||
2930 | xfs_btree_set_ptr_null(cur, ptrp); | ||
2931 | |||
2932 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
2933 | return error; | ||
2934 | } | ||
2935 | |||
2936 | /* If we're off the left edge, return failure. */ | ||
2937 | ptr = cur->bc_ptrs[level]; | ||
2938 | if (ptr == 0) { | ||
2939 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
2940 | *stat = 0; | ||
2941 | return 0; | ||
2942 | } | ||
2943 | |||
2944 | /* Make a key out of the record data to be inserted, and save it. */ | ||
2945 | cur->bc_ops->init_key_from_rec(&key, recp); | ||
2946 | |||
2947 | optr = ptr; | ||
2948 | |||
2949 | XFS_BTREE_STATS_INC(cur, insrec); | ||
2950 | |||
2951 | /* Get pointers to the btree buffer and block. */ | ||
2952 | block = xfs_btree_get_block(cur, level, &bp); | ||
2953 | numrecs = xfs_btree_get_numrecs(block); | ||
2954 | |||
2955 | #ifdef DEBUG | ||
2956 | error = xfs_btree_check_block(cur, block, level, bp); | ||
2957 | if (error) | ||
2958 | goto error0; | ||
2959 | |||
2960 | /* Check that the new entry is being inserted in the right place. */ | ||
2961 | if (ptr <= numrecs) { | ||
2962 | if (level == 0) { | ||
2963 | ASSERT(cur->bc_ops->recs_inorder(cur, recp, | ||
2964 | xfs_btree_rec_addr(cur, ptr, block))); | ||
2965 | } else { | ||
2966 | ASSERT(cur->bc_ops->keys_inorder(cur, &key, | ||
2967 | xfs_btree_key_addr(cur, ptr, block))); | ||
2968 | } | ||
2969 | } | ||
2970 | #endif | ||
2971 | |||
2972 | /* | ||
2973 | * If the block is full, we can't insert the new entry until we | ||
2974 | * make the block un-full. | ||
2975 | */ | ||
2976 | xfs_btree_set_ptr_null(cur, &nptr); | ||
2977 | if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { | ||
2978 | error = xfs_btree_make_block_unfull(cur, level, numrecs, | ||
2979 | &optr, &ptr, &nptr, &ncur, &nrec, stat); | ||
2980 | if (error || *stat == 0) | ||
2981 | goto error0; | ||
2982 | } | ||
2983 | |||
2984 | /* | ||
2985 | * The current block may have changed if the block was | ||
2986 | * previously full and we have just made space in it. | ||
2987 | */ | ||
2988 | block = xfs_btree_get_block(cur, level, &bp); | ||
2989 | numrecs = xfs_btree_get_numrecs(block); | ||
2990 | |||
2991 | #ifdef DEBUG | ||
2992 | error = xfs_btree_check_block(cur, block, level, bp); | ||
2993 | if (error) | ||
2994 | return error; | ||
2995 | #endif | ||
2996 | |||
2997 | /* | ||
2998 | * At this point we know there's room for our new entry in the block | ||
2999 | * we're pointing at. | ||
3000 | */ | ||
3001 | XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1); | ||
3002 | |||
3003 | if (level > 0) { | ||
3004 | /* It's a nonleaf. make a hole in the keys and ptrs */ | ||
3005 | union xfs_btree_key *kp; | ||
3006 | union xfs_btree_ptr *pp; | ||
3007 | |||
3008 | kp = xfs_btree_key_addr(cur, ptr, block); | ||
3009 | pp = xfs_btree_ptr_addr(cur, ptr, block); | ||
3010 | |||
3011 | #ifdef DEBUG | ||
3012 | for (i = numrecs - ptr; i >= 0; i--) { | ||
3013 | error = xfs_btree_check_ptr(cur, pp, i, level); | ||
3014 | if (error) | ||
3015 | return error; | ||
3016 | } | ||
3017 | #endif | ||
3018 | |||
3019 | xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); | ||
3020 | xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1); | ||
3021 | |||
3022 | #ifdef DEBUG | ||
3023 | error = xfs_btree_check_ptr(cur, ptrp, 0, level); | ||
3024 | if (error) | ||
3025 | goto error0; | ||
3026 | #endif | ||
3027 | |||
3028 | /* Now put the new data in, bump numrecs and log it. */ | ||
3029 | xfs_btree_copy_keys(cur, kp, &key, 1); | ||
3030 | xfs_btree_copy_ptrs(cur, pp, ptrp, 1); | ||
3031 | numrecs++; | ||
3032 | xfs_btree_set_numrecs(block, numrecs); | ||
3033 | xfs_btree_log_ptrs(cur, bp, ptr, numrecs); | ||
3034 | xfs_btree_log_keys(cur, bp, ptr, numrecs); | ||
3035 | #ifdef DEBUG | ||
3036 | if (ptr < numrecs) { | ||
3037 | ASSERT(cur->bc_ops->keys_inorder(cur, kp, | ||
3038 | xfs_btree_key_addr(cur, ptr + 1, block))); | ||
3039 | } | ||
3040 | #endif | ||
3041 | } else { | ||
3042 | /* It's a leaf. make a hole in the records */ | ||
3043 | union xfs_btree_rec *rp; | ||
3044 | |||
3045 | rp = xfs_btree_rec_addr(cur, ptr, block); | ||
3046 | |||
3047 | xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); | ||
3048 | |||
3049 | /* Now put the new data in, bump numrecs and log it. */ | ||
3050 | xfs_btree_copy_recs(cur, rp, recp, 1); | ||
3051 | xfs_btree_set_numrecs(block, ++numrecs); | ||
3052 | xfs_btree_log_recs(cur, bp, ptr, numrecs); | ||
3053 | #ifdef DEBUG | ||
3054 | if (ptr < numrecs) { | ||
3055 | ASSERT(cur->bc_ops->recs_inorder(cur, rp, | ||
3056 | xfs_btree_rec_addr(cur, ptr + 1, block))); | ||
3057 | } | ||
3058 | #endif | ||
3059 | } | ||
3060 | |||
3061 | /* Log the new number of records in the btree header. */ | ||
3062 | xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); | ||
3063 | |||
3064 | /* If we inserted at the start of a block, update the parents' keys. */ | ||
3065 | if (optr == 1) { | ||
3066 | error = xfs_btree_updkey(cur, &key, level + 1); | ||
3067 | if (error) | ||
3068 | goto error0; | ||
3069 | } | ||
3070 | |||
3071 | /* | ||
3072 | * If we are tracking the last record in the tree and | ||
3073 | * we are at the far right edge of the tree, update it. | ||
3074 | */ | ||
3075 | if (xfs_btree_is_lastrec(cur, block, level)) { | ||
3076 | cur->bc_ops->update_lastrec(cur, block, recp, | ||
3077 | ptr, LASTREC_INSREC); | ||
3078 | } | ||
3079 | |||
3080 | /* | ||
3081 | * Return the new block number, if any. | ||
3082 | * If there is one, give back a record value and a cursor too. | ||
3083 | */ | ||
3084 | *ptrp = nptr; | ||
3085 | if (!xfs_btree_ptr_is_null(cur, &nptr)) { | ||
3086 | *recp = nrec; | ||
3087 | *curp = ncur; | ||
3088 | } | ||
3089 | |||
3090 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
3091 | *stat = 1; | ||
3092 | return 0; | ||
3093 | |||
3094 | error0: | ||
3095 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
3096 | return error; | ||
3097 | } | ||
3098 | |||
3099 | /* | ||
3100 | * Insert the record at the point referenced by cur. | ||
3101 | * | ||
3102 | * A multi-level split of the tree on insert will invalidate the original | ||
3103 | * cursor. All callers of this function should assume that the cursor is | ||
3104 | * no longer valid and revalidate it. | ||
3105 | */ | ||
3106 | int | ||
3107 | xfs_btree_insert( | ||
3108 | struct xfs_btree_cur *cur, | ||
3109 | int *stat) | ||
3110 | { | ||
3111 | int error; /* error return value */ | ||
3112 | int i; /* result value, 0 for failure */ | ||
3113 | int level; /* current level number in btree */ | ||
3114 | union xfs_btree_ptr nptr; /* new block number (split result) */ | ||
3115 | struct xfs_btree_cur *ncur; /* new cursor (split result) */ | ||
3116 | struct xfs_btree_cur *pcur; /* previous level's cursor */ | ||
3117 | union xfs_btree_rec rec; /* record to insert */ | ||
3118 | |||
3119 | level = 0; | ||
3120 | ncur = NULL; | ||
3121 | pcur = cur; | ||
3122 | |||
3123 | xfs_btree_set_ptr_null(cur, &nptr); | ||
3124 | cur->bc_ops->init_rec_from_cur(cur, &rec); | ||
3125 | |||
3126 | /* | ||
3127 | * Loop going up the tree, starting at the leaf level. | ||
3128 | * Stop when we don't get a split block, that must mean that | ||
3129 | * the insert is finished with this level. | ||
3130 | */ | ||
3131 | do { | ||
3132 | /* | ||
3133 | * Insert nrec/nptr into this level of the tree. | ||
3134 | * Note if we fail, nptr will be null. | ||
3135 | */ | ||
3136 | error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i); | ||
3137 | if (error) { | ||
3138 | if (pcur != cur) | ||
3139 | xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); | ||
3140 | goto error0; | ||
3141 | } | ||
3142 | |||
3143 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
3144 | level++; | ||
3145 | |||
3146 | /* | ||
3147 | * See if the cursor we just used is trash. | ||
3148 | * Can't trash the caller's cursor, but otherwise we should | ||
3149 | * if ncur is a new cursor or we're about to be done. | ||
3150 | */ | ||
3151 | if (pcur != cur && | ||
3152 | (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { | ||
3153 | /* Save the state from the cursor before we trash it */ | ||
3154 | if (cur->bc_ops->update_cursor) | ||
3155 | cur->bc_ops->update_cursor(pcur, cur); | ||
3156 | cur->bc_nlevels = pcur->bc_nlevels; | ||
3157 | xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); | ||
3158 | } | ||
3159 | /* If we got a new cursor, switch to it. */ | ||
3160 | if (ncur) { | ||
3161 | pcur = ncur; | ||
3162 | ncur = NULL; | ||
3163 | } | ||
3164 | } while (!xfs_btree_ptr_is_null(cur, &nptr)); | ||
3165 | |||
3166 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
3167 | *stat = i; | ||
3168 | return 0; | ||
3169 | error0: | ||
3170 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
3171 | return error; | ||
3172 | } | ||
3173 | |||
3174 | /* | ||
3175 | * Try to merge a non-leaf block back into the inode root. | ||
3176 | * | ||
3177 | * Note: the killroot names comes from the fact that we're effectively | ||
3178 | * killing the old root block. But because we can't just delete the | ||
3179 | * inode we have to copy the single block it was pointing to into the | ||
3180 | * inode. | ||
3181 | */ | ||
3182 | STATIC int | ||
3183 | xfs_btree_kill_iroot( | ||
3184 | struct xfs_btree_cur *cur) | ||
3185 | { | ||
3186 | int whichfork = cur->bc_private.b.whichfork; | ||
3187 | struct xfs_inode *ip = cur->bc_private.b.ip; | ||
3188 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); | ||
3189 | struct xfs_btree_block *block; | ||
3190 | struct xfs_btree_block *cblock; | ||
3191 | union xfs_btree_key *kp; | ||
3192 | union xfs_btree_key *ckp; | ||
3193 | union xfs_btree_ptr *pp; | ||
3194 | union xfs_btree_ptr *cpp; | ||
3195 | struct xfs_buf *cbp; | ||
3196 | int level; | ||
3197 | int index; | ||
3198 | int numrecs; | ||
3199 | #ifdef DEBUG | ||
3200 | union xfs_btree_ptr ptr; | ||
3201 | int i; | ||
3202 | #endif | ||
3203 | |||
3204 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
3205 | |||
3206 | ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); | ||
3207 | ASSERT(cur->bc_nlevels > 1); | ||
3208 | |||
3209 | /* | ||
3210 | * Don't deal with the root block needs to be a leaf case. | ||
3211 | * We're just going to turn the thing back into extents anyway. | ||
3212 | */ | ||
3213 | level = cur->bc_nlevels - 1; | ||
3214 | if (level == 1) | ||
3215 | goto out0; | ||
3216 | |||
3217 | /* | ||
3218 | * Give up if the root has multiple children. | ||
3219 | */ | ||
3220 | block = xfs_btree_get_iroot(cur); | ||
3221 | if (xfs_btree_get_numrecs(block) != 1) | ||
3222 | goto out0; | ||
3223 | |||
3224 | cblock = xfs_btree_get_block(cur, level - 1, &cbp); | ||
3225 | numrecs = xfs_btree_get_numrecs(cblock); | ||
3226 | |||
3227 | /* | ||
3228 | * Only do this if the next level will fit. | ||
3229 | * Then the data must be copied up to the inode, | ||
3230 | * instead of freeing the root you free the next level. | ||
3231 | */ | ||
3232 | if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) | ||
3233 | goto out0; | ||
3234 | |||
3235 | XFS_BTREE_STATS_INC(cur, killroot); | ||
3236 | |||
3237 | #ifdef DEBUG | ||
3238 | xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); | ||
3239 | ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); | ||
3240 | xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); | ||
3241 | ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); | ||
3242 | #endif | ||
3243 | |||
3244 | index = numrecs - cur->bc_ops->get_maxrecs(cur, level); | ||
3245 | if (index) { | ||
3246 | xfs_iroot_realloc(cur->bc_private.b.ip, index, | ||
3247 | cur->bc_private.b.whichfork); | ||
3248 | block = ifp->if_broot; | ||
3249 | } | ||
3250 | |||
3251 | be16_add_cpu(&block->bb_numrecs, index); | ||
3252 | ASSERT(block->bb_numrecs == cblock->bb_numrecs); | ||
3253 | |||
3254 | kp = xfs_btree_key_addr(cur, 1, block); | ||
3255 | ckp = xfs_btree_key_addr(cur, 1, cblock); | ||
3256 | xfs_btree_copy_keys(cur, kp, ckp, numrecs); | ||
3257 | |||
3258 | pp = xfs_btree_ptr_addr(cur, 1, block); | ||
3259 | cpp = xfs_btree_ptr_addr(cur, 1, cblock); | ||
3260 | #ifdef DEBUG | ||
3261 | for (i = 0; i < numrecs; i++) { | ||
3262 | int error; | ||
3263 | |||
3264 | error = xfs_btree_check_ptr(cur, cpp, i, level - 1); | ||
3265 | if (error) { | ||
3266 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
3267 | return error; | ||
3268 | } | ||
3269 | } | ||
3270 | #endif | ||
3271 | xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); | ||
3272 | |||
3273 | cur->bc_ops->free_block(cur, cbp); | ||
3274 | XFS_BTREE_STATS_INC(cur, free); | ||
3275 | |||
3276 | cur->bc_bufs[level - 1] = NULL; | ||
3277 | be16_add_cpu(&block->bb_level, -1); | ||
3278 | xfs_trans_log_inode(cur->bc_tp, ip, | ||
3279 | XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); | ||
3280 | cur->bc_nlevels--; | ||
3281 | out0: | ||
3282 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
3283 | return 0; | ||
3284 | } | ||
3285 | |||
3286 | /* | ||
3287 | * Kill the current root node, and replace it with it's only child node. | ||
3288 | */ | ||
3289 | STATIC int | ||
3290 | xfs_btree_kill_root( | ||
3291 | struct xfs_btree_cur *cur, | ||
3292 | struct xfs_buf *bp, | ||
3293 | int level, | ||
3294 | union xfs_btree_ptr *newroot) | ||
3295 | { | ||
3296 | int error; | ||
3297 | |||
3298 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
3299 | XFS_BTREE_STATS_INC(cur, killroot); | ||
3300 | |||
3301 | /* | ||
3302 | * Update the root pointer, decreasing the level by 1 and then | ||
3303 | * free the old root. | ||
3304 | */ | ||
3305 | cur->bc_ops->set_root(cur, newroot, -1); | ||
3306 | |||
3307 | error = cur->bc_ops->free_block(cur, bp); | ||
3308 | if (error) { | ||
3309 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
3310 | return error; | ||
3311 | } | ||
3312 | |||
3313 | XFS_BTREE_STATS_INC(cur, free); | ||
3314 | |||
3315 | cur->bc_bufs[level] = NULL; | ||
3316 | cur->bc_ra[level] = 0; | ||
3317 | cur->bc_nlevels--; | ||
3318 | |||
3319 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
3320 | return 0; | ||
3321 | } | ||
3322 | |||
3323 | STATIC int | ||
3324 | xfs_btree_dec_cursor( | ||
3325 | struct xfs_btree_cur *cur, | ||
3326 | int level, | ||
3327 | int *stat) | ||
3328 | { | ||
3329 | int error; | ||
3330 | int i; | ||
3331 | |||
3332 | if (level > 0) { | ||
3333 | error = xfs_btree_decrement(cur, level, &i); | ||
3334 | if (error) | ||
3335 | return error; | ||
3336 | } | ||
3337 | |||
3338 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
3339 | *stat = 1; | ||
3340 | return 0; | ||
3341 | } | ||
3342 | |||
3343 | /* | ||
3344 | * Single level of the btree record deletion routine. | ||
3345 | * Delete record pointed to by cur/level. | ||
3346 | * Remove the record from its block then rebalance the tree. | ||
3347 | * Return 0 for error, 1 for done, 2 to go on to the next level. | ||
3348 | */ | ||
3349 | STATIC int /* error */ | ||
3350 | xfs_btree_delrec( | ||
3351 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
3352 | int level, /* level removing record from */ | ||
3353 | int *stat) /* fail/done/go-on */ | ||
3354 | { | ||
3355 | struct xfs_btree_block *block; /* btree block */ | ||
3356 | union xfs_btree_ptr cptr; /* current block ptr */ | ||
3357 | struct xfs_buf *bp; /* buffer for block */ | ||
3358 | int error; /* error return value */ | ||
3359 | int i; /* loop counter */ | ||
3360 | union xfs_btree_key key; /* storage for keyp */ | ||
3361 | union xfs_btree_key *keyp = &key; /* passed to the next level */ | ||
3362 | union xfs_btree_ptr lptr; /* left sibling block ptr */ | ||
3363 | struct xfs_buf *lbp; /* left buffer pointer */ | ||
3364 | struct xfs_btree_block *left; /* left btree block */ | ||
3365 | int lrecs = 0; /* left record count */ | ||
3366 | int ptr; /* key/record index */ | ||
3367 | union xfs_btree_ptr rptr; /* right sibling block ptr */ | ||
3368 | struct xfs_buf *rbp; /* right buffer pointer */ | ||
3369 | struct xfs_btree_block *right; /* right btree block */ | ||
3370 | struct xfs_btree_block *rrblock; /* right-right btree block */ | ||
3371 | struct xfs_buf *rrbp; /* right-right buffer pointer */ | ||
3372 | int rrecs = 0; /* right record count */ | ||
3373 | struct xfs_btree_cur *tcur; /* temporary btree cursor */ | ||
3374 | int numrecs; /* temporary numrec count */ | ||
3375 | |||
3376 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
3377 | XFS_BTREE_TRACE_ARGI(cur, level); | ||
3378 | |||
3379 | tcur = NULL; | ||
3380 | |||
3381 | /* Get the index of the entry being deleted, check for nothing there. */ | ||
3382 | ptr = cur->bc_ptrs[level]; | ||
3383 | if (ptr == 0) { | ||
3384 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
3385 | *stat = 0; | ||
3386 | return 0; | ||
3387 | } | ||
3388 | |||
3389 | /* Get the buffer & block containing the record or key/ptr. */ | ||
3390 | block = xfs_btree_get_block(cur, level, &bp); | ||
3391 | numrecs = xfs_btree_get_numrecs(block); | ||
3392 | |||
3393 | #ifdef DEBUG | ||
3394 | error = xfs_btree_check_block(cur, block, level, bp); | ||
3395 | if (error) | ||
3396 | goto error0; | ||
3397 | #endif | ||
3398 | |||
3399 | /* Fail if we're off the end of the block. */ | ||
3400 | if (ptr > numrecs) { | ||
3401 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
3402 | *stat = 0; | ||
3403 | return 0; | ||
3404 | } | ||
3405 | |||
3406 | XFS_BTREE_STATS_INC(cur, delrec); | ||
3407 | XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); | ||
3408 | |||
3409 | /* Excise the entries being deleted. */ | ||
3410 | if (level > 0) { | ||
3411 | /* It's a nonleaf. operate on keys and ptrs */ | ||
3412 | union xfs_btree_key *lkp; | ||
3413 | union xfs_btree_ptr *lpp; | ||
3414 | |||
3415 | lkp = xfs_btree_key_addr(cur, ptr + 1, block); | ||
3416 | lpp = xfs_btree_ptr_addr(cur, ptr + 1, block); | ||
3417 | |||
3418 | #ifdef DEBUG | ||
3419 | for (i = 0; i < numrecs - ptr; i++) { | ||
3420 | error = xfs_btree_check_ptr(cur, lpp, i, level); | ||
3421 | if (error) | ||
3422 | goto error0; | ||
3423 | } | ||
3424 | #endif | ||
3425 | |||
3426 | if (ptr < numrecs) { | ||
3427 | xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); | ||
3428 | xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); | ||
3429 | xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); | ||
3430 | xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); | ||
3431 | } | ||
3432 | |||
3433 | /* | ||
3434 | * If it's the first record in the block, we'll need to pass a | ||
3435 | * key up to the next level (updkey). | ||
3436 | */ | ||
3437 | if (ptr == 1) | ||
3438 | keyp = xfs_btree_key_addr(cur, 1, block); | ||
3439 | } else { | ||
3440 | /* It's a leaf. operate on records */ | ||
3441 | if (ptr < numrecs) { | ||
3442 | xfs_btree_shift_recs(cur, | ||
3443 | xfs_btree_rec_addr(cur, ptr + 1, block), | ||
3444 | -1, numrecs - ptr); | ||
3445 | xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); | ||
3446 | } | ||
3447 | |||
3448 | /* | ||
3449 | * If it's the first record in the block, we'll need a key | ||
3450 | * structure to pass up to the next level (updkey). | ||
3451 | */ | ||
3452 | if (ptr == 1) { | ||
3453 | cur->bc_ops->init_key_from_rec(&key, | ||
3454 | xfs_btree_rec_addr(cur, 1, block)); | ||
3455 | keyp = &key; | ||
3456 | } | ||
3457 | } | ||
3458 | |||
3459 | /* | ||
3460 | * Decrement and log the number of entries in the block. | ||
3461 | */ | ||
3462 | xfs_btree_set_numrecs(block, --numrecs); | ||
3463 | xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); | ||
3464 | |||
3465 | /* | ||
3466 | * If we are tracking the last record in the tree and | ||
3467 | * we are at the far right edge of the tree, update it. | ||
3468 | */ | ||
3469 | if (xfs_btree_is_lastrec(cur, block, level)) { | ||
3470 | cur->bc_ops->update_lastrec(cur, block, NULL, | ||
3471 | ptr, LASTREC_DELREC); | ||
3472 | } | ||
3473 | |||
3474 | /* | ||
3475 | * We're at the root level. First, shrink the root block in-memory. | ||
3476 | * Try to get rid of the next level down. If we can't then there's | ||
3477 | * nothing left to do. | ||
3478 | */ | ||
3479 | if (level == cur->bc_nlevels - 1) { | ||
3480 | if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { | ||
3481 | xfs_iroot_realloc(cur->bc_private.b.ip, -1, | ||
3482 | cur->bc_private.b.whichfork); | ||
3483 | |||
3484 | error = xfs_btree_kill_iroot(cur); | ||
3485 | if (error) | ||
3486 | goto error0; | ||
3487 | |||
3488 | error = xfs_btree_dec_cursor(cur, level, stat); | ||
3489 | if (error) | ||
3490 | goto error0; | ||
3491 | *stat = 1; | ||
3492 | return 0; | ||
3493 | } | ||
3494 | |||
3495 | /* | ||
3496 | * If this is the root level, and there's only one entry left, | ||
3497 | * and it's NOT the leaf level, then we can get rid of this | ||
3498 | * level. | ||
3499 | */ | ||
3500 | if (numrecs == 1 && level > 0) { | ||
3501 | union xfs_btree_ptr *pp; | ||
3502 | /* | ||
3503 | * pp is still set to the first pointer in the block. | ||
3504 | * Make it the new root of the btree. | ||
3505 | */ | ||
3506 | pp = xfs_btree_ptr_addr(cur, 1, block); | ||
3507 | error = xfs_btree_kill_root(cur, bp, level, pp); | ||
3508 | if (error) | ||
3509 | goto error0; | ||
3510 | } else if (level > 0) { | ||
3511 | error = xfs_btree_dec_cursor(cur, level, stat); | ||
3512 | if (error) | ||
3513 | goto error0; | ||
3514 | } | ||
3515 | *stat = 1; | ||
3516 | return 0; | ||
3517 | } | ||
3518 | |||
3519 | /* | ||
3520 | * If we deleted the leftmost entry in the block, update the | ||
3521 | * key values above us in the tree. | ||
3522 | */ | ||
3523 | if (ptr == 1) { | ||
3524 | error = xfs_btree_updkey(cur, keyp, level + 1); | ||
3525 | if (error) | ||
3526 | goto error0; | ||
3527 | } | ||
3528 | |||
3529 | /* | ||
3530 | * If the number of records remaining in the block is at least | ||
3531 | * the minimum, we're done. | ||
3532 | */ | ||
3533 | if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) { | ||
3534 | error = xfs_btree_dec_cursor(cur, level, stat); | ||
3535 | if (error) | ||
3536 | goto error0; | ||
3537 | return 0; | ||
3538 | } | ||
3539 | |||
3540 | /* | ||
3541 | * Otherwise, we have to move some records around to keep the | ||
3542 | * tree balanced. Look at the left and right sibling blocks to | ||
3543 | * see if we can re-balance by moving only one record. | ||
3544 | */ | ||
3545 | xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); | ||
3546 | xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB); | ||
3547 | |||
3548 | if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { | ||
3549 | /* | ||
3550 | * One child of root, need to get a chance to copy its contents | ||
3551 | * into the root and delete it. Can't go up to next level, | ||
3552 | * there's nothing to delete there. | ||
3553 | */ | ||
3554 | if (xfs_btree_ptr_is_null(cur, &rptr) && | ||
3555 | xfs_btree_ptr_is_null(cur, &lptr) && | ||
3556 | level == cur->bc_nlevels - 2) { | ||
3557 | error = xfs_btree_kill_iroot(cur); | ||
3558 | if (!error) | ||
3559 | error = xfs_btree_dec_cursor(cur, level, stat); | ||
3560 | if (error) | ||
3561 | goto error0; | ||
3562 | return 0; | ||
3563 | } | ||
3564 | } | ||
3565 | |||
3566 | ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) || | ||
3567 | !xfs_btree_ptr_is_null(cur, &lptr)); | ||
3568 | |||
3569 | /* | ||
3570 | * Duplicate the cursor so our btree manipulations here won't | ||
3571 | * disrupt the next level up. | ||
3572 | */ | ||
3573 | error = xfs_btree_dup_cursor(cur, &tcur); | ||
3574 | if (error) | ||
3575 | goto error0; | ||
3576 | |||
3577 | /* | ||
3578 | * If there's a right sibling, see if it's ok to shift an entry | ||
3579 | * out of it. | ||
3580 | */ | ||
3581 | if (!xfs_btree_ptr_is_null(cur, &rptr)) { | ||
3582 | /* | ||
3583 | * Move the temp cursor to the last entry in the next block. | ||
3584 | * Actually any entry but the first would suffice. | ||
3585 | */ | ||
3586 | i = xfs_btree_lastrec(tcur, level); | ||
3587 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
3588 | |||
3589 | error = xfs_btree_increment(tcur, level, &i); | ||
3590 | if (error) | ||
3591 | goto error0; | ||
3592 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
3593 | |||
3594 | i = xfs_btree_lastrec(tcur, level); | ||
3595 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
3596 | |||
3597 | /* Grab a pointer to the block. */ | ||
3598 | right = xfs_btree_get_block(tcur, level, &rbp); | ||
3599 | #ifdef DEBUG | ||
3600 | error = xfs_btree_check_block(tcur, right, level, rbp); | ||
3601 | if (error) | ||
3602 | goto error0; | ||
3603 | #endif | ||
3604 | /* Grab the current block number, for future use. */ | ||
3605 | xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB); | ||
3606 | |||
3607 | /* | ||
3608 | * If right block is full enough so that removing one entry | ||
3609 | * won't make it too empty, and left-shifting an entry out | ||
3610 | * of right to us works, we're done. | ||
3611 | */ | ||
3612 | if (xfs_btree_get_numrecs(right) - 1 >= | ||
3613 | cur->bc_ops->get_minrecs(tcur, level)) { | ||
3614 | error = xfs_btree_lshift(tcur, level, &i); | ||
3615 | if (error) | ||
3616 | goto error0; | ||
3617 | if (i) { | ||
3618 | ASSERT(xfs_btree_get_numrecs(block) >= | ||
3619 | cur->bc_ops->get_minrecs(tcur, level)); | ||
3620 | |||
3621 | xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); | ||
3622 | tcur = NULL; | ||
3623 | |||
3624 | error = xfs_btree_dec_cursor(cur, level, stat); | ||
3625 | if (error) | ||
3626 | goto error0; | ||
3627 | return 0; | ||
3628 | } | ||
3629 | } | ||
3630 | |||
3631 | /* | ||
3632 | * Otherwise, grab the number of records in right for | ||
3633 | * future reference, and fix up the temp cursor to point | ||
3634 | * to our block again (last record). | ||
3635 | */ | ||
3636 | rrecs = xfs_btree_get_numrecs(right); | ||
3637 | if (!xfs_btree_ptr_is_null(cur, &lptr)) { | ||
3638 | i = xfs_btree_firstrec(tcur, level); | ||
3639 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
3640 | |||
3641 | error = xfs_btree_decrement(tcur, level, &i); | ||
3642 | if (error) | ||
3643 | goto error0; | ||
3644 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
3645 | } | ||
3646 | } | ||
3647 | |||
3648 | /* | ||
3649 | * If there's a left sibling, see if it's ok to shift an entry | ||
3650 | * out of it. | ||
3651 | */ | ||
3652 | if (!xfs_btree_ptr_is_null(cur, &lptr)) { | ||
3653 | /* | ||
3654 | * Move the temp cursor to the first entry in the | ||
3655 | * previous block. | ||
3656 | */ | ||
3657 | i = xfs_btree_firstrec(tcur, level); | ||
3658 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
3659 | |||
3660 | error = xfs_btree_decrement(tcur, level, &i); | ||
3661 | if (error) | ||
3662 | goto error0; | ||
3663 | i = xfs_btree_firstrec(tcur, level); | ||
3664 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
3665 | |||
3666 | /* Grab a pointer to the block. */ | ||
3667 | left = xfs_btree_get_block(tcur, level, &lbp); | ||
3668 | #ifdef DEBUG | ||
3669 | error = xfs_btree_check_block(cur, left, level, lbp); | ||
3670 | if (error) | ||
3671 | goto error0; | ||
3672 | #endif | ||
3673 | /* Grab the current block number, for future use. */ | ||
3674 | xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB); | ||
3675 | |||
3676 | /* | ||
3677 | * If left block is full enough so that removing one entry | ||
3678 | * won't make it too empty, and right-shifting an entry out | ||
3679 | * of left to us works, we're done. | ||
3680 | */ | ||
3681 | if (xfs_btree_get_numrecs(left) - 1 >= | ||
3682 | cur->bc_ops->get_minrecs(tcur, level)) { | ||
3683 | error = xfs_btree_rshift(tcur, level, &i); | ||
3684 | if (error) | ||
3685 | goto error0; | ||
3686 | if (i) { | ||
3687 | ASSERT(xfs_btree_get_numrecs(block) >= | ||
3688 | cur->bc_ops->get_minrecs(tcur, level)); | ||
3689 | xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); | ||
3690 | tcur = NULL; | ||
3691 | if (level == 0) | ||
3692 | cur->bc_ptrs[0]++; | ||
3693 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
3694 | *stat = 1; | ||
3695 | return 0; | ||
3696 | } | ||
3697 | } | ||
3698 | |||
3699 | /* | ||
3700 | * Otherwise, grab the number of records in left for | ||
3701 | * future reference. | ||
3702 | */ | ||
3703 | lrecs = xfs_btree_get_numrecs(left); | ||
3704 | } | ||
3705 | |||
3706 | /* Delete the temp cursor, we're done with it. */ | ||
3707 | xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); | ||
3708 | tcur = NULL; | ||
3709 | |||
3710 | /* If here, we need to do a join to keep the tree balanced. */ | ||
3711 | ASSERT(!xfs_btree_ptr_is_null(cur, &cptr)); | ||
3712 | |||
3713 | if (!xfs_btree_ptr_is_null(cur, &lptr) && | ||
3714 | lrecs + xfs_btree_get_numrecs(block) <= | ||
3715 | cur->bc_ops->get_maxrecs(cur, level)) { | ||
3716 | /* | ||
3717 | * Set "right" to be the starting block, | ||
3718 | * "left" to be the left neighbor. | ||
3719 | */ | ||
3720 | rptr = cptr; | ||
3721 | right = block; | ||
3722 | rbp = bp; | ||
3723 | error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); | ||
3724 | if (error) | ||
3725 | goto error0; | ||
3726 | |||
3727 | /* | ||
3728 | * If that won't work, see if we can join with the right neighbor block. | ||
3729 | */ | ||
3730 | } else if (!xfs_btree_ptr_is_null(cur, &rptr) && | ||
3731 | rrecs + xfs_btree_get_numrecs(block) <= | ||
3732 | cur->bc_ops->get_maxrecs(cur, level)) { | ||
3733 | /* | ||
3734 | * Set "left" to be the starting block, | ||
3735 | * "right" to be the right neighbor. | ||
3736 | */ | ||
3737 | lptr = cptr; | ||
3738 | left = block; | ||
3739 | lbp = bp; | ||
3740 | error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); | ||
3741 | if (error) | ||
3742 | goto error0; | ||
3743 | |||
3744 | /* | ||
3745 | * Otherwise, we can't fix the imbalance. | ||
3746 | * Just return. This is probably a logic error, but it's not fatal. | ||
3747 | */ | ||
3748 | } else { | ||
3749 | error = xfs_btree_dec_cursor(cur, level, stat); | ||
3750 | if (error) | ||
3751 | goto error0; | ||
3752 | return 0; | ||
3753 | } | ||
3754 | |||
3755 | rrecs = xfs_btree_get_numrecs(right); | ||
3756 | lrecs = xfs_btree_get_numrecs(left); | ||
3757 | |||
3758 | /* | ||
3759 | * We're now going to join "left" and "right" by moving all the stuff | ||
3760 | * in "right" to "left" and deleting "right". | ||
3761 | */ | ||
3762 | XFS_BTREE_STATS_ADD(cur, moves, rrecs); | ||
3763 | if (level > 0) { | ||
3764 | /* It's a non-leaf. Move keys and pointers. */ | ||
3765 | union xfs_btree_key *lkp; /* left btree key */ | ||
3766 | union xfs_btree_ptr *lpp; /* left address pointer */ | ||
3767 | union xfs_btree_key *rkp; /* right btree key */ | ||
3768 | union xfs_btree_ptr *rpp; /* right address pointer */ | ||
3769 | |||
3770 | lkp = xfs_btree_key_addr(cur, lrecs + 1, left); | ||
3771 | lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left); | ||
3772 | rkp = xfs_btree_key_addr(cur, 1, right); | ||
3773 | rpp = xfs_btree_ptr_addr(cur, 1, right); | ||
3774 | #ifdef DEBUG | ||
3775 | for (i = 1; i < rrecs; i++) { | ||
3776 | error = xfs_btree_check_ptr(cur, rpp, i, level); | ||
3777 | if (error) | ||
3778 | goto error0; | ||
3779 | } | ||
3780 | #endif | ||
3781 | xfs_btree_copy_keys(cur, lkp, rkp, rrecs); | ||
3782 | xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs); | ||
3783 | |||
3784 | xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); | ||
3785 | xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); | ||
3786 | } else { | ||
3787 | /* It's a leaf. Move records. */ | ||
3788 | union xfs_btree_rec *lrp; /* left record pointer */ | ||
3789 | union xfs_btree_rec *rrp; /* right record pointer */ | ||
3790 | |||
3791 | lrp = xfs_btree_rec_addr(cur, lrecs + 1, left); | ||
3792 | rrp = xfs_btree_rec_addr(cur, 1, right); | ||
3793 | |||
3794 | xfs_btree_copy_recs(cur, lrp, rrp, rrecs); | ||
3795 | xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); | ||
3796 | } | ||
3797 | |||
3798 | XFS_BTREE_STATS_INC(cur, join); | ||
3799 | |||
3800 | /* | ||
3801 | * Fix up the number of records and right block pointer in the | ||
3802 | * surviving block, and log it. | ||
3803 | */ | ||
3804 | xfs_btree_set_numrecs(left, lrecs + rrecs); | ||
3805 | xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), | ||
3806 | xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); | ||
3807 | xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); | ||
3808 | |||
3809 | /* If there is a right sibling, point it to the remaining block. */ | ||
3810 | xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); | ||
3811 | if (!xfs_btree_ptr_is_null(cur, &cptr)) { | ||
3812 | error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp); | ||
3813 | if (error) | ||
3814 | goto error0; | ||
3815 | xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); | ||
3816 | xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); | ||
3817 | } | ||
3818 | |||
3819 | /* Free the deleted block. */ | ||
3820 | error = cur->bc_ops->free_block(cur, rbp); | ||
3821 | if (error) | ||
3822 | goto error0; | ||
3823 | XFS_BTREE_STATS_INC(cur, free); | ||
3824 | |||
3825 | /* | ||
3826 | * If we joined with the left neighbor, set the buffer in the | ||
3827 | * cursor to the left block, and fix up the index. | ||
3828 | */ | ||
3829 | if (bp != lbp) { | ||
3830 | cur->bc_bufs[level] = lbp; | ||
3831 | cur->bc_ptrs[level] += lrecs; | ||
3832 | cur->bc_ra[level] = 0; | ||
3833 | } | ||
3834 | /* | ||
3835 | * If we joined with the right neighbor and there's a level above | ||
3836 | * us, increment the cursor at that level. | ||
3837 | */ | ||
3838 | else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || | ||
3839 | (level + 1 < cur->bc_nlevels)) { | ||
3840 | error = xfs_btree_increment(cur, level + 1, &i); | ||
3841 | if (error) | ||
3842 | goto error0; | ||
3843 | } | ||
3844 | |||
3845 | /* | ||
3846 | * Readjust the ptr at this level if it's not a leaf, since it's | ||
3847 | * still pointing at the deletion point, which makes the cursor | ||
3848 | * inconsistent. If this makes the ptr 0, the caller fixes it up. | ||
3849 | * We can't use decrement because it would change the next level up. | ||
3850 | */ | ||
3851 | if (level > 0) | ||
3852 | cur->bc_ptrs[level]--; | ||
3853 | |||
3854 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
3855 | /* Return value means the next level up has something to do. */ | ||
3856 | *stat = 2; | ||
3857 | return 0; | ||
3858 | |||
3859 | error0: | ||
3860 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
3861 | if (tcur) | ||
3862 | xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); | ||
3863 | return error; | ||
3864 | } | ||
3865 | |||
3866 | /* | ||
3867 | * Delete the record pointed to by cur. | ||
3868 | * The cursor refers to the place where the record was (could be inserted) | ||
3869 | * when the operation returns. | ||
3870 | */ | ||
3871 | int /* error */ | ||
3872 | xfs_btree_delete( | ||
3873 | struct xfs_btree_cur *cur, | ||
3874 | int *stat) /* success/failure */ | ||
3875 | { | ||
3876 | int error; /* error return value */ | ||
3877 | int level; | ||
3878 | int i; | ||
3879 | |||
3880 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
3881 | |||
3882 | /* | ||
3883 | * Go up the tree, starting at leaf level. | ||
3884 | * | ||
3885 | * If 2 is returned then a join was done; go to the next level. | ||
3886 | * Otherwise we are done. | ||
3887 | */ | ||
3888 | for (level = 0, i = 2; i == 2; level++) { | ||
3889 | error = xfs_btree_delrec(cur, level, &i); | ||
3890 | if (error) | ||
3891 | goto error0; | ||
3892 | } | ||
3893 | |||
3894 | if (i == 0) { | ||
3895 | for (level = 1; level < cur->bc_nlevels; level++) { | ||
3896 | if (cur->bc_ptrs[level] == 0) { | ||
3897 | error = xfs_btree_decrement(cur, level, &i); | ||
3898 | if (error) | ||
3899 | goto error0; | ||
3900 | break; | ||
3901 | } | ||
3902 | } | ||
3903 | } | ||
3904 | |||
3905 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
3906 | *stat = i; | ||
3907 | return 0; | ||
3908 | error0: | ||
3909 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
3910 | return error; | ||
3911 | } | ||
3912 | |||
3913 | /* | ||
3914 | * Get the data from the pointed-to record. | ||
3915 | */ | ||
3916 | int /* error */ | ||
3917 | xfs_btree_get_rec( | ||
3918 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
3919 | union xfs_btree_rec **recp, /* output: btree record */ | ||
3920 | int *stat) /* output: success/failure */ | ||
3921 | { | ||
3922 | struct xfs_btree_block *block; /* btree block */ | ||
3923 | struct xfs_buf *bp; /* buffer pointer */ | ||
3924 | int ptr; /* record number */ | ||
3925 | #ifdef DEBUG | ||
3926 | int error; /* error return value */ | ||
3927 | #endif | ||
3928 | |||
3929 | ptr = cur->bc_ptrs[0]; | ||
3930 | block = xfs_btree_get_block(cur, 0, &bp); | ||
3931 | |||
3932 | #ifdef DEBUG | ||
3933 | error = xfs_btree_check_block(cur, block, 0, bp); | ||
3934 | if (error) | ||
3935 | return error; | ||
3936 | #endif | ||
3937 | |||
3938 | /* | ||
3939 | * Off the right end or left end, return failure. | ||
3940 | */ | ||
3941 | if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) { | ||
3942 | *stat = 0; | ||
3943 | return 0; | ||
3944 | } | ||
3945 | |||
3946 | /* | ||
3947 | * Point to the record and extract its data. | ||
3948 | */ | ||
3949 | *recp = xfs_btree_rec_addr(cur, ptr, block); | ||
3950 | *stat = 1; | ||
3951 | return 0; | ||
3952 | } | ||
3953 | |||
3954 | /* | ||
3955 | * Change the owner of a btree. | ||
3956 | * | ||
3957 | * The mechanism we use here is ordered buffer logging. Because we don't know | ||
3958 | * how many buffers were are going to need to modify, we don't really want to | ||
3959 | * have to make transaction reservations for the worst case of every buffer in a | ||
3960 | * full size btree as that may be more space that we can fit in the log.... | ||
3961 | * | ||
3962 | * We do the btree walk in the most optimal manner possible - we have sibling | ||
3963 | * pointers so we can just walk all the blocks on each level from left to right | ||
3964 | * in a single pass, and then move to the next level and do the same. We can | ||
3965 | * also do readahead on the sibling pointers to get IO moving more quickly, | ||
3966 | * though for slow disks this is unlikely to make much difference to performance | ||
3967 | * as the amount of CPU work we have to do before moving to the next block is | ||
3968 | * relatively small. | ||
3969 | * | ||
3970 | * For each btree block that we load, modify the owner appropriately, set the | ||
3971 | * buffer as an ordered buffer and log it appropriately. We need to ensure that | ||
3972 | * we mark the region we change dirty so that if the buffer is relogged in | ||
3973 | * a subsequent transaction the changes we make here as an ordered buffer are | ||
3974 | * correctly relogged in that transaction. If we are in recovery context, then | ||
3975 | * just queue the modified buffer as delayed write buffer so the transaction | ||
3976 | * recovery completion writes the changes to disk. | ||
3977 | */ | ||
3978 | static int | ||
3979 | xfs_btree_block_change_owner( | ||
3980 | struct xfs_btree_cur *cur, | ||
3981 | int level, | ||
3982 | __uint64_t new_owner, | ||
3983 | struct list_head *buffer_list) | ||
3984 | { | ||
3985 | struct xfs_btree_block *block; | ||
3986 | struct xfs_buf *bp; | ||
3987 | union xfs_btree_ptr rptr; | ||
3988 | |||
3989 | /* do right sibling readahead */ | ||
3990 | xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); | ||
3991 | |||
3992 | /* modify the owner */ | ||
3993 | block = xfs_btree_get_block(cur, level, &bp); | ||
3994 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) | ||
3995 | block->bb_u.l.bb_owner = cpu_to_be64(new_owner); | ||
3996 | else | ||
3997 | block->bb_u.s.bb_owner = cpu_to_be32(new_owner); | ||
3998 | |||
3999 | /* | ||
4000 | * If the block is a root block hosted in an inode, we might not have a | ||
4001 | * buffer pointer here and we shouldn't attempt to log the change as the | ||
4002 | * information is already held in the inode and discarded when the root | ||
4003 | * block is formatted into the on-disk inode fork. We still change it, | ||
4004 | * though, so everything is consistent in memory. | ||
4005 | */ | ||
4006 | if (bp) { | ||
4007 | if (cur->bc_tp) { | ||
4008 | xfs_trans_ordered_buf(cur->bc_tp, bp); | ||
4009 | xfs_btree_log_block(cur, bp, XFS_BB_OWNER); | ||
4010 | } else { | ||
4011 | xfs_buf_delwri_queue(bp, buffer_list); | ||
4012 | } | ||
4013 | } else { | ||
4014 | ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); | ||
4015 | ASSERT(level == cur->bc_nlevels - 1); | ||
4016 | } | ||
4017 | |||
4018 | /* now read rh sibling block for next iteration */ | ||
4019 | xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); | ||
4020 | if (xfs_btree_ptr_is_null(cur, &rptr)) | ||
4021 | return -ENOENT; | ||
4022 | |||
4023 | return xfs_btree_lookup_get_block(cur, level, &rptr, &block); | ||
4024 | } | ||
4025 | |||
4026 | int | ||
4027 | xfs_btree_change_owner( | ||
4028 | struct xfs_btree_cur *cur, | ||
4029 | __uint64_t new_owner, | ||
4030 | struct list_head *buffer_list) | ||
4031 | { | ||
4032 | union xfs_btree_ptr lptr; | ||
4033 | int level; | ||
4034 | struct xfs_btree_block *block = NULL; | ||
4035 | int error = 0; | ||
4036 | |||
4037 | cur->bc_ops->init_ptr_from_cur(cur, &lptr); | ||
4038 | |||
4039 | /* for each level */ | ||
4040 | for (level = cur->bc_nlevels - 1; level >= 0; level--) { | ||
4041 | /* grab the left hand block */ | ||
4042 | error = xfs_btree_lookup_get_block(cur, level, &lptr, &block); | ||
4043 | if (error) | ||
4044 | return error; | ||
4045 | |||
4046 | /* readahead the left most block for the next level down */ | ||
4047 | if (level > 0) { | ||
4048 | union xfs_btree_ptr *ptr; | ||
4049 | |||
4050 | ptr = xfs_btree_ptr_addr(cur, 1, block); | ||
4051 | xfs_btree_readahead_ptr(cur, ptr, 1); | ||
4052 | |||
4053 | /* save for the next iteration of the loop */ | ||
4054 | lptr = *ptr; | ||
4055 | } | ||
4056 | |||
4057 | /* for each buffer in the level */ | ||
4058 | do { | ||
4059 | error = xfs_btree_block_change_owner(cur, level, | ||
4060 | new_owner, | ||
4061 | buffer_list); | ||
4062 | } while (!error); | ||
4063 | |||
4064 | if (error != -ENOENT) | ||
4065 | return error; | ||
4066 | } | ||
4067 | |||
4068 | return 0; | ||
4069 | } | ||
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h new file mode 100644 index 000000000000..a04b69422f67 --- /dev/null +++ b/fs/xfs/libxfs/xfs_btree.h | |||
@@ -0,0 +1,468 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_BTREE_H__ | ||
19 | #define __XFS_BTREE_H__ | ||
20 | |||
21 | struct xfs_buf; | ||
22 | struct xfs_bmap_free; | ||
23 | struct xfs_inode; | ||
24 | struct xfs_mount; | ||
25 | struct xfs_trans; | ||
26 | |||
27 | extern kmem_zone_t *xfs_btree_cur_zone; | ||
28 | |||
29 | /* | ||
30 | * Generic key, ptr and record wrapper structures. | ||
31 | * | ||
32 | * These are disk format structures, and are converted where necessary | ||
33 | * by the btree specific code that needs to interpret them. | ||
34 | */ | ||
35 | union xfs_btree_ptr { | ||
36 | __be32 s; /* short form ptr */ | ||
37 | __be64 l; /* long form ptr */ | ||
38 | }; | ||
39 | |||
40 | union xfs_btree_key { | ||
41 | xfs_bmbt_key_t bmbt; | ||
42 | xfs_bmdr_key_t bmbr; /* bmbt root block */ | ||
43 | xfs_alloc_key_t alloc; | ||
44 | xfs_inobt_key_t inobt; | ||
45 | }; | ||
46 | |||
47 | union xfs_btree_rec { | ||
48 | xfs_bmbt_rec_t bmbt; | ||
49 | xfs_bmdr_rec_t bmbr; /* bmbt root block */ | ||
50 | xfs_alloc_rec_t alloc; | ||
51 | xfs_inobt_rec_t inobt; | ||
52 | }; | ||
53 | |||
/*
 * Typed spellings of the lookup-direction and btree-number values: each
 * macro is the corresponding "...i" constant cast to xfs_lookup_t or
 * xfs_btnum_t.  These exist only to keep -wlint quiet.
 */
#define	XFS_LOOKUP_EQ	((xfs_lookup_t)XFS_LOOKUP_EQi)
#define	XFS_LOOKUP_LE	((xfs_lookup_t)XFS_LOOKUP_LEi)
#define	XFS_LOOKUP_GE	((xfs_lookup_t)XFS_LOOKUP_GEi)

#define	XFS_BTNUM_BNO	((xfs_btnum_t)XFS_BTNUM_BNOi)
#define	XFS_BTNUM_CNT	((xfs_btnum_t)XFS_BTNUM_CNTi)
#define	XFS_BTNUM_BMAP	((xfs_btnum_t)XFS_BTNUM_BMAPi)
#define	XFS_BTNUM_INO	((xfs_btnum_t)XFS_BTNUM_INOi)
#define	XFS_BTNUM_FINO	((xfs_btnum_t)XFS_BTNUM_FINOi)
66 | |||
/*
 * Field bits used when logging a btree block header: one bit per header
 * field.
 */
#define	XFS_BB_MAGIC		(1 << 0)
#define	XFS_BB_LEVEL		(1 << 1)
#define	XFS_BB_NUMRECS		(1 << 2)
#define	XFS_BB_LEFTSIB		(1 << 3)
#define	XFS_BB_RIGHTSIB		(1 << 4)
#define	XFS_BB_BLKNO		(1 << 5)
#define	XFS_BB_LSN		(1 << 6)
#define	XFS_BB_UUID		(1 << 7)
#define	XFS_BB_OWNER		(1 << 8)

/* Bit counts: the first five bits (MAGIC..RIGHTSIB), or all nine. */
#define	XFS_BB_NUM_BITS		5
#define	XFS_BB_NUM_BITS_CRC	9

/* Masks with every bit of the respective set turned on. */
#define	XFS_BB_ALL_BITS		((1 << XFS_BB_NUM_BITS) - 1)
#define	XFS_BB_ALL_BITS_CRC	((1 << XFS_BB_NUM_BITS_CRC) - 1)
83 | |||
/*
 * Generic stats interface
 *
 * XFS_BTREE_STATS_INC(cur, stat) bumps the per-btree-type counter named
 * xs_<type>_2_<stat>, where <type> is chosen from cur->bc_btnum:
 * abtb (BNO), abtc (CNT), bmbt (BMAP), ibt (INO) or fibt (FINO).
 *
 * @cur may be any expression yielding a cursor pointer; it is parenthesised
 * in the expansion so that arguments such as "p + 1" bind correctly.
 */
#define __XFS_BTREE_STATS_INC(type, stat) \
	XFS_STATS_INC(xs_ ## type ## _2_ ## stat)
#define XFS_BTREE_STATS_INC(cur, stat)					\
do {									\
	switch ((cur)->bc_btnum) {					\
	case XFS_BTNUM_BNO:						\
		__XFS_BTREE_STATS_INC(abtb, stat);			\
		break;							\
	case XFS_BTNUM_CNT:						\
		__XFS_BTREE_STATS_INC(abtc, stat);			\
		break;							\
	case XFS_BTNUM_BMAP:						\
		__XFS_BTREE_STATS_INC(bmbt, stat);			\
		break;							\
	case XFS_BTNUM_INO:						\
		__XFS_BTREE_STATS_INC(ibt, stat);			\
		break;							\
	case XFS_BTNUM_FINO:						\
		__XFS_BTREE_STATS_INC(fibt, stat);			\
		break;							\
	/* Not a real btree type; presumably listed to keep gcc quiet. */ \
	case XFS_BTNUM_MAX:						\
		ASSERT(0);						\
		break;							\
	}								\
} while (0)
100 | |||
/*
 * XFS_BTREE_STATS_ADD(cur, stat, val) adds @val to the per-btree-type
 * counter named xs_<type>_2_<stat>, where <type> is chosen from
 * cur->bc_btnum: abtb (BNO), abtc (CNT), bmbt (BMAP), ibt (INO) or
 * fibt (FINO).
 *
 * @cur may be any expression yielding a cursor pointer; it is parenthesised
 * in the expansion so that arguments such as "p + 1" bind correctly.
 */
#define __XFS_BTREE_STATS_ADD(type, stat, val) \
	XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val)
#define XFS_BTREE_STATS_ADD(cur, stat, val)				\
do {									\
	switch ((cur)->bc_btnum) {					\
	case XFS_BTNUM_BNO:						\
		__XFS_BTREE_STATS_ADD(abtb, stat, val);			\
		break;							\
	case XFS_BTNUM_CNT:						\
		__XFS_BTREE_STATS_ADD(abtc, stat, val);			\
		break;							\
	case XFS_BTNUM_BMAP:						\
		__XFS_BTREE_STATS_ADD(bmbt, stat, val);			\
		break;							\
	case XFS_BTNUM_INO:						\
		__XFS_BTREE_STATS_ADD(ibt, stat, val);			\
		break;							\
	case XFS_BTNUM_FINO:						\
		__XFS_BTREE_STATS_ADD(fibt, stat, val);			\
		break;							\
	/* Not a real btree type; presumably listed to keep gcc quiet. */ \
	case XFS_BTNUM_MAX:						\
		ASSERT(0);						\
		break;							\
	}								\
} while (0)
114 | |||
/* Largest level count of any btree; sizes the per-level cursor arrays. */
#define	XFS_BTREE_MAXLEVELS	8
116 | |||
117 | struct xfs_btree_ops { | ||
118 | /* size of the key and record structures */ | ||
119 | size_t key_len; | ||
120 | size_t rec_len; | ||
121 | |||
122 | /* cursor operations */ | ||
123 | struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); | ||
124 | void (*update_cursor)(struct xfs_btree_cur *src, | ||
125 | struct xfs_btree_cur *dst); | ||
126 | |||
127 | /* update btree root pointer */ | ||
128 | void (*set_root)(struct xfs_btree_cur *cur, | ||
129 | union xfs_btree_ptr *nptr, int level_change); | ||
130 | |||
131 | /* block allocation / freeing */ | ||
132 | int (*alloc_block)(struct xfs_btree_cur *cur, | ||
133 | union xfs_btree_ptr *start_bno, | ||
134 | union xfs_btree_ptr *new_bno, | ||
135 | int *stat); | ||
136 | int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp); | ||
137 | |||
138 | /* update last record information */ | ||
139 | void (*update_lastrec)(struct xfs_btree_cur *cur, | ||
140 | struct xfs_btree_block *block, | ||
141 | union xfs_btree_rec *rec, | ||
142 | int ptr, int reason); | ||
143 | |||
144 | /* records in block/level */ | ||
145 | int (*get_minrecs)(struct xfs_btree_cur *cur, int level); | ||
146 | int (*get_maxrecs)(struct xfs_btree_cur *cur, int level); | ||
147 | |||
148 | /* records on disk. Matter for the root in inode case. */ | ||
149 | int (*get_dmaxrecs)(struct xfs_btree_cur *cur, int level); | ||
150 | |||
151 | /* init values of btree structures */ | ||
152 | void (*init_key_from_rec)(union xfs_btree_key *key, | ||
153 | union xfs_btree_rec *rec); | ||
154 | void (*init_rec_from_key)(union xfs_btree_key *key, | ||
155 | union xfs_btree_rec *rec); | ||
156 | void (*init_rec_from_cur)(struct xfs_btree_cur *cur, | ||
157 | union xfs_btree_rec *rec); | ||
158 | void (*init_ptr_from_cur)(struct xfs_btree_cur *cur, | ||
159 | union xfs_btree_ptr *ptr); | ||
160 | |||
161 | /* difference between key value and cursor value */ | ||
162 | __int64_t (*key_diff)(struct xfs_btree_cur *cur, | ||
163 | union xfs_btree_key *key); | ||
164 | |||
165 | const struct xfs_buf_ops *buf_ops; | ||
166 | |||
167 | #if defined(DEBUG) || defined(XFS_WARN) | ||
168 | /* check that k1 is lower than k2 */ | ||
169 | int (*keys_inorder)(struct xfs_btree_cur *cur, | ||
170 | union xfs_btree_key *k1, | ||
171 | union xfs_btree_key *k2); | ||
172 | |||
173 | /* check that r1 is lower than r2 */ | ||
174 | int (*recs_inorder)(struct xfs_btree_cur *cur, | ||
175 | union xfs_btree_rec *r1, | ||
176 | union xfs_btree_rec *r2); | ||
177 | #endif | ||
178 | }; | ||
179 | |||
/*
 * Values for the "reason" argument of the update_lastrec method, saying why
 * it is being called.
 */
#define	LASTREC_UPDATE	0
#define	LASTREC_INSREC	1
#define	LASTREC_DELREC	2
186 | |||
187 | |||
188 | /* | ||
189 | * Btree cursor structure. | ||
190 | * This collects all information needed by the btree code in one place. | ||
191 | */ | ||
192 | typedef struct xfs_btree_cur | ||
193 | { | ||
194 | struct xfs_trans *bc_tp; /* transaction we're in, if any */ | ||
195 | struct xfs_mount *bc_mp; /* file system mount struct */ | ||
196 | const struct xfs_btree_ops *bc_ops; | ||
197 | uint bc_flags; /* btree features - below */ | ||
198 | union { | ||
199 | xfs_alloc_rec_incore_t a; | ||
200 | xfs_bmbt_irec_t b; | ||
201 | xfs_inobt_rec_incore_t i; | ||
202 | } bc_rec; /* current insert/search record value */ | ||
203 | struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */ | ||
204 | int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */ | ||
205 | __uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */ | ||
206 | #define XFS_BTCUR_LEFTRA 1 /* left sibling has been read-ahead */ | ||
207 | #define XFS_BTCUR_RIGHTRA 2 /* right sibling has been read-ahead */ | ||
208 | __uint8_t bc_nlevels; /* number of levels in the tree */ | ||
209 | __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ | ||
210 | xfs_btnum_t bc_btnum; /* identifies which btree type */ | ||
211 | union { | ||
212 | struct { /* needed for BNO, CNT, INO */ | ||
213 | struct xfs_buf *agbp; /* agf/agi buffer pointer */ | ||
214 | xfs_agnumber_t agno; /* ag number */ | ||
215 | } a; | ||
216 | struct { /* needed for BMAP */ | ||
217 | struct xfs_inode *ip; /* pointer to our inode */ | ||
218 | struct xfs_bmap_free *flist; /* list to free after */ | ||
219 | xfs_fsblock_t firstblock; /* 1st blk allocated */ | ||
220 | int allocated; /* count of alloced */ | ||
221 | short forksize; /* fork's inode space */ | ||
222 | char whichfork; /* data or attr fork */ | ||
223 | char flags; /* flags */ | ||
224 | #define XFS_BTCUR_BPRV_WASDEL 1 /* was delayed */ | ||
225 | } b; | ||
226 | } bc_private; /* per-btree type data */ | ||
227 | } xfs_btree_cur_t; | ||
228 | |||
/* Feature flags stored in a cursor's bc_flags. */
#define XFS_BTREE_LONG_PTRS		(1 << 0)	/* 64-bit block pointers */
#define XFS_BTREE_ROOT_IN_INODE		(1 << 1)	/* root may be variable size */
#define XFS_BTREE_LASTREC_UPDATE	(1 << 2)	/* last record tracked externally */
#define XFS_BTREE_CRC_BLOCKS		(1 << 3)	/* extended btree block format */
234 | |||
235 | |||
/* Values for the "error" argument of xfs_btree_del_cursor(). */
#define	XFS_BTREE_NOERROR	0
#define	XFS_BTREE_ERROR		1
238 | |||
/*
 * View a buffer's data (bp->b_addr) as a btree block header.
 */
#define	XFS_BUF_TO_BLOCK(bp)	((struct xfs_btree_block *)((bp)->b_addr))
243 | |||
244 | |||
/*
 * Check that a btree block header is ok.
 *
 * cur:   btree cursor
 * block: generic btree block pointer
 * level: level of the btree block
 * bp:    buffer containing the block, if any
 */
int
xfs_btree_check_block(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level,
	struct xfs_buf		*bp);
254 | |||
255 | /* | ||
256 | * Check that (long) pointer is ok. | ||
257 | */ | ||
258 | int /* error (0 or EFSCORRUPTED) */ | ||
259 | xfs_btree_check_lptr( | ||
260 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
261 | xfs_dfsbno_t ptr, /* btree block disk address */ | ||
262 | int level); /* btree block level */ | ||
263 | |||
264 | /* | ||
265 | * Delete the btree cursor. | ||
266 | */ | ||
267 | void | ||
268 | xfs_btree_del_cursor( | ||
269 | xfs_btree_cur_t *cur, /* btree cursor */ | ||
270 | int error); /* del because of error */ | ||
271 | |||
272 | /* | ||
273 | * Duplicate the btree cursor. | ||
274 | * Allocate a new one, copy the record, re-get the buffers. | ||
275 | */ | ||
276 | int /* error */ | ||
277 | xfs_btree_dup_cursor( | ||
278 | xfs_btree_cur_t *cur, /* input cursor */ | ||
279 | xfs_btree_cur_t **ncur);/* output cursor */ | ||
280 | |||
281 | /* | ||
282 | * Get a buffer for the block, return it with no data read. | ||
283 | * Long-form addressing. | ||
284 | */ | ||
285 | struct xfs_buf * /* buffer for fsbno */ | ||
286 | xfs_btree_get_bufl( | ||
287 | struct xfs_mount *mp, /* file system mount point */ | ||
288 | struct xfs_trans *tp, /* transaction pointer */ | ||
289 | xfs_fsblock_t fsbno, /* file system block number */ | ||
290 | uint lock); /* lock flags for get_buf */ | ||
291 | |||
292 | /* | ||
293 | * Get a buffer for the block, return it with no data read. | ||
294 | * Short-form addressing. | ||
295 | */ | ||
296 | struct xfs_buf * /* buffer for agno/agbno */ | ||
297 | xfs_btree_get_bufs( | ||
298 | struct xfs_mount *mp, /* file system mount point */ | ||
299 | struct xfs_trans *tp, /* transaction pointer */ | ||
300 | xfs_agnumber_t agno, /* allocation group number */ | ||
301 | xfs_agblock_t agbno, /* allocation group block number */ | ||
302 | uint lock); /* lock flags for get_buf */ | ||
303 | |||
304 | /* | ||
305 | * Check for the cursor referring to the last block at the given level. | ||
306 | */ | ||
307 | int /* 1=is last block, 0=not last block */ | ||
308 | xfs_btree_islastblock( | ||
309 | xfs_btree_cur_t *cur, /* btree cursor */ | ||
310 | int level); /* level to check */ | ||
311 | |||
312 | /* | ||
313 | * Compute first and last byte offsets for the fields given. | ||
314 | * Interprets the offsets table, which contains struct field offsets. | ||
315 | */ | ||
316 | void | ||
317 | xfs_btree_offsets( | ||
318 | __int64_t fields, /* bitmask of fields */ | ||
319 | const short *offsets,/* table of field offsets */ | ||
320 | int nbits, /* number of bits to inspect */ | ||
321 | int *first, /* output: first byte offset */ | ||
322 | int *last); /* output: last byte offset */ | ||
323 | |||
324 | /* | ||
325 | * Get a buffer for the block, return it read in. | ||
326 | * Long-form addressing. | ||
327 | */ | ||
328 | int /* error */ | ||
329 | xfs_btree_read_bufl( | ||
330 | struct xfs_mount *mp, /* file system mount point */ | ||
331 | struct xfs_trans *tp, /* transaction pointer */ | ||
332 | xfs_fsblock_t fsbno, /* file system block number */ | ||
333 | uint lock, /* lock flags for read_buf */ | ||
334 | struct xfs_buf **bpp, /* buffer for fsbno */ | ||
335 | int refval, /* ref count value for buffer */ | ||
336 | const struct xfs_buf_ops *ops); | ||
337 | |||
338 | /* | ||
339 | * Read-ahead the block, don't wait for it, don't return a buffer. | ||
340 | * Long-form addressing. | ||
341 | */ | ||
342 | void /* no return value */ | ||
343 | xfs_btree_reada_bufl( | ||
344 | struct xfs_mount *mp, /* file system mount point */ | ||
345 | xfs_fsblock_t fsbno, /* file system block number */ | ||
346 | xfs_extlen_t count, /* count of filesystem blocks */ | ||
347 | const struct xfs_buf_ops *ops); | ||
348 | |||
349 | /* | ||
350 | * Read-ahead the block, don't wait for it, don't return a buffer. | ||
351 | * Short-form addressing. | ||
352 | */ | ||
353 | void /* no return value */ | ||
354 | xfs_btree_reada_bufs( | ||
355 | struct xfs_mount *mp, /* file system mount point */ | ||
356 | xfs_agnumber_t agno, /* allocation group number */ | ||
357 | xfs_agblock_t agbno, /* allocation group block number */ | ||
358 | xfs_extlen_t count, /* count of filesystem blocks */ | ||
359 | const struct xfs_buf_ops *ops); | ||
360 | |||
361 | /* | ||
362 | * Initialise a new btree block header | ||
363 | */ | ||
364 | void | ||
365 | xfs_btree_init_block( | ||
366 | struct xfs_mount *mp, | ||
367 | struct xfs_buf *bp, | ||
368 | __u32 magic, | ||
369 | __u16 level, | ||
370 | __u16 numrecs, | ||
371 | __u64 owner, | ||
372 | unsigned int flags); | ||
373 | |||
374 | void | ||
375 | xfs_btree_init_block_int( | ||
376 | struct xfs_mount *mp, | ||
377 | struct xfs_btree_block *buf, | ||
378 | xfs_daddr_t blkno, | ||
379 | __u32 magic, | ||
380 | __u16 level, | ||
381 | __u16 numrecs, | ||
382 | __u64 owner, | ||
383 | unsigned int flags); | ||
384 | |||
385 | /* | ||
386 | * Common btree core entry points. | ||
387 | */ | ||
388 | int xfs_btree_increment(struct xfs_btree_cur *, int, int *); | ||
389 | int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); | ||
390 | int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); | ||
391 | int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); | ||
392 | int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); | ||
393 | int xfs_btree_insert(struct xfs_btree_cur *, int *); | ||
394 | int xfs_btree_delete(struct xfs_btree_cur *, int *); | ||
395 | int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); | ||
396 | int xfs_btree_change_owner(struct xfs_btree_cur *cur, __uint64_t new_owner, | ||
397 | struct list_head *buffer_list); | ||
398 | |||
399 | /* | ||
400 | * btree block CRC helpers | ||
401 | */ | ||
402 | void xfs_btree_lblock_calc_crc(struct xfs_buf *); | ||
403 | bool xfs_btree_lblock_verify_crc(struct xfs_buf *); | ||
404 | void xfs_btree_sblock_calc_crc(struct xfs_buf *); | ||
405 | bool xfs_btree_sblock_verify_crc(struct xfs_buf *); | ||
406 | |||
407 | /* | ||
408 | * Internal btree helpers also used by xfs_bmap.c. | ||
409 | */ | ||
410 | void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); | ||
411 | void xfs_btree_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int); | ||
412 | |||
413 | /* | ||
414 | * Helpers. | ||
415 | */ | ||
416 | static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block) | ||
417 | { | ||
418 | return be16_to_cpu(block->bb_numrecs); | ||
419 | } | ||
420 | |||
421 | static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block, | ||
422 | __uint16_t numrecs) | ||
423 | { | ||
424 | block->bb_numrecs = cpu_to_be16(numrecs); | ||
425 | } | ||
426 | |||
427 | static inline int xfs_btree_get_level(struct xfs_btree_block *block) | ||
428 | { | ||
429 | return be16_to_cpu(block->bb_level); | ||
430 | } | ||
431 | |||
432 | |||
433 | /* | ||
434 | * Min and max functions for extlen, agblock, fileoff, and filblks types. | ||
435 | */ | ||
436 | #define XFS_EXTLEN_MIN(a,b) min_t(xfs_extlen_t, (a), (b)) | ||
437 | #define XFS_EXTLEN_MAX(a,b) max_t(xfs_extlen_t, (a), (b)) | ||
438 | #define XFS_AGBLOCK_MIN(a,b) min_t(xfs_agblock_t, (a), (b)) | ||
439 | #define XFS_AGBLOCK_MAX(a,b) max_t(xfs_agblock_t, (a), (b)) | ||
440 | #define XFS_FILEOFF_MIN(a,b) min_t(xfs_fileoff_t, (a), (b)) | ||
441 | #define XFS_FILEOFF_MAX(a,b) max_t(xfs_fileoff_t, (a), (b)) | ||
442 | #define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b)) | ||
443 | #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) | ||
444 | |||
445 | #define XFS_FSB_SANITY_CHECK(mp,fsb) \ | ||
446 | (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ | ||
447 | XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) | ||
448 | |||
449 | /* | ||
450 | * Trace hooks. Currently not implemented as they need to be ported | ||
451 | * over to the generic tracing functionality, which is some effort. | ||
452 | * | ||
453 | * i,j = integer (32 bit) | ||
454 | * b = btree block buffer (xfs_buf_t) | ||
455 | * p = btree ptr | ||
456 | * r = btree record | ||
457 | * k = btree key | ||
458 | */ | ||
459 | #define XFS_BTREE_TRACE_ARGBI(c, b, i) | ||
460 | #define XFS_BTREE_TRACE_ARGBII(c, b, i, j) | ||
461 | #define XFS_BTREE_TRACE_ARGI(c, i) | ||
462 | #define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) | ||
463 | #define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) | ||
464 | #define XFS_BTREE_TRACE_ARGIK(c, i, k) | ||
465 | #define XFS_BTREE_TRACE_ARGR(c, r) | ||
466 | #define XFS_BTREE_TRACE_CURSOR(c, t) | ||
467 | |||
468 | #endif /* __XFS_BTREE_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_cksum.h b/fs/xfs/libxfs/xfs_cksum.h new file mode 100644 index 000000000000..fad1676ad8cd --- /dev/null +++ b/fs/xfs/libxfs/xfs_cksum.h | |||
@@ -0,0 +1,63 @@ | |||
1 | #ifndef _XFS_CKSUM_H | ||
2 | #define _XFS_CKSUM_H 1 | ||
3 | |||
4 | #define XFS_CRC_SEED (~(__uint32_t)0) | ||
5 | |||
6 | /* | ||
7 | * Calculate the intermediate checksum for a buffer that has the CRC field | ||
8 | * inside it. The offset of the 32bit crc field is passed as the | ||
9 | * cksum_offset parameter. | ||
10 | */ | ||
11 | static inline __uint32_t | ||
12 | xfs_start_cksum(char *buffer, size_t length, unsigned long cksum_offset) | ||
13 | { | ||
14 | __uint32_t zero = 0; | ||
15 | __uint32_t crc; | ||
16 | |||
17 | /* Calculate CRC up to the checksum. */ | ||
18 | crc = crc32c(XFS_CRC_SEED, buffer, cksum_offset); | ||
19 | |||
20 | /* Skip checksum field */ | ||
21 | crc = crc32c(crc, &zero, sizeof(__u32)); | ||
22 | |||
23 | /* Calculate the rest of the CRC. */ | ||
24 | return crc32c(crc, &buffer[cksum_offset + sizeof(__be32)], | ||
25 | length - (cksum_offset + sizeof(__be32))); | ||
26 | } | ||
27 | |||
28 | /* | ||
29 | * Convert the intermediate checksum to the final ondisk format. | ||
30 | * | ||
31 | * The CRC32c calculation uses LE format even on BE machines, but returns the | ||
32 | * result in host endian format. Hence we need to byte swap it back to LE format | ||
33 | * so that it is consistent on disk. | ||
34 | */ | ||
35 | static inline __le32 | ||
36 | xfs_end_cksum(__uint32_t crc) | ||
37 | { | ||
38 | return ~cpu_to_le32(crc); | ||
39 | } | ||
40 | |||
41 | /* | ||
42 | * Helper to generate the checksum for a buffer. | ||
43 | */ | ||
44 | static inline void | ||
45 | xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset) | ||
46 | { | ||
47 | __uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset); | ||
48 | |||
49 | *(__le32 *)(buffer + cksum_offset) = xfs_end_cksum(crc); | ||
50 | } | ||
51 | |||
52 | /* | ||
53 | * Helper to verify the checksum for a buffer. | ||
54 | */ | ||
55 | static inline int | ||
56 | xfs_verify_cksum(char *buffer, size_t length, unsigned long cksum_offset) | ||
57 | { | ||
58 | __uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset); | ||
59 | |||
60 | return *(__le32 *)(buffer + cksum_offset) == xfs_end_cksum(crc); | ||
61 | } | ||
62 | |||
63 | #endif /* _XFS_CKSUM_H */ | ||
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c new file mode 100644 index 000000000000..8d809873525b --- /dev/null +++ b/fs/xfs/libxfs/xfs_da_btree.c | |||
@@ -0,0 +1,2665 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2013 Red Hat, Inc. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #include "xfs.h" | ||
20 | #include "xfs_fs.h" | ||
21 | #include "xfs_shared.h" | ||
22 | #include "xfs_format.h" | ||
23 | #include "xfs_log_format.h" | ||
24 | #include "xfs_trans_resv.h" | ||
25 | #include "xfs_bit.h" | ||
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | ||
29 | #include "xfs_da_format.h" | ||
30 | #include "xfs_da_btree.h" | ||
31 | #include "xfs_dir2.h" | ||
32 | #include "xfs_dir2_priv.h" | ||
33 | #include "xfs_inode.h" | ||
34 | #include "xfs_trans.h" | ||
35 | #include "xfs_inode_item.h" | ||
36 | #include "xfs_alloc.h" | ||
37 | #include "xfs_bmap.h" | ||
38 | #include "xfs_attr.h" | ||
39 | #include "xfs_attr_leaf.h" | ||
40 | #include "xfs_error.h" | ||
41 | #include "xfs_trace.h" | ||
42 | #include "xfs_cksum.h" | ||
43 | #include "xfs_buf_item.h" | ||
44 | |||
45 | /* | ||
46 | * xfs_da_btree.c | ||
47 | * | ||
48 | * Routines to implement directories as Btrees of hashed names. | ||
49 | */ | ||
50 | |||
51 | /*======================================================================== | ||
52 | * Function prototypes for the kernel. | ||
53 | *========================================================================*/ | ||
54 | |||
55 | /* | ||
56 | * Routines used for growing the Btree. | ||
57 | */ | ||
58 | STATIC int xfs_da3_root_split(xfs_da_state_t *state, | ||
59 | xfs_da_state_blk_t *existing_root, | ||
60 | xfs_da_state_blk_t *new_child); | ||
61 | STATIC int xfs_da3_node_split(xfs_da_state_t *state, | ||
62 | xfs_da_state_blk_t *existing_blk, | ||
63 | xfs_da_state_blk_t *split_blk, | ||
64 | xfs_da_state_blk_t *blk_to_add, | ||
65 | int treelevel, | ||
66 | int *result); | ||
67 | STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state, | ||
68 | xfs_da_state_blk_t *node_blk_1, | ||
69 | xfs_da_state_blk_t *node_blk_2); | ||
70 | STATIC void xfs_da3_node_add(xfs_da_state_t *state, | ||
71 | xfs_da_state_blk_t *old_node_blk, | ||
72 | xfs_da_state_blk_t *new_node_blk); | ||
73 | |||
74 | /* | ||
75 | * Routines used for shrinking the Btree. | ||
76 | */ | ||
77 | STATIC int xfs_da3_root_join(xfs_da_state_t *state, | ||
78 | xfs_da_state_blk_t *root_blk); | ||
79 | STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval); | ||
80 | STATIC void xfs_da3_node_remove(xfs_da_state_t *state, | ||
81 | xfs_da_state_blk_t *drop_blk); | ||
82 | STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state, | ||
83 | xfs_da_state_blk_t *src_node_blk, | ||
84 | xfs_da_state_blk_t *dst_node_blk); | ||
85 | |||
86 | /* | ||
87 | * Utility routines. | ||
88 | */ | ||
89 | STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state, | ||
90 | xfs_da_state_blk_t *drop_blk, | ||
91 | xfs_da_state_blk_t *save_blk); | ||
92 | |||
93 | |||
94 | kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ | ||
95 | |||
96 | /* | ||
97 | * Allocate a dir-state structure. | ||
98 | * We don't put them on the stack since they're large. | ||
99 | */ | ||
100 | xfs_da_state_t * | ||
101 | xfs_da_state_alloc(void) | ||
102 | { | ||
103 | return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS); | ||
104 | } | ||
105 | |||
106 | /* | ||
107 | * Kill the altpath contents of a da-state structure. | ||
108 | */ | ||
109 | STATIC void | ||
110 | xfs_da_state_kill_altpath(xfs_da_state_t *state) | ||
111 | { | ||
112 | int i; | ||
113 | |||
114 | for (i = 0; i < state->altpath.active; i++) | ||
115 | state->altpath.blk[i].bp = NULL; | ||
116 | state->altpath.active = 0; | ||
117 | } | ||
118 | |||
119 | /* | ||
120 | * Free a da-state structure. | ||
121 | */ | ||
122 | void | ||
123 | xfs_da_state_free(xfs_da_state_t *state) | ||
124 | { | ||
125 | xfs_da_state_kill_altpath(state); | ||
126 | #ifdef DEBUG | ||
127 | memset((char *)state, 0, sizeof(*state)); | ||
128 | #endif /* DEBUG */ | ||
129 | kmem_zone_free(xfs_da_state_zone, state); | ||
130 | } | ||
131 | |||
132 | static bool | ||
133 | xfs_da3_node_verify( | ||
134 | struct xfs_buf *bp) | ||
135 | { | ||
136 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
137 | struct xfs_da_intnode *hdr = bp->b_addr; | ||
138 | struct xfs_da3_icnode_hdr ichdr; | ||
139 | const struct xfs_dir_ops *ops; | ||
140 | |||
141 | ops = xfs_dir_get_ops(mp, NULL); | ||
142 | |||
143 | ops->node_hdr_from_disk(&ichdr, hdr); | ||
144 | |||
145 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
146 | struct xfs_da3_node_hdr *hdr3 = bp->b_addr; | ||
147 | |||
148 | if (ichdr.magic != XFS_DA3_NODE_MAGIC) | ||
149 | return false; | ||
150 | |||
151 | if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid)) | ||
152 | return false; | ||
153 | if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) | ||
154 | return false; | ||
155 | } else { | ||
156 | if (ichdr.magic != XFS_DA_NODE_MAGIC) | ||
157 | return false; | ||
158 | } | ||
159 | if (ichdr.level == 0) | ||
160 | return false; | ||
161 | if (ichdr.level > XFS_DA_NODE_MAXDEPTH) | ||
162 | return false; | ||
163 | if (ichdr.count == 0) | ||
164 | return false; | ||
165 | |||
166 | /* | ||
167 | * we don't know if the node is for an attribute or directory tree, | ||
168 | * so only fail if the count is outside both bounds | ||
169 | */ | ||
170 | if (ichdr.count > mp->m_dir_geo->node_ents && | ||
171 | ichdr.count > mp->m_attr_geo->node_ents) | ||
172 | return false; | ||
173 | |||
174 | /* XXX: hash order check? */ | ||
175 | |||
176 | return true; | ||
177 | } | ||
178 | |||
179 | static void | ||
180 | xfs_da3_node_write_verify( | ||
181 | struct xfs_buf *bp) | ||
182 | { | ||
183 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
184 | struct xfs_buf_log_item *bip = bp->b_fspriv; | ||
185 | struct xfs_da3_node_hdr *hdr3 = bp->b_addr; | ||
186 | |||
187 | if (!xfs_da3_node_verify(bp)) { | ||
188 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
189 | xfs_verifier_error(bp); | ||
190 | return; | ||
191 | } | ||
192 | |||
193 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
194 | return; | ||
195 | |||
196 | if (bip) | ||
197 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); | ||
198 | |||
199 | xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF); | ||
200 | } | ||
201 | |||
202 | /* | ||
203 | * leaf/node format detection on trees is sketchy, so a node read can be done on | ||
204 | * leaf level blocks when detection identifies the tree as a node format tree | ||
205 | * incorrectly. In this case, we need to swap the verifier to match the correct | ||
206 | * format of the block being read. | ||
207 | */ | ||
208 | static void | ||
209 | xfs_da3_node_read_verify( | ||
210 | struct xfs_buf *bp) | ||
211 | { | ||
212 | struct xfs_da_blkinfo *info = bp->b_addr; | ||
213 | |||
214 | switch (be16_to_cpu(info->magic)) { | ||
215 | case XFS_DA3_NODE_MAGIC: | ||
216 | if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { | ||
217 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
218 | break; | ||
219 | } | ||
220 | /* fall through */ | ||
221 | case XFS_DA_NODE_MAGIC: | ||
222 | if (!xfs_da3_node_verify(bp)) { | ||
223 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
224 | break; | ||
225 | } | ||
226 | return; | ||
227 | case XFS_ATTR_LEAF_MAGIC: | ||
228 | case XFS_ATTR3_LEAF_MAGIC: | ||
229 | bp->b_ops = &xfs_attr3_leaf_buf_ops; | ||
230 | bp->b_ops->verify_read(bp); | ||
231 | return; | ||
232 | case XFS_DIR2_LEAFN_MAGIC: | ||
233 | case XFS_DIR3_LEAFN_MAGIC: | ||
234 | bp->b_ops = &xfs_dir3_leafn_buf_ops; | ||
235 | bp->b_ops->verify_read(bp); | ||
236 | return; | ||
237 | default: | ||
238 | break; | ||
239 | } | ||
240 | |||
241 | /* corrupt block */ | ||
242 | xfs_verifier_error(bp); | ||
243 | } | ||
244 | |||
245 | const struct xfs_buf_ops xfs_da3_node_buf_ops = { | ||
246 | .verify_read = xfs_da3_node_read_verify, | ||
247 | .verify_write = xfs_da3_node_write_verify, | ||
248 | }; | ||
249 | |||
250 | int | ||
251 | xfs_da3_node_read( | ||
252 | struct xfs_trans *tp, | ||
253 | struct xfs_inode *dp, | ||
254 | xfs_dablk_t bno, | ||
255 | xfs_daddr_t mappedbno, | ||
256 | struct xfs_buf **bpp, | ||
257 | int which_fork) | ||
258 | { | ||
259 | int err; | ||
260 | |||
261 | err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, | ||
262 | which_fork, &xfs_da3_node_buf_ops); | ||
263 | if (!err && tp) { | ||
264 | struct xfs_da_blkinfo *info = (*bpp)->b_addr; | ||
265 | int type; | ||
266 | |||
267 | switch (be16_to_cpu(info->magic)) { | ||
268 | case XFS_DA_NODE_MAGIC: | ||
269 | case XFS_DA3_NODE_MAGIC: | ||
270 | type = XFS_BLFT_DA_NODE_BUF; | ||
271 | break; | ||
272 | case XFS_ATTR_LEAF_MAGIC: | ||
273 | case XFS_ATTR3_LEAF_MAGIC: | ||
274 | type = XFS_BLFT_ATTR_LEAF_BUF; | ||
275 | break; | ||
276 | case XFS_DIR2_LEAFN_MAGIC: | ||
277 | case XFS_DIR3_LEAFN_MAGIC: | ||
278 | type = XFS_BLFT_DIR_LEAFN_BUF; | ||
279 | break; | ||
280 | default: | ||
281 | type = 0; | ||
282 | ASSERT(0); | ||
283 | break; | ||
284 | } | ||
285 | xfs_trans_buf_set_type(tp, *bpp, type); | ||
286 | } | ||
287 | return err; | ||
288 | } | ||
289 | |||
290 | /*======================================================================== | ||
291 | * Routines used for growing the Btree. | ||
292 | *========================================================================*/ | ||
293 | |||
294 | /* | ||
295 | * Create the initial contents of an intermediate node. | ||
296 | */ | ||
297 | int | ||
298 | xfs_da3_node_create( | ||
299 | struct xfs_da_args *args, | ||
300 | xfs_dablk_t blkno, | ||
301 | int level, | ||
302 | struct xfs_buf **bpp, | ||
303 | int whichfork) | ||
304 | { | ||
305 | struct xfs_da_intnode *node; | ||
306 | struct xfs_trans *tp = args->trans; | ||
307 | struct xfs_mount *mp = tp->t_mountp; | ||
308 | struct xfs_da3_icnode_hdr ichdr = {0}; | ||
309 | struct xfs_buf *bp; | ||
310 | int error; | ||
311 | struct xfs_inode *dp = args->dp; | ||
312 | |||
313 | trace_xfs_da_node_create(args); | ||
314 | ASSERT(level <= XFS_DA_NODE_MAXDEPTH); | ||
315 | |||
316 | error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork); | ||
317 | if (error) | ||
318 | return error; | ||
319 | bp->b_ops = &xfs_da3_node_buf_ops; | ||
320 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); | ||
321 | node = bp->b_addr; | ||
322 | |||
323 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
324 | struct xfs_da3_node_hdr *hdr3 = bp->b_addr; | ||
325 | |||
326 | ichdr.magic = XFS_DA3_NODE_MAGIC; | ||
327 | hdr3->info.blkno = cpu_to_be64(bp->b_bn); | ||
328 | hdr3->info.owner = cpu_to_be64(args->dp->i_ino); | ||
329 | uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_uuid); | ||
330 | } else { | ||
331 | ichdr.magic = XFS_DA_NODE_MAGIC; | ||
332 | } | ||
333 | ichdr.level = level; | ||
334 | |||
335 | dp->d_ops->node_hdr_to_disk(node, &ichdr); | ||
336 | xfs_trans_log_buf(tp, bp, | ||
337 | XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); | ||
338 | |||
339 | *bpp = bp; | ||
340 | return 0; | ||
341 | } | ||
342 | |||
343 | /* | ||
344 | * Split a leaf node, rebalance, then possibly split | ||
345 | * intermediate nodes, rebalance, etc. | ||
346 | */ | ||
347 | int /* error */ | ||
348 | xfs_da3_split( | ||
349 | struct xfs_da_state *state) | ||
350 | { | ||
351 | struct xfs_da_state_blk *oldblk; | ||
352 | struct xfs_da_state_blk *newblk; | ||
353 | struct xfs_da_state_blk *addblk; | ||
354 | struct xfs_da_intnode *node; | ||
355 | struct xfs_buf *bp; | ||
356 | int max; | ||
357 | int action = 0; | ||
358 | int error; | ||
359 | int i; | ||
360 | |||
361 | trace_xfs_da_split(state->args); | ||
362 | |||
363 | /* | ||
364 | * Walk back up the tree splitting/inserting/adjusting as necessary. | ||
365 | * If we need to insert and there isn't room, split the node, then | ||
366 | * decide which fragment to insert the new block from below into. | ||
367 | * Note that we may split the root this way, but we need more fixup. | ||
368 | */ | ||
369 | max = state->path.active - 1; | ||
370 | ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH)); | ||
371 | ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC || | ||
372 | state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC); | ||
373 | |||
374 | addblk = &state->path.blk[max]; /* initial dummy value */ | ||
375 | for (i = max; (i >= 0) && addblk; state->path.active--, i--) { | ||
376 | oldblk = &state->path.blk[i]; | ||
377 | newblk = &state->altpath.blk[i]; | ||
378 | |||
379 | /* | ||
380 | * If a leaf node then | ||
381 | * Allocate a new leaf node, then rebalance across them. | ||
382 | * else if an intermediate node then | ||
383 | * We split on the last layer, must we split the node? | ||
384 | */ | ||
385 | switch (oldblk->magic) { | ||
386 | case XFS_ATTR_LEAF_MAGIC: | ||
387 | error = xfs_attr3_leaf_split(state, oldblk, newblk); | ||
388 | if ((error != 0) && (error != -ENOSPC)) { | ||
389 | return error; /* GROT: attr is inconsistent */ | ||
390 | } | ||
391 | if (!error) { | ||
392 | addblk = newblk; | ||
393 | break; | ||
394 | } | ||
395 | /* | ||
396 | * Entry wouldn't fit, split the leaf again. | ||
397 | */ | ||
398 | state->extravalid = 1; | ||
399 | if (state->inleaf) { | ||
400 | state->extraafter = 0; /* before newblk */ | ||
401 | trace_xfs_attr_leaf_split_before(state->args); | ||
402 | error = xfs_attr3_leaf_split(state, oldblk, | ||
403 | &state->extrablk); | ||
404 | } else { | ||
405 | state->extraafter = 1; /* after newblk */ | ||
406 | trace_xfs_attr_leaf_split_after(state->args); | ||
407 | error = xfs_attr3_leaf_split(state, newblk, | ||
408 | &state->extrablk); | ||
409 | } | ||
410 | if (error) | ||
411 | return error; /* GROT: attr inconsistent */ | ||
412 | addblk = newblk; | ||
413 | break; | ||
414 | case XFS_DIR2_LEAFN_MAGIC: | ||
415 | error = xfs_dir2_leafn_split(state, oldblk, newblk); | ||
416 | if (error) | ||
417 | return error; | ||
418 | addblk = newblk; | ||
419 | break; | ||
420 | case XFS_DA_NODE_MAGIC: | ||
421 | error = xfs_da3_node_split(state, oldblk, newblk, addblk, | ||
422 | max - i, &action); | ||
423 | addblk->bp = NULL; | ||
424 | if (error) | ||
425 | return error; /* GROT: dir is inconsistent */ | ||
426 | /* | ||
427 | * Record the newly split block for the next time thru? | ||
428 | */ | ||
429 | if (action) | ||
430 | addblk = newblk; | ||
431 | else | ||
432 | addblk = NULL; | ||
433 | break; | ||
434 | } | ||
435 | |||
436 | /* | ||
437 | * Update the btree to show the new hashval for this child. | ||
438 | */ | ||
439 | xfs_da3_fixhashpath(state, &state->path); | ||
440 | } | ||
441 | if (!addblk) | ||
442 | return 0; | ||
443 | |||
444 | /* | ||
445 | * Split the root node. | ||
446 | */ | ||
447 | ASSERT(state->path.active == 0); | ||
448 | oldblk = &state->path.blk[0]; | ||
449 | error = xfs_da3_root_split(state, oldblk, addblk); | ||
450 | if (error) { | ||
451 | addblk->bp = NULL; | ||
452 | return error; /* GROT: dir is inconsistent */ | ||
453 | } | ||
454 | |||
455 | /* | ||
456 | * Update pointers to the node which used to be block 0 and | ||
457 | * just got bumped because of the addition of a new root node. | ||
458 | * There might be three blocks involved if a double split occurred, | ||
459 | * and the original block 0 could be at any position in the list. | ||
460 | * | ||
461 | * Note: the magic numbers and sibling pointers are in the same | ||
462 | * physical place for both v2 and v3 headers (by design). Hence it | ||
463 | * doesn't matter which version of the xfs_da_intnode structure we use | ||
464 | * here as the result will be the same using either structure. | ||
465 | */ | ||
466 | node = oldblk->bp->b_addr; | ||
467 | if (node->hdr.info.forw) { | ||
468 | if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) { | ||
469 | bp = addblk->bp; | ||
470 | } else { | ||
471 | ASSERT(state->extravalid); | ||
472 | bp = state->extrablk.bp; | ||
473 | } | ||
474 | node = bp->b_addr; | ||
475 | node->hdr.info.back = cpu_to_be32(oldblk->blkno); | ||
476 | xfs_trans_log_buf(state->args->trans, bp, | ||
477 | XFS_DA_LOGRANGE(node, &node->hdr.info, | ||
478 | sizeof(node->hdr.info))); | ||
479 | } | ||
480 | node = oldblk->bp->b_addr; | ||
481 | if (node->hdr.info.back) { | ||
482 | if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) { | ||
483 | bp = addblk->bp; | ||
484 | } else { | ||
485 | ASSERT(state->extravalid); | ||
486 | bp = state->extrablk.bp; | ||
487 | } | ||
488 | node = bp->b_addr; | ||
489 | node->hdr.info.forw = cpu_to_be32(oldblk->blkno); | ||
490 | xfs_trans_log_buf(state->args->trans, bp, | ||
491 | XFS_DA_LOGRANGE(node, &node->hdr.info, | ||
492 | sizeof(node->hdr.info))); | ||
493 | } | ||
494 | addblk->bp = NULL; | ||
495 | return 0; | ||
496 | } | ||
497 | |||
498 | /* | ||
499 | * Split the root. We have to create a new root and point to the two | ||
500 | * parts (the split old root) that we just created. Copy block zero to | ||
501 | * the EOF, extending the inode in the process. | ||
502 | */ | ||
503 | STATIC int /* error */ | ||
504 | xfs_da3_root_split( | ||
505 | struct xfs_da_state *state, | ||
506 | struct xfs_da_state_blk *blk1, | ||
507 | struct xfs_da_state_blk *blk2) | ||
508 | { | ||
509 | struct xfs_da_intnode *node; | ||
510 | struct xfs_da_intnode *oldroot; | ||
511 | struct xfs_da_node_entry *btree; | ||
512 | struct xfs_da3_icnode_hdr nodehdr; | ||
513 | struct xfs_da_args *args; | ||
514 | struct xfs_buf *bp; | ||
515 | struct xfs_inode *dp; | ||
516 | struct xfs_trans *tp; | ||
517 | struct xfs_mount *mp; | ||
518 | struct xfs_dir2_leaf *leaf; | ||
519 | xfs_dablk_t blkno; | ||
520 | int level; | ||
521 | int error; | ||
522 | int size; | ||
523 | |||
524 | trace_xfs_da_root_split(state->args); | ||
525 | |||
526 | /* | ||
527 | * Copy the existing (incorrect) block from the root node position | ||
528 | * to a free space somewhere. | ||
529 | */ | ||
530 | args = state->args; | ||
531 | error = xfs_da_grow_inode(args, &blkno); | ||
532 | if (error) | ||
533 | return error; | ||
534 | |||
535 | dp = args->dp; | ||
536 | tp = args->trans; | ||
537 | mp = state->mp; | ||
538 | error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork); | ||
539 | if (error) | ||
540 | return error; | ||
541 | node = bp->b_addr; | ||
542 | oldroot = blk1->bp->b_addr; | ||
543 | if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || | ||
544 | oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { | ||
545 | struct xfs_da3_icnode_hdr nodehdr; | ||
546 | |||
547 | dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot); | ||
548 | btree = dp->d_ops->node_tree_p(oldroot); | ||
549 | size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot); | ||
550 | level = nodehdr.level; | ||
551 | |||
552 | /* | ||
553 | * we are about to copy oldroot to bp, so set up the type | ||
554 | * of bp while we know exactly what it will be. | ||
555 | */ | ||
556 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); | ||
557 | } else { | ||
558 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
559 | struct xfs_dir2_leaf_entry *ents; | ||
560 | |||
561 | leaf = (xfs_dir2_leaf_t *)oldroot; | ||
562 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
563 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
564 | |||
565 | ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || | ||
566 | leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); | ||
567 | size = (int)((char *)&ents[leafhdr.count] - (char *)leaf); | ||
568 | level = 0; | ||
569 | |||
570 | /* | ||
571 | * we are about to copy oldroot to bp, so set up the type | ||
572 | * of bp while we know exactly what it will be. | ||
573 | */ | ||
574 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF); | ||
575 | } | ||
576 | |||
577 | /* | ||
578 | * we can copy most of the information in the node from one block to | ||
579 | * another, but for CRC enabled headers we have to make sure that the | ||
580 | * block specific identifiers are kept intact. We update the buffer | ||
581 | * directly for this. | ||
582 | */ | ||
583 | memcpy(node, oldroot, size); | ||
584 | if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || | ||
585 | oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { | ||
586 | struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node; | ||
587 | |||
588 | node3->hdr.info.blkno = cpu_to_be64(bp->b_bn); | ||
589 | } | ||
590 | xfs_trans_log_buf(tp, bp, 0, size - 1); | ||
591 | |||
592 | bp->b_ops = blk1->bp->b_ops; | ||
593 | xfs_trans_buf_copy_type(bp, blk1->bp); | ||
594 | blk1->bp = bp; | ||
595 | blk1->blkno = blkno; | ||
596 | |||
597 | /* | ||
598 | * Set up the new root node. | ||
599 | */ | ||
600 | error = xfs_da3_node_create(args, | ||
601 | (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0, | ||
602 | level + 1, &bp, args->whichfork); | ||
603 | if (error) | ||
604 | return error; | ||
605 | |||
606 | node = bp->b_addr; | ||
607 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); | ||
608 | btree = dp->d_ops->node_tree_p(node); | ||
609 | btree[0].hashval = cpu_to_be32(blk1->hashval); | ||
610 | btree[0].before = cpu_to_be32(blk1->blkno); | ||
611 | btree[1].hashval = cpu_to_be32(blk2->hashval); | ||
612 | btree[1].before = cpu_to_be32(blk2->blkno); | ||
613 | nodehdr.count = 2; | ||
614 | dp->d_ops->node_hdr_to_disk(node, &nodehdr); | ||
615 | |||
616 | #ifdef DEBUG | ||
617 | if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || | ||
618 | oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { | ||
619 | ASSERT(blk1->blkno >= args->geo->leafblk && | ||
620 | blk1->blkno < args->geo->freeblk); | ||
621 | ASSERT(blk2->blkno >= args->geo->leafblk && | ||
622 | blk2->blkno < args->geo->freeblk); | ||
623 | } | ||
624 | #endif | ||
625 | |||
626 | /* Header is already logged by xfs_da_node_create */ | ||
627 | xfs_trans_log_buf(tp, bp, | ||
628 | XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2)); | ||
629 | |||
630 | return 0; | ||
631 | } | ||
632 | |||
633 | /* | ||
634 | * Split the node, rebalance, then add the new entry. | ||
| * | ||
| * A split is needed only when the entries being added (one, or two when a | ||
| * pending extra block on the attr fork must also be inserted) would push | ||
| * the node past geo->node_ents. In that case a new block is allocated, | ||
| * initialised as a node at @treelevel, rebalanced against @oldblk and | ||
| * linked in as its sibling, and *result is set to 1; otherwise *result | ||
| * is set to 0. @addblk (and state->extrablk when used) is then inserted | ||
| * into @oldblk or @newblk depending on where oldblk->index ended up. | ||
| * | ||
| * Returns 0, or the error from the grow/create/link helpers. | ||
635 | */ | ||
636 | STATIC int /* error */ | ||
637 | xfs_da3_node_split( | ||
638 | struct xfs_da_state *state, | ||
639 | struct xfs_da_state_blk *oldblk, | ||
640 | struct xfs_da_state_blk *newblk, | ||
641 | struct xfs_da_state_blk *addblk, | ||
642 | int treelevel, | ||
643 | int *result) | ||
644 | { | ||
645 | struct xfs_da_intnode *node; | ||
646 | struct xfs_da3_icnode_hdr nodehdr; | ||
647 | xfs_dablk_t blkno; | ||
648 | int newcount; | ||
649 | int error; | ||
650 | int useextra; | ||
651 | struct xfs_inode *dp = state->args->dp; | ||
652 | |||
653 | trace_xfs_da_node_split(state->args); | ||
654 | |||
655 | node = oldblk->bp->b_addr; | ||
656 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); | ||
657 | |||
658 | /* | ||
659 | * With V2 dirs the extra block is data or freespace. | ||
| * It is therefore only inserted into the node for the attr fork. | ||
660 | */ | ||
661 | useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK; | ||
662 | newcount = 1 + useextra; /* number of entries about to be added */ | ||
663 | /* | ||
664 | * Do we have to split the node? | ||
665 | */ | ||
666 | if (nodehdr.count + newcount > state->args->geo->node_ents) { | ||
667 | /* | ||
668 | * Allocate a new node, add to the doubly linked chain of | ||
669 | * nodes, then move some of our excess entries into it. | ||
670 | */ | ||
671 | error = xfs_da_grow_inode(state->args, &blkno); | ||
672 | if (error) | ||
673 | return error; /* GROT: dir is inconsistent */ | ||
674 | |||
675 | error = xfs_da3_node_create(state->args, blkno, treelevel, | ||
676 | &newblk->bp, state->args->whichfork); | ||
677 | if (error) | ||
678 | return error; /* GROT: dir is inconsistent */ | ||
679 | newblk->blkno = blkno; | ||
680 | newblk->magic = XFS_DA_NODE_MAGIC; | ||
681 | xfs_da3_node_rebalance(state, oldblk, newblk); | ||
682 | error = xfs_da3_blk_link(state, oldblk, newblk); | ||
683 | if (error) | ||
684 | return error; | ||
685 | *result = 1; | ||
686 | } else { | ||
687 | *result = 0; | ||
688 | } | ||
689 | |||
690 | /* | ||
691 | * Insert the new entry(s) into the correct block | ||
692 | * (updating last hashval in the process). | ||
693 | * | ||
694 | * xfs_da3_node_add() inserts BEFORE the given index, | ||
695 | * and as a result of using node_lookup_int() we always | ||
696 | * point to a valid entry (not after one), but a split | ||
697 | * operation always results in a new block whose hashvals | ||
698 | * FOLLOW the current block. | ||
699 | * | ||
700 | * If we had double-split op below us, then add the extra block too. | ||
701 | */ | ||
702 | node = oldblk->bp->b_addr; | ||
703 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); /* re-read: rebalance may have changed the count */ | ||
704 | if (oldblk->index <= nodehdr.count) { | ||
705 | oldblk->index++; | ||
706 | xfs_da3_node_add(state, oldblk, addblk); | ||
707 | if (useextra) { | ||
708 | if (state->extraafter) | ||
709 | oldblk->index++; | ||
710 | xfs_da3_node_add(state, oldblk, &state->extrablk); | ||
711 | state->extravalid = 0; | ||
712 | } | ||
713 | } else { | ||
714 | newblk->index++; | ||
715 | xfs_da3_node_add(state, newblk, addblk); | ||
716 | if (useextra) { | ||
717 | if (state->extraafter) | ||
718 | newblk->index++; | ||
719 | xfs_da3_node_add(state, newblk, &state->extrablk); | ||
720 | state->extravalid = 0; | ||
721 | } | ||
722 | } | ||
723 | |||
724 | return 0; | ||
725 | } | ||
726 | |||
727 | /* | ||
728 | * Balance the btree elements between two intermediate nodes, | ||
729 | * usually one full and one empty. | ||
730 | * | ||
731 | * NOTE: if blk2 is empty, then it will get the upper half of blk1. | ||
| * | ||
| * Half of the difference in entry counts is moved from the fuller node to | ||
| * the other one; the function returns early, changing nothing, when that | ||
| * value truncates to zero. Otherwise, on return blk1->hashval and | ||
| * blk2->hashval hold each block's last hashval, and if blk1->index now | ||
| * falls beyond blk1's entries it is rebased into blk2->index while | ||
| * blk1->index is set to an out-of-range value. | ||
| * | ||
| * NOTE(review): when the local node pointers are swapped, the | ||
| * xfs_trans_log_buf() calls below still pair blk1->bp/blk2->bp with the | ||
| * swapped node1/node2 - confirm the intended buffers are being logged. | ||
732 | */ | ||
733 | STATIC void | ||
734 | xfs_da3_node_rebalance( | ||
735 | struct xfs_da_state *state, | ||
736 | struct xfs_da_state_blk *blk1, | ||
737 | struct xfs_da_state_blk *blk2) | ||
738 | { | ||
739 | struct xfs_da_intnode *node1; | ||
740 | struct xfs_da_intnode *node2; | ||
741 | struct xfs_da_intnode *tmpnode; | ||
742 | struct xfs_da_node_entry *btree1; | ||
743 | struct xfs_da_node_entry *btree2; | ||
744 | struct xfs_da_node_entry *btree_s; | ||
745 | struct xfs_da_node_entry *btree_d; | ||
746 | struct xfs_da3_icnode_hdr nodehdr1; | ||
747 | struct xfs_da3_icnode_hdr nodehdr2; | ||
748 | struct xfs_trans *tp; | ||
749 | int count; | ||
750 | int tmp; | ||
751 | int swap = 0; | ||
752 | struct xfs_inode *dp = state->args->dp; | ||
753 | |||
754 | trace_xfs_da_node_rebalance(state->args); | ||
755 | |||
756 | node1 = blk1->bp->b_addr; | ||
757 | node2 = blk2->bp->b_addr; | ||
758 | dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); | ||
759 | dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); | ||
760 | btree1 = dp->d_ops->node_tree_p(node1); | ||
761 | btree2 = dp->d_ops->node_tree_p(node2); | ||
762 | |||
763 | /* | ||
764 | * Figure out how many entries need to move, and in which direction. | ||
765 | * Swap the nodes around if that makes it simpler. | ||
| * The swap happens only when both nodes are non-empty and node2's | ||
| * first or last hashval is lower than node1's. | ||
766 | */ | ||
767 | if (nodehdr1.count > 0 && nodehdr2.count > 0 && | ||
768 | ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) || | ||
769 | (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) < | ||
770 | be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) { | ||
771 | tmpnode = node1; | ||
772 | node1 = node2; | ||
773 | node2 = tmpnode; | ||
774 | dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); | ||
775 | dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); | ||
776 | btree1 = dp->d_ops->node_tree_p(node1); | ||
777 | btree2 = dp->d_ops->node_tree_p(node2); | ||
778 | swap = 1; | ||
779 | } | ||
780 | |||
781 | count = (nodehdr1.count - nodehdr2.count) / 2; /* >0: node1 -> node2, <0: node2 -> node1 */ | ||
782 | if (count == 0) | ||
783 | return; | ||
784 | tp = state->args->trans; | ||
785 | /* | ||
786 | * Two cases: high-to-low and low-to-high. | ||
787 | */ | ||
788 | if (count > 0) { | ||
789 | /* | ||
790 | * Move elements in node2 up to make a hole. | ||
791 | */ | ||
792 | tmp = nodehdr2.count; | ||
793 | if (tmp > 0) { | ||
794 | tmp *= (uint)sizeof(xfs_da_node_entry_t); | ||
795 | btree_s = &btree2[0]; | ||
796 | btree_d = &btree2[count]; | ||
797 | memmove(btree_d, btree_s, tmp); | ||
798 | } | ||
799 | |||
800 | /* | ||
801 | * Move the req'd B-tree elements from high in node1 to | ||
802 | * low in node2. | ||
803 | */ | ||
804 | nodehdr2.count += count; | ||
805 | tmp = count * (uint)sizeof(xfs_da_node_entry_t); | ||
806 | btree_s = &btree1[nodehdr1.count - count]; | ||
807 | btree_d = &btree2[0]; | ||
808 | memcpy(btree_d, btree_s, tmp); | ||
809 | nodehdr1.count -= count; | ||
810 | } else { | ||
811 | /* | ||
812 | * Move the req'd B-tree elements from low in node2 to | ||
813 | * high in node1. | ||
814 | */ | ||
815 | count = -count; | ||
816 | tmp = count * (uint)sizeof(xfs_da_node_entry_t); | ||
817 | btree_s = &btree2[0]; | ||
818 | btree_d = &btree1[nodehdr1.count]; | ||
819 | memcpy(btree_d, btree_s, tmp); | ||
820 | nodehdr1.count += count; | ||
821 | |||
822 | xfs_trans_log_buf(tp, blk1->bp, | ||
823 | XFS_DA_LOGRANGE(node1, btree_d, tmp)); | ||
824 | |||
825 | /* | ||
826 | * Move elements in node2 down to fill the hole. | ||
827 | */ | ||
828 | tmp = nodehdr2.count - count; | ||
829 | tmp *= (uint)sizeof(xfs_da_node_entry_t); | ||
830 | btree_s = &btree2[count]; | ||
831 | btree_d = &btree2[0]; | ||
832 | memmove(btree_d, btree_s, tmp); | ||
833 | nodehdr2.count -= count; | ||
834 | } | ||
835 | |||
836 | /* | ||
837 | * Log header of node 1 and all current bits of node 2. | ||
838 | */ | ||
839 | dp->d_ops->node_hdr_to_disk(node1, &nodehdr1); | ||
840 | xfs_trans_log_buf(tp, blk1->bp, | ||
841 | XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size)); | ||
842 | |||
843 | dp->d_ops->node_hdr_to_disk(node2, &nodehdr2); | ||
844 | xfs_trans_log_buf(tp, blk2->bp, | ||
845 | XFS_DA_LOGRANGE(node2, &node2->hdr, | ||
846 | dp->d_ops->node_hdr_size + | ||
847 | (sizeof(btree2[0]) * nodehdr2.count))); | ||
848 | |||
849 | /* | ||
850 | * Record the last hashval from each block for upward propagation. | ||
851 | * (note: don't use the swapped node pointers) | ||
| * The pointers and in-core headers are therefore reloaded from | ||
| * blk1->bp and blk2->bp when a swap took place. | ||
852 | */ | ||
853 | if (swap) { | ||
854 | node1 = blk1->bp->b_addr; | ||
855 | node2 = blk2->bp->b_addr; | ||
856 | dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); | ||
857 | dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); | ||
858 | btree1 = dp->d_ops->node_tree_p(node1); | ||
859 | btree2 = dp->d_ops->node_tree_p(node2); | ||
860 | } | ||
861 | blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval); | ||
862 | blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval); | ||
863 | |||
864 | /* | ||
865 | * Adjust the expected index for insertion. | ||
866 | */ | ||
867 | if (blk1->index >= nodehdr1.count) { | ||
868 | blk2->index = blk1->index - nodehdr1.count; | ||
869 | blk1->index = nodehdr1.count + 1; /* make it invalid */ | ||
870 | } | ||
871 | } | ||
872 | |||
873 | /* | ||
874 | * Add a new entry to an intermediate node. | ||
| * | ||
| * The entry {newblk->hashval, newblk->blkno} is written at position | ||
| * oldblk->index of the node held in oldblk->bp; entries at and after that | ||
| * position are first shifted up by one. The touched entries and the node | ||
| * header (count incremented by one) are logged, and oldblk->hashval is | ||
| * refreshed from the node's last entry. No check is made here that the | ||
| * node has room for another entry. | ||
875 | */ | ||
876 | STATIC void | ||
877 | xfs_da3_node_add( | ||
878 | struct xfs_da_state *state, | ||
879 | struct xfs_da_state_blk *oldblk, | ||
880 | struct xfs_da_state_blk *newblk) | ||
881 | { | ||
882 | struct xfs_da_intnode *node; | ||
883 | struct xfs_da3_icnode_hdr nodehdr; | ||
884 | struct xfs_da_node_entry *btree; | ||
885 | int tmp; | ||
886 | struct xfs_inode *dp = state->args->dp; | ||
887 | |||
888 | trace_xfs_da_node_add(state->args); | ||
889 | |||
890 | node = oldblk->bp->b_addr; | ||
891 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); | ||
892 | btree = dp->d_ops->node_tree_p(node); | ||
893 | |||
894 | ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count); | ||
895 | ASSERT(newblk->blkno != 0); | ||
896 | if (state->args->whichfork == XFS_DATA_FORK) | ||
897 | ASSERT(newblk->blkno >= state->args->geo->leafblk && | ||
898 | newblk->blkno < state->args->geo->freeblk); | ||
899 | |||
900 | /* | ||
901 | * We may need to make some room before we insert the new node. | ||
| * tmp ends up as the number of bytes shifted (0 when appending) so | ||
| * that the log range below covers the new entry plus the moved ones. | ||
902 | */ | ||
903 | tmp = 0; | ||
904 | if (oldblk->index < nodehdr.count) { | ||
905 | tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree); | ||
906 | memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp); | ||
907 | } | ||
908 | btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval); | ||
909 | btree[oldblk->index].before = cpu_to_be32(newblk->blkno); | ||
910 | xfs_trans_log_buf(state->args->trans, oldblk->bp, | ||
911 | XFS_DA_LOGRANGE(node, &btree[oldblk->index], | ||
912 | tmp + sizeof(*btree))); | ||
913 | |||
914 | nodehdr.count += 1; | ||
915 | dp->d_ops->node_hdr_to_disk(node, &nodehdr); | ||
916 | xfs_trans_log_buf(state->args->trans, oldblk->bp, | ||
917 | XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); | ||
918 | |||
919 | /* | ||
920 | * Copy the last hash value from the oldblk to propagate upwards. | ||
921 | */ | ||
922 | oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval); | ||
923 | } | ||
924 | |||
925 | /*======================================================================== | ||
926 | * Routines used for shrinking the Btree. | ||
927 | *========================================================================*/ | ||
928 | |||
929 | /* | ||
930 | * Deallocate an empty leaf node, remove it from its parent, | ||
931 | * possibly deallocating that block, etc... | ||
| * | ||
| * Starting at the deepest block of state->path, each level is checked | ||
| * with the matching *_toosmall() helper. When that helper reports | ||
| * action 0 the function returns 0 immediately; otherwise the block is | ||
| * merged into its sibling from state->altpath, unlinked and freed, and | ||
| * the walk moves one level up. Once only the root level is left, the | ||
| * entry is removed from the root and xfs_da3_root_join() is called. | ||
| * | ||
| * Returns 0 or the first error reported by a helper. | ||
| * | ||
| * NOTE(review): the switch on drop_blk->magic has no default case; the | ||
| * ASSERTs at the top only cover the starting block. | ||
932 | */ | ||
933 | int | ||
934 | xfs_da3_join( | ||
935 | struct xfs_da_state *state) | ||
936 | { | ||
937 | struct xfs_da_state_blk *drop_blk; | ||
938 | struct xfs_da_state_blk *save_blk; | ||
939 | int action = 0; | ||
940 | int error; | ||
941 | |||
942 | trace_xfs_da_join(state->args); | ||
943 | |||
944 | drop_blk = &state->path.blk[ state->path.active-1 ]; | ||
945 | save_blk = &state->altpath.blk[ state->path.active-1 ]; | ||
946 | ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC); | ||
947 | ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC || | ||
948 | drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); | ||
949 | |||
950 | /* | ||
951 | * Walk back up the tree joining/deallocating as necessary. | ||
952 | * When we stop dropping blocks, break out. | ||
953 | */ | ||
954 | for ( ; state->path.active >= 2; drop_blk--, save_blk--, | ||
955 | state->path.active--) { | ||
956 | /* | ||
957 | * See if we can combine the block with a neighbor. | ||
958 | * (action == 0) => no options, just leave | ||
959 | * (action == 1) => coalesce, then unlink | ||
960 | * (action == 2) => block empty, unlink it | ||
961 | */ | ||
962 | switch (drop_blk->magic) { | ||
963 | case XFS_ATTR_LEAF_MAGIC: | ||
964 | error = xfs_attr3_leaf_toosmall(state, &action); | ||
965 | if (error) | ||
966 | return error; | ||
967 | if (action == 0) | ||
968 | return 0; | ||
969 | xfs_attr3_leaf_unbalance(state, drop_blk, save_blk); | ||
970 | break; | ||
971 | case XFS_DIR2_LEAFN_MAGIC: | ||
972 | error = xfs_dir2_leafn_toosmall(state, &action); | ||
973 | if (error) | ||
974 | return error; | ||
975 | if (action == 0) | ||
976 | return 0; | ||
977 | xfs_dir2_leafn_unbalance(state, drop_blk, save_blk); | ||
978 | break; | ||
979 | case XFS_DA_NODE_MAGIC: | ||
980 | /* | ||
981 | * Remove the offending node, fixup hashvals, | ||
982 | * check for a toosmall neighbor. | ||
983 | */ | ||
984 | xfs_da3_node_remove(state, drop_blk); | ||
985 | xfs_da3_fixhashpath(state, &state->path); | ||
986 | error = xfs_da3_node_toosmall(state, &action); | ||
987 | if (error) | ||
988 | return error; | ||
989 | if (action == 0) | ||
990 | return 0; | ||
991 | xfs_da3_node_unbalance(state, drop_blk, save_blk); | ||
992 | break; | ||
993 | } | ||
994 | xfs_da3_fixhashpath(state, &state->altpath); | ||
995 | error = xfs_da3_blk_unlink(state, drop_blk, save_blk); | ||
996 | xfs_da_state_kill_altpath(state); /* done before the error check, so it runs on failure too */ | ||
997 | if (error) | ||
998 | return error; | ||
999 | error = xfs_da_shrink_inode(state->args, drop_blk->blkno, | ||
1000 | drop_blk->bp); | ||
1001 | drop_blk->bp = NULL; /* cleared whether or not the shrink succeeded */ | ||
1002 | if (error) | ||
1003 | return error; | ||
1004 | } | ||
1005 | /* | ||
1006 | * We joined all the way to the top. If it turns out that | ||
1007 | * we only have one entry in the root, make the child block | ||
1008 | * the new root. | ||
1009 | */ | ||
1010 | xfs_da3_node_remove(state, drop_blk); | ||
1011 | xfs_da3_fixhashpath(state, &state->path); | ||
1012 | error = xfs_da3_root_join(state, &state->path.blk[0]); | ||
1013 | return error; | ||
1014 | } | ||
1015 | |||
1016 | #ifdef DEBUG | ||
1017 | static void /* DEBUG only: sanity-check the sole child of a root node at @level */ | ||
1018 | xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level) | ||
1019 | { | ||
1020 | __be16 magic = blkinfo->magic; /* kept big-endian; compared against cpu_to_be16() constants */ | ||
1021 | |||
1022 | if (level == 1) { /* child of a level-1 root must be a dir leafn or attr leaf block */ | ||
1023 | ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || | ||
1024 | magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) || | ||
1025 | magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) || | ||
1026 | magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)); | ||
1027 | } else { /* otherwise the child must itself be a node block */ | ||
1028 | ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || | ||
1029 | magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); | ||
1030 | } | ||
1031 | ASSERT(!blkinfo->forw); /* an only child has no forward sibling ... */ | ||
1032 | ASSERT(!blkinfo->back); /* ... and no backward sibling */ | ||
1033 | } | ||
1034 | #else /* !DEBUG: the check expands to nothing */ | ||
1035 | #define xfs_da_blkinfo_onlychild_validate(blkinfo, level) | ||
1036 | #endif /* !DEBUG */ | ||
1037 | |||
1038 | /* | ||
1039 | * We have only one entry in the root. Copy the only remaining child of | ||
1040 | * the old root to block 0 as the new root node. | ||
| * | ||
| * Nothing is done (and 0 is returned) when the root still has more than | ||
| * one entry. Otherwise the child named by the root's first entry is | ||
| * read, geo->blksize bytes of it are copied over the root buffer, the | ||
| * root buffer takes over the child's b_ops and buffer type, the whole | ||
| * root block is logged and the child block is released through | ||
| * xfs_da_shrink_inode(). | ||
| * | ||
| * Returns 0, or the error from reading or freeing the child. | ||
1041 | */ | ||
1042 | STATIC int | ||
1043 | xfs_da3_root_join( | ||
1044 | struct xfs_da_state *state, | ||
1045 | struct xfs_da_state_blk *root_blk) | ||
1046 | { | ||
1047 | struct xfs_da_intnode *oldroot; | ||
1048 | struct xfs_da_args *args; | ||
1049 | xfs_dablk_t child; | ||
1050 | struct xfs_buf *bp; | ||
1051 | struct xfs_da3_icnode_hdr oldroothdr; | ||
1052 | struct xfs_da_node_entry *btree; | ||
1053 | int error; | ||
1054 | struct xfs_inode *dp = state->args->dp; | ||
1055 | |||
1056 | trace_xfs_da_root_join(state->args); | ||
1057 | |||
1058 | ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); | ||
1059 | |||
1060 | args = state->args; | ||
1061 | oldroot = root_blk->bp->b_addr; | ||
1062 | dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot); | ||
1063 | ASSERT(oldroothdr.forw == 0); | ||
1064 | ASSERT(oldroothdr.back == 0); | ||
1065 | |||
1066 | /* | ||
1067 | * If the root has more than one child, then don't do anything. | ||
1068 | */ | ||
1069 | if (oldroothdr.count > 1) | ||
1070 | return 0; | ||
1071 | |||
1072 | /* | ||
1073 | * Read in the (only) child block, then copy those bytes into | ||
1074 | * the root block's buffer and free the original child block. | ||
1075 | */ | ||
1076 | btree = dp->d_ops->node_tree_p(oldroot); | ||
1077 | child = be32_to_cpu(btree[0].before); | ||
1078 | ASSERT(child != 0); | ||
1079 | error = xfs_da3_node_read(args->trans, dp, child, -1, &bp, | ||
1080 | args->whichfork); | ||
1081 | if (error) | ||
1082 | return error; | ||
1083 | xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level); | ||
1084 | |||
1085 | /* | ||
1086 | * This could be copying a leaf back into the root block in the case of | ||
1087 | * there only being a single leaf block left in the tree. Hence we have | ||
1088 | * to update the b_ops pointer as well to match the buffer type change | ||
1089 | * that could occur. For dir3 blocks we also need to update the block | ||
1090 | * number in the buffer header. | ||
1091 | */ | ||
1092 | memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize); | ||
1093 | root_blk->bp->b_ops = bp->b_ops; | ||
1094 | xfs_trans_buf_copy_type(root_blk->bp, bp); | ||
1095 | if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) { /* decided from the old root's magic, read before the copy */ | ||
1096 | struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr; | ||
1097 | da3->blkno = cpu_to_be64(root_blk->bp->b_bn); /* copied header still carried the child's blkno */ | ||
1098 | } | ||
1099 | xfs_trans_log_buf(args->trans, root_blk->bp, 0, | ||
1100 | args->geo->blksize - 1); | ||
1101 | error = xfs_da_shrink_inode(args, child, bp); | ||
1102 | return error; | ||
1103 | } | ||
1104 | |||
1105 | /* | ||
1106 | * Check a node block and its neighbors to see if the block should be | ||
1107 | * collapsed into one or the other neighbor. Always keep the block | ||
1108 | * with the smaller block number. | ||
1109 | * If the current block is over 50% full, don't try to join it, set *action to 0. | ||
1110 | * If the block is empty, fill in the state structure and set *action to 2. | ||
1111 | * If it can be collapsed, fill in the state structure and set *action to 1. | ||
1112 | * If nothing can be done, set *action to 0. | ||
| * | ||
| * The function's own return value is 0, or the error from reading a | ||
| * sibling block or shifting a path; *action is only meaningful when 0 | ||
| * is returned. | ||
1113 | */ | ||
1114 | STATIC int | ||
1115 | xfs_da3_node_toosmall( | ||
1116 | struct xfs_da_state *state, | ||
1117 | int *action) | ||
1118 | { | ||
1119 | struct xfs_da_intnode *node; | ||
1120 | struct xfs_da_state_blk *blk; | ||
1121 | struct xfs_da_blkinfo *info; | ||
1122 | xfs_dablk_t blkno; | ||
1123 | struct xfs_buf *bp; | ||
1124 | struct xfs_da3_icnode_hdr nodehdr; | ||
1125 | int count; | ||
1126 | int forward; | ||
1127 | int error; | ||
1128 | int retval; | ||
1129 | int i; | ||
1130 | struct xfs_inode *dp = state->args->dp; | ||
1131 | |||
1132 | trace_xfs_da_node_toosmall(state->args); | ||
1133 | |||
1134 | /* | ||
1135 | * Check for the degenerate case of the block being over 50% full. | ||
1136 | * If so, it's not worth even looking to see if we might be able | ||
1137 | * to coalesce with a sibling. | ||
1138 | */ | ||
1139 | blk = &state->path.blk[ state->path.active-1 ]; | ||
1140 | info = blk->bp->b_addr; | ||
1141 | node = (xfs_da_intnode_t *)info; | ||
1142 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); | ||
1143 | if (nodehdr.count > (state->args->geo->node_ents >> 1)) { | ||
1144 | *action = 0; /* blk over 50%, don't try to join */ | ||
1145 | return 0; /* blk over 50%, don't try to join */ | ||
1146 | } | ||
1147 | |||
1148 | /* | ||
1149 | * Check for the degenerate case of the block being empty. | ||
1150 | * If the block is empty, we'll simply delete it, no need to | ||
1151 | * coalesce it with a sibling block. We choose (arbitrarily) | ||
1152 | * to merge with the forward block unless it is NULL. | ||
1153 | */ | ||
1154 | if (nodehdr.count == 0) { | ||
1155 | /* | ||
1156 | * Make altpath point to the block we want to keep and | ||
1157 | * path point to the block we want to drop (this one). | ||
1158 | */ | ||
1159 | forward = (info->forw != 0); | ||
1160 | memcpy(&state->altpath, &state->path, sizeof(state->path)); | ||
1161 | error = xfs_da3_path_shift(state, &state->altpath, forward, | ||
1162 | 0, &retval); | ||
1163 | if (error) | ||
1164 | return error; | ||
1165 | if (retval) { /* non-zero retval from the shift: give up on joining */ | ||
1166 | *action = 0; | ||
1167 | } else { | ||
1168 | *action = 2; | ||
1169 | } | ||
1170 | return 0; | ||
1171 | } | ||
1172 | |||
1173 | /* | ||
1174 | * Examine each sibling block to see if we can coalesce with | ||
1175 | * at least 25% free space to spare. We need to figure out | ||
1176 | * whether to merge with the forward or the backward block. | ||
1177 | * We prefer coalescing with the lower numbered sibling so as | ||
1178 | * to shrink a directory over time. | ||
| * | ||
| * count becomes the number of entries a sibling may hold so that | ||
| * the merged node stays at or below 75% of geo->node_ents. | ||
1179 | */ | ||
1180 | count = state->args->geo->node_ents; | ||
1181 | count -= state->args->geo->node_ents >> 2; | ||
1182 | count -= nodehdr.count; | ||
1183 | |||
1184 | /* start with smaller blk num */ | ||
1185 | forward = nodehdr.forw < nodehdr.back; | ||
1186 | for (i = 0; i < 2; forward = !forward, i++) { | ||
1187 | struct xfs_da3_icnode_hdr thdr; | ||
1188 | if (forward) | ||
1189 | blkno = nodehdr.forw; | ||
1190 | else | ||
1191 | blkno = nodehdr.back; | ||
1192 | if (blkno == 0) | ||
1193 | continue; | ||
1194 | error = xfs_da3_node_read(state->args->trans, dp, | ||
1195 | blkno, -1, &bp, state->args->whichfork); | ||
1196 | if (error) | ||
1197 | return error; | ||
1198 | |||
1199 | node = bp->b_addr; | ||
1200 | dp->d_ops->node_hdr_from_disk(&thdr, node); | ||
1201 | xfs_trans_brelse(state->args->trans, bp); /* only the in-core header copy is needed from here on */ | ||
1202 | |||
1203 | if (count - thdr.count >= 0) | ||
1204 | break; /* fits with at least 25% to spare */ | ||
1205 | } | ||
1206 | if (i >= 2) { /* neither sibling qualified */ | ||
1207 | *action = 0; | ||
1208 | return 0; | ||
1209 | } | ||
1210 | |||
1211 | /* | ||
1212 | * Make altpath point to the block we want to keep (the lower | ||
1213 | * numbered block) and path point to the block we want to drop. | ||
1214 | */ | ||
1215 | memcpy(&state->altpath, &state->path, sizeof(state->path)); | ||
1216 | if (blkno < blk->blkno) { | ||
1217 | error = xfs_da3_path_shift(state, &state->altpath, forward, | ||
1218 | 0, &retval); | ||
1219 | } else { | ||
1220 | error = xfs_da3_path_shift(state, &state->path, forward, | ||
1221 | 0, &retval); | ||
1222 | } | ||
1223 | if (error) | ||
1224 | return error; | ||
1225 | if (retval) { | ||
1226 | *action = 0; | ||
1227 | return 0; | ||
1228 | } | ||
1229 | *action = 1; | ||
1230 | return 0; | ||
1231 | } | ||
1232 | |||
1233 | /* | ||
1234 | * Pick up the last hashvalue from an intermediate node. | ||
| * | ||
| * If @count is non-NULL it receives the node's entry count. Returns the | ||
| * hashval of the last entry in cpu byte order, or 0 when the node has | ||
| * no entries. | ||
1235 | */ | ||
1236 | STATIC uint | ||
1237 | xfs_da3_node_lasthash( | ||
1238 | struct xfs_inode *dp, | ||
1239 | struct xfs_buf *bp, | ||
1240 | int *count) | ||
1241 | { | ||
1242 | struct xfs_da_intnode *node; | ||
1243 | struct xfs_da_node_entry *btree; | ||
1244 | struct xfs_da3_icnode_hdr nodehdr; | ||
1245 | |||
1246 | node = bp->b_addr; | ||
1247 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); | ||
1248 | if (count) | ||
1249 | *count = nodehdr.count; | ||
1250 | if (!nodehdr.count) /* empty node: avoid indexing entry -1 */ | ||
1251 | return 0; | ||
1252 | btree = dp->d_ops->node_tree_p(node); | ||
1253 | return be32_to_cpu(btree[nodehdr.count - 1].hashval); | ||
1254 | } | ||
1255 | |||
1256 | /* | ||
1257 | * Walk back up the tree adjusting hash values as necessary, | ||
1258 | * when we stop making changes, return. | ||
| * | ||
| * The last hashval of the deepest block in @path is fetched with the | ||
| * helper matching that block's magic; if that block has no entries the | ||
| * function returns without touching anything. Each ancestor's entry at | ||
| * blk->index is then rewritten (and logged) with the child's last | ||
| * hashval until an ancestor is found that already holds that value. | ||
| * | ||
| * NOTE(review): the update of lasthash at the bottom of the loop indexes | ||
| * btree[nodehdr.count - 1]; this presumably relies on every ancestor | ||
| * on the path being non-empty - confirm. | ||
1259 | */ | ||
1260 | void | ||
1261 | xfs_da3_fixhashpath( | ||
1262 | struct xfs_da_state *state, | ||
1263 | struct xfs_da_state_path *path) | ||
1264 | { | ||
1265 | struct xfs_da_state_blk *blk; | ||
1266 | struct xfs_da_intnode *node; | ||
1267 | struct xfs_da_node_entry *btree; | ||
1268 | xfs_dahash_t lasthash=0; | ||
1269 | int level; | ||
1270 | int count; | ||
1271 | struct xfs_inode *dp = state->args->dp; | ||
1272 | |||
1273 | trace_xfs_da_fixhashpath(state->args); | ||
1274 | |||
1275 | level = path->active-1; | ||
1276 | blk = &path->blk[ level ]; | ||
1277 | switch (blk->magic) { | ||
1278 | case XFS_ATTR_LEAF_MAGIC: | ||
1279 | lasthash = xfs_attr_leaf_lasthash(blk->bp, &count); | ||
1280 | if (count == 0) | ||
1281 | return; | ||
1282 | break; | ||
1283 | case XFS_DIR2_LEAFN_MAGIC: | ||
1284 | lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count); | ||
1285 | if (count == 0) | ||
1286 | return; | ||
1287 | break; | ||
1288 | case XFS_DA_NODE_MAGIC: | ||
1289 | lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count); | ||
1290 | if (count == 0) | ||
1291 | return; | ||
1292 | break; | ||
1293 | } | ||
1294 | for (blk--, level--; level >= 0; blk--, level--) { | ||
1295 | struct xfs_da3_icnode_hdr nodehdr; | ||
1296 | |||
1297 | node = blk->bp->b_addr; | ||
1298 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); | ||
1299 | btree = dp->d_ops->node_tree_p(node); | ||
1300 | if (be32_to_cpu(btree[blk->index].hashval) == lasthash) | ||
1301 | break; /* already correct here, so nothing above needs changing */ | ||
1302 | blk->hashval = lasthash; | ||
1303 | btree[blk->index].hashval = cpu_to_be32(lasthash); | ||
1304 | xfs_trans_log_buf(state->args->trans, blk->bp, | ||
1305 | XFS_DA_LOGRANGE(node, &btree[blk->index], | ||
1306 | sizeof(*btree))); | ||
1307 | |||
1308 | lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval); /* this node's last hashval is what its parent must record */ | ||
1309 | } | ||
1310 | } | ||
1311 | |||
1312 | /* | ||
1313 | * Remove an entry from an intermediate node. | ||
| * | ||
| * The entry at drop_blk->index of the node held in drop_blk->bp is | ||
| * removed: later entries are shifted down over it, the vacated last | ||
| * slot is zeroed, the header count is decremented, and all touched | ||
| * ranges are logged. drop_blk->hashval is set to the hashval of the | ||
| * node's new last entry, or to 0 when the node has become empty. | ||
1314 | */ | ||
1315 | STATIC void | ||
1316 | xfs_da3_node_remove( | ||
1317 | struct xfs_da_state *state, | ||
1318 | struct xfs_da_state_blk *drop_blk) | ||
1319 | { | ||
1320 | struct xfs_da_intnode *node; | ||
1321 | struct xfs_da3_icnode_hdr nodehdr; | ||
1322 | struct xfs_da_node_entry *btree; | ||
1323 | int index; | ||
1324 | int tmp; | ||
1325 | struct xfs_inode *dp = state->args->dp; | ||
1326 | |||
1327 | trace_xfs_da_node_remove(state->args); | ||
1328 | |||
1329 | node = drop_blk->bp->b_addr; | ||
1330 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); | ||
1331 | ASSERT(drop_blk->index < nodehdr.count); | ||
1332 | ASSERT(drop_blk->index >= 0); | ||
1333 | |||
1334 | /* | ||
1335 | * Copy over the offending entry, or just zero it out. | ||
| * After this step index always names the old last slot, which is | ||
| * the one that gets zeroed. | ||
1336 | */ | ||
1337 | index = drop_blk->index; | ||
1338 | btree = dp->d_ops->node_tree_p(node); | ||
1339 | if (index < nodehdr.count - 1) { | ||
1340 | tmp = nodehdr.count - index - 1; | ||
1341 | tmp *= (uint)sizeof(xfs_da_node_entry_t); | ||
1342 | memmove(&btree[index], &btree[index + 1], tmp); | ||
1343 | xfs_trans_log_buf(state->args->trans, drop_blk->bp, | ||
1344 | XFS_DA_LOGRANGE(node, &btree[index], tmp)); | ||
1345 | index = nodehdr.count - 1; | ||
1346 | } | ||
1347 | memset(&btree[index], 0, sizeof(xfs_da_node_entry_t)); | ||
1348 | xfs_trans_log_buf(state->args->trans, drop_blk->bp, | ||
1349 | XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index]))); | ||
1350 | nodehdr.count -= 1; | ||
1351 | dp->d_ops->node_hdr_to_disk(node, &nodehdr); | ||
1352 | xfs_trans_log_buf(state->args->trans, drop_blk->bp, | ||
1353 | XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); | ||
1354 | |||
1355 | /* | ||
1356 | * Copy the last hash value from the block to propagate upwards. | ||
| * When the only entry was removed index is 0 and there is no last | ||
| * entry; report 0 rather than reading btree[-1], matching what | ||
| * xfs_da3_node_lasthash() returns for an empty node. | ||
1357 | */ | ||
1358 | drop_blk->hashval = index ? be32_to_cpu(btree[index - 1].hashval) : 0; | ||
1359 | } | ||
1360 | |||
1361 | /* | ||
1362 | * Unbalance the elements between two intermediate nodes, | ||
1363 | * move all Btree elements from one node into another. | ||
| * | ||
| * Every entry of @drop_blk's node is copied into @save_blk's node, | ||
| * either in front of the existing entries (when the dropped node holds | ||
| * the lower hashvals) or after them. Only the save node is modified and | ||
| * logged; save_blk->hashval is refreshed from its new last entry. | ||
| * | ||
| * NOTE(review): the hashval comparison and the final hashval read index | ||
| * [count - 1] on both nodes, which presumably assumes neither ends up | ||
| * with a zero count - confirm against the callers. | ||
1364 | */ | ||
1365 | STATIC void | ||
1366 | xfs_da3_node_unbalance( | ||
1367 | struct xfs_da_state *state, | ||
1368 | struct xfs_da_state_blk *drop_blk, | ||
1369 | struct xfs_da_state_blk *save_blk) | ||
1370 | { | ||
1371 | struct xfs_da_intnode *drop_node; | ||
1372 | struct xfs_da_intnode *save_node; | ||
1373 | struct xfs_da_node_entry *drop_btree; | ||
1374 | struct xfs_da_node_entry *save_btree; | ||
1375 | struct xfs_da3_icnode_hdr drop_hdr; | ||
1376 | struct xfs_da3_icnode_hdr save_hdr; | ||
1377 | struct xfs_trans *tp; | ||
1378 | int sindex; | ||
1379 | int tmp; | ||
1380 | struct xfs_inode *dp = state->args->dp; | ||
1381 | |||
1382 | trace_xfs_da_node_unbalance(state->args); | ||
1383 | |||
1384 | drop_node = drop_blk->bp->b_addr; | ||
1385 | save_node = save_blk->bp->b_addr; | ||
1386 | dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node); | ||
1387 | dp->d_ops->node_hdr_from_disk(&save_hdr, save_node); | ||
1388 | drop_btree = dp->d_ops->node_tree_p(drop_node); | ||
1389 | save_btree = dp->d_ops->node_tree_p(save_node); | ||
1390 | tp = state->args->trans; | ||
1391 | |||
1392 | /* | ||
1393 | * If the dying block has lower hashvals, then move all the | ||
1394 | * elements in the remaining block up to make a hole. | ||
1395 | */ | ||
1396 | if ((be32_to_cpu(drop_btree[0].hashval) < | ||
1397 | be32_to_cpu(save_btree[0].hashval)) || | ||
1398 | (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) < | ||
1399 | be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) { | ||
1400 | /* XXX: check this - is memmove dst correct? (it opens drop_hdr.count slots at the front, which the memcpy to sindex 0 below fills) */ | ||
1401 | tmp = save_hdr.count * sizeof(xfs_da_node_entry_t); | ||
1402 | memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp); | ||
1403 | |||
1404 | sindex = 0; | ||
1405 | xfs_trans_log_buf(tp, save_blk->bp, | ||
1406 | XFS_DA_LOGRANGE(save_node, &save_btree[0], | ||
1407 | (save_hdr.count + drop_hdr.count) * | ||
1408 | sizeof(xfs_da_node_entry_t))); | ||
1409 | } else { | ||
1410 | sindex = save_hdr.count; /* append after the existing entries */ | ||
1411 | xfs_trans_log_buf(tp, save_blk->bp, | ||
1412 | XFS_DA_LOGRANGE(save_node, &save_btree[sindex], | ||
1413 | drop_hdr.count * sizeof(xfs_da_node_entry_t))); | ||
1414 | } | ||
1415 | |||
1416 | /* | ||
1417 | * Move all the B-tree elements from drop_blk to save_blk. | ||
| * The destination range was already passed to xfs_trans_log_buf() | ||
| * above, before the data is copied in here. | ||
1418 | */ | ||
1419 | tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t); | ||
1420 | memcpy(&save_btree[sindex], &drop_btree[0], tmp); | ||
1421 | save_hdr.count += drop_hdr.count; | ||
1422 | |||
1423 | dp->d_ops->node_hdr_to_disk(save_node, &save_hdr); | ||
1424 | xfs_trans_log_buf(tp, save_blk->bp, | ||
1425 | XFS_DA_LOGRANGE(save_node, &save_node->hdr, | ||
1426 | dp->d_ops->node_hdr_size)); | ||
1427 | |||
1428 | /* | ||
1429 | * Save the last hashval in the remaining block for upward propagation. | ||
1430 | */ | ||
1431 | save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval); | ||
1432 | } | ||
1433 | |||
1434 | /*======================================================================== | ||
1435 | * Routines used for finding things in the Btree. | ||
1436 | *========================================================================*/ | ||
1437 | |||
1438 | /* | ||
1439 | * Walk down the Btree looking for a particular filename, filling | ||
1440 | * in the state structure as we go. | ||
1441 | * | ||
1442 | * We will set the state structure to point to each of the elements | ||
1443 | * in each of the nodes where either the hashval is or should be. | ||
1444 | * | ||
1445 | * We support duplicate hashval's so for each entry in the current | ||
1446 | * node that could contain the desired hashval, descend. This is a | ||
1447 | * pruned depth-first tree search. | ||
1448 | */ | ||
1449 | int /* error */ | ||
1450 | xfs_da3_node_lookup_int( | ||
1451 | struct xfs_da_state *state, | ||
1452 | int *result) | ||
1453 | { | ||
1454 | struct xfs_da_state_blk *blk; | ||
1455 | struct xfs_da_blkinfo *curr; | ||
1456 | struct xfs_da_intnode *node; | ||
1457 | struct xfs_da_node_entry *btree; | ||
1458 | struct xfs_da3_icnode_hdr nodehdr; | ||
1459 | struct xfs_da_args *args; | ||
1460 | xfs_dablk_t blkno; | ||
1461 | xfs_dahash_t hashval; | ||
1462 | xfs_dahash_t btreehashval; | ||
1463 | int probe; | ||
1464 | int span; | ||
1465 | int max; | ||
1466 | int error; | ||
1467 | int retval; | ||
1468 | struct xfs_inode *dp = state->args->dp; | ||
1469 | |||
1470 | args = state->args; | ||
1471 | |||
1472 | /* | ||
1473 | * Descend thru the B-tree searching each level for the right | ||
1474 | * node to use, until the right hashval is found. | ||
1475 | */ | ||
1476 | blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0; | ||
1477 | for (blk = &state->path.blk[0], state->path.active = 1; | ||
1478 | state->path.active <= XFS_DA_NODE_MAXDEPTH; | ||
1479 | blk++, state->path.active++) { | ||
1480 | /* | ||
1481 | * Read the next node down in the tree. | ||
1482 | */ | ||
1483 | blk->blkno = blkno; | ||
1484 | error = xfs_da3_node_read(args->trans, args->dp, blkno, | ||
1485 | -1, &blk->bp, args->whichfork); | ||
1486 | if (error) { | ||
1487 | blk->blkno = 0; | ||
1488 | state->path.active--; | ||
1489 | return error; | ||
1490 | } | ||
1491 | curr = blk->bp->b_addr; | ||
1492 | blk->magic = be16_to_cpu(curr->magic); | ||
1493 | |||
1494 | if (blk->magic == XFS_ATTR_LEAF_MAGIC || | ||
1495 | blk->magic == XFS_ATTR3_LEAF_MAGIC) { | ||
1496 | blk->magic = XFS_ATTR_LEAF_MAGIC; | ||
1497 | blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); | ||
1498 | break; | ||
1499 | } | ||
1500 | |||
1501 | if (blk->magic == XFS_DIR2_LEAFN_MAGIC || | ||
1502 | blk->magic == XFS_DIR3_LEAFN_MAGIC) { | ||
1503 | blk->magic = XFS_DIR2_LEAFN_MAGIC; | ||
1504 | blk->hashval = xfs_dir2_leafn_lasthash(args->dp, | ||
1505 | blk->bp, NULL); | ||
1506 | break; | ||
1507 | } | ||
1508 | |||
1509 | blk->magic = XFS_DA_NODE_MAGIC; | ||
1510 | |||
1511 | |||
1512 | /* | ||
1513 | * Search an intermediate node for a match. | ||
1514 | */ | ||
1515 | node = blk->bp->b_addr; | ||
1516 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); | ||
1517 | btree = dp->d_ops->node_tree_p(node); | ||
1518 | |||
1519 | max = nodehdr.count; | ||
1520 | blk->hashval = be32_to_cpu(btree[max - 1].hashval); | ||
1521 | |||
1522 | /* | ||
1523 | * Binary search. (note: small blocks will skip loop) | ||
1524 | */ | ||
1525 | probe = span = max / 2; | ||
1526 | hashval = args->hashval; | ||
1527 | while (span > 4) { | ||
1528 | span /= 2; | ||
1529 | btreehashval = be32_to_cpu(btree[probe].hashval); | ||
1530 | if (btreehashval < hashval) | ||
1531 | probe += span; | ||
1532 | else if (btreehashval > hashval) | ||
1533 | probe -= span; | ||
1534 | else | ||
1535 | break; | ||
1536 | } | ||
1537 | ASSERT((probe >= 0) && (probe < max)); | ||
1538 | ASSERT((span <= 4) || | ||
1539 | (be32_to_cpu(btree[probe].hashval) == hashval)); | ||
1540 | |||
1541 | /* | ||
1542 | * Since we may have duplicate hashval's, find the first | ||
1543 | * matching hashval in the node. | ||
1544 | */ | ||
1545 | while (probe > 0 && | ||
1546 | be32_to_cpu(btree[probe].hashval) >= hashval) { | ||
1547 | probe--; | ||
1548 | } | ||
1549 | while (probe < max && | ||
1550 | be32_to_cpu(btree[probe].hashval) < hashval) { | ||
1551 | probe++; | ||
1552 | } | ||
1553 | |||
1554 | /* | ||
1555 | * Pick the right block to descend on. | ||
1556 | */ | ||
1557 | if (probe == max) { | ||
1558 | blk->index = max - 1; | ||
1559 | blkno = be32_to_cpu(btree[max - 1].before); | ||
1560 | } else { | ||
1561 | blk->index = probe; | ||
1562 | blkno = be32_to_cpu(btree[probe].before); | ||
1563 | } | ||
1564 | } | ||
1565 | |||
1566 | /* | ||
1567 | * A leaf block that ends in the hashval that we are interested in | ||
1568 | * (final hashval == search hashval) means that the next block may | ||
1569 | * contain more entries with the same hashval, shift upward to the | ||
1570 | * next leaf and keep searching. | ||
1571 | */ | ||
1572 | for (;;) { | ||
1573 | if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { | ||
1574 | retval = xfs_dir2_leafn_lookup_int(blk->bp, args, | ||
1575 | &blk->index, state); | ||
1576 | } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { | ||
1577 | retval = xfs_attr3_leaf_lookup_int(blk->bp, args); | ||
1578 | blk->index = args->index; | ||
1579 | args->blkno = blk->blkno; | ||
1580 | } else { | ||
1581 | ASSERT(0); | ||
1582 | return -EFSCORRUPTED; | ||
1583 | } | ||
1584 | if (((retval == -ENOENT) || (retval == -ENOATTR)) && | ||
1585 | (blk->hashval == args->hashval)) { | ||
1586 | error = xfs_da3_path_shift(state, &state->path, 1, 1, | ||
1587 | &retval); | ||
1588 | if (error) | ||
1589 | return error; | ||
1590 | if (retval == 0) { | ||
1591 | continue; | ||
1592 | } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { | ||
1593 | /* path_shift() gives ENOENT */ | ||
1594 | retval = -ENOATTR; | ||
1595 | } | ||
1596 | } | ||
1597 | break; | ||
1598 | } | ||
1599 | *result = retval; | ||
1600 | return 0; | ||
1601 | } | ||
1602 | |||
1603 | /*======================================================================== | ||
1604 | * Utility routines. | ||
1605 | *========================================================================*/ | ||
1606 | |||
1607 | /* | ||
1608 | * Compare two intermediate nodes for "order". | ||
1609 | */ | ||
1610 | STATIC int | ||
1611 | xfs_da3_node_order( | ||
1612 | struct xfs_inode *dp, | ||
1613 | struct xfs_buf *node1_bp, | ||
1614 | struct xfs_buf *node2_bp) | ||
1615 | { | ||
1616 | struct xfs_da_intnode *node1; | ||
1617 | struct xfs_da_intnode *node2; | ||
1618 | struct xfs_da_node_entry *btree1; | ||
1619 | struct xfs_da_node_entry *btree2; | ||
1620 | struct xfs_da3_icnode_hdr node1hdr; | ||
1621 | struct xfs_da3_icnode_hdr node2hdr; | ||
1622 | |||
1623 | node1 = node1_bp->b_addr; | ||
1624 | node2 = node2_bp->b_addr; | ||
1625 | dp->d_ops->node_hdr_from_disk(&node1hdr, node1); | ||
1626 | dp->d_ops->node_hdr_from_disk(&node2hdr, node2); | ||
1627 | btree1 = dp->d_ops->node_tree_p(node1); | ||
1628 | btree2 = dp->d_ops->node_tree_p(node2); | ||
1629 | |||
1630 | if (node1hdr.count > 0 && node2hdr.count > 0 && | ||
1631 | ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) || | ||
1632 | (be32_to_cpu(btree2[node2hdr.count - 1].hashval) < | ||
1633 | be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) { | ||
1634 | return 1; | ||
1635 | } | ||
1636 | return 0; | ||
1637 | } | ||
1638 | |||
1639 | /* | ||
1640 | * Link a new block into a doubly linked list of blocks (of whatever type). | ||
1641 | */ | ||
1642 | int /* error */ | ||
1643 | xfs_da3_blk_link( | ||
1644 | struct xfs_da_state *state, | ||
1645 | struct xfs_da_state_blk *old_blk, | ||
1646 | struct xfs_da_state_blk *new_blk) | ||
1647 | { | ||
1648 | struct xfs_da_blkinfo *old_info; | ||
1649 | struct xfs_da_blkinfo *new_info; | ||
1650 | struct xfs_da_blkinfo *tmp_info; | ||
1651 | struct xfs_da_args *args; | ||
1652 | struct xfs_buf *bp; | ||
1653 | int before = 0; | ||
1654 | int error; | ||
1655 | struct xfs_inode *dp = state->args->dp; | ||
1656 | |||
1657 | /* | ||
1658 | * Set up environment. | ||
1659 | */ | ||
1660 | args = state->args; | ||
1661 | ASSERT(args != NULL); | ||
1662 | old_info = old_blk->bp->b_addr; | ||
1663 | new_info = new_blk->bp->b_addr; | ||
1664 | ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || | ||
1665 | old_blk->magic == XFS_DIR2_LEAFN_MAGIC || | ||
1666 | old_blk->magic == XFS_ATTR_LEAF_MAGIC); | ||
1667 | |||
1668 | switch (old_blk->magic) { | ||
1669 | case XFS_ATTR_LEAF_MAGIC: | ||
1670 | before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp); | ||
1671 | break; | ||
1672 | case XFS_DIR2_LEAFN_MAGIC: | ||
1673 | before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp); | ||
1674 | break; | ||
1675 | case XFS_DA_NODE_MAGIC: | ||
1676 | before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp); | ||
1677 | break; | ||
1678 | } | ||
1679 | |||
1680 | /* | ||
1681 | * Link blocks in appropriate order. | ||
1682 | */ | ||
1683 | if (before) { | ||
1684 | /* | ||
1685 | * Link new block in before existing block. | ||
1686 | */ | ||
1687 | trace_xfs_da_link_before(args); | ||
1688 | new_info->forw = cpu_to_be32(old_blk->blkno); | ||
1689 | new_info->back = old_info->back; | ||
1690 | if (old_info->back) { | ||
1691 | error = xfs_da3_node_read(args->trans, dp, | ||
1692 | be32_to_cpu(old_info->back), | ||
1693 | -1, &bp, args->whichfork); | ||
1694 | if (error) | ||
1695 | return error; | ||
1696 | ASSERT(bp != NULL); | ||
1697 | tmp_info = bp->b_addr; | ||
1698 | ASSERT(tmp_info->magic == old_info->magic); | ||
1699 | ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno); | ||
1700 | tmp_info->forw = cpu_to_be32(new_blk->blkno); | ||
1701 | xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); | ||
1702 | } | ||
1703 | old_info->back = cpu_to_be32(new_blk->blkno); | ||
1704 | } else { | ||
1705 | /* | ||
1706 | * Link new block in after existing block. | ||
1707 | */ | ||
1708 | trace_xfs_da_link_after(args); | ||
1709 | new_info->forw = old_info->forw; | ||
1710 | new_info->back = cpu_to_be32(old_blk->blkno); | ||
1711 | if (old_info->forw) { | ||
1712 | error = xfs_da3_node_read(args->trans, dp, | ||
1713 | be32_to_cpu(old_info->forw), | ||
1714 | -1, &bp, args->whichfork); | ||
1715 | if (error) | ||
1716 | return error; | ||
1717 | ASSERT(bp != NULL); | ||
1718 | tmp_info = bp->b_addr; | ||
1719 | ASSERT(tmp_info->magic == old_info->magic); | ||
1720 | ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno); | ||
1721 | tmp_info->back = cpu_to_be32(new_blk->blkno); | ||
1722 | xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); | ||
1723 | } | ||
1724 | old_info->forw = cpu_to_be32(new_blk->blkno); | ||
1725 | } | ||
1726 | |||
1727 | xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); | ||
1728 | xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); | ||
1729 | return 0; | ||
1730 | } | ||
1731 | |||
1732 | /* | ||
1733 | * Unlink a block from a doubly linked list of blocks. | ||
1734 | */ | ||
1735 | STATIC int /* error */ | ||
1736 | xfs_da3_blk_unlink( | ||
1737 | struct xfs_da_state *state, | ||
1738 | struct xfs_da_state_blk *drop_blk, | ||
1739 | struct xfs_da_state_blk *save_blk) | ||
1740 | { | ||
1741 | struct xfs_da_blkinfo *drop_info; | ||
1742 | struct xfs_da_blkinfo *save_info; | ||
1743 | struct xfs_da_blkinfo *tmp_info; | ||
1744 | struct xfs_da_args *args; | ||
1745 | struct xfs_buf *bp; | ||
1746 | int error; | ||
1747 | |||
1748 | /* | ||
1749 | * Set up environment. | ||
1750 | */ | ||
1751 | args = state->args; | ||
1752 | ASSERT(args != NULL); | ||
1753 | save_info = save_blk->bp->b_addr; | ||
1754 | drop_info = drop_blk->bp->b_addr; | ||
1755 | ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || | ||
1756 | save_blk->magic == XFS_DIR2_LEAFN_MAGIC || | ||
1757 | save_blk->magic == XFS_ATTR_LEAF_MAGIC); | ||
1758 | ASSERT(save_blk->magic == drop_blk->magic); | ||
1759 | ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) || | ||
1760 | (be32_to_cpu(save_info->back) == drop_blk->blkno)); | ||
1761 | ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) || | ||
1762 | (be32_to_cpu(drop_info->back) == save_blk->blkno)); | ||
1763 | |||
1764 | /* | ||
1765 | * Unlink the leaf block from the doubly linked chain of leaves. | ||
1766 | */ | ||
1767 | if (be32_to_cpu(save_info->back) == drop_blk->blkno) { | ||
1768 | trace_xfs_da_unlink_back(args); | ||
1769 | save_info->back = drop_info->back; | ||
1770 | if (drop_info->back) { | ||
1771 | error = xfs_da3_node_read(args->trans, args->dp, | ||
1772 | be32_to_cpu(drop_info->back), | ||
1773 | -1, &bp, args->whichfork); | ||
1774 | if (error) | ||
1775 | return error; | ||
1776 | ASSERT(bp != NULL); | ||
1777 | tmp_info = bp->b_addr; | ||
1778 | ASSERT(tmp_info->magic == save_info->magic); | ||
1779 | ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno); | ||
1780 | tmp_info->forw = cpu_to_be32(save_blk->blkno); | ||
1781 | xfs_trans_log_buf(args->trans, bp, 0, | ||
1782 | sizeof(*tmp_info) - 1); | ||
1783 | } | ||
1784 | } else { | ||
1785 | trace_xfs_da_unlink_forward(args); | ||
1786 | save_info->forw = drop_info->forw; | ||
1787 | if (drop_info->forw) { | ||
1788 | error = xfs_da3_node_read(args->trans, args->dp, | ||
1789 | be32_to_cpu(drop_info->forw), | ||
1790 | -1, &bp, args->whichfork); | ||
1791 | if (error) | ||
1792 | return error; | ||
1793 | ASSERT(bp != NULL); | ||
1794 | tmp_info = bp->b_addr; | ||
1795 | ASSERT(tmp_info->magic == save_info->magic); | ||
1796 | ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno); | ||
1797 | tmp_info->back = cpu_to_be32(save_blk->blkno); | ||
1798 | xfs_trans_log_buf(args->trans, bp, 0, | ||
1799 | sizeof(*tmp_info) - 1); | ||
1800 | } | ||
1801 | } | ||
1802 | |||
1803 | xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1); | ||
1804 | return 0; | ||
1805 | } | ||
1806 | |||
1807 | /* | ||
1808 | * Move a path "forward" or "!forward" one block at the current level. | ||
1809 | * | ||
1810 | * This routine will adjust a "path" to point to the next block | ||
1811 | * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the | ||
1812 | * Btree, including updating pointers to the intermediate nodes between | ||
1813 | * the new bottom and the root. | ||
1814 | */ | ||
1815 | int /* error */ | ||
1816 | xfs_da3_path_shift( | ||
1817 | struct xfs_da_state *state, | ||
1818 | struct xfs_da_state_path *path, | ||
1819 | int forward, | ||
1820 | int release, | ||
1821 | int *result) | ||
1822 | { | ||
1823 | struct xfs_da_state_blk *blk; | ||
1824 | struct xfs_da_blkinfo *info; | ||
1825 | struct xfs_da_intnode *node; | ||
1826 | struct xfs_da_args *args; | ||
1827 | struct xfs_da_node_entry *btree; | ||
1828 | struct xfs_da3_icnode_hdr nodehdr; | ||
1829 | xfs_dablk_t blkno = 0; | ||
1830 | int level; | ||
1831 | int error; | ||
1832 | struct xfs_inode *dp = state->args->dp; | ||
1833 | |||
1834 | trace_xfs_da_path_shift(state->args); | ||
1835 | |||
1836 | /* | ||
1837 | * Roll up the Btree looking for the first block where our | ||
1838 | * current index is not at the edge of the block. Note that | ||
1839 | * we skip the bottom layer because we want the sibling block. | ||
1840 | */ | ||
1841 | args = state->args; | ||
1842 | ASSERT(args != NULL); | ||
1843 | ASSERT(path != NULL); | ||
1844 | ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); | ||
1845 | level = (path->active-1) - 1; /* skip bottom layer in path */ | ||
1846 | for (blk = &path->blk[level]; level >= 0; blk--, level--) { | ||
1847 | node = blk->bp->b_addr; | ||
1848 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); | ||
1849 | btree = dp->d_ops->node_tree_p(node); | ||
1850 | |||
1851 | if (forward && (blk->index < nodehdr.count - 1)) { | ||
1852 | blk->index++; | ||
1853 | blkno = be32_to_cpu(btree[blk->index].before); | ||
1854 | break; | ||
1855 | } else if (!forward && (blk->index > 0)) { | ||
1856 | blk->index--; | ||
1857 | blkno = be32_to_cpu(btree[blk->index].before); | ||
1858 | break; | ||
1859 | } | ||
1860 | } | ||
1861 | if (level < 0) { | ||
1862 | *result = -ENOENT; /* we're out of our tree */ | ||
1863 | ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); | ||
1864 | return 0; | ||
1865 | } | ||
1866 | |||
1867 | /* | ||
1868 | * Roll down the edge of the subtree until we reach the | ||
1869 | * same depth we were at originally. | ||
1870 | */ | ||
1871 | for (blk++, level++; level < path->active; blk++, level++) { | ||
1872 | /* | ||
1873 | * Release the old block. | ||
1874 | * (if it's dirty, trans won't actually let go) | ||
1875 | */ | ||
1876 | if (release) | ||
1877 | xfs_trans_brelse(args->trans, blk->bp); | ||
1878 | |||
1879 | /* | ||
1880 | * Read the next child block. | ||
1881 | */ | ||
1882 | blk->blkno = blkno; | ||
1883 | error = xfs_da3_node_read(args->trans, dp, blkno, -1, | ||
1884 | &blk->bp, args->whichfork); | ||
1885 | if (error) | ||
1886 | return error; | ||
1887 | info = blk->bp->b_addr; | ||
1888 | ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || | ||
1889 | info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || | ||
1890 | info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || | ||
1891 | info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) || | ||
1892 | info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) || | ||
1893 | info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)); | ||
1894 | |||
1895 | |||
1896 | /* | ||
1897 | * Note: we flatten the magic number to a single type so we | ||
1898 | * don't have to compare against crc/non-crc types elsewhere. | ||
1899 | */ | ||
1900 | switch (be16_to_cpu(info->magic)) { | ||
1901 | case XFS_DA_NODE_MAGIC: | ||
1902 | case XFS_DA3_NODE_MAGIC: | ||
1903 | blk->magic = XFS_DA_NODE_MAGIC; | ||
1904 | node = (xfs_da_intnode_t *)info; | ||
1905 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); | ||
1906 | btree = dp->d_ops->node_tree_p(node); | ||
1907 | blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval); | ||
1908 | if (forward) | ||
1909 | blk->index = 0; | ||
1910 | else | ||
1911 | blk->index = nodehdr.count - 1; | ||
1912 | blkno = be32_to_cpu(btree[blk->index].before); | ||
1913 | break; | ||
1914 | case XFS_ATTR_LEAF_MAGIC: | ||
1915 | case XFS_ATTR3_LEAF_MAGIC: | ||
1916 | blk->magic = XFS_ATTR_LEAF_MAGIC; | ||
1917 | ASSERT(level == path->active-1); | ||
1918 | blk->index = 0; | ||
1919 | blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); | ||
1920 | break; | ||
1921 | case XFS_DIR2_LEAFN_MAGIC: | ||
1922 | case XFS_DIR3_LEAFN_MAGIC: | ||
1923 | blk->magic = XFS_DIR2_LEAFN_MAGIC; | ||
1924 | ASSERT(level == path->active-1); | ||
1925 | blk->index = 0; | ||
1926 | blk->hashval = xfs_dir2_leafn_lasthash(args->dp, | ||
1927 | blk->bp, NULL); | ||
1928 | break; | ||
1929 | default: | ||
1930 | ASSERT(0); | ||
1931 | break; | ||
1932 | } | ||
1933 | } | ||
1934 | *result = 0; | ||
1935 | return 0; | ||
1936 | } | ||
1937 | |||
1938 | |||
1939 | /*======================================================================== | ||
1940 | * Utility routines. | ||
1941 | *========================================================================*/ | ||
1942 | |||
1943 | /* | ||
1944 | * Implement a simple hash on a character string. | ||
1945 | * Rotate the hash value by 7 bits, then XOR each character in. | ||
1946 | * This is implemented with some source-level loop unrolling. | ||
1947 | */ | ||
1948 | xfs_dahash_t | ||
1949 | xfs_da_hashname(const __uint8_t *name, int namelen) | ||
1950 | { | ||
1951 | xfs_dahash_t hash; | ||
1952 | |||
1953 | /* | ||
1954 | * Do four characters at a time as long as we can. | ||
1955 | */ | ||
1956 | for (hash = 0; namelen >= 4; namelen -= 4, name += 4) | ||
1957 | hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^ | ||
1958 | (name[3] << 0) ^ rol32(hash, 7 * 4); | ||
1959 | |||
1960 | /* | ||
1961 | * Now do the rest of the characters. | ||
1962 | */ | ||
1963 | switch (namelen) { | ||
1964 | case 3: | ||
1965 | return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^ | ||
1966 | rol32(hash, 7 * 3); | ||
1967 | case 2: | ||
1968 | return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2); | ||
1969 | case 1: | ||
1970 | return (name[0] << 0) ^ rol32(hash, 7 * 1); | ||
1971 | default: /* case 0: */ | ||
1972 | return hash; | ||
1973 | } | ||
1974 | } | ||
1975 | |||
1976 | enum xfs_dacmp | ||
1977 | xfs_da_compname( | ||
1978 | struct xfs_da_args *args, | ||
1979 | const unsigned char *name, | ||
1980 | int len) | ||
1981 | { | ||
1982 | return (args->namelen == len && memcmp(args->name, name, len) == 0) ? | ||
1983 | XFS_CMP_EXACT : XFS_CMP_DIFFERENT; | ||
1984 | } | ||
1985 | |||
1986 | static xfs_dahash_t | ||
1987 | xfs_default_hashname( | ||
1988 | struct xfs_name *name) | ||
1989 | { | ||
1990 | return xfs_da_hashname(name->name, name->len); | ||
1991 | } | ||
1992 | |||
1993 | const struct xfs_nameops xfs_default_nameops = { | ||
1994 | .hashname = xfs_default_hashname, | ||
1995 | .compname = xfs_da_compname | ||
1996 | }; | ||
1997 | |||
1998 | int | ||
1999 | xfs_da_grow_inode_int( | ||
2000 | struct xfs_da_args *args, | ||
2001 | xfs_fileoff_t *bno, | ||
2002 | int count) | ||
2003 | { | ||
2004 | struct xfs_trans *tp = args->trans; | ||
2005 | struct xfs_inode *dp = args->dp; | ||
2006 | int w = args->whichfork; | ||
2007 | xfs_drfsbno_t nblks = dp->i_d.di_nblocks; | ||
2008 | struct xfs_bmbt_irec map, *mapp; | ||
2009 | int nmap, error, got, i, mapi; | ||
2010 | |||
2011 | /* | ||
2012 | * Find a spot in the file space to put the new block. | ||
2013 | */ | ||
2014 | error = xfs_bmap_first_unused(tp, dp, count, bno, w); | ||
2015 | if (error) | ||
2016 | return error; | ||
2017 | |||
2018 | /* | ||
2019 | * Try mapping it in one filesystem block. | ||
2020 | */ | ||
2021 | nmap = 1; | ||
2022 | ASSERT(args->firstblock != NULL); | ||
2023 | error = xfs_bmapi_write(tp, dp, *bno, count, | ||
2024 | xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, | ||
2025 | args->firstblock, args->total, &map, &nmap, | ||
2026 | args->flist); | ||
2027 | if (error) | ||
2028 | return error; | ||
2029 | |||
2030 | ASSERT(nmap <= 1); | ||
2031 | if (nmap == 1) { | ||
2032 | mapp = ↦ | ||
2033 | mapi = 1; | ||
2034 | } else if (nmap == 0 && count > 1) { | ||
2035 | xfs_fileoff_t b; | ||
2036 | int c; | ||
2037 | |||
2038 | /* | ||
2039 | * If we didn't get it and the block might work if fragmented, | ||
2040 | * try without the CONTIG flag. Loop until we get it all. | ||
2041 | */ | ||
2042 | mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); | ||
2043 | for (b = *bno, mapi = 0; b < *bno + count; ) { | ||
2044 | nmap = MIN(XFS_BMAP_MAX_NMAP, count); | ||
2045 | c = (int)(*bno + count - b); | ||
2046 | error = xfs_bmapi_write(tp, dp, b, c, | ||
2047 | xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, | ||
2048 | args->firstblock, args->total, | ||
2049 | &mapp[mapi], &nmap, args->flist); | ||
2050 | if (error) | ||
2051 | goto out_free_map; | ||
2052 | if (nmap < 1) | ||
2053 | break; | ||
2054 | mapi += nmap; | ||
2055 | b = mapp[mapi - 1].br_startoff + | ||
2056 | mapp[mapi - 1].br_blockcount; | ||
2057 | } | ||
2058 | } else { | ||
2059 | mapi = 0; | ||
2060 | mapp = NULL; | ||
2061 | } | ||
2062 | |||
2063 | /* | ||
2064 | * Count the blocks we got, make sure it matches the total. | ||
2065 | */ | ||
2066 | for (i = 0, got = 0; i < mapi; i++) | ||
2067 | got += mapp[i].br_blockcount; | ||
2068 | if (got != count || mapp[0].br_startoff != *bno || | ||
2069 | mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != | ||
2070 | *bno + count) { | ||
2071 | error = -ENOSPC; | ||
2072 | goto out_free_map; | ||
2073 | } | ||
2074 | |||
2075 | /* account for newly allocated blocks in reserved blocks total */ | ||
2076 | args->total -= dp->i_d.di_nblocks - nblks; | ||
2077 | |||
2078 | out_free_map: | ||
2079 | if (mapp != &map) | ||
2080 | kmem_free(mapp); | ||
2081 | return error; | ||
2082 | } | ||
2083 | |||
2084 | /* | ||
2085 | * Add a block to the btree ahead of the file. | ||
2086 | * Return the new block number to the caller. | ||
2087 | */ | ||
2088 | int | ||
2089 | xfs_da_grow_inode( | ||
2090 | struct xfs_da_args *args, | ||
2091 | xfs_dablk_t *new_blkno) | ||
2092 | { | ||
2093 | xfs_fileoff_t bno; | ||
2094 | int error; | ||
2095 | |||
2096 | trace_xfs_da_grow_inode(args); | ||
2097 | |||
2098 | bno = args->geo->leafblk; | ||
2099 | error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount); | ||
2100 | if (!error) | ||
2101 | *new_blkno = (xfs_dablk_t)bno; | ||
2102 | return error; | ||
2103 | } | ||
2104 | |||
2105 | /* | ||
2106 | * Ick. We need to always be able to remove a btree block, even | ||
2107 | * if there's no space reservation because the filesystem is full. | ||
2108 | * This is called if xfs_bunmapi on a btree block fails due to ENOSPC. | ||
2109 | * It swaps the target block with the last block in the file. The | ||
2110 | * last block in the file can always be removed since it can't cause | ||
2111 | * a bmap btree split to do that. | ||
2112 | */ | ||
2113 | STATIC int | ||
2114 | xfs_da3_swap_lastblock( | ||
2115 | struct xfs_da_args *args, | ||
2116 | xfs_dablk_t *dead_blknop, | ||
2117 | struct xfs_buf **dead_bufp) | ||
2118 | { | ||
2119 | struct xfs_da_blkinfo *dead_info; | ||
2120 | struct xfs_da_blkinfo *sib_info; | ||
2121 | struct xfs_da_intnode *par_node; | ||
2122 | struct xfs_da_intnode *dead_node; | ||
2123 | struct xfs_dir2_leaf *dead_leaf2; | ||
2124 | struct xfs_da_node_entry *btree; | ||
2125 | struct xfs_da3_icnode_hdr par_hdr; | ||
2126 | struct xfs_inode *dp; | ||
2127 | struct xfs_trans *tp; | ||
2128 | struct xfs_mount *mp; | ||
2129 | struct xfs_buf *dead_buf; | ||
2130 | struct xfs_buf *last_buf; | ||
2131 | struct xfs_buf *sib_buf; | ||
2132 | struct xfs_buf *par_buf; | ||
2133 | xfs_dahash_t dead_hash; | ||
2134 | xfs_fileoff_t lastoff; | ||
2135 | xfs_dablk_t dead_blkno; | ||
2136 | xfs_dablk_t last_blkno; | ||
2137 | xfs_dablk_t sib_blkno; | ||
2138 | xfs_dablk_t par_blkno; | ||
2139 | int error; | ||
2140 | int w; | ||
2141 | int entno; | ||
2142 | int level; | ||
2143 | int dead_level; | ||
2144 | |||
2145 | trace_xfs_da_swap_lastblock(args); | ||
2146 | |||
2147 | dead_buf = *dead_bufp; | ||
2148 | dead_blkno = *dead_blknop; | ||
2149 | tp = args->trans; | ||
2150 | dp = args->dp; | ||
2151 | w = args->whichfork; | ||
2152 | ASSERT(w == XFS_DATA_FORK); | ||
2153 | mp = dp->i_mount; | ||
2154 | lastoff = args->geo->freeblk; | ||
2155 | error = xfs_bmap_last_before(tp, dp, &lastoff, w); | ||
2156 | if (error) | ||
2157 | return error; | ||
2158 | if (unlikely(lastoff == 0)) { | ||
2159 | XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW, | ||
2160 | mp); | ||
2161 | return -EFSCORRUPTED; | ||
2162 | } | ||
2163 | /* | ||
2164 | * Read the last block in the btree space. | ||
2165 | */ | ||
2166 | last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount; | ||
2167 | error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w); | ||
2168 | if (error) | ||
2169 | return error; | ||
2170 | /* | ||
2171 | * Copy the last block into the dead buffer and log it. | ||
2172 | */ | ||
2173 | memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize); | ||
2174 | xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1); | ||
2175 | dead_info = dead_buf->b_addr; | ||
2176 | /* | ||
2177 | * Get values from the moved block. | ||
2178 | */ | ||
2179 | if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || | ||
2180 | dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { | ||
2181 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
2182 | struct xfs_dir2_leaf_entry *ents; | ||
2183 | |||
2184 | dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; | ||
2185 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2); | ||
2186 | ents = dp->d_ops->leaf_ents_p(dead_leaf2); | ||
2187 | dead_level = 0; | ||
2188 | dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval); | ||
2189 | } else { | ||
2190 | struct xfs_da3_icnode_hdr deadhdr; | ||
2191 | |||
2192 | dead_node = (xfs_da_intnode_t *)dead_info; | ||
2193 | dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node); | ||
2194 | btree = dp->d_ops->node_tree_p(dead_node); | ||
2195 | dead_level = deadhdr.level; | ||
2196 | dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval); | ||
2197 | } | ||
2198 | sib_buf = par_buf = NULL; | ||
2199 | /* | ||
2200 | * If the moved block has a left sibling, fix up the pointers. | ||
2201 | */ | ||
2202 | if ((sib_blkno = be32_to_cpu(dead_info->back))) { | ||
2203 | error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); | ||
2204 | if (error) | ||
2205 | goto done; | ||
2206 | sib_info = sib_buf->b_addr; | ||
2207 | if (unlikely( | ||
2208 | be32_to_cpu(sib_info->forw) != last_blkno || | ||
2209 | sib_info->magic != dead_info->magic)) { | ||
2210 | XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)", | ||
2211 | XFS_ERRLEVEL_LOW, mp); | ||
2212 | error = -EFSCORRUPTED; | ||
2213 | goto done; | ||
2214 | } | ||
2215 | sib_info->forw = cpu_to_be32(dead_blkno); | ||
2216 | xfs_trans_log_buf(tp, sib_buf, | ||
2217 | XFS_DA_LOGRANGE(sib_info, &sib_info->forw, | ||
2218 | sizeof(sib_info->forw))); | ||
2219 | sib_buf = NULL; | ||
2220 | } | ||
2221 | /* | ||
2222 | * If the moved block has a right sibling, fix up the pointers. | ||
2223 | */ | ||
2224 | if ((sib_blkno = be32_to_cpu(dead_info->forw))) { | ||
2225 | error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); | ||
2226 | if (error) | ||
2227 | goto done; | ||
2228 | sib_info = sib_buf->b_addr; | ||
2229 | if (unlikely( | ||
2230 | be32_to_cpu(sib_info->back) != last_blkno || | ||
2231 | sib_info->magic != dead_info->magic)) { | ||
2232 | XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)", | ||
2233 | XFS_ERRLEVEL_LOW, mp); | ||
2234 | error = -EFSCORRUPTED; | ||
2235 | goto done; | ||
2236 | } | ||
2237 | sib_info->back = cpu_to_be32(dead_blkno); | ||
2238 | xfs_trans_log_buf(tp, sib_buf, | ||
2239 | XFS_DA_LOGRANGE(sib_info, &sib_info->back, | ||
2240 | sizeof(sib_info->back))); | ||
2241 | sib_buf = NULL; | ||
2242 | } | ||
2243 | par_blkno = args->geo->leafblk; | ||
2244 | level = -1; | ||
2245 | /* | ||
2246 | * Walk down the tree looking for the parent of the moved block. | ||
2247 | */ | ||
2248 | for (;;) { | ||
2249 | error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); | ||
2250 | if (error) | ||
2251 | goto done; | ||
2252 | par_node = par_buf->b_addr; | ||
2253 | dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); | ||
2254 | if (level >= 0 && level != par_hdr.level + 1) { | ||
2255 | XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", | ||
2256 | XFS_ERRLEVEL_LOW, mp); | ||
2257 | error = -EFSCORRUPTED; | ||
2258 | goto done; | ||
2259 | } | ||
2260 | level = par_hdr.level; | ||
2261 | btree = dp->d_ops->node_tree_p(par_node); | ||
2262 | for (entno = 0; | ||
2263 | entno < par_hdr.count && | ||
2264 | be32_to_cpu(btree[entno].hashval) < dead_hash; | ||
2265 | entno++) | ||
2266 | continue; | ||
2267 | if (entno == par_hdr.count) { | ||
2268 | XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)", | ||
2269 | XFS_ERRLEVEL_LOW, mp); | ||
2270 | error = -EFSCORRUPTED; | ||
2271 | goto done; | ||
2272 | } | ||
2273 | par_blkno = be32_to_cpu(btree[entno].before); | ||
2274 | if (level == dead_level + 1) | ||
2275 | break; | ||
2276 | xfs_trans_brelse(tp, par_buf); | ||
2277 | par_buf = NULL; | ||
2278 | } | ||
2279 | /* | ||
2280 | * We're in the right parent block. | ||
2281 | * Look for the right entry. | ||
2282 | */ | ||
2283 | for (;;) { | ||
2284 | for (; | ||
2285 | entno < par_hdr.count && | ||
2286 | be32_to_cpu(btree[entno].before) != last_blkno; | ||
2287 | entno++) | ||
2288 | continue; | ||
2289 | if (entno < par_hdr.count) | ||
2290 | break; | ||
2291 | par_blkno = par_hdr.forw; | ||
2292 | xfs_trans_brelse(tp, par_buf); | ||
2293 | par_buf = NULL; | ||
2294 | if (unlikely(par_blkno == 0)) { | ||
2295 | XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)", | ||
2296 | XFS_ERRLEVEL_LOW, mp); | ||
2297 | error = -EFSCORRUPTED; | ||
2298 | goto done; | ||
2299 | } | ||
2300 | error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); | ||
2301 | if (error) | ||
2302 | goto done; | ||
2303 | par_node = par_buf->b_addr; | ||
2304 | dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); | ||
2305 | if (par_hdr.level != level) { | ||
2306 | XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", | ||
2307 | XFS_ERRLEVEL_LOW, mp); | ||
2308 | error = -EFSCORRUPTED; | ||
2309 | goto done; | ||
2310 | } | ||
2311 | btree = dp->d_ops->node_tree_p(par_node); | ||
2312 | entno = 0; | ||
2313 | } | ||
2314 | /* | ||
2315 | * Update the parent entry pointing to the moved block. | ||
2316 | */ | ||
2317 | btree[entno].before = cpu_to_be32(dead_blkno); | ||
2318 | xfs_trans_log_buf(tp, par_buf, | ||
2319 | XFS_DA_LOGRANGE(par_node, &btree[entno].before, | ||
2320 | sizeof(btree[entno].before))); | ||
2321 | *dead_blknop = last_blkno; | ||
2322 | *dead_bufp = last_buf; | ||
2323 | return 0; | ||
2324 | done: | ||
2325 | if (par_buf) | ||
2326 | xfs_trans_brelse(tp, par_buf); | ||
2327 | if (sib_buf) | ||
2328 | xfs_trans_brelse(tp, sib_buf); | ||
2329 | xfs_trans_brelse(tp, last_buf); | ||
2330 | return error; | ||
2331 | } | ||
2332 | |||
2333 | /* | ||
2334 | * Remove a btree block from a directory or attribute. | ||
2335 | */ | ||
2336 | int | ||
2337 | xfs_da_shrink_inode( | ||
2338 | xfs_da_args_t *args, | ||
2339 | xfs_dablk_t dead_blkno, | ||
2340 | struct xfs_buf *dead_buf) | ||
2341 | { | ||
2342 | xfs_inode_t *dp; | ||
2343 | int done, error, w, count; | ||
2344 | xfs_trans_t *tp; | ||
2345 | xfs_mount_t *mp; | ||
2346 | |||
2347 | trace_xfs_da_shrink_inode(args); | ||
2348 | |||
2349 | dp = args->dp; | ||
2350 | w = args->whichfork; | ||
2351 | tp = args->trans; | ||
2352 | mp = dp->i_mount; | ||
2353 | count = args->geo->fsbcount; | ||
2354 | for (;;) { | ||
2355 | /* | ||
2356 | * Remove extents. If we get ENOSPC for a dir we have to move | ||
2357 | * the last block to the place we want to kill. | ||
2358 | */ | ||
2359 | error = xfs_bunmapi(tp, dp, dead_blkno, count, | ||
2360 | xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, | ||
2361 | 0, args->firstblock, args->flist, &done); | ||
2362 | if (error == -ENOSPC) { | ||
2363 | if (w != XFS_DATA_FORK) | ||
2364 | break; | ||
2365 | error = xfs_da3_swap_lastblock(args, &dead_blkno, | ||
2366 | &dead_buf); | ||
2367 | if (error) | ||
2368 | break; | ||
2369 | } else { | ||
2370 | break; | ||
2371 | } | ||
2372 | } | ||
2373 | xfs_trans_binval(tp, dead_buf); | ||
2374 | return error; | ||
2375 | } | ||
2376 | |||
2377 | /* | ||
2378 | * See if the mapping(s) for this btree block are valid, i.e. | ||
2379 | * don't contain holes, are logically contiguous, and cover the whole range. | ||
2380 | */ | ||
2381 | STATIC int | ||
2382 | xfs_da_map_covers_blocks( | ||
2383 | int nmap, | ||
2384 | xfs_bmbt_irec_t *mapp, | ||
2385 | xfs_dablk_t bno, | ||
2386 | int count) | ||
2387 | { | ||
2388 | int i; | ||
2389 | xfs_fileoff_t off; | ||
2390 | |||
2391 | for (i = 0, off = bno; i < nmap; i++) { | ||
2392 | if (mapp[i].br_startblock == HOLESTARTBLOCK || | ||
2393 | mapp[i].br_startblock == DELAYSTARTBLOCK) { | ||
2394 | return 0; | ||
2395 | } | ||
2396 | if (off != mapp[i].br_startoff) { | ||
2397 | return 0; | ||
2398 | } | ||
2399 | off += mapp[i].br_blockcount; | ||
2400 | } | ||
2401 | return off == bno + count; | ||
2402 | } | ||
2403 | |||
2404 | /* | ||
2405 | * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map. | ||
2406 | * | ||
2407 | * For the single map case, it is assumed that the caller has provided a pointer | ||
2408 | * to a valid xfs_buf_map. For the multiple map case, this function will | ||
2409 | * allocate the xfs_buf_map to hold all the maps and replace the caller's single | ||
2410 | * map pointer with the allocated map. | ||
2411 | */ | ||
2412 | static int | ||
2413 | xfs_buf_map_from_irec( | ||
2414 | struct xfs_mount *mp, | ||
2415 | struct xfs_buf_map **mapp, | ||
2416 | int *nmaps, | ||
2417 | struct xfs_bmbt_irec *irecs, | ||
2418 | int nirecs) | ||
2419 | { | ||
2420 | struct xfs_buf_map *map; | ||
2421 | int i; | ||
2422 | |||
2423 | ASSERT(*nmaps == 1); | ||
2424 | ASSERT(nirecs >= 1); | ||
2425 | |||
2426 | if (nirecs > 1) { | ||
2427 | map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), | ||
2428 | KM_SLEEP | KM_NOFS); | ||
2429 | if (!map) | ||
2430 | return -ENOMEM; | ||
2431 | *mapp = map; | ||
2432 | } | ||
2433 | |||
2434 | *nmaps = nirecs; | ||
2435 | map = *mapp; | ||
2436 | for (i = 0; i < *nmaps; i++) { | ||
2437 | ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK && | ||
2438 | irecs[i].br_startblock != HOLESTARTBLOCK); | ||
2439 | map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock); | ||
2440 | map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount); | ||
2441 | } | ||
2442 | return 0; | ||
2443 | } | ||
2444 | |||
2445 | /* | ||
2446 | * Map the block we are given ready for reading. There are three possible return | ||
2447 | * values: | ||
2448 | * -1 - will be returned if we land in a hole and mappedbno == -2 so the | ||
2449 | * caller knows not to execute a subsequent read. | ||
2450 | * 0 - if we mapped the block successfully | ||
2451 | * >0 - positive error number if there was an error. | ||
2452 | */ | ||
2453 | static int | ||
2454 | xfs_dabuf_map( | ||
2455 | struct xfs_inode *dp, | ||
2456 | xfs_dablk_t bno, | ||
2457 | xfs_daddr_t mappedbno, | ||
2458 | int whichfork, | ||
2459 | struct xfs_buf_map **map, | ||
2460 | int *nmaps) | ||
2461 | { | ||
2462 | struct xfs_mount *mp = dp->i_mount; | ||
2463 | int nfsb; | ||
2464 | int error = 0; | ||
2465 | struct xfs_bmbt_irec irec; | ||
2466 | struct xfs_bmbt_irec *irecs = &irec; | ||
2467 | int nirecs; | ||
2468 | |||
2469 | ASSERT(map && *map); | ||
2470 | ASSERT(*nmaps == 1); | ||
2471 | |||
2472 | if (whichfork == XFS_DATA_FORK) | ||
2473 | nfsb = mp->m_dir_geo->fsbcount; | ||
2474 | else | ||
2475 | nfsb = mp->m_attr_geo->fsbcount; | ||
2476 | |||
2477 | /* | ||
2478 | * Caller doesn't have a mapping. -2 means don't complain | ||
2479 | * if we land in a hole. | ||
2480 | */ | ||
2481 | if (mappedbno == -1 || mappedbno == -2) { | ||
2482 | /* | ||
2483 | * Optimize the one-block case. | ||
2484 | */ | ||
2485 | if (nfsb != 1) | ||
2486 | irecs = kmem_zalloc(sizeof(irec) * nfsb, | ||
2487 | KM_SLEEP | KM_NOFS); | ||
2488 | |||
2489 | nirecs = nfsb; | ||
2490 | error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs, | ||
2491 | &nirecs, xfs_bmapi_aflag(whichfork)); | ||
2492 | if (error) | ||
2493 | goto out; | ||
2494 | } else { | ||
2495 | irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno); | ||
2496 | irecs->br_startoff = (xfs_fileoff_t)bno; | ||
2497 | irecs->br_blockcount = nfsb; | ||
2498 | irecs->br_state = 0; | ||
2499 | nirecs = 1; | ||
2500 | } | ||
2501 | |||
2502 | if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) { | ||
2503 | error = mappedbno == -2 ? -1 : -EFSCORRUPTED; | ||
2504 | if (unlikely(error == -EFSCORRUPTED)) { | ||
2505 | if (xfs_error_level >= XFS_ERRLEVEL_LOW) { | ||
2506 | int i; | ||
2507 | xfs_alert(mp, "%s: bno %lld dir: inode %lld", | ||
2508 | __func__, (long long)bno, | ||
2509 | (long long)dp->i_ino); | ||
2510 | for (i = 0; i < *nmaps; i++) { | ||
2511 | xfs_alert(mp, | ||
2512 | "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d", | ||
2513 | i, | ||
2514 | (long long)irecs[i].br_startoff, | ||
2515 | (long long)irecs[i].br_startblock, | ||
2516 | (long long)irecs[i].br_blockcount, | ||
2517 | irecs[i].br_state); | ||
2518 | } | ||
2519 | } | ||
2520 | XFS_ERROR_REPORT("xfs_da_do_buf(1)", | ||
2521 | XFS_ERRLEVEL_LOW, mp); | ||
2522 | } | ||
2523 | goto out; | ||
2524 | } | ||
2525 | error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs); | ||
2526 | out: | ||
2527 | if (irecs != &irec) | ||
2528 | kmem_free(irecs); | ||
2529 | return error; | ||
2530 | } | ||
2531 | |||
2532 | /* | ||
2533 | * Get a buffer for the dir/attr block. | ||
2534 | */ | ||
2535 | int | ||
2536 | xfs_da_get_buf( | ||
2537 | struct xfs_trans *trans, | ||
2538 | struct xfs_inode *dp, | ||
2539 | xfs_dablk_t bno, | ||
2540 | xfs_daddr_t mappedbno, | ||
2541 | struct xfs_buf **bpp, | ||
2542 | int whichfork) | ||
2543 | { | ||
2544 | struct xfs_buf *bp; | ||
2545 | struct xfs_buf_map map; | ||
2546 | struct xfs_buf_map *mapp; | ||
2547 | int nmap; | ||
2548 | int error; | ||
2549 | |||
2550 | *bpp = NULL; | ||
2551 | mapp = ↦ | ||
2552 | nmap = 1; | ||
2553 | error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, | ||
2554 | &mapp, &nmap); | ||
2555 | if (error) { | ||
2556 | /* mapping a hole is not an error, but we don't continue */ | ||
2557 | if (error == -1) | ||
2558 | error = 0; | ||
2559 | goto out_free; | ||
2560 | } | ||
2561 | |||
2562 | bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp, | ||
2563 | mapp, nmap, 0); | ||
2564 | error = bp ? bp->b_error : -EIO; | ||
2565 | if (error) { | ||
2566 | xfs_trans_brelse(trans, bp); | ||
2567 | goto out_free; | ||
2568 | } | ||
2569 | |||
2570 | *bpp = bp; | ||
2571 | |||
2572 | out_free: | ||
2573 | if (mapp != &map) | ||
2574 | kmem_free(mapp); | ||
2575 | |||
2576 | return error; | ||
2577 | } | ||
2578 | |||
2579 | /* | ||
2580 | * Get a buffer for the dir/attr block, fill in the contents. | ||
2581 | */ | ||
2582 | int | ||
2583 | xfs_da_read_buf( | ||
2584 | struct xfs_trans *trans, | ||
2585 | struct xfs_inode *dp, | ||
2586 | xfs_dablk_t bno, | ||
2587 | xfs_daddr_t mappedbno, | ||
2588 | struct xfs_buf **bpp, | ||
2589 | int whichfork, | ||
2590 | const struct xfs_buf_ops *ops) | ||
2591 | { | ||
2592 | struct xfs_buf *bp; | ||
2593 | struct xfs_buf_map map; | ||
2594 | struct xfs_buf_map *mapp; | ||
2595 | int nmap; | ||
2596 | int error; | ||
2597 | |||
2598 | *bpp = NULL; | ||
2599 | mapp = ↦ | ||
2600 | nmap = 1; | ||
2601 | error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, | ||
2602 | &mapp, &nmap); | ||
2603 | if (error) { | ||
2604 | /* mapping a hole is not an error, but we don't continue */ | ||
2605 | if (error == -1) | ||
2606 | error = 0; | ||
2607 | goto out_free; | ||
2608 | } | ||
2609 | |||
2610 | error = xfs_trans_read_buf_map(dp->i_mount, trans, | ||
2611 | dp->i_mount->m_ddev_targp, | ||
2612 | mapp, nmap, 0, &bp, ops); | ||
2613 | if (error) | ||
2614 | goto out_free; | ||
2615 | |||
2616 | if (whichfork == XFS_ATTR_FORK) | ||
2617 | xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF); | ||
2618 | else | ||
2619 | xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); | ||
2620 | *bpp = bp; | ||
2621 | out_free: | ||
2622 | if (mapp != &map) | ||
2623 | kmem_free(mapp); | ||
2624 | |||
2625 | return error; | ||
2626 | } | ||
2627 | |||
2628 | /* | ||
2629 | * Readahead the dir/attr block. | ||
2630 | */ | ||
2631 | xfs_daddr_t | ||
2632 | xfs_da_reada_buf( | ||
2633 | struct xfs_inode *dp, | ||
2634 | xfs_dablk_t bno, | ||
2635 | xfs_daddr_t mappedbno, | ||
2636 | int whichfork, | ||
2637 | const struct xfs_buf_ops *ops) | ||
2638 | { | ||
2639 | struct xfs_buf_map map; | ||
2640 | struct xfs_buf_map *mapp; | ||
2641 | int nmap; | ||
2642 | int error; | ||
2643 | |||
2644 | mapp = ↦ | ||
2645 | nmap = 1; | ||
2646 | error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, | ||
2647 | &mapp, &nmap); | ||
2648 | if (error) { | ||
2649 | /* mapping a hole is not an error, but we don't continue */ | ||
2650 | if (error == -1) | ||
2651 | error = 0; | ||
2652 | goto out_free; | ||
2653 | } | ||
2654 | |||
2655 | mappedbno = mapp[0].bm_bn; | ||
2656 | xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops); | ||
2657 | |||
2658 | out_free: | ||
2659 | if (mapp != &map) | ||
2660 | kmem_free(mapp); | ||
2661 | |||
2662 | if (error) | ||
2663 | return -1; | ||
2664 | return mappedbno; | ||
2665 | } | ||
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h new file mode 100644 index 000000000000..6e153e399a77 --- /dev/null +++ b/fs/xfs/libxfs/xfs_da_btree.h | |||
@@ -0,0 +1,221 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2013 Red Hat, Inc. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #ifndef __XFS_DA_BTREE_H__ | ||
20 | #define __XFS_DA_BTREE_H__ | ||
21 | |||
22 | struct xfs_bmap_free; | ||
23 | struct xfs_inode; | ||
24 | struct xfs_trans; | ||
25 | struct zone; | ||
26 | struct xfs_dir_ops; | ||
27 | |||
28 | /* | ||
29 | * Directory/attribute geometry information. There will be one of these for each | ||
30 | * data fork type, and it will be passed around via the xfs_da_args. Global | ||
31 | * structures will be attached to the xfs_mount. | ||
32 | */ | ||
33 | struct xfs_da_geometry { | ||
34 | int blksize; /* da block size in bytes */ | ||
35 | int fsbcount; /* da block size in filesystem blocks */ | ||
36 | uint8_t fsblog; /* log2 of _filesystem_ block size */ | ||
37 | uint8_t blklog; /* log2 of da block size */ | ||
38 | uint node_ents; /* # of entries in a danode */ | ||
39 | int magicpct; /* 37% of block size in bytes */ | ||
40 | xfs_dablk_t datablk; /* blockno of dir data v2 */ | ||
41 | xfs_dablk_t leafblk; /* blockno of leaf data v2 */ | ||
42 | xfs_dablk_t freeblk; /* blockno of free data v2 */ | ||
43 | }; | ||
44 | |||
45 | /*======================================================================== | ||
46 | * Btree searching and modification structure definitions. | ||
47 | *========================================================================*/ | ||
48 | |||
/*
 * Result of comparing two names during a search.
 */
enum xfs_dacmp {
	XFS_CMP_DIFFERENT,	/* the names do not match at all */
	XFS_CMP_EXACT,		/* the names match exactly */
	XFS_CMP_CASE		/* the names match apart from case */
};
57 | |||
58 | /* | ||
59 | * Structure to ease passing around component names. | ||
60 | */ | ||
61 | typedef struct xfs_da_args { | ||
62 | struct xfs_da_geometry *geo; /* da block geometry */ | ||
63 | const __uint8_t *name; /* string (maybe not NULL terminated) */ | ||
64 | int namelen; /* length of string (maybe no NULL) */ | ||
65 | __uint8_t filetype; /* filetype of inode for directories */ | ||
66 | __uint8_t *value; /* set of bytes (maybe contain NULLs) */ | ||
67 | int valuelen; /* length of value */ | ||
68 | int flags; /* argument flags (eg: ATTR_NOCREATE) */ | ||
69 | xfs_dahash_t hashval; /* hash value of name */ | ||
70 | xfs_ino_t inumber; /* input/output inode number */ | ||
71 | struct xfs_inode *dp; /* directory inode to manipulate */ | ||
72 | xfs_fsblock_t *firstblock; /* ptr to firstblock for bmap calls */ | ||
73 | struct xfs_bmap_free *flist; /* ptr to freelist for bmap_finish */ | ||
74 | struct xfs_trans *trans; /* current trans (changes over time) */ | ||
75 | xfs_extlen_t total; /* total blocks needed, for 1st bmap */ | ||
76 | int whichfork; /* data or attribute fork */ | ||
77 | xfs_dablk_t blkno; /* blkno of attr leaf of interest */ | ||
78 | int index; /* index of attr of interest in blk */ | ||
79 | xfs_dablk_t rmtblkno; /* remote attr value starting blkno */ | ||
80 | int rmtblkcnt; /* remote attr value block count */ | ||
81 | int rmtvaluelen; /* remote attr value length in bytes */ | ||
82 | xfs_dablk_t blkno2; /* blkno of 2nd attr leaf of interest */ | ||
83 | int index2; /* index of 2nd attr in blk */ | ||
84 | xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */ | ||
85 | int rmtblkcnt2; /* remote attr value block count */ | ||
86 | int rmtvaluelen2; /* remote attr value length in bytes */ | ||
87 | int op_flags; /* operation flags */ | ||
88 | enum xfs_dacmp cmpresult; /* name compare result for lookups */ | ||
89 | } xfs_da_args_t; | ||
90 | |||
/*
 * Operation flags, one bit each.
 */
#define XFS_DA_OP_JUSTCHECK	0x0001	/* only check it is ok, use no space */
#define XFS_DA_OP_RENAME	0x0002	/* part of an atomic rename */
#define XFS_DA_OP_ADDNAME	0x0004	/* an add operation */
#define XFS_DA_OP_OKNOENT	0x0008	/* lookup/add: ENOENT ok, else die */
#define XFS_DA_OP_CILOOKUP	0x0010	/* lookup returns the CI name if found */

/* { flag, "name" } pairs for every operation flag defined above. */
#define XFS_DA_OP_FLAGS \
	{ XFS_DA_OP_JUSTCHECK,	"JUSTCHECK" }, \
	{ XFS_DA_OP_RENAME,	"RENAME" }, \
	{ XFS_DA_OP_ADDNAME,	"ADDNAME" }, \
	{ XFS_DA_OP_OKNOENT,	"OKNOENT" }, \
	{ XFS_DA_OP_CILOOKUP,	"CILOOKUP" }
106 | |||
107 | /* | ||
108 | * Storage for holding state during Btree searches and split/join ops. | ||
109 | * | ||
110 | * Only need space for 5 intermediate nodes. With a minimum of 62-way | ||
111 | * fanout to the Btree, we can support over 900 million directory blocks, | ||
112 | * which is slightly more than enough. | ||
113 | */ | ||
114 | typedef struct xfs_da_state_blk { | ||
115 | struct xfs_buf *bp; /* buffer containing block */ | ||
116 | xfs_dablk_t blkno; /* filesystem blkno of buffer */ | ||
117 | xfs_daddr_t disk_blkno; /* on-disk blkno (in BBs) of buffer */ | ||
118 | int index; /* relevant index into block */ | ||
119 | xfs_dahash_t hashval; /* last hash value in block */ | ||
120 | int magic; /* blk's magic number, ie: blk type */ | ||
121 | } xfs_da_state_blk_t; | ||
122 | |||
123 | typedef struct xfs_da_state_path { | ||
124 | int active; /* number of active levels */ | ||
125 | xfs_da_state_blk_t blk[XFS_DA_NODE_MAXDEPTH]; | ||
126 | } xfs_da_state_path_t; | ||
127 | |||
128 | typedef struct xfs_da_state { | ||
129 | xfs_da_args_t *args; /* filename arguments */ | ||
130 | struct xfs_mount *mp; /* filesystem mount point */ | ||
131 | xfs_da_state_path_t path; /* search/split paths */ | ||
132 | xfs_da_state_path_t altpath; /* alternate path for join */ | ||
133 | unsigned char inleaf; /* insert into 1->lf, 0->splf */ | ||
134 | unsigned char extravalid; /* T/F: extrablk is in use */ | ||
135 | unsigned char extraafter; /* T/F: extrablk is after new */ | ||
136 | xfs_da_state_blk_t extrablk; /* for double-splits on leaves */ | ||
137 | /* for dirv2 extrablk is data */ | ||
138 | } xfs_da_state_t; | ||
139 | |||
/*
 * Helpers for logging the fields of a structure that have changed.
 *
 * XFS_DA_LOGOFF gives the byte offset of ADDR from BASE.  XFS_DA_LOGRANGE
 * expands to TWO comma-separated arguments: the offset of the first byte and
 * the offset of the last byte of a SIZE-byte field at ADDR.
 */
#define XFS_DA_LOGOFF(BASE, ADDR)	((char *)(ADDR) - (char *)(BASE))
#define XFS_DA_LOGRANGE(BASE, ADDR, SIZE)	\
		(uint)(XFS_DA_LOGOFF(BASE, ADDR)), \
		(uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1)
147 | |||
148 | /* | ||
149 | * Name ops for directory and/or attr name operations | ||
150 | */ | ||
151 | struct xfs_nameops { | ||
152 | xfs_dahash_t (*hashname)(struct xfs_name *); | ||
153 | enum xfs_dacmp (*compname)(struct xfs_da_args *, | ||
154 | const unsigned char *, int); | ||
155 | }; | ||
156 | |||
157 | |||
158 | /*======================================================================== | ||
159 | * Function prototypes. | ||
160 | *========================================================================*/ | ||
161 | |||
162 | /* | ||
163 | * Routines used for growing the Btree. | ||
164 | */ | ||
165 | int xfs_da3_node_create(struct xfs_da_args *args, xfs_dablk_t blkno, | ||
166 | int level, struct xfs_buf **bpp, int whichfork); | ||
167 | int xfs_da3_split(xfs_da_state_t *state); | ||
168 | |||
169 | /* | ||
170 | * Routines used for shrinking the Btree. | ||
171 | */ | ||
172 | int xfs_da3_join(xfs_da_state_t *state); | ||
173 | void xfs_da3_fixhashpath(struct xfs_da_state *state, | ||
174 | struct xfs_da_state_path *path_to_to_fix); | ||
175 | |||
/*
 * Routines for finding things in the Btree.
 */
int	xfs_da3_node_lookup_int(struct xfs_da_state *state, int *result);
int	xfs_da3_path_shift(struct xfs_da_state *state,
			   struct xfs_da_state_path *path,
			   int forward, int release, int *result);
182 | /* | ||
183 | * Utility routines. | ||
184 | */ | ||
185 | int xfs_da3_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, | ||
186 | xfs_da_state_blk_t *new_blk); | ||
187 | int xfs_da3_node_read(struct xfs_trans *tp, struct xfs_inode *dp, | ||
188 | xfs_dablk_t bno, xfs_daddr_t mappedbno, | ||
189 | struct xfs_buf **bpp, int which_fork); | ||
190 | |||
191 | /* | ||
192 | * Utility routines. | ||
193 | */ | ||
194 | int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno); | ||
195 | int xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno, | ||
196 | int count); | ||
197 | int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp, | ||
198 | xfs_dablk_t bno, xfs_daddr_t mappedbno, | ||
199 | struct xfs_buf **bp, int whichfork); | ||
200 | int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, | ||
201 | xfs_dablk_t bno, xfs_daddr_t mappedbno, | ||
202 | struct xfs_buf **bpp, int whichfork, | ||
203 | const struct xfs_buf_ops *ops); | ||
204 | xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, | ||
205 | xfs_daddr_t mapped_bno, int whichfork, | ||
206 | const struct xfs_buf_ops *ops); | ||
207 | int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, | ||
208 | struct xfs_buf *dead_buf); | ||
209 | |||
210 | uint xfs_da_hashname(const __uint8_t *name_string, int name_length); | ||
211 | enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, | ||
212 | const unsigned char *name, int len); | ||
213 | |||
214 | |||
215 | xfs_da_state_t *xfs_da_state_alloc(void); | ||
216 | void xfs_da_state_free(xfs_da_state_t *state); | ||
217 | |||
218 | extern struct kmem_zone *xfs_da_state_zone; | ||
219 | extern const struct xfs_nameops xfs_default_nameops; | ||
220 | |||
221 | #endif /* __XFS_DA_BTREE_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c new file mode 100644 index 000000000000..c9aee52a37e2 --- /dev/null +++ b/fs/xfs/libxfs/xfs_da_format.c | |||
@@ -0,0 +1,911 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2013 Red Hat, Inc. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #include "xfs.h" | ||
20 | #include "xfs_fs.h" | ||
21 | #include "xfs_shared.h" | ||
22 | #include "xfs_format.h" | ||
23 | #include "xfs_log_format.h" | ||
24 | #include "xfs_trans_resv.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_da_format.h" | ||
29 | #include "xfs_da_btree.h" | ||
30 | #include "xfs_inode.h" | ||
31 | #include "xfs_dir2.h" | ||
32 | #include "xfs_dir2_priv.h" | ||
33 | |||
34 | /* | ||
35 | * Shortform directory ops | ||
36 | */ | ||
37 | static int | ||
38 | xfs_dir2_sf_entsize( | ||
39 | struct xfs_dir2_sf_hdr *hdr, | ||
40 | int len) | ||
41 | { | ||
42 | int count = sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */ | ||
43 | |||
44 | count += len; /* name */ | ||
45 | count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) : | ||
46 | sizeof(xfs_dir2_ino4_t); /* ino # */ | ||
47 | return count; | ||
48 | } | ||
49 | |||
50 | static int | ||
51 | xfs_dir3_sf_entsize( | ||
52 | struct xfs_dir2_sf_hdr *hdr, | ||
53 | int len) | ||
54 | { | ||
55 | return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t); | ||
56 | } | ||
57 | |||
58 | static struct xfs_dir2_sf_entry * | ||
59 | xfs_dir2_sf_nextentry( | ||
60 | struct xfs_dir2_sf_hdr *hdr, | ||
61 | struct xfs_dir2_sf_entry *sfep) | ||
62 | { | ||
63 | return (struct xfs_dir2_sf_entry *) | ||
64 | ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen)); | ||
65 | } | ||
66 | |||
67 | static struct xfs_dir2_sf_entry * | ||
68 | xfs_dir3_sf_nextentry( | ||
69 | struct xfs_dir2_sf_hdr *hdr, | ||
70 | struct xfs_dir2_sf_entry *sfep) | ||
71 | { | ||
72 | return (struct xfs_dir2_sf_entry *) | ||
73 | ((char *)sfep + xfs_dir3_sf_entsize(hdr, sfep->namelen)); | ||
74 | } | ||
75 | |||
76 | |||
77 | /* | ||
78 | * For filetype enabled shortform directories, the file type field is stored at | ||
79 | * the end of the name. Because it's only a single byte, endian conversion is | ||
80 | * not necessary. For non-filetype enabled directories, the type is always | ||
81 | * unknown and we never store the value. | ||
82 | */ | ||
83 | static __uint8_t | ||
84 | xfs_dir2_sfe_get_ftype( | ||
85 | struct xfs_dir2_sf_entry *sfep) | ||
86 | { | ||
87 | return XFS_DIR3_FT_UNKNOWN; | ||
88 | } | ||
89 | |||
90 | static void | ||
91 | xfs_dir2_sfe_put_ftype( | ||
92 | struct xfs_dir2_sf_entry *sfep, | ||
93 | __uint8_t ftype) | ||
94 | { | ||
95 | ASSERT(ftype < XFS_DIR3_FT_MAX); | ||
96 | } | ||
97 | |||
98 | static __uint8_t | ||
99 | xfs_dir3_sfe_get_ftype( | ||
100 | struct xfs_dir2_sf_entry *sfep) | ||
101 | { | ||
102 | __uint8_t ftype; | ||
103 | |||
104 | ftype = sfep->name[sfep->namelen]; | ||
105 | if (ftype >= XFS_DIR3_FT_MAX) | ||
106 | return XFS_DIR3_FT_UNKNOWN; | ||
107 | return ftype; | ||
108 | } | ||
109 | |||
110 | static void | ||
111 | xfs_dir3_sfe_put_ftype( | ||
112 | struct xfs_dir2_sf_entry *sfep, | ||
113 | __uint8_t ftype) | ||
114 | { | ||
115 | ASSERT(ftype < XFS_DIR3_FT_MAX); | ||
116 | |||
117 | sfep->name[sfep->namelen] = ftype; | ||
118 | } | ||
119 | |||
120 | /* | ||
121 | * Inode numbers in short-form directories can come in two versions, | ||
122 | * either 4 bytes or 8 bytes wide. These helpers deal with the | ||
123 | * two forms transparently by looking at the header's i8count field. | ||
124 | * | ||
125 | * For 64-bit inode number the most significant byte must be zero. | ||
126 | */ | ||
127 | static xfs_ino_t | ||
128 | xfs_dir2_sf_get_ino( | ||
129 | struct xfs_dir2_sf_hdr *hdr, | ||
130 | xfs_dir2_inou_t *from) | ||
131 | { | ||
132 | if (hdr->i8count) | ||
133 | return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL; | ||
134 | else | ||
135 | return get_unaligned_be32(&from->i4.i); | ||
136 | } | ||
137 | |||
138 | static void | ||
139 | xfs_dir2_sf_put_ino( | ||
140 | struct xfs_dir2_sf_hdr *hdr, | ||
141 | xfs_dir2_inou_t *to, | ||
142 | xfs_ino_t ino) | ||
143 | { | ||
144 | ASSERT((ino & 0xff00000000000000ULL) == 0); | ||
145 | |||
146 | if (hdr->i8count) | ||
147 | put_unaligned_be64(ino, &to->i8.i); | ||
148 | else | ||
149 | put_unaligned_be32(ino, &to->i4.i); | ||
150 | } | ||
151 | |||
152 | static xfs_ino_t | ||
153 | xfs_dir2_sf_get_parent_ino( | ||
154 | struct xfs_dir2_sf_hdr *hdr) | ||
155 | { | ||
156 | return xfs_dir2_sf_get_ino(hdr, &hdr->parent); | ||
157 | } | ||
158 | |||
159 | static void | ||
160 | xfs_dir2_sf_put_parent_ino( | ||
161 | struct xfs_dir2_sf_hdr *hdr, | ||
162 | xfs_ino_t ino) | ||
163 | { | ||
164 | xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino); | ||
165 | } | ||
166 | |||
167 | /* | ||
168 | * In short-form directory entries the inode numbers are stored at variable | ||
169 | * offset behind the entry name. If the entry stores a filetype value, then it | ||
170 | * sits between the name and the inode number. Hence the inode numbers may only | ||
171 | * be accessed through the helpers below. | ||
172 | */ | ||
173 | static xfs_ino_t | ||
174 | xfs_dir2_sfe_get_ino( | ||
175 | struct xfs_dir2_sf_hdr *hdr, | ||
176 | struct xfs_dir2_sf_entry *sfep) | ||
177 | { | ||
178 | return xfs_dir2_sf_get_ino(hdr, | ||
179 | (xfs_dir2_inou_t *)&sfep->name[sfep->namelen]); | ||
180 | } | ||
181 | |||
182 | static void | ||
183 | xfs_dir2_sfe_put_ino( | ||
184 | struct xfs_dir2_sf_hdr *hdr, | ||
185 | struct xfs_dir2_sf_entry *sfep, | ||
186 | xfs_ino_t ino) | ||
187 | { | ||
188 | xfs_dir2_sf_put_ino(hdr, | ||
189 | (xfs_dir2_inou_t *)&sfep->name[sfep->namelen], ino); | ||
190 | } | ||
191 | |||
192 | static xfs_ino_t | ||
193 | xfs_dir3_sfe_get_ino( | ||
194 | struct xfs_dir2_sf_hdr *hdr, | ||
195 | struct xfs_dir2_sf_entry *sfep) | ||
196 | { | ||
197 | return xfs_dir2_sf_get_ino(hdr, | ||
198 | (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1]); | ||
199 | } | ||
200 | |||
201 | static void | ||
202 | xfs_dir3_sfe_put_ino( | ||
203 | struct xfs_dir2_sf_hdr *hdr, | ||
204 | struct xfs_dir2_sf_entry *sfep, | ||
205 | xfs_ino_t ino) | ||
206 | { | ||
207 | xfs_dir2_sf_put_ino(hdr, | ||
208 | (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1], ino); | ||
209 | } | ||
210 | |||
211 | |||
212 | /* | ||
213 | * Directory data block operations | ||
214 | */ | ||
215 | |||
216 | /* | ||
217 | * For special situations, the dirent size ends up fixed because we always know | ||
218 | * what the size of the entry is. That's true for the "." and "..", and | ||
219 | * therefore we know that they are a fixed size and hence their offsets are | ||
220 | * constant, as is the first entry. | ||
221 | * | ||
222 | * Hence, this calculation is written as a macro to be able to be calculated at | ||
223 | * compile time and so certain offsets can be calculated directly in the | ||
224 | * structure initialiser via the macro. There are two macros - one for dirents | ||
225 | * with ftype and without so there are no unresolvable conditionals in the | ||
226 | * calculations. We also use round_up() as XFS_DIR2_DATA_ALIGN is always a power | ||
227 | * of 2 and the compiler doesn't reject it (unlike roundup()). | ||
228 | */ | ||
/*
 * Size of a data entry with an (n)-byte name: everything up to the name,
 * the name itself and one xfs_dir2_data_off_t, rounded up to
 * XFS_DIR2_DATA_ALIGN.
 */
#define XFS_DIR2_DATA_ENTSIZE(n)					\
	round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) +	\
		 sizeof(xfs_dir2_data_off_t)), XFS_DIR2_DATA_ALIGN)

/* The same, with one more byte for the file type. */
#define XFS_DIR3_DATA_ENTSIZE(n)					\
	round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) +	\
		 sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)),	\
		XFS_DIR2_DATA_ALIGN)
237 | |||
/* Function form of XFS_DIR2_DATA_ENTSIZE() for a name of @n bytes. */
static int
xfs_dir2_data_entsize(
	int	n)
{
	int	size = XFS_DIR2_DATA_ENTSIZE(n);

	return size;
}
244 | |||
/* Function form of XFS_DIR3_DATA_ENTSIZE() for a name of @n bytes. */
static int
xfs_dir3_data_entsize(
	int	n)
{
	int	size = XFS_DIR3_DATA_ENTSIZE(n);

	return size;
}
251 | |||
252 | static __uint8_t | ||
253 | xfs_dir2_data_get_ftype( | ||
254 | struct xfs_dir2_data_entry *dep) | ||
255 | { | ||
256 | return XFS_DIR3_FT_UNKNOWN; | ||
257 | } | ||
258 | |||
259 | static void | ||
260 | xfs_dir2_data_put_ftype( | ||
261 | struct xfs_dir2_data_entry *dep, | ||
262 | __uint8_t ftype) | ||
263 | { | ||
264 | ASSERT(ftype < XFS_DIR3_FT_MAX); | ||
265 | } | ||
266 | |||
267 | static __uint8_t | ||
268 | xfs_dir3_data_get_ftype( | ||
269 | struct xfs_dir2_data_entry *dep) | ||
270 | { | ||
271 | __uint8_t ftype = dep->name[dep->namelen]; | ||
272 | |||
273 | ASSERT(ftype < XFS_DIR3_FT_MAX); | ||
274 | if (ftype >= XFS_DIR3_FT_MAX) | ||
275 | return XFS_DIR3_FT_UNKNOWN; | ||
276 | return ftype; | ||
277 | } | ||
278 | |||
279 | static void | ||
280 | xfs_dir3_data_put_ftype( | ||
281 | struct xfs_dir2_data_entry *dep, | ||
282 | __uint8_t type) | ||
283 | { | ||
284 | ASSERT(type < XFS_DIR3_FT_MAX); | ||
285 | ASSERT(dep->namelen != 0); | ||
286 | |||
287 | dep->name[dep->namelen] = type; | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * Pointer to an entry's tag word. | ||
292 | */ | ||
293 | static __be16 * | ||
294 | xfs_dir2_data_entry_tag_p( | ||
295 | struct xfs_dir2_data_entry *dep) | ||
296 | { | ||
297 | return (__be16 *)((char *)dep + | ||
298 | xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); | ||
299 | } | ||
300 | |||
301 | static __be16 * | ||
302 | xfs_dir3_data_entry_tag_p( | ||
303 | struct xfs_dir2_data_entry *dep) | ||
304 | { | ||
305 | return (__be16 *)((char *)dep + | ||
306 | xfs_dir3_data_entsize(dep->namelen) - sizeof(__be16)); | ||
307 | } | ||
308 | |||
309 | /* | ||
310 | * location of . and .. in data space (always block 0) | ||
311 | */ | ||
312 | static struct xfs_dir2_data_entry * | ||
313 | xfs_dir2_data_dot_entry_p( | ||
314 | struct xfs_dir2_data_hdr *hdr) | ||
315 | { | ||
316 | return (struct xfs_dir2_data_entry *) | ||
317 | ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); | ||
318 | } | ||
319 | |||
320 | static struct xfs_dir2_data_entry * | ||
321 | xfs_dir2_data_dotdot_entry_p( | ||
322 | struct xfs_dir2_data_hdr *hdr) | ||
323 | { | ||
324 | return (struct xfs_dir2_data_entry *) | ||
325 | ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + | ||
326 | XFS_DIR2_DATA_ENTSIZE(1)); | ||
327 | } | ||
328 | |||
329 | static struct xfs_dir2_data_entry * | ||
330 | xfs_dir2_data_first_entry_p( | ||
331 | struct xfs_dir2_data_hdr *hdr) | ||
332 | { | ||
333 | return (struct xfs_dir2_data_entry *) | ||
334 | ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + | ||
335 | XFS_DIR2_DATA_ENTSIZE(1) + | ||
336 | XFS_DIR2_DATA_ENTSIZE(2)); | ||
337 | } | ||
338 | |||
339 | static struct xfs_dir2_data_entry * | ||
340 | xfs_dir2_ftype_data_dotdot_entry_p( | ||
341 | struct xfs_dir2_data_hdr *hdr) | ||
342 | { | ||
343 | return (struct xfs_dir2_data_entry *) | ||
344 | ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + | ||
345 | XFS_DIR3_DATA_ENTSIZE(1)); | ||
346 | } | ||
347 | |||
348 | static struct xfs_dir2_data_entry * | ||
349 | xfs_dir2_ftype_data_first_entry_p( | ||
350 | struct xfs_dir2_data_hdr *hdr) | ||
351 | { | ||
352 | return (struct xfs_dir2_data_entry *) | ||
353 | ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + | ||
354 | XFS_DIR3_DATA_ENTSIZE(1) + | ||
355 | XFS_DIR3_DATA_ENTSIZE(2)); | ||
356 | } | ||
357 | |||
358 | static struct xfs_dir2_data_entry * | ||
359 | xfs_dir3_data_dot_entry_p( | ||
360 | struct xfs_dir2_data_hdr *hdr) | ||
361 | { | ||
362 | return (struct xfs_dir2_data_entry *) | ||
363 | ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); | ||
364 | } | ||
365 | |||
366 | static struct xfs_dir2_data_entry * | ||
367 | xfs_dir3_data_dotdot_entry_p( | ||
368 | struct xfs_dir2_data_hdr *hdr) | ||
369 | { | ||
370 | return (struct xfs_dir2_data_entry *) | ||
371 | ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) + | ||
372 | XFS_DIR3_DATA_ENTSIZE(1)); | ||
373 | } | ||
374 | |||
375 | static struct xfs_dir2_data_entry * | ||
376 | xfs_dir3_data_first_entry_p( | ||
377 | struct xfs_dir2_data_hdr *hdr) | ||
378 | { | ||
379 | return (struct xfs_dir2_data_entry *) | ||
380 | ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) + | ||
381 | XFS_DIR3_DATA_ENTSIZE(1) + | ||
382 | XFS_DIR3_DATA_ENTSIZE(2)); | ||
383 | } | ||
384 | |||
385 | static struct xfs_dir2_data_free * | ||
386 | xfs_dir2_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) | ||
387 | { | ||
388 | return hdr->bestfree; | ||
389 | } | ||
390 | |||
391 | static struct xfs_dir2_data_free * | ||
392 | xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) | ||
393 | { | ||
394 | return ((struct xfs_dir3_data_hdr *)hdr)->best_free; | ||
395 | } | ||
396 | |||
397 | static struct xfs_dir2_data_entry * | ||
398 | xfs_dir2_data_entry_p(struct xfs_dir2_data_hdr *hdr) | ||
399 | { | ||
400 | return (struct xfs_dir2_data_entry *) | ||
401 | ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); | ||
402 | } | ||
403 | |||
404 | static struct xfs_dir2_data_unused * | ||
405 | xfs_dir2_data_unused_p(struct xfs_dir2_data_hdr *hdr) | ||
406 | { | ||
407 | return (struct xfs_dir2_data_unused *) | ||
408 | ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); | ||
409 | } | ||
410 | |||
411 | static struct xfs_dir2_data_entry * | ||
412 | xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr) | ||
413 | { | ||
414 | return (struct xfs_dir2_data_entry *) | ||
415 | ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); | ||
416 | } | ||
417 | |||
418 | static struct xfs_dir2_data_unused * | ||
419 | xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr) | ||
420 | { | ||
421 | return (struct xfs_dir2_data_unused *) | ||
422 | ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); | ||
423 | } | ||
424 | |||
425 | |||
426 | /* | ||
427 | * Directory Leaf block operations | ||
428 | */ | ||
429 | static int | ||
430 | xfs_dir2_max_leaf_ents(struct xfs_da_geometry *geo) | ||
431 | { | ||
432 | return (geo->blksize - sizeof(struct xfs_dir2_leaf_hdr)) / | ||
433 | (uint)sizeof(struct xfs_dir2_leaf_entry); | ||
434 | } | ||
435 | |||
436 | static struct xfs_dir2_leaf_entry * | ||
437 | xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp) | ||
438 | { | ||
439 | return lp->__ents; | ||
440 | } | ||
441 | |||
442 | static int | ||
443 | xfs_dir3_max_leaf_ents(struct xfs_da_geometry *geo) | ||
444 | { | ||
445 | return (geo->blksize - sizeof(struct xfs_dir3_leaf_hdr)) / | ||
446 | (uint)sizeof(struct xfs_dir2_leaf_entry); | ||
447 | } | ||
448 | |||
449 | static struct xfs_dir2_leaf_entry * | ||
450 | xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp) | ||
451 | { | ||
452 | return ((struct xfs_dir3_leaf *)lp)->__ents; | ||
453 | } | ||
454 | |||
455 | static void | ||
456 | xfs_dir2_leaf_hdr_from_disk( | ||
457 | struct xfs_dir3_icleaf_hdr *to, | ||
458 | struct xfs_dir2_leaf *from) | ||
459 | { | ||
460 | to->forw = be32_to_cpu(from->hdr.info.forw); | ||
461 | to->back = be32_to_cpu(from->hdr.info.back); | ||
462 | to->magic = be16_to_cpu(from->hdr.info.magic); | ||
463 | to->count = be16_to_cpu(from->hdr.count); | ||
464 | to->stale = be16_to_cpu(from->hdr.stale); | ||
465 | |||
466 | ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC || | ||
467 | to->magic == XFS_DIR2_LEAFN_MAGIC); | ||
468 | } | ||
469 | |||
470 | static void | ||
471 | xfs_dir2_leaf_hdr_to_disk( | ||
472 | struct xfs_dir2_leaf *to, | ||
473 | struct xfs_dir3_icleaf_hdr *from) | ||
474 | { | ||
475 | ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC || | ||
476 | from->magic == XFS_DIR2_LEAFN_MAGIC); | ||
477 | |||
478 | to->hdr.info.forw = cpu_to_be32(from->forw); | ||
479 | to->hdr.info.back = cpu_to_be32(from->back); | ||
480 | to->hdr.info.magic = cpu_to_be16(from->magic); | ||
481 | to->hdr.count = cpu_to_be16(from->count); | ||
482 | to->hdr.stale = cpu_to_be16(from->stale); | ||
483 | } | ||
484 | |||
485 | static void | ||
486 | xfs_dir3_leaf_hdr_from_disk( | ||
487 | struct xfs_dir3_icleaf_hdr *to, | ||
488 | struct xfs_dir2_leaf *from) | ||
489 | { | ||
490 | struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from; | ||
491 | |||
492 | to->forw = be32_to_cpu(hdr3->info.hdr.forw); | ||
493 | to->back = be32_to_cpu(hdr3->info.hdr.back); | ||
494 | to->magic = be16_to_cpu(hdr3->info.hdr.magic); | ||
495 | to->count = be16_to_cpu(hdr3->count); | ||
496 | to->stale = be16_to_cpu(hdr3->stale); | ||
497 | |||
498 | ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC || | ||
499 | to->magic == XFS_DIR3_LEAFN_MAGIC); | ||
500 | } | ||
501 | |||
502 | static void | ||
503 | xfs_dir3_leaf_hdr_to_disk( | ||
504 | struct xfs_dir2_leaf *to, | ||
505 | struct xfs_dir3_icleaf_hdr *from) | ||
506 | { | ||
507 | struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to; | ||
508 | |||
509 | ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC || | ||
510 | from->magic == XFS_DIR3_LEAFN_MAGIC); | ||
511 | |||
512 | hdr3->info.hdr.forw = cpu_to_be32(from->forw); | ||
513 | hdr3->info.hdr.back = cpu_to_be32(from->back); | ||
514 | hdr3->info.hdr.magic = cpu_to_be16(from->magic); | ||
515 | hdr3->count = cpu_to_be16(from->count); | ||
516 | hdr3->stale = cpu_to_be16(from->stale); | ||
517 | } | ||
518 | |||
519 | |||
520 | /* | ||
521 | * Directory/Attribute Node block operations | ||
522 | */ | ||
523 | static struct xfs_da_node_entry * | ||
524 | xfs_da2_node_tree_p(struct xfs_da_intnode *dap) | ||
525 | { | ||
526 | return dap->__btree; | ||
527 | } | ||
528 | |||
529 | static struct xfs_da_node_entry * | ||
530 | xfs_da3_node_tree_p(struct xfs_da_intnode *dap) | ||
531 | { | ||
532 | return ((struct xfs_da3_intnode *)dap)->__btree; | ||
533 | } | ||
534 | |||
535 | static void | ||
536 | xfs_da2_node_hdr_from_disk( | ||
537 | struct xfs_da3_icnode_hdr *to, | ||
538 | struct xfs_da_intnode *from) | ||
539 | { | ||
540 | ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); | ||
541 | to->forw = be32_to_cpu(from->hdr.info.forw); | ||
542 | to->back = be32_to_cpu(from->hdr.info.back); | ||
543 | to->magic = be16_to_cpu(from->hdr.info.magic); | ||
544 | to->count = be16_to_cpu(from->hdr.__count); | ||
545 | to->level = be16_to_cpu(from->hdr.__level); | ||
546 | } | ||
547 | |||
548 | static void | ||
549 | xfs_da2_node_hdr_to_disk( | ||
550 | struct xfs_da_intnode *to, | ||
551 | struct xfs_da3_icnode_hdr *from) | ||
552 | { | ||
553 | ASSERT(from->magic == XFS_DA_NODE_MAGIC); | ||
554 | to->hdr.info.forw = cpu_to_be32(from->forw); | ||
555 | to->hdr.info.back = cpu_to_be32(from->back); | ||
556 | to->hdr.info.magic = cpu_to_be16(from->magic); | ||
557 | to->hdr.__count = cpu_to_be16(from->count); | ||
558 | to->hdr.__level = cpu_to_be16(from->level); | ||
559 | } | ||
560 | |||
561 | static void | ||
562 | xfs_da3_node_hdr_from_disk( | ||
563 | struct xfs_da3_icnode_hdr *to, | ||
564 | struct xfs_da_intnode *from) | ||
565 | { | ||
566 | struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from; | ||
567 | |||
568 | ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); | ||
569 | to->forw = be32_to_cpu(hdr3->info.hdr.forw); | ||
570 | to->back = be32_to_cpu(hdr3->info.hdr.back); | ||
571 | to->magic = be16_to_cpu(hdr3->info.hdr.magic); | ||
572 | to->count = be16_to_cpu(hdr3->__count); | ||
573 | to->level = be16_to_cpu(hdr3->__level); | ||
574 | } | ||
575 | |||
576 | static void | ||
577 | xfs_da3_node_hdr_to_disk( | ||
578 | struct xfs_da_intnode *to, | ||
579 | struct xfs_da3_icnode_hdr *from) | ||
580 | { | ||
581 | struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to; | ||
582 | |||
583 | ASSERT(from->magic == XFS_DA3_NODE_MAGIC); | ||
584 | hdr3->info.hdr.forw = cpu_to_be32(from->forw); | ||
585 | hdr3->info.hdr.back = cpu_to_be32(from->back); | ||
586 | hdr3->info.hdr.magic = cpu_to_be16(from->magic); | ||
587 | hdr3->__count = cpu_to_be16(from->count); | ||
588 | hdr3->__level = cpu_to_be16(from->level); | ||
589 | } | ||
590 | |||
591 | |||
592 | /* | ||
593 | * Directory free space block operations | ||
594 | */ | ||
595 | static int | ||
596 | xfs_dir2_free_max_bests(struct xfs_da_geometry *geo) | ||
597 | { | ||
598 | return (geo->blksize - sizeof(struct xfs_dir2_free_hdr)) / | ||
599 | sizeof(xfs_dir2_data_off_t); | ||
600 | } | ||
601 | |||
602 | static __be16 * | ||
603 | xfs_dir2_free_bests_p(struct xfs_dir2_free *free) | ||
604 | { | ||
605 | return (__be16 *)((char *)free + sizeof(struct xfs_dir2_free_hdr)); | ||
606 | } | ||
607 | |||
608 | /* | ||
609 | * Convert data space db to the corresponding free db. | ||
610 | */ | ||
611 | static xfs_dir2_db_t | ||
612 | xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) | ||
613 | { | ||
614 | return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + | ||
615 | (db / xfs_dir2_free_max_bests(geo)); | ||
616 | } | ||
617 | |||
618 | /* | ||
619 | * Convert data space db to the corresponding index in a free db. | ||
620 | */ | ||
621 | static int | ||
622 | xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) | ||
623 | { | ||
624 | return db % xfs_dir2_free_max_bests(geo); | ||
625 | } | ||
626 | |||
627 | static int | ||
628 | xfs_dir3_free_max_bests(struct xfs_da_geometry *geo) | ||
629 | { | ||
630 | return (geo->blksize - sizeof(struct xfs_dir3_free_hdr)) / | ||
631 | sizeof(xfs_dir2_data_off_t); | ||
632 | } | ||
633 | |||
634 | static __be16 * | ||
635 | xfs_dir3_free_bests_p(struct xfs_dir2_free *free) | ||
636 | { | ||
637 | return (__be16 *)((char *)free + sizeof(struct xfs_dir3_free_hdr)); | ||
638 | } | ||
639 | |||
640 | /* | ||
641 | * Convert data space db to the corresponding free db. | ||
642 | */ | ||
643 | static xfs_dir2_db_t | ||
644 | xfs_dir3_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) | ||
645 | { | ||
646 | return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + | ||
647 | (db / xfs_dir3_free_max_bests(geo)); | ||
648 | } | ||
649 | |||
650 | /* | ||
651 | * Convert data space db to the corresponding index in a free db. | ||
652 | */ | ||
653 | static int | ||
654 | xfs_dir3_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) | ||
655 | { | ||
656 | return db % xfs_dir3_free_max_bests(geo); | ||
657 | } | ||
658 | |||
659 | static void | ||
660 | xfs_dir2_free_hdr_from_disk( | ||
661 | struct xfs_dir3_icfree_hdr *to, | ||
662 | struct xfs_dir2_free *from) | ||
663 | { | ||
664 | to->magic = be32_to_cpu(from->hdr.magic); | ||
665 | to->firstdb = be32_to_cpu(from->hdr.firstdb); | ||
666 | to->nvalid = be32_to_cpu(from->hdr.nvalid); | ||
667 | to->nused = be32_to_cpu(from->hdr.nused); | ||
668 | ASSERT(to->magic == XFS_DIR2_FREE_MAGIC); | ||
669 | } | ||
670 | |||
671 | static void | ||
672 | xfs_dir2_free_hdr_to_disk( | ||
673 | struct xfs_dir2_free *to, | ||
674 | struct xfs_dir3_icfree_hdr *from) | ||
675 | { | ||
676 | ASSERT(from->magic == XFS_DIR2_FREE_MAGIC); | ||
677 | |||
678 | to->hdr.magic = cpu_to_be32(from->magic); | ||
679 | to->hdr.firstdb = cpu_to_be32(from->firstdb); | ||
680 | to->hdr.nvalid = cpu_to_be32(from->nvalid); | ||
681 | to->hdr.nused = cpu_to_be32(from->nused); | ||
682 | } | ||
683 | |||
684 | static void | ||
685 | xfs_dir3_free_hdr_from_disk( | ||
686 | struct xfs_dir3_icfree_hdr *to, | ||
687 | struct xfs_dir2_free *from) | ||
688 | { | ||
689 | struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from; | ||
690 | |||
691 | to->magic = be32_to_cpu(hdr3->hdr.magic); | ||
692 | to->firstdb = be32_to_cpu(hdr3->firstdb); | ||
693 | to->nvalid = be32_to_cpu(hdr3->nvalid); | ||
694 | to->nused = be32_to_cpu(hdr3->nused); | ||
695 | |||
696 | ASSERT(to->magic == XFS_DIR3_FREE_MAGIC); | ||
697 | } | ||
698 | |||
699 | static void | ||
700 | xfs_dir3_free_hdr_to_disk( | ||
701 | struct xfs_dir2_free *to, | ||
702 | struct xfs_dir3_icfree_hdr *from) | ||
703 | { | ||
704 | struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to; | ||
705 | |||
706 | ASSERT(from->magic == XFS_DIR3_FREE_MAGIC); | ||
707 | |||
708 | hdr3->hdr.magic = cpu_to_be32(from->magic); | ||
709 | hdr3->firstdb = cpu_to_be32(from->firstdb); | ||
710 | hdr3->nvalid = cpu_to_be32(from->nvalid); | ||
711 | hdr3->nused = cpu_to_be32(from->nused); | ||
712 | } | ||
713 | |||
714 | static const struct xfs_dir_ops xfs_dir2_ops = { | ||
715 | .sf_entsize = xfs_dir2_sf_entsize, | ||
716 | .sf_nextentry = xfs_dir2_sf_nextentry, | ||
717 | .sf_get_ftype = xfs_dir2_sfe_get_ftype, | ||
718 | .sf_put_ftype = xfs_dir2_sfe_put_ftype, | ||
719 | .sf_get_ino = xfs_dir2_sfe_get_ino, | ||
720 | .sf_put_ino = xfs_dir2_sfe_put_ino, | ||
721 | .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, | ||
722 | .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, | ||
723 | |||
724 | .data_entsize = xfs_dir2_data_entsize, | ||
725 | .data_get_ftype = xfs_dir2_data_get_ftype, | ||
726 | .data_put_ftype = xfs_dir2_data_put_ftype, | ||
727 | .data_entry_tag_p = xfs_dir2_data_entry_tag_p, | ||
728 | .data_bestfree_p = xfs_dir2_data_bestfree_p, | ||
729 | |||
730 | .data_dot_offset = sizeof(struct xfs_dir2_data_hdr), | ||
731 | .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) + | ||
732 | XFS_DIR2_DATA_ENTSIZE(1), | ||
733 | .data_first_offset = sizeof(struct xfs_dir2_data_hdr) + | ||
734 | XFS_DIR2_DATA_ENTSIZE(1) + | ||
735 | XFS_DIR2_DATA_ENTSIZE(2), | ||
736 | .data_entry_offset = sizeof(struct xfs_dir2_data_hdr), | ||
737 | |||
738 | .data_dot_entry_p = xfs_dir2_data_dot_entry_p, | ||
739 | .data_dotdot_entry_p = xfs_dir2_data_dotdot_entry_p, | ||
740 | .data_first_entry_p = xfs_dir2_data_first_entry_p, | ||
741 | .data_entry_p = xfs_dir2_data_entry_p, | ||
742 | .data_unused_p = xfs_dir2_data_unused_p, | ||
743 | |||
744 | .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr), | ||
745 | .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk, | ||
746 | .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk, | ||
747 | .leaf_max_ents = xfs_dir2_max_leaf_ents, | ||
748 | .leaf_ents_p = xfs_dir2_leaf_ents_p, | ||
749 | |||
750 | .node_hdr_size = sizeof(struct xfs_da_node_hdr), | ||
751 | .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, | ||
752 | .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, | ||
753 | .node_tree_p = xfs_da2_node_tree_p, | ||
754 | |||
755 | .free_hdr_size = sizeof(struct xfs_dir2_free_hdr), | ||
756 | .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk, | ||
757 | .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk, | ||
758 | .free_max_bests = xfs_dir2_free_max_bests, | ||
759 | .free_bests_p = xfs_dir2_free_bests_p, | ||
760 | .db_to_fdb = xfs_dir2_db_to_fdb, | ||
761 | .db_to_fdindex = xfs_dir2_db_to_fdindex, | ||
762 | }; | ||
763 | |||
764 | static const struct xfs_dir_ops xfs_dir2_ftype_ops = { | ||
765 | .sf_entsize = xfs_dir3_sf_entsize, | ||
766 | .sf_nextentry = xfs_dir3_sf_nextentry, | ||
767 | .sf_get_ftype = xfs_dir3_sfe_get_ftype, | ||
768 | .sf_put_ftype = xfs_dir3_sfe_put_ftype, | ||
769 | .sf_get_ino = xfs_dir3_sfe_get_ino, | ||
770 | .sf_put_ino = xfs_dir3_sfe_put_ino, | ||
771 | .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, | ||
772 | .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, | ||
773 | |||
774 | .data_entsize = xfs_dir3_data_entsize, | ||
775 | .data_get_ftype = xfs_dir3_data_get_ftype, | ||
776 | .data_put_ftype = xfs_dir3_data_put_ftype, | ||
777 | .data_entry_tag_p = xfs_dir3_data_entry_tag_p, | ||
778 | .data_bestfree_p = xfs_dir2_data_bestfree_p, | ||
779 | |||
780 | .data_dot_offset = sizeof(struct xfs_dir2_data_hdr), | ||
781 | .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) + | ||
782 | XFS_DIR3_DATA_ENTSIZE(1), | ||
783 | .data_first_offset = sizeof(struct xfs_dir2_data_hdr) + | ||
784 | XFS_DIR3_DATA_ENTSIZE(1) + | ||
785 | XFS_DIR3_DATA_ENTSIZE(2), | ||
786 | .data_entry_offset = sizeof(struct xfs_dir2_data_hdr), | ||
787 | |||
788 | .data_dot_entry_p = xfs_dir2_data_dot_entry_p, | ||
789 | .data_dotdot_entry_p = xfs_dir2_ftype_data_dotdot_entry_p, | ||
790 | .data_first_entry_p = xfs_dir2_ftype_data_first_entry_p, | ||
791 | .data_entry_p = xfs_dir2_data_entry_p, | ||
792 | .data_unused_p = xfs_dir2_data_unused_p, | ||
793 | |||
794 | .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr), | ||
795 | .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk, | ||
796 | .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk, | ||
797 | .leaf_max_ents = xfs_dir2_max_leaf_ents, | ||
798 | .leaf_ents_p = xfs_dir2_leaf_ents_p, | ||
799 | |||
800 | .node_hdr_size = sizeof(struct xfs_da_node_hdr), | ||
801 | .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, | ||
802 | .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, | ||
803 | .node_tree_p = xfs_da2_node_tree_p, | ||
804 | |||
805 | .free_hdr_size = sizeof(struct xfs_dir2_free_hdr), | ||
806 | .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk, | ||
807 | .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk, | ||
808 | .free_max_bests = xfs_dir2_free_max_bests, | ||
809 | .free_bests_p = xfs_dir2_free_bests_p, | ||
810 | .db_to_fdb = xfs_dir2_db_to_fdb, | ||
811 | .db_to_fdindex = xfs_dir2_db_to_fdindex, | ||
812 | }; | ||
813 | |||
814 | static const struct xfs_dir_ops xfs_dir3_ops = { | ||
815 | .sf_entsize = xfs_dir3_sf_entsize, | ||
816 | .sf_nextentry = xfs_dir3_sf_nextentry, | ||
817 | .sf_get_ftype = xfs_dir3_sfe_get_ftype, | ||
818 | .sf_put_ftype = xfs_dir3_sfe_put_ftype, | ||
819 | .sf_get_ino = xfs_dir3_sfe_get_ino, | ||
820 | .sf_put_ino = xfs_dir3_sfe_put_ino, | ||
821 | .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, | ||
822 | .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, | ||
823 | |||
824 | .data_entsize = xfs_dir3_data_entsize, | ||
825 | .data_get_ftype = xfs_dir3_data_get_ftype, | ||
826 | .data_put_ftype = xfs_dir3_data_put_ftype, | ||
827 | .data_entry_tag_p = xfs_dir3_data_entry_tag_p, | ||
828 | .data_bestfree_p = xfs_dir3_data_bestfree_p, | ||
829 | |||
830 | .data_dot_offset = sizeof(struct xfs_dir3_data_hdr), | ||
831 | .data_dotdot_offset = sizeof(struct xfs_dir3_data_hdr) + | ||
832 | XFS_DIR3_DATA_ENTSIZE(1), | ||
833 | .data_first_offset = sizeof(struct xfs_dir3_data_hdr) + | ||
834 | XFS_DIR3_DATA_ENTSIZE(1) + | ||
835 | XFS_DIR3_DATA_ENTSIZE(2), | ||
836 | .data_entry_offset = sizeof(struct xfs_dir3_data_hdr), | ||
837 | |||
838 | .data_dot_entry_p = xfs_dir3_data_dot_entry_p, | ||
839 | .data_dotdot_entry_p = xfs_dir3_data_dotdot_entry_p, | ||
840 | .data_first_entry_p = xfs_dir3_data_first_entry_p, | ||
841 | .data_entry_p = xfs_dir3_data_entry_p, | ||
842 | .data_unused_p = xfs_dir3_data_unused_p, | ||
843 | |||
844 | .leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr), | ||
845 | .leaf_hdr_to_disk = xfs_dir3_leaf_hdr_to_disk, | ||
846 | .leaf_hdr_from_disk = xfs_dir3_leaf_hdr_from_disk, | ||
847 | .leaf_max_ents = xfs_dir3_max_leaf_ents, | ||
848 | .leaf_ents_p = xfs_dir3_leaf_ents_p, | ||
849 | |||
850 | .node_hdr_size = sizeof(struct xfs_da3_node_hdr), | ||
851 | .node_hdr_to_disk = xfs_da3_node_hdr_to_disk, | ||
852 | .node_hdr_from_disk = xfs_da3_node_hdr_from_disk, | ||
853 | .node_tree_p = xfs_da3_node_tree_p, | ||
854 | |||
855 | .free_hdr_size = sizeof(struct xfs_dir3_free_hdr), | ||
856 | .free_hdr_to_disk = xfs_dir3_free_hdr_to_disk, | ||
857 | .free_hdr_from_disk = xfs_dir3_free_hdr_from_disk, | ||
858 | .free_max_bests = xfs_dir3_free_max_bests, | ||
859 | .free_bests_p = xfs_dir3_free_bests_p, | ||
860 | .db_to_fdb = xfs_dir3_db_to_fdb, | ||
861 | .db_to_fdindex = xfs_dir3_db_to_fdindex, | ||
862 | }; | ||
863 | |||
864 | static const struct xfs_dir_ops xfs_dir2_nondir_ops = { | ||
865 | .node_hdr_size = sizeof(struct xfs_da_node_hdr), | ||
866 | .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, | ||
867 | .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, | ||
868 | .node_tree_p = xfs_da2_node_tree_p, | ||
869 | }; | ||
870 | |||
871 | static const struct xfs_dir_ops xfs_dir3_nondir_ops = { | ||
872 | .node_hdr_size = sizeof(struct xfs_da3_node_hdr), | ||
873 | .node_hdr_to_disk = xfs_da3_node_hdr_to_disk, | ||
874 | .node_hdr_from_disk = xfs_da3_node_hdr_from_disk, | ||
875 | .node_tree_p = xfs_da3_node_tree_p, | ||
876 | }; | ||
877 | |||
878 | /* | ||
879 | * Return the ops structure according to the current config. If we are passed | ||
880 | * an inode, then that overrides the default config we use which is based on | ||
881 | * feature bits. | ||
882 | */ | ||
883 | const struct xfs_dir_ops * | ||
884 | xfs_dir_get_ops( | ||
885 | struct xfs_mount *mp, | ||
886 | struct xfs_inode *dp) | ||
887 | { | ||
888 | if (dp) | ||
889 | return dp->d_ops; | ||
890 | if (mp->m_dir_inode_ops) | ||
891 | return mp->m_dir_inode_ops; | ||
892 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
893 | return &xfs_dir3_ops; | ||
894 | if (xfs_sb_version_hasftype(&mp->m_sb)) | ||
895 | return &xfs_dir2_ftype_ops; | ||
896 | return &xfs_dir2_ops; | ||
897 | } | ||
898 | |||
899 | const struct xfs_dir_ops * | ||
900 | xfs_nondir_get_ops( | ||
901 | struct xfs_mount *mp, | ||
902 | struct xfs_inode *dp) | ||
903 | { | ||
904 | if (dp) | ||
905 | return dp->d_ops; | ||
906 | if (mp->m_nondir_inode_ops) | ||
907 | return mp->m_nondir_inode_ops; | ||
908 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
909 | return &xfs_dir3_nondir_ops; | ||
910 | return &xfs_dir2_nondir_ops; | ||
911 | } | ||
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h new file mode 100644 index 000000000000..0a49b0286372 --- /dev/null +++ b/fs/xfs/libxfs/xfs_da_format.h | |||
@@ -0,0 +1,861 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2013 Red Hat, Inc. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #ifndef __XFS_DA_FORMAT_H__ | ||
20 | #define __XFS_DA_FORMAT_H__ | ||
21 | |||
22 | /* | ||
23 | * This structure is common to both leaf nodes and non-leaf nodes in the Btree. | ||
24 | * | ||
25 | * It is used to manage a doubly linked list of all blocks at the same | ||
26 | * level in the Btree, and to identify which type of block this is. | ||
27 | */ | ||
28 | #define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */ | ||
29 | #define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */ | ||
30 | #define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */ | ||
31 | #define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */ | ||
32 | |||
33 | typedef struct xfs_da_blkinfo { | ||
34 | __be32 forw; /* following block in list */ | ||
35 | __be32 back; /* previous block in list */ | ||
36 | __be16 magic; /* validity check on block */ | ||
37 | __be16 pad; /* unused */ | ||
38 | } xfs_da_blkinfo_t; | ||
39 | |||
40 | /* | ||
41 | * CRC enabled directory structure types | ||
42 | * | ||
43 | * The headers change size for the additional verification information, but | ||
44 | * otherwise the tree layouts and contents are unchanged. Hence the da btree | ||
45 | * code can use the struct xfs_da_blkinfo for manipulating the tree links and | ||
46 | * magic numbers without modification for both v2 and v3 nodes. | ||
47 | */ | ||
48 | #define XFS_DA3_NODE_MAGIC 0x3ebe /* magic number: non-leaf blocks */ | ||
49 | #define XFS_ATTR3_LEAF_MAGIC 0x3bee /* magic number: attribute leaf blks */ | ||
50 | #define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v3 dirlf single blks */ | ||
51 | #define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v3 dirlf multi blks */ | ||
52 | |||
53 | struct xfs_da3_blkinfo { | ||
54 | /* | ||
55 | * the node link manipulation code relies on the fact that the first | ||
56 | * element of this structure is the struct xfs_da_blkinfo so it can | ||
57 | * ignore the differences in the rest of the structures. | ||
58 | */ | ||
59 | struct xfs_da_blkinfo hdr; | ||
60 | __be32 crc; /* CRC of block */ | ||
61 | __be64 blkno; /* first block of the buffer */ | ||
62 | __be64 lsn; /* sequence number of last write */ | ||
63 | uuid_t uuid; /* filesystem we belong to */ | ||
64 | __be64 owner; /* inode that owns the block */ | ||
65 | }; | ||
66 | |||
67 | /* | ||
68 | * This is the structure of the root and intermediate nodes in the Btree. | ||
69 | * The leaf nodes are defined above. | ||
70 | * | ||
71 | * Entries are not packed. | ||
72 | * | ||
73 | * Since we have duplicate keys, use a binary search but always follow | ||
74 | * all matches in the block, not just the first match found. | ||
75 | */ | ||
76 | #define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */ | ||
77 | |||
78 | typedef struct xfs_da_node_hdr { | ||
79 | struct xfs_da_blkinfo info; /* block type, links, etc. */ | ||
80 | __be16 __count; /* count of active entries */ | ||
81 | __be16 __level; /* level above leaves (leaf == 0) */ | ||
82 | } xfs_da_node_hdr_t; | ||
83 | |||
84 | struct xfs_da3_node_hdr { | ||
85 | struct xfs_da3_blkinfo info; /* block type, links, etc. */ | ||
86 | __be16 __count; /* count of active entries */ | ||
87 | __be16 __level; /* level above leaves (leaf == 0) */ | ||
88 | __be32 __pad32; | ||
89 | }; | ||
90 | |||
91 | #define XFS_DA3_NODE_CRC_OFF (offsetof(struct xfs_da3_node_hdr, info.crc)) | ||
92 | |||
93 | typedef struct xfs_da_node_entry { | ||
94 | __be32 hashval; /* hash value for this descendant */ | ||
95 | __be32 before; /* Btree block before this key */ | ||
96 | } xfs_da_node_entry_t; | ||
97 | |||
98 | typedef struct xfs_da_intnode { | ||
99 | struct xfs_da_node_hdr hdr; | ||
100 | struct xfs_da_node_entry __btree[]; | ||
101 | } xfs_da_intnode_t; | ||
102 | |||
103 | struct xfs_da3_intnode { | ||
104 | struct xfs_da3_node_hdr hdr; | ||
105 | struct xfs_da_node_entry __btree[]; | ||
106 | }; | ||
107 | |||
108 | /* | ||
109 | * In-core version of the node header to abstract the differences in the v2 and | ||
110 | * v3 disk format of the headers. Callers need to convert to/from disk format as | ||
111 | * appropriate. | ||
112 | */ | ||
113 | struct xfs_da3_icnode_hdr { | ||
114 | __uint32_t forw; | ||
115 | __uint32_t back; | ||
116 | __uint16_t magic; | ||
117 | __uint16_t count; | ||
118 | __uint16_t level; | ||
119 | }; | ||
120 | |||
121 | /* | ||
122 | * Directory version 2. | ||
123 | * | ||
124 | * There are 4 possible formats: | ||
125 | * - shortform - embedded into the inode | ||
126 | * - single block - data with embedded leaf at the end | ||
127 | * - multiple data blocks, single leaf+freeindex block | ||
128 | * - data blocks, node and leaf blocks (btree), freeindex blocks | ||
129 | * | ||
130 | * Note: many node blocks structures and constants are shared with the attr | ||
131 | * code and defined in xfs_da_btree.h. | ||
132 | */ | ||
133 | |||
134 | #define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: single block dirs */ | ||
135 | #define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: multiblock dirs */ | ||
136 | #define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F: free index blocks */ | ||
137 | |||
138 | /* | ||
139 | * Directory Version 3 With CRCs. | ||
140 | * | ||
141 | * The tree formats are the same as for version 2 directories. The difference | ||
142 | * is in the block header and dirent formats. In many cases the v3 structures | ||
143 | * use v2 definitions as they are no different and this makes code sharing much | ||
144 | * easier. | ||
145 | * | ||
146 | * Also, the xfs_dir3_*() functions handle both v2 and v3 formats - if the | ||
147 | * format is v2 then they switch to the existing v2 code, or if the format is v3 | ||
148 | * they implement the v3 functionality. This means the existing dir2 is a mix of | ||
149 | * xfs_dir2/xfs_dir3 calls and functions. The xfs_dir3 functions are called | ||
150 | * where there is a difference in the formats, otherwise the code is unchanged. | ||
151 | * | ||
152 | * Where it is possible, the code decides what to do based on the magic numbers | ||
153 | * in the blocks rather than feature bits in the superblock. This means the code | ||
154 | * is as independent of the external XFS code as possible as it doesn't require | ||
155 | * passing struct xfs_mount pointers into places where it isn't really | ||
156 | * necessary. | ||
157 | * | ||
158 | * Version 3 includes: | ||
159 | * | ||
160 | * - a larger block header for CRC and identification purposes and so the | ||
161 | * offsets of all the structures inside the blocks are different. | ||
162 | * | ||
163 | * - new magic numbers to be able to detect the v2/v3 types on the fly. | ||
164 | */ | ||
165 | |||
166 | #define XFS_DIR3_BLOCK_MAGIC 0x58444233 /* XDB3: single block dirs */ | ||
167 | #define XFS_DIR3_DATA_MAGIC 0x58444433 /* XDD3: multiblock dirs */ | ||
168 | #define XFS_DIR3_FREE_MAGIC 0x58444633 /* XDF3: free index blocks */ | ||
169 | |||
170 | /* | ||
171 | * Dirents in version 3 directories have a file type field. Additions to this | ||
172 | * list are an on-disk format change, requiring feature bits. Valid values | ||
173 | * are as follows: | ||
174 | */ | ||
175 | #define XFS_DIR3_FT_UNKNOWN 0 | ||
176 | #define XFS_DIR3_FT_REG_FILE 1 | ||
177 | #define XFS_DIR3_FT_DIR 2 | ||
178 | #define XFS_DIR3_FT_CHRDEV 3 | ||
179 | #define XFS_DIR3_FT_BLKDEV 4 | ||
180 | #define XFS_DIR3_FT_FIFO 5 | ||
181 | #define XFS_DIR3_FT_SOCK 6 | ||
182 | #define XFS_DIR3_FT_SYMLINK 7 | ||
183 | #define XFS_DIR3_FT_WHT 8 | ||
184 | |||
185 | #define XFS_DIR3_FT_MAX 9 | ||
186 | |||
187 | /* | ||
188 | * Byte offset in data block and shortform entry. | ||
189 | */ | ||
190 | typedef __uint16_t xfs_dir2_data_off_t; | ||
191 | #define NULLDATAOFF 0xffffU | ||
192 | typedef uint xfs_dir2_data_aoff_t; /* argument form */ | ||
193 | |||
194 | /* | ||
195 | * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t. | ||
196 | * Only need 16 bits, this is the byte offset into the single block form. | ||
197 | */ | ||
198 | typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t; | ||
199 | |||
200 | /* | ||
201 | * Offset in data space of a data entry. | ||
202 | */ | ||
203 | typedef __uint32_t xfs_dir2_dataptr_t; | ||
204 | #define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff) | ||
205 | #define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0) | ||
206 | |||
207 | /* | ||
208 | * Byte offset in a directory. | ||
209 | */ | ||
210 | typedef xfs_off_t xfs_dir2_off_t; | ||
211 | |||
212 | /* | ||
213 | * Directory block number (logical dirblk in file) | ||
214 | */ | ||
215 | typedef __uint32_t xfs_dir2_db_t; | ||
216 | |||
217 | /* | ||
218 | * Inode number stored as 8 8-bit values. | ||
219 | */ | ||
220 | typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t; | ||
221 | |||
222 | /* | ||
223 | * Inode number stored as 4 8-bit values. | ||
224 | * Works a lot of the time, when all the inode numbers in a directory | ||
225 | * fit in 32 bits. | ||
226 | */ | ||
227 | typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t; | ||
228 | |||
229 | typedef union { | ||
230 | xfs_dir2_ino8_t i8; | ||
231 | xfs_dir2_ino4_t i4; | ||
232 | } xfs_dir2_inou_t; | ||
233 | #define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL) | ||
234 | |||
235 | /* | ||
236 | * Directory layout when stored internal to an inode. | ||
237 | * | ||
238 | * Small directories are packed as tightly as possible so as to fit into the | ||
239 | * literal area of the inode. These "shortform" directories consist of a | ||
240 | * single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry | ||
241 | * structures. Due to the different inode number storage size and the variable | ||
242 | * length name field in the xfs_dir2_sf_entry all these structures are | ||
243 | * variable length, and the accessors in this file should be used to iterate | ||
244 | * over them. | ||
245 | */ | ||
246 | typedef struct xfs_dir2_sf_hdr { | ||
247 | __uint8_t count; /* count of entries */ | ||
248 | __uint8_t i8count; /* count of 8-byte inode #s */ | ||
249 | xfs_dir2_inou_t parent; /* parent dir inode number */ | ||
250 | } __arch_pack xfs_dir2_sf_hdr_t; | ||
251 | |||
252 | typedef struct xfs_dir2_sf_entry { | ||
253 | __u8 namelen; /* actual name length */ | ||
254 | xfs_dir2_sf_off_t offset; /* saved offset */ | ||
255 | __u8 name[]; /* name, variable size */ | ||
256 | /* | ||
257 | * A single byte containing the file type field follows the inode | ||
258 | * number for version 3 directory entries. | ||
259 | * | ||
260 | * A xfs_dir2_ino8_t or xfs_dir2_ino4_t follows here, at a | ||
261 | * variable offset after the name. | ||
262 | */ | ||
263 | } __arch_pack xfs_dir2_sf_entry_t; | ||
264 | |||
265 | static inline int xfs_dir2_sf_hdr_size(int i8count) | ||
266 | { | ||
267 | return sizeof(struct xfs_dir2_sf_hdr) - | ||
268 | (i8count == 0) * | ||
269 | (sizeof(xfs_dir2_ino8_t) - sizeof(xfs_dir2_ino4_t)); | ||
270 | } | ||
271 | |||
272 | static inline xfs_dir2_data_aoff_t | ||
273 | xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep) | ||
274 | { | ||
275 | return get_unaligned_be16(&sfep->offset.i); | ||
276 | } | ||
277 | |||
278 | static inline void | ||
279 | xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off) | ||
280 | { | ||
281 | put_unaligned_be16(off, &sfep->offset.i); | ||
282 | } | ||
283 | |||
284 | static inline struct xfs_dir2_sf_entry * | ||
285 | xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr) | ||
286 | { | ||
287 | return (struct xfs_dir2_sf_entry *) | ||
288 | ((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count)); | ||
289 | } | ||
290 | |||
291 | /* | ||
292 | * Data block structures. | ||
293 | * | ||
294 | * A pure data block looks like the following drawing on disk: | ||
295 | * | ||
296 | * +-------------------------------------------------+ | ||
297 | * | xfs_dir2_data_hdr_t | | ||
298 | * +-------------------------------------------------+ | ||
299 | * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | | ||
300 | * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | | ||
301 | * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | | ||
302 | * | ... | | ||
303 | * +-------------------------------------------------+ | ||
304 | * | unused space | | ||
305 | * +-------------------------------------------------+ | ||
306 | * | ||
307 | * As all the entries are variable size structures the accessors below should | ||
308 | * be used to iterate over them. | ||
309 | * | ||
310 | * In addition to the pure data blocks for the data and node formats, | ||
311 | * most structures are also used for the combined data/freespace "block" | ||
312 | * format below. | ||
313 | */ | ||
314 | |||
315 | #define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */ | ||
316 | #define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG) | ||
317 | #define XFS_DIR2_DATA_FREE_TAG 0xffff | ||
318 | #define XFS_DIR2_DATA_FD_COUNT 3 | ||
319 | |||
320 | /* | ||
321 | * Directory address space divided into sections, | ||
322 | * spaces separated by 32GB. | ||
323 | */ | ||
324 | #define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) | ||
325 | #define XFS_DIR2_DATA_SPACE 0 | ||
326 | #define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) | ||
327 | |||
328 | /* | ||
329 | * Describe a free area in the data block. | ||
330 | * | ||
331 | * The freespace will be formatted as a xfs_dir2_data_unused_t. | ||
332 | */ | ||
333 | typedef struct xfs_dir2_data_free { | ||
334 | __be16 offset; /* start of freespace */ | ||
335 | __be16 length; /* length of freespace */ | ||
336 | } xfs_dir2_data_free_t; | ||
337 | |||
338 | /* | ||
339 | * Header for the data blocks. | ||
340 | * | ||
341 | * The code knows that XFS_DIR2_DATA_FD_COUNT is 3. | ||
342 | */ | ||
343 | typedef struct xfs_dir2_data_hdr { | ||
344 | __be32 magic; /* XFS_DIR2_DATA_MAGIC or */ | ||
345 | /* XFS_DIR2_BLOCK_MAGIC */ | ||
346 | xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT]; | ||
347 | } xfs_dir2_data_hdr_t; | ||
348 | |||
349 | /* | ||
350 | * Define a structure for all the verification fields we are adding to the | ||
351 | * directory block structures. This will be used in several structures. | ||
352 | * The magic number must be the first entry to align with all the dir2 | ||
353 | * structures so we determine how to decode them just by the magic number. | ||
354 | */ | ||
355 | struct xfs_dir3_blk_hdr { | ||
356 | __be32 magic; /* magic number */ | ||
357 | __be32 crc; /* CRC of block */ | ||
358 | __be64 blkno; /* first block of the buffer */ | ||
359 | __be64 lsn; /* sequence number of last write */ | ||
360 | uuid_t uuid; /* filesystem we belong to */ | ||
361 | __be64 owner; /* inode that owns the block */ | ||
362 | }; | ||
363 | |||
/*
 * Version 3 (CRC enabled) header for data blocks: the common v3 block header
 * followed by the same kind of free space table the v2 data header carries.
 */
364 | struct xfs_dir3_data_hdr { | ||
365 | struct xfs_dir3_blk_hdr hdr; /* magic, CRC and other verification fields */ | ||
366 | xfs_dir2_data_free_t best_free[XFS_DIR2_DATA_FD_COUNT]; /* free areas, same entry type as v2 bestfree */ | ||
367 | __be32 pad; /* 64 bit alignment */ | ||
368 | }; | ||
369 | |||
370 | #define XFS_DIR3_DATA_CRC_OFF offsetof(struct xfs_dir3_data_hdr, hdr.crc) | ||
371 | |||
372 | /* | ||
373 | * Active entry in a data block. | ||
374 | * | ||
375 | * Aligned to 8 bytes. After the variable length name field there is a | ||
376 | * 2 byte tag field, which can be accessed using xfs_dir3_data_entry_tag_p. | ||
377 | * | ||
378 | * For dir3 structures, there is a file type field between the name and the tag. | ||
379 | * This can only be manipulated by helper functions. It is packed hard against | ||
380 | * the end of the name so any padding for rounding is between the file type and | ||
381 | * the tag. | ||
382 | */ | ||
383 | typedef struct xfs_dir2_data_entry { | ||
384 | __be64 inumber; /* inode number */ | ||
385 | __u8 namelen; /* name length */ | ||
386 | __u8 name[]; /* name bytes, no null */ | ||
387 | /* __u8 filetype; */ /* type of inode we point to */ | ||
388 | /* __be16 tag; */ /* starting offset of us */ | ||
389 | } xfs_dir2_data_entry_t; | ||
390 | |||
391 | /* | ||
392 | * Unused entry in a data block. | ||
393 | * | ||
394 | * Aligned to 8 bytes. Tag appears as the last 2 bytes and must be accessed | ||
395 | * using xfs_dir2_data_unused_tag_p. | ||
396 | */ | ||
397 | typedef struct xfs_dir2_data_unused { | ||
398 | __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */ | ||
399 | __be16 length; /* total free length */ | ||
400 | /* variable offset */ | ||
401 | __be16 tag; /* starting offset of us */ | ||
402 | } xfs_dir2_data_unused_t; | ||
403 | |||
404 | /* | ||
405 | * Pointer to a freespace's tag word. | ||
406 | */ | ||
407 | static inline __be16 * | ||
408 | xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup) | ||
409 | { | ||
410 | return (__be16 *)((char *)dup + | ||
411 | be16_to_cpu(dup->length) - sizeof(__be16)); | ||
412 | } | ||
413 | |||
414 | /* | ||
415 | * Leaf block structures. | ||
416 | * | ||
417 | * A pure leaf block looks like the following drawing on disk: | ||
418 | * | ||
419 | * +---------------------------+ | ||
420 | * | xfs_dir2_leaf_hdr_t | | ||
421 | * +---------------------------+ | ||
422 | * | xfs_dir2_leaf_entry_t | | ||
423 | * | xfs_dir2_leaf_entry_t | | ||
424 | * | xfs_dir2_leaf_entry_t | | ||
425 | * | xfs_dir2_leaf_entry_t | | ||
426 | * | ... | | ||
427 | * +---------------------------+ | ||
428 | * | xfs_dir2_data_off_t | | ||
429 | * | xfs_dir2_data_off_t | | ||
430 | * | xfs_dir2_data_off_t | | ||
431 | * | ... | | ||
432 | * +---------------------------+ | ||
433 | * | xfs_dir2_leaf_tail_t | | ||
434 | * +---------------------------+ | ||
435 | * | ||
436 | * The xfs_dir2_data_off_t members (bests) and tail are at the end of the block | ||
437 | * for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present | ||
438 | * for directories with separate leaf nodes and free space blocks | ||
439 | * (magic = XFS_DIR2_LEAFN_MAGIC). | ||
440 | * | ||
441 | * As all the entries are variable size structures the accessors below should | ||
442 | * be used to iterate over them. | ||
443 | */ | ||
444 | |||
445 | /* | ||
446 | * Offset of the leaf/node space. First block in this space | ||
447 | * is the btree root. | ||
448 | */ | ||
449 | #define XFS_DIR2_LEAF_SPACE 1 | ||
450 | #define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) | ||
451 | |||
452 | /* | ||
453 | * Leaf block header. | ||
454 | */ | ||
455 | typedef struct xfs_dir2_leaf_hdr { | ||
456 | xfs_da_blkinfo_t info; /* header for da routines */ | ||
457 | __be16 count; /* count of entries */ | ||
458 | __be16 stale; /* count of stale entries */ | ||
459 | } xfs_dir2_leaf_hdr_t; | ||
460 | |||
461 | struct xfs_dir3_leaf_hdr { | ||
462 | struct xfs_da3_blkinfo info; /* header for da routines */ | ||
463 | __be16 count; /* count of entries */ | ||
464 | __be16 stale; /* count of stale entries */ | ||
465 | __be32 pad; /* 64 bit alignment */ | ||
466 | }; | ||
467 | |||
/*
 * Leaf header with native integer fields instead of the __be types used by
 * xfs_dir2_leaf_hdr and xfs_dir3_leaf_hdr.
 *
 * NOTE(review): by analogy with xfs_dir3_icfree_hdr this looks like the
 * in-core form shared by the v2 and v3 leaf headers - confirm against the
 * conversion helpers.
 */
468 | struct xfs_dir3_icleaf_hdr { | ||
469 | __uint32_t forw; /* presumably forward sibling link from the da blkinfo - TODO confirm */ | ||
470 | __uint32_t back; /* presumably backward sibling link from the da blkinfo - TODO confirm */ | ||
471 | __uint16_t magic; /* presumably the da blkinfo magic number - TODO confirm */ | ||
472 | __uint16_t count; /* count of entries */ | ||
473 | __uint16_t stale; /* count of stale entries */ | ||
474 | }; | ||
475 | |||
476 | /* | ||
477 | * Leaf block entry. | ||
478 | */ | ||
479 | typedef struct xfs_dir2_leaf_entry { | ||
480 | __be32 hashval; /* hash value of name */ | ||
481 | __be32 address; /* address of data entry */ | ||
482 | } xfs_dir2_leaf_entry_t; | ||
483 | |||
484 | /* | ||
485 | * Leaf block tail. | ||
486 | */ | ||
487 | typedef struct xfs_dir2_leaf_tail { | ||
488 | __be32 bestcount; /* number of __be16 bests entries stored just before this tail */ | ||
489 | } xfs_dir2_leaf_tail_t; | ||
490 | |||
491 | /* | ||
492 | * Leaf block. | ||
493 | */ | ||
494 | typedef struct xfs_dir2_leaf { | ||
495 | xfs_dir2_leaf_hdr_t hdr; /* leaf header */ | ||
496 | xfs_dir2_leaf_entry_t __ents[]; /* entries */ | ||
497 | } xfs_dir2_leaf_t; | ||
498 | |||
499 | struct xfs_dir3_leaf { | ||
500 | struct xfs_dir3_leaf_hdr hdr; /* leaf header */ | ||
501 | struct xfs_dir2_leaf_entry __ents[]; /* entries */ | ||
502 | }; | ||
503 | |||
504 | #define XFS_DIR3_LEAF_CRC_OFF offsetof(struct xfs_dir3_leaf_hdr, info.crc) | ||
505 | |||
506 | /* | ||
507 | * Get address of the bests array in the single-leaf block. | ||
508 | */ | ||
509 | static inline __be16 * | ||
510 | xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp) | ||
511 | { | ||
512 | return (__be16 *)ltp - be32_to_cpu(ltp->bestcount); | ||
513 | } | ||
514 | |||
515 | /* | ||
516 | * Free space block definitions for the node format. | ||
517 | */ | ||
518 | |||
519 | /* | ||
520 | * Offset of the freespace index. | ||
521 | */ | ||
522 | #define XFS_DIR2_FREE_SPACE 2 | ||
523 | #define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) | ||
524 | |||
525 | typedef struct xfs_dir2_free_hdr { | ||
526 | __be32 magic; /* XFS_DIR2_FREE_MAGIC */ | ||
527 | __be32 firstdb; /* db of first entry */ | ||
528 | __be32 nvalid; /* count of valid entries */ | ||
529 | __be32 nused; /* count of used entries */ | ||
530 | } xfs_dir2_free_hdr_t; | ||
531 | |||
532 | typedef struct xfs_dir2_free { | ||
533 | xfs_dir2_free_hdr_t hdr; /* block header */ | ||
534 | __be16 bests[]; /* best free counts */ | ||
535 | /* unused entries are -1 */ | ||
536 | } xfs_dir2_free_t; | ||
537 | |||
538 | struct xfs_dir3_free_hdr { | ||
539 | struct xfs_dir3_blk_hdr hdr; | ||
540 | __be32 firstdb; /* db of first entry */ | ||
541 | __be32 nvalid; /* count of valid entries */ | ||
542 | __be32 nused; /* count of used entries */ | ||
543 | __be32 pad; /* 64 bit alignment */ | ||
544 | }; | ||
545 | |||
546 | struct xfs_dir3_free { | ||
547 | struct xfs_dir3_free_hdr hdr; | ||
548 | __be16 bests[]; /* best free counts */ | ||
549 | /* unused entries are -1 */ | ||
550 | }; | ||
551 | |||
552 | #define XFS_DIR3_FREE_CRC_OFF offsetof(struct xfs_dir3_free, hdr.hdr.crc) | ||
553 | |||
554 | /* | ||
555 | * In core version of the free block header, abstracted away from on-disk format | ||
556 | * differences. Use this in the code, and convert to/from the disk version using | ||
557 | * xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk. | ||
558 | */ | ||
559 | struct xfs_dir3_icfree_hdr { | ||
560 | __uint32_t magic; /* magic number of the free block */ | ||
561 | __uint32_t firstdb; /* db of first entry */ | ||
562 | __uint32_t nvalid; /* count of valid entries */ | ||
563 | __uint32_t nused; /* count of used entries */ | ||
564 | | ||
565 | }; | ||
566 | |||
567 | /* | ||
568 | * Single block format. | ||
569 | * | ||
570 | * The single block format looks like the following drawing on disk: | ||
571 | * | ||
572 | * +-------------------------------------------------+ | ||
573 | * | xfs_dir2_data_hdr_t | | ||
574 | * +-------------------------------------------------+ | ||
575 | * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | | ||
576 | * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | | ||
577 | * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t | | ||
578 | * | ... | | ||
579 | * +-------------------------------------------------+ | ||
580 | * | unused space | | ||
581 | * +-------------------------------------------------+ | ||
582 | * | ... | | ||
583 | * | xfs_dir2_leaf_entry_t | | ||
584 | * | xfs_dir2_leaf_entry_t | | ||
585 | * +-------------------------------------------------+ | ||
586 | * | xfs_dir2_block_tail_t | | ||
587 | * +-------------------------------------------------+ | ||
588 | * | ||
589 | * As all the entries are variable size structures the accessors below should | ||
590 | * be used to iterate over them. | ||
591 | */ | ||
592 | |||
593 | typedef struct xfs_dir2_block_tail { | ||
594 | __be32 count; /* count of leaf entries */ | ||
595 | __be32 stale; /* count of stale lf entries */ | ||
596 | } xfs_dir2_block_tail_t; | ||
597 | |||
598 | /* | ||
599 | * Pointer to the leaf entries embedded in a data block (1-block format) | ||
600 | */ | ||
601 | static inline struct xfs_dir2_leaf_entry * | ||
602 | xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) | ||
603 | { | ||
604 | return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count); | ||
605 | } | ||
606 | |||
607 | |||
608 | /* | ||
609 | * Attribute storage layout | ||
610 | * | ||
611 | * Attribute lists are structured around Btrees where all the data | ||
612 | * elements are in the leaf nodes. Attribute names are hashed into an int, | ||
613 | * then that int is used as the index into the Btree. Since the hashval | ||
614 | * of an attribute name may not be unique, we may have duplicate keys. The | ||
615 | * internal links in the Btree are logical block offsets into the file. | ||
616 | * | ||
617 | * Struct leaf_entry's are packed from the top. Name/values grow from the | ||
618 | * bottom but are not packed. The freemap contains run-length-encoded entries | ||
619 | * for the free bytes after the leaf_entry's, but only the N largest such, | ||
620 | * smaller runs are dropped. When the freemap doesn't show enough space | ||
621 | * for an allocation, we compact the name/value area and try again. If we | ||
622 | * still don't have enough space, then we have to split the block. The | ||
623 | * name/value structs (both local and remote versions) must be 32bit aligned. | ||
624 | * | ||
625 | * Since we have duplicate hash keys, for each key that matches, compare | ||
626 | * the actual name string. The root and intermediate node search always | ||
627 | * takes the first-in-the-block key match found, so we should only have | ||
628 | * to work "forw"ard. If none matches, continue with the "forw"ard leaf | ||
629 | * nodes until the hash key changes or the attribute name is found. | ||
630 | * | ||
631 | * We store the fact that an attribute is a ROOT/USER/SECURE attribute in | ||
632 | * the leaf_entry. The namespaces are independent only because we also look | ||
633 | * at the namespace bit when we are looking for a matching attribute name. | ||
634 | * | ||
635 | * We also store an "incomplete" bit in the leaf_entry. It shows that an | ||
636 | * attribute is in the middle of being created and should not be shown to | ||
637 | * the user if we crash during the time that the bit is set. We clear the | ||
638 | * bit when we have finished setting up the attribute. We do this because | ||
639 | * we cannot create some large attributes inside a single transaction, and we | ||
640 | * need some indication that we weren't finished if we crash in the middle. | ||
641 | */ | ||
642 | #define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */ | ||
643 | |||
644 | typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */ | ||
645 | __be16 base; /* base of free region */ | ||
646 | __be16 size; /* length of free region */ | ||
647 | } xfs_attr_leaf_map_t; | ||
648 | |||
649 | typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */ | ||
650 | xfs_da_blkinfo_t info; /* block type, links, etc. */ | ||
651 | __be16 count; /* count of active leaf_entry's */ | ||
652 | __be16 usedbytes; /* num bytes of names/values stored */ | ||
653 | __be16 firstused; /* first used byte in name area */ | ||
654 | __u8 holes; /* != 0 if blk needs compaction */ | ||
655 | __u8 pad1; | ||
656 | xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE]; | ||
657 | /* N largest free regions */ | ||
658 | } xfs_attr_leaf_hdr_t; | ||
659 | |||
660 | typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */ | ||
661 | __be32 hashval; /* hash value of name */ | ||
662 | __be16 nameidx; /* index into buffer of name/value */ | ||
663 | __u8 flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */ | ||
664 | __u8 pad2; /* unused pad byte */ | ||
665 | } xfs_attr_leaf_entry_t; | ||
666 | |||
667 | typedef struct xfs_attr_leaf_name_local { | ||
668 | __be16 valuelen; /* number of bytes in value */ | ||
669 | __u8 namelen; /* length of name bytes */ | ||
670 | __u8 nameval[1]; /* name/value bytes */ | ||
671 | } xfs_attr_leaf_name_local_t; | ||
672 | |||
673 | typedef struct xfs_attr_leaf_name_remote { | ||
674 | __be32 valueblk; /* block number of value bytes */ | ||
675 | __be32 valuelen; /* number of bytes in value */ | ||
676 | __u8 namelen; /* length of name bytes */ | ||
677 | __u8 name[1]; /* name bytes */ | ||
678 | } xfs_attr_leaf_name_remote_t; | ||
679 | |||
680 | typedef struct xfs_attr_leafblock { | ||
681 | xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */ | ||
682 | xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */ | ||
683 | xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */ | ||
684 | xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */ | ||
685 | } xfs_attr_leafblock_t; | ||
686 | |||
687 | /* | ||
688 | * CRC enabled leaf structures. Called "version 3" structures to match the | ||
689 | * version number of the directory and dablk structures for this feature, and | ||
690 | * attr2 is already taken by the variable inode attribute fork size feature. | ||
691 | */ | ||
692 | struct xfs_attr3_leaf_hdr { | ||
693 | struct xfs_da3_blkinfo info; /* da block header; holds the crc used by XFS_ATTR3_LEAF_CRC_OFF */ | ||
694 | __be16 count; /* count of active leaf entries */ | ||
695 | __be16 usedbytes; /* num bytes of names/values stored */ | ||
696 | __be16 firstused; /* first used byte in name area */ | ||
697 | __u8 holes; /* != 0 if blk needs compaction */ | ||
698 | __u8 pad1; | ||
699 | struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE]; /* N largest free regions */ | ||
700 | __be32 pad2; /* 64 bit alignment */ | ||
701 | }; | ||
702 | |||
703 | #define XFS_ATTR3_LEAF_CRC_OFF (offsetof(struct xfs_attr3_leaf_hdr, info.crc)) | ||
704 | |||
705 | struct xfs_attr3_leafblock { | ||
706 | struct xfs_attr3_leaf_hdr hdr; | ||
707 | struct xfs_attr_leaf_entry entries[1]; | ||
708 | |||
709 | /* | ||
710 | * The rest of the block contains the following structures after the | ||
711 | * leaf entries, growing from the bottom up. The variables are never | ||
712 | * referenced, the locations accessed purely from helper functions. | ||
713 | * | ||
714 | * struct xfs_attr_leaf_name_local | ||
715 | * struct xfs_attr_leaf_name_remote | ||
716 | */ | ||
717 | }; | ||
718 | |||
719 | /* | ||
720 | * incore, neutral version of the attribute leaf header | ||
721 | */ | ||
722 | struct xfs_attr3_icleaf_hdr { | ||
723 | __uint32_t forw; /* presumably forward sibling link from the da blkinfo - TODO confirm */ | ||
724 | __uint32_t back; /* presumably backward sibling link from the da blkinfo - TODO confirm */ | ||
725 | __uint16_t magic; /* leaf block magic number, native endian */ | ||
726 | __uint16_t count; /* count of active leaf entries */ | ||
727 | __uint16_t usedbytes; /* num bytes of names/values stored */ | ||
728 | __uint16_t firstused; /* first used byte in name area */ | ||
729 | __u8 holes; /* != 0 if blk needs compaction */ | ||
730 | struct { | ||
731 | __uint16_t base; /* base of free region */ | ||
732 | __uint16_t size; /* length of free region */ | ||
733 | } freemap[XFS_ATTR_LEAF_MAPSIZE]; /* N largest free regions */ | ||
734 | }; | ||
735 | |||
736 | /* | ||
737 | * Flags used in the leaf_entry[i].flags field. | ||
738 | * NOTE: the INCOMPLETE bit must not collide with the flags bits specified | ||
739 | * on the system call, they are "or"ed together for various operations. | ||
740 | */ | ||
741 | #define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */ | ||
742 | #define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */ | ||
743 | #define XFS_ATTR_SECURE_BIT 2 /* limit access to secure attrs */ | ||
744 | #define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */ | ||
745 | #define XFS_ATTR_LOCAL (1 << XFS_ATTR_LOCAL_BIT) | ||
746 | #define XFS_ATTR_ROOT (1 << XFS_ATTR_ROOT_BIT) | ||
747 | #define XFS_ATTR_SECURE (1 << XFS_ATTR_SECURE_BIT) | ||
748 | #define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT) | ||
749 | |||
750 | /* | ||
751 | * Conversion macros for converting namespace bits from argument flags | ||
752 | * to ondisk flags. | ||
753 | */ | ||
754 | #define XFS_ATTR_NSP_ARGS_MASK (ATTR_ROOT | ATTR_SECURE) | ||
755 | #define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE) | ||
756 | #define XFS_ATTR_NSP_ONDISK(flags) ((flags) & XFS_ATTR_NSP_ONDISK_MASK) | ||
757 | #define XFS_ATTR_NSP_ARGS(flags) ((flags) & XFS_ATTR_NSP_ARGS_MASK) | ||
758 | #define XFS_ATTR_NSP_ARGS_TO_ONDISK(x) (((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\ | ||
759 | ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0)) | ||
760 | #define XFS_ATTR_NSP_ONDISK_TO_ARGS(x) (((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\ | ||
761 | ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0)) | ||
762 | |||
763 | /* | ||
764 | * Alignment for namelist and valuelist entries (since they are mixed | ||
765 | * there can be only one alignment value) | ||
766 | */ | ||
767 | #define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t)) | ||
768 | |||
769 | static inline int | ||
770 | xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp) | ||
771 | { | ||
772 | if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) | ||
773 | return sizeof(struct xfs_attr3_leaf_hdr); | ||
774 | return sizeof(struct xfs_attr_leaf_hdr); | ||
775 | } | ||
776 | |||
777 | static inline struct xfs_attr_leaf_entry * | ||
778 | xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp) | ||
779 | { | ||
780 | if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) | ||
781 | return &((struct xfs_attr3_leafblock *)leafp)->entries[0]; | ||
782 | return &leafp->entries[0]; | ||
783 | } | ||
784 | |||
785 | /* | ||
786 | * Cast typed pointers for "local" and "remote" name/value structs. | ||
787 | */ | ||
788 | static inline char * | ||
789 | xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx) | ||
790 | { | ||
791 | struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp); | ||
792 | |||
793 | return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)]; | ||
794 | } | ||
795 | |||
796 | static inline xfs_attr_leaf_name_remote_t * | ||
797 | xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx) | ||
798 | { | ||
799 | return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx); | ||
800 | } | ||
801 | |||
802 | static inline xfs_attr_leaf_name_local_t * | ||
803 | xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) | ||
804 | { | ||
805 | return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx); | ||
806 | } | ||
807 | |||
808 | /* | ||
809 | * Calculate total bytes used (including trailing pad for alignment) for | ||
810 | * a "local" name/value structure, a "remote" name/value structure, and | ||
811 | * a pointer which might be either. | ||
812 | */ | ||
813 | static inline int xfs_attr_leaf_entsize_remote(int nlen) | ||
814 | { | ||
815 | return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \ | ||
816 | XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); | ||
817 | } | ||
818 | |||
819 | static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen) | ||
820 | { | ||
821 | return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) + | ||
822 | XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); | ||
823 | } | ||
824 | |||
/*
 * Return one half of bsize plus one quarter of bsize, each computed with a
 * right shift (so each term is rounded down separately).  For sizes that
 * are multiples of four this is exactly three quarters of bsize.
 */
static inline int xfs_attr_leaf_entsize_local_max(int bsize)
{
	int	half = bsize >> 1;
	int	quarter = bsize >> 2;

	return half + quarter;
}
830 | |||
831 | |||
832 | /* | ||
833 | * Remote attribute block format definition | ||
834 | * | ||
835 | * There is one of these headers per filesystem block in a remote attribute. | ||
836 | * This is done to ensure there is a 1:1 mapping between the attribute value | ||
837 | * length and the number of blocks needed to store the attribute. This makes the | ||
838 | * verification of a buffer a little more complex, but greatly simplifies the | ||
839 | * allocation, reading and writing of these attributes as we don't have to guess | ||
840 | * the number of blocks needed to store the attribute data. | ||
841 | */ | ||
842 | #define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */ | ||
843 | |||
844 | struct xfs_attr3_rmt_hdr { | ||
845 | __be32 rm_magic; /* presumably XFS_ATTR3_RMT_MAGIC - TODO confirm */ | ||
846 | __be32 rm_offset; /* NOTE(review): looks like the offset of this block's data within the value - confirm */ | ||
847 | __be32 rm_bytes; /* NOTE(review): looks like the number of value bytes held in this block - confirm */ | ||
848 | __be32 rm_crc; /* CRC field; XFS_ATTR3_RMT_CRC_OFF is its offset */ | ||
849 | uuid_t rm_uuid; /* presumably the filesystem UUID, as in xfs_dir3_blk_hdr - TODO confirm */ | ||
850 | __be64 rm_owner; /* presumably the owning inode, as in xfs_dir3_blk_hdr - TODO confirm */ | ||
851 | __be64 rm_blkno; /* presumably the block number of the buffer, as in xfs_dir3_blk_hdr - TODO confirm */ | ||
852 | __be64 rm_lsn; /* presumably the sequence number of last write, as in xfs_dir3_blk_hdr - TODO confirm */ | ||
853 | }; | ||
854 | |||
855 | #define XFS_ATTR3_RMT_CRC_OFF offsetof(struct xfs_attr3_rmt_hdr, rm_crc) | ||
856 | |||
857 | #define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize) \ | ||
858 | ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ | ||
859 | sizeof(struct xfs_attr3_rmt_hdr) : 0)) | ||
860 | |||
861 | #endif /* __XFS_DA_FORMAT_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_dinode.h b/fs/xfs/libxfs/xfs_dinode.h new file mode 100644 index 000000000000..623bbe8fd921 --- /dev/null +++ b/fs/xfs/libxfs/xfs_dinode.h | |||
@@ -0,0 +1,243 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_DINODE_H__ | ||
19 | #define __XFS_DINODE_H__ | ||
20 | |||
21 | #define XFS_DINODE_MAGIC 0x494e /* 'IN' */ | ||
22 | #define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3) | ||
23 | |||
24 | typedef struct xfs_timestamp { | ||
25 | __be32 t_sec; /* timestamp seconds */ | ||
26 | __be32 t_nsec; /* timestamp nanoseconds */ | ||
27 | } xfs_timestamp_t; | ||
28 | |||
29 | /* | ||
30 | * On-disk inode structure. | ||
31 | * | ||
32 | * This is just the header or "dinode core", the inode is expanded to fill a | ||
33 | * variable size the leftover area split into a data and an attribute fork. | ||
34 | * The format of the data and attribute fork depends on the format of the | ||
35 | * inode as indicated by di_format and di_aformat. To access the data and | ||
36 | * attribute use the XFS_DFORK_DPTR, XFS_DFORK_APTR, and XFS_DFORK_PTR macros | ||
37 | * below. | ||
38 | * | ||
39 | * There is a very similar struct icdinode in xfs_inode which matches the | ||
40 | * layout of the first 96 bytes of this structure, but is kept in native | ||
41 | * format instead of big endian. | ||
42 | * | ||
43 | * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed | ||
44 | * padding field for v3 inodes. | ||
45 | */ | ||
46 | typedef struct xfs_dinode { | ||
47 | __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ | ||
48 | __be16 di_mode; /* mode and type of file */ | ||
49 | __u8 di_version; /* inode version */ | ||
50 | __u8 di_format; /* format of di_c data */ | ||
51 | __be16 di_onlink; /* old number of links to file */ | ||
52 | __be32 di_uid; /* owner's user id */ | ||
53 | __be32 di_gid; /* owner's group id */ | ||
54 | __be32 di_nlink; /* number of links to file */ | ||
55 | __be16 di_projid_lo; /* lower part of owner's project id */ | ||
56 | __be16 di_projid_hi; /* higher part owner's project id */ | ||
57 | __u8 di_pad[6]; /* unused, zeroed space */ | ||
58 | __be16 di_flushiter; /* incremented on flush */ | ||
59 | xfs_timestamp_t di_atime; /* time last accessed */ | ||
60 | xfs_timestamp_t di_mtime; /* time last modified */ | ||
61 | xfs_timestamp_t di_ctime; /* time created/inode modified */ | ||
62 | __be64 di_size; /* number of bytes in file */ | ||
63 | __be64 di_nblocks; /* # of direct & btree blocks used */ | ||
64 | __be32 di_extsize; /* basic/minimum extent size for file */ | ||
65 | __be32 di_nextents; /* number of extents in data fork */ | ||
66 | __be16 di_anextents; /* number of extents in attribute fork*/ | ||
67 | __u8 di_forkoff; /* attr fork offs, <<3 for 64b align */ | ||
68 | __s8 di_aformat; /* format of attr fork's data */ | ||
69 | __be32 di_dmevmask; /* DMIG event mask */ | ||
70 | __be16 di_dmstate; /* DMIG state info */ | ||
71 | __be16 di_flags; /* random flags, XFS_DIFLAG_... */ | ||
72 | __be32 di_gen; /* generation number */ | ||
73 | |||
74 | /* di_next_unlinked is the only non-core field in the old dinode */ | ||
75 | __be32 di_next_unlinked;/* agi unlinked list ptr */ | ||
76 | |||
77 | /* start of the extended dinode, writable fields */ | ||
78 | __le32 di_crc; /* CRC of the inode */ | ||
79 | __be64 di_changecount; /* number of attribute changes */ | ||
80 | __be64 di_lsn; /* flush sequence */ | ||
81 | __be64 di_flags2; /* more random flags */ | ||
82 | __u8 di_pad2[16]; /* more padding for future expansion */ | ||
83 | |||
84 | /* fields only written to during inode creation */ | ||
85 | xfs_timestamp_t di_crtime; /* time created */ | ||
86 | __be64 di_ino; /* inode number */ | ||
87 | uuid_t di_uuid; /* UUID of the filesystem */ | ||
88 | |||
89 | /* structure must be padded to 64 bit alignment */ | ||
90 | } xfs_dinode_t; | ||
91 | |||
92 | #define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc) | ||
93 | |||
94 | #define DI_MAX_FLUSH 0xffff | ||
95 | |||
96 | /* | ||
97 | * Size of the core inode on disk. Version 1 and 2 inodes have | ||
98 | * the same size, but version 3 has grown a few additional fields. | ||
99 | */ | ||
100 | static inline uint xfs_dinode_size(int version) | ||
101 | { | ||
102 | if (version == 3) | ||
103 | return sizeof(struct xfs_dinode); | ||
104 | return offsetof(struct xfs_dinode, di_crc); | ||
105 | } | ||
106 | |||
/*
 * The 32 bit link count in the inode theoretically maxes out at UINT_MAX.
 * The pathconf interface is signed, so 2^31 - 1 is used instead.
 * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX.
 */
#define	XFS_MAXLINK		((1U << 31) - 1U)
#define	XFS_MAXLINK_1		65535U

/*
 * Values for di_format.  The numbering is spelled out explicitly; it is the
 * same numbering an implicit enumeration would produce.
 */
enum xfs_dinode_fmt {
	XFS_DINODE_FMT_DEV	= 0,	/* xfs_dev_t */
	XFS_DINODE_FMT_LOCAL	= 1,	/* bulk data */
	XFS_DINODE_FMT_EXTENTS	= 2,	/* struct xfs_bmbt_rec */
	XFS_DINODE_FMT_BTREE	= 3,	/* struct xfs_bmdr_block */
	XFS_DINODE_FMT_UUID	= 4	/* uuid_t */
};
typedef enum xfs_dinode_fmt xfs_dinode_fmt_t;

/*
 * Inode minimum and maximum sizes, as log2 values and in bytes.
 */
#define	XFS_DINODE_MIN_LOG	8
#define	XFS_DINODE_MAX_LOG	11
#define	XFS_DINODE_MIN_SIZE	(1 << XFS_DINODE_MIN_LOG)
#define	XFS_DINODE_MAX_SIZE	(1 << XFS_DINODE_MAX_LOG)
133 | |||
/*
 * Literal area of an inode for the given fs: the superblock inode size
 * minus the size of the dinode core for this inode version.
 */
#define XFS_LITINO(mp, version) \
	((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version)))

/*
 * Inode data & attribute fork sizes, per inode.
 *
 * XFS_DFORK_Q is non-zero when di_forkoff is set; XFS_DFORK_BOFF converts
 * di_forkoff to bytes (it is stored shifted right by 3).
 */
#define XFS_DFORK_Q(dip)		((dip)->di_forkoff != 0)
#define XFS_DFORK_BOFF(dip)		((int)((dip)->di_forkoff << 3))

#define XFS_DFORK_DSIZE(dip,mp) \
	(XFS_DFORK_Q(dip) ? \
		XFS_DFORK_BOFF(dip) : \
		XFS_LITINO(mp, (dip)->di_version))
#define XFS_DFORK_ASIZE(dip,mp) \
	(XFS_DFORK_Q(dip) ? \
		XFS_LITINO(mp, (dip)->di_version) - XFS_DFORK_BOFF(dip) : \
		0)
#define XFS_DFORK_SIZE(dip,mp,w) \
	((w) == XFS_DATA_FORK ? \
		XFS_DFORK_DSIZE(dip, mp) : \
		XFS_DFORK_ASIZE(dip, mp))

/*
 * Return pointers to the data or attribute forks.
 *
 * The macro argument is fully parenthesized so that any pointer expression
 * (not just a plain identifier) expands correctly under the cast and the
 * member access.
 */
#define XFS_DFORK_DPTR(dip) \
	((char *)(dip) + xfs_dinode_size((dip)->di_version))
#define XFS_DFORK_APTR(dip)	\
	(XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip))
#define XFS_DFORK_PTR(dip,w)	\
	((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR(dip) : XFS_DFORK_APTR(dip))

/* fork format and extent count, selected by fork (w) */
#define XFS_DFORK_FORMAT(dip,w) \
	((w) == XFS_DATA_FORK ? \
		(dip)->di_format : \
		(dip)->di_aformat)
#define XFS_DFORK_NEXTENTS(dip,w) \
	((w) == XFS_DATA_FORK ? \
		be32_to_cpu((dip)->di_nextents) : \
		be16_to_cpu((dip)->di_anextents))

/* view a buffer's data as an on-disk inode */
#define	XFS_BUF_TO_DINODE(bp)	((xfs_dinode_t *)((bp)->b_addr))
179 | |||
180 | /* | ||
181 | * For block and character special files the 32bit dev_t is stored at the | ||
182 | * beginning of the data fork. | ||
183 | */ | ||
184 | static inline xfs_dev_t xfs_dinode_get_rdev(struct xfs_dinode *dip) | ||
185 | { | ||
186 | return be32_to_cpu(*(__be32 *)XFS_DFORK_DPTR(dip)); | ||
187 | } | ||
188 | |||
189 | static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) | ||
190 | { | ||
191 | *(__be32 *)XFS_DFORK_DPTR(dip) = cpu_to_be32(rdev); | ||
192 | } | ||
193 | |||
/*
 * Values for di_flags
 * There should be a one-to-one correspondence between these flags and the
 * XFS_XFLAG_s.
 *
 * Each flag is defined as a bit number followed by the matching mask.
 */

/* file's blocks come from rt area */
#define XFS_DIFLAG_REALTIME_BIT		0
#define XFS_DIFLAG_REALTIME		(1 << XFS_DIFLAG_REALTIME_BIT)

/* file space has been preallocated */
#define XFS_DIFLAG_PREALLOC_BIT		1
#define XFS_DIFLAG_PREALLOC		(1 << XFS_DIFLAG_PREALLOC_BIT)

/* for rtbitmap inode, new format */
#define XFS_DIFLAG_NEWRTBM_BIT		2
#define XFS_DIFLAG_NEWRTBM		(1 << XFS_DIFLAG_NEWRTBM_BIT)

/* inode is immutable */
#define XFS_DIFLAG_IMMUTABLE_BIT	3
#define XFS_DIFLAG_IMMUTABLE		(1 << XFS_DIFLAG_IMMUTABLE_BIT)

/* inode is append-only */
#define XFS_DIFLAG_APPEND_BIT		4
#define XFS_DIFLAG_APPEND		(1 << XFS_DIFLAG_APPEND_BIT)

/* inode is written synchronously */
#define XFS_DIFLAG_SYNC_BIT		5
#define XFS_DIFLAG_SYNC			(1 << XFS_DIFLAG_SYNC_BIT)

/* do not update atime */
#define XFS_DIFLAG_NOATIME_BIT		6
#define XFS_DIFLAG_NOATIME		(1 << XFS_DIFLAG_NOATIME_BIT)

/* do not dump */
#define XFS_DIFLAG_NODUMP_BIT		7
#define XFS_DIFLAG_NODUMP		(1 << XFS_DIFLAG_NODUMP_BIT)

/* create with realtime bit set */
#define XFS_DIFLAG_RTINHERIT_BIT	8
#define XFS_DIFLAG_RTINHERIT		(1 << XFS_DIFLAG_RTINHERIT_BIT)

/* create with parents projid */
#define XFS_DIFLAG_PROJINHERIT_BIT	9
#define XFS_DIFLAG_PROJINHERIT		(1 << XFS_DIFLAG_PROJINHERIT_BIT)

/* disallow symlink creation */
#define XFS_DIFLAG_NOSYMLINKS_BIT	10
#define XFS_DIFLAG_NOSYMLINKS		(1 << XFS_DIFLAG_NOSYMLINKS_BIT)

/* inode extent size allocator hint */
#define XFS_DIFLAG_EXTSIZE_BIT		11
#define XFS_DIFLAG_EXTSIZE		(1 << XFS_DIFLAG_EXTSIZE_BIT)

/* inherit inode extent size */
#define XFS_DIFLAG_EXTSZINHERIT_BIT	12
#define XFS_DIFLAG_EXTSZINHERIT		(1 << XFS_DIFLAG_EXTSZINHERIT_BIT)

/* do not reorganize/defragment */
#define XFS_DIFLAG_NODEFRAG_BIT		13
#define XFS_DIFLAG_NODEFRAG		(1 << XFS_DIFLAG_NODEFRAG_BIT)

/* use filestream allocator */
#define XFS_DIFLAG_FILESTREAM_BIT	14
#define XFS_DIFLAG_FILESTREAM		(1 << XFS_DIFLAG_FILESTREAM_BIT)

/* without CONFIG_XFS_RT no inode is ever treated as realtime */
#ifdef CONFIG_XFS_RT
#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME)
#else
#define XFS_IS_REALTIME_INODE(ip) (0)
#endif

/* mask of every flag defined above */
#define XFS_DIFLAG_ANY \
	(XFS_DIFLAG_REALTIME | \
	 XFS_DIFLAG_PREALLOC | \
	 XFS_DIFLAG_NEWRTBM | \
	 XFS_DIFLAG_IMMUTABLE | \
	 XFS_DIFLAG_APPEND | \
	 XFS_DIFLAG_SYNC | \
	 XFS_DIFLAG_NOATIME | \
	 XFS_DIFLAG_NODUMP | \
	 XFS_DIFLAG_RTINHERIT | \
	 XFS_DIFLAG_PROJINHERIT | \
	 XFS_DIFLAG_NOSYMLINKS | \
	 XFS_DIFLAG_EXTSIZE | \
	 XFS_DIFLAG_EXTSZINHERIT | \
	 XFS_DIFLAG_NODEFRAG | \
	 XFS_DIFLAG_FILESTREAM)
242 | |||
243 | #endif /* __XFS_DINODE_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c new file mode 100644 index 000000000000..6cef22152fd6 --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2.c | |||
@@ -0,0 +1,762 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_format.h" | ||
21 | #include "xfs_log_format.h" | ||
22 | #include "xfs_trans_resv.h" | ||
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | ||
27 | #include "xfs_da_format.h" | ||
28 | #include "xfs_da_btree.h" | ||
29 | #include "xfs_inode.h" | ||
30 | #include "xfs_trans.h" | ||
31 | #include "xfs_inode_item.h" | ||
32 | #include "xfs_bmap.h" | ||
33 | #include "xfs_dir2.h" | ||
34 | #include "xfs_dir2_priv.h" | ||
35 | #include "xfs_error.h" | ||
36 | #include "xfs_trace.h" | ||
37 | #include "xfs_dinode.h" | ||
38 | |||
39 | struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; | ||
40 | |||
41 | |||
42 | /* | ||
43 | * ASCII case-insensitive (ie. A-Z) support for directories that was | ||
44 | * used in IRIX. | ||
45 | */ | ||
46 | STATIC xfs_dahash_t | ||
47 | xfs_ascii_ci_hashname( | ||
48 | struct xfs_name *name) | ||
49 | { | ||
50 | xfs_dahash_t hash; | ||
51 | int i; | ||
52 | |||
53 | for (i = 0, hash = 0; i < name->len; i++) | ||
54 | hash = tolower(name->name[i]) ^ rol32(hash, 7); | ||
55 | |||
56 | return hash; | ||
57 | } | ||
58 | |||
59 | STATIC enum xfs_dacmp | ||
60 | xfs_ascii_ci_compname( | ||
61 | struct xfs_da_args *args, | ||
62 | const unsigned char *name, | ||
63 | int len) | ||
64 | { | ||
65 | enum xfs_dacmp result; | ||
66 | int i; | ||
67 | |||
68 | if (args->namelen != len) | ||
69 | return XFS_CMP_DIFFERENT; | ||
70 | |||
71 | result = XFS_CMP_EXACT; | ||
72 | for (i = 0; i < len; i++) { | ||
73 | if (args->name[i] == name[i]) | ||
74 | continue; | ||
75 | if (tolower(args->name[i]) != tolower(name[i])) | ||
76 | return XFS_CMP_DIFFERENT; | ||
77 | result = XFS_CMP_CASE; | ||
78 | } | ||
79 | |||
80 | return result; | ||
81 | } | ||
82 | |||
83 | static struct xfs_nameops xfs_ascii_ci_nameops = { | ||
84 | .hashname = xfs_ascii_ci_hashname, | ||
85 | .compname = xfs_ascii_ci_compname, | ||
86 | }; | ||
87 | |||
88 | int | ||
89 | xfs_da_mount( | ||
90 | struct xfs_mount *mp) | ||
91 | { | ||
92 | struct xfs_da_geometry *dageo; | ||
93 | int nodehdr_size; | ||
94 | |||
95 | |||
96 | ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT); | ||
97 | ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= | ||
98 | XFS_MAX_BLOCKSIZE); | ||
99 | |||
100 | mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); | ||
101 | mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); | ||
102 | |||
103 | nodehdr_size = mp->m_dir_inode_ops->node_hdr_size; | ||
104 | mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), | ||
105 | KM_SLEEP | KM_MAYFAIL); | ||
106 | mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), | ||
107 | KM_SLEEP | KM_MAYFAIL); | ||
108 | if (!mp->m_dir_geo || !mp->m_attr_geo) { | ||
109 | kmem_free(mp->m_dir_geo); | ||
110 | kmem_free(mp->m_attr_geo); | ||
111 | return -ENOMEM; | ||
112 | } | ||
113 | |||
114 | /* set up directory geometry */ | ||
115 | dageo = mp->m_dir_geo; | ||
116 | dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog; | ||
117 | dageo->fsblog = mp->m_sb.sb_blocklog; | ||
118 | dageo->blksize = 1 << dageo->blklog; | ||
119 | dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; | ||
120 | |||
121 | /* | ||
122 | * Now we've set up the block conversion variables, we can calculate the | ||
123 | * segment block constants using the geometry structure. | ||
124 | */ | ||
125 | dageo->datablk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_DATA_OFFSET); | ||
126 | dageo->leafblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_LEAF_OFFSET); | ||
127 | dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET); | ||
128 | dageo->node_ents = (dageo->blksize - nodehdr_size) / | ||
129 | (uint)sizeof(xfs_da_node_entry_t); | ||
130 | dageo->magicpct = (dageo->blksize * 37) / 100; | ||
131 | |||
132 | /* set up attribute geometry - single fsb only */ | ||
133 | dageo = mp->m_attr_geo; | ||
134 | dageo->blklog = mp->m_sb.sb_blocklog; | ||
135 | dageo->fsblog = mp->m_sb.sb_blocklog; | ||
136 | dageo->blksize = 1 << dageo->blklog; | ||
137 | dageo->fsbcount = 1; | ||
138 | dageo->node_ents = (dageo->blksize - nodehdr_size) / | ||
139 | (uint)sizeof(xfs_da_node_entry_t); | ||
140 | dageo->magicpct = (dageo->blksize * 37) / 100; | ||
141 | |||
142 | if (xfs_sb_version_hasasciici(&mp->m_sb)) | ||
143 | mp->m_dirnameops = &xfs_ascii_ci_nameops; | ||
144 | else | ||
145 | mp->m_dirnameops = &xfs_default_nameops; | ||
146 | |||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | void | ||
151 | xfs_da_unmount( | ||
152 | struct xfs_mount *mp) | ||
153 | { | ||
154 | kmem_free(mp->m_dir_geo); | ||
155 | kmem_free(mp->m_attr_geo); | ||
156 | } | ||
157 | |||
158 | /* | ||
159 | * Return 1 if directory contains only "." and "..". | ||
160 | */ | ||
161 | int | ||
162 | xfs_dir_isempty( | ||
163 | xfs_inode_t *dp) | ||
164 | { | ||
165 | xfs_dir2_sf_hdr_t *sfp; | ||
166 | |||
167 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | ||
168 | if (dp->i_d.di_size == 0) /* might happen during shutdown. */ | ||
169 | return 1; | ||
170 | if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) | ||
171 | return 0; | ||
172 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | ||
173 | return !sfp->count; | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * Validate a given inode number. | ||
178 | */ | ||
179 | int | ||
180 | xfs_dir_ino_validate( | ||
181 | xfs_mount_t *mp, | ||
182 | xfs_ino_t ino) | ||
183 | { | ||
184 | xfs_agblock_t agblkno; | ||
185 | xfs_agino_t agino; | ||
186 | xfs_agnumber_t agno; | ||
187 | int ino_ok; | ||
188 | int ioff; | ||
189 | |||
190 | agno = XFS_INO_TO_AGNO(mp, ino); | ||
191 | agblkno = XFS_INO_TO_AGBNO(mp, ino); | ||
192 | ioff = XFS_INO_TO_OFFSET(mp, ino); | ||
193 | agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff); | ||
194 | ino_ok = | ||
195 | agno < mp->m_sb.sb_agcount && | ||
196 | agblkno < mp->m_sb.sb_agblocks && | ||
197 | agblkno != 0 && | ||
198 | ioff < (1 << mp->m_sb.sb_inopblog) && | ||
199 | XFS_AGINO_TO_INO(mp, agno, agino) == ino; | ||
200 | if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, | ||
201 | XFS_RANDOM_DIR_INO_VALIDATE))) { | ||
202 | xfs_warn(mp, "Invalid inode number 0x%Lx", | ||
203 | (unsigned long long) ino); | ||
204 | XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); | ||
205 | return -EFSCORRUPTED; | ||
206 | } | ||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * Initialize a directory with its "." and ".." entries. | ||
212 | */ | ||
213 | int | ||
214 | xfs_dir_init( | ||
215 | xfs_trans_t *tp, | ||
216 | xfs_inode_t *dp, | ||
217 | xfs_inode_t *pdp) | ||
218 | { | ||
219 | struct xfs_da_args *args; | ||
220 | int error; | ||
221 | |||
222 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | ||
223 | error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino); | ||
224 | if (error) | ||
225 | return error; | ||
226 | |||
227 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | ||
228 | if (!args) | ||
229 | return -ENOMEM; | ||
230 | |||
231 | args->geo = dp->i_mount->m_dir_geo; | ||
232 | args->dp = dp; | ||
233 | args->trans = tp; | ||
234 | error = xfs_dir2_sf_create(args, pdp->i_ino); | ||
235 | kmem_free(args); | ||
236 | return error; | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | Enter a name in a directory. | ||
241 | */ | ||
242 | int | ||
243 | xfs_dir_createname( | ||
244 | xfs_trans_t *tp, | ||
245 | xfs_inode_t *dp, | ||
246 | struct xfs_name *name, | ||
247 | xfs_ino_t inum, /* new entry inode number */ | ||
248 | xfs_fsblock_t *first, /* bmap's firstblock */ | ||
249 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | ||
250 | xfs_extlen_t total) /* bmap's total block count */ | ||
251 | { | ||
252 | struct xfs_da_args *args; | ||
253 | int rval; | ||
254 | int v; /* type-checking value */ | ||
255 | |||
256 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | ||
257 | rval = xfs_dir_ino_validate(tp->t_mountp, inum); | ||
258 | if (rval) | ||
259 | return rval; | ||
260 | XFS_STATS_INC(xs_dir_create); | ||
261 | |||
262 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | ||
263 | if (!args) | ||
264 | return -ENOMEM; | ||
265 | |||
266 | args->geo = dp->i_mount->m_dir_geo; | ||
267 | args->name = name->name; | ||
268 | args->namelen = name->len; | ||
269 | args->filetype = name->type; | ||
270 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); | ||
271 | args->inumber = inum; | ||
272 | args->dp = dp; | ||
273 | args->firstblock = first; | ||
274 | args->flist = flist; | ||
275 | args->total = total; | ||
276 | args->whichfork = XFS_DATA_FORK; | ||
277 | args->trans = tp; | ||
278 | args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; | ||
279 | |||
280 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { | ||
281 | rval = xfs_dir2_sf_addname(args); | ||
282 | goto out_free; | ||
283 | } | ||
284 | |||
285 | rval = xfs_dir2_isblock(args, &v); | ||
286 | if (rval) | ||
287 | goto out_free; | ||
288 | if (v) { | ||
289 | rval = xfs_dir2_block_addname(args); | ||
290 | goto out_free; | ||
291 | } | ||
292 | |||
293 | rval = xfs_dir2_isleaf(args, &v); | ||
294 | if (rval) | ||
295 | goto out_free; | ||
296 | if (v) | ||
297 | rval = xfs_dir2_leaf_addname(args); | ||
298 | else | ||
299 | rval = xfs_dir2_node_addname(args); | ||
300 | |||
301 | out_free: | ||
302 | kmem_free(args); | ||
303 | return rval; | ||
304 | } | ||
305 | |||
306 | /* | ||
307 | * If doing a CI lookup and case-insensitive match, dup actual name into | ||
308 | * args.value. Return EEXIST for success (ie. name found) or an error. | ||
309 | */ | ||
310 | int | ||
311 | xfs_dir_cilookup_result( | ||
312 | struct xfs_da_args *args, | ||
313 | const unsigned char *name, | ||
314 | int len) | ||
315 | { | ||
316 | if (args->cmpresult == XFS_CMP_DIFFERENT) | ||
317 | return -ENOENT; | ||
318 | if (args->cmpresult != XFS_CMP_CASE || | ||
319 | !(args->op_flags & XFS_DA_OP_CILOOKUP)) | ||
320 | return -EEXIST; | ||
321 | |||
322 | args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL); | ||
323 | if (!args->value) | ||
324 | return -ENOMEM; | ||
325 | |||
326 | memcpy(args->value, name, len); | ||
327 | args->valuelen = len; | ||
328 | return -EEXIST; | ||
329 | } | ||
330 | |||
331 | /* | ||
332 | * Lookup a name in a directory, give back the inode number. | ||
333 | * If ci_name is not NULL, returns the actual name in ci_name if it differs | ||
334 | * to name, or ci_name->name is set to NULL for an exact match. | ||
335 | */ | ||
336 | |||
337 | int | ||
338 | xfs_dir_lookup( | ||
339 | xfs_trans_t *tp, | ||
340 | xfs_inode_t *dp, | ||
341 | struct xfs_name *name, | ||
342 | xfs_ino_t *inum, /* out: inode number */ | ||
343 | struct xfs_name *ci_name) /* out: actual name if CI match */ | ||
344 | { | ||
345 | struct xfs_da_args *args; | ||
346 | int rval; | ||
347 | int v; /* type-checking value */ | ||
348 | |||
349 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | ||
350 | XFS_STATS_INC(xs_dir_lookup); | ||
351 | |||
352 | /* | ||
353 | * We need to use KM_NOFS here so that lockdep will not throw false | ||
354 | * positive deadlock warnings on a non-transactional lookup path. It is | ||
355 | * safe to recurse into inode recalim in that case, but lockdep can't | ||
356 | * easily be taught about it. Hence KM_NOFS avoids having to add more | ||
357 | * lockdep Doing this avoids having to add a bunch of lockdep class | ||
358 | * annotations into the reclaim path for the ilock. | ||
359 | */ | ||
360 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | ||
361 | args->geo = dp->i_mount->m_dir_geo; | ||
362 | args->name = name->name; | ||
363 | args->namelen = name->len; | ||
364 | args->filetype = name->type; | ||
365 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); | ||
366 | args->dp = dp; | ||
367 | args->whichfork = XFS_DATA_FORK; | ||
368 | args->trans = tp; | ||
369 | args->op_flags = XFS_DA_OP_OKNOENT; | ||
370 | if (ci_name) | ||
371 | args->op_flags |= XFS_DA_OP_CILOOKUP; | ||
372 | |||
373 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { | ||
374 | rval = xfs_dir2_sf_lookup(args); | ||
375 | goto out_check_rval; | ||
376 | } | ||
377 | |||
378 | rval = xfs_dir2_isblock(args, &v); | ||
379 | if (rval) | ||
380 | goto out_free; | ||
381 | if (v) { | ||
382 | rval = xfs_dir2_block_lookup(args); | ||
383 | goto out_check_rval; | ||
384 | } | ||
385 | |||
386 | rval = xfs_dir2_isleaf(args, &v); | ||
387 | if (rval) | ||
388 | goto out_free; | ||
389 | if (v) | ||
390 | rval = xfs_dir2_leaf_lookup(args); | ||
391 | else | ||
392 | rval = xfs_dir2_node_lookup(args); | ||
393 | |||
394 | out_check_rval: | ||
395 | if (rval == -EEXIST) | ||
396 | rval = 0; | ||
397 | if (!rval) { | ||
398 | *inum = args->inumber; | ||
399 | if (ci_name) { | ||
400 | ci_name->name = args->value; | ||
401 | ci_name->len = args->valuelen; | ||
402 | } | ||
403 | } | ||
404 | out_free: | ||
405 | kmem_free(args); | ||
406 | return rval; | ||
407 | } | ||
408 | |||
409 | /* | ||
410 | * Remove an entry from a directory. | ||
411 | */ | ||
412 | int | ||
413 | xfs_dir_removename( | ||
414 | xfs_trans_t *tp, | ||
415 | xfs_inode_t *dp, | ||
416 | struct xfs_name *name, | ||
417 | xfs_ino_t ino, | ||
418 | xfs_fsblock_t *first, /* bmap's firstblock */ | ||
419 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | ||
420 | xfs_extlen_t total) /* bmap's total block count */ | ||
421 | { | ||
422 | struct xfs_da_args *args; | ||
423 | int rval; | ||
424 | int v; /* type-checking value */ | ||
425 | |||
426 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | ||
427 | XFS_STATS_INC(xs_dir_remove); | ||
428 | |||
429 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | ||
430 | if (!args) | ||
431 | return -ENOMEM; | ||
432 | |||
433 | args->geo = dp->i_mount->m_dir_geo; | ||
434 | args->name = name->name; | ||
435 | args->namelen = name->len; | ||
436 | args->filetype = name->type; | ||
437 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); | ||
438 | args->inumber = ino; | ||
439 | args->dp = dp; | ||
440 | args->firstblock = first; | ||
441 | args->flist = flist; | ||
442 | args->total = total; | ||
443 | args->whichfork = XFS_DATA_FORK; | ||
444 | args->trans = tp; | ||
445 | |||
446 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { | ||
447 | rval = xfs_dir2_sf_removename(args); | ||
448 | goto out_free; | ||
449 | } | ||
450 | |||
451 | rval = xfs_dir2_isblock(args, &v); | ||
452 | if (rval) | ||
453 | goto out_free; | ||
454 | if (v) { | ||
455 | rval = xfs_dir2_block_removename(args); | ||
456 | goto out_free; | ||
457 | } | ||
458 | |||
459 | rval = xfs_dir2_isleaf(args, &v); | ||
460 | if (rval) | ||
461 | goto out_free; | ||
462 | if (v) | ||
463 | rval = xfs_dir2_leaf_removename(args); | ||
464 | else | ||
465 | rval = xfs_dir2_node_removename(args); | ||
466 | out_free: | ||
467 | kmem_free(args); | ||
468 | return rval; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * Replace the inode number of a directory entry. | ||
473 | */ | ||
474 | int | ||
475 | xfs_dir_replace( | ||
476 | xfs_trans_t *tp, | ||
477 | xfs_inode_t *dp, | ||
478 | struct xfs_name *name, /* name of entry to replace */ | ||
479 | xfs_ino_t inum, /* new inode number */ | ||
480 | xfs_fsblock_t *first, /* bmap's firstblock */ | ||
481 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | ||
482 | xfs_extlen_t total) /* bmap's total block count */ | ||
483 | { | ||
484 | struct xfs_da_args *args; | ||
485 | int rval; | ||
486 | int v; /* type-checking value */ | ||
487 | |||
488 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | ||
489 | |||
490 | rval = xfs_dir_ino_validate(tp->t_mountp, inum); | ||
491 | if (rval) | ||
492 | return rval; | ||
493 | |||
494 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | ||
495 | if (!args) | ||
496 | return -ENOMEM; | ||
497 | |||
498 | args->geo = dp->i_mount->m_dir_geo; | ||
499 | args->name = name->name; | ||
500 | args->namelen = name->len; | ||
501 | args->filetype = name->type; | ||
502 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); | ||
503 | args->inumber = inum; | ||
504 | args->dp = dp; | ||
505 | args->firstblock = first; | ||
506 | args->flist = flist; | ||
507 | args->total = total; | ||
508 | args->whichfork = XFS_DATA_FORK; | ||
509 | args->trans = tp; | ||
510 | |||
511 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { | ||
512 | rval = xfs_dir2_sf_replace(args); | ||
513 | goto out_free; | ||
514 | } | ||
515 | |||
516 | rval = xfs_dir2_isblock(args, &v); | ||
517 | if (rval) | ||
518 | goto out_free; | ||
519 | if (v) { | ||
520 | rval = xfs_dir2_block_replace(args); | ||
521 | goto out_free; | ||
522 | } | ||
523 | |||
524 | rval = xfs_dir2_isleaf(args, &v); | ||
525 | if (rval) | ||
526 | goto out_free; | ||
527 | if (v) | ||
528 | rval = xfs_dir2_leaf_replace(args); | ||
529 | else | ||
530 | rval = xfs_dir2_node_replace(args); | ||
531 | out_free: | ||
532 | kmem_free(args); | ||
533 | return rval; | ||
534 | } | ||
535 | |||
536 | /* | ||
537 | * See if this entry can be added to the directory without allocating space. | ||
538 | * First checks that the caller couldn't reserve enough space (resblks = 0). | ||
539 | */ | ||
540 | int | ||
541 | xfs_dir_canenter( | ||
542 | xfs_trans_t *tp, | ||
543 | xfs_inode_t *dp, | ||
544 | struct xfs_name *name, /* name of entry to add */ | ||
545 | uint resblks) | ||
546 | { | ||
547 | struct xfs_da_args *args; | ||
548 | int rval; | ||
549 | int v; /* type-checking value */ | ||
550 | |||
551 | if (resblks) | ||
552 | return 0; | ||
553 | |||
554 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | ||
555 | |||
556 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | ||
557 | if (!args) | ||
558 | return -ENOMEM; | ||
559 | |||
560 | args->geo = dp->i_mount->m_dir_geo; | ||
561 | args->name = name->name; | ||
562 | args->namelen = name->len; | ||
563 | args->filetype = name->type; | ||
564 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); | ||
565 | args->dp = dp; | ||
566 | args->whichfork = XFS_DATA_FORK; | ||
567 | args->trans = tp; | ||
568 | args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | | ||
569 | XFS_DA_OP_OKNOENT; | ||
570 | |||
571 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { | ||
572 | rval = xfs_dir2_sf_addname(args); | ||
573 | goto out_free; | ||
574 | } | ||
575 | |||
576 | rval = xfs_dir2_isblock(args, &v); | ||
577 | if (rval) | ||
578 | goto out_free; | ||
579 | if (v) { | ||
580 | rval = xfs_dir2_block_addname(args); | ||
581 | goto out_free; | ||
582 | } | ||
583 | |||
584 | rval = xfs_dir2_isleaf(args, &v); | ||
585 | if (rval) | ||
586 | goto out_free; | ||
587 | if (v) | ||
588 | rval = xfs_dir2_leaf_addname(args); | ||
589 | else | ||
590 | rval = xfs_dir2_node_addname(args); | ||
591 | out_free: | ||
592 | kmem_free(args); | ||
593 | return rval; | ||
594 | } | ||
595 | |||
596 | /* | ||
597 | * Utility routines. | ||
598 | */ | ||
599 | |||
600 | /* | ||
601 | * Add a block to the directory. | ||
602 | * Returns 0 and sets *dbp on success, else the xfs_da_grow_inode_int() error (dbp untouched). | ||
603 | * This routine is for data and free blocks, not leaf/node blocks which are | ||
604 | * handled by xfs_da_grow_inode. | ||
605 | */ | ||
606 | int | ||
607 | xfs_dir2_grow_inode( | ||
608 | struct xfs_da_args *args, | ||
609 | int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ | ||
610 | xfs_dir2_db_t *dbp) /* out: block number added */ | ||
611 | { | ||
612 | struct xfs_inode *dp = args->dp; | ||
613 | struct xfs_mount *mp = dp->i_mount; | ||
614 | xfs_fileoff_t bno; /* directory offset of new block */ | ||
615 | int count; /* count of filesystem blocks */ | ||
616 | int error; | ||
617 | |||
618 | trace_xfs_dir2_grow_inode(args, space); | ||
619 | |||
620 | /* | ||
621 | * Set lowest possible block in the space requested. | ||
622 | */ | ||
623 | bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); | ||
624 | count = args->geo->fsbcount; /* one directory block, counted in filesystem blocks */ | ||
625 | |||
626 | error = xfs_da_grow_inode_int(args, &bno, count); /* NOTE(review): bno is passed by address, presumably updated to the offset actually mapped */ | ||
627 | if (error) | ||
628 | return error; | ||
629 | |||
630 | *dbp = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)bno); /* da block offset to directory block number, per the helper name */ | ||
631 | |||
632 | /* | ||
633 | * Update file's size if this is the data space and it grew. | ||
634 | */ | ||
635 | if (space == XFS_DIR2_DATA_SPACE) { | ||
636 | xfs_fsize_t size; /* directory file (data) size */ | ||
637 | |||
638 | size = XFS_FSB_TO_B(mp, bno + count); /* end of the new block, converted for comparison with di_size */ | ||
639 | if (size > dp->i_d.di_size) { | ||
640 | dp->i_d.di_size = size; | ||
641 | xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); /* di_size changed, so log the inode core */ | ||
642 | } | ||
643 | } | ||
644 | return 0; | ||
645 | } | ||
646 | |||
647 | /* | ||
648 | * See if the directory is a single-block form directory, i.e. whether the last data fork offset equals geo->blksize bytes. | ||
649 | */ | ||
650 | int | ||
651 | xfs_dir2_isblock( | ||
652 | struct xfs_da_args *args, | ||
653 | int *vp) /* out: 1 is block, 0 is not block */ | ||
654 | { | ||
655 | xfs_fileoff_t last; /* last file offset */ | ||
656 | int rval; /* first the lookup error code, then reused as the boolean answer */ | ||
657 | |||
658 | if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) | ||
659 | return rval; /* lookup failed: *vp is left unset */ | ||
660 | rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize; | ||
661 | ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize); /* block form implies di_size is exactly one directory block */ | ||
662 | *vp = rval; | ||
663 | return 0; | ||
664 | } | ||
665 | |||
666 | /* | ||
667 | * See if the directory is a single-leaf form directory, i.e. whether the data fork ends right after the block at geo->leafblk. | ||
668 | */ | ||
669 | int | ||
670 | xfs_dir2_isleaf( | ||
671 | struct xfs_da_args *args, | ||
672 | int *vp) /* out: 1 is leaf, 0 is not leaf */ | ||
673 | { | ||
674 | xfs_fileoff_t last; /* last file offset */ | ||
675 | int rval; /* error code from the last-offset lookup */ | ||
676 | |||
677 | if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) | ||
678 | return rval; /* lookup failed: *vp is left unset */ | ||
679 | *vp = last == args->geo->leafblk + args->geo->fsbcount; /* true only if nothing is mapped beyond the single leaf block */ | ||
680 | return 0; | ||
681 | } | ||
682 | |||
683 | /* | ||
684 | * Remove the given block from the directory. | ||
685 | * This routine is used for data and free blocks, leaf/node are done | ||
686 | * by xfs_da_shrink_inode. Returns 0, or the error from xfs_bunmapi() or xfs_bmap_last_before(). | ||
687 | */ | ||
688 | int | ||
689 | xfs_dir2_shrink_inode( | ||
690 | xfs_da_args_t *args, | ||
691 | xfs_dir2_db_t db, /* directory block number to remove */ | ||
692 | struct xfs_buf *bp) /* buffer for that block; invalidated once the unmap succeeds */ | ||
693 | { | ||
694 | xfs_fileoff_t bno; /* directory file offset */ | ||
695 | xfs_dablk_t da; /* da block offset corresponding to db */ | ||
696 | int done; /* bunmap is finished */ | ||
697 | xfs_inode_t *dp; | ||
698 | int error; | ||
699 | xfs_mount_t *mp; | ||
700 | xfs_trans_t *tp; | ||
701 | |||
702 | trace_xfs_dir2_shrink_inode(args, db); | ||
703 | |||
704 | dp = args->dp; | ||
705 | mp = dp->i_mount; | ||
706 | tp = args->trans; | ||
707 | da = xfs_dir2_db_to_da(args->geo, db); | ||
708 | /* | ||
709 | * Unmap the fsblock(s). | ||
710 | */ | ||
711 | if ((error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, | ||
712 | XFS_BMAPI_METADATA, 0, args->firstblock, args->flist, | ||
713 | &done))) { | ||
714 | /* | ||
715 | * ENOSPC actually can happen if we're in a removename with | ||
716 | * no space reservation, and the resulting block removal | ||
717 | * would cause a bmap btree split or conversion from extents | ||
718 | * to btree. This can only happen for un-fragmented | ||
719 | * directory blocks, since you need to be punching out | ||
720 | * the middle of an extent. | ||
721 | * In this case we need to leave the block in the file, | ||
722 | * and not binval it. | ||
723 | * So the block has to be in a consistent empty state | ||
724 | * and appropriately logged. | ||
725 | * We don't free up the buffer, the caller can tell it | ||
726 | * hasn't happened since it got an error back. | ||
727 | */ | ||
728 | return error; | ||
729 | } | ||
730 | ASSERT(done); | ||
731 | /* | ||
732 | * Invalidate the buffer from the transaction. | ||
733 | */ | ||
734 | xfs_trans_binval(tp, bp); | ||
735 | /* | ||
736 | * If it's not a data block, we're done. | ||
737 | */ | ||
738 | if (db >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET)) | ||
739 | return 0; | ||
740 | /* | ||
741 | * If the block isn't the last one in the directory, we're done. | ||
742 | */ | ||
743 | if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(args->geo, db + 1, 0)) | ||
744 | return 0; | ||
745 | bno = da; /* in/out for the call below; the result becomes the new di_size further down */ | ||
746 | if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) { | ||
747 | /* | ||
748 | * This can't really happen unless there's kernel corruption. | ||
749 | */ | ||
750 | return error; | ||
751 | } | ||
752 | if (db == args->geo->datablk) /* removed the first data block: nothing may remain before it */ | ||
753 | ASSERT(bno == 0); | ||
754 | else | ||
755 | ASSERT(bno > 0); | ||
756 | /* | ||
757 | * Set the size to the new last block. | ||
758 | */ | ||
759 | dp->i_d.di_size = XFS_FSB_TO_B(mp, bno); | ||
760 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); /* di_size changed, so log the inode core */ | ||
761 | return 0; | ||
762 | } | ||
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h new file mode 100644 index 000000000000..c8e86b0b5e99 --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2.h | |||
@@ -0,0 +1,180 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_DIR2_H__ | ||
19 | #define __XFS_DIR2_H__ | ||
20 | |||
21 | struct xfs_bmap_free; | ||
22 | struct xfs_da_args; | ||
23 | struct xfs_inode; | ||
24 | struct xfs_mount; | ||
25 | struct xfs_trans; | ||
26 | struct xfs_dir2_sf_hdr; | ||
27 | struct xfs_dir2_sf_entry; | ||
28 | struct xfs_dir2_data_hdr; | ||
29 | struct xfs_dir2_data_entry; | ||
30 | struct xfs_dir2_data_unused; | ||
31 | |||
32 | extern struct xfs_name xfs_name_dotdot; | ||
33 | |||
34 | /* | ||
35 | * Directory operations vector for encode/decode routines: callbacks plus fixed sizes and offsets, obtained via xfs_dir_get_ops() or xfs_nondir_get_ops(). | ||
36 | */ | ||
37 | struct xfs_dir_ops { /* first group: helpers taking shortform (xfs_dir2_sf_*) structures */ | ||
38 | int (*sf_entsize)(struct xfs_dir2_sf_hdr *hdr, int len); | ||
39 | struct xfs_dir2_sf_entry * | ||
40 | (*sf_nextentry)(struct xfs_dir2_sf_hdr *hdr, | ||
41 | struct xfs_dir2_sf_entry *sfep); | ||
42 | __uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep); | ||
43 | void (*sf_put_ftype)(struct xfs_dir2_sf_entry *sfep, | ||
44 | __uint8_t ftype); | ||
45 | xfs_ino_t (*sf_get_ino)(struct xfs_dir2_sf_hdr *hdr, | ||
46 | struct xfs_dir2_sf_entry *sfep); | ||
47 | void (*sf_put_ino)(struct xfs_dir2_sf_hdr *hdr, | ||
48 | struct xfs_dir2_sf_entry *sfep, | ||
49 | xfs_ino_t ino); | ||
50 | xfs_ino_t (*sf_get_parent_ino)(struct xfs_dir2_sf_hdr *hdr); | ||
51 | void (*sf_put_parent_ino)(struct xfs_dir2_sf_hdr *hdr, | ||
52 | xfs_ino_t ino); | ||
53 | /* helpers taking data block entries (xfs_dir2_data_entry) or the data block header */ |||
54 | int (*data_entsize)(int len); | ||
55 | __uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep); | ||
56 | void (*data_put_ftype)(struct xfs_dir2_data_entry *dep, | ||
57 | __uint8_t ftype); | ||
58 | __be16 * (*data_entry_tag_p)(struct xfs_dir2_data_entry *dep); | ||
59 | struct xfs_dir2_data_free * | ||
60 | (*data_bestfree_p)(struct xfs_dir2_data_hdr *hdr); | ||
61 | /* fixed offsets; NOTE(review): presumably byte offsets within a data block, confirm against the ops tables */ |||
62 | xfs_dir2_data_aoff_t data_dot_offset; | ||
63 | xfs_dir2_data_aoff_t data_dotdot_offset; | ||
64 | xfs_dir2_data_aoff_t data_first_offset; | ||
65 | size_t data_entry_offset; | ||
66 | /* pointer accessors computed from a data block header */ |||
67 | struct xfs_dir2_data_entry * | ||
68 | (*data_dot_entry_p)(struct xfs_dir2_data_hdr *hdr); | ||
69 | struct xfs_dir2_data_entry * | ||
70 | (*data_dotdot_entry_p)(struct xfs_dir2_data_hdr *hdr); | ||
71 | struct xfs_dir2_data_entry * | ||
72 | (*data_first_entry_p)(struct xfs_dir2_data_hdr *hdr); | ||
73 | struct xfs_dir2_data_entry * | ||
74 | (*data_entry_p)(struct xfs_dir2_data_hdr *hdr); | ||
75 | struct xfs_dir2_data_unused * | ||
76 | (*data_unused_p)(struct xfs_dir2_data_hdr *hdr); | ||
77 | /* leaf block: header size, conversion between xfs_dir2_leaf and xfs_dir3_icleaf_hdr, entry access */ |||
78 | int leaf_hdr_size; | ||
79 | void (*leaf_hdr_to_disk)(struct xfs_dir2_leaf *to, | ||
80 | struct xfs_dir3_icleaf_hdr *from); | ||
81 | void (*leaf_hdr_from_disk)(struct xfs_dir3_icleaf_hdr *to, | ||
82 | struct xfs_dir2_leaf *from); | ||
83 | int (*leaf_max_ents)(struct xfs_da_geometry *geo); | ||
84 | struct xfs_dir2_leaf_entry * | ||
85 | (*leaf_ents_p)(struct xfs_dir2_leaf *lp); | ||
86 | /* node block: header size, conversion between xfs_da_intnode and xfs_da3_icnode_hdr, entry access */ |||
87 | int node_hdr_size; | ||
88 | void (*node_hdr_to_disk)(struct xfs_da_intnode *to, | ||
89 | struct xfs_da3_icnode_hdr *from); | ||
90 | void (*node_hdr_from_disk)(struct xfs_da3_icnode_hdr *to, | ||
91 | struct xfs_da_intnode *from); | ||
92 | struct xfs_da_node_entry * | ||
93 | (*node_tree_p)(struct xfs_da_intnode *dap); | ||
94 | /* free block: header size, conversion between xfs_dir2_free and xfs_dir3_icfree_hdr, bests access, db mapping */ |||
95 | int free_hdr_size; | ||
96 | void (*free_hdr_to_disk)(struct xfs_dir2_free *to, | ||
97 | struct xfs_dir3_icfree_hdr *from); | ||
98 | void (*free_hdr_from_disk)(struct xfs_dir3_icfree_hdr *to, | ||
99 | struct xfs_dir2_free *from); | ||
100 | int (*free_max_bests)(struct xfs_da_geometry *geo); | ||
101 | __be16 * (*free_bests_p)(struct xfs_dir2_free *free); | ||
102 | xfs_dir2_db_t (*db_to_fdb)(struct xfs_da_geometry *geo, | ||
103 | xfs_dir2_db_t db); | ||
104 | int (*db_to_fdindex)(struct xfs_da_geometry *geo, | ||
105 | xfs_dir2_db_t db); | ||
106 | }; | ||
107 | |||
108 | extern const struct xfs_dir_ops * | ||
109 | xfs_dir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp); | ||
110 | extern const struct xfs_dir_ops * | ||
111 | xfs_nondir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp); | ||
112 | |||
113 | /* | ||
114 | * Generic directory interface routines | ||
115 | */ | ||
116 | extern void xfs_dir_startup(void); | ||
117 | extern int xfs_da_mount(struct xfs_mount *mp); | ||
118 | extern void xfs_da_unmount(struct xfs_mount *mp); | ||
119 | |||
120 | extern int xfs_dir_isempty(struct xfs_inode *dp); | ||
121 | extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp, | ||
122 | struct xfs_inode *pdp); | ||
123 | extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp, | ||
124 | struct xfs_name *name, xfs_ino_t inum, | ||
125 | xfs_fsblock_t *first, | ||
126 | struct xfs_bmap_free *flist, xfs_extlen_t tot); | ||
127 | extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, | ||
128 | struct xfs_name *name, xfs_ino_t *inum, | ||
129 | struct xfs_name *ci_name); | ||
130 | extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, | ||
131 | struct xfs_name *name, xfs_ino_t ino, | ||
132 | xfs_fsblock_t *first, | ||
133 | struct xfs_bmap_free *flist, xfs_extlen_t tot); | ||
134 | extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, | ||
135 | struct xfs_name *name, xfs_ino_t inum, | ||
136 | xfs_fsblock_t *first, | ||
137 | struct xfs_bmap_free *flist, xfs_extlen_t tot); | ||
138 | extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, | ||
139 | struct xfs_name *name, uint resblks); | ||
140 | |||
141 | /* | ||
142 | * Direct call from the bmap code, bypassing the generic directory layer. | ||
143 | */ | ||
144 | extern int xfs_dir2_sf_to_block(struct xfs_da_args *args); | ||
145 | |||
146 | /* | ||
147 | * Interface routines used by userspace utilities | ||
148 | */ | ||
149 | extern int xfs_dir2_isblock(struct xfs_da_args *args, int *r); | ||
150 | extern int xfs_dir2_isleaf(struct xfs_da_args *args, int *r); | ||
151 | extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, | ||
152 | struct xfs_buf *bp); | ||
153 | |||
154 | extern void xfs_dir2_data_freescan(struct xfs_inode *dp, | ||
155 | struct xfs_dir2_data_hdr *hdr, int *loghead); | ||
156 | extern void xfs_dir2_data_log_entry(struct xfs_da_args *args, | ||
157 | struct xfs_buf *bp, struct xfs_dir2_data_entry *dep); | ||
158 | extern void xfs_dir2_data_log_header(struct xfs_da_args *args, | ||
159 | struct xfs_buf *bp); | ||
160 | extern void xfs_dir2_data_log_unused(struct xfs_da_args *args, | ||
161 | struct xfs_buf *bp, struct xfs_dir2_data_unused *dup); | ||
162 | extern void xfs_dir2_data_make_free(struct xfs_da_args *args, | ||
163 | struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, | ||
164 | xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); | ||
165 | extern void xfs_dir2_data_use_free(struct xfs_da_args *args, | ||
166 | struct xfs_buf *bp, struct xfs_dir2_data_unused *dup, | ||
167 | xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, | ||
168 | int *needlogp, int *needscanp); | ||
169 | |||
170 | extern struct xfs_dir2_data_free *xfs_dir2_data_freefind( | ||
171 | struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf, | ||
172 | struct xfs_dir2_data_unused *dup); | ||
173 | |||
174 | extern const struct xfs_buf_ops xfs_dir3_block_buf_ops; | ||
175 | extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops; | ||
176 | extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops; | ||
177 | extern const struct xfs_buf_ops xfs_dir3_free_buf_ops; | ||
178 | extern const struct xfs_buf_ops xfs_dir3_data_buf_ops; | ||
179 | |||
180 | #endif /* __XFS_DIR2_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c new file mode 100644 index 000000000000..9628ceccfa02 --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2_block.c | |||
@@ -0,0 +1,1265 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2013 Red Hat, Inc. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #include "xfs.h" | ||
20 | #include "xfs_fs.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | ||
27 | #include "xfs_da_format.h" | ||
28 | #include "xfs_da_btree.h" | ||
29 | #include "xfs_inode.h" | ||
30 | #include "xfs_trans.h" | ||
31 | #include "xfs_inode_item.h" | ||
32 | #include "xfs_bmap.h" | ||
33 | #include "xfs_buf_item.h" | ||
34 | #include "xfs_dir2.h" | ||
35 | #include "xfs_dir2_priv.h" | ||
36 | #include "xfs_error.h" | ||
37 | #include "xfs_trace.h" | ||
38 | #include "xfs_cksum.h" | ||
39 | #include "xfs_dinode.h" | ||
40 | |||
41 | /* | ||
42 | * Local function prototypes. | ||
43 | */ | ||
44 | static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, struct xfs_buf *bp, | ||
45 | int first, int last); | ||
46 | static void xfs_dir2_block_log_tail(xfs_trans_t *tp, struct xfs_buf *bp); | ||
47 | static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, struct xfs_buf **bpp, | ||
48 | int *entno); | ||
49 | static int xfs_dir2_block_sort(const void *a, const void *b); | ||
50 | |||
51 | static xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot; | ||
52 | |||
53 | /* | ||
54 | * One-time startup routine called from xfs_init(): caches the hashes of the dot and dotdot names in file-scope statics. | ||
55 | */ | ||
56 | void | ||
57 | xfs_dir_startup(void) | ||
58 | { | ||
59 | xfs_dir_hash_dot = xfs_da_hashname((unsigned char *)".", 1); /* second argument is the name length */ | ||
60 | xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); | ||
61 | } | ||
62 | /* Structure check shared by the read and write verifiers below; true means the block passed every check. */ |||
63 | static bool | ||
64 | xfs_dir3_block_verify( | ||
65 | struct xfs_buf *bp) | ||
66 | { | ||
67 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
68 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; /* the magic field is read through this type in both branches */ | ||
69 | |||
70 | if (xfs_sb_version_hascrc(&mp->m_sb)) { /* CRC-enabled superblock: header must carry the DIR3 magic, our uuid and its own block number */ | ||
71 | if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) | ||
72 | return false; | ||
73 | if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid)) | ||
74 | return false; | ||
75 | if (be64_to_cpu(hdr3->blkno) != bp->b_bn) | ||
76 | return false; | ||
77 | } else { /* otherwise only the DIR2 magic is checked */ | ||
78 | if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) | ||
79 | return false; | ||
80 | } | ||
81 | if (__xfs_dir3_data_check(NULL, bp)) /* a non-zero result is treated as a failed contents check */ | ||
82 | return false; | ||
83 | return true; | ||
84 | } | ||
85 | /* Read-side verifier: sets -EFSBADCRC on a checksum failure (CRC filesystems only), else -EFSCORRUPTED if the structure check fails. */ |||
86 | static void | ||
87 | xfs_dir3_block_read_verify( | ||
88 | struct xfs_buf *bp) | ||
89 | { | ||
90 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
91 | |||
92 | if (xfs_sb_version_hascrc(&mp->m_sb) && | ||
93 | !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) | ||
94 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
95 | else if (!xfs_dir3_block_verify(bp)) /* structure is only checked when the checksum test did not fail */ | ||
96 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
97 | |||
98 | if (bp->b_error) /* report whatever error is now recorded on the buffer */ | ||
99 | xfs_verifier_error(bp); | ||
100 | } | ||
101 | /* Write-side verifier: rejects a block that fails the structure check, then on CRC filesystems stamps the LSN and updates the checksum. */ |||
102 | static void | ||
103 | xfs_dir3_block_write_verify( | ||
104 | struct xfs_buf *bp) | ||
105 | { | ||
106 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
107 | struct xfs_buf_log_item *bip = bp->b_fspriv; /* may be NULL; checked before use below */ | ||
108 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; | ||
109 | |||
110 | if (!xfs_dir3_block_verify(bp)) { | ||
111 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
112 | xfs_verifier_error(bp); | ||
113 | return; | ||
114 | } | ||
115 | |||
116 | if (!xfs_sb_version_hascrc(&mp->m_sb)) /* no LSN or checksum to maintain without CRC support */ | ||
117 | return; | ||
118 | |||
119 | if (bip) | ||
120 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); | ||
121 | |||
122 | xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); /* done last so the checksum covers the lsn written above */ | ||
123 | } | ||
124 | /* Buffer ops wiring the block-form verifiers; attached by xfs_dir3_block_read() and xfs_dir3_block_init() in this file. */ |||
125 | const struct xfs_buf_ops xfs_dir3_block_buf_ops = { | ||
126 | .verify_read = xfs_dir3_block_read_verify, | ||
127 | .verify_write = xfs_dir3_block_write_verify, | ||
128 | }; | ||
129 | /* Read the directory block at geo->datablk with the block verifier attached; returns the xfs_da_read_buf() result and fills *bpp. */ |||
130 | int | ||
131 | xfs_dir3_block_read( | ||
132 | struct xfs_trans *tp, | ||
133 | struct xfs_inode *dp, | ||
134 | struct xfs_buf **bpp) | ||
135 | { | ||
136 | struct xfs_mount *mp = dp->i_mount; | ||
137 | int err; | ||
138 | |||
139 | err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp, /* NOTE(review): meaning of the -1 argument is defined by xfs_da_read_buf(), not visible here */ | ||
140 | XFS_DATA_FORK, &xfs_dir3_block_buf_ops); | ||
141 | if (!err && tp) /* the buffer type is only set when a transaction was supplied */ | ||
142 | xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); | ||
143 | return err; | ||
144 | } | ||
145 | /* Set up a block-form directory buffer: attach the verifier ops, set the buffer type and write the header magic (full header on CRC filesystems). */ |||
146 | static void | ||
147 | xfs_dir3_block_init( | ||
148 | struct xfs_mount *mp, | ||
149 | struct xfs_trans *tp, | ||
150 | struct xfs_buf *bp, | ||
151 | struct xfs_inode *dp) | ||
152 | { | ||
153 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; | ||
154 | |||
155 | bp->b_ops = &xfs_dir3_block_buf_ops; | ||
156 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_BLOCK_BUF); | ||
157 | |||
158 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
159 | memset(hdr3, 0, sizeof(*hdr3)); /* zero the whole header before filling in the fields below */ | ||
160 | hdr3->magic = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); | ||
161 | hdr3->blkno = cpu_to_be64(bp->b_bn); | ||
162 | hdr3->owner = cpu_to_be64(dp->i_ino); | ||
163 | uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid); | ||
164 | return; | ||
165 | |||
166 | } | ||
167 | hdr3->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); /* without CRC support only the magic is written here */ | ||
168 | } | ||
169 | /* Work out where a new data entry of len bytes could go; results are returned only through tagpp, dupp, enddupp and compact. */ |||
170 | static void | ||
171 | xfs_dir2_block_need_space( | ||
172 | struct xfs_inode *dp, | ||
173 | struct xfs_dir2_data_hdr *hdr, | ||
174 | struct xfs_dir2_block_tail *btp, | ||
175 | struct xfs_dir2_leaf_entry *blp, | ||
176 | __be16 **tagpp, /* out: tag just before the first leaf entry, NULL when that tag was not looked at */ | ||
177 | struct xfs_dir2_data_unused **dupp, /* out: space chosen for the new entry, NULL if there is no room */ | ||
178 | struct xfs_dir2_data_unused **enddupp, /* out: data object before the leaf entries (no-stale case), else NULL */ | ||
179 | int *compact, /* out: 1 if the leaf entries must be compacted first, else 0 */ | ||
180 | int len) /* size needed for the new data entry */ | ||
181 | { | ||
182 | struct xfs_dir2_data_free *bf; | ||
183 | __be16 *tagp = NULL; | ||
184 | struct xfs_dir2_data_unused *dup = NULL; | ||
185 | struct xfs_dir2_data_unused *enddup = NULL; | ||
186 | |||
187 | *compact = 0; | ||
188 | bf = dp->d_ops->data_bestfree_p(hdr); /* bestfree table; bf[0] is treated as the largest free region below */ | ||
189 | |||
190 | /* | ||
191 | * If there are stale entries we'll use one for the leaf. | ||
192 | */ | ||
193 | if (btp->stale) { | ||
194 | if (be16_to_cpu(bf[0].length) >= len) { | ||
195 | /* | ||
196 | * The biggest free entry is big enough to avoid compaction. | ||
197 | */ | ||
198 | dup = (xfs_dir2_data_unused_t *) | ||
199 | ((char *)hdr + be16_to_cpu(bf[0].offset)); | ||
200 | goto out; | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * Will need to compact to make this work. | ||
205 | * Tag just before the first leaf entry. | ||
206 | */ | ||
207 | *compact = 1; | ||
208 | tagp = (__be16 *)blp - 1; | ||
209 | |||
210 | /* Data object just before the first leaf entry. */ | ||
211 | dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); | ||
212 | |||
213 | /* | ||
214 | * If it's not free then the data will go where the | ||
215 | * leaf data starts now, if it works at all. | ||
216 | */ | ||
217 | if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { | ||
218 | if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) * | ||
219 | (uint)sizeof(*blp) < len) /* free object plus the leaf slots compaction would release is still too small */ | ||
220 | dup = NULL; | ||
221 | } else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len) | ||
222 | dup = NULL; | ||
223 | else | ||
224 | dup = (xfs_dir2_data_unused_t *)blp; | ||
225 | goto out; | ||
226 | } | ||
227 | |||
228 | /* | ||
229 | * no stale entries, so just use free space. | ||
230 | * Tag just before the first leaf entry. | ||
231 | */ | ||
232 | tagp = (__be16 *)blp - 1; | ||
233 | |||
234 | /* Data object just before the first leaf entry. */ | ||
235 | enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); | ||
236 | |||
237 | /* | ||
238 | * If it's not free then can't do this add without cleaning up: | ||
239 | * the space before the first leaf entry needs to be free so it | ||
240 | * can be expanded to hold the pointer to the new entry. | ||
241 | */ | ||
242 | if (be16_to_cpu(enddup->freetag) == XFS_DIR2_DATA_FREE_TAG) { | ||
243 | /* | ||
244 | * Check out the biggest freespace and see if it's the same one. | ||
245 | */ | ||
246 | dup = (xfs_dir2_data_unused_t *) | ||
247 | ((char *)hdr + be16_to_cpu(bf[0].offset)); | ||
248 | if (dup != enddup) { | ||
249 | /* | ||
250 | * Not the same free entry, just check its length. | ||
251 | */ | ||
252 | if (be16_to_cpu(dup->length) < len) | ||
253 | dup = NULL; | ||
254 | goto out; | ||
255 | } | ||
256 | |||
257 | /* | ||
258 | * It is the biggest freespace, can it hold the leaf too? | ||
259 | */ | ||
260 | if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) { | ||
261 | /* | ||
262 | * No, so use the second-largest entry instead if it works. | ||
263 | */ | ||
264 | if (be16_to_cpu(bf[1].length) >= len) | ||
265 | dup = (xfs_dir2_data_unused_t *) | ||
266 | ((char *)hdr + be16_to_cpu(bf[1].offset)); | ||
267 | else | ||
268 | dup = NULL; | ||
269 | } | ||
270 | } | ||
271 | out: | ||
272 | *tagpp = tagp; | ||
273 | *dupp = dup; | ||
274 | *enddupp = enddup; | ||
275 | } | ||
276 | |||
277 | /* | ||
278 | * Compact the leaf entries: squeeze out all but one stale entry and hand the freed leaf slots back as data free space. | ||
279 | * Leave the highest-numbered stale entry stale. | ||
280 | * XXX should be the one closest to mid but mid is not yet computed. | ||
281 | */ | ||
282 | static void | ||
283 | xfs_dir2_block_compact( | ||
284 | struct xfs_da_args *args, | ||
285 | struct xfs_buf *bp, | ||
286 | struct xfs_dir2_data_hdr *hdr, | ||
287 | struct xfs_dir2_block_tail *btp, | ||
288 | struct xfs_dir2_leaf_entry *blp, | ||
289 | int *needlog, /* passed through to make_free and freescan, which may request header logging */ | ||
290 | int *lfloghigh, /* out: high leaf index for the caller to log; stays relative to -1 if no second stale entry is met */ | ||
291 | int *lfloglow) /* out: low leaf index for the caller to log */ | ||
292 | { | ||
293 | int fromidx; /* source leaf index */ | ||
294 | int toidx; /* target leaf index */ | ||
295 | int needscan = 0; | ||
296 | int highstale; /* high stale index */ | ||
297 | |||
298 | fromidx = toidx = be32_to_cpu(btp->count) - 1; /* walk from the last entry down, packing toward the high end */ | ||
299 | highstale = *lfloghigh = -1; | ||
300 | for (; fromidx >= 0; fromidx--) { | ||
301 | if (blp[fromidx].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { | ||
302 | if (highstale == -1) | ||
303 | highstale = toidx; /* first stale entry met is kept and copied like a live one */ | ||
304 | else { | ||
305 | if (*lfloghigh == -1) | ||
306 | *lfloghigh = toidx; | ||
307 | continue; /* every further stale entry is dropped */ | ||
308 | } | ||
309 | } | ||
310 | if (fromidx < toidx) | ||
311 | blp[toidx] = blp[fromidx]; | ||
312 | toidx--; | ||
313 | } | ||
314 | *lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); /* both bounds are shifted down by the stale - 1 slots being removed */ | ||
315 | *lfloghigh -= be32_to_cpu(btp->stale) - 1; | ||
316 | be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); | ||
317 | xfs_dir2_data_make_free(args, bp, | ||
318 | (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), | ||
319 | (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), | ||
320 | needlog, &needscan); /* frees stale - 1 leaf slots starting at the old beginning of the leaf array */ | ||
321 | btp->stale = cpu_to_be32(1); | ||
322 | /* | ||
323 | * If we now need to rebuild the bestfree map, do so. | ||
324 | * This needs to happen before the next call to use_free. | ||
325 | */ | ||
326 | if (needscan) | ||
327 | xfs_dir2_data_freescan(args->dp, hdr, needlog); | ||
328 | } | ||
329 | |||
330 | /* | ||
331 | * Add an entry to a block directory. | ||
332 | */ | ||
333 | int /* error */ | ||
334 | xfs_dir2_block_addname( | ||
335 | xfs_da_args_t *args) /* directory op arguments */ | ||
336 | { | ||
337 | xfs_dir2_data_hdr_t *hdr; /* block header */ | ||
338 | xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ | ||
339 | struct xfs_buf *bp; /* buffer for block */ | ||
340 | xfs_dir2_block_tail_t *btp; /* block tail */ | ||
341 | int compact; /* need to compact leaf ents */ | ||
342 | xfs_dir2_data_entry_t *dep; /* block data entry */ | ||
343 | xfs_inode_t *dp; /* directory inode */ | ||
344 | xfs_dir2_data_unused_t *dup; /* block unused entry */ | ||
345 | int error; /* error return value */ | ||
346 | xfs_dir2_data_unused_t *enddup=NULL; /* unused at end of data */ | ||
347 | xfs_dahash_t hash; /* hash value of found entry */ | ||
348 | int high; /* high index for binary srch */ | ||
349 | int highstale; /* high stale index */ | ||
350 | int lfloghigh=0; /* last final leaf to log */ | ||
351 | int lfloglow=0; /* first final leaf to log */ | ||
352 | int len; /* length of the new entry */ | ||
353 | int low; /* low index for binary srch */ | ||
354 | int lowstale; /* low stale index */ | ||
355 | int mid=0; /* midpoint for binary srch */ | ||
356 | xfs_mount_t *mp; /* filesystem mount point */ | ||
357 | int needlog; /* need to log header */ | ||
358 | int needscan; /* need to rescan freespace */ | ||
359 | __be16 *tagp; /* pointer to tag value */ | ||
360 | xfs_trans_t *tp; /* transaction structure */ | ||
361 | |||
362 | trace_xfs_dir2_block_addname(args); | ||
363 | |||
364 | dp = args->dp; | ||
365 | tp = args->trans; | ||
366 | mp = dp->i_mount; | ||
367 | |||
368 | /* Read the (one and only) directory block into bp. */ | ||
369 | error = xfs_dir3_block_read(tp, dp, &bp); | ||
370 | if (error) | ||
371 | return error; | ||
372 | |||
373 | len = dp->d_ops->data_entsize(args->namelen); | ||
374 | |||
375 | /* | ||
376 | * Set up pointers to parts of the block. | ||
377 | */ | ||
378 | hdr = bp->b_addr; | ||
379 | btp = xfs_dir2_block_tail_p(args->geo, hdr); | ||
380 | blp = xfs_dir2_block_leaf_p(btp); | ||
381 | |||
382 | /* | ||
383 | * Find out if we can reuse stale entries or whether we need extra | ||
384 | * space for entry and new leaf. | ||
385 | */ | ||
386 | xfs_dir2_block_need_space(dp, hdr, btp, blp, &tagp, &dup, | ||
387 | &enddup, &compact, len); | ||
388 | |||
389 | /* | ||
390 | * Done everything we need for a space check now. | ||
391 | */ | ||
392 | if (args->op_flags & XFS_DA_OP_JUSTCHECK) { | ||
393 | xfs_trans_brelse(tp, bp); | ||
394 | if (!dup) | ||
395 | return -ENOSPC; | ||
396 | return 0; | ||
397 | } | ||
398 | |||
399 | /* | ||
400 | * If we don't have space for the new entry & leaf ... | ||
401 | */ | ||
402 | if (!dup) { | ||
403 | /* Don't have a space reservation: return no-space. */ | ||
404 | if (args->total == 0) | ||
405 | return -ENOSPC; | ||
406 | /* | ||
407 | * Convert to the next larger format. | ||
408 | * Then add the new entry in that format. | ||
409 | */ | ||
410 | error = xfs_dir2_block_to_leaf(args, bp); | ||
411 | if (error) | ||
412 | return error; | ||
413 | return xfs_dir2_leaf_addname(args); | ||
414 | } | ||
415 | |||
416 | needlog = needscan = 0; | ||
417 | |||
418 | /* | ||
419 | * If need to compact the leaf entries, do it now. | ||
420 | */ | ||
421 | if (compact) { | ||
422 | xfs_dir2_block_compact(args, bp, hdr, btp, blp, &needlog, | ||
423 | &lfloghigh, &lfloglow); | ||
424 | /* recalculate blp post-compaction */ | ||
425 | blp = xfs_dir2_block_leaf_p(btp); | ||
426 | } else if (btp->stale) { | ||
427 | /* | ||
428 | * Set leaf logging boundaries to impossible state. | ||
429 | * For the no-stale case they're set explicitly. | ||
430 | */ | ||
431 | lfloglow = be32_to_cpu(btp->count); | ||
432 | lfloghigh = -1; | ||
433 | } | ||
434 | |||
435 | /* | ||
436 | * Find the slot that's first lower than our hash value, -1 if none. | ||
437 | */ | ||
438 | for (low = 0, high = be32_to_cpu(btp->count) - 1; low <= high; ) { | ||
439 | mid = (low + high) >> 1; | ||
440 | if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval) | ||
441 | break; | ||
442 | if (hash < args->hashval) | ||
443 | low = mid + 1; | ||
444 | else | ||
445 | high = mid - 1; | ||
446 | } | ||
447 | while (mid >= 0 && be32_to_cpu(blp[mid].hashval) >= args->hashval) { | ||
448 | mid--; | ||
449 | } | ||
450 | /* | ||
451 | * No stale entries, will use enddup space to hold new leaf. | ||
452 | */ | ||
453 | if (!btp->stale) { | ||
454 | /* | ||
455 | * Mark the space needed for the new leaf entry, now in use. | ||
456 | */ | ||
457 | xfs_dir2_data_use_free(args, bp, enddup, | ||
458 | (xfs_dir2_data_aoff_t) | ||
459 | ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) - | ||
460 | sizeof(*blp)), | ||
461 | (xfs_dir2_data_aoff_t)sizeof(*blp), | ||
462 | &needlog, &needscan); | ||
463 | /* | ||
464 | * Update the tail (entry count). | ||
465 | */ | ||
466 | be32_add_cpu(&btp->count, 1); | ||
467 | /* | ||
468 | * If we now need to rebuild the bestfree map, do so. | ||
469 | * This needs to happen before the next call to use_free. | ||
470 | */ | ||
471 | if (needscan) { | ||
472 | xfs_dir2_data_freescan(dp, hdr, &needlog); | ||
473 | needscan = 0; | ||
474 | } | ||
475 | /* | ||
476 | * Adjust pointer to the first leaf entry, we're about to move | ||
477 | * the table up one to open up space for the new leaf entry. | ||
478 | * Then adjust our index to match. | ||
479 | */ | ||
480 | blp--; | ||
481 | mid++; | ||
482 | if (mid) | ||
483 | memmove(blp, &blp[1], mid * sizeof(*blp)); | ||
484 | lfloglow = 0; | ||
485 | lfloghigh = mid; | ||
486 | } | ||
487 | /* | ||
488 | * Use a stale leaf for our new entry. | ||
489 | */ | ||
490 | else { | ||
491 | for (lowstale = mid; | ||
492 | lowstale >= 0 && | ||
493 | blp[lowstale].address != | ||
494 | cpu_to_be32(XFS_DIR2_NULL_DATAPTR); | ||
495 | lowstale--) | ||
496 | continue; | ||
497 | for (highstale = mid + 1; | ||
498 | highstale < be32_to_cpu(btp->count) && | ||
499 | blp[highstale].address != | ||
500 | cpu_to_be32(XFS_DIR2_NULL_DATAPTR) && | ||
501 | (lowstale < 0 || mid - lowstale > highstale - mid); | ||
502 | highstale++) | ||
503 | continue; | ||
504 | /* | ||
505 | * Move entries toward the low-numbered stale entry. | ||
506 | */ | ||
507 | if (lowstale >= 0 && | ||
508 | (highstale == be32_to_cpu(btp->count) || | ||
509 | mid - lowstale <= highstale - mid)) { | ||
510 | if (mid - lowstale) | ||
511 | memmove(&blp[lowstale], &blp[lowstale + 1], | ||
512 | (mid - lowstale) * sizeof(*blp)); | ||
513 | lfloglow = MIN(lowstale, lfloglow); | ||
514 | lfloghigh = MAX(mid, lfloghigh); | ||
515 | } | ||
516 | /* | ||
517 | * Move entries toward the high-numbered stale entry. | ||
518 | */ | ||
519 | else { | ||
520 | ASSERT(highstale < be32_to_cpu(btp->count)); | ||
521 | mid++; | ||
522 | if (highstale - mid) | ||
523 | memmove(&blp[mid + 1], &blp[mid], | ||
524 | (highstale - mid) * sizeof(*blp)); | ||
525 | lfloglow = MIN(mid, lfloglow); | ||
526 | lfloghigh = MAX(highstale, lfloghigh); | ||
527 | } | ||
528 | be32_add_cpu(&btp->stale, -1); | ||
529 | } | ||
530 | /* | ||
531 | * Point to the new data entry. | ||
532 | */ | ||
533 | dep = (xfs_dir2_data_entry_t *)dup; | ||
534 | /* | ||
535 | * Fill in the leaf entry. | ||
536 | */ | ||
537 | blp[mid].hashval = cpu_to_be32(args->hashval); | ||
538 | blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( | ||
539 | (char *)dep - (char *)hdr)); | ||
540 | xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); | ||
541 | /* | ||
542 | * Mark space for the data entry used. | ||
543 | */ | ||
544 | xfs_dir2_data_use_free(args, bp, dup, | ||
545 | (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), | ||
546 | (xfs_dir2_data_aoff_t)len, &needlog, &needscan); | ||
547 | /* | ||
548 | * Create the new data entry. | ||
549 | */ | ||
550 | dep->inumber = cpu_to_be64(args->inumber); | ||
551 | dep->namelen = args->namelen; | ||
552 | memcpy(dep->name, args->name, args->namelen); | ||
553 | dp->d_ops->data_put_ftype(dep, args->filetype); | ||
554 | tagp = dp->d_ops->data_entry_tag_p(dep); | ||
555 | *tagp = cpu_to_be16((char *)dep - (char *)hdr); | ||
556 | /* | ||
557 | * Clean up the bestfree array and log the header, tail, and entry. | ||
558 | */ | ||
559 | if (needscan) | ||
560 | xfs_dir2_data_freescan(dp, hdr, &needlog); | ||
561 | if (needlog) | ||
562 | xfs_dir2_data_log_header(args, bp); | ||
563 | xfs_dir2_block_log_tail(tp, bp); | ||
564 | xfs_dir2_data_log_entry(args, bp, dep); | ||
565 | xfs_dir3_data_check(dp, bp); | ||
566 | return 0; | ||
567 | } | ||
568 | |||
569 | /* | ||
570 | * Log leaf entries from the block. | ||
571 | */ | ||
572 | static void | ||
573 | xfs_dir2_block_log_leaf( | ||
574 | xfs_trans_t *tp, /* transaction structure */ | ||
575 | struct xfs_buf *bp, /* block buffer */ | ||
576 | int first, /* index of first logged leaf */ | ||
577 | int last) /* index of last logged leaf */ | ||
578 | { | ||
579 | xfs_dir2_data_hdr_t *hdr = bp->b_addr; | ||
580 | xfs_dir2_leaf_entry_t *blp; | ||
581 | xfs_dir2_block_tail_t *btp; | ||
582 | |||
583 | btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr); | ||
584 | blp = xfs_dir2_block_leaf_p(btp); | ||
585 | xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr), | ||
586 | (uint)((char *)&blp[last + 1] - (char *)hdr - 1)); | ||
587 | } | ||
588 | |||
589 | /* | ||
590 | * Log the block tail. | ||
591 | */ | ||
592 | static void | ||
593 | xfs_dir2_block_log_tail( | ||
594 | xfs_trans_t *tp, /* transaction structure */ | ||
595 | struct xfs_buf *bp) /* block buffer */ | ||
596 | { | ||
597 | xfs_dir2_data_hdr_t *hdr = bp->b_addr; | ||
598 | xfs_dir2_block_tail_t *btp; | ||
599 | |||
600 | btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr); | ||
601 | xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr), | ||
602 | (uint)((char *)(btp + 1) - (char *)hdr - 1)); | ||
603 | } | ||
604 | |||
605 | /* | ||
606 | * Look up an entry in the block. This is the external routine, | ||
607 | * xfs_dir2_block_lookup_int does the real work. | ||
608 | */ | ||
609 | int /* error */ | ||
610 | xfs_dir2_block_lookup( | ||
611 | xfs_da_args_t *args) /* dir lookup arguments */ | ||
612 | { | ||
613 | xfs_dir2_data_hdr_t *hdr; /* block header */ | ||
614 | xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ | ||
615 | struct xfs_buf *bp; /* block buffer */ | ||
616 | xfs_dir2_block_tail_t *btp; /* block tail */ | ||
617 | xfs_dir2_data_entry_t *dep; /* block data entry */ | ||
618 | xfs_inode_t *dp; /* incore inode */ | ||
619 | int ent; /* entry index */ | ||
620 | int error; /* error return value */ | ||
621 | xfs_mount_t *mp; /* filesystem mount point */ | ||
622 | |||
623 | trace_xfs_dir2_block_lookup(args); | ||
624 | |||
625 | /* | ||
626 | * Get the buffer, look up the entry. | ||
627 | * If not found (ENOENT) then return, have no buffer. | ||
628 | */ | ||
629 | if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) | ||
630 | return error; | ||
631 | dp = args->dp; | ||
632 | mp = dp->i_mount; | ||
633 | hdr = bp->b_addr; | ||
634 | xfs_dir3_data_check(dp, bp); | ||
635 | btp = xfs_dir2_block_tail_p(args->geo, hdr); | ||
636 | blp = xfs_dir2_block_leaf_p(btp); | ||
637 | /* | ||
638 | * Get the offset from the leaf entry, to point to the data. | ||
639 | */ | ||
640 | dep = (xfs_dir2_data_entry_t *)((char *)hdr + | ||
641 | xfs_dir2_dataptr_to_off(args->geo, | ||
642 | be32_to_cpu(blp[ent].address))); | ||
643 | /* | ||
644 | * Fill in inode number, CI name if appropriate, release the block. | ||
645 | */ | ||
646 | args->inumber = be64_to_cpu(dep->inumber); | ||
647 | args->filetype = dp->d_ops->data_get_ftype(dep); | ||
648 | error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); | ||
649 | xfs_trans_brelse(args->trans, bp); | ||
650 | return error; | ||
651 | } | ||
652 | |||
653 | /* | ||
654 | * Internal block lookup routine. | ||
655 | */ | ||
656 | static int /* error */ | ||
657 | xfs_dir2_block_lookup_int( | ||
658 | xfs_da_args_t *args, /* dir lookup arguments */ | ||
659 | struct xfs_buf **bpp, /* returned block buffer */ | ||
660 | int *entno) /* returned entry number */ | ||
661 | { | ||
662 | xfs_dir2_dataptr_t addr; /* data entry address */ | ||
663 | xfs_dir2_data_hdr_t *hdr; /* block header */ | ||
664 | xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ | ||
665 | struct xfs_buf *bp; /* block buffer */ | ||
666 | xfs_dir2_block_tail_t *btp; /* block tail */ | ||
667 | xfs_dir2_data_entry_t *dep; /* block data entry */ | ||
668 | xfs_inode_t *dp; /* incore inode */ | ||
669 | int error; /* error return value */ | ||
670 | xfs_dahash_t hash; /* found hash value */ | ||
671 | int high; /* binary search high index */ | ||
672 | int low; /* binary search low index */ | ||
673 | int mid; /* binary search current idx */ | ||
674 | xfs_mount_t *mp; /* filesystem mount point */ | ||
675 | xfs_trans_t *tp; /* transaction pointer */ | ||
676 | enum xfs_dacmp cmp; /* comparison result */ | ||
677 | |||
678 | dp = args->dp; | ||
679 | tp = args->trans; | ||
680 | mp = dp->i_mount; | ||
681 | |||
682 | error = xfs_dir3_block_read(tp, dp, &bp); | ||
683 | if (error) | ||
684 | return error; | ||
685 | |||
686 | hdr = bp->b_addr; | ||
687 | xfs_dir3_data_check(dp, bp); | ||
688 | btp = xfs_dir2_block_tail_p(args->geo, hdr); | ||
689 | blp = xfs_dir2_block_leaf_p(btp); | ||
690 | /* | ||
691 | * Loop doing a binary search for our hash value. | ||
692 | * Find our entry, ENOENT if it's not there. | ||
693 | */ | ||
694 | for (low = 0, high = be32_to_cpu(btp->count) - 1; ; ) { | ||
695 | ASSERT(low <= high); | ||
696 | mid = (low + high) >> 1; | ||
697 | if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval) | ||
698 | break; | ||
699 | if (hash < args->hashval) | ||
700 | low = mid + 1; | ||
701 | else | ||
702 | high = mid - 1; | ||
703 | if (low > high) { | ||
704 | ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); | ||
705 | xfs_trans_brelse(tp, bp); | ||
706 | return -ENOENT; | ||
707 | } | ||
708 | } | ||
709 | /* | ||
710 | * Back up to the first one with the right hash value. | ||
711 | */ | ||
712 | while (mid > 0 && be32_to_cpu(blp[mid - 1].hashval) == args->hashval) { | ||
713 | mid--; | ||
714 | } | ||
715 | /* | ||
716 | * Now loop forward through all the entries with the | ||
717 | * right hash value looking for our name. | ||
718 | */ | ||
719 | do { | ||
720 | if ((addr = be32_to_cpu(blp[mid].address)) == XFS_DIR2_NULL_DATAPTR) | ||
721 | continue; | ||
722 | /* | ||
723 | * Get pointer to the entry from the leaf. | ||
724 | */ | ||
725 | dep = (xfs_dir2_data_entry_t *) | ||
726 | ((char *)hdr + xfs_dir2_dataptr_to_off(args->geo, addr)); | ||
727 | /* | ||
728 | * Compare name and if it's an exact match, return the index | ||
729 | * and buffer. If it's the first case-insensitive match, store | ||
730 | * the index and buffer and continue looking for an exact match. | ||
731 | */ | ||
732 | cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); | ||
733 | if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { | ||
734 | args->cmpresult = cmp; | ||
735 | *bpp = bp; | ||
736 | *entno = mid; | ||
737 | if (cmp == XFS_CMP_EXACT) | ||
738 | return 0; | ||
739 | } | ||
740 | } while (++mid < be32_to_cpu(btp->count) && | ||
741 | be32_to_cpu(blp[mid].hashval) == hash); | ||
742 | |||
743 | ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); | ||
744 | /* | ||
745 | * Here, we can only be doing a lookup (not a rename or replace). | ||
746 | * If a case-insensitive match was found earlier, return success. | ||
747 | */ | ||
748 | if (args->cmpresult == XFS_CMP_CASE) | ||
749 | return 0; | ||
750 | /* | ||
751 | * No match, release the buffer and return ENOENT. | ||
752 | */ | ||
753 | xfs_trans_brelse(tp, bp); | ||
754 | return -ENOENT; | ||
755 | } | ||
756 | |||
757 | /* | ||
758 | * Remove an entry from a block format directory. | ||
759 | * If that makes the block small enough to fit in shortform, transform it. | ||
760 | */ | ||
761 | int /* error */ | ||
762 | xfs_dir2_block_removename( | ||
763 | xfs_da_args_t *args) /* directory operation args */ | ||
764 | { | ||
765 | xfs_dir2_data_hdr_t *hdr; /* block header */ | ||
766 | xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */ | ||
767 | struct xfs_buf *bp; /* block buffer */ | ||
768 | xfs_dir2_block_tail_t *btp; /* block tail */ | ||
769 | xfs_dir2_data_entry_t *dep; /* block data entry */ | ||
770 | xfs_inode_t *dp; /* incore inode */ | ||
771 | int ent; /* block leaf entry index */ | ||
772 | int error; /* error return value */ | ||
773 | xfs_mount_t *mp; /* filesystem mount point */ | ||
774 | int needlog; /* need to log block header */ | ||
775 | int needscan; /* need to fixup bestfree */ | ||
776 | xfs_dir2_sf_hdr_t sfh; /* shortform header */ | ||
777 | int size; /* shortform size */ | ||
778 | xfs_trans_t *tp; /* transaction pointer */ | ||
779 | |||
780 | trace_xfs_dir2_block_removename(args); | ||
781 | |||
782 | /* | ||
783 | * Look up the entry in the block. Gets the buffer and entry index. | ||
784 | * It will always be there, the vnodeops level does a lookup first. | ||
785 | */ | ||
786 | if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) { | ||
787 | return error; | ||
788 | } | ||
789 | dp = args->dp; | ||
790 | tp = args->trans; | ||
791 | mp = dp->i_mount; | ||
792 | hdr = bp->b_addr; | ||
793 | btp = xfs_dir2_block_tail_p(args->geo, hdr); | ||
794 | blp = xfs_dir2_block_leaf_p(btp); | ||
795 | /* | ||
796 | * Point to the data entry using the leaf entry. | ||
797 | */ | ||
798 | dep = (xfs_dir2_data_entry_t *)((char *)hdr + | ||
799 | xfs_dir2_dataptr_to_off(args->geo, | ||
800 | be32_to_cpu(blp[ent].address))); | ||
801 | /* | ||
802 | * Mark the data entry's space free. | ||
803 | */ | ||
804 | needlog = needscan = 0; | ||
805 | xfs_dir2_data_make_free(args, bp, | ||
806 | (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), | ||
807 | dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); | ||
808 | /* | ||
809 | * Fix up the block tail. | ||
810 | */ | ||
811 | be32_add_cpu(&btp->stale, 1); | ||
812 | xfs_dir2_block_log_tail(tp, bp); | ||
813 | /* | ||
814 | * Remove the leaf entry by marking it stale. | ||
815 | */ | ||
816 | blp[ent].address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); | ||
817 | xfs_dir2_block_log_leaf(tp, bp, ent, ent); | ||
818 | /* | ||
819 | * Fix up bestfree, log the header if necessary. | ||
820 | */ | ||
821 | if (needscan) | ||
822 | xfs_dir2_data_freescan(dp, hdr, &needlog); | ||
823 | if (needlog) | ||
824 | xfs_dir2_data_log_header(args, bp); | ||
825 | xfs_dir3_data_check(dp, bp); | ||
826 | /* | ||
827 | * See if the size as a shortform is good enough. | ||
828 | */ | ||
829 | size = xfs_dir2_block_sfsize(dp, hdr, &sfh); | ||
830 | if (size > XFS_IFORK_DSIZE(dp)) | ||
831 | return 0; | ||
832 | |||
833 | /* | ||
834 | * If it works, do the conversion. | ||
835 | */ | ||
836 | return xfs_dir2_block_to_sf(args, bp, size, &sfh); | ||
837 | } | ||
838 | |||
839 | /* | ||
840 | * Replace an entry in a V2 block directory. | ||
841 | * Change the inode number to the new value. | ||
842 | */ | ||
843 | int /* error */ | ||
844 | xfs_dir2_block_replace( | ||
845 | xfs_da_args_t *args) /* directory operation args */ | ||
846 | { | ||
847 | xfs_dir2_data_hdr_t *hdr; /* block header */ | ||
848 | xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ | ||
849 | struct xfs_buf *bp; /* block buffer */ | ||
850 | xfs_dir2_block_tail_t *btp; /* block tail */ | ||
851 | xfs_dir2_data_entry_t *dep; /* block data entry */ | ||
852 | xfs_inode_t *dp; /* incore inode */ | ||
853 | int ent; /* leaf entry index */ | ||
854 | int error; /* error return value */ | ||
855 | xfs_mount_t *mp; /* filesystem mount point */ | ||
856 | |||
857 | trace_xfs_dir2_block_replace(args); | ||
858 | |||
859 | /* | ||
860 | * Lookup the entry in the directory. Get buffer and entry index. | ||
861 | * This will always succeed since the caller has already done a lookup. | ||
862 | */ | ||
863 | if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) { | ||
864 | return error; | ||
865 | } | ||
866 | dp = args->dp; | ||
867 | mp = dp->i_mount; | ||
868 | hdr = bp->b_addr; | ||
869 | btp = xfs_dir2_block_tail_p(args->geo, hdr); | ||
870 | blp = xfs_dir2_block_leaf_p(btp); | ||
871 | /* | ||
872 | * Point to the data entry we need to change. | ||
873 | */ | ||
874 | dep = (xfs_dir2_data_entry_t *)((char *)hdr + | ||
875 | xfs_dir2_dataptr_to_off(args->geo, | ||
876 | be32_to_cpu(blp[ent].address))); | ||
877 | ASSERT(be64_to_cpu(dep->inumber) != args->inumber); | ||
878 | /* | ||
879 | * Change the inode number to the new value. | ||
880 | */ | ||
881 | dep->inumber = cpu_to_be64(args->inumber); | ||
882 | dp->d_ops->data_put_ftype(dep, args->filetype); | ||
883 | xfs_dir2_data_log_entry(args, bp, dep); | ||
884 | xfs_dir3_data_check(dp, bp); | ||
885 | return 0; | ||
886 | } | ||
887 | |||
888 | /* | ||
889 | * Qsort comparison routine for the block leaf entries. | ||
890 | */ | ||
891 | static int /* sort order */ | ||
892 | xfs_dir2_block_sort( | ||
893 | const void *a, /* first leaf entry */ | ||
894 | const void *b) /* second leaf entry */ | ||
895 | { | ||
896 | const xfs_dir2_leaf_entry_t *la; /* first leaf entry */ | ||
897 | const xfs_dir2_leaf_entry_t *lb; /* second leaf entry */ | ||
898 | |||
899 | la = a; | ||
900 | lb = b; | ||
901 | return be32_to_cpu(la->hashval) < be32_to_cpu(lb->hashval) ? -1 : | ||
902 | (be32_to_cpu(la->hashval) > be32_to_cpu(lb->hashval) ? 1 : 0); | ||
903 | } | ||
904 | |||
905 | /* | ||
906 | * Convert a V2 leaf directory to a V2 block directory if possible. | ||
907 | */ | ||
908 | int /* error */ | ||
909 | xfs_dir2_leaf_to_block( | ||
910 | xfs_da_args_t *args, /* operation arguments */ | ||
911 | struct xfs_buf *lbp, /* leaf buffer */ | ||
912 | struct xfs_buf *dbp) /* data buffer */ | ||
913 | { | ||
914 | __be16 *bestsp; /* leaf bests table */ | ||
915 | xfs_dir2_data_hdr_t *hdr; /* block header */ | ||
916 | xfs_dir2_block_tail_t *btp; /* block tail */ | ||
917 | xfs_inode_t *dp; /* incore directory inode */ | ||
918 | xfs_dir2_data_unused_t *dup; /* unused data entry */ | ||
919 | int error; /* error return value */ | ||
920 | int from; /* leaf from index */ | ||
921 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
922 | xfs_dir2_leaf_entry_t *lep; /* leaf entry */ | ||
923 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ | ||
924 | xfs_mount_t *mp; /* file system mount point */ | ||
925 | int needlog; /* need to log data header */ | ||
926 | int needscan; /* need to scan for bestfree */ | ||
927 | xfs_dir2_sf_hdr_t sfh; /* shortform header */ | ||
928 | int size; /* bytes used */ | ||
929 | __be16 *tagp; /* end of entry (tag) */ | ||
930 | int to; /* block/leaf to index */ | ||
931 | xfs_trans_t *tp; /* transaction pointer */ | ||
932 | struct xfs_dir2_leaf_entry *ents; | ||
933 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
934 | |||
935 | trace_xfs_dir2_leaf_to_block(args); | ||
936 | |||
937 | dp = args->dp; | ||
938 | tp = args->trans; | ||
939 | mp = dp->i_mount; | ||
940 | leaf = lbp->b_addr; | ||
941 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
942 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
943 | ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); | ||
944 | |||
945 | ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC || | ||
946 | leafhdr.magic == XFS_DIR3_LEAF1_MAGIC); | ||
947 | /* | ||
948 | * If there are data blocks other than the first one, take this | ||
949 | * opportunity to remove trailing empty data blocks that may have | ||
950 | * been left behind during no-space-reservation operations. | ||
951 | * These will show up in the leaf bests table. | ||
952 | */ | ||
953 | while (dp->i_d.di_size > args->geo->blksize) { | ||
954 | int hdrsz; | ||
955 | |||
956 | hdrsz = dp->d_ops->data_entry_offset; | ||
957 | bestsp = xfs_dir2_leaf_bests_p(ltp); | ||
958 | if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == | ||
959 | args->geo->blksize - hdrsz) { | ||
960 | if ((error = | ||
961 | xfs_dir2_leaf_trim_data(args, lbp, | ||
962 | (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) | ||
963 | return error; | ||
964 | } else | ||
965 | return 0; | ||
966 | } | ||
967 | /* | ||
968 | * Read the data block if we don't already have it, give up if it fails. | ||
969 | */ | ||
970 | if (!dbp) { | ||
971 | error = xfs_dir3_data_read(tp, dp, args->geo->datablk, -1, &dbp); | ||
972 | if (error) | ||
973 | return error; | ||
974 | } | ||
975 | hdr = dbp->b_addr; | ||
976 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || | ||
977 | hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); | ||
978 | |||
979 | /* | ||
980 | * Size of the "leaf" area in the block. | ||
981 | */ | ||
982 | size = (uint)sizeof(xfs_dir2_block_tail_t) + | ||
983 | (uint)sizeof(*lep) * (leafhdr.count - leafhdr.stale); | ||
984 | /* | ||
985 | * Look at the last data entry. | ||
986 | */ | ||
987 | tagp = (__be16 *)((char *)hdr + args->geo->blksize) - 1; | ||
988 | dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); | ||
989 | /* | ||
990 | * If it's not free or is too short we can't do it. | ||
991 | */ | ||
992 | if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG || | ||
993 | be16_to_cpu(dup->length) < size) | ||
994 | return 0; | ||
995 | |||
996 | /* | ||
997 | * Start converting it to block form. | ||
998 | */ | ||
999 | xfs_dir3_block_init(mp, tp, dbp, dp); | ||
1000 | |||
1001 | needlog = 1; | ||
1002 | needscan = 0; | ||
1003 | /* | ||
1004 | * Use up the space at the end of the block (blp/btp). | ||
1005 | */ | ||
1006 | xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size, | ||
1007 | &needlog, &needscan); | ||
1008 | /* | ||
1009 | * Initialize the block tail. | ||
1010 | */ | ||
1011 | btp = xfs_dir2_block_tail_p(args->geo, hdr); | ||
1012 | btp->count = cpu_to_be32(leafhdr.count - leafhdr.stale); | ||
1013 | btp->stale = 0; | ||
1014 | xfs_dir2_block_log_tail(tp, dbp); | ||
1015 | /* | ||
1016 | * Initialize the block leaf area. We compact out stale entries. | ||
1017 | */ | ||
1018 | lep = xfs_dir2_block_leaf_p(btp); | ||
1019 | for (from = to = 0; from < leafhdr.count; from++) { | ||
1020 | if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) | ||
1021 | continue; | ||
1022 | lep[to++] = ents[from]; | ||
1023 | } | ||
1024 | ASSERT(to == be32_to_cpu(btp->count)); | ||
1025 | xfs_dir2_block_log_leaf(tp, dbp, 0, be32_to_cpu(btp->count) - 1); | ||
1026 | /* | ||
1027 | * Scan the bestfree if we need it and log the data block header. | ||
1028 | */ | ||
1029 | if (needscan) | ||
1030 | xfs_dir2_data_freescan(dp, hdr, &needlog); | ||
1031 | if (needlog) | ||
1032 | xfs_dir2_data_log_header(args, dbp); | ||
1033 | /* | ||
1034 | * Pitch the old leaf block. | ||
1035 | */ | ||
1036 | error = xfs_da_shrink_inode(args, args->geo->leafblk, lbp); | ||
1037 | if (error) | ||
1038 | return error; | ||
1039 | |||
1040 | /* | ||
1041 | * Now see if the resulting block can be shrunken to shortform. | ||
1042 | */ | ||
1043 | size = xfs_dir2_block_sfsize(dp, hdr, &sfh); | ||
1044 | if (size > XFS_IFORK_DSIZE(dp)) | ||
1045 | return 0; | ||
1046 | |||
1047 | return xfs_dir2_block_to_sf(args, dbp, size, &sfh); | ||
1048 | } | ||
1049 | |||
1050 | /* | ||
1051 | * Convert the shortform directory to block form. | ||
1052 | */ | ||
1053 | int /* error */ | ||
1054 | xfs_dir2_sf_to_block( | ||
1055 | xfs_da_args_t *args) /* operation arguments */ | ||
1056 | { | ||
1057 | xfs_dir2_db_t blkno; /* dir-relative block # (0) */ | ||
1058 | xfs_dir2_data_hdr_t *hdr; /* block header */ | ||
1059 | xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ | ||
1060 | struct xfs_buf *bp; /* block buffer */ | ||
1061 | xfs_dir2_block_tail_t *btp; /* block tail pointer */ | ||
1062 | xfs_dir2_data_entry_t *dep; /* data entry pointer */ | ||
1063 | xfs_inode_t *dp; /* incore directory inode */ | ||
1064 | int dummy; /* trash */ | ||
1065 | xfs_dir2_data_unused_t *dup; /* unused entry pointer */ | ||
1066 | int endoffset; /* end of data objects */ | ||
1067 | int error; /* error return value */ | ||
1068 | int i; /* index */ | ||
1069 | xfs_mount_t *mp; /* filesystem mount point */ | ||
1070 | int needlog; /* need to log block header */ | ||
1071 | int needscan; /* need to scan block freespc */ | ||
1072 | int newoffset; /* offset from current entry */ | ||
1073 | int offset; /* target block offset */ | ||
1074 | xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */ | ||
1075 | xfs_dir2_sf_hdr_t *oldsfp; /* old shortform header */ | ||
1076 | xfs_dir2_sf_hdr_t *sfp; /* shortform header */ | ||
1077 | __be16 *tagp; /* end of data entry */ | ||
1078 | xfs_trans_t *tp; /* transaction pointer */ | ||
1079 | struct xfs_name name; | ||
1080 | struct xfs_ifork *ifp; | ||
1081 | |||
1082 | trace_xfs_dir2_sf_to_block(args); | ||
1083 | |||
1084 | dp = args->dp; | ||
1085 | tp = args->trans; | ||
1086 | mp = dp->i_mount; | ||
1087 | ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK); | ||
1088 | ASSERT(ifp->if_flags & XFS_IFINLINE); | ||
1089 | /* | ||
1090 | * Bomb out if the shortform directory is way too short. | ||
1091 | */ | ||
1092 | if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { | ||
1093 | ASSERT(XFS_FORCED_SHUTDOWN(mp)); | ||
1094 | return -EIO; | ||
1095 | } | ||
1096 | |||
1097 | oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data; | ||
1098 | |||
1099 | ASSERT(ifp->if_bytes == dp->i_d.di_size); | ||
1100 | ASSERT(ifp->if_u1.if_data != NULL); | ||
1101 | ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count)); | ||
1102 | ASSERT(dp->i_d.di_nextents == 0); | ||
1103 | |||
1104 | /* | ||
1105 | * Copy the directory into a temporary buffer. | ||
1106 | * Then pitch the incore inode data so we can make extents. | ||
1107 | */ | ||
1108 | sfp = kmem_alloc(ifp->if_bytes, KM_SLEEP); | ||
1109 | memcpy(sfp, oldsfp, ifp->if_bytes); | ||
1110 | |||
1111 | xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK); | ||
1112 | xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK); | ||
1113 | dp->i_d.di_size = 0; | ||
1114 | |||
1115 | /* | ||
1116 | * Add block 0 to the inode. | ||
1117 | */ | ||
1118 | error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); | ||
1119 | if (error) { | ||
1120 | kmem_free(sfp); | ||
1121 | return error; | ||
1122 | } | ||
1123 | /* | ||
1124 | * Initialize the data block, then convert it to block format. | ||
1125 | */ | ||
1126 | error = xfs_dir3_data_init(args, blkno, &bp); | ||
1127 | if (error) { | ||
1128 | kmem_free(sfp); | ||
1129 | return error; | ||
1130 | } | ||
1131 | xfs_dir3_block_init(mp, tp, bp, dp); | ||
1132 | hdr = bp->b_addr; | ||
1133 | |||
1134 | /* | ||
1135 | * Compute size of block "tail" area. | ||
1136 | */ | ||
1137 | i = (uint)sizeof(*btp) + | ||
1138 | (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); | ||
1139 | /* | ||
1140 | * The whole thing is initialized to free by the init routine. | ||
1141 | * Say we're using the leaf and tail area. | ||
1142 | */ | ||
1143 | dup = dp->d_ops->data_unused_p(hdr); | ||
1144 | needlog = needscan = 0; | ||
1145 | xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i, | ||
1146 | i, &needlog, &needscan); | ||
1147 | ASSERT(needscan == 0); | ||
1148 | /* | ||
1149 | * Fill in the tail. | ||
1150 | */ | ||
1151 | btp = xfs_dir2_block_tail_p(args->geo, hdr); | ||
1152 | btp->count = cpu_to_be32(sfp->count + 2); /* ., .. */ | ||
1153 | btp->stale = 0; | ||
1154 | blp = xfs_dir2_block_leaf_p(btp); | ||
1155 | endoffset = (uint)((char *)blp - (char *)hdr); | ||
1156 | /* | ||
1157 | * Remove the freespace, we'll manage it. | ||
1158 | */ | ||
1159 | xfs_dir2_data_use_free(args, bp, dup, | ||
1160 | (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), | ||
1161 | be16_to_cpu(dup->length), &needlog, &needscan); | ||
1162 | /* | ||
1163 | * Create entry for . | ||
1164 | */ | ||
1165 | dep = dp->d_ops->data_dot_entry_p(hdr); | ||
1166 | dep->inumber = cpu_to_be64(dp->i_ino); | ||
1167 | dep->namelen = 1; | ||
1168 | dep->name[0] = '.'; | ||
1169 | dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); | ||
1170 | tagp = dp->d_ops->data_entry_tag_p(dep); | ||
1171 | *tagp = cpu_to_be16((char *)dep - (char *)hdr); | ||
1172 | xfs_dir2_data_log_entry(args, bp, dep); | ||
1173 | blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); | ||
1174 | blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( | ||
1175 | (char *)dep - (char *)hdr)); | ||
1176 | /* | ||
1177 | * Create entry for .. | ||
1178 | */ | ||
1179 | dep = dp->d_ops->data_dotdot_entry_p(hdr); | ||
1180 | dep->inumber = cpu_to_be64(dp->d_ops->sf_get_parent_ino(sfp)); | ||
1181 | dep->namelen = 2; | ||
1182 | dep->name[0] = dep->name[1] = '.'; | ||
1183 | dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); | ||
1184 | tagp = dp->d_ops->data_entry_tag_p(dep); | ||
1185 | *tagp = cpu_to_be16((char *)dep - (char *)hdr); | ||
1186 | xfs_dir2_data_log_entry(args, bp, dep); | ||
1187 | blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); | ||
1188 | blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( | ||
1189 | (char *)dep - (char *)hdr)); | ||
1190 | offset = dp->d_ops->data_first_offset; | ||
1191 | /* | ||
1192 | * Loop over existing entries, stuff them in. | ||
1193 | */ | ||
1194 | i = 0; | ||
1195 | if (!sfp->count) | ||
1196 | sfep = NULL; | ||
1197 | else | ||
1198 | sfep = xfs_dir2_sf_firstentry(sfp); | ||
1199 | /* | ||
1200 | * Need to preserve the existing offset values in the sf directory. | ||
1201 | * Insert holes (unused entries) where necessary. | ||
1202 | */ | ||
1203 | while (offset < endoffset) { | ||
1204 | /* | ||
1205 | * sfep is null when we reach the end of the list. | ||
1206 | */ | ||
1207 | if (sfep == NULL) | ||
1208 | newoffset = endoffset; | ||
1209 | else | ||
1210 | newoffset = xfs_dir2_sf_get_offset(sfep); | ||
1211 | /* | ||
1212 | * There should be a hole here, make one. | ||
1213 | */ | ||
1214 | if (offset < newoffset) { | ||
1215 | dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); | ||
1216 | dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); | ||
1217 | dup->length = cpu_to_be16(newoffset - offset); | ||
1218 | *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( | ||
1219 | ((char *)dup - (char *)hdr)); | ||
1220 | xfs_dir2_data_log_unused(args, bp, dup); | ||
1221 | xfs_dir2_data_freeinsert(hdr, | ||
1222 | dp->d_ops->data_bestfree_p(hdr), | ||
1223 | dup, &dummy); | ||
1224 | offset += be16_to_cpu(dup->length); | ||
1225 | continue; | ||
1226 | } | ||
1227 | /* | ||
1228 | * Copy a real entry. | ||
1229 | */ | ||
1230 | dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset); | ||
1231 | dep->inumber = cpu_to_be64(dp->d_ops->sf_get_ino(sfp, sfep)); | ||
1232 | dep->namelen = sfep->namelen; | ||
1233 | dp->d_ops->data_put_ftype(dep, dp->d_ops->sf_get_ftype(sfep)); | ||
1234 | memcpy(dep->name, sfep->name, dep->namelen); | ||
1235 | tagp = dp->d_ops->data_entry_tag_p(dep); | ||
1236 | *tagp = cpu_to_be16((char *)dep - (char *)hdr); | ||
1237 | xfs_dir2_data_log_entry(args, bp, dep); | ||
1238 | name.name = sfep->name; | ||
1239 | name.len = sfep->namelen; | ||
1240 | blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> | ||
1241 | hashname(&name)); | ||
1242 | blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr( | ||
1243 | (char *)dep - (char *)hdr)); | ||
1244 | offset = (int)((char *)(tagp + 1) - (char *)hdr); | ||
1245 | if (++i == sfp->count) | ||
1246 | sfep = NULL; | ||
1247 | else | ||
1248 | sfep = dp->d_ops->sf_nextentry(sfp, sfep); | ||
1249 | } | ||
1250 | /* Done with the temporary buffer */ | ||
1251 | kmem_free(sfp); | ||
1252 | /* | ||
1253 | * Sort the leaf entries by hash value. | ||
1254 | */ | ||
1255 | xfs_sort(blp, be32_to_cpu(btp->count), sizeof(*blp), xfs_dir2_block_sort); | ||
1256 | /* | ||
1257 | * Log the leaf entry area and tail. | ||
1258 | * Already logged the header in data_init, ignore needlog. | ||
1259 | */ | ||
1260 | ASSERT(needscan == 0); | ||
1261 | xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1); | ||
1262 | xfs_dir2_block_log_tail(tp, bp); | ||
1263 | xfs_dir3_data_check(dp, bp); | ||
1264 | return 0; | ||
1265 | } | ||
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c new file mode 100644 index 000000000000..fdd803fecb8e --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2_data.c | |||
@@ -0,0 +1,1050 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2013 Red Hat, Inc. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #include "xfs.h" | ||
20 | #include "xfs_fs.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | ||
27 | #include "xfs_da_format.h" | ||
28 | #include "xfs_da_btree.h" | ||
29 | #include "xfs_inode.h" | ||
30 | #include "xfs_dir2.h" | ||
31 | #include "xfs_dir2_priv.h" | ||
32 | #include "xfs_error.h" | ||
33 | #include "xfs_trans.h" | ||
34 | #include "xfs_buf_item.h" | ||
35 | #include "xfs_cksum.h" | ||
36 | |||
37 | /* | ||
38 | * Check the consistency of the data block. | ||
39 | * The input can also be a block-format directory. | ||
40 | * Return 0 is the buffer is good, otherwise an error. | ||
41 | */ | ||
42 | int | ||
43 | __xfs_dir3_data_check( | ||
44 | struct xfs_inode *dp, /* incore inode pointer */ | ||
45 | struct xfs_buf *bp) /* data block's buffer */ | ||
46 | { | ||
47 | xfs_dir2_dataptr_t addr; /* addr for leaf lookup */ | ||
48 | xfs_dir2_data_free_t *bf; /* bestfree table */ | ||
49 | xfs_dir2_block_tail_t *btp=NULL; /* block tail */ | ||
50 | int count; /* count of entries found */ | ||
51 | xfs_dir2_data_hdr_t *hdr; /* data block header */ | ||
52 | xfs_dir2_data_entry_t *dep; /* data entry */ | ||
53 | xfs_dir2_data_free_t *dfp; /* bestfree entry */ | ||
54 | xfs_dir2_data_unused_t *dup; /* unused entry */ | ||
55 | char *endp; /* end of useful data */ | ||
56 | int freeseen; /* mask of bestfrees seen */ | ||
57 | xfs_dahash_t hash; /* hash of current name */ | ||
58 | int i; /* leaf index */ | ||
59 | int lastfree; /* last entry was unused */ | ||
60 | xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */ | ||
61 | xfs_mount_t *mp; /* filesystem mount point */ | ||
62 | char *p; /* current data position */ | ||
63 | int stale; /* count of stale leaves */ | ||
64 | struct xfs_name name; | ||
65 | const struct xfs_dir_ops *ops; | ||
66 | struct xfs_da_geometry *geo; | ||
67 | |||
68 | mp = bp->b_target->bt_mount; | ||
69 | geo = mp->m_dir_geo; | ||
70 | |||
71 | /* | ||
72 | * We can be passed a null dp here from a verifier, so we need to go the | ||
73 | * hard way to get them. | ||
74 | */ | ||
75 | ops = xfs_dir_get_ops(mp, dp); | ||
76 | |||
77 | hdr = bp->b_addr; | ||
78 | p = (char *)ops->data_entry_p(hdr); | ||
79 | |||
80 | switch (hdr->magic) { | ||
81 | case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): | ||
82 | case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): | ||
83 | btp = xfs_dir2_block_tail_p(geo, hdr); | ||
84 | lep = xfs_dir2_block_leaf_p(btp); | ||
85 | endp = (char *)lep; | ||
86 | |||
87 | /* | ||
88 | * The number of leaf entries is limited by the size of the | ||
89 | * block and the amount of space used by the data entries. | ||
90 | * We don't know how much space is used by the data entries yet, | ||
91 | * so just ensure that the count falls somewhere inside the | ||
92 | * block right now. | ||
93 | */ | ||
94 | XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) < | ||
95 | ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); | ||
96 | break; | ||
97 | case cpu_to_be32(XFS_DIR3_DATA_MAGIC): | ||
98 | case cpu_to_be32(XFS_DIR2_DATA_MAGIC): | ||
99 | endp = (char *)hdr + geo->blksize; | ||
100 | break; | ||
101 | default: | ||
102 | XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); | ||
103 | return -EFSCORRUPTED; | ||
104 | } | ||
105 | |||
106 | /* | ||
107 | * Account for zero bestfree entries. | ||
108 | */ | ||
109 | bf = ops->data_bestfree_p(hdr); | ||
110 | count = lastfree = freeseen = 0; | ||
111 | if (!bf[0].length) { | ||
112 | XFS_WANT_CORRUPTED_RETURN(!bf[0].offset); | ||
113 | freeseen |= 1 << 0; | ||
114 | } | ||
115 | if (!bf[1].length) { | ||
116 | XFS_WANT_CORRUPTED_RETURN(!bf[1].offset); | ||
117 | freeseen |= 1 << 1; | ||
118 | } | ||
119 | if (!bf[2].length) { | ||
120 | XFS_WANT_CORRUPTED_RETURN(!bf[2].offset); | ||
121 | freeseen |= 1 << 2; | ||
122 | } | ||
123 | |||
124 | XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >= | ||
125 | be16_to_cpu(bf[1].length)); | ||
126 | XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >= | ||
127 | be16_to_cpu(bf[2].length)); | ||
128 | /* | ||
129 | * Loop over the data/unused entries. | ||
130 | */ | ||
131 | while (p < endp) { | ||
132 | dup = (xfs_dir2_data_unused_t *)p; | ||
133 | /* | ||
134 | * If it's unused, look for the space in the bestfree table. | ||
135 | * If we find it, account for that, else make sure it | ||
136 | * doesn't need to be there. | ||
137 | */ | ||
138 | if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { | ||
139 | XFS_WANT_CORRUPTED_RETURN(lastfree == 0); | ||
140 | XFS_WANT_CORRUPTED_RETURN( | ||
141 | be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == | ||
142 | (char *)dup - (char *)hdr); | ||
143 | dfp = xfs_dir2_data_freefind(hdr, bf, dup); | ||
144 | if (dfp) { | ||
145 | i = (int)(dfp - bf); | ||
146 | XFS_WANT_CORRUPTED_RETURN( | ||
147 | (freeseen & (1 << i)) == 0); | ||
148 | freeseen |= 1 << i; | ||
149 | } else { | ||
150 | XFS_WANT_CORRUPTED_RETURN( | ||
151 | be16_to_cpu(dup->length) <= | ||
152 | be16_to_cpu(bf[2].length)); | ||
153 | } | ||
154 | p += be16_to_cpu(dup->length); | ||
155 | lastfree = 1; | ||
156 | continue; | ||
157 | } | ||
158 | /* | ||
159 | * It's a real entry. Validate the fields. | ||
160 | * If this is a block directory then make sure it's | ||
161 | * in the leaf section of the block. | ||
162 | * The linear search is crude but this is DEBUG code. | ||
163 | */ | ||
164 | dep = (xfs_dir2_data_entry_t *)p; | ||
165 | XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0); | ||
166 | XFS_WANT_CORRUPTED_RETURN( | ||
167 | !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); | ||
168 | XFS_WANT_CORRUPTED_RETURN( | ||
169 | be16_to_cpu(*ops->data_entry_tag_p(dep)) == | ||
170 | (char *)dep - (char *)hdr); | ||
171 | XFS_WANT_CORRUPTED_RETURN( | ||
172 | ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); | ||
173 | count++; | ||
174 | lastfree = 0; | ||
175 | if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || | ||
176 | hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { | ||
177 | addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, | ||
178 | (xfs_dir2_data_aoff_t) | ||
179 | ((char *)dep - (char *)hdr)); | ||
180 | name.name = dep->name; | ||
181 | name.len = dep->namelen; | ||
182 | hash = mp->m_dirnameops->hashname(&name); | ||
183 | for (i = 0; i < be32_to_cpu(btp->count); i++) { | ||
184 | if (be32_to_cpu(lep[i].address) == addr && | ||
185 | be32_to_cpu(lep[i].hashval) == hash) | ||
186 | break; | ||
187 | } | ||
188 | XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count)); | ||
189 | } | ||
190 | p += ops->data_entsize(dep->namelen); | ||
191 | } | ||
192 | /* | ||
193 | * Need to have seen all the entries and all the bestfree slots. | ||
194 | */ | ||
195 | XFS_WANT_CORRUPTED_RETURN(freeseen == 7); | ||
196 | if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || | ||
197 | hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { | ||
198 | for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { | ||
199 | if (lep[i].address == | ||
200 | cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) | ||
201 | stale++; | ||
202 | if (i > 0) | ||
203 | XFS_WANT_CORRUPTED_RETURN( | ||
204 | be32_to_cpu(lep[i].hashval) >= | ||
205 | be32_to_cpu(lep[i - 1].hashval)); | ||
206 | } | ||
207 | XFS_WANT_CORRUPTED_RETURN(count == | ||
208 | be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); | ||
209 | XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale)); | ||
210 | } | ||
211 | return 0; | ||
212 | } | ||
213 | |||
214 | static bool | ||
215 | xfs_dir3_data_verify( | ||
216 | struct xfs_buf *bp) | ||
217 | { | ||
218 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
219 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; | ||
220 | |||
221 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
222 | if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC)) | ||
223 | return false; | ||
224 | if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid)) | ||
225 | return false; | ||
226 | if (be64_to_cpu(hdr3->blkno) != bp->b_bn) | ||
227 | return false; | ||
228 | } else { | ||
229 | if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) | ||
230 | return false; | ||
231 | } | ||
232 | if (__xfs_dir3_data_check(NULL, bp)) | ||
233 | return false; | ||
234 | return true; | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * Readahead of the first block of the directory when it is opened is completely | ||
239 | * oblivious to the format of the directory. Hence we can either get a block | ||
240 | * format buffer or a data format buffer on readahead. | ||
241 | */ | ||
242 | static void | ||
243 | xfs_dir3_data_reada_verify( | ||
244 | struct xfs_buf *bp) | ||
245 | { | ||
246 | struct xfs_dir2_data_hdr *hdr = bp->b_addr; | ||
247 | |||
248 | switch (hdr->magic) { | ||
249 | case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): | ||
250 | case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): | ||
251 | bp->b_ops = &xfs_dir3_block_buf_ops; | ||
252 | bp->b_ops->verify_read(bp); | ||
253 | return; | ||
254 | case cpu_to_be32(XFS_DIR2_DATA_MAGIC): | ||
255 | case cpu_to_be32(XFS_DIR3_DATA_MAGIC): | ||
256 | xfs_dir3_data_verify(bp); | ||
257 | return; | ||
258 | default: | ||
259 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
260 | xfs_verifier_error(bp); | ||
261 | break; | ||
262 | } | ||
263 | } | ||
264 | |||
265 | static void | ||
266 | xfs_dir3_data_read_verify( | ||
267 | struct xfs_buf *bp) | ||
268 | { | ||
269 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
270 | |||
271 | if (xfs_sb_version_hascrc(&mp->m_sb) && | ||
272 | !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) | ||
273 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
274 | else if (!xfs_dir3_data_verify(bp)) | ||
275 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
276 | |||
277 | if (bp->b_error) | ||
278 | xfs_verifier_error(bp); | ||
279 | } | ||
280 | |||
281 | static void | ||
282 | xfs_dir3_data_write_verify( | ||
283 | struct xfs_buf *bp) | ||
284 | { | ||
285 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
286 | struct xfs_buf_log_item *bip = bp->b_fspriv; | ||
287 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; | ||
288 | |||
289 | if (!xfs_dir3_data_verify(bp)) { | ||
290 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
291 | xfs_verifier_error(bp); | ||
292 | return; | ||
293 | } | ||
294 | |||
295 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
296 | return; | ||
297 | |||
298 | if (bip) | ||
299 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); | ||
300 | |||
301 | xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); | ||
302 | } | ||
303 | |||
304 | const struct xfs_buf_ops xfs_dir3_data_buf_ops = { | ||
305 | .verify_read = xfs_dir3_data_read_verify, | ||
306 | .verify_write = xfs_dir3_data_write_verify, | ||
307 | }; | ||
308 | |||
309 | static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { | ||
310 | .verify_read = xfs_dir3_data_reada_verify, | ||
311 | .verify_write = xfs_dir3_data_write_verify, | ||
312 | }; | ||
313 | |||
314 | |||
315 | int | ||
316 | xfs_dir3_data_read( | ||
317 | struct xfs_trans *tp, | ||
318 | struct xfs_inode *dp, | ||
319 | xfs_dablk_t bno, | ||
320 | xfs_daddr_t mapped_bno, | ||
321 | struct xfs_buf **bpp) | ||
322 | { | ||
323 | int err; | ||
324 | |||
325 | err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp, | ||
326 | XFS_DATA_FORK, &xfs_dir3_data_buf_ops); | ||
327 | if (!err && tp) | ||
328 | xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); | ||
329 | return err; | ||
330 | } | ||
331 | |||
332 | int | ||
333 | xfs_dir3_data_readahead( | ||
334 | struct xfs_inode *dp, | ||
335 | xfs_dablk_t bno, | ||
336 | xfs_daddr_t mapped_bno) | ||
337 | { | ||
338 | return xfs_da_reada_buf(dp, bno, mapped_bno, | ||
339 | XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops); | ||
340 | } | ||
341 | |||
342 | /* | ||
343 | * Given a data block and an unused entry from that block, | ||
344 | * return the bestfree entry if any that corresponds to it. | ||
345 | */ | ||
346 | xfs_dir2_data_free_t * | ||
347 | xfs_dir2_data_freefind( | ||
348 | struct xfs_dir2_data_hdr *hdr, /* data block header */ | ||
349 | struct xfs_dir2_data_free *bf, /* bestfree table pointer */ | ||
350 | struct xfs_dir2_data_unused *dup) /* unused space */ | ||
351 | { | ||
352 | xfs_dir2_data_free_t *dfp; /* bestfree entry */ | ||
353 | xfs_dir2_data_aoff_t off; /* offset value needed */ | ||
354 | #ifdef DEBUG | ||
355 | int matched; /* matched the value */ | ||
356 | int seenzero; /* saw a 0 bestfree entry */ | ||
357 | #endif | ||
358 | |||
359 | off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); | ||
360 | |||
361 | #ifdef DEBUG | ||
362 | /* | ||
363 | * Validate some consistency in the bestfree table. | ||
364 | * Check order, non-overlapping entries, and if we find the | ||
365 | * one we're looking for it has to be exact. | ||
366 | */ | ||
367 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || | ||
368 | hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || | ||
369 | hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || | ||
370 | hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); | ||
371 | for (dfp = &bf[0], seenzero = matched = 0; | ||
372 | dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; | ||
373 | dfp++) { | ||
374 | if (!dfp->offset) { | ||
375 | ASSERT(!dfp->length); | ||
376 | seenzero = 1; | ||
377 | continue; | ||
378 | } | ||
379 | ASSERT(seenzero == 0); | ||
380 | if (be16_to_cpu(dfp->offset) == off) { | ||
381 | matched = 1; | ||
382 | ASSERT(dfp->length == dup->length); | ||
383 | } else if (off < be16_to_cpu(dfp->offset)) | ||
384 | ASSERT(off + be16_to_cpu(dup->length) <= be16_to_cpu(dfp->offset)); | ||
385 | else | ||
386 | ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off); | ||
387 | ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length)); | ||
388 | if (dfp > &bf[0]) | ||
389 | ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length)); | ||
390 | } | ||
391 | #endif | ||
392 | /* | ||
393 | * If this is smaller than the smallest bestfree entry, | ||
394 | * it can't be there since they're sorted. | ||
395 | */ | ||
396 | if (be16_to_cpu(dup->length) < | ||
397 | be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length)) | ||
398 | return NULL; | ||
399 | /* | ||
400 | * Look at the three bestfree entries for our guy. | ||
401 | */ | ||
402 | for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) { | ||
403 | if (!dfp->offset) | ||
404 | return NULL; | ||
405 | if (be16_to_cpu(dfp->offset) == off) | ||
406 | return dfp; | ||
407 | } | ||
408 | /* | ||
409 | * Didn't find it. This only happens if there are duplicate lengths. | ||
410 | */ | ||
411 | return NULL; | ||
412 | } | ||
413 | |||
414 | /* | ||
415 | * Insert an unused-space entry into the bestfree table. | ||
416 | */ | ||
417 | xfs_dir2_data_free_t * /* entry inserted */ | ||
418 | xfs_dir2_data_freeinsert( | ||
419 | struct xfs_dir2_data_hdr *hdr, /* data block pointer */ | ||
420 | struct xfs_dir2_data_free *dfp, /* bestfree table pointer */ | ||
421 | struct xfs_dir2_data_unused *dup, /* unused space */ | ||
422 | int *loghead) /* log the data header (out) */ | ||
423 | { | ||
424 | xfs_dir2_data_free_t new; /* new bestfree entry */ | ||
425 | |||
426 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || | ||
427 | hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || | ||
428 | hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || | ||
429 | hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); | ||
430 | |||
431 | new.length = dup->length; | ||
432 | new.offset = cpu_to_be16((char *)dup - (char *)hdr); | ||
433 | |||
434 | /* | ||
435 | * Insert at position 0, 1, or 2; or not at all. | ||
436 | */ | ||
437 | if (be16_to_cpu(new.length) > be16_to_cpu(dfp[0].length)) { | ||
438 | dfp[2] = dfp[1]; | ||
439 | dfp[1] = dfp[0]; | ||
440 | dfp[0] = new; | ||
441 | *loghead = 1; | ||
442 | return &dfp[0]; | ||
443 | } | ||
444 | if (be16_to_cpu(new.length) > be16_to_cpu(dfp[1].length)) { | ||
445 | dfp[2] = dfp[1]; | ||
446 | dfp[1] = new; | ||
447 | *loghead = 1; | ||
448 | return &dfp[1]; | ||
449 | } | ||
450 | if (be16_to_cpu(new.length) > be16_to_cpu(dfp[2].length)) { | ||
451 | dfp[2] = new; | ||
452 | *loghead = 1; | ||
453 | return &dfp[2]; | ||
454 | } | ||
455 | return NULL; | ||
456 | } | ||
457 | |||
458 | /* | ||
459 | * Remove a bestfree entry from the table. | ||
460 | */ | ||
461 | STATIC void | ||
462 | xfs_dir2_data_freeremove( | ||
463 | struct xfs_dir2_data_hdr *hdr, /* data block header */ | ||
464 | struct xfs_dir2_data_free *bf, /* bestfree table pointer */ | ||
465 | struct xfs_dir2_data_free *dfp, /* bestfree entry pointer */ | ||
466 | int *loghead) /* out: log data header */ | ||
467 | { | ||
468 | |||
469 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || | ||
470 | hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || | ||
471 | hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || | ||
472 | hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); | ||
473 | |||
474 | /* | ||
475 | * It's the first entry, slide the next 2 up. | ||
476 | */ | ||
477 | if (dfp == &bf[0]) { | ||
478 | bf[0] = bf[1]; | ||
479 | bf[1] = bf[2]; | ||
480 | } | ||
481 | /* | ||
482 | * It's the second entry, slide the 3rd entry up. | ||
483 | */ | ||
484 | else if (dfp == &bf[1]) | ||
485 | bf[1] = bf[2]; | ||
486 | /* | ||
487 | * Must be the last entry. | ||
488 | */ | ||
489 | else | ||
490 | ASSERT(dfp == &bf[2]); | ||
491 | /* | ||
492 | * Clear the 3rd entry, must be zero now. | ||
493 | */ | ||
494 | bf[2].length = 0; | ||
495 | bf[2].offset = 0; | ||
496 | *loghead = 1; | ||
497 | } | ||
498 | |||
499 | /* | ||
500 | * Given a data block, reconstruct its bestfree map. | ||
501 | */ | ||
502 | void | ||
503 | xfs_dir2_data_freescan( | ||
504 | struct xfs_inode *dp, | ||
505 | struct xfs_dir2_data_hdr *hdr, | ||
506 | int *loghead) | ||
507 | { | ||
508 | xfs_dir2_block_tail_t *btp; /* block tail */ | ||
509 | xfs_dir2_data_entry_t *dep; /* active data entry */ | ||
510 | xfs_dir2_data_unused_t *dup; /* unused data entry */ | ||
511 | struct xfs_dir2_data_free *bf; | ||
512 | char *endp; /* end of block's data */ | ||
513 | char *p; /* current entry pointer */ | ||
514 | struct xfs_da_geometry *geo = dp->i_mount->m_dir_geo; | ||
515 | |||
516 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || | ||
517 | hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || | ||
518 | hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || | ||
519 | hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); | ||
520 | |||
521 | /* | ||
522 | * Start by clearing the table. | ||
523 | */ | ||
524 | bf = dp->d_ops->data_bestfree_p(hdr); | ||
525 | memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT); | ||
526 | *loghead = 1; | ||
527 | /* | ||
528 | * Set up pointers. | ||
529 | */ | ||
530 | p = (char *)dp->d_ops->data_entry_p(hdr); | ||
531 | if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || | ||
532 | hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { | ||
533 | btp = xfs_dir2_block_tail_p(geo, hdr); | ||
534 | endp = (char *)xfs_dir2_block_leaf_p(btp); | ||
535 | } else | ||
536 | endp = (char *)hdr + geo->blksize; | ||
537 | /* | ||
538 | * Loop over the block's entries. | ||
539 | */ | ||
540 | while (p < endp) { | ||
541 | dup = (xfs_dir2_data_unused_t *)p; | ||
542 | /* | ||
543 | * If it's a free entry, insert it. | ||
544 | */ | ||
545 | if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { | ||
546 | ASSERT((char *)dup - (char *)hdr == | ||
547 | be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); | ||
548 | xfs_dir2_data_freeinsert(hdr, bf, dup, loghead); | ||
549 | p += be16_to_cpu(dup->length); | ||
550 | } | ||
551 | /* | ||
552 | * For active entries, check their tags and skip them. | ||
553 | */ | ||
554 | else { | ||
555 | dep = (xfs_dir2_data_entry_t *)p; | ||
556 | ASSERT((char *)dep - (char *)hdr == | ||
557 | be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep))); | ||
558 | p += dp->d_ops->data_entsize(dep->namelen); | ||
559 | } | ||
560 | } | ||
561 | } | ||
562 | |||
563 | /* | ||
564 | * Initialize a data block at the given block number in the directory. | ||
565 | * Give back the buffer for the created block. | ||
566 | */ | ||
567 | int /* error */ | ||
568 | xfs_dir3_data_init( | ||
569 | xfs_da_args_t *args, /* directory operation args */ | ||
570 | xfs_dir2_db_t blkno, /* logical dir block number */ | ||
571 | struct xfs_buf **bpp) /* output block buffer */ | ||
572 | { | ||
573 | struct xfs_buf *bp; /* block buffer */ | ||
574 | xfs_dir2_data_hdr_t *hdr; /* data block header */ | ||
575 | xfs_inode_t *dp; /* incore directory inode */ | ||
576 | xfs_dir2_data_unused_t *dup; /* unused entry pointer */ | ||
577 | struct xfs_dir2_data_free *bf; | ||
578 | int error; /* error return value */ | ||
579 | int i; /* bestfree index */ | ||
580 | xfs_mount_t *mp; /* filesystem mount point */ | ||
581 | xfs_trans_t *tp; /* transaction pointer */ | ||
582 | int t; /* temp */ | ||
583 | |||
584 | dp = args->dp; | ||
585 | mp = dp->i_mount; | ||
586 | tp = args->trans; | ||
587 | /* | ||
588 | * Get the buffer set up for the block. | ||
589 | */ | ||
590 | error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno), | ||
591 | -1, &bp, XFS_DATA_FORK); | ||
592 | if (error) | ||
593 | return error; | ||
594 | bp->b_ops = &xfs_dir3_data_buf_ops; | ||
595 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF); | ||
596 | |||
597 | /* | ||
598 | * Initialize the header. | ||
599 | */ | ||
600 | hdr = bp->b_addr; | ||
601 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
602 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; | ||
603 | |||
604 | memset(hdr3, 0, sizeof(*hdr3)); | ||
605 | hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC); | ||
606 | hdr3->blkno = cpu_to_be64(bp->b_bn); | ||
607 | hdr3->owner = cpu_to_be64(dp->i_ino); | ||
608 | uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid); | ||
609 | |||
610 | } else | ||
611 | hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); | ||
612 | |||
613 | bf = dp->d_ops->data_bestfree_p(hdr); | ||
614 | bf[0].offset = cpu_to_be16(dp->d_ops->data_entry_offset); | ||
615 | for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { | ||
616 | bf[i].length = 0; | ||
617 | bf[i].offset = 0; | ||
618 | } | ||
619 | |||
620 | /* | ||
621 | * Set up an unused entry for the block's body. | ||
622 | */ | ||
623 | dup = dp->d_ops->data_unused_p(hdr); | ||
624 | dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); | ||
625 | |||
626 | t = args->geo->blksize - (uint)dp->d_ops->data_entry_offset; | ||
627 | bf[0].length = cpu_to_be16(t); | ||
628 | dup->length = cpu_to_be16(t); | ||
629 | *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); | ||
630 | /* | ||
631 | * Log it and return it. | ||
632 | */ | ||
633 | xfs_dir2_data_log_header(args, bp); | ||
634 | xfs_dir2_data_log_unused(args, bp, dup); | ||
635 | *bpp = bp; | ||
636 | return 0; | ||
637 | } | ||
638 | |||
639 | /* | ||
640 | * Log an active data entry from the block. | ||
641 | */ | ||
642 | void | ||
643 | xfs_dir2_data_log_entry( | ||
644 | struct xfs_da_args *args, | ||
645 | struct xfs_buf *bp, | ||
646 | xfs_dir2_data_entry_t *dep) /* data entry pointer */ | ||
647 | { | ||
648 | struct xfs_dir2_data_hdr *hdr = bp->b_addr; | ||
649 | |||
650 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || | ||
651 | hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || | ||
652 | hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || | ||
653 | hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); | ||
654 | |||
655 | xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr), | ||
656 | (uint)((char *)(args->dp->d_ops->data_entry_tag_p(dep) + 1) - | ||
657 | (char *)hdr - 1)); | ||
658 | } | ||
659 | |||
660 | /* | ||
661 | * Log a data block header. | ||
662 | */ | ||
663 | void | ||
664 | xfs_dir2_data_log_header( | ||
665 | struct xfs_da_args *args, | ||
666 | struct xfs_buf *bp) | ||
667 | { | ||
668 | #ifdef DEBUG | ||
669 | struct xfs_dir2_data_hdr *hdr = bp->b_addr; | ||
670 | |||
671 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || | ||
672 | hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || | ||
673 | hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || | ||
674 | hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); | ||
675 | #endif | ||
676 | |||
677 | xfs_trans_log_buf(args->trans, bp, 0, | ||
678 | args->dp->d_ops->data_entry_offset - 1); | ||
679 | } | ||
680 | |||
681 | /* | ||
682 | * Log a data unused entry. | ||
683 | */ | ||
684 | void | ||
685 | xfs_dir2_data_log_unused( | ||
686 | struct xfs_da_args *args, | ||
687 | struct xfs_buf *bp, | ||
688 | xfs_dir2_data_unused_t *dup) /* data unused pointer */ | ||
689 | { | ||
690 | xfs_dir2_data_hdr_t *hdr = bp->b_addr; | ||
691 | |||
692 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || | ||
693 | hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || | ||
694 | hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || | ||
695 | hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); | ||
696 | |||
697 | /* | ||
698 | * Log the first part of the unused entry. | ||
699 | */ | ||
700 | xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr), | ||
701 | (uint)((char *)&dup->length + sizeof(dup->length) - | ||
702 | 1 - (char *)hdr)); | ||
703 | /* | ||
704 | * Log the end (tag) of the unused entry. | ||
705 | */ | ||
706 | xfs_trans_log_buf(args->trans, bp, | ||
707 | (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr), | ||
708 | (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr + | ||
709 | sizeof(xfs_dir2_data_off_t) - 1)); | ||
710 | } | ||
711 | |||
712 | /* | ||
713 | * Make a byte range in the data block unused. | ||
714 | * Its current contents are unimportant. | ||
715 | */ | ||
716 | void | ||
717 | xfs_dir2_data_make_free( | ||
718 | struct xfs_da_args *args, | ||
719 | struct xfs_buf *bp, | ||
720 | xfs_dir2_data_aoff_t offset, /* starting byte offset */ | ||
721 | xfs_dir2_data_aoff_t len, /* length in bytes */ | ||
722 | int *needlogp, /* out: log header */ | ||
723 | int *needscanp) /* out: regen bestfree */ | ||
724 | { | ||
725 | xfs_dir2_data_hdr_t *hdr; /* data block pointer */ | ||
726 | xfs_dir2_data_free_t *dfp; /* bestfree pointer */ | ||
727 | char *endptr; /* end of data area */ | ||
728 | int needscan; /* need to regen bestfree */ | ||
729 | xfs_dir2_data_unused_t *newdup; /* new unused entry */ | ||
730 | xfs_dir2_data_unused_t *postdup; /* unused entry after us */ | ||
731 | xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ | ||
732 | struct xfs_dir2_data_free *bf; | ||
733 | |||
734 | hdr = bp->b_addr; | ||
735 | |||
736 | /* | ||
737 | * Figure out where the end of the data area is. | ||
738 | */ | ||
739 | if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || | ||
740 | hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)) | ||
741 | endptr = (char *)hdr + args->geo->blksize; | ||
742 | else { | ||
743 | xfs_dir2_block_tail_t *btp; /* block tail */ | ||
744 | |||
745 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || | ||
746 | hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); | ||
747 | btp = xfs_dir2_block_tail_p(args->geo, hdr); | ||
748 | endptr = (char *)xfs_dir2_block_leaf_p(btp); | ||
749 | } | ||
750 | /* | ||
751 | * If this isn't the start of the block, then back up to | ||
752 | * the previous entry and see if it's free. | ||
753 | */ | ||
754 | if (offset > args->dp->d_ops->data_entry_offset) { | ||
755 | __be16 *tagp; /* tag just before us */ | ||
756 | |||
757 | tagp = (__be16 *)((char *)hdr + offset) - 1; | ||
758 | prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); | ||
759 | if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG) | ||
760 | prevdup = NULL; | ||
761 | } else | ||
762 | prevdup = NULL; | ||
763 | /* | ||
764 | * If this isn't the end of the block, see if the entry after | ||
765 | * us is free. | ||
766 | */ | ||
767 | if ((char *)hdr + offset + len < endptr) { | ||
768 | postdup = | ||
769 | (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); | ||
770 | if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG) | ||
771 | postdup = NULL; | ||
772 | } else | ||
773 | postdup = NULL; | ||
774 | ASSERT(*needscanp == 0); | ||
775 | needscan = 0; | ||
776 | /* | ||
777 | * Previous and following entries are both free, | ||
778 | * merge everything into a single free entry. | ||
779 | */ | ||
780 | bf = args->dp->d_ops->data_bestfree_p(hdr); | ||
781 | if (prevdup && postdup) { | ||
782 | xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */ | ||
783 | |||
784 | /* | ||
785 | * See if prevdup and/or postdup are in bestfree table. | ||
786 | */ | ||
787 | dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); | ||
788 | dfp2 = xfs_dir2_data_freefind(hdr, bf, postdup); | ||
789 | /* | ||
790 | * We need a rescan unless there are exactly 2 free entries | ||
791 | * namely our two. Then we know what's happening, otherwise | ||
792 | * since the third bestfree is there, there might be more | ||
793 | * entries. | ||
794 | */ | ||
795 | needscan = (bf[2].length != 0); | ||
796 | /* | ||
797 | * Fix up the new big freespace. | ||
798 | */ | ||
799 | be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); | ||
800 | *xfs_dir2_data_unused_tag_p(prevdup) = | ||
801 | cpu_to_be16((char *)prevdup - (char *)hdr); | ||
802 | xfs_dir2_data_log_unused(args, bp, prevdup); | ||
803 | if (!needscan) { | ||
804 | /* | ||
805 | * Has to be the case that entries 0 and 1 are | ||
806 | * dfp and dfp2 (don't know which is which), and | ||
807 | * entry 2 is empty. | ||
808 | * Remove entry 1 first then entry 0. | ||
809 | */ | ||
810 | ASSERT(dfp && dfp2); | ||
811 | if (dfp == &bf[1]) { | ||
812 | dfp = &bf[0]; | ||
813 | ASSERT(dfp2 == dfp); | ||
814 | dfp2 = &bf[1]; | ||
815 | } | ||
816 | xfs_dir2_data_freeremove(hdr, bf, dfp2, needlogp); | ||
817 | xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); | ||
818 | /* | ||
819 | * Now insert the new entry. | ||
820 | */ | ||
821 | dfp = xfs_dir2_data_freeinsert(hdr, bf, prevdup, | ||
822 | needlogp); | ||
823 | ASSERT(dfp == &bf[0]); | ||
824 | ASSERT(dfp->length == prevdup->length); | ||
825 | ASSERT(!dfp[1].length); | ||
826 | ASSERT(!dfp[2].length); | ||
827 | } | ||
828 | } | ||
829 | /* | ||
830 | * The entry before us is free, merge with it. | ||
831 | */ | ||
832 | else if (prevdup) { | ||
833 | dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); | ||
834 | be16_add_cpu(&prevdup->length, len); | ||
835 | *xfs_dir2_data_unused_tag_p(prevdup) = | ||
836 | cpu_to_be16((char *)prevdup - (char *)hdr); | ||
837 | xfs_dir2_data_log_unused(args, bp, prevdup); | ||
838 | /* | ||
839 | * If the previous entry was in the table, the new entry | ||
840 | * is longer, so it will be in the table too. Remove | ||
841 | * the old one and add the new one. | ||
842 | */ | ||
843 | if (dfp) { | ||
844 | xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); | ||
845 | xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp); | ||
846 | } | ||
847 | /* | ||
848 | * Otherwise we need a scan if the new entry is big enough. | ||
849 | */ | ||
850 | else { | ||
851 | needscan = be16_to_cpu(prevdup->length) > | ||
852 | be16_to_cpu(bf[2].length); | ||
853 | } | ||
854 | } | ||
855 | /* | ||
856 | * The following entry is free, merge with it. | ||
857 | */ | ||
858 | else if (postdup) { | ||
859 | dfp = xfs_dir2_data_freefind(hdr, bf, postdup); | ||
860 | newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); | ||
861 | newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); | ||
862 | newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); | ||
863 | *xfs_dir2_data_unused_tag_p(newdup) = | ||
864 | cpu_to_be16((char *)newdup - (char *)hdr); | ||
865 | xfs_dir2_data_log_unused(args, bp, newdup); | ||
866 | /* | ||
867 | * If the following entry was in the table, the new entry | ||
868 | * is longer, so it will be in the table too. Remove | ||
869 | * the old one and add the new one. | ||
870 | */ | ||
871 | if (dfp) { | ||
872 | xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); | ||
873 | xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); | ||
874 | } | ||
875 | /* | ||
876 | * Otherwise we need a scan if the new entry is big enough. | ||
877 | */ | ||
878 | else { | ||
879 | needscan = be16_to_cpu(newdup->length) > | ||
880 | be16_to_cpu(bf[2].length); | ||
881 | } | ||
882 | } | ||
883 | /* | ||
884 | * Neither neighbor is free. Make a new entry. | ||
885 | */ | ||
886 | else { | ||
887 | newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); | ||
888 | newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); | ||
889 | newdup->length = cpu_to_be16(len); | ||
890 | *xfs_dir2_data_unused_tag_p(newdup) = | ||
891 | cpu_to_be16((char *)newdup - (char *)hdr); | ||
892 | xfs_dir2_data_log_unused(args, bp, newdup); | ||
893 | xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); | ||
894 | } | ||
895 | *needscanp = needscan; | ||
896 | } | ||
897 | |||
898 | /* | ||
899 | * Take a byte range out of an existing unused space and make it un-free. | ||
900 | */ | ||
901 | void | ||
902 | xfs_dir2_data_use_free( | ||
903 | struct xfs_da_args *args, | ||
904 | struct xfs_buf *bp, | ||
905 | xfs_dir2_data_unused_t *dup, /* unused entry */ | ||
906 | xfs_dir2_data_aoff_t offset, /* starting offset to use */ | ||
907 | xfs_dir2_data_aoff_t len, /* length to use */ | ||
908 | int *needlogp, /* out: need to log header */ | ||
909 | int *needscanp) /* out: need regen bestfree */ | ||
910 | { | ||
911 | xfs_dir2_data_hdr_t *hdr; /* data block header */ | ||
912 | xfs_dir2_data_free_t *dfp; /* bestfree pointer */ | ||
913 | int matchback; /* matches end of freespace */ | ||
914 | int matchfront; /* matches start of freespace */ | ||
915 | int needscan; /* need to regen bestfree */ | ||
916 | xfs_dir2_data_unused_t *newdup; /* new unused entry */ | ||
917 | xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ | ||
918 | int oldlen; /* old unused entry's length */ | ||
919 | struct xfs_dir2_data_free *bf; | ||
920 | |||
921 | hdr = bp->b_addr; | ||
922 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || | ||
923 | hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || | ||
924 | hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || | ||
925 | hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); | ||
926 | ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); | ||
927 | ASSERT(offset >= (char *)dup - (char *)hdr); | ||
928 | ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr); | ||
929 | ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); | ||
930 | /* | ||
931 | * Look up the entry in the bestfree table. | ||
932 | */ | ||
933 | oldlen = be16_to_cpu(dup->length); | ||
934 | bf = args->dp->d_ops->data_bestfree_p(hdr); | ||
935 | dfp = xfs_dir2_data_freefind(hdr, bf, dup); | ||
936 | ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length)); | ||
937 | /* | ||
938 | * Check for alignment with front and back of the entry. | ||
939 | */ | ||
940 | matchfront = (char *)dup - (char *)hdr == offset; | ||
941 | matchback = (char *)dup + oldlen - (char *)hdr == offset + len; | ||
942 | ASSERT(*needscanp == 0); | ||
943 | needscan = 0; | ||
944 | /* | ||
945 | * If we matched it exactly we just need to get rid of it from | ||
946 | * the bestfree table. | ||
947 | */ | ||
948 | if (matchfront && matchback) { | ||
949 | if (dfp) { | ||
950 | needscan = (bf[2].offset != 0); | ||
951 | if (!needscan) | ||
952 | xfs_dir2_data_freeremove(hdr, bf, dfp, | ||
953 | needlogp); | ||
954 | } | ||
955 | } | ||
956 | /* | ||
957 | * We match the first part of the entry. | ||
958 | * Make a new entry with the remaining freespace. | ||
959 | */ | ||
960 | else if (matchfront) { | ||
961 | newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); | ||
962 | newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); | ||
963 | newdup->length = cpu_to_be16(oldlen - len); | ||
964 | *xfs_dir2_data_unused_tag_p(newdup) = | ||
965 | cpu_to_be16((char *)newdup - (char *)hdr); | ||
966 | xfs_dir2_data_log_unused(args, bp, newdup); | ||
967 | /* | ||
968 | * If it was in the table, remove it and add the new one. | ||
969 | */ | ||
970 | if (dfp) { | ||
971 | xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); | ||
972 | dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, | ||
973 | needlogp); | ||
974 | ASSERT(dfp != NULL); | ||
975 | ASSERT(dfp->length == newdup->length); | ||
976 | ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); | ||
977 | /* | ||
978 | * If we got inserted at the last slot, | ||
979 | * that means we don't know if there was a better | ||
980 | * choice for the last slot, or not. Rescan. | ||
981 | */ | ||
982 | needscan = dfp == &bf[2]; | ||
983 | } | ||
984 | } | ||
985 | /* | ||
986 | * We match the last part of the entry. | ||
987 | * Trim the allocated space off the tail of the entry. | ||
988 | */ | ||
989 | else if (matchback) { | ||
990 | newdup = dup; | ||
991 | newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); | ||
992 | *xfs_dir2_data_unused_tag_p(newdup) = | ||
993 | cpu_to_be16((char *)newdup - (char *)hdr); | ||
994 | xfs_dir2_data_log_unused(args, bp, newdup); | ||
995 | /* | ||
996 | * If it was in the table, remove it and add the new one. | ||
997 | */ | ||
998 | if (dfp) { | ||
999 | xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); | ||
1000 | dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, | ||
1001 | needlogp); | ||
1002 | ASSERT(dfp != NULL); | ||
1003 | ASSERT(dfp->length == newdup->length); | ||
1004 | ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); | ||
1005 | /* | ||
1006 | * If we got inserted at the last slot, | ||
1007 | * that means we don't know if there was a better | ||
1008 | * choice for the last slot, or not. Rescan. | ||
1009 | */ | ||
1010 | needscan = dfp == &bf[2]; | ||
1011 | } | ||
1012 | } | ||
1013 | /* | ||
1014 | * Poking out the middle of an entry. | ||
1015 | * Make two new entries. | ||
1016 | */ | ||
1017 | else { | ||
1018 | newdup = dup; | ||
1019 | newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); | ||
1020 | *xfs_dir2_data_unused_tag_p(newdup) = | ||
1021 | cpu_to_be16((char *)newdup - (char *)hdr); | ||
1022 | xfs_dir2_data_log_unused(args, bp, newdup); | ||
1023 | newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); | ||
1024 | newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); | ||
1025 | newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); | ||
1026 | *xfs_dir2_data_unused_tag_p(newdup2) = | ||
1027 | cpu_to_be16((char *)newdup2 - (char *)hdr); | ||
1028 | xfs_dir2_data_log_unused(args, bp, newdup2); | ||
1029 | /* | ||
1030 | * If the old entry was in the table, we need to scan | ||
1031 | * if the 3rd entry was valid, since these entries | ||
1032 | * are smaller than the old one. | ||
1033 | * If we don't need to scan that means there were 1 or 2 | ||
1034 | * entries in the table, and removing the old and adding | ||
1035 | * the 2 new will work. | ||
1036 | */ | ||
1037 | if (dfp) { | ||
1038 | needscan = (bf[2].length != 0); | ||
1039 | if (!needscan) { | ||
1040 | xfs_dir2_data_freeremove(hdr, bf, dfp, | ||
1041 | needlogp); | ||
1042 | xfs_dir2_data_freeinsert(hdr, bf, newdup, | ||
1043 | needlogp); | ||
1044 | xfs_dir2_data_freeinsert(hdr, bf, newdup2, | ||
1045 | needlogp); | ||
1046 | } | ||
1047 | } | ||
1048 | } | ||
1049 | *needscanp = needscan; | ||
1050 | } | ||
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c new file mode 100644 index 000000000000..a19174eb3cb2 --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c | |||
@@ -0,0 +1,1831 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2013 Red Hat, Inc. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #include "xfs.h" | ||
20 | #include "xfs_fs.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | ||
27 | #include "xfs_da_format.h" | ||
28 | #include "xfs_da_btree.h" | ||
29 | #include "xfs_inode.h" | ||
30 | #include "xfs_bmap.h" | ||
31 | #include "xfs_dir2.h" | ||
32 | #include "xfs_dir2_priv.h" | ||
33 | #include "xfs_error.h" | ||
34 | #include "xfs_trace.h" | ||
35 | #include "xfs_trans.h" | ||
36 | #include "xfs_buf_item.h" | ||
37 | #include "xfs_cksum.h" | ||
38 | |||
39 | /* | ||
40 | * Local function declarations. | ||
41 | */ | ||
42 | static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp, | ||
43 | int *indexp, struct xfs_buf **dbpp); | ||
44 | static void xfs_dir3_leaf_log_bests(struct xfs_da_args *args, | ||
45 | struct xfs_buf *bp, int first, int last); | ||
46 | static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args, | ||
47 | struct xfs_buf *bp); | ||
48 | |||
/*
 * Check the internal consistency of a leaf1 block.
 * Pop an assert if something is wrong.
 *
 * On non-DEBUG builds xfs_dir3_leaf_check() expands to nothing.
 */
#ifdef DEBUG
/*
 * Deliberately no semicolon after "while (0)": the caller's own semicolon
 * ends the statement, which keeps the macro usable as the sole body of an
 * unbraced if/else.
 */
#define	xfs_dir3_leaf_check(dp, bp) \
do { \
	if (!xfs_dir3_leaf1_check((dp), (bp))) \
		ASSERT(0); \
} while (0)

/*
 * Decode the leaf header of bp and check that it carries a leaf1 magic
 * number (and, for the v3 magic, that the block number recorded in the
 * header matches the buffer) before running the common leaf checks.
 *
 * Returns true if the block looks consistent.
 */
STATIC bool
xfs_dir3_leaf1_check(
	struct xfs_inode	*dp,
	struct xfs_buf		*bp)
{
	struct xfs_dir2_leaf	*leaf = bp->b_addr;
	struct xfs_dir3_icleaf_hdr leafhdr;

	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);

	if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
		if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
			return false;
	} else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC)
		return false;

	return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
}
#else
#define	xfs_dir3_leaf_check(dp, bp)
#endif
82 | |||
83 | bool | ||
84 | xfs_dir3_leaf_check_int( | ||
85 | struct xfs_mount *mp, | ||
86 | struct xfs_inode *dp, | ||
87 | struct xfs_dir3_icleaf_hdr *hdr, | ||
88 | struct xfs_dir2_leaf *leaf) | ||
89 | { | ||
90 | struct xfs_dir2_leaf_entry *ents; | ||
91 | xfs_dir2_leaf_tail_t *ltp; | ||
92 | int stale; | ||
93 | int i; | ||
94 | const struct xfs_dir_ops *ops; | ||
95 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
96 | struct xfs_da_geometry *geo = mp->m_dir_geo; | ||
97 | |||
98 | /* | ||
99 | * we can be passed a null dp here from a verifier, so we need to go the | ||
100 | * hard way to get them. | ||
101 | */ | ||
102 | ops = xfs_dir_get_ops(mp, dp); | ||
103 | |||
104 | if (!hdr) { | ||
105 | ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
106 | hdr = &leafhdr; | ||
107 | } | ||
108 | |||
109 | ents = ops->leaf_ents_p(leaf); | ||
110 | ltp = xfs_dir2_leaf_tail_p(geo, leaf); | ||
111 | |||
112 | /* | ||
113 | * XXX (dgc): This value is not restrictive enough. | ||
114 | * Should factor in the size of the bests table as well. | ||
115 | * We can deduce a value for that from di_size. | ||
116 | */ | ||
117 | if (hdr->count > ops->leaf_max_ents(geo)) | ||
118 | return false; | ||
119 | |||
120 | /* Leaves and bests don't overlap in leaf format. */ | ||
121 | if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || | ||
122 | hdr->magic == XFS_DIR3_LEAF1_MAGIC) && | ||
123 | (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) | ||
124 | return false; | ||
125 | |||
126 | /* Check hash value order, count stale entries. */ | ||
127 | for (i = stale = 0; i < hdr->count; i++) { | ||
128 | if (i + 1 < hdr->count) { | ||
129 | if (be32_to_cpu(ents[i].hashval) > | ||
130 | be32_to_cpu(ents[i + 1].hashval)) | ||
131 | return false; | ||
132 | } | ||
133 | if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) | ||
134 | stale++; | ||
135 | } | ||
136 | if (hdr->stale != stale) | ||
137 | return false; | ||
138 | return true; | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * We verify the magic numbers before decoding the leaf header so that on debug | ||
143 | * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due | ||
144 | * to incorrect magic numbers. | ||
145 | */ | ||
146 | static bool | ||
147 | xfs_dir3_leaf_verify( | ||
148 | struct xfs_buf *bp, | ||
149 | __uint16_t magic) | ||
150 | { | ||
151 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
152 | struct xfs_dir2_leaf *leaf = bp->b_addr; | ||
153 | |||
154 | ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC); | ||
155 | |||
156 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
157 | struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; | ||
158 | __uint16_t magic3; | ||
159 | |||
160 | magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC | ||
161 | : XFS_DIR3_LEAFN_MAGIC; | ||
162 | |||
163 | if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) | ||
164 | return false; | ||
165 | if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_uuid)) | ||
166 | return false; | ||
167 | if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) | ||
168 | return false; | ||
169 | } else { | ||
170 | if (leaf->hdr.info.magic != cpu_to_be16(magic)) | ||
171 | return false; | ||
172 | } | ||
173 | |||
174 | return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf); | ||
175 | } | ||
176 | |||
177 | static void | ||
178 | __read_verify( | ||
179 | struct xfs_buf *bp, | ||
180 | __uint16_t magic) | ||
181 | { | ||
182 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
183 | |||
184 | if (xfs_sb_version_hascrc(&mp->m_sb) && | ||
185 | !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) | ||
186 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
187 | else if (!xfs_dir3_leaf_verify(bp, magic)) | ||
188 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
189 | |||
190 | if (bp->b_error) | ||
191 | xfs_verifier_error(bp); | ||
192 | } | ||
193 | |||
194 | static void | ||
195 | __write_verify( | ||
196 | struct xfs_buf *bp, | ||
197 | __uint16_t magic) | ||
198 | { | ||
199 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
200 | struct xfs_buf_log_item *bip = bp->b_fspriv; | ||
201 | struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; | ||
202 | |||
203 | if (!xfs_dir3_leaf_verify(bp, magic)) { | ||
204 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
205 | xfs_verifier_error(bp); | ||
206 | return; | ||
207 | } | ||
208 | |||
209 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
210 | return; | ||
211 | |||
212 | if (bip) | ||
213 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); | ||
214 | |||
215 | xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); | ||
216 | } | ||
217 | |||
218 | static void | ||
219 | xfs_dir3_leaf1_read_verify( | ||
220 | struct xfs_buf *bp) | ||
221 | { | ||
222 | __read_verify(bp, XFS_DIR2_LEAF1_MAGIC); | ||
223 | } | ||
224 | |||
225 | static void | ||
226 | xfs_dir3_leaf1_write_verify( | ||
227 | struct xfs_buf *bp) | ||
228 | { | ||
229 | __write_verify(bp, XFS_DIR2_LEAF1_MAGIC); | ||
230 | } | ||
231 | |||
232 | static void | ||
233 | xfs_dir3_leafn_read_verify( | ||
234 | struct xfs_buf *bp) | ||
235 | { | ||
236 | __read_verify(bp, XFS_DIR2_LEAFN_MAGIC); | ||
237 | } | ||
238 | |||
239 | static void | ||
240 | xfs_dir3_leafn_write_verify( | ||
241 | struct xfs_buf *bp) | ||
242 | { | ||
243 | __write_verify(bp, XFS_DIR2_LEAFN_MAGIC); | ||
244 | } | ||
245 | |||
246 | const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = { | ||
247 | .verify_read = xfs_dir3_leaf1_read_verify, | ||
248 | .verify_write = xfs_dir3_leaf1_write_verify, | ||
249 | }; | ||
250 | |||
251 | const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = { | ||
252 | .verify_read = xfs_dir3_leafn_read_verify, | ||
253 | .verify_write = xfs_dir3_leafn_write_verify, | ||
254 | }; | ||
255 | |||
256 | static int | ||
257 | xfs_dir3_leaf_read( | ||
258 | struct xfs_trans *tp, | ||
259 | struct xfs_inode *dp, | ||
260 | xfs_dablk_t fbno, | ||
261 | xfs_daddr_t mappedbno, | ||
262 | struct xfs_buf **bpp) | ||
263 | { | ||
264 | int err; | ||
265 | |||
266 | err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, | ||
267 | XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops); | ||
268 | if (!err && tp) | ||
269 | xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF); | ||
270 | return err; | ||
271 | } | ||
272 | |||
273 | int | ||
274 | xfs_dir3_leafn_read( | ||
275 | struct xfs_trans *tp, | ||
276 | struct xfs_inode *dp, | ||
277 | xfs_dablk_t fbno, | ||
278 | xfs_daddr_t mappedbno, | ||
279 | struct xfs_buf **bpp) | ||
280 | { | ||
281 | int err; | ||
282 | |||
283 | err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, | ||
284 | XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops); | ||
285 | if (!err && tp) | ||
286 | xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF); | ||
287 | return err; | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * Initialize a new leaf block, leaf1 or leafn magic accepted. | ||
292 | */ | ||
293 | static void | ||
294 | xfs_dir3_leaf_init( | ||
295 | struct xfs_mount *mp, | ||
296 | struct xfs_trans *tp, | ||
297 | struct xfs_buf *bp, | ||
298 | xfs_ino_t owner, | ||
299 | __uint16_t type) | ||
300 | { | ||
301 | struct xfs_dir2_leaf *leaf = bp->b_addr; | ||
302 | |||
303 | ASSERT(type == XFS_DIR2_LEAF1_MAGIC || type == XFS_DIR2_LEAFN_MAGIC); | ||
304 | |||
305 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
306 | struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; | ||
307 | |||
308 | memset(leaf3, 0, sizeof(*leaf3)); | ||
309 | |||
310 | leaf3->info.hdr.magic = (type == XFS_DIR2_LEAF1_MAGIC) | ||
311 | ? cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) | ||
312 | : cpu_to_be16(XFS_DIR3_LEAFN_MAGIC); | ||
313 | leaf3->info.blkno = cpu_to_be64(bp->b_bn); | ||
314 | leaf3->info.owner = cpu_to_be64(owner); | ||
315 | uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_uuid); | ||
316 | } else { | ||
317 | memset(leaf, 0, sizeof(*leaf)); | ||
318 | leaf->hdr.info.magic = cpu_to_be16(type); | ||
319 | } | ||
320 | |||
321 | /* | ||
322 | * If it's a leaf-format directory initialize the tail. | ||
323 | * Caller is responsible for initialising the bests table. | ||
324 | */ | ||
325 | if (type == XFS_DIR2_LEAF1_MAGIC) { | ||
326 | struct xfs_dir2_leaf_tail *ltp; | ||
327 | |||
328 | ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf); | ||
329 | ltp->bestcount = 0; | ||
330 | bp->b_ops = &xfs_dir3_leaf1_buf_ops; | ||
331 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAF1_BUF); | ||
332 | } else { | ||
333 | bp->b_ops = &xfs_dir3_leafn_buf_ops; | ||
334 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF); | ||
335 | } | ||
336 | } | ||
337 | |||
338 | int | ||
339 | xfs_dir3_leaf_get_buf( | ||
340 | xfs_da_args_t *args, | ||
341 | xfs_dir2_db_t bno, | ||
342 | struct xfs_buf **bpp, | ||
343 | __uint16_t magic) | ||
344 | { | ||
345 | struct xfs_inode *dp = args->dp; | ||
346 | struct xfs_trans *tp = args->trans; | ||
347 | struct xfs_mount *mp = dp->i_mount; | ||
348 | struct xfs_buf *bp; | ||
349 | int error; | ||
350 | |||
351 | ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC); | ||
352 | ASSERT(bno >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET) && | ||
353 | bno < xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET)); | ||
354 | |||
355 | error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, bno), | ||
356 | -1, &bp, XFS_DATA_FORK); | ||
357 | if (error) | ||
358 | return error; | ||
359 | |||
360 | xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic); | ||
361 | xfs_dir3_leaf_log_header(args, bp); | ||
362 | if (magic == XFS_DIR2_LEAF1_MAGIC) | ||
363 | xfs_dir3_leaf_log_tail(args, bp); | ||
364 | *bpp = bp; | ||
365 | return 0; | ||
366 | } | ||
367 | |||
368 | /* | ||
369 | * Convert a block form directory to a leaf form directory. | ||
370 | */ | ||
371 | int /* error */ | ||
372 | xfs_dir2_block_to_leaf( | ||
373 | xfs_da_args_t *args, /* operation arguments */ | ||
374 | struct xfs_buf *dbp) /* input block's buffer */ | ||
375 | { | ||
376 | __be16 *bestsp; /* leaf's bestsp entries */ | ||
377 | xfs_dablk_t blkno; /* leaf block's bno */ | ||
378 | xfs_dir2_data_hdr_t *hdr; /* block header */ | ||
379 | xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */ | ||
380 | xfs_dir2_block_tail_t *btp; /* block's tail */ | ||
381 | xfs_inode_t *dp; /* incore directory inode */ | ||
382 | int error; /* error return code */ | ||
383 | struct xfs_buf *lbp; /* leaf block's buffer */ | ||
384 | xfs_dir2_db_t ldb; /* leaf block's bno */ | ||
385 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
386 | xfs_dir2_leaf_tail_t *ltp; /* leaf's tail */ | ||
387 | xfs_mount_t *mp; /* filesystem mount point */ | ||
388 | int needlog; /* need to log block header */ | ||
389 | int needscan; /* need to rescan bestfree */ | ||
390 | xfs_trans_t *tp; /* transaction pointer */ | ||
391 | struct xfs_dir2_data_free *bf; | ||
392 | struct xfs_dir2_leaf_entry *ents; | ||
393 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
394 | |||
395 | trace_xfs_dir2_block_to_leaf(args); | ||
396 | |||
397 | dp = args->dp; | ||
398 | mp = dp->i_mount; | ||
399 | tp = args->trans; | ||
400 | /* | ||
401 | * Add the leaf block to the inode. | ||
402 | * This interface will only put blocks in the leaf/node range. | ||
403 | * Since that's empty now, we'll get the root (block 0 in range). | ||
404 | */ | ||
405 | if ((error = xfs_da_grow_inode(args, &blkno))) { | ||
406 | return error; | ||
407 | } | ||
408 | ldb = xfs_dir2_da_to_db(args->geo, blkno); | ||
409 | ASSERT(ldb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET)); | ||
410 | /* | ||
411 | * Initialize the leaf block, get a buffer for it. | ||
412 | */ | ||
413 | error = xfs_dir3_leaf_get_buf(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC); | ||
414 | if (error) | ||
415 | return error; | ||
416 | |||
417 | leaf = lbp->b_addr; | ||
418 | hdr = dbp->b_addr; | ||
419 | xfs_dir3_data_check(dp, dbp); | ||
420 | btp = xfs_dir2_block_tail_p(args->geo, hdr); | ||
421 | blp = xfs_dir2_block_leaf_p(btp); | ||
422 | bf = dp->d_ops->data_bestfree_p(hdr); | ||
423 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
424 | |||
425 | /* | ||
426 | * Set the counts in the leaf header. | ||
427 | */ | ||
428 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
429 | leafhdr.count = be32_to_cpu(btp->count); | ||
430 | leafhdr.stale = be32_to_cpu(btp->stale); | ||
431 | dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); | ||
432 | xfs_dir3_leaf_log_header(args, lbp); | ||
433 | |||
434 | /* | ||
435 | * Could compact these but I think we always do the conversion | ||
436 | * after squeezing out stale entries. | ||
437 | */ | ||
438 | memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t)); | ||
439 | xfs_dir3_leaf_log_ents(args, lbp, 0, leafhdr.count - 1); | ||
440 | needscan = 0; | ||
441 | needlog = 1; | ||
442 | /* | ||
443 | * Make the space formerly occupied by the leaf entries and block | ||
444 | * tail be free. | ||
445 | */ | ||
446 | xfs_dir2_data_make_free(args, dbp, | ||
447 | (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), | ||
448 | (xfs_dir2_data_aoff_t)((char *)hdr + args->geo->blksize - | ||
449 | (char *)blp), | ||
450 | &needlog, &needscan); | ||
451 | /* | ||
452 | * Fix up the block header, make it a data block. | ||
453 | */ | ||
454 | dbp->b_ops = &xfs_dir3_data_buf_ops; | ||
455 | xfs_trans_buf_set_type(tp, dbp, XFS_BLFT_DIR_DATA_BUF); | ||
456 | if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) | ||
457 | hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); | ||
458 | else | ||
459 | hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC); | ||
460 | |||
461 | if (needscan) | ||
462 | xfs_dir2_data_freescan(dp, hdr, &needlog); | ||
463 | /* | ||
464 | * Set up leaf tail and bests table. | ||
465 | */ | ||
466 | ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); | ||
467 | ltp->bestcount = cpu_to_be32(1); | ||
468 | bestsp = xfs_dir2_leaf_bests_p(ltp); | ||
469 | bestsp[0] = bf[0].length; | ||
470 | /* | ||
471 | * Log the data header and leaf bests table. | ||
472 | */ | ||
473 | if (needlog) | ||
474 | xfs_dir2_data_log_header(args, dbp); | ||
475 | xfs_dir3_leaf_check(dp, lbp); | ||
476 | xfs_dir3_data_check(dp, dbp); | ||
477 | xfs_dir3_leaf_log_bests(args, lbp, 0, 0); | ||
478 | return 0; | ||
479 | } | ||
480 | |||
481 | STATIC void | ||
482 | xfs_dir3_leaf_find_stale( | ||
483 | struct xfs_dir3_icleaf_hdr *leafhdr, | ||
484 | struct xfs_dir2_leaf_entry *ents, | ||
485 | int index, | ||
486 | int *lowstale, | ||
487 | int *highstale) | ||
488 | { | ||
489 | /* | ||
490 | * Find the first stale entry before our index, if any. | ||
491 | */ | ||
492 | for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) { | ||
493 | if (ents[*lowstale].address == | ||
494 | cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) | ||
495 | break; | ||
496 | } | ||
497 | |||
498 | /* | ||
499 | * Find the first stale entry at or after our index, if any. | ||
500 | * Stop if the result would require moving more entries than using | ||
501 | * lowstale. | ||
502 | */ | ||
503 | for (*highstale = index; *highstale < leafhdr->count; ++*highstale) { | ||
504 | if (ents[*highstale].address == | ||
505 | cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) | ||
506 | break; | ||
507 | if (*lowstale >= 0 && index - *lowstale <= *highstale - index) | ||
508 | break; | ||
509 | } | ||
510 | } | ||
511 | |||
512 | struct xfs_dir2_leaf_entry * | ||
513 | xfs_dir3_leaf_find_entry( | ||
514 | struct xfs_dir3_icleaf_hdr *leafhdr, | ||
515 | struct xfs_dir2_leaf_entry *ents, | ||
516 | int index, /* leaf table position */ | ||
517 | int compact, /* need to compact leaves */ | ||
518 | int lowstale, /* index of prev stale leaf */ | ||
519 | int highstale, /* index of next stale leaf */ | ||
520 | int *lfloglow, /* low leaf logging index */ | ||
521 | int *lfloghigh) /* high leaf logging index */ | ||
522 | { | ||
523 | if (!leafhdr->stale) { | ||
524 | xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ | ||
525 | |||
526 | /* | ||
527 | * Now we need to make room to insert the leaf entry. | ||
528 | * | ||
529 | * If there are no stale entries, just insert a hole at index. | ||
530 | */ | ||
531 | lep = &ents[index]; | ||
532 | if (index < leafhdr->count) | ||
533 | memmove(lep + 1, lep, | ||
534 | (leafhdr->count - index) * sizeof(*lep)); | ||
535 | |||
536 | /* | ||
537 | * Record low and high logging indices for the leaf. | ||
538 | */ | ||
539 | *lfloglow = index; | ||
540 | *lfloghigh = leafhdr->count++; | ||
541 | return lep; | ||
542 | } | ||
543 | |||
544 | /* | ||
545 | * There are stale entries. | ||
546 | * | ||
547 | * We will use one of them for the new entry. It's probably not at | ||
548 | * the right location, so we'll have to shift some up or down first. | ||
549 | * | ||
550 | * If we didn't compact before, we need to find the nearest stale | ||
551 | * entries before and after our insertion point. | ||
552 | */ | ||
553 | if (compact == 0) | ||
554 | xfs_dir3_leaf_find_stale(leafhdr, ents, index, | ||
555 | &lowstale, &highstale); | ||
556 | |||
557 | /* | ||
558 | * If the low one is better, use it. | ||
559 | */ | ||
560 | if (lowstale >= 0 && | ||
561 | (highstale == leafhdr->count || | ||
562 | index - lowstale - 1 < highstale - index)) { | ||
563 | ASSERT(index - lowstale - 1 >= 0); | ||
564 | ASSERT(ents[lowstale].address == | ||
565 | cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); | ||
566 | |||
567 | /* | ||
568 | * Copy entries up to cover the stale entry and make room | ||
569 | * for the new entry. | ||
570 | */ | ||
571 | if (index - lowstale - 1 > 0) { | ||
572 | memmove(&ents[lowstale], &ents[lowstale + 1], | ||
573 | (index - lowstale - 1) * | ||
574 | sizeof(xfs_dir2_leaf_entry_t)); | ||
575 | } | ||
576 | *lfloglow = MIN(lowstale, *lfloglow); | ||
577 | *lfloghigh = MAX(index - 1, *lfloghigh); | ||
578 | leafhdr->stale--; | ||
579 | return &ents[index - 1]; | ||
580 | } | ||
581 | |||
582 | /* | ||
583 | * The high one is better, so use that one. | ||
584 | */ | ||
585 | ASSERT(highstale - index >= 0); | ||
586 | ASSERT(ents[highstale].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)); | ||
587 | |||
588 | /* | ||
589 | * Copy entries down to cover the stale entry and make room for the | ||
590 | * new entry. | ||
591 | */ | ||
592 | if (highstale - index > 0) { | ||
593 | memmove(&ents[index + 1], &ents[index], | ||
594 | (highstale - index) * sizeof(xfs_dir2_leaf_entry_t)); | ||
595 | } | ||
596 | *lfloglow = MIN(index, *lfloglow); | ||
597 | *lfloghigh = MAX(highstale, *lfloghigh); | ||
598 | leafhdr->stale--; | ||
599 | return &ents[index]; | ||
600 | } | ||
601 | |||
602 | /* | ||
603 | * Add an entry to a leaf form directory. | ||
604 | */ | ||
605 | int /* error */ | ||
606 | xfs_dir2_leaf_addname( | ||
607 | xfs_da_args_t *args) /* operation arguments */ | ||
608 | { | ||
609 | __be16 *bestsp; /* freespace table in leaf */ | ||
610 | int compact; /* need to compact leaves */ | ||
611 | xfs_dir2_data_hdr_t *hdr; /* data block header */ | ||
612 | struct xfs_buf *dbp; /* data block buffer */ | ||
613 | xfs_dir2_data_entry_t *dep; /* data block entry */ | ||
614 | xfs_inode_t *dp; /* incore directory inode */ | ||
615 | xfs_dir2_data_unused_t *dup; /* data unused entry */ | ||
616 | int error; /* error return value */ | ||
617 | int grown; /* allocated new data block */ | ||
618 | int highstale; /* index of next stale leaf */ | ||
619 | int i; /* temporary, index */ | ||
620 | int index; /* leaf table position */ | ||
621 | struct xfs_buf *lbp; /* leaf's buffer */ | ||
622 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
623 | int length; /* length of new entry */ | ||
624 | xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ | ||
625 | int lfloglow; /* low leaf logging index */ | ||
626 | int lfloghigh; /* high leaf logging index */ | ||
627 | int lowstale; /* index of prev stale leaf */ | ||
628 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail pointer */ | ||
629 | xfs_mount_t *mp; /* filesystem mount point */ | ||
630 | int needbytes; /* leaf block bytes needed */ | ||
631 | int needlog; /* need to log data header */ | ||
632 | int needscan; /* need to rescan data free */ | ||
633 | __be16 *tagp; /* end of data entry */ | ||
634 | xfs_trans_t *tp; /* transaction pointer */ | ||
635 | xfs_dir2_db_t use_block; /* data block number */ | ||
636 | struct xfs_dir2_data_free *bf; /* bestfree table */ | ||
637 | struct xfs_dir2_leaf_entry *ents; | ||
638 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
639 | |||
640 | trace_xfs_dir2_leaf_addname(args); | ||
641 | |||
642 | dp = args->dp; | ||
643 | tp = args->trans; | ||
644 | mp = dp->i_mount; | ||
645 | |||
646 | error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); | ||
647 | if (error) | ||
648 | return error; | ||
649 | |||
650 | /* | ||
651 | * Look up the entry by hash value and name. | ||
652 | * We know it's not there, our caller has already done a lookup. | ||
653 | * So the index is of the entry to insert in front of. | ||
654 | * But if there are dup hash values the index is of the first of those. | ||
655 | */ | ||
656 | index = xfs_dir2_leaf_search_hash(args, lbp); | ||
657 | leaf = lbp->b_addr; | ||
658 | ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); | ||
659 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
660 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
661 | bestsp = xfs_dir2_leaf_bests_p(ltp); | ||
662 | length = dp->d_ops->data_entsize(args->namelen); | ||
663 | |||
664 | /* | ||
665 | * See if there are any entries with the same hash value | ||
666 | * and space in their block for the new entry. | ||
667 | * This is good because it puts multiple same-hash value entries | ||
668 | * in a data block, improving the lookup of those entries. | ||
669 | */ | ||
670 | for (use_block = -1, lep = &ents[index]; | ||
671 | index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; | ||
672 | index++, lep++) { | ||
673 | if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) | ||
674 | continue; | ||
675 | i = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); | ||
676 | ASSERT(i < be32_to_cpu(ltp->bestcount)); | ||
677 | ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF)); | ||
678 | if (be16_to_cpu(bestsp[i]) >= length) { | ||
679 | use_block = i; | ||
680 | break; | ||
681 | } | ||
682 | } | ||
683 | /* | ||
684 | * Didn't find a block yet, linear search all the data blocks. | ||
685 | */ | ||
686 | if (use_block == -1) { | ||
687 | for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) { | ||
688 | /* | ||
689 | * Remember a block we see that's missing. | ||
690 | */ | ||
691 | if (bestsp[i] == cpu_to_be16(NULLDATAOFF) && | ||
692 | use_block == -1) | ||
693 | use_block = i; | ||
694 | else if (be16_to_cpu(bestsp[i]) >= length) { | ||
695 | use_block = i; | ||
696 | break; | ||
697 | } | ||
698 | } | ||
699 | } | ||
700 | /* | ||
701 | * How many bytes do we need in the leaf block? | ||
702 | */ | ||
703 | needbytes = 0; | ||
704 | if (!leafhdr.stale) | ||
705 | needbytes += sizeof(xfs_dir2_leaf_entry_t); | ||
706 | if (use_block == -1) | ||
707 | needbytes += sizeof(xfs_dir2_data_off_t); | ||
708 | |||
709 | /* | ||
710 | * Now kill use_block if it refers to a missing block, so we | ||
711 | * can use it as an indication of allocation needed. | ||
712 | */ | ||
713 | if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF)) | ||
714 | use_block = -1; | ||
715 | /* | ||
716 | * If we don't have enough free bytes but we can make enough | ||
717 | * by compacting out stale entries, we'll do that. | ||
718 | */ | ||
719 | if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes && | ||
720 | leafhdr.stale > 1) | ||
721 | compact = 1; | ||
722 | |||
723 | /* | ||
724 | * Otherwise if we don't have enough free bytes we need to | ||
725 | * convert to node form. | ||
726 | */ | ||
727 | else if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes) { | ||
728 | /* | ||
729 | * Just checking or no space reservation, give up. | ||
730 | */ | ||
731 | if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || | ||
732 | args->total == 0) { | ||
733 | xfs_trans_brelse(tp, lbp); | ||
734 | return -ENOSPC; | ||
735 | } | ||
736 | /* | ||
737 | * Convert to node form. | ||
738 | */ | ||
739 | error = xfs_dir2_leaf_to_node(args, lbp); | ||
740 | if (error) | ||
741 | return error; | ||
742 | /* | ||
743 | * Then add the new entry. | ||
744 | */ | ||
745 | return xfs_dir2_node_addname(args); | ||
746 | } | ||
747 | /* | ||
748 | * Otherwise it will fit without compaction. | ||
749 | */ | ||
750 | else | ||
751 | compact = 0; | ||
752 | /* | ||
753 | * If just checking, then it will fit unless we needed to allocate | ||
754 | * a new data block. | ||
755 | */ | ||
756 | if (args->op_flags & XFS_DA_OP_JUSTCHECK) { | ||
757 | xfs_trans_brelse(tp, lbp); | ||
758 | return use_block == -1 ? -ENOSPC : 0; | ||
759 | } | ||
760 | /* | ||
761 | * If no allocations are allowed, return now before we've | ||
762 | * changed anything. | ||
763 | */ | ||
764 | if (args->total == 0 && use_block == -1) { | ||
765 | xfs_trans_brelse(tp, lbp); | ||
766 | return -ENOSPC; | ||
767 | } | ||
768 | /* | ||
769 | * Need to compact the leaf entries, removing stale ones. | ||
770 | * Leave one stale entry behind - the one closest to our | ||
771 | * insertion index - and we'll shift that one to our insertion | ||
772 | * point later. | ||
773 | */ | ||
774 | if (compact) { | ||
775 | xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale, | ||
776 | &highstale, &lfloglow, &lfloghigh); | ||
777 | } | ||
778 | /* | ||
779 | * There are stale entries, so we'll need log-low and log-high | ||
780 | * impossibly bad values later. | ||
781 | */ | ||
782 | else if (leafhdr.stale) { | ||
783 | lfloglow = leafhdr.count; | ||
784 | lfloghigh = -1; | ||
785 | } | ||
786 | /* | ||
787 | * If there was no data block space found, we need to allocate | ||
788 | * a new one. | ||
789 | */ | ||
790 | if (use_block == -1) { | ||
791 | /* | ||
792 | * Add the new data block. | ||
793 | */ | ||
794 | if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, | ||
795 | &use_block))) { | ||
796 | xfs_trans_brelse(tp, lbp); | ||
797 | return error; | ||
798 | } | ||
799 | /* | ||
800 | * Initialize the block. | ||
801 | */ | ||
802 | if ((error = xfs_dir3_data_init(args, use_block, &dbp))) { | ||
803 | xfs_trans_brelse(tp, lbp); | ||
804 | return error; | ||
805 | } | ||
806 | /* | ||
807 | * If we're adding a new data block on the end we need to | ||
808 | * extend the bests table. Copy it up one entry. | ||
809 | */ | ||
810 | if (use_block >= be32_to_cpu(ltp->bestcount)) { | ||
811 | bestsp--; | ||
812 | memmove(&bestsp[0], &bestsp[1], | ||
813 | be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0])); | ||
814 | be32_add_cpu(<p->bestcount, 1); | ||
815 | xfs_dir3_leaf_log_tail(args, lbp); | ||
816 | xfs_dir3_leaf_log_bests(args, lbp, 0, | ||
817 | be32_to_cpu(ltp->bestcount) - 1); | ||
818 | } | ||
819 | /* | ||
820 | * If we're filling in a previously empty block just log it. | ||
821 | */ | ||
822 | else | ||
823 | xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block); | ||
824 | hdr = dbp->b_addr; | ||
825 | bf = dp->d_ops->data_bestfree_p(hdr); | ||
826 | bestsp[use_block] = bf[0].length; | ||
827 | grown = 1; | ||
828 | } else { | ||
829 | /* | ||
830 | * Already had space in some data block. | ||
831 | * Just read that one in. | ||
832 | */ | ||
833 | error = xfs_dir3_data_read(tp, dp, | ||
834 | xfs_dir2_db_to_da(args->geo, use_block), | ||
835 | -1, &dbp); | ||
836 | if (error) { | ||
837 | xfs_trans_brelse(tp, lbp); | ||
838 | return error; | ||
839 | } | ||
840 | hdr = dbp->b_addr; | ||
841 | bf = dp->d_ops->data_bestfree_p(hdr); | ||
842 | grown = 0; | ||
843 | } | ||
844 | /* | ||
845 | * Point to the biggest freespace in our data block. | ||
846 | */ | ||
847 | dup = (xfs_dir2_data_unused_t *) | ||
848 | ((char *)hdr + be16_to_cpu(bf[0].offset)); | ||
849 | ASSERT(be16_to_cpu(dup->length) >= length); | ||
850 | needscan = needlog = 0; | ||
851 | /* | ||
852 | * Mark the initial part of our freespace in use for the new entry. | ||
853 | */ | ||
854 | xfs_dir2_data_use_free(args, dbp, dup, | ||
855 | (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, | ||
856 | &needlog, &needscan); | ||
857 | /* | ||
858 | * Initialize our new entry (at last). | ||
859 | */ | ||
860 | dep = (xfs_dir2_data_entry_t *)dup; | ||
861 | dep->inumber = cpu_to_be64(args->inumber); | ||
862 | dep->namelen = args->namelen; | ||
863 | memcpy(dep->name, args->name, dep->namelen); | ||
864 | dp->d_ops->data_put_ftype(dep, args->filetype); | ||
865 | tagp = dp->d_ops->data_entry_tag_p(dep); | ||
866 | *tagp = cpu_to_be16((char *)dep - (char *)hdr); | ||
867 | /* | ||
868 | * Need to scan fix up the bestfree table. | ||
869 | */ | ||
870 | if (needscan) | ||
871 | xfs_dir2_data_freescan(dp, hdr, &needlog); | ||
872 | /* | ||
873 | * Need to log the data block's header. | ||
874 | */ | ||
875 | if (needlog) | ||
876 | xfs_dir2_data_log_header(args, dbp); | ||
877 | xfs_dir2_data_log_entry(args, dbp, dep); | ||
878 | /* | ||
879 | * If the bests table needs to be changed, do it. | ||
880 | * Log the change unless we've already done that. | ||
881 | */ | ||
882 | if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) { | ||
883 | bestsp[use_block] = bf[0].length; | ||
884 | if (!grown) | ||
885 | xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block); | ||
886 | } | ||
887 | |||
888 | lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale, | ||
889 | highstale, &lfloglow, &lfloghigh); | ||
890 | |||
891 | /* | ||
892 | * Fill in the new leaf entry. | ||
893 | */ | ||
894 | lep->hashval = cpu_to_be32(args->hashval); | ||
895 | lep->address = cpu_to_be32( | ||
896 | xfs_dir2_db_off_to_dataptr(args->geo, use_block, | ||
897 | be16_to_cpu(*tagp))); | ||
898 | /* | ||
899 | * Log the leaf fields and give up the buffers. | ||
900 | */ | ||
901 | dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); | ||
902 | xfs_dir3_leaf_log_header(args, lbp); | ||
903 | xfs_dir3_leaf_log_ents(args, lbp, lfloglow, lfloghigh); | ||
904 | xfs_dir3_leaf_check(dp, lbp); | ||
905 | xfs_dir3_data_check(dp, dbp); | ||
906 | return 0; | ||
907 | } | ||
908 | |||
909 | /* | ||
910 | * Compact out any stale entries in the leaf. | ||
911 | * Log the header and changed leaf entries, if any. | ||
912 | */ | ||
913 | void | ||
914 | xfs_dir3_leaf_compact( | ||
915 | xfs_da_args_t *args, /* operation arguments */ | ||
916 | struct xfs_dir3_icleaf_hdr *leafhdr, | ||
917 | struct xfs_buf *bp) /* leaf buffer */ | ||
918 | { | ||
919 | int from; /* source leaf index */ | ||
920 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
921 | int loglow; /* first leaf entry to log */ | ||
922 | int to; /* target leaf index */ | ||
923 | struct xfs_dir2_leaf_entry *ents; | ||
924 | struct xfs_inode *dp = args->dp; | ||
925 | |||
926 | leaf = bp->b_addr; | ||
927 | if (!leafhdr->stale) | ||
928 | return; | ||
929 | |||
930 | /* | ||
931 | * Compress out the stale entries in place. | ||
932 | */ | ||
933 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
934 | for (from = to = 0, loglow = -1; from < leafhdr->count; from++) { | ||
935 | if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) | ||
936 | continue; | ||
937 | /* | ||
938 | * Only actually copy the entries that are different. | ||
939 | */ | ||
940 | if (from > to) { | ||
941 | if (loglow == -1) | ||
942 | loglow = to; | ||
943 | ents[to] = ents[from]; | ||
944 | } | ||
945 | to++; | ||
946 | } | ||
947 | /* | ||
948 | * Update and log the header, log the leaf entries. | ||
949 | */ | ||
950 | ASSERT(leafhdr->stale == from - to); | ||
951 | leafhdr->count -= leafhdr->stale; | ||
952 | leafhdr->stale = 0; | ||
953 | |||
954 | dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr); | ||
955 | xfs_dir3_leaf_log_header(args, bp); | ||
956 | if (loglow != -1) | ||
957 | xfs_dir3_leaf_log_ents(args, bp, loglow, to - 1); | ||
958 | } | ||
959 | |||
960 | /* | ||
961 | * Compact the leaf entries, removing stale ones. | ||
962 | * Leave one stale entry behind - the one closest to our | ||
963 | * insertion index - and the caller will shift that one to our insertion | ||
964 | * point later. | ||
965 | * Return new insertion index, where the remaining stale entry is, | ||
966 | * and leaf logging indices. | ||
967 | */ | ||
968 | void | ||
969 | xfs_dir3_leaf_compact_x1( | ||
970 | struct xfs_dir3_icleaf_hdr *leafhdr, | ||
971 | struct xfs_dir2_leaf_entry *ents, | ||
972 | int *indexp, /* insertion index */ | ||
973 | int *lowstalep, /* out: stale entry before us */ | ||
974 | int *highstalep, /* out: stale entry after us */ | ||
975 | int *lowlogp, /* out: low log index */ | ||
976 | int *highlogp) /* out: high log index */ | ||
977 | { | ||
978 | int from; /* source copy index */ | ||
979 | int highstale; /* stale entry at/after index */ | ||
980 | int index; /* insertion index */ | ||
981 | int keepstale; /* source index of kept stale */ | ||
982 | int lowstale; /* stale entry before index */ | ||
983 | int newindex=0; /* new insertion index */ | ||
984 | int to; /* destination copy index */ | ||
985 | |||
986 | ASSERT(leafhdr->stale > 1); | ||
987 | index = *indexp; | ||
988 | |||
989 | xfs_dir3_leaf_find_stale(leafhdr, ents, index, &lowstale, &highstale); | ||
990 | |||
991 | /* | ||
992 | * Pick the better of lowstale and highstale. | ||
993 | */ | ||
994 | if (lowstale >= 0 && | ||
995 | (highstale == leafhdr->count || | ||
996 | index - lowstale <= highstale - index)) | ||
997 | keepstale = lowstale; | ||
998 | else | ||
999 | keepstale = highstale; | ||
1000 | /* | ||
1001 | * Copy the entries in place, removing all the stale entries | ||
1002 | * except keepstale. | ||
1003 | */ | ||
1004 | for (from = to = 0; from < leafhdr->count; from++) { | ||
1005 | /* | ||
1006 | * Notice the new value of index. | ||
1007 | */ | ||
1008 | if (index == from) | ||
1009 | newindex = to; | ||
1010 | if (from != keepstale && | ||
1011 | ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { | ||
1012 | if (from == to) | ||
1013 | *lowlogp = to; | ||
1014 | continue; | ||
1015 | } | ||
1016 | /* | ||
1017 | * Record the new keepstale value for the insertion. | ||
1018 | */ | ||
1019 | if (from == keepstale) | ||
1020 | lowstale = highstale = to; | ||
1021 | /* | ||
1022 | * Copy only the entries that have moved. | ||
1023 | */ | ||
1024 | if (from > to) | ||
1025 | ents[to] = ents[from]; | ||
1026 | to++; | ||
1027 | } | ||
1028 | ASSERT(from > to); | ||
1029 | /* | ||
1030 | * If the insertion point was past the last entry, | ||
1031 | * set the new insertion point accordingly. | ||
1032 | */ | ||
1033 | if (index == from) | ||
1034 | newindex = to; | ||
1035 | *indexp = newindex; | ||
1036 | /* | ||
1037 | * Adjust the leaf header values. | ||
1038 | */ | ||
1039 | leafhdr->count -= from - to; | ||
1040 | leafhdr->stale = 1; | ||
1041 | /* | ||
1042 | * Remember the low/high stale value only in the "right" | ||
1043 | * direction. | ||
1044 | */ | ||
1045 | if (lowstale >= newindex) | ||
1046 | lowstale = -1; | ||
1047 | else | ||
1048 | highstale = leafhdr->count; | ||
1049 | *highlogp = leafhdr->count - 1; | ||
1050 | *lowstalep = lowstale; | ||
1051 | *highstalep = highstale; | ||
1052 | } | ||
1053 | |||
1054 | /* | ||
1055 | * Log the bests entries indicated from a leaf1 block. | ||
1056 | */ | ||
1057 | static void | ||
1058 | xfs_dir3_leaf_log_bests( | ||
1059 | struct xfs_da_args *args, | ||
1060 | struct xfs_buf *bp, /* leaf buffer */ | ||
1061 | int first, /* first entry to log */ | ||
1062 | int last) /* last entry to log */ | ||
1063 | { | ||
1064 | __be16 *firstb; /* pointer to first entry */ | ||
1065 | __be16 *lastb; /* pointer to last entry */ | ||
1066 | struct xfs_dir2_leaf *leaf = bp->b_addr; | ||
1067 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ | ||
1068 | |||
1069 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || | ||
1070 | leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)); | ||
1071 | |||
1072 | ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); | ||
1073 | firstb = xfs_dir2_leaf_bests_p(ltp) + first; | ||
1074 | lastb = xfs_dir2_leaf_bests_p(ltp) + last; | ||
1075 | xfs_trans_log_buf(args->trans, bp, | ||
1076 | (uint)((char *)firstb - (char *)leaf), | ||
1077 | (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1)); | ||
1078 | } | ||
1079 | |||
1080 | /* | ||
1081 | * Log the leaf entries indicated from a leaf1 or leafn block. | ||
1082 | */ | ||
1083 | void | ||
1084 | xfs_dir3_leaf_log_ents( | ||
1085 | struct xfs_da_args *args, | ||
1086 | struct xfs_buf *bp, | ||
1087 | int first, | ||
1088 | int last) | ||
1089 | { | ||
1090 | xfs_dir2_leaf_entry_t *firstlep; /* pointer to first entry */ | ||
1091 | xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */ | ||
1092 | struct xfs_dir2_leaf *leaf = bp->b_addr; | ||
1093 | struct xfs_dir2_leaf_entry *ents; | ||
1094 | |||
1095 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || | ||
1096 | leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || | ||
1097 | leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || | ||
1098 | leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); | ||
1099 | |||
1100 | ents = args->dp->d_ops->leaf_ents_p(leaf); | ||
1101 | firstlep = &ents[first]; | ||
1102 | lastlep = &ents[last]; | ||
1103 | xfs_trans_log_buf(args->trans, bp, | ||
1104 | (uint)((char *)firstlep - (char *)leaf), | ||
1105 | (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1)); | ||
1106 | } | ||
1107 | |||
1108 | /* | ||
1109 | * Log the header of the leaf1 or leafn block. | ||
1110 | */ | ||
1111 | void | ||
1112 | xfs_dir3_leaf_log_header( | ||
1113 | struct xfs_da_args *args, | ||
1114 | struct xfs_buf *bp) | ||
1115 | { | ||
1116 | struct xfs_dir2_leaf *leaf = bp->b_addr; | ||
1117 | |||
1118 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || | ||
1119 | leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || | ||
1120 | leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || | ||
1121 | leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); | ||
1122 | |||
1123 | xfs_trans_log_buf(args->trans, bp, | ||
1124 | (uint)((char *)&leaf->hdr - (char *)leaf), | ||
1125 | args->dp->d_ops->leaf_hdr_size - 1); | ||
1126 | } | ||
1127 | |||
1128 | /* | ||
1129 | * Log the tail of the leaf1 block. | ||
1130 | */ | ||
1131 | STATIC void | ||
1132 | xfs_dir3_leaf_log_tail( | ||
1133 | struct xfs_da_args *args, | ||
1134 | struct xfs_buf *bp) | ||
1135 | { | ||
1136 | struct xfs_dir2_leaf *leaf = bp->b_addr; | ||
1137 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ | ||
1138 | |||
1139 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || | ||
1140 | leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || | ||
1141 | leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || | ||
1142 | leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); | ||
1143 | |||
1144 | ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); | ||
1145 | xfs_trans_log_buf(args->trans, bp, (uint)((char *)ltp - (char *)leaf), | ||
1146 | (uint)(args->geo->blksize - 1)); | ||
1147 | } | ||
1148 | |||
1149 | /* | ||
1150 | * Look up the entry referred to by args in the leaf format directory. | ||
1151 | * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which | ||
1152 | * is also used by the node-format code. | ||
1153 | */ | ||
1154 | int | ||
1155 | xfs_dir2_leaf_lookup( | ||
1156 | xfs_da_args_t *args) /* operation arguments */ | ||
1157 | { | ||
1158 | struct xfs_buf *dbp; /* data block buffer */ | ||
1159 | xfs_dir2_data_entry_t *dep; /* data block entry */ | ||
1160 | xfs_inode_t *dp; /* incore directory inode */ | ||
1161 | int error; /* error return code */ | ||
1162 | int index; /* found entry index */ | ||
1163 | struct xfs_buf *lbp; /* leaf buffer */ | ||
1164 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
1165 | xfs_dir2_leaf_entry_t *lep; /* leaf entry */ | ||
1166 | xfs_trans_t *tp; /* transaction pointer */ | ||
1167 | struct xfs_dir2_leaf_entry *ents; | ||
1168 | |||
1169 | trace_xfs_dir2_leaf_lookup(args); | ||
1170 | |||
1171 | /* | ||
1172 | * Look up name in the leaf block, returning both buffers and index. | ||
1173 | */ | ||
1174 | if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) { | ||
1175 | return error; | ||
1176 | } | ||
1177 | tp = args->trans; | ||
1178 | dp = args->dp; | ||
1179 | xfs_dir3_leaf_check(dp, lbp); | ||
1180 | leaf = lbp->b_addr; | ||
1181 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
1182 | /* | ||
1183 | * Get to the leaf entry and contained data entry address. | ||
1184 | */ | ||
1185 | lep = &ents[index]; | ||
1186 | |||
1187 | /* | ||
1188 | * Point to the data entry. | ||
1189 | */ | ||
1190 | dep = (xfs_dir2_data_entry_t *) | ||
1191 | ((char *)dbp->b_addr + | ||
1192 | xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); | ||
1193 | /* | ||
1194 | * Return the found inode number & CI name if appropriate | ||
1195 | */ | ||
1196 | args->inumber = be64_to_cpu(dep->inumber); | ||
1197 | args->filetype = dp->d_ops->data_get_ftype(dep); | ||
1198 | error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); | ||
1199 | xfs_trans_brelse(tp, dbp); | ||
1200 | xfs_trans_brelse(tp, lbp); | ||
1201 | return error; | ||
1202 | } | ||
1203 | |||
1204 | /* | ||
1205 | * Look up name/hash in the leaf block. | ||
1206 | * Fill in indexp with the found index, and dbpp with the data buffer. | ||
1207 | * If not found dbpp will be NULL, and ENOENT comes back. | ||
1208 | * lbpp will always be filled in with the leaf buffer unless there's an error. | ||
1209 | */ | ||
1210 | static int /* error */ | ||
1211 | xfs_dir2_leaf_lookup_int( | ||
1212 | xfs_da_args_t *args, /* operation arguments */ | ||
1213 | struct xfs_buf **lbpp, /* out: leaf buffer */ | ||
1214 | int *indexp, /* out: index in leaf block */ | ||
1215 | struct xfs_buf **dbpp) /* out: data buffer */ | ||
1216 | { | ||
1217 | xfs_dir2_db_t curdb = -1; /* current data block number */ | ||
1218 | struct xfs_buf *dbp = NULL; /* data buffer */ | ||
1219 | xfs_dir2_data_entry_t *dep; /* data entry */ | ||
1220 | xfs_inode_t *dp; /* incore directory inode */ | ||
1221 | int error; /* error return code */ | ||
1222 | int index; /* index in leaf block */ | ||
1223 | struct xfs_buf *lbp; /* leaf buffer */ | ||
1224 | xfs_dir2_leaf_entry_t *lep; /* leaf entry */ | ||
1225 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
1226 | xfs_mount_t *mp; /* filesystem mount point */ | ||
1227 | xfs_dir2_db_t newdb; /* new data block number */ | ||
1228 | xfs_trans_t *tp; /* transaction pointer */ | ||
1229 | xfs_dir2_db_t cidb = -1; /* case match data block no. */ | ||
1230 | enum xfs_dacmp cmp; /* name compare result */ | ||
1231 | struct xfs_dir2_leaf_entry *ents; | ||
1232 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
1233 | |||
1234 | dp = args->dp; | ||
1235 | tp = args->trans; | ||
1236 | mp = dp->i_mount; | ||
1237 | |||
1238 | error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp); | ||
1239 | if (error) | ||
1240 | return error; | ||
1241 | |||
1242 | *lbpp = lbp; | ||
1243 | leaf = lbp->b_addr; | ||
1244 | xfs_dir3_leaf_check(dp, lbp); | ||
1245 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
1246 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
1247 | |||
1248 | /* | ||
1249 | * Look for the first leaf entry with our hash value. | ||
1250 | */ | ||
1251 | index = xfs_dir2_leaf_search_hash(args, lbp); | ||
1252 | /* | ||
1253 | * Loop over all the entries with the right hash value | ||
1254 | * looking to match the name. | ||
1255 | */ | ||
1256 | for (lep = &ents[index]; | ||
1257 | index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; | ||
1258 | lep++, index++) { | ||
1259 | /* | ||
1260 | * Skip over stale leaf entries. | ||
1261 | */ | ||
1262 | if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) | ||
1263 | continue; | ||
1264 | /* | ||
1265 | * Get the new data block number. | ||
1266 | */ | ||
1267 | newdb = xfs_dir2_dataptr_to_db(args->geo, | ||
1268 | be32_to_cpu(lep->address)); | ||
1269 | /* | ||
1270 | * If it's not the same as the old data block number, | ||
1271 | * need to pitch the old one and read the new one. | ||
1272 | */ | ||
1273 | if (newdb != curdb) { | ||
1274 | if (dbp) | ||
1275 | xfs_trans_brelse(tp, dbp); | ||
1276 | error = xfs_dir3_data_read(tp, dp, | ||
1277 | xfs_dir2_db_to_da(args->geo, newdb), | ||
1278 | -1, &dbp); | ||
1279 | if (error) { | ||
1280 | xfs_trans_brelse(tp, lbp); | ||
1281 | return error; | ||
1282 | } | ||
1283 | curdb = newdb; | ||
1284 | } | ||
1285 | /* | ||
1286 | * Point to the data entry. | ||
1287 | */ | ||
1288 | dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr + | ||
1289 | xfs_dir2_dataptr_to_off(args->geo, | ||
1290 | be32_to_cpu(lep->address))); | ||
1291 | /* | ||
1292 | * Compare name and if it's an exact match, return the index | ||
1293 | * and buffer. If it's the first case-insensitive match, store | ||
1294 | * the index and buffer and continue looking for an exact match. | ||
1295 | */ | ||
1296 | cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); | ||
1297 | if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { | ||
1298 | args->cmpresult = cmp; | ||
1299 | *indexp = index; | ||
1300 | /* case exact match: return the current buffer. */ | ||
1301 | if (cmp == XFS_CMP_EXACT) { | ||
1302 | *dbpp = dbp; | ||
1303 | return 0; | ||
1304 | } | ||
1305 | cidb = curdb; | ||
1306 | } | ||
1307 | } | ||
1308 | ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); | ||
1309 | /* | ||
1310 | * Here, we can only be doing a lookup (not a rename or remove). | ||
1311 | * If a case-insensitive match was found earlier, re-read the | ||
1312 | * appropriate data block if required and return it. | ||
1313 | */ | ||
1314 | if (args->cmpresult == XFS_CMP_CASE) { | ||
1315 | ASSERT(cidb != -1); | ||
1316 | if (cidb != curdb) { | ||
1317 | xfs_trans_brelse(tp, dbp); | ||
1318 | error = xfs_dir3_data_read(tp, dp, | ||
1319 | xfs_dir2_db_to_da(args->geo, cidb), | ||
1320 | -1, &dbp); | ||
1321 | if (error) { | ||
1322 | xfs_trans_brelse(tp, lbp); | ||
1323 | return error; | ||
1324 | } | ||
1325 | } | ||
1326 | *dbpp = dbp; | ||
1327 | return 0; | ||
1328 | } | ||
1329 | /* | ||
1330 | * No match found, return -ENOENT. | ||
1331 | */ | ||
1332 | ASSERT(cidb == -1); | ||
1333 | if (dbp) | ||
1334 | xfs_trans_brelse(tp, dbp); | ||
1335 | xfs_trans_brelse(tp, lbp); | ||
1336 | return -ENOENT; | ||
1337 | } | ||
1338 | |||
1339 | /* | ||
1340 | * Remove an entry from a leaf format directory. | ||
1341 | */ | ||
1342 | int /* error */ | ||
1343 | xfs_dir2_leaf_removename( | ||
1344 | xfs_da_args_t *args) /* operation arguments */ | ||
1345 | { | ||
1346 | __be16 *bestsp; /* leaf block best freespace */ | ||
1347 | xfs_dir2_data_hdr_t *hdr; /* data block header */ | ||
1348 | xfs_dir2_db_t db; /* data block number */ | ||
1349 | struct xfs_buf *dbp; /* data block buffer */ | ||
1350 | xfs_dir2_data_entry_t *dep; /* data entry structure */ | ||
1351 | xfs_inode_t *dp; /* incore directory inode */ | ||
1352 | int error; /* error return code */ | ||
1353 | xfs_dir2_db_t i; /* temporary data block # */ | ||
1354 | int index; /* index into leaf entries */ | ||
1355 | struct xfs_buf *lbp; /* leaf buffer */ | ||
1356 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
1357 | xfs_dir2_leaf_entry_t *lep; /* leaf entry */ | ||
1358 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ | ||
1359 | xfs_mount_t *mp; /* filesystem mount point */ | ||
1360 | int needlog; /* need to log data header */ | ||
1361 | int needscan; /* need to rescan data frees */ | ||
1362 | xfs_dir2_data_off_t oldbest; /* old value of best free */ | ||
1363 | xfs_trans_t *tp; /* transaction pointer */ | ||
1364 | struct xfs_dir2_data_free *bf; /* bestfree table */ | ||
1365 | struct xfs_dir2_leaf_entry *ents; | ||
1366 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
1367 | |||
1368 | trace_xfs_dir2_leaf_removename(args); | ||
1369 | |||
1370 | /* | ||
1371 | * Lookup the leaf entry, get the leaf and data blocks read in. | ||
1372 | */ | ||
1373 | if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) { | ||
1374 | return error; | ||
1375 | } | ||
1376 | dp = args->dp; | ||
1377 | tp = args->trans; | ||
1378 | mp = dp->i_mount; | ||
1379 | leaf = lbp->b_addr; | ||
1380 | hdr = dbp->b_addr; | ||
1381 | xfs_dir3_data_check(dp, dbp); | ||
1382 | bf = dp->d_ops->data_bestfree_p(hdr); | ||
1383 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
1384 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
1385 | /* | ||
1386 | * Point to the leaf entry, use that to point to the data entry. | ||
1387 | */ | ||
1388 | lep = &ents[index]; | ||
1389 | db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); | ||
1390 | dep = (xfs_dir2_data_entry_t *)((char *)hdr + | ||
1391 | xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); | ||
1392 | needscan = needlog = 0; | ||
1393 | oldbest = be16_to_cpu(bf[0].length); | ||
1394 | ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); | ||
1395 | bestsp = xfs_dir2_leaf_bests_p(ltp); | ||
1396 | ASSERT(be16_to_cpu(bestsp[db]) == oldbest); | ||
1397 | /* | ||
1398 | * Mark the former data entry unused. | ||
1399 | */ | ||
1400 | xfs_dir2_data_make_free(args, dbp, | ||
1401 | (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), | ||
1402 | dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); | ||
1403 | /* | ||
1404 | * We just mark the leaf entry stale by putting a null in it. | ||
1405 | */ | ||
1406 | leafhdr.stale++; | ||
1407 | dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); | ||
1408 | xfs_dir3_leaf_log_header(args, lbp); | ||
1409 | |||
1410 | lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); | ||
1411 | xfs_dir3_leaf_log_ents(args, lbp, index, index); | ||
1412 | |||
1413 | /* | ||
1414 | * Scan the freespace in the data block again if necessary, | ||
1415 | * log the data block header if necessary. | ||
1416 | */ | ||
1417 | if (needscan) | ||
1418 | xfs_dir2_data_freescan(dp, hdr, &needlog); | ||
1419 | if (needlog) | ||
1420 | xfs_dir2_data_log_header(args, dbp); | ||
1421 | /* | ||
1422 | * If the longest freespace in the data block has changed, | ||
1423 | * put the new value in the bests table and log that. | ||
1424 | */ | ||
1425 | if (be16_to_cpu(bf[0].length) != oldbest) { | ||
1426 | bestsp[db] = bf[0].length; | ||
1427 | xfs_dir3_leaf_log_bests(args, lbp, db, db); | ||
1428 | } | ||
1429 | xfs_dir3_data_check(dp, dbp); | ||
1430 | /* | ||
1431 | * If the data block is now empty then get rid of the data block. | ||
1432 | */ | ||
1433 | if (be16_to_cpu(bf[0].length) == | ||
1434 | args->geo->blksize - dp->d_ops->data_entry_offset) { | ||
1435 | ASSERT(db != args->geo->datablk); | ||
1436 | if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { | ||
1437 | /* | ||
1438 | * Nope, can't get rid of it because it caused | ||
1439 | * allocation of a bmap btree block to do so. | ||
1440 | * Just go on, returning success, leaving the | ||
1441 | * empty block in place. | ||
1442 | */ | ||
1443 | if (error == -ENOSPC && args->total == 0) | ||
1444 | error = 0; | ||
1445 | xfs_dir3_leaf_check(dp, lbp); | ||
1446 | return error; | ||
1447 | } | ||
1448 | dbp = NULL; | ||
1449 | /* | ||
1450 | * If this is the last data block then compact the | ||
1451 | * bests table by getting rid of entries. | ||
1452 | */ | ||
1453 | if (db == be32_to_cpu(ltp->bestcount) - 1) { | ||
1454 | /* | ||
1455 | * Look for the last active entry (i). | ||
1456 | */ | ||
1457 | for (i = db - 1; i > 0; i--) { | ||
1458 | if (bestsp[i] != cpu_to_be16(NULLDATAOFF)) | ||
1459 | break; | ||
1460 | } | ||
1461 | /* | ||
1462 | * Copy the table down so inactive entries at the | ||
1463 | * end are removed. | ||
1464 | */ | ||
1465 | memmove(&bestsp[db - i], bestsp, | ||
1466 | (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp)); | ||
1467 | be32_add_cpu(<p->bestcount, -(db - i)); | ||
1468 | xfs_dir3_leaf_log_tail(args, lbp); | ||
1469 | xfs_dir3_leaf_log_bests(args, lbp, 0, | ||
1470 | be32_to_cpu(ltp->bestcount) - 1); | ||
1471 | } else | ||
1472 | bestsp[db] = cpu_to_be16(NULLDATAOFF); | ||
1473 | } | ||
1474 | /* | ||
1475 | * If the data block was not the first one, drop it. | ||
1476 | */ | ||
1477 | else if (db != args->geo->datablk) | ||
1478 | dbp = NULL; | ||
1479 | |||
1480 | xfs_dir3_leaf_check(dp, lbp); | ||
1481 | /* | ||
1482 | * See if we can convert to block form. | ||
1483 | */ | ||
1484 | return xfs_dir2_leaf_to_block(args, lbp, dbp); | ||
1485 | } | ||
1486 | |||
1487 | /* | ||
1488 | * Replace the inode number in a leaf format directory entry. | ||
1489 | */ | ||
1490 | int /* error */ | ||
1491 | xfs_dir2_leaf_replace( | ||
1492 | xfs_da_args_t *args) /* operation arguments */ | ||
1493 | { | ||
1494 | struct xfs_buf *dbp; /* data block buffer */ | ||
1495 | xfs_dir2_data_entry_t *dep; /* data block entry */ | ||
1496 | xfs_inode_t *dp; /* incore directory inode */ | ||
1497 | int error; /* error return code */ | ||
1498 | int index; /* index of leaf entry */ | ||
1499 | struct xfs_buf *lbp; /* leaf buffer */ | ||
1500 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
1501 | xfs_dir2_leaf_entry_t *lep; /* leaf entry */ | ||
1502 | xfs_trans_t *tp; /* transaction pointer */ | ||
1503 | struct xfs_dir2_leaf_entry *ents; | ||
1504 | |||
1505 | trace_xfs_dir2_leaf_replace(args); | ||
1506 | |||
1507 | /* | ||
1508 | * Look up the entry. | ||
1509 | */ | ||
1510 | if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) { | ||
1511 | return error; | ||
1512 | } | ||
1513 | dp = args->dp; | ||
1514 | leaf = lbp->b_addr; | ||
1515 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
1516 | /* | ||
1517 | * Point to the leaf entry, get data address from it. | ||
1518 | */ | ||
1519 | lep = &ents[index]; | ||
1520 | /* | ||
1521 | * Point to the data entry. | ||
1522 | */ | ||
1523 | dep = (xfs_dir2_data_entry_t *) | ||
1524 | ((char *)dbp->b_addr + | ||
1525 | xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address))); | ||
1526 | ASSERT(args->inumber != be64_to_cpu(dep->inumber)); | ||
1527 | /* | ||
1528 | * Put the new inode number in, log it. | ||
1529 | */ | ||
1530 | dep->inumber = cpu_to_be64(args->inumber); | ||
1531 | dp->d_ops->data_put_ftype(dep, args->filetype); | ||
1532 | tp = args->trans; | ||
1533 | xfs_dir2_data_log_entry(args, dbp, dep); | ||
1534 | xfs_dir3_leaf_check(dp, lbp); | ||
1535 | xfs_trans_brelse(tp, lbp); | ||
1536 | return 0; | ||
1537 | } | ||
1538 | |||
1539 | /* | ||
1540 | * Return index in the leaf block (lbp) which is either the first | ||
1541 | * one with this hash value, or if there are none, the insert point | ||
1542 | * for that hash value. | ||
1543 | */ | ||
1544 | int /* index value */ | ||
1545 | xfs_dir2_leaf_search_hash( | ||
1546 | xfs_da_args_t *args, /* operation arguments */ | ||
1547 | struct xfs_buf *lbp) /* leaf buffer */ | ||
1548 | { | ||
1549 | xfs_dahash_t hash=0; /* hash from this entry */ | ||
1550 | xfs_dahash_t hashwant; /* hash value looking for */ | ||
1551 | int high; /* high leaf index */ | ||
1552 | int low; /* low leaf index */ | ||
1553 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
1554 | xfs_dir2_leaf_entry_t *lep; /* leaf entry */ | ||
1555 | int mid=0; /* current leaf index */ | ||
1556 | struct xfs_dir2_leaf_entry *ents; | ||
1557 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
1558 | |||
1559 | leaf = lbp->b_addr; | ||
1560 | ents = args->dp->d_ops->leaf_ents_p(leaf); | ||
1561 | args->dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
1562 | |||
1563 | /* | ||
1564 | * Note, the table cannot be empty, so we have to go through the loop. | ||
1565 | * Binary search the leaf entries looking for our hash value. | ||
1566 | */ | ||
1567 | for (lep = ents, low = 0, high = leafhdr.count - 1, | ||
1568 | hashwant = args->hashval; | ||
1569 | low <= high; ) { | ||
1570 | mid = (low + high) >> 1; | ||
1571 | if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant) | ||
1572 | break; | ||
1573 | if (hash < hashwant) | ||
1574 | low = mid + 1; | ||
1575 | else | ||
1576 | high = mid - 1; | ||
1577 | } | ||
1578 | /* | ||
1579 | * Found one, back up through all the equal hash values. | ||
1580 | */ | ||
1581 | if (hash == hashwant) { | ||
1582 | while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) { | ||
1583 | mid--; | ||
1584 | } | ||
1585 | } | ||
1586 | /* | ||
1587 | * Need to point to an entry higher than ours. | ||
1588 | */ | ||
1589 | else if (hash < hashwant) | ||
1590 | mid++; | ||
1591 | return mid; | ||
1592 | } | ||
1593 | |||
1594 | /* | ||
1595 | * Trim off a trailing data block. We know it's empty since the leaf | ||
1596 | * freespace table says so. | ||
1597 | */ | ||
1598 | int /* error */ | ||
1599 | xfs_dir2_leaf_trim_data( | ||
1600 | xfs_da_args_t *args, /* operation arguments */ | ||
1601 | struct xfs_buf *lbp, /* leaf buffer */ | ||
1602 | xfs_dir2_db_t db) /* data block number */ | ||
1603 | { | ||
1604 | __be16 *bestsp; /* leaf bests table */ | ||
1605 | struct xfs_buf *dbp; /* data block buffer */ | ||
1606 | xfs_inode_t *dp; /* incore directory inode */ | ||
1607 | int error; /* error return value */ | ||
1608 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
1609 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ | ||
1610 | xfs_mount_t *mp; /* filesystem mount point */ | ||
1611 | xfs_trans_t *tp; /* transaction pointer */ | ||
1612 | |||
1613 | dp = args->dp; | ||
1614 | mp = dp->i_mount; | ||
1615 | tp = args->trans; | ||
1616 | /* | ||
1617 | * Read the offending data block. We need its buffer. | ||
1618 | */ | ||
1619 | error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, db), | ||
1620 | -1, &dbp); | ||
1621 | if (error) | ||
1622 | return error; | ||
1623 | |||
1624 | leaf = lbp->b_addr; | ||
1625 | ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); | ||
1626 | |||
1627 | #ifdef DEBUG | ||
1628 | { | ||
1629 | struct xfs_dir2_data_hdr *hdr = dbp->b_addr; | ||
1630 | struct xfs_dir2_data_free *bf = dp->d_ops->data_bestfree_p(hdr); | ||
1631 | |||
1632 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || | ||
1633 | hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); | ||
1634 | ASSERT(be16_to_cpu(bf[0].length) == | ||
1635 | args->geo->blksize - dp->d_ops->data_entry_offset); | ||
1636 | ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); | ||
1637 | } | ||
1638 | #endif | ||
1639 | |||
1640 | /* | ||
1641 | * Get rid of the data block. | ||
1642 | */ | ||
1643 | if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { | ||
1644 | ASSERT(error != -ENOSPC); | ||
1645 | xfs_trans_brelse(tp, dbp); | ||
1646 | return error; | ||
1647 | } | ||
1648 | /* | ||
1649 | * Eliminate the last bests entry from the table. | ||
1650 | */ | ||
1651 | bestsp = xfs_dir2_leaf_bests_p(ltp); | ||
1652 | be32_add_cpu(&ltp->bestcount, -1); | ||
1653 | memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp)); | ||
1654 | xfs_dir3_leaf_log_tail(args, lbp); | ||
1655 | xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); | ||
1656 | return 0; | ||
1657 | } | ||
1658 | |||
1659 | static inline size_t | ||
1660 | xfs_dir3_leaf_size( | ||
1661 | struct xfs_dir3_icleaf_hdr *hdr, | ||
1662 | int counts) | ||
1663 | { | ||
1664 | int entries; | ||
1665 | int hdrsize; | ||
1666 | |||
1667 | entries = hdr->count - hdr->stale; | ||
1668 | if (hdr->magic == XFS_DIR2_LEAF1_MAGIC || | ||
1669 | hdr->magic == XFS_DIR2_LEAFN_MAGIC) | ||
1670 | hdrsize = sizeof(struct xfs_dir2_leaf_hdr); | ||
1671 | else | ||
1672 | hdrsize = sizeof(struct xfs_dir3_leaf_hdr); | ||
1673 | |||
1674 | return hdrsize + entries * sizeof(xfs_dir2_leaf_entry_t) | ||
1675 | + counts * sizeof(xfs_dir2_data_off_t) | ||
1676 | + sizeof(xfs_dir2_leaf_tail_t); | ||
1677 | } | ||
1678 | |||
1679 | /* | ||
1680 | * Convert node form directory to leaf form directory. | ||
1681 | * The root of the node form dir needs to already be a LEAFN block. | ||
1682 | * Just return if we can't do anything. | ||
1683 | */ | ||
1684 | int /* error */ | ||
1685 | xfs_dir2_node_to_leaf( | ||
1686 | xfs_da_state_t *state) /* directory operation state */ | ||
1687 | { | ||
1688 | xfs_da_args_t *args; /* operation arguments */ | ||
1689 | xfs_inode_t *dp; /* incore directory inode */ | ||
1690 | int error; /* error return code */ | ||
1691 | struct xfs_buf *fbp; /* buffer for freespace block */ | ||
1692 | xfs_fileoff_t fo; /* freespace file offset */ | ||
1693 | xfs_dir2_free_t *free; /* freespace structure */ | ||
1694 | struct xfs_buf *lbp; /* buffer for leaf block */ | ||
1695 | xfs_dir2_leaf_tail_t *ltp; /* tail of leaf structure */ | ||
1696 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
1697 | xfs_mount_t *mp; /* filesystem mount point */ | ||
1698 | int rval; /* successful free trim? */ | ||
1699 | xfs_trans_t *tp; /* transaction pointer */ | ||
1700 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
1701 | struct xfs_dir3_icfree_hdr freehdr; | ||
1702 | |||
1703 | /* | ||
1704 | * There's more than a leaf level in the btree, so there must | ||
1705 | * be multiple leafn blocks. Give up. | ||
1706 | */ | ||
1707 | if (state->path.active > 1) | ||
1708 | return 0; | ||
1709 | args = state->args; | ||
1710 | |||
1711 | trace_xfs_dir2_node_to_leaf(args); | ||
1712 | |||
1713 | mp = state->mp; | ||
1714 | dp = args->dp; | ||
1715 | tp = args->trans; | ||
1716 | /* | ||
1717 | * Get the last offset in the file. | ||
1718 | */ | ||
1719 | if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) { | ||
1720 | return error; | ||
1721 | } | ||
1722 | fo -= args->geo->fsbcount; | ||
1723 | /* | ||
1724 | * If there are freespace blocks other than the first one, | ||
1725 | * take this opportunity to remove trailing empty freespace blocks | ||
1726 | * that may have been left behind during no-space-reservation | ||
1727 | * operations. | ||
1728 | */ | ||
1729 | while (fo > args->geo->freeblk) { | ||
1730 | if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) { | ||
1731 | return error; | ||
1732 | } | ||
1733 | if (rval) | ||
1734 | fo -= args->geo->fsbcount; | ||
1735 | else | ||
1736 | return 0; | ||
1737 | } | ||
1738 | /* | ||
1739 | * Now find the block just before the freespace block. | ||
1740 | */ | ||
1741 | if ((error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK))) { | ||
1742 | return error; | ||
1743 | } | ||
1744 | /* | ||
1745 | * If it's not the single leaf block, give up. | ||
1746 | */ | ||
1747 | if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + args->geo->blksize) | ||
1748 | return 0; | ||
1749 | lbp = state->path.blk[0].bp; | ||
1750 | leaf = lbp->b_addr; | ||
1751 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
1752 | |||
1753 | ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || | ||
1754 | leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); | ||
1755 | |||
1756 | /* | ||
1757 | * Read the freespace block. | ||
1758 | */ | ||
1759 | error = xfs_dir2_free_read(tp, dp, args->geo->freeblk, &fbp); | ||
1760 | if (error) | ||
1761 | return error; | ||
1762 | free = fbp->b_addr; | ||
1763 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
1764 | |||
1765 | ASSERT(!freehdr.firstdb); | ||
1766 | |||
1767 | /* | ||
1768 | * Now see if the leafn and free data will fit in a leaf1. | ||
1769 | * If not, release the buffer and give up. | ||
1770 | */ | ||
1771 | if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > args->geo->blksize) { | ||
1772 | xfs_trans_brelse(tp, fbp); | ||
1773 | return 0; | ||
1774 | } | ||
1775 | |||
1776 | /* | ||
1777 | * If the leaf has any stale entries in it, compress them out. | ||
1778 | */ | ||
1779 | if (leafhdr.stale) | ||
1780 | xfs_dir3_leaf_compact(args, &leafhdr, lbp); | ||
1781 | |||
1782 | lbp->b_ops = &xfs_dir3_leaf1_buf_ops; | ||
1783 | xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAF1_BUF); | ||
1784 | leafhdr.magic = (leafhdr.magic == XFS_DIR2_LEAFN_MAGIC) | ||
1785 | ? XFS_DIR2_LEAF1_MAGIC | ||
1786 | : XFS_DIR3_LEAF1_MAGIC; | ||
1787 | |||
1788 | /* | ||
1789 | * Set up the leaf tail from the freespace block. | ||
1790 | */ | ||
1791 | ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); | ||
1792 | ltp->bestcount = cpu_to_be32(freehdr.nvalid); | ||
1793 | |||
1794 | /* | ||
1795 | * Set up the leaf bests table. | ||
1796 | */ | ||
1797 | memcpy(xfs_dir2_leaf_bests_p(ltp), dp->d_ops->free_bests_p(free), | ||
1798 | freehdr.nvalid * sizeof(xfs_dir2_data_off_t)); | ||
1799 | |||
1800 | dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); | ||
1801 | xfs_dir3_leaf_log_header(args, lbp); | ||
1802 | xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); | ||
1803 | xfs_dir3_leaf_log_tail(args, lbp); | ||
1804 | xfs_dir3_leaf_check(dp, lbp); | ||
1805 | |||
1806 | /* | ||
1807 | * Get rid of the freespace block. | ||
1808 | */ | ||
1809 | error = xfs_dir2_shrink_inode(args, | ||
1810 | xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET), | ||
1811 | fbp); | ||
1812 | if (error) { | ||
1813 | /* | ||
1814 | * This can't fail here because it can only happen when | ||
1815 | * punching out the middle of an extent, and this is an | ||
1816 | * isolated block. | ||
1817 | */ | ||
1818 | ASSERT(error != -ENOSPC); | ||
1819 | return error; | ||
1820 | } | ||
1821 | fbp = NULL; | ||
1822 | /* | ||
1823 | * Now see if we can convert the single-leaf directory | ||
1824 | * down to a block form directory. | ||
1825 | * This routine always kills the dabuf for the leaf, so | ||
1826 | * eliminate it from the path. | ||
1827 | */ | ||
1828 | error = xfs_dir2_leaf_to_block(args, lbp, NULL); | ||
1829 | state->path.blk[0].bp = NULL; | ||
1830 | return error; | ||
1831 | } | ||
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c new file mode 100644 index 000000000000..2ae6ac2c11ae --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2_node.c | |||
@@ -0,0 +1,2284 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2013 Red Hat, Inc. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #include "xfs.h" | ||
20 | #include "xfs_fs.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | ||
27 | #include "xfs_da_format.h" | ||
28 | #include "xfs_da_btree.h" | ||
29 | #include "xfs_inode.h" | ||
30 | #include "xfs_bmap.h" | ||
31 | #include "xfs_dir2.h" | ||
32 | #include "xfs_dir2_priv.h" | ||
33 | #include "xfs_error.h" | ||
34 | #include "xfs_trace.h" | ||
35 | #include "xfs_trans.h" | ||
36 | #include "xfs_buf_item.h" | ||
37 | #include "xfs_cksum.h" | ||
38 | |||
39 | /* | ||
40 | * Function declarations. | ||
41 | */ | ||
42 | static int xfs_dir2_leafn_add(struct xfs_buf *bp, xfs_da_args_t *args, | ||
43 | int index); | ||
44 | static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state, | ||
45 | xfs_da_state_blk_t *blk1, | ||
46 | xfs_da_state_blk_t *blk2); | ||
47 | static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp, | ||
48 | int index, xfs_da_state_blk_t *dblk, | ||
49 | int *rval); | ||
50 | static int xfs_dir2_node_addname_int(xfs_da_args_t *args, | ||
51 | xfs_da_state_blk_t *fblk); | ||
52 | |||
53 | /* | ||
54 | * Check internal consistency of a leafn block. | ||
55 | */ | ||
56 | #ifdef DEBUG | ||
57 | #define xfs_dir3_leaf_check(dp, bp) \ | ||
58 | do { \ | ||
59 | if (!xfs_dir3_leafn_check((dp), (bp))) \ | ||
60 | ASSERT(0); \ | ||
61 | } while (0); | ||
62 | |||
63 | static bool | ||
64 | xfs_dir3_leafn_check( | ||
65 | struct xfs_inode *dp, | ||
66 | struct xfs_buf *bp) | ||
67 | { | ||
68 | struct xfs_dir2_leaf *leaf = bp->b_addr; | ||
69 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
70 | |||
71 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
72 | |||
73 | if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) { | ||
74 | struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; | ||
75 | if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) | ||
76 | return false; | ||
77 | } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC) | ||
78 | return false; | ||
79 | |||
80 | return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); | ||
81 | } | ||
82 | #else | ||
83 | #define xfs_dir3_leaf_check(dp, bp) | ||
84 | #endif | ||
85 | |||
86 | static bool | ||
87 | xfs_dir3_free_verify( | ||
88 | struct xfs_buf *bp) | ||
89 | { | ||
90 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
91 | struct xfs_dir2_free_hdr *hdr = bp->b_addr; | ||
92 | |||
93 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
94 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; | ||
95 | |||
96 | if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC)) | ||
97 | return false; | ||
98 | if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid)) | ||
99 | return false; | ||
100 | if (be64_to_cpu(hdr3->blkno) != bp->b_bn) | ||
101 | return false; | ||
102 | } else { | ||
103 | if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) | ||
104 | return false; | ||
105 | } | ||
106 | |||
107 | /* XXX: should bounds check the xfs_dir3_icfree_hdr here */ | ||
108 | |||
109 | return true; | ||
110 | } | ||
111 | |||
112 | static void | ||
113 | xfs_dir3_free_read_verify( | ||
114 | struct xfs_buf *bp) | ||
115 | { | ||
116 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
117 | |||
118 | if (xfs_sb_version_hascrc(&mp->m_sb) && | ||
119 | !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) | ||
120 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
121 | else if (!xfs_dir3_free_verify(bp)) | ||
122 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
123 | |||
124 | if (bp->b_error) | ||
125 | xfs_verifier_error(bp); | ||
126 | } | ||
127 | |||
128 | static void | ||
129 | xfs_dir3_free_write_verify( | ||
130 | struct xfs_buf *bp) | ||
131 | { | ||
132 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
133 | struct xfs_buf_log_item *bip = bp->b_fspriv; | ||
134 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; | ||
135 | |||
136 | if (!xfs_dir3_free_verify(bp)) { | ||
137 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
138 | xfs_verifier_error(bp); | ||
139 | return; | ||
140 | } | ||
141 | |||
142 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
143 | return; | ||
144 | |||
145 | if (bip) | ||
146 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); | ||
147 | |||
148 | xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF); | ||
149 | } | ||
150 | |||
151 | const struct xfs_buf_ops xfs_dir3_free_buf_ops = { | ||
152 | .verify_read = xfs_dir3_free_read_verify, | ||
153 | .verify_write = xfs_dir3_free_write_verify, | ||
154 | }; | ||
155 | |||
156 | |||
157 | static int | ||
158 | __xfs_dir3_free_read( | ||
159 | struct xfs_trans *tp, | ||
160 | struct xfs_inode *dp, | ||
161 | xfs_dablk_t fbno, | ||
162 | xfs_daddr_t mappedbno, | ||
163 | struct xfs_buf **bpp) | ||
164 | { | ||
165 | int err; | ||
166 | |||
167 | err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, | ||
168 | XFS_DATA_FORK, &xfs_dir3_free_buf_ops); | ||
169 | |||
170 | /* try read returns without an error or *bpp if it lands in a hole */ | ||
171 | if (!err && tp && *bpp) | ||
172 | xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF); | ||
173 | return err; | ||
174 | } | ||
175 | |||
176 | int | ||
177 | xfs_dir2_free_read( | ||
178 | struct xfs_trans *tp, | ||
179 | struct xfs_inode *dp, | ||
180 | xfs_dablk_t fbno, | ||
181 | struct xfs_buf **bpp) | ||
182 | { | ||
183 | return __xfs_dir3_free_read(tp, dp, fbno, -1, bpp); | ||
184 | } | ||
185 | |||
186 | static int | ||
187 | xfs_dir2_free_try_read( | ||
188 | struct xfs_trans *tp, | ||
189 | struct xfs_inode *dp, | ||
190 | xfs_dablk_t fbno, | ||
191 | struct xfs_buf **bpp) | ||
192 | { | ||
193 | return __xfs_dir3_free_read(tp, dp, fbno, -2, bpp); | ||
194 | } | ||
195 | |||
196 | static int | ||
197 | xfs_dir3_free_get_buf( | ||
198 | xfs_da_args_t *args, | ||
199 | xfs_dir2_db_t fbno, | ||
200 | struct xfs_buf **bpp) | ||
201 | { | ||
202 | struct xfs_trans *tp = args->trans; | ||
203 | struct xfs_inode *dp = args->dp; | ||
204 | struct xfs_mount *mp = dp->i_mount; | ||
205 | struct xfs_buf *bp; | ||
206 | int error; | ||
207 | struct xfs_dir3_icfree_hdr hdr; | ||
208 | |||
209 | error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, fbno), | ||
210 | -1, &bp, XFS_DATA_FORK); | ||
211 | if (error) | ||
212 | return error; | ||
213 | |||
214 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_FREE_BUF); | ||
215 | bp->b_ops = &xfs_dir3_free_buf_ops; | ||
216 | |||
217 | /* | ||
218 | * Initialize the new block to be empty, and remember | ||
219 | * its first slot as our empty slot. | ||
220 | */ | ||
221 | memset(bp->b_addr, 0, sizeof(struct xfs_dir3_free_hdr)); | ||
222 | memset(&hdr, 0, sizeof(hdr)); | ||
223 | |||
224 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
225 | struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; | ||
226 | |||
227 | hdr.magic = XFS_DIR3_FREE_MAGIC; | ||
228 | |||
229 | hdr3->hdr.blkno = cpu_to_be64(bp->b_bn); | ||
230 | hdr3->hdr.owner = cpu_to_be64(dp->i_ino); | ||
231 | uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid); | ||
232 | } else | ||
233 | hdr.magic = XFS_DIR2_FREE_MAGIC; | ||
234 | dp->d_ops->free_hdr_to_disk(bp->b_addr, &hdr); | ||
235 | *bpp = bp; | ||
236 | return 0; | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * Log entries from a freespace block. | ||
241 | */ | ||
242 | STATIC void | ||
243 | xfs_dir2_free_log_bests( | ||
244 | struct xfs_da_args *args, | ||
245 | struct xfs_buf *bp, | ||
246 | int first, /* first entry to log */ | ||
247 | int last) /* last entry to log */ | ||
248 | { | ||
249 | xfs_dir2_free_t *free; /* freespace structure */ | ||
250 | __be16 *bests; | ||
251 | |||
252 | free = bp->b_addr; | ||
253 | bests = args->dp->d_ops->free_bests_p(free); | ||
254 | ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || | ||
255 | free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); | ||
256 | xfs_trans_log_buf(args->trans, bp, | ||
257 | (uint)((char *)&bests[first] - (char *)free), | ||
258 | (uint)((char *)&bests[last] - (char *)free + | ||
259 | sizeof(bests[0]) - 1)); | ||
260 | } | ||
261 | |||
262 | /* | ||
263 | * Log header from a freespace block. | ||
264 | */ | ||
265 | static void | ||
266 | xfs_dir2_free_log_header( | ||
267 | struct xfs_da_args *args, | ||
268 | struct xfs_buf *bp) | ||
269 | { | ||
270 | #ifdef DEBUG | ||
271 | xfs_dir2_free_t *free; /* freespace structure */ | ||
272 | |||
273 | free = bp->b_addr; | ||
274 | ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || | ||
275 | free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); | ||
276 | #endif | ||
277 | xfs_trans_log_buf(args->trans, bp, 0, | ||
278 | args->dp->d_ops->free_hdr_size - 1); | ||
279 | } | ||
280 | |||
281 | /* | ||
282 | * Convert a leaf-format directory to a node-format directory. | ||
283 | * We need to change the magic number of the leaf block, and copy | ||
284 | * the freespace table out of the leaf block into its own block. | ||
285 | */ | ||
286 | int /* error */ | ||
287 | xfs_dir2_leaf_to_node( | ||
288 | xfs_da_args_t *args, /* operation arguments */ | ||
289 | struct xfs_buf *lbp) /* leaf buffer */ | ||
290 | { | ||
291 | xfs_inode_t *dp; /* incore directory inode */ | ||
292 | int error; /* error return value */ | ||
293 | struct xfs_buf *fbp; /* freespace buffer */ | ||
294 | xfs_dir2_db_t fdb; /* freespace block number */ | ||
295 | xfs_dir2_free_t *free; /* freespace structure */ | ||
296 | __be16 *from; /* pointer to freespace entry */ | ||
297 | int i; /* leaf freespace index */ | ||
298 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
299 | xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ | ||
300 | xfs_mount_t *mp; /* filesystem mount point */ | ||
301 | int n; /* count of live freespc ents */ | ||
302 | xfs_dir2_data_off_t off; /* freespace entry value */ | ||
303 | __be16 *to; /* pointer to freespace entry */ | ||
304 | xfs_trans_t *tp; /* transaction pointer */ | ||
305 | struct xfs_dir3_icfree_hdr freehdr; | ||
306 | |||
307 | trace_xfs_dir2_leaf_to_node(args); | ||
308 | |||
309 | dp = args->dp; | ||
310 | mp = dp->i_mount; | ||
311 | tp = args->trans; | ||
312 | /* | ||
313 | * Add a freespace block to the directory. | ||
314 | */ | ||
315 | if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fdb))) { | ||
316 | return error; | ||
317 | } | ||
318 | ASSERT(fdb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET)); | ||
319 | /* | ||
320 | * Get the buffer for the new freespace block. | ||
321 | */ | ||
322 | error = xfs_dir3_free_get_buf(args, fdb, &fbp); | ||
323 | if (error) | ||
324 | return error; | ||
325 | |||
326 | free = fbp->b_addr; | ||
327 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
328 | leaf = lbp->b_addr; | ||
329 | ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); | ||
330 | ASSERT(be32_to_cpu(ltp->bestcount) <= | ||
331 | (uint)dp->i_d.di_size / args->geo->blksize); | ||
332 | |||
333 | /* | ||
334 | * Copy freespace entries from the leaf block to the new block. | ||
335 | * Count active entries. | ||
336 | */ | ||
337 | from = xfs_dir2_leaf_bests_p(ltp); | ||
338 | to = dp->d_ops->free_bests_p(free); | ||
339 | for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++, to++) { | ||
340 | if ((off = be16_to_cpu(*from)) != NULLDATAOFF) | ||
341 | n++; | ||
342 | *to = cpu_to_be16(off); | ||
343 | } | ||
344 | |||
345 | /* | ||
346 | * Now initialize the freespace block header. | ||
347 | */ | ||
348 | freehdr.nused = n; | ||
349 | freehdr.nvalid = be32_to_cpu(ltp->bestcount); | ||
350 | |||
351 | dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); | ||
352 | xfs_dir2_free_log_bests(args, fbp, 0, freehdr.nvalid - 1); | ||
353 | xfs_dir2_free_log_header(args, fbp); | ||
354 | |||
355 | /* | ||
356 | * Converting the leaf to a leafnode is just a matter of changing the | ||
357 | * magic number and the ops. Do the change directly to the buffer as | ||
358 | * it's less work (and less code) than decoding the header to host | ||
359 | * format and back again. | ||
360 | */ | ||
361 | if (leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)) | ||
362 | leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC); | ||
363 | else | ||
364 | leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC); | ||
365 | lbp->b_ops = &xfs_dir3_leafn_buf_ops; | ||
366 | xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF); | ||
367 | xfs_dir3_leaf_log_header(args, lbp); | ||
368 | xfs_dir3_leaf_check(dp, lbp); | ||
369 | return 0; | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * Add a leaf entry to a leaf block in a node-form directory. | ||
374 | * The other work necessary is done from the caller. | ||
375 | */ | ||
376 | static int /* error */ | ||
377 | xfs_dir2_leafn_add( | ||
378 | struct xfs_buf *bp, /* leaf buffer */ | ||
379 | xfs_da_args_t *args, /* operation arguments */ | ||
380 | int index) /* insertion pt for new entry */ | ||
381 | { | ||
382 | int compact; /* compacting stale leaves */ | ||
383 | xfs_inode_t *dp; /* incore directory inode */ | ||
384 | int highstale; /* next stale entry */ | ||
385 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
386 | xfs_dir2_leaf_entry_t *lep; /* leaf entry */ | ||
387 | int lfloghigh; /* high leaf entry logging */ | ||
388 | int lfloglow; /* low leaf entry logging */ | ||
389 | int lowstale; /* previous stale entry */ | ||
390 | xfs_mount_t *mp; /* filesystem mount point */ | ||
391 | xfs_trans_t *tp; /* transaction pointer */ | ||
392 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
393 | struct xfs_dir2_leaf_entry *ents; | ||
394 | |||
395 | trace_xfs_dir2_leafn_add(args, index); | ||
396 | |||
397 | dp = args->dp; | ||
398 | mp = dp->i_mount; | ||
399 | tp = args->trans; | ||
400 | leaf = bp->b_addr; | ||
401 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
402 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
403 | |||
404 | /* | ||
405 | * Quick check just to make sure we are not going to index | ||
406 | * into other peoples memory | ||
407 | */ | ||
408 | if (index < 0) | ||
409 | return -EFSCORRUPTED; | ||
410 | |||
411 | /* | ||
412 | * If there are already the maximum number of leaf entries in | ||
413 | * the block, if there are no stale entries it won't fit. | ||
414 | * Caller will do a split. If there are stale entries we'll do | ||
415 | * a compact. | ||
416 | */ | ||
417 | |||
418 | if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) { | ||
419 | if (!leafhdr.stale) | ||
420 | return -ENOSPC; | ||
421 | compact = leafhdr.stale > 1; | ||
422 | } else | ||
423 | compact = 0; | ||
424 | ASSERT(index == 0 || be32_to_cpu(ents[index - 1].hashval) <= args->hashval); | ||
425 | ASSERT(index == leafhdr.count || | ||
426 | be32_to_cpu(ents[index].hashval) >= args->hashval); | ||
427 | |||
428 | if (args->op_flags & XFS_DA_OP_JUSTCHECK) | ||
429 | return 0; | ||
430 | |||
431 | /* | ||
432 | * Compact out all but one stale leaf entry. Leaves behind | ||
433 | * the entry closest to index. | ||
434 | */ | ||
435 | if (compact) | ||
436 | xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale, | ||
437 | &highstale, &lfloglow, &lfloghigh); | ||
438 | else if (leafhdr.stale) { | ||
439 | /* | ||
440 | * Set impossible logging indices for this case. | ||
441 | */ | ||
442 | lfloglow = leafhdr.count; | ||
443 | lfloghigh = -1; | ||
444 | } | ||
445 | |||
446 | /* | ||
447 | * Insert the new entry, log everything. | ||
448 | */ | ||
449 | lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale, | ||
450 | highstale, &lfloglow, &lfloghigh); | ||
451 | |||
452 | lep->hashval = cpu_to_be32(args->hashval); | ||
453 | lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(args->geo, | ||
454 | args->blkno, args->index)); | ||
455 | |||
456 | dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); | ||
457 | xfs_dir3_leaf_log_header(args, bp); | ||
458 | xfs_dir3_leaf_log_ents(args, bp, lfloglow, lfloghigh); | ||
459 | xfs_dir3_leaf_check(dp, bp); | ||
460 | return 0; | ||
461 | } | ||
462 | |||
463 | #ifdef DEBUG | ||
464 | static void | ||
465 | xfs_dir2_free_hdr_check( | ||
466 | struct xfs_inode *dp, | ||
467 | struct xfs_buf *bp, | ||
468 | xfs_dir2_db_t db) | ||
469 | { | ||
470 | struct xfs_dir3_icfree_hdr hdr; | ||
471 | |||
472 | dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr); | ||
473 | |||
474 | ASSERT((hdr.firstdb % | ||
475 | dp->d_ops->free_max_bests(dp->i_mount->m_dir_geo)) == 0); | ||
476 | ASSERT(hdr.firstdb <= db); | ||
477 | ASSERT(db < hdr.firstdb + hdr.nvalid); | ||
478 | } | ||
479 | #else | ||
480 | #define xfs_dir2_free_hdr_check(dp, bp, db) | ||
481 | #endif /* DEBUG */ | ||
482 | |||
483 | /* | ||
484 | * Return the last hash value in the leaf. | ||
485 | * Stale entries are ok. | ||
486 | */ | ||
487 | xfs_dahash_t /* hash value */ | ||
488 | xfs_dir2_leafn_lasthash( | ||
489 | struct xfs_inode *dp, | ||
490 | struct xfs_buf *bp, /* leaf buffer */ | ||
491 | int *count) /* count of entries in leaf */ | ||
492 | { | ||
493 | struct xfs_dir2_leaf *leaf = bp->b_addr; | ||
494 | struct xfs_dir2_leaf_entry *ents; | ||
495 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
496 | |||
497 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
498 | |||
499 | ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || | ||
500 | leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); | ||
501 | |||
502 | if (count) | ||
503 | *count = leafhdr.count; | ||
504 | if (!leafhdr.count) | ||
505 | return 0; | ||
506 | |||
507 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
508 | return be32_to_cpu(ents[leafhdr.count - 1].hashval); | ||
509 | } | ||
510 | |||
511 | /* | ||
512 | * Look up a leaf entry for space to add a name in a node-format leaf block. | ||
513 | * The extrablk in state is a freespace block. | ||
514 | */ | ||
515 | STATIC int | ||
516 | xfs_dir2_leafn_lookup_for_addname( | ||
517 | struct xfs_buf *bp, /* leaf buffer */ | ||
518 | xfs_da_args_t *args, /* operation arguments */ | ||
519 | int *indexp, /* out: leaf entry index */ | ||
520 | xfs_da_state_t *state) /* state to fill in */ | ||
521 | { | ||
522 | struct xfs_buf *curbp = NULL; /* current data/free buffer */ | ||
523 | xfs_dir2_db_t curdb = -1; /* current data block number */ | ||
524 | xfs_dir2_db_t curfdb = -1; /* current free block number */ | ||
525 | xfs_inode_t *dp; /* incore directory inode */ | ||
526 | int error; /* error return value */ | ||
527 | int fi; /* free entry index */ | ||
528 | xfs_dir2_free_t *free = NULL; /* free block structure */ | ||
529 | int index; /* leaf entry index */ | ||
530 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
531 | int length; /* length of new data entry */ | ||
532 | xfs_dir2_leaf_entry_t *lep; /* leaf entry */ | ||
533 | xfs_mount_t *mp; /* filesystem mount point */ | ||
534 | xfs_dir2_db_t newdb; /* new data block number */ | ||
535 | xfs_dir2_db_t newfdb; /* new free block number */ | ||
536 | xfs_trans_t *tp; /* transaction pointer */ | ||
537 | struct xfs_dir2_leaf_entry *ents; | ||
538 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
539 | |||
540 | dp = args->dp; | ||
541 | tp = args->trans; | ||
542 | mp = dp->i_mount; | ||
543 | leaf = bp->b_addr; | ||
544 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
545 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
546 | |||
547 | xfs_dir3_leaf_check(dp, bp); | ||
548 | ASSERT(leafhdr.count > 0); | ||
549 | |||
550 | /* | ||
551 | * Look up the hash value in the leaf entries. | ||
552 | */ | ||
553 | index = xfs_dir2_leaf_search_hash(args, bp); | ||
554 | /* | ||
555 | * Do we have a buffer coming in? | ||
556 | */ | ||
557 | if (state->extravalid) { | ||
558 | /* If so, it's a free block buffer, get the block number. */ | ||
559 | curbp = state->extrablk.bp; | ||
560 | curfdb = state->extrablk.blkno; | ||
561 | free = curbp->b_addr; | ||
562 | ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || | ||
563 | free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); | ||
564 | } | ||
565 | length = dp->d_ops->data_entsize(args->namelen); | ||
566 | /* | ||
567 | * Loop over leaf entries with the right hash value. | ||
568 | */ | ||
569 | for (lep = &ents[index]; | ||
570 | index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; | ||
571 | lep++, index++) { | ||
572 | /* | ||
573 | * Skip stale leaf entries. | ||
574 | */ | ||
575 | if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) | ||
576 | continue; | ||
577 | /* | ||
578 | * Pull the data block number from the entry. | ||
579 | */ | ||
580 | newdb = xfs_dir2_dataptr_to_db(args->geo, | ||
581 | be32_to_cpu(lep->address)); | ||
582 | /* | ||
583 | * For addname, we're looking for a place to put the new entry. | ||
584 | * We want to use a data block with an entry of equal | ||
585 | * hash value to ours if there is one with room. | ||
586 | * | ||
587 | * If this block isn't the data block we already have | ||
588 | * in hand, take a look at it. | ||
589 | */ | ||
590 | if (newdb != curdb) { | ||
591 | __be16 *bests; | ||
592 | |||
593 | curdb = newdb; | ||
594 | /* | ||
595 | * Convert the data block to the free block | ||
596 | * holding its freespace information. | ||
597 | */ | ||
598 | newfdb = dp->d_ops->db_to_fdb(args->geo, newdb); | ||
599 | /* | ||
600 | * If it's not the one we have in hand, read it in. | ||
601 | */ | ||
602 | if (newfdb != curfdb) { | ||
603 | /* | ||
604 | * If we had one before, drop it. | ||
605 | */ | ||
606 | if (curbp) | ||
607 | xfs_trans_brelse(tp, curbp); | ||
608 | |||
609 | error = xfs_dir2_free_read(tp, dp, | ||
610 | xfs_dir2_db_to_da(args->geo, | ||
611 | newfdb), | ||
612 | &curbp); | ||
613 | if (error) | ||
614 | return error; | ||
615 | free = curbp->b_addr; | ||
616 | |||
617 | xfs_dir2_free_hdr_check(dp, curbp, curdb); | ||
618 | } | ||
619 | /* | ||
620 | * Get the index for our entry. | ||
621 | */ | ||
622 | fi = dp->d_ops->db_to_fdindex(args->geo, curdb); | ||
623 | /* | ||
624 | * If it has room, return it. | ||
625 | */ | ||
626 | bests = dp->d_ops->free_bests_p(free); | ||
627 | if (unlikely(bests[fi] == cpu_to_be16(NULLDATAOFF))) { | ||
628 | XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", | ||
629 | XFS_ERRLEVEL_LOW, mp); | ||
630 | if (curfdb != newfdb) | ||
631 | xfs_trans_brelse(tp, curbp); | ||
632 | return -EFSCORRUPTED; | ||
633 | } | ||
634 | curfdb = newfdb; | ||
635 | if (be16_to_cpu(bests[fi]) >= length) | ||
636 | goto out; | ||
637 | } | ||
638 | } | ||
639 | /* Didn't find any space */ | ||
640 | fi = -1; | ||
641 | out: | ||
642 | ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); | ||
643 | if (curbp) { | ||
644 | /* Giving back a free block. */ | ||
645 | state->extravalid = 1; | ||
646 | state->extrablk.bp = curbp; | ||
647 | state->extrablk.index = fi; | ||
648 | state->extrablk.blkno = curfdb; | ||
649 | |||
650 | /* | ||
651 | * Important: this magic number is not in the buffer - it's for | ||
652 | * buffer type information and therefore only the free/data type | ||
653 | * matters here, not whether CRCs are enabled or not. | ||
654 | */ | ||
655 | state->extrablk.magic = XFS_DIR2_FREE_MAGIC; | ||
656 | } else { | ||
657 | state->extravalid = 0; | ||
658 | } | ||
659 | /* | ||
660 | * Return the index, that will be the insertion point. | ||
661 | */ | ||
662 | *indexp = index; | ||
663 | return -ENOENT; | ||
664 | } | ||
665 | |||
666 | /* | ||
667 | * Look up a leaf entry in a node-format leaf block. | ||
668 | * The extrablk in state a data block. | ||
669 | */ | ||
670 | STATIC int | ||
671 | xfs_dir2_leafn_lookup_for_entry( | ||
672 | struct xfs_buf *bp, /* leaf buffer */ | ||
673 | xfs_da_args_t *args, /* operation arguments */ | ||
674 | int *indexp, /* out: leaf entry index */ | ||
675 | xfs_da_state_t *state) /* state to fill in */ | ||
676 | { | ||
677 | struct xfs_buf *curbp = NULL; /* current data/free buffer */ | ||
678 | xfs_dir2_db_t curdb = -1; /* current data block number */ | ||
679 | xfs_dir2_data_entry_t *dep; /* data block entry */ | ||
680 | xfs_inode_t *dp; /* incore directory inode */ | ||
681 | int error; /* error return value */ | ||
682 | int index; /* leaf entry index */ | ||
683 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
684 | xfs_dir2_leaf_entry_t *lep; /* leaf entry */ | ||
685 | xfs_mount_t *mp; /* filesystem mount point */ | ||
686 | xfs_dir2_db_t newdb; /* new data block number */ | ||
687 | xfs_trans_t *tp; /* transaction pointer */ | ||
688 | enum xfs_dacmp cmp; /* comparison result */ | ||
689 | struct xfs_dir2_leaf_entry *ents; | ||
690 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
691 | |||
692 | dp = args->dp; | ||
693 | tp = args->trans; | ||
694 | mp = dp->i_mount; | ||
695 | leaf = bp->b_addr; | ||
696 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
697 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
698 | |||
699 | xfs_dir3_leaf_check(dp, bp); | ||
700 | ASSERT(leafhdr.count > 0); | ||
701 | |||
702 | /* | ||
703 | * Look up the hash value in the leaf entries. | ||
704 | */ | ||
705 | index = xfs_dir2_leaf_search_hash(args, bp); | ||
706 | /* | ||
707 | * Do we have a buffer coming in? | ||
708 | */ | ||
709 | if (state->extravalid) { | ||
710 | curbp = state->extrablk.bp; | ||
711 | curdb = state->extrablk.blkno; | ||
712 | } | ||
713 | /* | ||
714 | * Loop over leaf entries with the right hash value. | ||
715 | */ | ||
716 | for (lep = &ents[index]; | ||
717 | index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval; | ||
718 | lep++, index++) { | ||
719 | /* | ||
720 | * Skip stale leaf entries. | ||
721 | */ | ||
722 | if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) | ||
723 | continue; | ||
724 | /* | ||
725 | * Pull the data block number from the entry. | ||
726 | */ | ||
727 | newdb = xfs_dir2_dataptr_to_db(args->geo, | ||
728 | be32_to_cpu(lep->address)); | ||
729 | /* | ||
730 | * Not adding a new entry, so we really want to find | ||
731 | * the name given to us. | ||
732 | * | ||
733 | * If it's a different data block, go get it. | ||
734 | */ | ||
735 | if (newdb != curdb) { | ||
736 | /* | ||
737 | * If we had a block before that we aren't saving | ||
738 | * for a CI name, drop it | ||
739 | */ | ||
740 | if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT || | ||
741 | curdb != state->extrablk.blkno)) | ||
742 | xfs_trans_brelse(tp, curbp); | ||
743 | /* | ||
744 | * If needing the block that is saved with a CI match, | ||
745 | * use it otherwise read in the new data block. | ||
746 | */ | ||
747 | if (args->cmpresult != XFS_CMP_DIFFERENT && | ||
748 | newdb == state->extrablk.blkno) { | ||
749 | ASSERT(state->extravalid); | ||
750 | curbp = state->extrablk.bp; | ||
751 | } else { | ||
752 | error = xfs_dir3_data_read(tp, dp, | ||
753 | xfs_dir2_db_to_da(args->geo, | ||
754 | newdb), | ||
755 | -1, &curbp); | ||
756 | if (error) | ||
757 | return error; | ||
758 | } | ||
759 | xfs_dir3_data_check(dp, curbp); | ||
760 | curdb = newdb; | ||
761 | } | ||
762 | /* | ||
763 | * Point to the data entry. | ||
764 | */ | ||
765 | dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr + | ||
766 | xfs_dir2_dataptr_to_off(args->geo, | ||
767 | be32_to_cpu(lep->address))); | ||
768 | /* | ||
769 | * Compare the entry and if it's an exact match, return | ||
770 | * EEXIST immediately. If it's the first case-insensitive | ||
771 | * match, store the block & inode number and continue looking. | ||
772 | */ | ||
773 | cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); | ||
774 | if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { | ||
775 | /* If there is a CI match block, drop it */ | ||
776 | if (args->cmpresult != XFS_CMP_DIFFERENT && | ||
777 | curdb != state->extrablk.blkno) | ||
778 | xfs_trans_brelse(tp, state->extrablk.bp); | ||
779 | args->cmpresult = cmp; | ||
780 | args->inumber = be64_to_cpu(dep->inumber); | ||
781 | args->filetype = dp->d_ops->data_get_ftype(dep); | ||
782 | *indexp = index; | ||
783 | state->extravalid = 1; | ||
784 | state->extrablk.bp = curbp; | ||
785 | state->extrablk.blkno = curdb; | ||
786 | state->extrablk.index = (int)((char *)dep - | ||
787 | (char *)curbp->b_addr); | ||
788 | state->extrablk.magic = XFS_DIR2_DATA_MAGIC; | ||
789 | curbp->b_ops = &xfs_dir3_data_buf_ops; | ||
790 | xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF); | ||
791 | if (cmp == XFS_CMP_EXACT) | ||
792 | return -EEXIST; | ||
793 | } | ||
794 | } | ||
795 | ASSERT(index == leafhdr.count || (args->op_flags & XFS_DA_OP_OKNOENT)); | ||
796 | if (curbp) { | ||
797 | if (args->cmpresult == XFS_CMP_DIFFERENT) { | ||
798 | /* Giving back last used data block. */ | ||
799 | state->extravalid = 1; | ||
800 | state->extrablk.bp = curbp; | ||
801 | state->extrablk.index = -1; | ||
802 | state->extrablk.blkno = curdb; | ||
803 | state->extrablk.magic = XFS_DIR2_DATA_MAGIC; | ||
804 | curbp->b_ops = &xfs_dir3_data_buf_ops; | ||
805 | xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF); | ||
806 | } else { | ||
807 | /* If the curbp is not the CI match block, drop it */ | ||
808 | if (state->extrablk.bp != curbp) | ||
809 | xfs_trans_brelse(tp, curbp); | ||
810 | } | ||
811 | } else { | ||
812 | state->extravalid = 0; | ||
813 | } | ||
814 | *indexp = index; | ||
815 | return -ENOENT; | ||
816 | } | ||
817 | |||
818 | /* | ||
819 | * Look up a leaf entry in a node-format leaf block. | ||
820 | * If this is an addname then the extrablk in state is a freespace block, | ||
821 | * otherwise it's a data block. | ||
822 | */ | ||
823 | int | ||
824 | xfs_dir2_leafn_lookup_int( | ||
825 | struct xfs_buf *bp, /* leaf buffer */ | ||
826 | xfs_da_args_t *args, /* operation arguments */ | ||
827 | int *indexp, /* out: leaf entry index */ | ||
828 | xfs_da_state_t *state) /* state to fill in */ | ||
829 | { | ||
830 | if (args->op_flags & XFS_DA_OP_ADDNAME) | ||
831 | return xfs_dir2_leafn_lookup_for_addname(bp, args, indexp, | ||
832 | state); | ||
833 | return xfs_dir2_leafn_lookup_for_entry(bp, args, indexp, state); | ||
834 | } | ||
835 | |||
836 | /* | ||
837 | * Move count leaf entries from source to destination leaf. | ||
838 | * Log entries and headers. Stale entries are preserved. | ||
839 | */ | ||
840 | static void | ||
841 | xfs_dir3_leafn_moveents( | ||
842 | xfs_da_args_t *args, /* operation arguments */ | ||
843 | struct xfs_buf *bp_s, /* source */ | ||
844 | struct xfs_dir3_icleaf_hdr *shdr, | ||
845 | struct xfs_dir2_leaf_entry *sents, | ||
846 | int start_s,/* source leaf index */ | ||
847 | struct xfs_buf *bp_d, /* destination */ | ||
848 | struct xfs_dir3_icleaf_hdr *dhdr, | ||
849 | struct xfs_dir2_leaf_entry *dents, | ||
850 | int start_d,/* destination leaf index */ | ||
851 | int count) /* count of leaves to copy */ | ||
852 | { | ||
853 | int stale; /* count stale leaves copied */ | ||
854 | |||
855 | trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count); | ||
856 | |||
857 | /* | ||
858 | * Silently return if nothing to do. | ||
859 | */ | ||
860 | if (count == 0) | ||
861 | return; | ||
862 | |||
863 | /* | ||
864 | * If the destination index is not the end of the current | ||
865 | * destination leaf entries, open up a hole in the destination | ||
866 | * to hold the new entries. | ||
867 | */ | ||
868 | if (start_d < dhdr->count) { | ||
869 | memmove(&dents[start_d + count], &dents[start_d], | ||
870 | (dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t)); | ||
871 | xfs_dir3_leaf_log_ents(args, bp_d, start_d + count, | ||
872 | count + dhdr->count - 1); | ||
873 | } | ||
874 | /* | ||
875 | * If the source has stale leaves, count the ones in the copy range | ||
876 | * so we can update the header correctly. | ||
877 | */ | ||
878 | if (shdr->stale) { | ||
879 | int i; /* temp leaf index */ | ||
880 | |||
881 | for (i = start_s, stale = 0; i < start_s + count; i++) { | ||
882 | if (sents[i].address == | ||
883 | cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) | ||
884 | stale++; | ||
885 | } | ||
886 | } else | ||
887 | stale = 0; | ||
888 | /* | ||
889 | * Copy the leaf entries from source to destination. | ||
890 | */ | ||
891 | memcpy(&dents[start_d], &sents[start_s], | ||
892 | count * sizeof(xfs_dir2_leaf_entry_t)); | ||
893 | xfs_dir3_leaf_log_ents(args, bp_d, start_d, start_d + count - 1); | ||
894 | |||
895 | /* | ||
896 | * If there are source entries after the ones we copied, | ||
897 | * delete the ones we copied by sliding the next ones down. | ||
898 | */ | ||
899 | if (start_s + count < shdr->count) { | ||
900 | memmove(&sents[start_s], &sents[start_s + count], | ||
901 | count * sizeof(xfs_dir2_leaf_entry_t)); | ||
902 | xfs_dir3_leaf_log_ents(args, bp_s, start_s, start_s + count - 1); | ||
903 | } | ||
904 | |||
905 | /* | ||
906 | * Update the headers and log them. | ||
907 | */ | ||
908 | shdr->count -= count; | ||
909 | shdr->stale -= stale; | ||
910 | dhdr->count += count; | ||
911 | dhdr->stale += stale; | ||
912 | } | ||
913 | |||
914 | /* | ||
915 | * Determine the sort order of two leaf blocks. | ||
916 | * Returns 1 if both are valid and leaf2 should be before leaf1, else 0. | ||
917 | */ | ||
918 | int /* sort order */ | ||
919 | xfs_dir2_leafn_order( | ||
920 | struct xfs_inode *dp, | ||
921 | struct xfs_buf *leaf1_bp, /* leaf1 buffer */ | ||
922 | struct xfs_buf *leaf2_bp) /* leaf2 buffer */ | ||
923 | { | ||
924 | struct xfs_dir2_leaf *leaf1 = leaf1_bp->b_addr; | ||
925 | struct xfs_dir2_leaf *leaf2 = leaf2_bp->b_addr; | ||
926 | struct xfs_dir2_leaf_entry *ents1; | ||
927 | struct xfs_dir2_leaf_entry *ents2; | ||
928 | struct xfs_dir3_icleaf_hdr hdr1; | ||
929 | struct xfs_dir3_icleaf_hdr hdr2; | ||
930 | |||
931 | dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); | ||
932 | dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2); | ||
933 | ents1 = dp->d_ops->leaf_ents_p(leaf1); | ||
934 | ents2 = dp->d_ops->leaf_ents_p(leaf2); | ||
935 | |||
936 | if (hdr1.count > 0 && hdr2.count > 0 && | ||
937 | (be32_to_cpu(ents2[0].hashval) < be32_to_cpu(ents1[0].hashval) || | ||
938 | be32_to_cpu(ents2[hdr2.count - 1].hashval) < | ||
939 | be32_to_cpu(ents1[hdr1.count - 1].hashval))) | ||
940 | return 1; | ||
941 | return 0; | ||
942 | } | ||
943 | |||
944 | /* | ||
945 | * Rebalance leaf entries between two leaf blocks. | ||
946 | * This is actually only called when the second block is new, | ||
947 | * though the code deals with the general case. | ||
948 | * A new entry will be inserted in one of the blocks, and that | ||
949 | * entry is taken into account when balancing. | ||
950 | */ | ||
951 | static void | ||
952 | xfs_dir2_leafn_rebalance( | ||
953 | xfs_da_state_t *state, /* btree cursor */ | ||
954 | xfs_da_state_blk_t *blk1, /* first btree block */ | ||
955 | xfs_da_state_blk_t *blk2) /* second btree block */ | ||
956 | { | ||
957 | xfs_da_args_t *args; /* operation arguments */ | ||
958 | int count; /* count (& direction) leaves */ | ||
959 | int isleft; /* new goes in left leaf */ | ||
960 | xfs_dir2_leaf_t *leaf1; /* first leaf structure */ | ||
961 | xfs_dir2_leaf_t *leaf2; /* second leaf structure */ | ||
962 | int mid; /* midpoint leaf index */ | ||
963 | #if defined(DEBUG) || defined(XFS_WARN) | ||
964 | int oldstale; /* old count of stale leaves */ | ||
965 | #endif | ||
966 | int oldsum; /* old total leaf count */ | ||
967 | int swap; /* swapped leaf blocks */ | ||
968 | struct xfs_dir2_leaf_entry *ents1; | ||
969 | struct xfs_dir2_leaf_entry *ents2; | ||
970 | struct xfs_dir3_icleaf_hdr hdr1; | ||
971 | struct xfs_dir3_icleaf_hdr hdr2; | ||
972 | struct xfs_inode *dp = state->args->dp; | ||
973 | |||
974 | args = state->args; | ||
975 | /* | ||
976 | * If the block order is wrong, swap the arguments. | ||
977 | */ | ||
978 | if ((swap = xfs_dir2_leafn_order(dp, blk1->bp, blk2->bp))) { | ||
979 | xfs_da_state_blk_t *tmp; /* temp for block swap */ | ||
980 | |||
981 | tmp = blk1; | ||
982 | blk1 = blk2; | ||
983 | blk2 = tmp; | ||
984 | } | ||
985 | leaf1 = blk1->bp->b_addr; | ||
986 | leaf2 = blk2->bp->b_addr; | ||
987 | dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); | ||
988 | dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2); | ||
989 | ents1 = dp->d_ops->leaf_ents_p(leaf1); | ||
990 | ents2 = dp->d_ops->leaf_ents_p(leaf2); | ||
991 | |||
992 | oldsum = hdr1.count + hdr2.count; | ||
993 | #if defined(DEBUG) || defined(XFS_WARN) | ||
994 | oldstale = hdr1.stale + hdr2.stale; | ||
995 | #endif | ||
996 | mid = oldsum >> 1; | ||
997 | |||
998 | /* | ||
999 | * If the old leaf count was odd then the new one will be even, | ||
1000 | * so we need to divide the new count evenly. | ||
1001 | */ | ||
1002 | if (oldsum & 1) { | ||
1003 | xfs_dahash_t midhash; /* middle entry hash value */ | ||
1004 | |||
1005 | if (mid >= hdr1.count) | ||
1006 | midhash = be32_to_cpu(ents2[mid - hdr1.count].hashval); | ||
1007 | else | ||
1008 | midhash = be32_to_cpu(ents1[mid].hashval); | ||
1009 | isleft = args->hashval <= midhash; | ||
1010 | } | ||
1011 | /* | ||
1012 | * If the old count is even then the new count is odd, so there's | ||
1013 | * no preferred side for the new entry. | ||
1014 | * Pick the left one. | ||
1015 | */ | ||
1016 | else | ||
1017 | isleft = 1; | ||
1018 | /* | ||
1019 | * Calculate moved entry count. Positive means left-to-right, | ||
1020 | * negative means right-to-left. Then move the entries. | ||
1021 | */ | ||
1022 | count = hdr1.count - mid + (isleft == 0); | ||
1023 | if (count > 0) | ||
1024 | xfs_dir3_leafn_moveents(args, blk1->bp, &hdr1, ents1, | ||
1025 | hdr1.count - count, blk2->bp, | ||
1026 | &hdr2, ents2, 0, count); | ||
1027 | else if (count < 0) | ||
1028 | xfs_dir3_leafn_moveents(args, blk2->bp, &hdr2, ents2, 0, | ||
1029 | blk1->bp, &hdr1, ents1, | ||
1030 | hdr1.count, count); | ||
1031 | |||
1032 | ASSERT(hdr1.count + hdr2.count == oldsum); | ||
1033 | ASSERT(hdr1.stale + hdr2.stale == oldstale); | ||
1034 | |||
1035 | /* log the changes made when moving the entries */ | ||
1036 | dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1); | ||
1037 | dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2); | ||
1038 | xfs_dir3_leaf_log_header(args, blk1->bp); | ||
1039 | xfs_dir3_leaf_log_header(args, blk2->bp); | ||
1040 | |||
1041 | xfs_dir3_leaf_check(dp, blk1->bp); | ||
1042 | xfs_dir3_leaf_check(dp, blk2->bp); | ||
1043 | |||
1044 | /* | ||
1045 | * Mark whether we're inserting into the old or new leaf. | ||
1046 | */ | ||
1047 | if (hdr1.count < hdr2.count) | ||
1048 | state->inleaf = swap; | ||
1049 | else if (hdr1.count > hdr2.count) | ||
1050 | state->inleaf = !swap; | ||
1051 | else | ||
1052 | state->inleaf = swap ^ (blk1->index <= hdr1.count); | ||
1053 | /* | ||
1054 | * Adjust the expected index for insertion. | ||
1055 | */ | ||
1056 | if (!state->inleaf) | ||
1057 | blk2->index = blk1->index - hdr1.count; | ||
1058 | |||
1059 | /* | ||
1060 | * Finally sanity check just to make sure we are not returning a | ||
1061 | * negative index | ||
1062 | */ | ||
1063 | if (blk2->index < 0) { | ||
1064 | state->inleaf = 1; | ||
1065 | blk2->index = 0; | ||
1066 | xfs_alert(dp->i_mount, | ||
1067 | "%s: picked the wrong leaf? reverting original leaf: blk1->index %d", | ||
1068 | __func__, blk1->index); | ||
1069 | } | ||
1070 | } | ||
1071 | |||
1072 | static int | ||
1073 | xfs_dir3_data_block_free( | ||
1074 | xfs_da_args_t *args, | ||
1075 | struct xfs_dir2_data_hdr *hdr, | ||
1076 | struct xfs_dir2_free *free, | ||
1077 | xfs_dir2_db_t fdb, | ||
1078 | int findex, | ||
1079 | struct xfs_buf *fbp, | ||
1080 | int longest) | ||
1081 | { | ||
1082 | int logfree = 0; | ||
1083 | __be16 *bests; | ||
1084 | struct xfs_dir3_icfree_hdr freehdr; | ||
1085 | struct xfs_inode *dp = args->dp; | ||
1086 | |||
1087 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
1088 | bests = dp->d_ops->free_bests_p(free); | ||
1089 | if (hdr) { | ||
1090 | /* | ||
1091 | * Data block is not empty, just set the free entry to the new | ||
1092 | * value. | ||
1093 | */ | ||
1094 | bests[findex] = cpu_to_be16(longest); | ||
1095 | xfs_dir2_free_log_bests(args, fbp, findex, findex); | ||
1096 | return 0; | ||
1097 | } | ||
1098 | |||
1099 | /* One less used entry in the free table. */ | ||
1100 | freehdr.nused--; | ||
1101 | |||
1102 | /* | ||
1103 | * If this was the last entry in the table, we can trim the table size | ||
1104 | * back. There might be other entries at the end referring to | ||
1105 | * non-existent data blocks, get those too. | ||
1106 | */ | ||
1107 | if (findex == freehdr.nvalid - 1) { | ||
1108 | int i; /* free entry index */ | ||
1109 | |||
1110 | for (i = findex - 1; i >= 0; i--) { | ||
1111 | if (bests[i] != cpu_to_be16(NULLDATAOFF)) | ||
1112 | break; | ||
1113 | } | ||
1114 | freehdr.nvalid = i + 1; | ||
1115 | logfree = 0; | ||
1116 | } else { | ||
1117 | /* Not the last entry, just punch it out. */ | ||
1118 | bests[findex] = cpu_to_be16(NULLDATAOFF); | ||
1119 | logfree = 1; | ||
1120 | } | ||
1121 | |||
1122 | dp->d_ops->free_hdr_to_disk(free, &freehdr); | ||
1123 | xfs_dir2_free_log_header(args, fbp); | ||
1124 | |||
1125 | /* | ||
1126 | * If there are no useful entries left in the block, get rid of the | ||
1127 | * block if we can. | ||
1128 | */ | ||
1129 | if (!freehdr.nused) { | ||
1130 | int error; | ||
1131 | |||
1132 | error = xfs_dir2_shrink_inode(args, fdb, fbp); | ||
1133 | if (error == 0) { | ||
1134 | fbp = NULL; | ||
1135 | logfree = 0; | ||
1136 | } else if (error != -ENOSPC || args->total != 0) | ||
1137 | return error; | ||
1138 | /* | ||
1139 | * It's possible to get ENOSPC if there is no | ||
1140 | * space reservation. In this case some one | ||
1141 | * else will eventually get rid of this block. | ||
1142 | */ | ||
1143 | } | ||
1144 | |||
1145 | /* Log the free entry that changed, unless we got rid of it. */ | ||
1146 | if (logfree) | ||
1147 | xfs_dir2_free_log_bests(args, fbp, findex, findex); | ||
1148 | return 0; | ||
1149 | } | ||
1150 | |||
1151 | /* | ||
1152 | * Remove an entry from a node directory. | ||
1153 | * This removes the leaf entry and the data entry, | ||
1154 | * and updates the free block if necessary. | ||
1155 | */ | ||
1156 | static int /* error */ | ||
1157 | xfs_dir2_leafn_remove( | ||
1158 | xfs_da_args_t *args, /* operation arguments */ | ||
1159 | struct xfs_buf *bp, /* leaf buffer */ | ||
1160 | int index, /* leaf entry index */ | ||
1161 | xfs_da_state_blk_t *dblk, /* data block */ | ||
1162 | int *rval) /* resulting block needs join */ | ||
1163 | { | ||
1164 | xfs_dir2_data_hdr_t *hdr; /* data block header */ | ||
1165 | xfs_dir2_db_t db; /* data block number */ | ||
1166 | struct xfs_buf *dbp; /* data block buffer */ | ||
1167 | xfs_dir2_data_entry_t *dep; /* data block entry */ | ||
1168 | xfs_inode_t *dp; /* incore directory inode */ | ||
1169 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
1170 | xfs_dir2_leaf_entry_t *lep; /* leaf entry */ | ||
1171 | int longest; /* longest data free entry */ | ||
1172 | int off; /* data block entry offset */ | ||
1173 | xfs_mount_t *mp; /* filesystem mount point */ | ||
1174 | int needlog; /* need to log data header */ | ||
1175 | int needscan; /* need to rescan data frees */ | ||
1176 | xfs_trans_t *tp; /* transaction pointer */ | ||
1177 | struct xfs_dir2_data_free *bf; /* bestfree table */ | ||
1178 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
1179 | struct xfs_dir2_leaf_entry *ents; | ||
1180 | |||
1181 | trace_xfs_dir2_leafn_remove(args, index); | ||
1182 | |||
1183 | dp = args->dp; | ||
1184 | tp = args->trans; | ||
1185 | mp = dp->i_mount; | ||
1186 | leaf = bp->b_addr; | ||
1187 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
1188 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
1189 | |||
1190 | /* | ||
1191 | * Point to the entry we're removing. | ||
1192 | */ | ||
1193 | lep = &ents[index]; | ||
1194 | |||
1195 | /* | ||
1196 | * Extract the data block and offset from the entry. | ||
1197 | */ | ||
1198 | db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); | ||
1199 | ASSERT(dblk->blkno == db); | ||
1200 | off = xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)); | ||
1201 | ASSERT(dblk->index == off); | ||
1202 | |||
1203 | /* | ||
1204 | * Kill the leaf entry by marking it stale. | ||
1205 | * Log the leaf block changes. | ||
1206 | */ | ||
1207 | leafhdr.stale++; | ||
1208 | dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); | ||
1209 | xfs_dir3_leaf_log_header(args, bp); | ||
1210 | |||
1211 | lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); | ||
1212 | xfs_dir3_leaf_log_ents(args, bp, index, index); | ||
1213 | |||
1214 | /* | ||
1215 | * Make the data entry free. Keep track of the longest freespace | ||
1216 | * in the data block in case it changes. | ||
1217 | */ | ||
1218 | dbp = dblk->bp; | ||
1219 | hdr = dbp->b_addr; | ||
1220 | dep = (xfs_dir2_data_entry_t *)((char *)hdr + off); | ||
1221 | bf = dp->d_ops->data_bestfree_p(hdr); | ||
1222 | longest = be16_to_cpu(bf[0].length); | ||
1223 | needlog = needscan = 0; | ||
1224 | xfs_dir2_data_make_free(args, dbp, off, | ||
1225 | dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); | ||
1226 | /* | ||
1227 | * Rescan the data block freespaces for bestfree. | ||
1228 | * Log the data block header if needed. | ||
1229 | */ | ||
1230 | if (needscan) | ||
1231 | xfs_dir2_data_freescan(dp, hdr, &needlog); | ||
1232 | if (needlog) | ||
1233 | xfs_dir2_data_log_header(args, dbp); | ||
1234 | xfs_dir3_data_check(dp, dbp); | ||
1235 | /* | ||
1236 | * If the longest data block freespace changes, need to update | ||
1237 | * the corresponding freeblock entry. | ||
1238 | */ | ||
1239 | if (longest < be16_to_cpu(bf[0].length)) { | ||
1240 | int error; /* error return value */ | ||
1241 | struct xfs_buf *fbp; /* freeblock buffer */ | ||
1242 | xfs_dir2_db_t fdb; /* freeblock block number */ | ||
1243 | int findex; /* index in freeblock entries */ | ||
1244 | xfs_dir2_free_t *free; /* freeblock structure */ | ||
1245 | |||
1246 | /* | ||
1247 | * Convert the data block number to a free block, | ||
1248 | * read in the free block. | ||
1249 | */ | ||
1250 | fdb = dp->d_ops->db_to_fdb(args->geo, db); | ||
1251 | error = xfs_dir2_free_read(tp, dp, | ||
1252 | xfs_dir2_db_to_da(args->geo, fdb), | ||
1253 | &fbp); | ||
1254 | if (error) | ||
1255 | return error; | ||
1256 | free = fbp->b_addr; | ||
1257 | #ifdef DEBUG | ||
1258 | { | ||
1259 | struct xfs_dir3_icfree_hdr freehdr; | ||
1260 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
1261 | ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(args->geo) * | ||
1262 | (fdb - xfs_dir2_byte_to_db(args->geo, | ||
1263 | XFS_DIR2_FREE_OFFSET))); | ||
1264 | } | ||
1265 | #endif | ||
1266 | /* | ||
1267 | * Calculate which entry we need to fix. | ||
1268 | */ | ||
1269 | findex = dp->d_ops->db_to_fdindex(args->geo, db); | ||
1270 | longest = be16_to_cpu(bf[0].length); | ||
1271 | /* | ||
1272 | * If the data block is now empty we can get rid of it | ||
1273 | * (usually). | ||
1274 | */ | ||
1275 | if (longest == args->geo->blksize - | ||
1276 | dp->d_ops->data_entry_offset) { | ||
1277 | /* | ||
1278 | * Try to punch out the data block. | ||
1279 | */ | ||
1280 | error = xfs_dir2_shrink_inode(args, db, dbp); | ||
1281 | if (error == 0) { | ||
1282 | dblk->bp = NULL; | ||
1283 | hdr = NULL; | ||
1284 | } | ||
1285 | /* | ||
1286 | * We can get ENOSPC if there's no space reservation. | ||
1287 | * In this case just drop the buffer and some one else | ||
1288 | * will eventually get rid of the empty block. | ||
1289 | */ | ||
1290 | else if (!(error == -ENOSPC && args->total == 0)) | ||
1291 | return error; | ||
1292 | } | ||
1293 | /* | ||
1294 | * If we got rid of the data block, we can eliminate that entry | ||
1295 | * in the free block. | ||
1296 | */ | ||
1297 | error = xfs_dir3_data_block_free(args, hdr, free, | ||
1298 | fdb, findex, fbp, longest); | ||
1299 | if (error) | ||
1300 | return error; | ||
1301 | } | ||
1302 | |||
1303 | xfs_dir3_leaf_check(dp, bp); | ||
1304 | /* | ||
1305 | * Return indication of whether this leaf block is empty enough | ||
1306 | * to justify trying to join it with a neighbor. | ||
1307 | */ | ||
1308 | *rval = (dp->d_ops->leaf_hdr_size + | ||
1309 | (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) < | ||
1310 | args->geo->magicpct; | ||
1311 | return 0; | ||
1312 | } | ||
1313 | |||
1314 | /* | ||
1315 | * Split the leaf entries in the old block into old and new blocks. | ||
1316 | */ | ||
1317 | int /* error */ | ||
1318 | xfs_dir2_leafn_split( | ||
1319 | xfs_da_state_t *state, /* btree cursor */ | ||
1320 | xfs_da_state_blk_t *oldblk, /* original block */ | ||
1321 | xfs_da_state_blk_t *newblk) /* newly created block */ | ||
1322 | { | ||
1323 | xfs_da_args_t *args; /* operation arguments */ | ||
1324 | xfs_dablk_t blkno; /* new leaf block number */ | ||
1325 | int error; /* error return value */ | ||
1326 | xfs_mount_t *mp; /* filesystem mount point */ | ||
1327 | struct xfs_inode *dp; | ||
1328 | |||
1329 | /* | ||
1330 | * Allocate space for a new leaf node. | ||
1331 | */ | ||
1332 | args = state->args; | ||
1333 | dp = args->dp; | ||
1334 | mp = dp->i_mount; | ||
1335 | ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC); | ||
1336 | error = xfs_da_grow_inode(args, &blkno); | ||
1337 | if (error) { | ||
1338 | return error; | ||
1339 | } | ||
1340 | /* | ||
1341 | * Initialize the new leaf block. | ||
1342 | */ | ||
1343 | error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(args->geo, blkno), | ||
1344 | &newblk->bp, XFS_DIR2_LEAFN_MAGIC); | ||
1345 | if (error) | ||
1346 | return error; | ||
1347 | |||
1348 | newblk->blkno = blkno; | ||
1349 | newblk->magic = XFS_DIR2_LEAFN_MAGIC; | ||
1350 | /* | ||
1351 | * Rebalance the entries across the two leaves, link the new | ||
1352 | * block into the leaves. | ||
1353 | */ | ||
1354 | xfs_dir2_leafn_rebalance(state, oldblk, newblk); | ||
1355 | error = xfs_da3_blk_link(state, oldblk, newblk); | ||
1356 | if (error) { | ||
1357 | return error; | ||
1358 | } | ||
1359 | /* | ||
1360 | * Insert the new entry in the correct block. | ||
1361 | */ | ||
1362 | if (state->inleaf) | ||
1363 | error = xfs_dir2_leafn_add(oldblk->bp, args, oldblk->index); | ||
1364 | else | ||
1365 | error = xfs_dir2_leafn_add(newblk->bp, args, newblk->index); | ||
1366 | /* | ||
1367 | * Update last hashval in each block since we added the name. | ||
1368 | */ | ||
1369 | oldblk->hashval = xfs_dir2_leafn_lasthash(dp, oldblk->bp, NULL); | ||
1370 | newblk->hashval = xfs_dir2_leafn_lasthash(dp, newblk->bp, NULL); | ||
1371 | xfs_dir3_leaf_check(dp, oldblk->bp); | ||
1372 | xfs_dir3_leaf_check(dp, newblk->bp); | ||
1373 | return error; | ||
1374 | } | ||
1375 | |||
1376 | /* | ||
1377 | * Check a leaf block and its neighbors to see if the block should be | ||
1378 | * collapsed into one or the other neighbor. Always keep the block | ||
1379 | * with the smaller block number. | ||
1380 | * If the current block is over 50% full, don't try to join it, return 0. | ||
1381 | * If the block is empty, fill in the state structure and return 2. | ||
1382 | * If it can be collapsed, fill in the state structure and return 1. | ||
1383 | * If nothing can be done, return 0. | ||
1384 | */ | ||
1385 | int /* error */ | ||
1386 | xfs_dir2_leafn_toosmall( | ||
1387 | xfs_da_state_t *state, /* btree cursor */ | ||
1388 | int *action) /* resulting action to take */ | ||
1389 | { | ||
1390 | xfs_da_state_blk_t *blk; /* leaf block */ | ||
1391 | xfs_dablk_t blkno; /* leaf block number */ | ||
1392 | struct xfs_buf *bp; /* leaf buffer */ | ||
1393 | int bytes; /* bytes in use */ | ||
1394 | int count; /* leaf live entry count */ | ||
1395 | int error; /* error return value */ | ||
1396 | int forward; /* sibling block direction */ | ||
1397 | int i; /* sibling counter */ | ||
1398 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
1399 | int rval; /* result from path_shift */ | ||
1400 | struct xfs_dir3_icleaf_hdr leafhdr; | ||
1401 | struct xfs_dir2_leaf_entry *ents; | ||
1402 | struct xfs_inode *dp = state->args->dp; | ||
1403 | |||
1404 | /* | ||
1405 | * Check for the degenerate case of the block being over 50% full. | ||
1406 | * If so, it's not worth even looking to see if we might be able | ||
1407 | * to coalesce with a sibling. | ||
1408 | */ | ||
1409 | blk = &state->path.blk[state->path.active - 1]; | ||
1410 | leaf = blk->bp->b_addr; | ||
1411 | dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); | ||
1412 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
1413 | xfs_dir3_leaf_check(dp, blk->bp); | ||
1414 | |||
1415 | count = leafhdr.count - leafhdr.stale; | ||
1416 | bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]); | ||
1417 | if (bytes > (state->args->geo->blksize >> 1)) { | ||
1418 | /* | ||
1419 | * Blk over 50%, don't try to join. | ||
1420 | */ | ||
1421 | *action = 0; | ||
1422 | return 0; | ||
1423 | } | ||
1424 | /* | ||
1425 | * Check for the degenerate case of the block being empty. | ||
1426 | * If the block is empty, we'll simply delete it, no need to | ||
1427 | * coalesce it with a sibling block. We choose (arbitrarily) | ||
1428 | * to merge with the forward block unless it is NULL. | ||
1429 | */ | ||
1430 | if (count == 0) { | ||
1431 | /* | ||
1432 | * Make altpath point to the block we want to keep and | ||
1433 | * path point to the block we want to drop (this one). | ||
1434 | */ | ||
1435 | forward = (leafhdr.forw != 0); | ||
1436 | memcpy(&state->altpath, &state->path, sizeof(state->path)); | ||
1437 | error = xfs_da3_path_shift(state, &state->altpath, forward, 0, | ||
1438 | &rval); | ||
1439 | if (error) | ||
1440 | return error; | ||
1441 | *action = rval ? 2 : 0; | ||
1442 | return 0; | ||
1443 | } | ||
1444 | /* | ||
1445 | * Examine each sibling block to see if we can coalesce with | ||
1446 | * at least 25% free space to spare. We need to figure out | ||
1447 | * whether to merge with the forward or the backward block. | ||
1448 | * We prefer coalescing with the lower numbered sibling so as | ||
1449 | * to shrink a directory over time. | ||
1450 | */ | ||
1451 | forward = leafhdr.forw < leafhdr.back; | ||
1452 | for (i = 0, bp = NULL; i < 2; forward = !forward, i++) { | ||
1453 | struct xfs_dir3_icleaf_hdr hdr2; | ||
1454 | |||
1455 | blkno = forward ? leafhdr.forw : leafhdr.back; | ||
1456 | if (blkno == 0) | ||
1457 | continue; | ||
1458 | /* | ||
1459 | * Read the sibling leaf block. | ||
1460 | */ | ||
1461 | error = xfs_dir3_leafn_read(state->args->trans, dp, | ||
1462 | blkno, -1, &bp); | ||
1463 | if (error) | ||
1464 | return error; | ||
1465 | |||
1466 | /* | ||
1467 | * Count bytes in the two blocks combined. | ||
1468 | */ | ||
1469 | count = leafhdr.count - leafhdr.stale; | ||
1470 | bytes = state->args->geo->blksize - | ||
1471 | (state->args->geo->blksize >> 2); | ||
1472 | |||
1473 | leaf = bp->b_addr; | ||
1474 | dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf); | ||
1475 | ents = dp->d_ops->leaf_ents_p(leaf); | ||
1476 | count += hdr2.count - hdr2.stale; | ||
1477 | bytes -= count * sizeof(ents[0]); | ||
1478 | |||
1479 | /* | ||
1480 | * Fits with at least 25% to spare. | ||
1481 | */ | ||
1482 | if (bytes >= 0) | ||
1483 | break; | ||
1484 | xfs_trans_brelse(state->args->trans, bp); | ||
1485 | } | ||
1486 | /* | ||
1487 | * Didn't like either block, give up. | ||
1488 | */ | ||
1489 | if (i >= 2) { | ||
1490 | *action = 0; | ||
1491 | return 0; | ||
1492 | } | ||
1493 | |||
1494 | /* | ||
1495 | * Make altpath point to the block we want to keep (the lower | ||
1496 | * numbered block) and path point to the block we want to drop. | ||
1497 | */ | ||
1498 | memcpy(&state->altpath, &state->path, sizeof(state->path)); | ||
1499 | if (blkno < blk->blkno) | ||
1500 | error = xfs_da3_path_shift(state, &state->altpath, forward, 0, | ||
1501 | &rval); | ||
1502 | else | ||
1503 | error = xfs_da3_path_shift(state, &state->path, forward, 0, | ||
1504 | &rval); | ||
1505 | if (error) { | ||
1506 | return error; | ||
1507 | } | ||
1508 | *action = rval ? 0 : 1; | ||
1509 | return 0; | ||
1510 | } | ||
1511 | |||
1512 | /* | ||
1513 | * Move all the leaf entries from drop_blk to save_blk. | ||
1514 | * This is done as part of a join operation. | ||
1515 | */ | ||
1516 | void | ||
1517 | xfs_dir2_leafn_unbalance( | ||
1518 | xfs_da_state_t *state, /* cursor */ | ||
1519 | xfs_da_state_blk_t *drop_blk, /* dead block */ | ||
1520 | xfs_da_state_blk_t *save_blk) /* surviving block */ | ||
1521 | { | ||
1522 | xfs_da_args_t *args; /* operation arguments */ | ||
1523 | xfs_dir2_leaf_t *drop_leaf; /* dead leaf structure */ | ||
1524 | xfs_dir2_leaf_t *save_leaf; /* surviving leaf structure */ | ||
1525 | struct xfs_dir3_icleaf_hdr savehdr; | ||
1526 | struct xfs_dir3_icleaf_hdr drophdr; | ||
1527 | struct xfs_dir2_leaf_entry *sents; | ||
1528 | struct xfs_dir2_leaf_entry *dents; | ||
1529 | struct xfs_inode *dp = state->args->dp; | ||
1530 | |||
1531 | args = state->args; | ||
1532 | ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); | ||
1533 | ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC); | ||
1534 | drop_leaf = drop_blk->bp->b_addr; | ||
1535 | save_leaf = save_blk->bp->b_addr; | ||
1536 | |||
1537 | dp->d_ops->leaf_hdr_from_disk(&savehdr, save_leaf); | ||
1538 | dp->d_ops->leaf_hdr_from_disk(&drophdr, drop_leaf); | ||
1539 | sents = dp->d_ops->leaf_ents_p(save_leaf); | ||
1540 | dents = dp->d_ops->leaf_ents_p(drop_leaf); | ||
1541 | |||
1542 | /* | ||
1543 | * If there are any stale leaf entries, take this opportunity | ||
1544 | * to purge them. | ||
1545 | */ | ||
1546 | if (drophdr.stale) | ||
1547 | xfs_dir3_leaf_compact(args, &drophdr, drop_blk->bp); | ||
1548 | if (savehdr.stale) | ||
1549 | xfs_dir3_leaf_compact(args, &savehdr, save_blk->bp); | ||
1550 | |||
1551 | /* | ||
1552 | * Move the entries from drop to the appropriate end of save. | ||
1553 | */ | ||
1554 | drop_blk->hashval = be32_to_cpu(dents[drophdr.count - 1].hashval); | ||
1555 | if (xfs_dir2_leafn_order(dp, save_blk->bp, drop_blk->bp)) | ||
1556 | xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0, | ||
1557 | save_blk->bp, &savehdr, sents, 0, | ||
1558 | drophdr.count); | ||
1559 | else | ||
1560 | xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0, | ||
1561 | save_blk->bp, &savehdr, sents, | ||
1562 | savehdr.count, drophdr.count); | ||
1563 | save_blk->hashval = be32_to_cpu(sents[savehdr.count - 1].hashval); | ||
1564 | |||
1565 | /* log the changes made when moving the entries */ | ||
1566 | dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr); | ||
1567 | dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr); | ||
1568 | xfs_dir3_leaf_log_header(args, save_blk->bp); | ||
1569 | xfs_dir3_leaf_log_header(args, drop_blk->bp); | ||
1570 | |||
1571 | xfs_dir3_leaf_check(dp, save_blk->bp); | ||
1572 | xfs_dir3_leaf_check(dp, drop_blk->bp); | ||
1573 | } | ||
1574 | |||
1575 | /* | ||
1576 | * Top-level node form directory addname routine. | ||
1577 | */ | ||
1578 | int /* error */ | ||
1579 | xfs_dir2_node_addname( | ||
1580 | xfs_da_args_t *args) /* operation arguments */ | ||
1581 | { | ||
1582 | xfs_da_state_blk_t *blk; /* leaf block for insert */ | ||
1583 | int error; /* error return value */ | ||
1584 | int rval; /* sub-return value */ | ||
1585 | xfs_da_state_t *state; /* btree cursor */ | ||
1586 | |||
1587 | trace_xfs_dir2_node_addname(args); | ||
1588 | |||
1589 | /* | ||
1590 | * Allocate and initialize the state (btree cursor). | ||
1591 | */ | ||
1592 | state = xfs_da_state_alloc(); | ||
1593 | state->args = args; | ||
1594 | state->mp = args->dp->i_mount; | ||
1595 | /* | ||
1596 | * Look up the name. We're not supposed to find it, but | ||
1597 | * this gives us the insertion point. | ||
1598 | */ | ||
1599 | error = xfs_da3_node_lookup_int(state, &rval); | ||
1600 | if (error) | ||
1601 | rval = error; | ||
1602 | if (rval != -ENOENT) { | ||
1603 | goto done; | ||
1604 | } | ||
1605 | /* | ||
1606 | * Add the data entry to a data block. | ||
1607 | * Extravalid is set to a freeblock found by lookup. | ||
1608 | */ | ||
1609 | rval = xfs_dir2_node_addname_int(args, | ||
1610 | state->extravalid ? &state->extrablk : NULL); | ||
1611 | if (rval) { | ||
1612 | goto done; | ||
1613 | } | ||
1614 | blk = &state->path.blk[state->path.active - 1]; | ||
1615 | ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); | ||
1616 | /* | ||
1617 | * Add the new leaf entry. | ||
1618 | */ | ||
1619 | rval = xfs_dir2_leafn_add(blk->bp, args, blk->index); | ||
1620 | if (rval == 0) { | ||
1621 | /* | ||
1622 | * It worked, fix the hash values up the btree. | ||
1623 | */ | ||
1624 | if (!(args->op_flags & XFS_DA_OP_JUSTCHECK)) | ||
1625 | xfs_da3_fixhashpath(state, &state->path); | ||
1626 | } else { | ||
1627 | /* | ||
1628 | * It didn't work, we need to split the leaf block. | ||
1629 | */ | ||
1630 | if (args->total == 0) { | ||
1631 | ASSERT(rval == -ENOSPC); | ||
1632 | goto done; | ||
1633 | } | ||
1634 | /* | ||
1635 | * Split the leaf block and insert the new entry. | ||
1636 | */ | ||
1637 | rval = xfs_da3_split(state); | ||
1638 | } | ||
1639 | done: | ||
1640 | xfs_da_state_free(state); | ||
1641 | return rval; | ||
1642 | } | ||
1643 | |||
1644 | /* | ||
1645 | * Add the data entry for a node-format directory name addition. | ||
1646 | * The leaf entry is added in xfs_dir2_leafn_add. | ||
1647 | * We may enter with a freespace block that the lookup found. | ||
     | * | ||
     | * The entry needs data_entsize(args->namelen) bytes. A data block with | ||
     | * that much room is taken from fblk if it names one (fblk->index >= 0), | ||
     | * otherwise found by scanning the freespace blocks, otherwise newly | ||
     | * allocated together with its freespace slot. The entry is then written | ||
     | * into the first bestfree region of that block and the matching | ||
     | * freespace "bests" slot is brought up to date and logged. | ||
     | * | ||
     | * On success returns 0 with args->blkno set to the data block number and | ||
     | * args->index to the offset stored in the entry's tag. With | ||
     | * XFS_DA_OP_JUSTCHECK set nothing is modified: 0 is returned if a data | ||
     | * block with room exists, -ENOSPC if one would have to be allocated. | ||
     | * -ENOSPC is also returned when allocation is needed but args->total is | ||
     | * zero, and -EFSCORRUPTED when a newly grown freespace block is not the | ||
     | * one the new data block maps to. Other errors come from the helpers. | ||
     | * | ||
     | * Whenever the freespace buffer is released here, fblk->bp is cleared. | ||
1648 | */ | ||
1649 | static int /* error */ | ||
1650 | xfs_dir2_node_addname_int( | ||
1651 | xfs_da_args_t *args, /* operation arguments */ | ||
1652 | xfs_da_state_blk_t *fblk) /* optional freespace block */ | ||
1653 | { | ||
1654 | xfs_dir2_data_hdr_t *hdr; /* data block header */ | ||
1655 | xfs_dir2_db_t dbno; /* data block number */ | ||
1656 | struct xfs_buf *dbp; /* data block buffer */ | ||
1657 | xfs_dir2_data_entry_t *dep; /* data entry pointer */ | ||
1658 | xfs_inode_t *dp; /* incore directory inode */ | ||
1659 | xfs_dir2_data_unused_t *dup; /* data unused entry pointer */ | ||
1660 | int error; /* error return value */ | ||
1661 | xfs_dir2_db_t fbno; /* freespace block number */ | ||
1662 | struct xfs_buf *fbp; /* freespace buffer */ | ||
1663 | int findex; /* freespace entry index */ | ||
1664 | xfs_dir2_free_t *free=NULL; /* freespace block structure */ | ||
1665 | xfs_dir2_db_t ifbno; /* initial freespace block no */ | ||
1666 | xfs_dir2_db_t lastfbno=0; /* highest freespace block no */ | ||
1667 | int length; /* length of the new entry */ | ||
1668 | int logfree; /* need to log free entry */ | ||
1669 | xfs_mount_t *mp; /* filesystem mount point */ | ||
1670 | int needlog; /* need to log data header */ | ||
1671 | int needscan; /* need to rescan data frees */ | ||
1672 | __be16 *tagp; /* data entry tag pointer */ | ||
1673 | xfs_trans_t *tp; /* transaction pointer */ | ||
1674 | __be16 *bests; | ||
1675 | struct xfs_dir3_icfree_hdr freehdr; | ||
1676 | struct xfs_dir2_data_free *bf; | ||
1677 | |||
1678 | dp = args->dp; | ||
1679 | mp = dp->i_mount; | ||
1680 | tp = args->trans; | ||
1681 | length = dp->d_ops->data_entsize(args->namelen); | ||
1682 | /* | ||
1683 | * If we came in with a freespace block that means that lookup | ||
1684 | * found an entry with our hash value. This is the freespace | ||
1685 | * block for that data entry. | ||
1686 | */ | ||
1687 | if (fblk) { | ||
1688 | fbp = fblk->bp; | ||
1689 | /* | ||
1690 | * Remember initial freespace block number. | ||
1691 | */ | ||
1692 | ifbno = fblk->blkno; | ||
1693 | free = fbp->b_addr; | ||
1694 | findex = fblk->index; | ||
1695 | bests = dp->d_ops->free_bests_p(free); | ||
1696 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
1697 | |||
1698 | /* | ||
1699 | * This means the free entry showed that the data block had | ||
1700 | * space for our entry, so we remembered it. | ||
1701 | * Use that data block. | ||
1702 | */ | ||
1703 | if (findex >= 0) { | ||
1704 | ASSERT(findex < freehdr.nvalid); | ||
1705 | ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF); | ||
1706 | ASSERT(be16_to_cpu(bests[findex]) >= length); | ||
1707 | dbno = freehdr.firstdb + findex; | ||
1708 | } else { | ||
1709 | /* | ||
1710 | * The data block looked at didn't have enough room. | ||
1711 | * We'll start at the beginning of the freespace entries. | ||
1712 | */ | ||
1713 | dbno = -1; | ||
1714 | findex = 0; | ||
1715 | } | ||
1716 | } else { | ||
1717 | /* | ||
1718 | * Didn't come in with a freespace block, so no data block. | ||
1719 | */ | ||
1720 | ifbno = dbno = -1; | ||
1721 | fbp = NULL; | ||
1722 | findex = 0; | ||
1723 | } | ||
1724 | |||
1725 | /* | ||
1726 | * If we don't have a data block yet, we're going to scan the | ||
1727 | * freespace blocks looking for one. Figure out what the | ||
1728 | * highest freespace block number is. | ||
1729 | */ | ||
1730 | if (dbno == -1) { | ||
1731 | xfs_fileoff_t fo; /* freespace block number */ | ||
1732 | |||
1733 | if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) | ||
1734 | return error; | ||
1735 | lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo); | ||
1736 | fbno = ifbno; | ||
1737 | } | ||
1738 | /* | ||
1739 | * While we haven't identified a data block, search the freeblock | ||
1740 | * data for a good data block. If we find a null freeblock entry, | ||
1741 | * indicating a hole in the data blocks, remember that. | ||
1742 | */ | ||
1743 | while (dbno == -1) { | ||
1744 | /* | ||
1745 | * If we don't have a freeblock in hand, get the next one. | ||
1746 | */ | ||
1747 | if (fbp == NULL) { | ||
1748 | /* | ||
1749 | * Happens the first time through unless lookup gave | ||
1750 | * us a freespace block to start with. | ||
1751 | */ | ||
1752 | if (++fbno == 0) | ||
1753 | fbno = xfs_dir2_byte_to_db(args->geo, | ||
1754 | XFS_DIR2_FREE_OFFSET); | ||
1755 | /* | ||
1756 | * If it's ifbno we already looked at it. | ||
1757 | */ | ||
1758 | if (fbno == ifbno) | ||
1759 | fbno++; | ||
1760 | /* | ||
1761 | * If it's off the end we're done. | ||
1762 | */ | ||
1763 | if (fbno >= lastfbno) | ||
1764 | break; | ||
1765 | /* | ||
1766 | * Read the block. There can be holes in the | ||
1767 | * freespace blocks, so this might not succeed. | ||
1768 | * This should be really rare, so there's no reason | ||
1769 | * to avoid it. | ||
1770 | */ | ||
1771 | error = xfs_dir2_free_try_read(tp, dp, | ||
1772 | xfs_dir2_db_to_da(args->geo, fbno), | ||
1773 | &fbp); | ||
1774 | if (error) | ||
1775 | return error; | ||
1776 | if (!fbp) | ||
1777 | continue; | ||
1778 | free = fbp->b_addr; | ||
1779 | findex = 0; | ||
1780 | } | ||
1781 | /* | ||
1782 | * Look at the current free entry. Is it good enough? | ||
1783 | * | ||
1784 | * The bests initialisation should be where the buffer is read in | ||
1785 | * the above branch. But gcc is too stupid to realise that bests | ||
1786 | * and the freehdr are actually initialised if they are placed | ||
1787 | * there, so we have to do it here to avoid warnings. Blech. | ||
1788 | */ | ||
1789 | bests = dp->d_ops->free_bests_p(free); | ||
1790 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
1791 | if (be16_to_cpu(bests[findex]) != NULLDATAOFF && | ||
1792 | be16_to_cpu(bests[findex]) >= length) | ||
1793 | dbno = freehdr.firstdb + findex; | ||
1794 | else { | ||
1795 | /* | ||
1796 | * Are we done with the freeblock? | ||
1797 | */ | ||
1798 | if (++findex == freehdr.nvalid) { | ||
1799 | /* | ||
1800 | * Drop the block. Also clear the caller's fblk->bp | ||
     | * so it does not keep pointing at a released buffer. | ||
1801 | */ | ||
1802 | xfs_trans_brelse(tp, fbp); | ||
1803 | fbp = NULL; | ||
1804 | if (fblk && fblk->bp) | ||
1805 | fblk->bp = NULL; | ||
1806 | } | ||
1807 | } | ||
1808 | } | ||
1809 | /* | ||
1810 | * If we don't have a data block, we need to allocate one and make | ||
1811 | * the freespace entries refer to it. | ||
1812 | */ | ||
1813 | if (unlikely(dbno == -1)) { | ||
1814 | /* | ||
1815 | * Not allowed to allocate, return failure. | ||
1816 | */ | ||
1817 | if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) | ||
1818 | return -ENOSPC; | ||
1819 | |||
1820 | /* | ||
1821 | * Allocate and initialize the new data block. | ||
1822 | */ | ||
1823 | if (unlikely((error = xfs_dir2_grow_inode(args, | ||
1824 | XFS_DIR2_DATA_SPACE, | ||
1825 | &dbno)) || | ||
1826 | (error = xfs_dir3_data_init(args, dbno, &dbp)))) | ||
1827 | return error; | ||
1828 | |||
1829 | /* | ||
1830 | * If (somehow) we have a freespace block, get rid of it. | ||
1831 | */ | ||
1832 | if (fbp) | ||
1833 | xfs_trans_brelse(tp, fbp); | ||
1834 | if (fblk && fblk->bp) | ||
1835 | fblk->bp = NULL; | ||
1836 | |||
1837 | /* | ||
1838 | * Get the freespace block corresponding to the data block | ||
1839 | * that was just allocated. | ||
1840 | */ | ||
1841 | fbno = dp->d_ops->db_to_fdb(args->geo, dbno); | ||
1842 | error = xfs_dir2_free_try_read(tp, dp, | ||
1843 | xfs_dir2_db_to_da(args->geo, fbno), | ||
1844 | &fbp); | ||
1845 | if (error) | ||
1846 | return error; | ||
1847 | |||
1848 | /* | ||
1849 | * If there wasn't a freespace block, the read will | ||
1850 | * return a NULL fbp. Allocate and initialize a new one. | ||
1851 | */ | ||
1852 | if (!fbp) { | ||
1853 | error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, | ||
1854 | &fbno); | ||
1855 | if (error) | ||
1856 | return error; | ||
1857 | |||
     | /* | ||
     | * The block we were just given must be the freespace block | ||
     | * this data block maps to; anything else is reported as | ||
     | * corruption. | ||
     | */ | ||
1858 | if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) { | ||
1859 | xfs_alert(mp, | ||
1860 | "%s: dir ino %llu needed freesp block %lld for\n" | ||
1861 | " data block %lld, got %lld ifbno %llu lastfbno %d", | ||
1862 | __func__, (unsigned long long)dp->i_ino, | ||
1863 | (long long)dp->d_ops->db_to_fdb( | ||
1864 | args->geo, dbno), | ||
1865 | (long long)dbno, (long long)fbno, | ||
1866 | (unsigned long long)ifbno, lastfbno); | ||
1867 | if (fblk) { | ||
1868 | xfs_alert(mp, | ||
1869 | " fblk 0x%p blkno %llu index %d magic 0x%x", | ||
1870 | fblk, | ||
1871 | (unsigned long long)fblk->blkno, | ||
1872 | fblk->index, | ||
1873 | fblk->magic); | ||
1874 | } else { | ||
1875 | xfs_alert(mp, " ... fblk is NULL"); | ||
1876 | } | ||
1877 | XFS_ERROR_REPORT("xfs_dir2_node_addname_int", | ||
1878 | XFS_ERRLEVEL_LOW, mp); | ||
1879 | return -EFSCORRUPTED; | ||
1880 | } | ||
1881 | |||
1882 | /* | ||
1883 | * Get a buffer for the new block. | ||
1884 | */ | ||
1885 | error = xfs_dir3_free_get_buf(args, fbno, &fbp); | ||
1886 | if (error) | ||
1887 | return error; | ||
1888 | free = fbp->b_addr; | ||
1889 | bests = dp->d_ops->free_bests_p(free); | ||
1890 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
1891 | |||
1892 | /* | ||
1893 | * Remember the first slot as our empty slot. | ||
1894 | */ | ||
1895 | freehdr.firstdb = | ||
1896 | (fbno - xfs_dir2_byte_to_db(args->geo, | ||
1897 | XFS_DIR2_FREE_OFFSET)) * | ||
1898 | dp->d_ops->free_max_bests(args->geo); | ||
1899 | } else { | ||
1900 | free = fbp->b_addr; | ||
1901 | bests = dp->d_ops->free_bests_p(free); | ||
1902 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
1903 | } | ||
1904 | |||
1905 | /* | ||
1906 | * Set the freespace block index from the data block number. | ||
1907 | */ | ||
1908 | findex = dp->d_ops->db_to_fdindex(args->geo, dbno); | ||
1909 | /* | ||
1910 | * If it's after the end of the current entries in the | ||
1911 | * freespace block, extend that table. | ||
1912 | */ | ||
1913 | if (findex >= freehdr.nvalid) { | ||
1914 | ASSERT(findex < dp->d_ops->free_max_bests(args->geo)); | ||
1915 | freehdr.nvalid = findex + 1; | ||
1916 | /* | ||
1917 | * Tag new entry so nused will go up. | ||
1918 | */ | ||
1919 | bests[findex] = cpu_to_be16(NULLDATAOFF); | ||
1920 | } | ||
1921 | /* | ||
1922 | * If this entry was for an empty data block | ||
1923 | * (this should always be true) then update the header. | ||
1924 | */ | ||
1925 | if (bests[findex] == cpu_to_be16(NULLDATAOFF)) { | ||
1926 | freehdr.nused++; | ||
1927 | dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); | ||
1928 | xfs_dir2_free_log_header(args, fbp); | ||
1929 | } | ||
1930 | /* | ||
1931 | * Update the real value in the table. | ||
1932 | * We haven't allocated the data entry yet so this will | ||
1933 | * change again. | ||
1934 | */ | ||
1935 | hdr = dbp->b_addr; | ||
1936 | bf = dp->d_ops->data_bestfree_p(hdr); | ||
1937 | bests[findex] = bf[0].length; | ||
1938 | logfree = 1; | ||
1939 | } | ||
1940 | /* | ||
1941 | * We had a data block so we don't have to make a new one. | ||
1942 | */ | ||
1943 | else { | ||
1944 | /* | ||
1945 | * If just checking, we succeeded. | ||
1946 | */ | ||
1947 | if (args->op_flags & XFS_DA_OP_JUSTCHECK) | ||
1948 | return 0; | ||
1949 | |||
1950 | /* | ||
1951 | * Read the data block in. | ||
1952 | */ | ||
1953 | error = xfs_dir3_data_read(tp, dp, | ||
1954 | xfs_dir2_db_to_da(args->geo, dbno), | ||
1955 | -1, &dbp); | ||
1956 | if (error) | ||
1957 | return error; | ||
1958 | hdr = dbp->b_addr; | ||
1959 | bf = dp->d_ops->data_bestfree_p(hdr); | ||
1960 | logfree = 0; | ||
1961 | } | ||
1962 | ASSERT(be16_to_cpu(bf[0].length) >= length); | ||
1963 | /* | ||
1964 | * Point to the existing unused space: the region described by the | ||
     | * first bestfree slot, bf[0]. | ||
1965 | */ | ||
1966 | dup = (xfs_dir2_data_unused_t *) | ||
1967 | ((char *)hdr + be16_to_cpu(bf[0].offset)); | ||
1968 | needscan = needlog = 0; | ||
1969 | /* | ||
1970 | * Mark the first part of the unused space, inuse for us. | ||
1971 | */ | ||
1972 | xfs_dir2_data_use_free(args, dbp, dup, | ||
1973 | (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, | ||
1974 | &needlog, &needscan); | ||
1975 | /* | ||
1976 | * Fill in the new entry and log it. The tag stored behind the | ||
     | * entry is the entry's own byte offset from the block header. | ||
1977 | */ | ||
1978 | dep = (xfs_dir2_data_entry_t *)dup; | ||
1979 | dep->inumber = cpu_to_be64(args->inumber); | ||
1980 | dep->namelen = args->namelen; | ||
1981 | memcpy(dep->name, args->name, dep->namelen); | ||
1982 | dp->d_ops->data_put_ftype(dep, args->filetype); | ||
1983 | tagp = dp->d_ops->data_entry_tag_p(dep); | ||
1984 | *tagp = cpu_to_be16((char *)dep - (char *)hdr); | ||
1985 | xfs_dir2_data_log_entry(args, dbp, dep); | ||
1986 | /* | ||
1987 | * Rescan the block for bestfree if needed. | ||
1988 | */ | ||
1989 | if (needscan) | ||
1990 | xfs_dir2_data_freescan(dp, hdr, &needlog); | ||
1991 | /* | ||
1992 | * Log the data block header if needed. | ||
1993 | */ | ||
1994 | if (needlog) | ||
1995 | xfs_dir2_data_log_header(args, dbp); | ||
1996 | /* | ||
1997 | * If the freespace entry is now wrong, update it. | ||
1998 | */ | ||
1999 | bests = dp->d_ops->free_bests_p(free); /* gcc is so stupid */ | ||
2000 | if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) { | ||
2001 | bests[findex] = bf[0].length; | ||
2002 | logfree = 1; | ||
2003 | } | ||
2004 | /* | ||
2005 | * Log the freespace entry if needed. | ||
2006 | */ | ||
2007 | if (logfree) | ||
2008 | xfs_dir2_free_log_bests(args, fbp, findex, findex); | ||
2009 | /* | ||
2010 | * Return the data block number and the entry's offset in args. | ||
     | * NOTE(review): dbp is not explicitly released in this function. | ||
2011 | */ | ||
2012 | args->blkno = (xfs_dablk_t)dbno; | ||
2013 | args->index = be16_to_cpu(*tagp); | ||
2014 | return 0; | ||
2015 | } | ||
2016 | |||
2017 | /* | ||
2018 | * Lookup an entry in a node-format directory. | ||
2019 | * All the real work happens in xfs_da3_node_lookup_int. | ||
2020 | * The only real output is the inode number of the entry. | ||
2021 | */ | ||
2022 | int /* error */ | ||
2023 | xfs_dir2_node_lookup( | ||
2024 | xfs_da_args_t *args) /* operation arguments */ | ||
2025 | { | ||
2026 | int error; /* error return value */ | ||
2027 | int i; /* btree level */ | ||
2028 | int rval; /* operation return value */ | ||
2029 | xfs_da_state_t *state; /* btree cursor */ | ||
2030 | |||
2031 | trace_xfs_dir2_node_lookup(args); | ||
2032 | |||
2033 | /* | ||
2034 | * Allocate and initialize the btree cursor. | ||
2035 | */ | ||
2036 | state = xfs_da_state_alloc(); | ||
2037 | state->args = args; | ||
2038 | state->mp = args->dp->i_mount; | ||
2039 | /* | ||
2040 | * Fill in the path to the entry in the cursor. | ||
2041 | */ | ||
2042 | error = xfs_da3_node_lookup_int(state, &rval); | ||
2043 | if (error) | ||
2044 | rval = error; | ||
2045 | else if (rval == -ENOENT && args->cmpresult == XFS_CMP_CASE) { | ||
2046 | /* If a CI match, dup the actual name and return -EEXIST */ | ||
2047 | xfs_dir2_data_entry_t *dep; | ||
2048 | |||
2049 | dep = (xfs_dir2_data_entry_t *) | ||
2050 | ((char *)state->extrablk.bp->b_addr + | ||
2051 | state->extrablk.index); | ||
2052 | rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen); | ||
2053 | } | ||
2054 | /* | ||
2055 | * Release the btree blocks and leaf block. | ||
2056 | */ | ||
2057 | for (i = 0; i < state->path.active; i++) { | ||
2058 | xfs_trans_brelse(args->trans, state->path.blk[i].bp); | ||
2059 | state->path.blk[i].bp = NULL; | ||
2060 | } | ||
2061 | /* | ||
2062 | * Release the data block if we have it. | ||
2063 | */ | ||
2064 | if (state->extravalid && state->extrablk.bp) { | ||
2065 | xfs_trans_brelse(args->trans, state->extrablk.bp); | ||
2066 | state->extrablk.bp = NULL; | ||
2067 | } | ||
2068 | xfs_da_state_free(state); | ||
2069 | return rval; | ||
2070 | } | ||
2071 | |||
2072 | /* | ||
2073 | * Remove an entry from a node-format directory. | ||
2074 | */ | ||
2075 | int /* error */ | ||
2076 | xfs_dir2_node_removename( | ||
2077 | struct xfs_da_args *args) /* operation arguments */ | ||
2078 | { | ||
2079 | struct xfs_da_state_blk *blk; /* leaf block */ | ||
2080 | int error; /* error return value */ | ||
2081 | int rval; /* operation return value */ | ||
2082 | struct xfs_da_state *state; /* btree cursor */ | ||
2083 | |||
2084 | trace_xfs_dir2_node_removename(args); | ||
2085 | |||
2086 | /* | ||
2087 | * Allocate and initialize the btree cursor. | ||
2088 | */ | ||
2089 | state = xfs_da_state_alloc(); | ||
2090 | state->args = args; | ||
2091 | state->mp = args->dp->i_mount; | ||
2092 | |||
2093 | /* Look up the entry we're deleting, set up the cursor. */ | ||
2094 | error = xfs_da3_node_lookup_int(state, &rval); | ||
2095 | if (error) | ||
2096 | goto out_free; | ||
2097 | |||
2098 | /* Didn't find it, upper layer screwed up. */ | ||
2099 | if (rval != -EEXIST) { | ||
2100 | error = rval; | ||
2101 | goto out_free; | ||
2102 | } | ||
2103 | |||
2104 | blk = &state->path.blk[state->path.active - 1]; | ||
2105 | ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); | ||
2106 | ASSERT(state->extravalid); | ||
2107 | /* | ||
2108 | * Remove the leaf and data entries. | ||
2109 | * Extrablk refers to the data block. | ||
2110 | */ | ||
2111 | error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, | ||
2112 | &state->extrablk, &rval); | ||
2113 | if (error) | ||
2114 | goto out_free; | ||
2115 | /* | ||
2116 | * Fix the hash values up the btree. | ||
2117 | */ | ||
2118 | xfs_da3_fixhashpath(state, &state->path); | ||
2119 | /* | ||
2120 | * If we need to join leaf blocks, do it. | ||
2121 | */ | ||
2122 | if (rval && state->path.active > 1) | ||
2123 | error = xfs_da3_join(state); | ||
2124 | /* | ||
2125 | * If no errors so far, try conversion to leaf format. | ||
2126 | */ | ||
2127 | if (!error) | ||
2128 | error = xfs_dir2_node_to_leaf(state); | ||
2129 | out_free: | ||
2130 | xfs_da_state_free(state); | ||
2131 | return error; | ||
2132 | } | ||
2133 | |||
2134 | /* | ||
2135 | * Replace an entry's inode number in a node-format directory. | ||
2136 | */ | ||
2137 | int /* error */ | ||
2138 | xfs_dir2_node_replace( | ||
2139 | xfs_da_args_t *args) /* operation arguments */ | ||
2140 | { | ||
2141 | xfs_da_state_blk_t *blk; /* leaf block */ | ||
2142 | xfs_dir2_data_hdr_t *hdr; /* data block header */ | ||
2143 | xfs_dir2_data_entry_t *dep; /* data entry changed */ | ||
2144 | int error; /* error return value */ | ||
2145 | int i; /* btree level */ | ||
2146 | xfs_ino_t inum; /* new inode number */ | ||
2147 | xfs_dir2_leaf_t *leaf; /* leaf structure */ | ||
2148 | xfs_dir2_leaf_entry_t *lep; /* leaf entry being changed */ | ||
2149 | int rval; /* internal return value */ | ||
2150 | xfs_da_state_t *state; /* btree cursor */ | ||
2151 | |||
2152 | trace_xfs_dir2_node_replace(args); | ||
2153 | |||
2154 | /* | ||
2155 | * Allocate and initialize the btree cursor. | ||
2156 | */ | ||
2157 | state = xfs_da_state_alloc(); | ||
2158 | state->args = args; | ||
2159 | state->mp = args->dp->i_mount; | ||
2160 | inum = args->inumber; | ||
2161 | /* | ||
2162 | * Lookup the entry to change in the btree. | ||
2163 | */ | ||
2164 | error = xfs_da3_node_lookup_int(state, &rval); | ||
2165 | if (error) { | ||
2166 | rval = error; | ||
2167 | } | ||
2168 | /* | ||
2169 | * It should be found, since the vnodeops layer has looked it up | ||
2170 | * and locked it. But paranoia is good. | ||
2171 | */ | ||
2172 | if (rval == -EEXIST) { | ||
2173 | struct xfs_dir2_leaf_entry *ents; | ||
2174 | /* | ||
2175 | * Find the leaf entry. | ||
2176 | */ | ||
2177 | blk = &state->path.blk[state->path.active - 1]; | ||
2178 | ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); | ||
2179 | leaf = blk->bp->b_addr; | ||
2180 | ents = args->dp->d_ops->leaf_ents_p(leaf); | ||
2181 | lep = &ents[blk->index]; | ||
2182 | ASSERT(state->extravalid); | ||
2183 | /* | ||
2184 | * Point to the data entry. | ||
2185 | */ | ||
2186 | hdr = state->extrablk.bp->b_addr; | ||
2187 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || | ||
2188 | hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); | ||
2189 | dep = (xfs_dir2_data_entry_t *) | ||
2190 | ((char *)hdr + | ||
2191 | xfs_dir2_dataptr_to_off(args->geo, | ||
2192 | be32_to_cpu(lep->address))); | ||
2193 | ASSERT(inum != be64_to_cpu(dep->inumber)); | ||
2194 | /* | ||
2195 | * Fill in the new inode number and log the entry. | ||
2196 | */ | ||
2197 | dep->inumber = cpu_to_be64(inum); | ||
2198 | args->dp->d_ops->data_put_ftype(dep, args->filetype); | ||
2199 | xfs_dir2_data_log_entry(args, state->extrablk.bp, dep); | ||
2200 | rval = 0; | ||
2201 | } | ||
2202 | /* | ||
2203 | * Didn't find it, and we're holding a data block. Drop it. | ||
2204 | */ | ||
2205 | else if (state->extravalid) { | ||
2206 | xfs_trans_brelse(args->trans, state->extrablk.bp); | ||
2207 | state->extrablk.bp = NULL; | ||
2208 | } | ||
2209 | /* | ||
2210 | * Release all the buffers in the cursor. | ||
2211 | */ | ||
2212 | for (i = 0; i < state->path.active; i++) { | ||
2213 | xfs_trans_brelse(args->trans, state->path.blk[i].bp); | ||
2214 | state->path.blk[i].bp = NULL; | ||
2215 | } | ||
2216 | xfs_da_state_free(state); | ||
2217 | return rval; | ||
2218 | } | ||
2219 | |||
2220 | /* | ||
2221 | * Trim off a trailing empty freespace block. | ||
2222 | * Return (in rvalp) 1 if we did it, 0 if not. | ||
2223 | */ | ||
2224 | int /* error */ | ||
2225 | xfs_dir2_node_trim_free( | ||
2226 | xfs_da_args_t *args, /* operation arguments */ | ||
2227 | xfs_fileoff_t fo, /* free block number */ | ||
2228 | int *rvalp) /* out: did something */ | ||
2229 | { | ||
2230 | struct xfs_buf *bp; /* freespace buffer */ | ||
2231 | xfs_inode_t *dp; /* incore directory inode */ | ||
2232 | int error; /* error return code */ | ||
2233 | xfs_dir2_free_t *free; /* freespace structure */ | ||
2234 | xfs_mount_t *mp; /* filesystem mount point */ | ||
2235 | xfs_trans_t *tp; /* transaction pointer */ | ||
2236 | struct xfs_dir3_icfree_hdr freehdr; | ||
2237 | |||
2238 | dp = args->dp; | ||
2239 | mp = dp->i_mount; | ||
2240 | tp = args->trans; | ||
2241 | /* | ||
2242 | * Read the freespace block. | ||
2243 | */ | ||
2244 | error = xfs_dir2_free_try_read(tp, dp, fo, &bp); | ||
2245 | if (error) | ||
2246 | return error; | ||
2247 | /* | ||
2248 | * There can be holes in freespace. If fo is a hole, there's | ||
2249 | * nothing to do. | ||
2250 | */ | ||
2251 | if (!bp) | ||
2252 | return 0; | ||
2253 | free = bp->b_addr; | ||
2254 | dp->d_ops->free_hdr_from_disk(&freehdr, free); | ||
2255 | |||
2256 | /* | ||
2257 | * If there are used entries, there's nothing to do. | ||
2258 | */ | ||
2259 | if (freehdr.nused > 0) { | ||
2260 | xfs_trans_brelse(tp, bp); | ||
2261 | *rvalp = 0; | ||
2262 | return 0; | ||
2263 | } | ||
2264 | /* | ||
2265 | * Blow the block away. | ||
2266 | */ | ||
2267 | error = xfs_dir2_shrink_inode(args, | ||
2268 | xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo), bp); | ||
2269 | if (error) { | ||
2270 | /* | ||
2271 | * Can't fail with ENOSPC since that only happens with no | ||
2272 | * space reservation, when breaking up an extent into two | ||
2273 | * pieces. This is the last block of an extent. | ||
2274 | */ | ||
2275 | ASSERT(error != -ENOSPC); | ||
2276 | xfs_trans_brelse(tp, bp); | ||
2277 | return error; | ||
2278 | } | ||
2279 | /* | ||
2280 | * Return that we succeeded. | ||
2281 | */ | ||
2282 | *rvalp = 1; | ||
2283 | return 0; | ||
2284 | } | ||
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h new file mode 100644 index 000000000000..27ce0794d196 --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2_priv.h | |||
@@ -0,0 +1,274 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_DIR2_PRIV_H__ | ||
19 | #define __XFS_DIR2_PRIV_H__ | ||
20 | |||
21 | struct dir_context; | ||
22 | |||
23 | /* | ||
24 | * Directory offset/block conversion functions. | ||
25 | * | ||
26 | * DB blocks here are logical directory block numbers, not filesystem blocks. | ||
27 | */ | ||
28 | |||
29 | /* | ||
30 | * Convert dataptr to byte in file space | ||
31 | */ | ||
32 | static inline xfs_dir2_off_t | ||
33 | xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp) | ||
34 | { | ||
35 | return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; | ||
36 | } | ||
37 | |||
38 | /* | ||
39 | * Convert byte in file space to dataptr. It had better be aligned. | ||
40 | */ | ||
41 | static inline xfs_dir2_dataptr_t | ||
42 | xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by) | ||
43 | { | ||
44 | return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); | ||
45 | } | ||
46 | |||
47 | /* | ||
48 | * Convert byte in space to (DB) block | ||
49 | */ | ||
50 | static inline xfs_dir2_db_t | ||
51 | xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by) | ||
52 | { | ||
53 | return (xfs_dir2_db_t)(by >> geo->blklog); | ||
54 | } | ||
55 | |||
56 | /* | ||
57 | * Convert dataptr to a block number | ||
58 | */ | ||
59 | static inline xfs_dir2_db_t | ||
60 | xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) | ||
61 | { | ||
62 | return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp)); | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * Convert byte in space to offset in a block | ||
67 | */ | ||
68 | static inline xfs_dir2_data_aoff_t | ||
69 | xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by) | ||
70 | { | ||
71 | return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1)); | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Convert dataptr to a byte offset in a block | ||
76 | */ | ||
77 | static inline xfs_dir2_data_aoff_t | ||
78 | xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) | ||
79 | { | ||
80 | return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp)); | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Convert block and offset to byte in space | ||
85 | */ | ||
86 | static inline xfs_dir2_off_t | ||
87 | xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db, | ||
88 | xfs_dir2_data_aoff_t o) | ||
89 | { | ||
90 | return ((xfs_dir2_off_t)db << geo->blklog) + o; | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * Convert block (DB) to block (dablk) | ||
95 | */ | ||
96 | static inline xfs_dablk_t | ||
97 | xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db) | ||
98 | { | ||
99 | return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog)); | ||
100 | } | ||
101 | |||
102 | /* | ||
103 | * Convert byte in space to (DA) block | ||
104 | */ | ||
105 | static inline xfs_dablk_t | ||
106 | xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by) | ||
107 | { | ||
108 | return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by)); | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | * Convert block and offset to dataptr | ||
113 | */ | ||
114 | static inline xfs_dir2_dataptr_t | ||
115 | xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db, | ||
116 | xfs_dir2_data_aoff_t o) | ||
117 | { | ||
118 | return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o)); | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * Convert block (dablk) to block (DB) | ||
123 | */ | ||
124 | static inline xfs_dir2_db_t | ||
125 | xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da) | ||
126 | { | ||
127 | return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog)); | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Convert block (dablk) to byte offset in space | ||
132 | */ | ||
133 | static inline xfs_dir2_off_t | ||
134 | xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da) | ||
135 | { | ||
136 | return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0); | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Directory tail pointer accessor functions. Based on block geometry. | ||
141 | */ | ||
142 | static inline struct xfs_dir2_block_tail * | ||
143 | xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr) | ||
144 | { | ||
145 | return ((struct xfs_dir2_block_tail *) | ||
146 | ((char *)hdr + geo->blksize)) - 1; | ||
147 | } | ||
148 | |||
149 | static inline struct xfs_dir2_leaf_tail * | ||
150 | xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) | ||
151 | { | ||
152 | return (struct xfs_dir2_leaf_tail *) | ||
153 | ((char *)lp + geo->blksize - | ||
154 | sizeof(struct xfs_dir2_leaf_tail)); | ||
155 | } | ||
156 | |||
157 | /* xfs_dir2.c */ | ||
158 | extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); | ||
159 | extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, | ||
160 | xfs_dir2_db_t *dbp); | ||
161 | extern int xfs_dir_cilookup_result(struct xfs_da_args *args, | ||
162 | const unsigned char *name, int len); | ||
163 | |||
164 | #define S_SHIFT 12 | ||
165 | extern const unsigned char xfs_mode_to_ftype[]; | ||
166 | |||
167 | extern unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, | ||
168 | __uint8_t filetype); | ||
169 | |||
170 | |||
171 | /* xfs_dir2_block.c */ | ||
172 | extern int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp, | ||
173 | struct xfs_buf **bpp); | ||
174 | extern int xfs_dir2_block_addname(struct xfs_da_args *args); | ||
175 | extern int xfs_dir2_block_lookup(struct xfs_da_args *args); | ||
176 | extern int xfs_dir2_block_removename(struct xfs_da_args *args); | ||
177 | extern int xfs_dir2_block_replace(struct xfs_da_args *args); | ||
178 | extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, | ||
179 | struct xfs_buf *lbp, struct xfs_buf *dbp); | ||
180 | |||
181 | /* xfs_dir2_data.c */ | ||
#ifdef DEBUG
/*
 * DEBUG builds forward to __xfs_dir3_data_check(). The expansion carries no
 * trailing semicolon: the call site supplies it, so the macro expands to a
 * single statement and remains usable as the body of an unbraced if/else.
 */
#define	xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp)
#else
/* Non-DEBUG builds expand to nothing; the arguments are not evaluated. */
#define	xfs_dir3_data_check(dp,bp)
#endif
187 | |||
188 | extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); | ||
189 | extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, | ||
190 | xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); | ||
191 | extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, | ||
192 | xfs_daddr_t mapped_bno); | ||
193 | |||
194 | extern struct xfs_dir2_data_free * | ||
195 | xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, | ||
196 | struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup, | ||
197 | int *loghead); | ||
198 | extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, | ||
199 | struct xfs_buf **bpp); | ||
200 | |||
201 | /* xfs_dir2_leaf.c */ | ||
202 | extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp, | ||
203 | xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp); | ||
204 | extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, | ||
205 | struct xfs_buf *dbp); | ||
206 | extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); | ||
207 | extern void xfs_dir3_leaf_compact(struct xfs_da_args *args, | ||
208 | struct xfs_dir3_icleaf_hdr *leafhdr, struct xfs_buf *bp); | ||
209 | extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr, | ||
210 | struct xfs_dir2_leaf_entry *ents, int *indexp, | ||
211 | int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); | ||
212 | extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno, | ||
213 | struct xfs_buf **bpp, __uint16_t magic); | ||
214 | extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args, | ||
215 | struct xfs_buf *bp, int first, int last); | ||
216 | extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args, | ||
217 | struct xfs_buf *bp); | ||
218 | extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); | ||
219 | extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); | ||
220 | extern int xfs_dir2_leaf_replace(struct xfs_da_args *args); | ||
221 | extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args, | ||
222 | struct xfs_buf *lbp); | ||
223 | extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args, | ||
224 | struct xfs_buf *lbp, xfs_dir2_db_t db); | ||
225 | extern struct xfs_dir2_leaf_entry * | ||
226 | xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr, | ||
227 | struct xfs_dir2_leaf_entry *ents, int index, int compact, | ||
228 | int lowstale, int highstale, int *lfloglow, int *lfloghigh); | ||
229 | extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); | ||
230 | |||
231 | extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp, | ||
232 | struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf); | ||
233 | |||
234 | /* xfs_dir2_node.c */ | ||
235 | extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, | ||
236 | struct xfs_buf *lbp); | ||
237 | extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp, | ||
238 | struct xfs_buf *bp, int *count); | ||
239 | extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp, | ||
240 | struct xfs_da_args *args, int *indexp, | ||
241 | struct xfs_da_state *state); | ||
242 | extern int xfs_dir2_leafn_order(struct xfs_inode *dp, struct xfs_buf *leaf1_bp, | ||
243 | struct xfs_buf *leaf2_bp); | ||
244 | extern int xfs_dir2_leafn_split(struct xfs_da_state *state, | ||
245 | struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); | ||
246 | extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action); | ||
247 | extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state, | ||
248 | struct xfs_da_state_blk *drop_blk, | ||
249 | struct xfs_da_state_blk *save_blk); | ||
250 | extern int xfs_dir2_node_addname(struct xfs_da_args *args); | ||
251 | extern int xfs_dir2_node_lookup(struct xfs_da_args *args); | ||
252 | extern int xfs_dir2_node_removename(struct xfs_da_args *args); | ||
253 | extern int xfs_dir2_node_replace(struct xfs_da_args *args); | ||
254 | extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo, | ||
255 | int *rvalp); | ||
256 | extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp, | ||
257 | xfs_dablk_t fbno, struct xfs_buf **bpp); | ||
258 | |||
259 | /* xfs_dir2_sf.c */ | ||
260 | extern int xfs_dir2_block_sfsize(struct xfs_inode *dp, | ||
261 | struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp); | ||
262 | extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp, | ||
263 | int size, xfs_dir2_sf_hdr_t *sfhp); | ||
264 | extern int xfs_dir2_sf_addname(struct xfs_da_args *args); | ||
265 | extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); | ||
266 | extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); | ||
267 | extern int xfs_dir2_sf_removename(struct xfs_da_args *args); | ||
268 | extern int xfs_dir2_sf_replace(struct xfs_da_args *args); | ||
269 | |||
270 | /* xfs_dir2_readdir.c */ | ||
271 | extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, | ||
272 | size_t bufsize); | ||
273 | |||
274 | #endif /* __XFS_DIR2_PRIV_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c new file mode 100644 index 000000000000..8f4f26af35e1 --- /dev/null +++ b/fs/xfs/libxfs/xfs_dir2_sf.c | |||
@@ -0,0 +1,1184 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_format.h" | ||
21 | #include "xfs_log_format.h" | ||
22 | #include "xfs_trans_resv.h" | ||
23 | #include "xfs_sb.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_mount.h" | ||
26 | #include "xfs_da_format.h" | ||
27 | #include "xfs_da_btree.h" | ||
28 | #include "xfs_inode.h" | ||
29 | #include "xfs_trans.h" | ||
30 | #include "xfs_inode_item.h" | ||
31 | #include "xfs_error.h" | ||
32 | #include "xfs_dir2.h" | ||
33 | #include "xfs_dir2_priv.h" | ||
34 | #include "xfs_trace.h" | ||
35 | #include "xfs_dinode.h" | ||
36 | |||
37 | /* | ||
38 | * Prototypes for internal functions. | ||
39 | */ | ||
40 | static void xfs_dir2_sf_addname_easy(xfs_da_args_t *args, | ||
41 | xfs_dir2_sf_entry_t *sfep, | ||
42 | xfs_dir2_data_aoff_t offset, | ||
43 | int new_isize); | ||
44 | static void xfs_dir2_sf_addname_hard(xfs_da_args_t *args, int objchange, | ||
45 | int new_isize); | ||
46 | static int xfs_dir2_sf_addname_pick(xfs_da_args_t *args, int objchange, | ||
47 | xfs_dir2_sf_entry_t **sfepp, | ||
48 | xfs_dir2_data_aoff_t *offsetp); | ||
49 | #ifdef DEBUG | ||
50 | static void xfs_dir2_sf_check(xfs_da_args_t *args); | ||
51 | #else | ||
52 | #define xfs_dir2_sf_check(args) | ||
53 | #endif /* DEBUG */ | ||
54 | #if XFS_BIG_INUMS | ||
55 | static void xfs_dir2_sf_toino4(xfs_da_args_t *args); | ||
56 | static void xfs_dir2_sf_toino8(xfs_da_args_t *args); | ||
57 | #endif /* XFS_BIG_INUMS */ | ||
58 | |||
59 | /* | ||
60 | * Given a block directory (dp/block), calculate its size as a shortform (sf) | ||
61 | * directory and a header for the sf directory, if it will fit it the | ||
62 | * space currently present in the inode. If it won't fit, the output | ||
63 | * size is too big (but not accurate). | ||
64 | */ | ||
65 | int /* size for sf form */ | ||
66 | xfs_dir2_block_sfsize( | ||
67 | xfs_inode_t *dp, /* incore inode pointer */ | ||
68 | xfs_dir2_data_hdr_t *hdr, /* block directory data */ | ||
69 | xfs_dir2_sf_hdr_t *sfhp) /* output: header for sf form */ | ||
70 | { | ||
71 | xfs_dir2_dataptr_t addr; /* data entry address */ | ||
72 | xfs_dir2_leaf_entry_t *blp; /* leaf area of the block */ | ||
73 | xfs_dir2_block_tail_t *btp; /* tail area of the block */ | ||
74 | int count; /* shortform entry count */ | ||
75 | xfs_dir2_data_entry_t *dep; /* data entry in the block */ | ||
76 | int i; /* block entry index */ | ||
77 | int i8count; /* count of big-inode entries */ | ||
78 | int isdot; /* entry is "." */ | ||
79 | int isdotdot; /* entry is ".." */ | ||
80 | xfs_mount_t *mp; /* mount structure pointer */ | ||
81 | int namelen; /* total name bytes */ | ||
82 | xfs_ino_t parent = 0; /* parent inode number */ | ||
83 | int size=0; /* total computed size */ | ||
84 | int has_ftype; | ||
85 | struct xfs_da_geometry *geo; | ||
86 | |||
87 | mp = dp->i_mount; | ||
88 | geo = mp->m_dir_geo; | ||
89 | |||
90 | /* | ||
91 | * if there is a filetype field, add the extra byte to the namelen | ||
92 | * for each entry that we see. | ||
93 | */ | ||
94 | has_ftype = xfs_sb_version_hasftype(&mp->m_sb) ? 1 : 0; | ||
95 | |||
96 | count = i8count = namelen = 0; | ||
97 | btp = xfs_dir2_block_tail_p(geo, hdr); | ||
98 | blp = xfs_dir2_block_leaf_p(btp); | ||
99 | |||
100 | /* | ||
101 | * Iterate over the block's data entries by using the leaf pointers. | ||
102 | */ | ||
103 | for (i = 0; i < be32_to_cpu(btp->count); i++) { | ||
104 | if ((addr = be32_to_cpu(blp[i].address)) == XFS_DIR2_NULL_DATAPTR) | ||
105 | continue; | ||
106 | /* | ||
107 | * Calculate the pointer to the entry at hand. | ||
108 | */ | ||
109 | dep = (xfs_dir2_data_entry_t *)((char *)hdr + | ||
110 | xfs_dir2_dataptr_to_off(geo, addr)); | ||
111 | /* | ||
112 | * Detect . and .., so we can special-case them. | ||
113 | * . is not included in sf directories. | ||
114 | * .. is included by just the parent inode number. | ||
115 | */ | ||
116 | isdot = dep->namelen == 1 && dep->name[0] == '.'; | ||
117 | isdotdot = | ||
118 | dep->namelen == 2 && | ||
119 | dep->name[0] == '.' && dep->name[1] == '.'; | ||
120 | #if XFS_BIG_INUMS | ||
121 | if (!isdot) | ||
122 | i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM; | ||
123 | #endif | ||
124 | /* take into account the file type field */ | ||
125 | if (!isdot && !isdotdot) { | ||
126 | count++; | ||
127 | namelen += dep->namelen + has_ftype; | ||
128 | } else if (isdotdot) | ||
129 | parent = be64_to_cpu(dep->inumber); | ||
130 | /* | ||
131 | * Calculate the new size, see if we should give up yet. | ||
132 | */ | ||
133 | size = xfs_dir2_sf_hdr_size(i8count) + /* header */ | ||
134 | count + /* namelen */ | ||
135 | count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */ | ||
136 | namelen + /* name */ | ||
137 | (i8count ? /* inumber */ | ||
138 | (uint)sizeof(xfs_dir2_ino8_t) * count : | ||
139 | (uint)sizeof(xfs_dir2_ino4_t) * count); | ||
140 | if (size > XFS_IFORK_DSIZE(dp)) | ||
141 | return size; /* size value is a failure */ | ||
142 | } | ||
143 | /* | ||
144 | * Create the output header, if it worked. | ||
145 | */ | ||
146 | sfhp->count = count; | ||
147 | sfhp->i8count = i8count; | ||
148 | dp->d_ops->sf_put_parent_ino(sfhp, parent); | ||
149 | return size; | ||
150 | } | ||
151 | |||
152 | /* | ||
153 | * Convert a block format directory to shortform. | ||
154 | * Caller has already checked that it will fit, and built us a header. | ||
155 | */ | ||
156 | int /* error */ | ||
157 | xfs_dir2_block_to_sf( | ||
158 | xfs_da_args_t *args, /* operation arguments */ | ||
159 | struct xfs_buf *bp, | ||
160 | int size, /* shortform directory size */ | ||
161 | xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ | ||
162 | { | ||
163 | xfs_dir2_data_hdr_t *hdr; /* block header */ | ||
164 | xfs_dir2_block_tail_t *btp; /* block tail pointer */ | ||
165 | xfs_dir2_data_entry_t *dep; /* data entry pointer */ | ||
166 | xfs_inode_t *dp; /* incore directory inode */ | ||
167 | xfs_dir2_data_unused_t *dup; /* unused data pointer */ | ||
168 | char *endptr; /* end of data entries */ | ||
169 | int error; /* error return value */ | ||
170 | int logflags; /* inode logging flags */ | ||
171 | xfs_mount_t *mp; /* filesystem mount point */ | ||
172 | char *ptr; /* current data pointer */ | ||
173 | xfs_dir2_sf_entry_t *sfep; /* shortform entry */ | ||
174 | xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */ | ||
175 | xfs_dir2_sf_hdr_t *dst; /* temporary data buffer */ | ||
176 | |||
177 | trace_xfs_dir2_block_to_sf(args); | ||
178 | |||
179 | dp = args->dp; | ||
180 | mp = dp->i_mount; | ||
181 | |||
182 | /* | ||
183 | * allocate a temporary destination buffer the size of the inode | ||
184 | * to format the data into. Once we have formatted the data, we | ||
185 | * can free the block and copy the formatted data into the inode literal | ||
186 | * area. | ||
187 | */ | ||
188 | dst = kmem_alloc(mp->m_sb.sb_inodesize, KM_SLEEP); | ||
189 | hdr = bp->b_addr; | ||
190 | |||
191 | /* | ||
192 | * Copy the header into the newly allocate local space. | ||
193 | */ | ||
194 | sfp = (xfs_dir2_sf_hdr_t *)dst; | ||
195 | memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); | ||
196 | |||
197 | /* | ||
198 | * Set up to loop over the block's entries. | ||
199 | */ | ||
200 | btp = xfs_dir2_block_tail_p(args->geo, hdr); | ||
201 | ptr = (char *)dp->d_ops->data_entry_p(hdr); | ||
202 | endptr = (char *)xfs_dir2_block_leaf_p(btp); | ||
203 | sfep = xfs_dir2_sf_firstentry(sfp); | ||
204 | /* | ||
205 | * Loop over the active and unused entries. | ||
206 | * Stop when we reach the leaf/tail portion of the block. | ||
207 | */ | ||
208 | while (ptr < endptr) { | ||
209 | /* | ||
210 | * If it's unused, just skip over it. | ||
211 | */ | ||
212 | dup = (xfs_dir2_data_unused_t *)ptr; | ||
213 | if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { | ||
214 | ptr += be16_to_cpu(dup->length); | ||
215 | continue; | ||
216 | } | ||
217 | dep = (xfs_dir2_data_entry_t *)ptr; | ||
218 | /* | ||
219 | * Skip . | ||
220 | */ | ||
221 | if (dep->namelen == 1 && dep->name[0] == '.') | ||
222 | ASSERT(be64_to_cpu(dep->inumber) == dp->i_ino); | ||
223 | /* | ||
224 | * Skip .., but make sure the inode number is right. | ||
225 | */ | ||
226 | else if (dep->namelen == 2 && | ||
227 | dep->name[0] == '.' && dep->name[1] == '.') | ||
228 | ASSERT(be64_to_cpu(dep->inumber) == | ||
229 | dp->d_ops->sf_get_parent_ino(sfp)); | ||
230 | /* | ||
231 | * Normal entry, copy it into shortform. | ||
232 | */ | ||
233 | else { | ||
234 | sfep->namelen = dep->namelen; | ||
235 | xfs_dir2_sf_put_offset(sfep, | ||
236 | (xfs_dir2_data_aoff_t) | ||
237 | ((char *)dep - (char *)hdr)); | ||
238 | memcpy(sfep->name, dep->name, dep->namelen); | ||
239 | dp->d_ops->sf_put_ino(sfp, sfep, | ||
240 | be64_to_cpu(dep->inumber)); | ||
241 | dp->d_ops->sf_put_ftype(sfep, | ||
242 | dp->d_ops->data_get_ftype(dep)); | ||
243 | |||
244 | sfep = dp->d_ops->sf_nextentry(sfp, sfep); | ||
245 | } | ||
246 | ptr += dp->d_ops->data_entsize(dep->namelen); | ||
247 | } | ||
248 | ASSERT((char *)sfep - (char *)sfp == size); | ||
249 | |||
250 | /* now we are done with the block, we can shrink the inode */ | ||
251 | logflags = XFS_ILOG_CORE; | ||
252 | error = xfs_dir2_shrink_inode(args, args->geo->datablk, bp); | ||
253 | if (error) { | ||
254 | ASSERT(error != -ENOSPC); | ||
255 | goto out; | ||
256 | } | ||
257 | |||
258 | /* | ||
259 | * The buffer is now unconditionally gone, whether | ||
260 | * xfs_dir2_shrink_inode worked or not. | ||
261 | * | ||
262 | * Convert the inode to local format and copy the data in. | ||
263 | */ | ||
264 | dp->i_df.if_flags &= ~XFS_IFEXTENTS; | ||
265 | dp->i_df.if_flags |= XFS_IFINLINE; | ||
266 | dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; | ||
267 | ASSERT(dp->i_df.if_bytes == 0); | ||
268 | xfs_idata_realloc(dp, size, XFS_DATA_FORK); | ||
269 | |||
270 | logflags |= XFS_ILOG_DDATA; | ||
271 | memcpy(dp->i_df.if_u1.if_data, dst, size); | ||
272 | dp->i_d.di_size = size; | ||
273 | xfs_dir2_sf_check(args); | ||
274 | out: | ||
275 | xfs_trans_log_inode(args->trans, dp, logflags); | ||
276 | kmem_free(dst); | ||
277 | return error; | ||
278 | } | ||
279 | |||
280 | /* | ||
281 | * Add a name to a shortform directory. | ||
282 | * There are two algorithms, "easy" and "hard" which we decide on | ||
283 | * before changing anything. | ||
284 | * Convert to block form if necessary, if the new entry won't fit. | ||
285 | */ | ||
286 | int /* error */ | ||
287 | xfs_dir2_sf_addname( | ||
288 | xfs_da_args_t *args) /* operation arguments */ | ||
289 | { | ||
290 | xfs_inode_t *dp; /* incore directory inode */ | ||
291 | int error; /* error return value */ | ||
292 | int incr_isize; /* total change in size */ | ||
293 | int new_isize; /* di_size after adding name */ | ||
294 | int objchange; /* changing to 8-byte inodes */ | ||
295 | xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */ | ||
296 | int pick; /* which algorithm to use */ | ||
297 | xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ | ||
298 | xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ | ||
299 | |||
300 | trace_xfs_dir2_sf_addname(args); | ||
301 | |||
302 | ASSERT(xfs_dir2_sf_lookup(args) == -ENOENT); | ||
303 | dp = args->dp; | ||
304 | ASSERT(dp->i_df.if_flags & XFS_IFINLINE); | ||
305 | /* | ||
306 | * Make sure the shortform value has some of its header. | ||
307 | */ | ||
308 | if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { | ||
309 | ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); | ||
310 | return -EIO; | ||
311 | } | ||
312 | ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); | ||
313 | ASSERT(dp->i_df.if_u1.if_data != NULL); | ||
314 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | ||
315 | ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); | ||
316 | /* | ||
317 | * Compute entry (and change in) size. | ||
318 | */ | ||
319 | incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen); | ||
320 | objchange = 0; | ||
321 | #if XFS_BIG_INUMS | ||
322 | /* | ||
323 | * Do we have to change to 8 byte inodes? | ||
324 | */ | ||
325 | if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { | ||
326 | /* | ||
327 | * Yes, adjust the inode size. old count + (parent + new) | ||
328 | */ | ||
329 | incr_isize += | ||
330 | (sfp->count + 2) * | ||
331 | ((uint)sizeof(xfs_dir2_ino8_t) - | ||
332 | (uint)sizeof(xfs_dir2_ino4_t)); | ||
333 | objchange = 1; | ||
334 | } | ||
335 | #endif | ||
336 | new_isize = (int)dp->i_d.di_size + incr_isize; | ||
337 | /* | ||
338 | * Won't fit as shortform any more (due to size), | ||
339 | * or the pick routine says it won't (due to offset values). | ||
340 | */ | ||
341 | if (new_isize > XFS_IFORK_DSIZE(dp) || | ||
342 | (pick = | ||
343 | xfs_dir2_sf_addname_pick(args, objchange, &sfep, &offset)) == 0) { | ||
344 | /* | ||
345 | * Just checking or no space reservation, it doesn't fit. | ||
346 | */ | ||
347 | if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) | ||
348 | return -ENOSPC; | ||
349 | /* | ||
350 | * Convert to block form then add the name. | ||
351 | */ | ||
352 | error = xfs_dir2_sf_to_block(args); | ||
353 | if (error) | ||
354 | return error; | ||
355 | return xfs_dir2_block_addname(args); | ||
356 | } | ||
357 | /* | ||
358 | * Just checking, it fits. | ||
359 | */ | ||
360 | if (args->op_flags & XFS_DA_OP_JUSTCHECK) | ||
361 | return 0; | ||
362 | /* | ||
363 | * Do it the easy way - just add it at the end. | ||
364 | */ | ||
365 | if (pick == 1) | ||
366 | xfs_dir2_sf_addname_easy(args, sfep, offset, new_isize); | ||
367 | /* | ||
368 | * Do it the hard way - look for a place to insert the new entry. | ||
369 | * Convert to 8 byte inode numbers first if necessary. | ||
370 | */ | ||
371 | else { | ||
372 | ASSERT(pick == 2); | ||
373 | #if XFS_BIG_INUMS | ||
374 | if (objchange) | ||
375 | xfs_dir2_sf_toino8(args); | ||
376 | #endif | ||
377 | xfs_dir2_sf_addname_hard(args, objchange, new_isize); | ||
378 | } | ||
379 | xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); | ||
380 | return 0; | ||
381 | } | ||
382 | |||
383 | /* | ||
384 | * Add the new entry the "easy" way. | ||
385 | * This is copying the old directory and adding the new entry at the end. | ||
386 | * Since it's sorted by "offset" we need room after the last offset | ||
387 | * that's already there, and then room to convert to a block directory. | ||
388 | * This is already checked by the pick routine. | ||
389 | */ | ||
390 | static void | ||
391 | xfs_dir2_sf_addname_easy( | ||
392 | xfs_da_args_t *args, /* operation arguments */ | ||
393 | xfs_dir2_sf_entry_t *sfep, /* pointer to new entry */ | ||
394 | xfs_dir2_data_aoff_t offset, /* offset to use for new ent */ | ||
395 | int new_isize) /* new directory size */ | ||
396 | { | ||
397 | int byteoff; /* byte offset in sf dir */ | ||
398 | xfs_inode_t *dp; /* incore directory inode */ | ||
399 | xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ | ||
400 | |||
401 | dp = args->dp; | ||
402 | |||
403 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | ||
404 | byteoff = (int)((char *)sfep - (char *)sfp); | ||
405 | /* | ||
406 | * Grow the in-inode space. | ||
407 | */ | ||
408 | xfs_idata_realloc(dp, dp->d_ops->sf_entsize(sfp, args->namelen), | ||
409 | XFS_DATA_FORK); | ||
410 | /* | ||
411 | * Need to set up again due to realloc of the inode data. | ||
412 | */ | ||
413 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | ||
414 | sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff); | ||
415 | /* | ||
416 | * Fill in the new entry. | ||
417 | */ | ||
418 | sfep->namelen = args->namelen; | ||
419 | xfs_dir2_sf_put_offset(sfep, offset); | ||
420 | memcpy(sfep->name, args->name, sfep->namelen); | ||
421 | dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); | ||
422 | dp->d_ops->sf_put_ftype(sfep, args->filetype); | ||
423 | |||
424 | /* | ||
425 | * Update the header and inode. | ||
426 | */ | ||
427 | sfp->count++; | ||
428 | #if XFS_BIG_INUMS | ||
429 | if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) | ||
430 | sfp->i8count++; | ||
431 | #endif | ||
432 | dp->i_d.di_size = new_isize; | ||
433 | xfs_dir2_sf_check(args); | ||
434 | } | ||
435 | |||
436 | /* | ||
437 | * Add the new entry the "hard" way. | ||
438 | * The caller has already converted to 8 byte inode numbers if necessary, | ||
439 | * in which case we need to leave the i8count at 1. | ||
440 | * Find a hole that the new entry will fit into, and copy | ||
441 | * the first part of the entries, the new entry, and the last part of | ||
442 | * the entries. | ||
443 | */ | ||
444 | /* ARGSUSED */ | ||
445 | static void | ||
446 | xfs_dir2_sf_addname_hard( | ||
447 | xfs_da_args_t *args, /* operation arguments */ | ||
448 | int objchange, /* changing inode number size */ | ||
449 | int new_isize) /* new directory size */ | ||
450 | { | ||
451 | int add_datasize; /* data size need for new ent */ | ||
452 | char *buf; /* buffer for old */ | ||
453 | xfs_inode_t *dp; /* incore directory inode */ | ||
454 | int eof; /* reached end of old dir */ | ||
455 | int nbytes; /* temp for byte copies */ | ||
456 | xfs_dir2_data_aoff_t new_offset; /* next offset value */ | ||
457 | xfs_dir2_data_aoff_t offset; /* current offset value */ | ||
458 | int old_isize; /* previous di_size */ | ||
459 | xfs_dir2_sf_entry_t *oldsfep; /* entry in original dir */ | ||
460 | xfs_dir2_sf_hdr_t *oldsfp; /* original shortform dir */ | ||
461 | xfs_dir2_sf_entry_t *sfep; /* entry in new dir */ | ||
462 | xfs_dir2_sf_hdr_t *sfp; /* new shortform dir */ | ||
463 | struct xfs_mount *mp; | ||
464 | |||
465 | /* | ||
466 | * Copy the old directory to the stack buffer. | ||
467 | */ | ||
468 | dp = args->dp; | ||
469 | mp = dp->i_mount; | ||
470 | |||
471 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | ||
472 | old_isize = (int)dp->i_d.di_size; | ||
473 | buf = kmem_alloc(old_isize, KM_SLEEP); | ||
474 | oldsfp = (xfs_dir2_sf_hdr_t *)buf; | ||
475 | memcpy(oldsfp, sfp, old_isize); | ||
476 | /* | ||
477 | * Loop over the old directory finding the place we're going | ||
478 | * to insert the new entry. | ||
479 | * If it's going to end up at the end then oldsfep will point there. | ||
480 | */ | ||
481 | for (offset = dp->d_ops->data_first_offset, | ||
482 | oldsfep = xfs_dir2_sf_firstentry(oldsfp), | ||
483 | add_datasize = dp->d_ops->data_entsize(args->namelen), | ||
484 | eof = (char *)oldsfep == &buf[old_isize]; | ||
485 | !eof; | ||
486 | offset = new_offset + dp->d_ops->data_entsize(oldsfep->namelen), | ||
487 | oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep), | ||
488 | eof = (char *)oldsfep == &buf[old_isize]) { | ||
489 | new_offset = xfs_dir2_sf_get_offset(oldsfep); | ||
490 | if (offset + add_datasize <= new_offset) | ||
491 | break; | ||
492 | } | ||
493 | /* | ||
494 | * Get rid of the old directory, then allocate space for | ||
495 | * the new one. We do this so xfs_idata_realloc won't copy | ||
496 | * the data. | ||
497 | */ | ||
498 | xfs_idata_realloc(dp, -old_isize, XFS_DATA_FORK); | ||
499 | xfs_idata_realloc(dp, new_isize, XFS_DATA_FORK); | ||
500 | /* | ||
501 | * Reset the pointer since the buffer was reallocated. | ||
502 | */ | ||
503 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | ||
504 | /* | ||
505 | * Copy the first part of the directory, including the header. | ||
506 | */ | ||
507 | nbytes = (int)((char *)oldsfep - (char *)oldsfp); | ||
508 | memcpy(sfp, oldsfp, nbytes); | ||
509 | sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + nbytes); | ||
510 | /* | ||
511 | * Fill in the new entry, and update the header counts. | ||
512 | */ | ||
513 | sfep->namelen = args->namelen; | ||
514 | xfs_dir2_sf_put_offset(sfep, offset); | ||
515 | memcpy(sfep->name, args->name, sfep->namelen); | ||
516 | dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); | ||
517 | dp->d_ops->sf_put_ftype(sfep, args->filetype); | ||
518 | sfp->count++; | ||
519 | #if XFS_BIG_INUMS | ||
520 | if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) | ||
521 | sfp->i8count++; | ||
522 | #endif | ||
523 | /* | ||
524 | * If there's more left to copy, do that. | ||
525 | */ | ||
526 | if (!eof) { | ||
527 | sfep = dp->d_ops->sf_nextentry(sfp, sfep); | ||
528 | memcpy(sfep, oldsfep, old_isize - nbytes); | ||
529 | } | ||
530 | kmem_free(buf); | ||
531 | dp->i_d.di_size = new_isize; | ||
532 | xfs_dir2_sf_check(args); | ||
533 | } | ||
534 | |||
535 | /* | ||
536 | * Decide if the new entry will fit at all. | ||
537 | * If it will fit, pick between adding the new entry to the end (easy) | ||
538 | * or somewhere else (hard). | ||
539 | * Return 0 (won't fit), 1 (easy), 2 (hard). | ||
540 | */ | ||
541 | /*ARGSUSED*/ | ||
542 | static int /* pick result */ | ||
543 | xfs_dir2_sf_addname_pick( | ||
544 | xfs_da_args_t *args, /* operation arguments */ | ||
545 | int objchange, /* inode # size changes */ | ||
546 | xfs_dir2_sf_entry_t **sfepp, /* out(1): new entry ptr */ | ||
547 | xfs_dir2_data_aoff_t *offsetp) /* out(1): new offset */ | ||
548 | { | ||
549 | xfs_inode_t *dp; /* incore directory inode */ | ||
550 | int holefit; /* found hole it will fit in */ | ||
551 | int i; /* entry number */ | ||
552 | xfs_mount_t *mp; /* filesystem mount point */ | ||
553 | xfs_dir2_data_aoff_t offset; /* data block offset */ | ||
554 | xfs_dir2_sf_entry_t *sfep; /* shortform entry */ | ||
555 | xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ | ||
556 | int size; /* entry's data size */ | ||
557 | int used; /* data bytes used */ | ||
558 | |||
559 | dp = args->dp; | ||
560 | mp = dp->i_mount; | ||
561 | |||
562 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | ||
563 | size = dp->d_ops->data_entsize(args->namelen); | ||
564 | offset = dp->d_ops->data_first_offset; | ||
565 | sfep = xfs_dir2_sf_firstentry(sfp); | ||
566 | holefit = 0; | ||
567 | /* | ||
568 | * Loop over sf entries. | ||
569 | * Keep track of data offset and whether we've seen a place | ||
570 | * to insert the new entry. | ||
571 | */ | ||
572 | for (i = 0; i < sfp->count; i++) { | ||
573 | if (!holefit) | ||
574 | holefit = offset + size <= xfs_dir2_sf_get_offset(sfep); | ||
575 | offset = xfs_dir2_sf_get_offset(sfep) + | ||
576 | dp->d_ops->data_entsize(sfep->namelen); | ||
577 | sfep = dp->d_ops->sf_nextentry(sfp, sfep); | ||
578 | } | ||
579 | /* | ||
580 | * Calculate data bytes used excluding the new entry, if this | ||
581 | * was a data block (block form directory). | ||
582 | */ | ||
583 | used = offset + | ||
584 | (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) + | ||
585 | (uint)sizeof(xfs_dir2_block_tail_t); | ||
586 | /* | ||
587 | * If it won't fit in a block form then we can't insert it, | ||
588 | * we'll go back, convert to block, then try the insert and convert | ||
589 | * to leaf. | ||
590 | */ | ||
591 | if (used + (holefit ? 0 : size) > args->geo->blksize) | ||
592 | return 0; | ||
593 | /* | ||
594 | * If changing the inode number size, do it the hard way. | ||
595 | */ | ||
596 | #if XFS_BIG_INUMS | ||
597 | if (objchange) { | ||
598 | return 2; | ||
599 | } | ||
600 | #else | ||
601 | ASSERT(objchange == 0); | ||
602 | #endif | ||
603 | /* | ||
604 | * If it won't fit at the end then do it the hard way (use the hole). | ||
605 | */ | ||
606 | if (used + size > args->geo->blksize) | ||
607 | return 2; | ||
608 | /* | ||
609 | * Do it the easy way. | ||
610 | */ | ||
611 | *sfepp = sfep; | ||
612 | *offsetp = offset; | ||
613 | return 1; | ||
614 | } | ||
615 | |||
616 | #ifdef DEBUG | ||
617 | /* | ||
618 | * Check consistency of shortform directory, assert if bad. | ||
619 | */ | ||
620 | static void | ||
621 | xfs_dir2_sf_check( | ||
622 | xfs_da_args_t *args) /* operation arguments */ | ||
623 | { | ||
624 | xfs_inode_t *dp; /* incore directory inode */ | ||
625 | int i; /* entry number */ | ||
626 | int i8count; /* number of big inode#s */ | ||
627 | xfs_ino_t ino; /* entry inode number */ | ||
628 | int offset; /* data offset */ | ||
629 | xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ | ||
630 | xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ | ||
631 | struct xfs_mount *mp; | ||
632 | |||
633 | dp = args->dp; | ||
634 | mp = dp->i_mount; | ||
635 | |||
636 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | ||
637 | offset = dp->d_ops->data_first_offset; | ||
638 | ino = dp->d_ops->sf_get_parent_ino(sfp); | ||
639 | i8count = ino > XFS_DIR2_MAX_SHORT_INUM; | ||
640 | |||
641 | for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); | ||
642 | i < sfp->count; | ||
643 | i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { | ||
644 | ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset); | ||
645 | ino = dp->d_ops->sf_get_ino(sfp, sfep); | ||
646 | i8count += ino > XFS_DIR2_MAX_SHORT_INUM; | ||
647 | offset = | ||
648 | xfs_dir2_sf_get_offset(sfep) + | ||
649 | dp->d_ops->data_entsize(sfep->namelen); | ||
650 | ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX); | ||
651 | } | ||
652 | ASSERT(i8count == sfp->i8count); | ||
653 | ASSERT(XFS_BIG_INUMS || i8count == 0); | ||
654 | ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); | ||
655 | ASSERT(offset + | ||
656 | (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + | ||
657 | (uint)sizeof(xfs_dir2_block_tail_t) <= args->geo->blksize); | ||
658 | } | ||
659 | #endif /* DEBUG */ | ||
660 | |||
661 | /* | ||
662 | * Create a new (shortform) directory. | ||
663 | */ | ||
664 | int /* error, always 0 */ | ||
665 | xfs_dir2_sf_create( | ||
666 | xfs_da_args_t *args, /* operation arguments */ | ||
667 | xfs_ino_t pino) /* parent inode number */ | ||
668 | { | ||
669 | xfs_inode_t *dp; /* incore directory inode */ | ||
670 | int i8count; /* parent inode is an 8-byte number */ | ||
671 | xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ | ||
672 | int size; /* directory size */ | ||
673 | |||
674 | trace_xfs_dir2_sf_create(args); | ||
675 | |||
676 | dp = args->dp; | ||
677 | |||
678 | ASSERT(dp != NULL); | ||
679 | ASSERT(dp->i_d.di_size == 0); | ||
680 | /* | ||
681 | * If it's currently a zero-length extent file, | ||
682 | * convert it to local format. | ||
683 | */ | ||
684 | if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) { | ||
685 | dp->i_df.if_flags &= ~XFS_IFEXTENTS; /* just in case */ | ||
686 | dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; | ||
687 | xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); | ||
688 | dp->i_df.if_flags |= XFS_IFINLINE; | ||
689 | } | ||
690 | ASSERT(dp->i_df.if_flags & XFS_IFINLINE); | ||
691 | ASSERT(dp->i_df.if_bytes == 0); | ||
692 | i8count = pino > XFS_DIR2_MAX_SHORT_INUM; | ||
693 | size = xfs_dir2_sf_hdr_size(i8count); | ||
694 | /* | ||
695 | * Make a buffer for the data. | ||
696 | */ | ||
697 | xfs_idata_realloc(dp, size, XFS_DATA_FORK); | ||
698 | /* | ||
699 | * Fill in the header, | ||
700 | */ | ||
701 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | ||
702 | sfp->i8count = i8count; | ||
703 | /* | ||
704 | * Now can put in the inode number, since i8count is set. | ||
705 | */ | ||
706 | dp->d_ops->sf_put_parent_ino(sfp, pino); | ||
707 | sfp->count = 0; | ||
708 | dp->i_d.di_size = size; | ||
709 | xfs_dir2_sf_check(args); | ||
710 | xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); | ||
711 | return 0; | ||
712 | } | ||
713 | |||
714 | /* | ||
715 | * Lookup an entry in a shortform directory. | ||
716 | * Returns EEXIST if found, ENOENT if not found. | ||
717 | */ | ||
718 | int /* error */ | ||
719 | xfs_dir2_sf_lookup( | ||
720 | xfs_da_args_t *args) /* operation arguments */ | ||
721 | { | ||
722 | xfs_inode_t *dp; /* incore directory inode */ | ||
723 | int i; /* entry index */ | ||
724 | int error; | ||
725 | xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ | ||
726 | xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ | ||
727 | enum xfs_dacmp cmp; /* comparison result */ | ||
728 | xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */ | ||
729 | |||
730 | trace_xfs_dir2_sf_lookup(args); | ||
731 | |||
732 | xfs_dir2_sf_check(args); | ||
733 | dp = args->dp; | ||
734 | |||
735 | ASSERT(dp->i_df.if_flags & XFS_IFINLINE); | ||
736 | /* | ||
737 | * Bail out if the directory is way too short. | ||
738 | */ | ||
739 | if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { | ||
740 | ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); | ||
741 | return -EIO; | ||
742 | } | ||
743 | ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); | ||
744 | ASSERT(dp->i_df.if_u1.if_data != NULL); | ||
745 | sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | ||
746 | ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); | ||
747 | /* | ||
748 | * Special case for . | ||
749 | */ | ||
750 | if (args->namelen == 1 && args->name[0] == '.') { | ||
751 | args->inumber = dp->i_ino; | ||
752 | args->cmpresult = XFS_CMP_EXACT; | ||
753 | args->filetype = XFS_DIR3_FT_DIR; | ||
754 | return -EEXIST; | ||
755 | } | ||
756 | /* | ||
757 | * Special case for .. | ||
758 | */ | ||
759 | if (args->namelen == 2 && | ||
760 | args->name[0] == '.' && args->name[1] == '.') { | ||
761 | args->inumber = dp->d_ops->sf_get_parent_ino(sfp); | ||
762 | args->cmpresult = XFS_CMP_EXACT; | ||
763 | args->filetype = XFS_DIR3_FT_DIR; | ||
764 | return -EEXIST; | ||
765 | } | ||
766 | /* | ||
767 | * Loop over all the entries trying to match ours. | ||
768 | */ | ||
769 | ci_sfep = NULL; | ||
770 | for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; | ||
771 | i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { | ||
772 | /* | ||
773 | * Compare name and if it's an exact match, return the inode | ||
774 | * number. If it's the first case-insensitive match, store the | ||
775 | * inode number and continue looking for an exact match. | ||
776 | */ | ||
777 | cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name, | ||
778 | sfep->namelen); | ||
779 | if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { | ||
780 | args->cmpresult = cmp; | ||
781 | args->inumber = dp->d_ops->sf_get_ino(sfp, sfep); | ||
782 | args->filetype = dp->d_ops->sf_get_ftype(sfep); | ||
783 | if (cmp == XFS_CMP_EXACT) | ||
784 | return -EEXIST; | ||
785 | ci_sfep = sfep; | ||
786 | } | ||
787 | } | ||
788 | ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); | ||
789 | /* | ||
790 | * Here, we can only be doing a lookup (not a rename or replace). | ||
791 | * If a case-insensitive match was not found, return -ENOENT. | ||
792 | */ | ||
793 | if (!ci_sfep) | ||
794 | return -ENOENT; | ||
795 | /* otherwise process the CI match as required by the caller */ | ||
796 | error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen); | ||
797 | return error; | ||
798 | } | ||
799 | |||
/*
 * Remove an entry from a shortform directory.
 *
 * The entry whose name exactly matches args->name is cut out of the inline
 * data, the following entries are slid down over it, and the inode's data
 * fork is shrunk to match.
 *
 * Returns 0 on success, -ENOENT if no entry matches, or -EIO if the
 * directory is too small to contain a shortform header.
 */
int						/* error */
xfs_dir2_sf_removename(
	xfs_da_args_t		*args)
{
	int			byteoff;	/* offset of removed entry */
	xfs_inode_t		*dp;		/* incore directory inode */
	int			entsize;	/* this entry's size */
	int			i;		/* shortform entry index */
	int			newsize;	/* new inode size */
	int			oldsize;	/* old inode size */
	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */

	trace_xfs_dir2_sf_removename(args);

	dp = args->dp;

	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
	oldsize = (int)dp->i_d.di_size;
	/*
	 * Bail out if the directory is way too short.
	 */
	if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) {
		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
		return -EIO;
	}
	ASSERT(dp->i_df.if_bytes == oldsize);
	ASSERT(dp->i_df.if_u1.if_data != NULL);
	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count));
	/*
	 * Loop over the old directory entries.
	 * Find the one we're deleting.
	 * On a match we break out with sfep pointing at the entry and
	 * i < sfp->count.
	 */
	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
				i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
		if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
								XFS_CMP_EXACT) {
			ASSERT(dp->d_ops->sf_get_ino(sfp, sfep) ==
			       args->inumber);
			break;
		}
	}
	/*
	 * Didn't find it.
	 */
	if (i == sfp->count)
		return -ENOENT;
	/*
	 * Calculate sizes.
	 * The entry size is derived from args->namelen rather than
	 * sfep->namelen; presumably the two are equal for an exact match.
	 */
	byteoff = (int)((char *)sfep - (char *)sfp);
	entsize = dp->d_ops->sf_entsize(sfp, args->namelen);
	newsize = oldsize - entsize;
	/*
	 * Copy the part if any after the removed entry, sliding it down.
	 * memmove is used because source and destination overlap.
	 */
	if (byteoff + entsize < oldsize)
		memmove((char *)sfp + byteoff, (char *)sfp + byteoff + entsize,
			oldsize - (byteoff + entsize));
	/*
	 * Fix up the header and file size.
	 */
	sfp->count--;
	dp->i_d.di_size = newsize;
	/*
	 * Reallocate, making it smaller.
	 * sfp is reloaded afterwards because the fork data pointer is
	 * re-read after every xfs_idata_realloc() call in this file.
	 */
	xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
#if XFS_BIG_INUMS
	/*
	 * Are we changing inode number size?
	 * If the removed inode number was the last large one, convert the
	 * whole directory back to 4-byte inode numbers; otherwise just
	 * drop the count of large inode numbers.
	 */
	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) {
		if (sfp->i8count == 1)
			xfs_dir2_sf_toino4(args);
		else
			sfp->i8count--;
	}
#endif
	xfs_dir2_sf_check(args);
	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
	return 0;
}
888 | |||
/*
 * Replace the inode number of an entry in a shortform directory.
 *
 * The entry named by args->name (or the parent pointer, for "..") is
 * rewritten to refer to args->inumber; for ordinary entries the file type
 * is updated to args->filetype as well.  With XFS_BIG_INUMS the directory
 * is converted between 4-byte and 8-byte inode number formats as needed,
 * and if the 8-byte form no longer fits in the inode fork the directory
 * is converted to block form and the replace is done there.
 *
 * Returns 0 on success, -ENOENT if the name is not present, -EIO if the
 * directory is too small to hold a header, or an error from the block
 * conversion/replace path.
 */
int						/* error */
xfs_dir2_sf_replace(
	xfs_da_args_t		*args)		/* operation arguments */
{
	xfs_inode_t		*dp;		/* incore directory inode */
	int			i;		/* entry index */
#if XFS_BIG_INUMS || defined(DEBUG)
	xfs_ino_t		ino=0;		/* entry old inode number */
#endif
#if XFS_BIG_INUMS
	int			i8elevated;	/* sf_toino8 set i8count=1 */
#endif
	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */

	trace_xfs_dir2_sf_replace(args);

	dp = args->dp;

	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
	/*
	 * Bail out if the shortform directory is way too small.
	 */
	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
		return -EIO;
	}
	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
	ASSERT(dp->i_df.if_u1.if_data != NULL);
	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
#if XFS_BIG_INUMS
	/*
	 * New inode number is large, and need to convert to 8-byte inodes.
	 */
	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
		int	error;			/* error return value */
		int	newsize;		/* new inode size */

		/*
		 * Every entry plus the parent pointer grows by the
		 * difference between an 8-byte and a 4-byte inode number.
		 */
		newsize =
			dp->i_df.if_bytes +
			(sfp->count + 1) *
			((uint)sizeof(xfs_dir2_ino8_t) -
			 (uint)sizeof(xfs_dir2_ino4_t));
		/*
		 * Won't fit as shortform, convert to block then do replace.
		 */
		if (newsize > XFS_IFORK_DSIZE(dp)) {
			error = xfs_dir2_sf_to_block(args);
			if (error) {
				return error;
			}
			return xfs_dir2_block_replace(args);
		}
		/*
		 * Still fits, convert to 8-byte now.
		 * The conversion reallocates the fork data, so sfp has to
		 * be reloaded afterwards.
		 */
		xfs_dir2_sf_toino8(args);
		i8elevated = 1;
		sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	} else
		i8elevated = 0;
#endif
	/* Replacing "." is never valid. */
	ASSERT(args->namelen != 1 || args->name[0] != '.');
	/*
	 * Replace ..'s entry.
	 */
	if (args->namelen == 2 &&
	    args->name[0] == '.' && args->name[1] == '.') {
#if XFS_BIG_INUMS || defined(DEBUG)
		ino = dp->d_ops->sf_get_parent_ino(sfp);
		ASSERT(args->inumber != ino);
#endif
		dp->d_ops->sf_put_parent_ino(sfp, args->inumber);
	}
	/*
	 * Normal entry, look for the name.
	 */
	else {
		for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
		     i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
			if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
								XFS_CMP_EXACT) {
#if XFS_BIG_INUMS || defined(DEBUG)
				ino = dp->d_ops->sf_get_ino(sfp, sfep);
				ASSERT(args->inumber != ino);
#endif
				dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
				dp->d_ops->sf_put_ftype(sfep, args->filetype);
				break;
			}
		}
		/*
		 * Didn't find it.
		 * Undo the 8-byte conversion done above, if any, so the
		 * directory is left in its original format.
		 */
		if (i == sfp->count) {
			ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
#if XFS_BIG_INUMS
			if (i8elevated)
				xfs_dir2_sf_toino4(args);
#endif
			return -ENOENT;
		}
	}
#if XFS_BIG_INUMS
	/*
	 * See if the old number was large, the new number is small.
	 */
	if (ino > XFS_DIR2_MAX_SHORT_INUM &&
	    args->inumber <= XFS_DIR2_MAX_SHORT_INUM) {
		/*
		 * And the old count was one, so need to convert to small.
		 */
		if (sfp->i8count == 1)
			xfs_dir2_sf_toino4(args);
		else
			sfp->i8count--;
	}
	/*
	 * See if the old number was small, the new number is large.
	 */
	if (ino <= XFS_DIR2_MAX_SHORT_INUM &&
	    args->inumber > XFS_DIR2_MAX_SHORT_INUM) {
		/*
		 * add to the i8count unless we just converted to 8-byte
		 * inodes (which does an implied i8count = 1)
		 */
		ASSERT(sfp->i8count != 0);
		if (!i8elevated)
			sfp->i8count++;
	}
#endif
	xfs_dir2_sf_check(args);
	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
	return 0;
}
1028 | |||
1029 | #if XFS_BIG_INUMS | ||
/*
 * Convert from 8-byte inode numbers to 4-byte inode numbers.
 * The last 8-byte inode number is gone, but the count is still 1.
 *
 * The directory is rebuilt from a temporary copy: the old contents are
 * saved, the data fork is freed and reallocated at the smaller size, and
 * the header and every entry are rewritten in the 4-byte format.  The
 * inode size is updated and the inode core and data are logged.
 */
static void
xfs_dir2_sf_toino4(
	xfs_da_args_t		*args)		/* operation arguments */
{
	char			*buf;		/* old dir's buffer */
	xfs_inode_t		*dp;		/* incore directory inode */
	int			i;		/* entry index */
	int			newsize;	/* new inode size */
	xfs_dir2_sf_entry_t	*oldsfep;	/* old sf entry */
	xfs_dir2_sf_hdr_t	*oldsfp;	/* old sf directory */
	int			oldsize;	/* old inode size */
	xfs_dir2_sf_entry_t	*sfep;		/* new sf entry */
	xfs_dir2_sf_hdr_t	*sfp;		/* new sf directory */
	struct xfs_mount	*mp;		/* NOTE(review): appears unused below */

	trace_xfs_dir2_sf_toino4(args);

	dp = args->dp;
	mp = dp->i_mount;

	/*
	 * Copy the old directory to the buffer.
	 * Then nuke it from the inode, and add the new buffer to the inode.
	 * Don't want xfs_idata_realloc copying the data here.
	 */
	oldsize = dp->i_df.if_bytes;
	buf = kmem_alloc(oldsize, KM_SLEEP);
	oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	ASSERT(oldsfp->i8count == 1);
	memcpy(buf, oldsfp, oldsize);
	/*
	 * Compute the new inode size.
	 * Each entry plus the parent pointer (hence count + 1) shrinks by
	 * the difference between an 8-byte and a 4-byte inode number.
	 */
	newsize =
		oldsize -
		(oldsfp->count + 1) *
		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
	xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
	xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
	/*
	 * Reset our pointers, the data has moved.
	 * From here on oldsfp refers to the saved copy in buf.
	 */
	oldsfp = (xfs_dir2_sf_hdr_t *)buf;
	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	/*
	 * Fill in the new header.
	 * i8count is set before the parent inode number is stored, the
	 * same order xfs_dir2_sf_create() uses.
	 */
	sfp->count = oldsfp->count;
	sfp->i8count = 0;
	dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp));
	/*
	 * Copy the entries field by field.
	 * The old and new directories are walked in lockstep, each through
	 * its own header.
	 */
	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
		    oldsfep = xfs_dir2_sf_firstentry(oldsfp);
	     i < sfp->count;
	     i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep),
		  oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) {
		sfep->namelen = oldsfep->namelen;
		sfep->offset = oldsfep->offset;
		memcpy(sfep->name, oldsfep->name, sfep->namelen);
		dp->d_ops->sf_put_ino(sfp, sfep,
			dp->d_ops->sf_get_ino(oldsfp, oldsfep));
		dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep));
	}
	/*
	 * Clean up the inode.
	 */
	kmem_free(buf);
	dp->i_d.di_size = newsize;
	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
}
1106 | |||
/*
 * Convert existing entries from 4-byte inode numbers to 8-byte inode numbers.
 * The new entry w/ an 8-byte inode number is not there yet; we leave with
 * i8count set to 1, but no corresponding 8-byte entry.
 *
 * The directory is rebuilt from a temporary copy: the old contents are
 * saved, the data fork is freed and reallocated at the larger size, and
 * the header and every entry are rewritten in the 8-byte format.  The
 * inode size is updated and the inode core and data are logged.
 */
static void
xfs_dir2_sf_toino8(
	xfs_da_args_t		*args)		/* operation arguments */
{
	char			*buf;		/* old dir's buffer */
	xfs_inode_t		*dp;		/* incore directory inode */
	int			i;		/* entry index */
	int			newsize;	/* new inode size */
	xfs_dir2_sf_entry_t	*oldsfep;	/* old sf entry */
	xfs_dir2_sf_hdr_t	*oldsfp;	/* old sf directory */
	int			oldsize;	/* old inode size */
	xfs_dir2_sf_entry_t	*sfep;		/* new sf entry */
	xfs_dir2_sf_hdr_t	*sfp;		/* new sf directory */
	struct xfs_mount	*mp;		/* NOTE(review): appears unused below */

	trace_xfs_dir2_sf_toino8(args);

	dp = args->dp;
	mp = dp->i_mount;

	/*
	 * Copy the old directory to the buffer.
	 * Then nuke it from the inode, and add the new buffer to the inode.
	 * Don't want xfs_idata_realloc copying the data here.
	 */
	oldsize = dp->i_df.if_bytes;
	buf = kmem_alloc(oldsize, KM_SLEEP);
	oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	ASSERT(oldsfp->i8count == 0);
	memcpy(buf, oldsfp, oldsize);
	/*
	 * Compute the new inode size (nb: entry count + 1 for parent)
	 */
	newsize =
		oldsize +
		(oldsfp->count + 1) *
		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
	xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
	xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
	/*
	 * Reset our pointers, the data has moved.
	 * From here on oldsfp refers to the saved copy in buf.
	 */
	oldsfp = (xfs_dir2_sf_hdr_t *)buf;
	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
	/*
	 * Fill in the new header.
	 * i8count is set before the parent inode number is stored, the
	 * same order xfs_dir2_sf_create() uses.
	 */
	sfp->count = oldsfp->count;
	sfp->i8count = 1;
	dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp));
	/*
	 * Copy the entries field by field.
	 * The old and new directories are walked in lockstep, each through
	 * its own header.
	 */
	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
		    oldsfep = xfs_dir2_sf_firstentry(oldsfp);
	     i < sfp->count;
	     i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep),
		  oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) {
		sfep->namelen = oldsfep->namelen;
		sfep->offset = oldsfep->offset;
		memcpy(sfep->name, oldsfep->name, sfep->namelen);
		dp->d_ops->sf_put_ino(sfp, sfep,
			dp->d_ops->sf_get_ino(oldsfp, oldsfep));
		dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep));
	}
	/*
	 * Clean up the inode.
	 */
	kmem_free(buf);
	dp->i_d.di_size = newsize;
	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
}
1184 | #endif /* XFS_BIG_INUMS */ | ||
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c new file mode 100644 index 000000000000..bb969337efc8 --- /dev/null +++ b/fs/xfs/libxfs/xfs_dquot_buf.c | |||
@@ -0,0 +1,290 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2013 Red Hat, Inc. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #include "xfs.h" | ||
20 | #include "xfs_fs.h" | ||
21 | #include "xfs_shared.h" | ||
22 | #include "xfs_format.h" | ||
23 | #include "xfs_log_format.h" | ||
24 | #include "xfs_trans_resv.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_inode.h" | ||
29 | #include "xfs_quota.h" | ||
30 | #include "xfs_trans.h" | ||
31 | #include "xfs_qm.h" | ||
32 | #include "xfs_error.h" | ||
33 | #include "xfs_cksum.h" | ||
34 | #include "xfs_trace.h" | ||
35 | |||
36 | int | ||
37 | xfs_calc_dquots_per_chunk( | ||
38 | unsigned int nbblks) /* basic block units */ | ||
39 | { | ||
40 | unsigned int ndquots; | ||
41 | |||
42 | ASSERT(nbblks > 0); | ||
43 | ndquots = BBTOB(nbblks); | ||
44 | do_div(ndquots, sizeof(xfs_dqblk_t)); | ||
45 | |||
46 | return ndquots; | ||
47 | } | ||
48 | |||
49 | /* | ||
50 | * Do some primitive error checking on ondisk dquot data structures. | ||
51 | */ | ||
52 | int | ||
53 | xfs_dqcheck( | ||
54 | struct xfs_mount *mp, | ||
55 | xfs_disk_dquot_t *ddq, | ||
56 | xfs_dqid_t id, | ||
57 | uint type, /* used only when IO_dorepair is true */ | ||
58 | uint flags, | ||
59 | char *str) | ||
60 | { | ||
61 | xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; | ||
62 | int errs = 0; | ||
63 | |||
64 | /* | ||
65 | * We can encounter an uninitialized dquot buffer for 2 reasons: | ||
66 | * 1. If we crash while deleting the quotainode(s), and those blks got | ||
67 | * used for user data. This is because we take the path of regular | ||
68 | * file deletion; however, the size field of quotainodes is never | ||
69 | * updated, so all the tricks that we play in itruncate_finish | ||
70 | * don't quite matter. | ||
71 | * | ||
72 | * 2. We don't play the quota buffers when there's a quotaoff logitem. | ||
73 | * But the allocation will be replayed so we'll end up with an | ||
74 | * uninitialized quota block. | ||
75 | * | ||
76 | * This is all fine; things are still consistent, and we haven't lost | ||
77 | * any quota information. Just don't complain about bad dquot blks. | ||
78 | */ | ||
79 | if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { | ||
80 | if (flags & XFS_QMOPT_DOWARN) | ||
81 | xfs_alert(mp, | ||
82 | "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", | ||
83 | str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); | ||
84 | errs++; | ||
85 | } | ||
86 | if (ddq->d_version != XFS_DQUOT_VERSION) { | ||
87 | if (flags & XFS_QMOPT_DOWARN) | ||
88 | xfs_alert(mp, | ||
89 | "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", | ||
90 | str, id, ddq->d_version, XFS_DQUOT_VERSION); | ||
91 | errs++; | ||
92 | } | ||
93 | |||
94 | if (ddq->d_flags != XFS_DQ_USER && | ||
95 | ddq->d_flags != XFS_DQ_PROJ && | ||
96 | ddq->d_flags != XFS_DQ_GROUP) { | ||
97 | if (flags & XFS_QMOPT_DOWARN) | ||
98 | xfs_alert(mp, | ||
99 | "%s : XFS dquot ID 0x%x, unknown flags 0x%x", | ||
100 | str, id, ddq->d_flags); | ||
101 | errs++; | ||
102 | } | ||
103 | |||
104 | if (id != -1 && id != be32_to_cpu(ddq->d_id)) { | ||
105 | if (flags & XFS_QMOPT_DOWARN) | ||
106 | xfs_alert(mp, | ||
107 | "%s : ondisk-dquot 0x%p, ID mismatch: " | ||
108 | "0x%x expected, found id 0x%x", | ||
109 | str, ddq, id, be32_to_cpu(ddq->d_id)); | ||
110 | errs++; | ||
111 | } | ||
112 | |||
113 | if (!errs && ddq->d_id) { | ||
114 | if (ddq->d_blk_softlimit && | ||
115 | be64_to_cpu(ddq->d_bcount) > | ||
116 | be64_to_cpu(ddq->d_blk_softlimit)) { | ||
117 | if (!ddq->d_btimer) { | ||
118 | if (flags & XFS_QMOPT_DOWARN) | ||
119 | xfs_alert(mp, | ||
120 | "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", | ||
121 | str, (int)be32_to_cpu(ddq->d_id), ddq); | ||
122 | errs++; | ||
123 | } | ||
124 | } | ||
125 | if (ddq->d_ino_softlimit && | ||
126 | be64_to_cpu(ddq->d_icount) > | ||
127 | be64_to_cpu(ddq->d_ino_softlimit)) { | ||
128 | if (!ddq->d_itimer) { | ||
129 | if (flags & XFS_QMOPT_DOWARN) | ||
130 | xfs_alert(mp, | ||
131 | "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", | ||
132 | str, (int)be32_to_cpu(ddq->d_id), ddq); | ||
133 | errs++; | ||
134 | } | ||
135 | } | ||
136 | if (ddq->d_rtb_softlimit && | ||
137 | be64_to_cpu(ddq->d_rtbcount) > | ||
138 | be64_to_cpu(ddq->d_rtb_softlimit)) { | ||
139 | if (!ddq->d_rtbtimer) { | ||
140 | if (flags & XFS_QMOPT_DOWARN) | ||
141 | xfs_alert(mp, | ||
142 | "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", | ||
143 | str, (int)be32_to_cpu(ddq->d_id), ddq); | ||
144 | errs++; | ||
145 | } | ||
146 | } | ||
147 | } | ||
148 | |||
149 | if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) | ||
150 | return errs; | ||
151 | |||
152 | if (flags & XFS_QMOPT_DOWARN) | ||
153 | xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); | ||
154 | |||
155 | /* | ||
156 | * Typically, a repair is only requested by quotacheck. | ||
157 | */ | ||
158 | ASSERT(id != -1); | ||
159 | ASSERT(flags & XFS_QMOPT_DQREPAIR); | ||
160 | memset(d, 0, sizeof(xfs_dqblk_t)); | ||
161 | |||
162 | d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); | ||
163 | d->dd_diskdq.d_version = XFS_DQUOT_VERSION; | ||
164 | d->dd_diskdq.d_flags = type; | ||
165 | d->dd_diskdq.d_id = cpu_to_be32(id); | ||
166 | |||
167 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
168 | uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); | ||
169 | xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), | ||
170 | XFS_DQUOT_CRC_OFF); | ||
171 | } | ||
172 | |||
173 | return errs; | ||
174 | } | ||
175 | |||
176 | STATIC bool | ||
177 | xfs_dquot_buf_verify_crc( | ||
178 | struct xfs_mount *mp, | ||
179 | struct xfs_buf *bp) | ||
180 | { | ||
181 | struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; | ||
182 | int ndquots; | ||
183 | int i; | ||
184 | |||
185 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
186 | return true; | ||
187 | |||
188 | /* | ||
189 | * if we are in log recovery, the quota subsystem has not been | ||
190 | * initialised so we have no quotainfo structure. In that case, we need | ||
191 | * to manually calculate the number of dquots in the buffer. | ||
192 | */ | ||
193 | if (mp->m_quotainfo) | ||
194 | ndquots = mp->m_quotainfo->qi_dqperchunk; | ||
195 | else | ||
196 | ndquots = xfs_calc_dquots_per_chunk( | ||
197 | XFS_BB_TO_FSB(mp, bp->b_length)); | ||
198 | |||
199 | for (i = 0; i < ndquots; i++, d++) { | ||
200 | if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), | ||
201 | XFS_DQUOT_CRC_OFF)) | ||
202 | return false; | ||
203 | if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid)) | ||
204 | return false; | ||
205 | } | ||
206 | return true; | ||
207 | } | ||
208 | |||
209 | STATIC bool | ||
210 | xfs_dquot_buf_verify( | ||
211 | struct xfs_mount *mp, | ||
212 | struct xfs_buf *bp) | ||
213 | { | ||
214 | struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; | ||
215 | xfs_dqid_t id = 0; | ||
216 | int ndquots; | ||
217 | int i; | ||
218 | |||
219 | /* | ||
220 | * if we are in log recovery, the quota subsystem has not been | ||
221 | * initialised so we have no quotainfo structure. In that case, we need | ||
222 | * to manually calculate the number of dquots in the buffer. | ||
223 | */ | ||
224 | if (mp->m_quotainfo) | ||
225 | ndquots = mp->m_quotainfo->qi_dqperchunk; | ||
226 | else | ||
227 | ndquots = xfs_calc_dquots_per_chunk(bp->b_length); | ||
228 | |||
229 | /* | ||
230 | * On the first read of the buffer, verify that each dquot is valid. | ||
231 | * We don't know what the id of the dquot is supposed to be, just that | ||
232 | * they should be increasing monotonically within the buffer. If the | ||
233 | * first id is corrupt, then it will fail on the second dquot in the | ||
234 | * buffer so corruptions could point to the wrong dquot in this case. | ||
235 | */ | ||
236 | for (i = 0; i < ndquots; i++) { | ||
237 | struct xfs_disk_dquot *ddq; | ||
238 | int error; | ||
239 | |||
240 | ddq = &d[i].dd_diskdq; | ||
241 | |||
242 | if (i == 0) | ||
243 | id = be32_to_cpu(ddq->d_id); | ||
244 | |||
245 | error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, | ||
246 | "xfs_dquot_buf_verify"); | ||
247 | if (error) | ||
248 | return false; | ||
249 | } | ||
250 | return true; | ||
251 | } | ||
252 | |||
253 | static void | ||
254 | xfs_dquot_buf_read_verify( | ||
255 | struct xfs_buf *bp) | ||
256 | { | ||
257 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
258 | |||
259 | if (!xfs_dquot_buf_verify_crc(mp, bp)) | ||
260 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
261 | else if (!xfs_dquot_buf_verify(mp, bp)) | ||
262 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
263 | |||
264 | if (bp->b_error) | ||
265 | xfs_verifier_error(bp); | ||
266 | } | ||
267 | |||
268 | /* | ||
269 | * we don't calculate the CRC here as that is done when the dquot is flushed to | ||
270 | * the buffer after the update is done. This ensures that the dquot in the | ||
271 | * buffer always has an up-to-date CRC value. | ||
272 | */ | ||
273 | static void | ||
274 | xfs_dquot_buf_write_verify( | ||
275 | struct xfs_buf *bp) | ||
276 | { | ||
277 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
278 | |||
279 | if (!xfs_dquot_buf_verify(mp, bp)) { | ||
280 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
281 | xfs_verifier_error(bp); | ||
282 | return; | ||
283 | } | ||
284 | } | ||
285 | |||
286 | const struct xfs_buf_ops xfs_dquot_buf_ops = { | ||
287 | .verify_read = xfs_dquot_buf_read_verify, | ||
288 | .verify_write = xfs_dquot_buf_write_verify, | ||
289 | }; | ||
290 | |||
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h new file mode 100644 index 000000000000..34d85aca3058 --- /dev/null +++ b/fs/xfs/libxfs/xfs_format.h | |||
@@ -0,0 +1,428 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_FORMAT_H__ | ||
19 | #define __XFS_FORMAT_H__ | ||
20 | |||
21 | /* | ||
22 | * XFS On Disk Format Definitions | ||
23 | * | ||
24 | * This header file defines all the on-disk format definitions for | ||
25 | * general XFS objects. Directory and attribute related objects are defined in | ||
26 | * xfs_da_format.h, while log and log item formats are defined in | ||
27 | * xfs_log_format.h. Everything else goes here. | ||
28 | */ | ||
29 | |||
30 | struct xfs_mount; | ||
31 | struct xfs_trans; | ||
32 | struct xfs_inode; | ||
33 | struct xfs_buf; | ||
34 | struct xfs_ifork; | ||
35 | |||
36 | /* | ||
37 | * RealTime Device format definitions | ||
38 | */ | ||
39 | |||
40 | /* Min and max rt extent sizes, specified in bytes */ | ||
41 | #define XFS_MAX_RTEXTSIZE (1024 * 1024 * 1024) /* 1GB */ | ||
42 | #define XFS_DFL_RTEXTSIZE (64 * 1024) /* 64kB */ | ||
43 | #define XFS_MIN_RTEXTSIZE (4 * 1024) /* 4kB */ | ||
44 | |||
45 | #define XFS_BLOCKSIZE(mp) ((mp)->m_sb.sb_blocksize) | ||
46 | #define XFS_BLOCKMASK(mp) ((mp)->m_blockmask) | ||
47 | #define XFS_BLOCKWSIZE(mp) ((mp)->m_blockwsize) | ||
48 | #define XFS_BLOCKWMASK(mp) ((mp)->m_blockwmask) | ||
49 | |||
50 | /* | ||
51 | * RT Summary and bit manipulation macros. | ||
52 | */ | ||
53 | #define XFS_SUMOFFS(mp,ls,bb) ((int)((ls) * (mp)->m_sb.sb_rbmblocks + (bb))) | ||
54 | #define XFS_SUMOFFSTOBLOCK(mp,s) \ | ||
55 | (((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog) | ||
56 | #define XFS_SUMPTR(mp,bp,so) \ | ||
57 | ((xfs_suminfo_t *)((bp)->b_addr + \ | ||
58 | (((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp)))) | ||
59 | |||
60 | #define XFS_BITTOBLOCK(mp,bi) ((bi) >> (mp)->m_blkbit_log) | ||
61 | #define XFS_BLOCKTOBIT(mp,bb) ((bb) << (mp)->m_blkbit_log) | ||
62 | #define XFS_BITTOWORD(mp,bi) \ | ||
63 | ((int)(((bi) >> XFS_NBWORDLOG) & XFS_BLOCKWMASK(mp))) | ||
64 | |||
65 | #define XFS_RTMIN(a,b) ((a) < (b) ? (a) : (b)) | ||
66 | #define XFS_RTMAX(a,b) ((a) > (b) ? (a) : (b)) | ||
67 | |||
68 | #define XFS_RTLOBIT(w) xfs_lowbit32(w) | ||
69 | #define XFS_RTHIBIT(w) xfs_highbit32(w) | ||
70 | |||
71 | #if XFS_BIG_BLKNOS | ||
72 | #define XFS_RTBLOCKLOG(b) xfs_highbit64(b) | ||
73 | #else | ||
74 | #define XFS_RTBLOCKLOG(b) xfs_highbit32(b) | ||
75 | #endif | ||
76 | |||
77 | /* | ||
78 | * Dquot and dquot block format definitions | ||
79 | */ | ||
80 | #define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */ | ||
81 | #define XFS_DQUOT_VERSION (u_int8_t)0x01 /* latest version number */ | ||
82 | |||
83 | /* | ||
84 | * This is the main portion of the on-disk representation of quota | ||
85 | * information for a user. This is the q_core of the xfs_dquot_t that | ||
86 | * is kept in kernel memory. We pad this with some more expansion room | ||
87 | * to construct the on disk structure. | ||
88 | */ | ||
89 | typedef struct xfs_disk_dquot { | ||
90 | __be16 d_magic; /* dquot magic = XFS_DQUOT_MAGIC */ | ||
91 | __u8 d_version; /* dquot version */ | ||
92 | __u8 d_flags; /* XFS_DQ_USER/PROJ/GROUP */ | ||
93 | __be32 d_id; /* user,project,group id */ | ||
94 | __be64 d_blk_hardlimit;/* absolute limit on disk blks */ | ||
95 | __be64 d_blk_softlimit;/* preferred limit on disk blks */ | ||
96 | __be64 d_ino_hardlimit;/* maximum # allocated inodes */ | ||
97 | __be64 d_ino_softlimit;/* preferred inode limit */ | ||
98 | __be64 d_bcount; /* disk blocks owned by the user */ | ||
99 | __be64 d_icount; /* inodes owned by the user */ | ||
100 | __be32 d_itimer; /* zero if within inode limits if not, | ||
101 | this is when we refuse service */ | ||
102 | __be32 d_btimer; /* similar to above; for disk blocks */ | ||
103 | __be16 d_iwarns; /* warnings issued wrt num inodes */ | ||
104 | __be16 d_bwarns; /* warnings issued wrt disk blocks */ | ||
105 | __be32 d_pad0; /* 64 bit align */ | ||
106 | __be64 d_rtb_hardlimit;/* absolute limit on realtime blks */ | ||
107 | __be64 d_rtb_softlimit;/* preferred limit on RT disk blks */ | ||
108 | __be64 d_rtbcount; /* realtime blocks owned */ | ||
109 | __be32 d_rtbtimer; /* similar to above; for RT disk blocks */ | ||
110 | __be16 d_rtbwarns; /* warnings issued wrt RT disk blocks */ | ||
111 | __be16 d_pad; | ||
112 | } xfs_disk_dquot_t; | ||
113 | |||
114 | /* | ||
115 | * This is what goes on disk. This is separated from the xfs_disk_dquot because | ||
116 | * carrying the unnecessary padding would be a waste of memory. | ||
117 | */ | ||
118 | typedef struct xfs_dqblk { | ||
119 | xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */ | ||
120 | char dd_fill[4]; /* filling for posterity */ | ||
121 | |||
122 | /* | ||
123 | * These two are only present on filesystems with the CRC bits set. | ||
124 | */ | ||
125 | __be32 dd_crc; /* checksum */ | ||
126 | __be64 dd_lsn; /* last modification in log */ | ||
127 | uuid_t dd_uuid; /* location information */ | ||
128 | } xfs_dqblk_t; | ||
129 | |||
130 | #define XFS_DQUOT_CRC_OFF offsetof(struct xfs_dqblk, dd_crc) | ||
131 | |||
132 | /* | ||
133 | * Remote symlink format and access functions. | ||
134 | */ | ||
135 | #define XFS_SYMLINK_MAGIC 0x58534c4d /* XSLM */ | ||
136 | |||
137 | struct xfs_dsymlink_hdr { | ||
138 | __be32 sl_magic; | ||
139 | __be32 sl_offset; | ||
140 | __be32 sl_bytes; | ||
141 | __be32 sl_crc; | ||
142 | uuid_t sl_uuid; | ||
143 | __be64 sl_owner; | ||
144 | __be64 sl_blkno; | ||
145 | __be64 sl_lsn; | ||
146 | }; | ||
147 | |||
148 | #define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc) | ||
149 | |||
150 | /* | ||
151 | * The maximum pathlen is 1024 bytes. Since the minimum file system | ||
152 | * blocksize is 512 bytes, we can get a max of 3 extents back from | ||
153 | * bmapi when crc headers are taken into account. | ||
154 | */ | ||
155 | #define XFS_SYMLINK_MAPS 3 | ||
156 | |||
157 | #define XFS_SYMLINK_BUF_SPACE(mp, bufsize) \ | ||
158 | ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ | ||
159 | sizeof(struct xfs_dsymlink_hdr) : 0)) | ||
160 | |||
161 | |||
162 | /* | ||
163 | * Allocation Btree format definitions | ||
164 | * | ||
165 | * There are two on-disk btrees, one sorted by blockno and one sorted | ||
166 | * by blockcount and blockno. All blocks look the same to make the code | ||
167 | * simpler; if we have time later, we'll make the optimizations. | ||
168 | */ | ||
169 | #define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */ | ||
170 | #define XFS_ABTB_CRC_MAGIC 0x41423342 /* 'AB3B' */ | ||
171 | #define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */ | ||
172 | #define XFS_ABTC_CRC_MAGIC 0x41423343 /* 'AB3C' */ | ||
173 | |||
174 | /* | ||
175 | * Data record/key structure | ||
176 | */ | ||
177 | typedef struct xfs_alloc_rec { | ||
178 | __be32 ar_startblock; /* starting block number */ | ||
179 | __be32 ar_blockcount; /* count of free blocks */ | ||
180 | } xfs_alloc_rec_t, xfs_alloc_key_t; | ||
181 | |||
182 | typedef struct xfs_alloc_rec_incore { | ||
183 | xfs_agblock_t ar_startblock; /* starting block number */ | ||
184 | xfs_extlen_t ar_blockcount; /* count of free blocks */ | ||
185 | } xfs_alloc_rec_incore_t; | ||
186 | |||
187 | /* btree pointer type */ | ||
188 | typedef __be32 xfs_alloc_ptr_t; | ||
189 | |||
190 | /* | ||
191 | * Block numbers in the AG: | ||
192 | * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3. | ||
193 | */ | ||
194 | #define XFS_BNO_BLOCK(mp) ((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1)) | ||
195 | #define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1)) | ||
196 | |||
197 | |||
198 | /* | ||
199 | * Inode Allocation Btree format definitions | ||
200 | * | ||
201 | * There is a btree for the inode map per allocation group. | ||
202 | */ | ||
203 | #define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ | ||
204 | #define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */ | ||
205 | #define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */ | ||
206 | #define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */ | ||
207 | |||
typedef __uint64_t	xfs_inofree_t;
#define	XFS_INODES_PER_CHUNK		(NBBY * sizeof(xfs_inofree_t))
#define	XFS_INODES_PER_CHUNK_LOG	(XFS_NBBYLOG + 3)
#define	XFS_INOBT_ALL_FREE		((xfs_inofree_t)-1)
#define	XFS_INOBT_MASK(i)		((xfs_inofree_t)1 << (i))

/*
 * Return a mask of n set bits shifted left by i.
 *
 * When n is at least XFS_INODES_PER_CHUNK the mask starts out with every bit
 * set (0 - 1 in xfs_inofree_t arithmetic) rather than shifting 1 by n.
 */
static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
{
	xfs_inofree_t	top;

	if (n >= XFS_INODES_PER_CHUNK)
		top = 0;
	else
		top = XFS_INOBT_MASK(n);

	return (top - 1) << i;
}
218 | |||
219 | /* | ||
220 | * Data record structure | ||
221 | */ | ||
222 | typedef struct xfs_inobt_rec { | ||
223 | __be32 ir_startino; /* starting inode number */ | ||
224 | __be32 ir_freecount; /* count of free inodes (set bits) */ | ||
225 | __be64 ir_free; /* free inode mask */ | ||
226 | } xfs_inobt_rec_t; | ||
227 | |||
228 | typedef struct xfs_inobt_rec_incore { | ||
229 | xfs_agino_t ir_startino; /* starting inode number */ | ||
230 | __int32_t ir_freecount; /* count of free inodes (set bits) */ | ||
231 | xfs_inofree_t ir_free; /* free inode mask */ | ||
232 | } xfs_inobt_rec_incore_t; | ||
233 | |||
234 | |||
235 | /* | ||
236 | * Key structure | ||
237 | */ | ||
238 | typedef struct xfs_inobt_key { | ||
239 | __be32 ir_startino; /* starting inode number */ | ||
240 | } xfs_inobt_key_t; | ||
241 | |||
242 | /* btree pointer type */ | ||
243 | typedef __be32 xfs_inobt_ptr_t; | ||
244 | |||
245 | /* | ||
246 | * block numbers in the AG. | ||
247 | */ | ||
248 | #define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) | ||
249 | #define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) | ||
250 | |||
251 | /* | ||
252 | * The first data block of an AG depends on whether the filesystem was formatted | ||
253 | * with the finobt feature. If so, account for the finobt reserved root btree | ||
254 | * block. | ||
255 | */ | ||
256 | #define XFS_PREALLOC_BLOCKS(mp) \ | ||
257 | (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \ | ||
258 | XFS_FIBT_BLOCK(mp) + 1 : \ | ||
259 | XFS_IBT_BLOCK(mp) + 1) | ||
260 | |||
261 | |||
262 | |||
263 | /* | ||
264 | * BMAP Btree format definitions | ||
265 | * | ||
266 | * This includes both the root block definition that sits inside an inode fork | ||
267 | * and the record/pointer formats for the leaf/node in the blocks. | ||
268 | */ | ||
269 | #define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */ | ||
270 | #define XFS_BMAP_CRC_MAGIC 0x424d4133 /* 'BMA3' */ | ||
271 | |||
272 | /* | ||
273 | * Bmap root header, on-disk form only. | ||
274 | */ | ||
275 | typedef struct xfs_bmdr_block { | ||
276 | __be16 bb_level; /* 0 is a leaf */ | ||
277 | __be16 bb_numrecs; /* current # of data records */ | ||
278 | } xfs_bmdr_block_t; | ||
279 | |||
280 | /* | ||
281 | * Bmap btree record and extent descriptor. | ||
282 | * l0:63 is an extent flag (value 1 indicates non-normal). | ||
283 | * l0:9-62 are startoff. | ||
284 | * l0:0-8 and l1:21-63 are startblock. | ||
285 | * l1:0-20 are blockcount. | ||
286 | */ | ||
287 | #define BMBT_EXNTFLAG_BITLEN 1 | ||
288 | #define BMBT_STARTOFF_BITLEN 54 | ||
289 | #define BMBT_STARTBLOCK_BITLEN 52 | ||
290 | #define BMBT_BLOCKCOUNT_BITLEN 21 | ||
291 | |||
292 | typedef struct xfs_bmbt_rec { | ||
293 | __be64 l0, l1; | ||
294 | } xfs_bmbt_rec_t; | ||
295 | |||
296 | typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ | ||
297 | typedef xfs_bmbt_rec_t xfs_bmdr_rec_t; | ||
298 | |||
299 | typedef struct xfs_bmbt_rec_host { | ||
300 | __uint64_t l0, l1; | ||
301 | } xfs_bmbt_rec_host_t; | ||
302 | |||
303 | /* | ||
304 | * Values and macros for delayed-allocation startblock fields. | ||
305 | */ | ||
306 | #define STARTBLOCKVALBITS 17 | ||
307 | #define STARTBLOCKMASKBITS (15 + XFS_BIG_BLKNOS * 20) | ||
308 | #define DSTARTBLOCKMASKBITS (15 + 20) | ||
309 | #define STARTBLOCKMASK \ | ||
310 | (((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) | ||
311 | #define DSTARTBLOCKMASK \ | ||
312 | (((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) | ||
313 | |||
314 | static inline int isnullstartblock(xfs_fsblock_t x) | ||
315 | { | ||
316 | return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK; | ||
317 | } | ||
318 | |||
319 | static inline int isnulldstartblock(xfs_dfsbno_t x) | ||
320 | { | ||
321 | return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK; | ||
322 | } | ||
323 | |||
324 | static inline xfs_fsblock_t nullstartblock(int k) | ||
325 | { | ||
326 | ASSERT(k < (1 << STARTBLOCKVALBITS)); | ||
327 | return STARTBLOCKMASK | (k); | ||
328 | } | ||
329 | |||
330 | static inline xfs_filblks_t startblockval(xfs_fsblock_t x) | ||
331 | { | ||
332 | return (xfs_filblks_t)((x) & ~STARTBLOCKMASK); | ||
333 | } | ||
334 | |||
335 | /* | ||
336 | * Possible extent formats. | ||
337 | */ | ||
338 | typedef enum { | ||
339 | XFS_EXTFMT_NOSTATE = 0, | ||
340 | XFS_EXTFMT_HASSTATE | ||
341 | } xfs_exntfmt_t; | ||
342 | |||
343 | /* | ||
344 | * Possible extent states. | ||
345 | */ | ||
346 | typedef enum { | ||
347 | XFS_EXT_NORM, XFS_EXT_UNWRITTEN, | ||
348 | XFS_EXT_DMAPI_OFFLINE, XFS_EXT_INVALID | ||
349 | } xfs_exntst_t; | ||
350 | |||
351 | /* | ||
352 | * Incore version of above. | ||
353 | */ | ||
354 | typedef struct xfs_bmbt_irec | ||
355 | { | ||
356 | xfs_fileoff_t br_startoff; /* starting file offset */ | ||
357 | xfs_fsblock_t br_startblock; /* starting block number */ | ||
358 | xfs_filblks_t br_blockcount; /* number of blocks */ | ||
359 | xfs_exntst_t br_state; /* extent state */ | ||
360 | } xfs_bmbt_irec_t; | ||
361 | |||
362 | /* | ||
363 | * Key structure for non-leaf levels of the tree. | ||
364 | */ | ||
365 | typedef struct xfs_bmbt_key { | ||
366 | __be64 br_startoff; /* starting file offset */ | ||
367 | } xfs_bmbt_key_t, xfs_bmdr_key_t; | ||
368 | |||
369 | /* btree pointer type */ | ||
370 | typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; | ||
371 | |||
372 | |||
373 | /* | ||
374 | * Generic Btree block format definitions | ||
375 | * | ||
376 | * This is a combination of the actual format used on disk for short and long | ||
377 | * format btrees. The first three fields are shared by both formats, but the | ||
378 | * pointers are different and should be used with care. | ||
379 | * | ||
380 | * To get the size of the actual short or long form headers please use the size | ||
381 | * macros below. Never use sizeof(xfs_btree_block). | ||
382 | * | ||
383 | * The blkno, crc, lsn, owner and uuid fields are only available in filesystems | ||
384 | * with the crc feature bit, and all accesses to them must be conditional on | ||
385 | * that flag. | ||
386 | */ | ||
387 | struct xfs_btree_block { | ||
388 | __be32 bb_magic; /* magic number for block type */ | ||
389 | __be16 bb_level; /* 0 is a leaf */ | ||
390 | __be16 bb_numrecs; /* current # of data records */ | ||
391 | union { | ||
392 | struct { | ||
393 | __be32 bb_leftsib; | ||
394 | __be32 bb_rightsib; | ||
395 | |||
396 | __be64 bb_blkno; | ||
397 | __be64 bb_lsn; | ||
398 | uuid_t bb_uuid; | ||
399 | __be32 bb_owner; | ||
400 | __le32 bb_crc; | ||
401 | } s; /* short form pointers */ | ||
402 | struct { | ||
403 | __be64 bb_leftsib; | ||
404 | __be64 bb_rightsib; | ||
405 | |||
406 | __be64 bb_blkno; | ||
407 | __be64 bb_lsn; | ||
408 | uuid_t bb_uuid; | ||
409 | __be64 bb_owner; | ||
410 | __le32 bb_crc; | ||
411 | __be32 bb_pad; /* padding for alignment */ | ||
412 | } l; /* long form pointers */ | ||
413 | } bb_u; /* rest */ | ||
414 | }; | ||
415 | |||
416 | #define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */ | ||
417 | #define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */ | ||
418 | |||
419 | /* sizes of CRC enabled btree blocks */ | ||
420 | #define XFS_BTREE_SBLOCK_CRC_LEN (XFS_BTREE_SBLOCK_LEN + 40) | ||
421 | #define XFS_BTREE_LBLOCK_CRC_LEN (XFS_BTREE_LBLOCK_LEN + 48) | ||
422 | |||
423 | #define XFS_BTREE_SBLOCK_CRC_OFF \ | ||
424 | offsetof(struct xfs_btree_block, bb_u.s.bb_crc) | ||
425 | #define XFS_BTREE_LBLOCK_CRC_OFF \ | ||
426 | offsetof(struct xfs_btree_block, bb_u.l.bb_crc) | ||
427 | |||
428 | #endif /* __XFS_FORMAT_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c new file mode 100644 index 000000000000..b62771f1f4b5 --- /dev/null +++ b/fs/xfs/libxfs/xfs_ialloc.c | |||
@@ -0,0 +1,2189 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_shared.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_bit.h" | ||
25 | #include "xfs_inum.h" | ||
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | ||
29 | #include "xfs_inode.h" | ||
30 | #include "xfs_btree.h" | ||
31 | #include "xfs_ialloc.h" | ||
32 | #include "xfs_ialloc_btree.h" | ||
33 | #include "xfs_alloc.h" | ||
34 | #include "xfs_rtalloc.h" | ||
35 | #include "xfs_error.h" | ||
36 | #include "xfs_bmap.h" | ||
37 | #include "xfs_cksum.h" | ||
38 | #include "xfs_trans.h" | ||
39 | #include "xfs_buf_item.h" | ||
40 | #include "xfs_icreate_item.h" | ||
41 | #include "xfs_icache.h" | ||
42 | #include "xfs_dinode.h" | ||
43 | #include "xfs_trace.h" | ||
44 | |||
45 | |||
46 | /* | ||
47 | * Allocation group level functions. | ||
48 | */ | ||
49 | static inline int | ||
50 | xfs_ialloc_cluster_alignment( | ||
51 | xfs_alloc_arg_t *args) | ||
52 | { | ||
53 | if (xfs_sb_version_hasalign(&args->mp->m_sb) && | ||
54 | args->mp->m_sb.sb_inoalignmt >= | ||
55 | XFS_B_TO_FSBT(args->mp, args->mp->m_inode_cluster_size)) | ||
56 | return args->mp->m_sb.sb_inoalignmt; | ||
57 | return 1; | ||
58 | } | ||
59 | |||
60 | /* | ||
61 | * Lookup a record by ino in the btree given by cur. | ||
62 | */ | ||
63 | int /* error */ | ||
64 | xfs_inobt_lookup( | ||
65 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
66 | xfs_agino_t ino, /* starting inode of chunk */ | ||
67 | xfs_lookup_t dir, /* <=, >=, == */ | ||
68 | int *stat) /* success/failure */ | ||
69 | { | ||
70 | cur->bc_rec.i.ir_startino = ino; | ||
71 | cur->bc_rec.i.ir_freecount = 0; | ||
72 | cur->bc_rec.i.ir_free = 0; | ||
73 | return xfs_btree_lookup(cur, dir, stat); | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * Update the record referred to by cur to the value given. | ||
78 | * This either works (return 0) or gets an EFSCORRUPTED error. | ||
79 | */ | ||
80 | STATIC int /* error */ | ||
81 | xfs_inobt_update( | ||
82 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
83 | xfs_inobt_rec_incore_t *irec) /* btree record */ | ||
84 | { | ||
85 | union xfs_btree_rec rec; | ||
86 | |||
87 | rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); | ||
88 | rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount); | ||
89 | rec.inobt.ir_free = cpu_to_be64(irec->ir_free); | ||
90 | return xfs_btree_update(cur, &rec); | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * Get the data from the pointed-to record. | ||
95 | */ | ||
96 | int /* error */ | ||
97 | xfs_inobt_get_rec( | ||
98 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
99 | xfs_inobt_rec_incore_t *irec, /* btree record */ | ||
100 | int *stat) /* output: success/failure */ | ||
101 | { | ||
102 | union xfs_btree_rec *rec; | ||
103 | int error; | ||
104 | |||
105 | error = xfs_btree_get_rec(cur, &rec, stat); | ||
106 | if (!error && *stat == 1) { | ||
107 | irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); | ||
108 | irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount); | ||
109 | irec->ir_free = be64_to_cpu(rec->inobt.ir_free); | ||
110 | } | ||
111 | return error; | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * Insert a single inobt record. Cursor must already point to desired location. | ||
116 | */ | ||
117 | STATIC int | ||
118 | xfs_inobt_insert_rec( | ||
119 | struct xfs_btree_cur *cur, | ||
120 | __int32_t freecount, | ||
121 | xfs_inofree_t free, | ||
122 | int *stat) | ||
123 | { | ||
124 | cur->bc_rec.i.ir_freecount = freecount; | ||
125 | cur->bc_rec.i.ir_free = free; | ||
126 | return xfs_btree_insert(cur, stat); | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * Insert records describing a newly allocated inode chunk into the inobt. | ||
131 | */ | ||
132 | STATIC int | ||
133 | xfs_inobt_insert( | ||
134 | struct xfs_mount *mp, | ||
135 | struct xfs_trans *tp, | ||
136 | struct xfs_buf *agbp, | ||
137 | xfs_agino_t newino, | ||
138 | xfs_agino_t newlen, | ||
139 | xfs_btnum_t btnum) | ||
140 | { | ||
141 | struct xfs_btree_cur *cur; | ||
142 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); | ||
143 | xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); | ||
144 | xfs_agino_t thisino; | ||
145 | int i; | ||
146 | int error; | ||
147 | |||
148 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); | ||
149 | |||
150 | for (thisino = newino; | ||
151 | thisino < newino + newlen; | ||
152 | thisino += XFS_INODES_PER_CHUNK) { | ||
153 | error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i); | ||
154 | if (error) { | ||
155 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
156 | return error; | ||
157 | } | ||
158 | ASSERT(i == 0); | ||
159 | |||
160 | error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK, | ||
161 | XFS_INOBT_ALL_FREE, &i); | ||
162 | if (error) { | ||
163 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
164 | return error; | ||
165 | } | ||
166 | ASSERT(i == 1); | ||
167 | } | ||
168 | |||
169 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
170 | |||
171 | return 0; | ||
172 | } | ||
173 | |||
/*
 * DEBUG-only cross-check of the free inode count stored in the AGI.
 *
 * For a single-level btree, the free counts of all records reachable from
 * the cursor are summed and asserted to equal agi_freecount, unless the
 * filesystem has been forcibly shut down. Trees with more levels are not
 * walked. Non-DEBUG builds replace the function with a constant 0.
 */
#ifdef DEBUG
STATIC int
xfs_check_agi_freecount(
	struct xfs_btree_cur	*cur,
	struct xfs_agi		*agi)
{
	xfs_inobt_rec_incore_t	rec;
	int			freecount = 0;
	int			has_rec;
	int			error;

	if (cur->bc_nlevels != 1)
		return 0;

	error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &has_rec);
	if (error)
		return error;

	for (;;) {
		error = xfs_inobt_get_rec(cur, &rec, &has_rec);
		if (error)
			return error;
		if (!has_rec)
			break;

		freecount += rec.ir_freecount;

		error = xfs_btree_increment(cur, 0, &has_rec);
		if (error)
			return error;
		if (has_rec != 1)
			break;
	}

	if (!XFS_FORCED_SHUTDOWN(cur->bc_mp))
		ASSERT(freecount == be32_to_cpu(agi->agi_freecount));
	return 0;
}
#else
#define xfs_check_agi_freecount(cur, agi)	0
#endif
214 | |||
215 | /* | ||
216 | * Initialise a new set of inodes. When called without a transaction context | ||
217 | * (e.g. from recovery) we initiate a delayed write of the inode buffers rather | ||
218 | * than logging them (which in a transaction context puts them into the AIL | ||
219 | * for writeback rather than the xfsbufd queue). | ||
220 | */ | ||
221 | int | ||
222 | xfs_ialloc_inode_init( | ||
223 | struct xfs_mount *mp, | ||
224 | struct xfs_trans *tp, | ||
225 | struct list_head *buffer_list, | ||
226 | xfs_agnumber_t agno, | ||
227 | xfs_agblock_t agbno, | ||
228 | xfs_agblock_t length, | ||
229 | unsigned int gen) | ||
230 | { | ||
231 | struct xfs_buf *fbuf; | ||
232 | struct xfs_dinode *free; | ||
233 | int nbufs, blks_per_cluster, inodes_per_cluster; | ||
234 | int version; | ||
235 | int i, j; | ||
236 | xfs_daddr_t d; | ||
237 | xfs_ino_t ino = 0; | ||
238 | |||
239 | /* | ||
240 | * Loop over the new block(s), filling in the inodes. For small block | ||
241 | * sizes, manipulate the inodes in buffers which are multiples of the | ||
242 | * blocks size. | ||
243 | */ | ||
244 | blks_per_cluster = xfs_icluster_size_fsb(mp); | ||
245 | inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; | ||
246 | nbufs = length / blks_per_cluster; | ||
247 | |||
248 | /* | ||
249 | * Figure out what version number to use in the inodes we create. If | ||
250 | * the superblock version has caught up to the one that supports the new | ||
251 | * inode format, then use the new inode version. Otherwise use the old | ||
252 | * version so that old kernels will continue to be able to use the file | ||
253 | * system. | ||
254 | * | ||
255 | * For v3 inodes, we also need to write the inode number into the inode, | ||
256 | * so calculate the first inode number of the chunk here as | ||
257 | * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not | ||
258 | * across multiple filesystem blocks (such as a cluster) and so cannot | ||
259 | * be used in the cluster buffer loop below. | ||
260 | * | ||
261 | * Further, because we are writing the inode directly into the buffer | ||
262 | * and calculating a CRC on the entire inode, we have to log the entire | ||
263 | * inode so that the entire range the CRC covers is present in the log. | ||
264 | * That means for v3 inode we log the entire buffer rather than just the | ||
265 | * inode cores. | ||
266 | */ | ||
267 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
268 | version = 3; | ||
269 | ino = XFS_AGINO_TO_INO(mp, agno, | ||
270 | XFS_OFFBNO_TO_AGINO(mp, agbno, 0)); | ||
271 | |||
272 | /* | ||
273 | * log the initialisation that is about to take place as a | ||
274 | * logical operation. This means the transaction does not | ||
275 | * need to log the physical changes to the inode buffers as log | ||
276 | * recovery will know what initialisation is actually needed. | ||
277 | * Hence we only need to log the buffers as "ordered" buffers so | ||
278 | * they track in the AIL as if they were physically logged. | ||
279 | */ | ||
280 | if (tp) | ||
281 | xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos, | ||
282 | mp->m_sb.sb_inodesize, length, gen); | ||
283 | } else | ||
284 | version = 2; | ||
285 | |||
286 | for (j = 0; j < nbufs; j++) { | ||
287 | /* | ||
288 | * Get the block. | ||
289 | */ | ||
290 | d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); | ||
291 | fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, | ||
292 | mp->m_bsize * blks_per_cluster, | ||
293 | XBF_UNMAPPED); | ||
294 | if (!fbuf) | ||
295 | return -ENOMEM; | ||
296 | |||
297 | /* Initialize the inode buffers and log them appropriately. */ | ||
298 | fbuf->b_ops = &xfs_inode_buf_ops; | ||
299 | xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); | ||
300 | for (i = 0; i < inodes_per_cluster; i++) { | ||
301 | int ioffset = i << mp->m_sb.sb_inodelog; | ||
302 | uint isize = xfs_dinode_size(version); | ||
303 | |||
304 | free = xfs_make_iptr(mp, fbuf, i); | ||
305 | free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); | ||
306 | free->di_version = version; | ||
307 | free->di_gen = cpu_to_be32(gen); | ||
308 | free->di_next_unlinked = cpu_to_be32(NULLAGINO); | ||
309 | |||
310 | if (version == 3) { | ||
311 | free->di_ino = cpu_to_be64(ino); | ||
312 | ino++; | ||
313 | uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid); | ||
314 | xfs_dinode_calc_crc(mp, free); | ||
315 | } else if (tp) { | ||
316 | /* just log the inode core */ | ||
317 | xfs_trans_log_buf(tp, fbuf, ioffset, | ||
318 | ioffset + isize - 1); | ||
319 | } | ||
320 | } | ||
321 | |||
322 | if (tp) { | ||
323 | /* | ||
324 | * Mark the buffer as an inode allocation buffer so it | ||
325 | * sticks in AIL at the point of this allocation | ||
326 | * transaction. This ensures the they are on disk before | ||
327 | * the tail of the log can be moved past this | ||
328 | * transaction (i.e. by preventing relogging from moving | ||
329 | * it forward in the log). | ||
330 | */ | ||
331 | xfs_trans_inode_alloc_buf(tp, fbuf); | ||
332 | if (version == 3) { | ||
333 | /* | ||
334 | * Mark the buffer as ordered so that they are | ||
335 | * not physically logged in the transaction but | ||
336 | * still tracked in the AIL as part of the | ||
337 | * transaction and pin the log appropriately. | ||
338 | */ | ||
339 | xfs_trans_ordered_buf(tp, fbuf); | ||
340 | xfs_trans_log_buf(tp, fbuf, 0, | ||
341 | BBTOB(fbuf->b_length) - 1); | ||
342 | } | ||
343 | } else { | ||
344 | fbuf->b_flags |= XBF_DONE; | ||
345 | xfs_buf_delwri_queue(fbuf, buffer_list); | ||
346 | xfs_buf_relse(fbuf); | ||
347 | } | ||
348 | } | ||
349 | return 0; | ||
350 | } | ||
351 | |||
352 | /* | ||
353 | * Allocate new inodes in the allocation group specified by agbp. | ||
354 | * Return 0 for success, else error code. | ||
355 | */ | ||
356 | STATIC int /* error code or 0 */ | ||
357 | xfs_ialloc_ag_alloc( | ||
358 | xfs_trans_t *tp, /* transaction pointer */ | ||
359 | xfs_buf_t *agbp, /* alloc group buffer */ | ||
360 | int *alloc) | ||
361 | { | ||
362 | xfs_agi_t *agi; /* allocation group header */ | ||
363 | xfs_alloc_arg_t args; /* allocation argument structure */ | ||
364 | xfs_agnumber_t agno; | ||
365 | int error; | ||
366 | xfs_agino_t newino; /* new first inode's number */ | ||
367 | xfs_agino_t newlen; /* new number of inodes */ | ||
368 | int isaligned = 0; /* inode allocation at stripe unit */ | ||
369 | /* boundary */ | ||
370 | struct xfs_perag *pag; | ||
371 | |||
372 | memset(&args, 0, sizeof(args)); | ||
373 | args.tp = tp; | ||
374 | args.mp = tp->t_mountp; | ||
375 | |||
376 | /* | ||
377 | * Locking will ensure that we don't have two callers in here | ||
378 | * at one time. | ||
379 | */ | ||
380 | newlen = args.mp->m_ialloc_inos; | ||
381 | if (args.mp->m_maxicount && | ||
382 | args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) | ||
383 | return -ENOSPC; | ||
384 | args.minlen = args.maxlen = args.mp->m_ialloc_blks; | ||
385 | /* | ||
386 | * First try to allocate inodes contiguous with the last-allocated | ||
387 | * chunk of inodes. If the filesystem is striped, this will fill | ||
388 | * an entire stripe unit with inodes. | ||
389 | */ | ||
390 | agi = XFS_BUF_TO_AGI(agbp); | ||
391 | newino = be32_to_cpu(agi->agi_newino); | ||
392 | agno = be32_to_cpu(agi->agi_seqno); | ||
393 | args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + | ||
394 | args.mp->m_ialloc_blks; | ||
395 | if (likely(newino != NULLAGINO && | ||
396 | (args.agbno < be32_to_cpu(agi->agi_length)))) { | ||
397 | args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); | ||
398 | args.type = XFS_ALLOCTYPE_THIS_BNO; | ||
399 | args.prod = 1; | ||
400 | |||
401 | /* | ||
402 | * We need to take into account alignment here to ensure that | ||
403 | * we don't modify the free list if we fail to have an exact | ||
404 | * block. If we don't have an exact match, and every oher | ||
405 | * attempt allocation attempt fails, we'll end up cancelling | ||
406 | * a dirty transaction and shutting down. | ||
407 | * | ||
408 | * For an exact allocation, alignment must be 1, | ||
409 | * however we need to take cluster alignment into account when | ||
410 | * fixing up the freelist. Use the minalignslop field to | ||
411 | * indicate that extra blocks might be required for alignment, | ||
412 | * but not to use them in the actual exact allocation. | ||
413 | */ | ||
414 | args.alignment = 1; | ||
415 | args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; | ||
416 | |||
417 | /* Allow space for the inode btree to split. */ | ||
418 | args.minleft = args.mp->m_in_maxlevels - 1; | ||
419 | if ((error = xfs_alloc_vextent(&args))) | ||
420 | return error; | ||
421 | |||
422 | /* | ||
423 | * This request might have dirtied the transaction if the AG can | ||
424 | * satisfy the request, but the exact block was not available. | ||
425 | * If the allocation did fail, subsequent requests will relax | ||
426 | * the exact agbno requirement and increase the alignment | ||
427 | * instead. It is critical that the total size of the request | ||
428 | * (len + alignment + slop) does not increase from this point | ||
429 | * on, so reset minalignslop to ensure it is not included in | ||
430 | * subsequent requests. | ||
431 | */ | ||
432 | args.minalignslop = 0; | ||
433 | } else | ||
434 | args.fsbno = NULLFSBLOCK; | ||
435 | |||
436 | if (unlikely(args.fsbno == NULLFSBLOCK)) { | ||
437 | /* | ||
438 | * Set the alignment for the allocation. | ||
439 | * If stripe alignment is turned on then align at stripe unit | ||
440 | * boundary. | ||
441 | * If the cluster size is smaller than a filesystem block | ||
442 | * then we're doing I/O for inodes in filesystem block size | ||
443 | * pieces, so don't need alignment anyway. | ||
444 | */ | ||
445 | isaligned = 0; | ||
446 | if (args.mp->m_sinoalign) { | ||
447 | ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); | ||
448 | args.alignment = args.mp->m_dalign; | ||
449 | isaligned = 1; | ||
450 | } else | ||
451 | args.alignment = xfs_ialloc_cluster_alignment(&args); | ||
452 | /* | ||
453 | * Need to figure out where to allocate the inode blocks. | ||
454 | * Ideally they should be spaced out through the a.g. | ||
455 | * For now, just allocate blocks up front. | ||
456 | */ | ||
457 | args.agbno = be32_to_cpu(agi->agi_root); | ||
458 | args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); | ||
459 | /* | ||
460 | * Allocate a fixed-size extent of inodes. | ||
461 | */ | ||
462 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | ||
463 | args.prod = 1; | ||
464 | /* | ||
465 | * Allow space for the inode btree to split. | ||
466 | */ | ||
467 | args.minleft = args.mp->m_in_maxlevels - 1; | ||
468 | if ((error = xfs_alloc_vextent(&args))) | ||
469 | return error; | ||
470 | } | ||
471 | |||
472 | /* | ||
473 | * If stripe alignment is turned on, then try again with cluster | ||
474 | * alignment. | ||
475 | */ | ||
476 | if (isaligned && args.fsbno == NULLFSBLOCK) { | ||
477 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | ||
478 | args.agbno = be32_to_cpu(agi->agi_root); | ||
479 | args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); | ||
480 | args.alignment = xfs_ialloc_cluster_alignment(&args); | ||
481 | if ((error = xfs_alloc_vextent(&args))) | ||
482 | return error; | ||
483 | } | ||
484 | |||
485 | if (args.fsbno == NULLFSBLOCK) { | ||
486 | *alloc = 0; | ||
487 | return 0; | ||
488 | } | ||
489 | ASSERT(args.len == args.minlen); | ||
490 | |||
491 | /* | ||
492 | * Stamp and write the inode buffers. | ||
493 | * | ||
494 | * Seed the new inode cluster with a random generation number. This | ||
495 | * prevents short-term reuse of generation numbers if a chunk is | ||
496 | * freed and then immediately reallocated. We use random numbers | ||
497 | * rather than a linear progression to prevent the next generation | ||
498 | * number from being easily guessable. | ||
499 | */ | ||
500 | error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno, | ||
501 | args.len, prandom_u32()); | ||
502 | |||
503 | if (error) | ||
504 | return error; | ||
505 | /* | ||
506 | * Convert the results. | ||
507 | */ | ||
508 | newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); | ||
509 | be32_add_cpu(&agi->agi_count, newlen); | ||
510 | be32_add_cpu(&agi->agi_freecount, newlen); | ||
511 | pag = xfs_perag_get(args.mp, agno); | ||
512 | pag->pagi_freecount += newlen; | ||
513 | xfs_perag_put(pag); | ||
514 | agi->agi_newino = cpu_to_be32(newino); | ||
515 | |||
516 | /* | ||
517 | * Insert records describing the new inode chunk into the btrees. | ||
518 | */ | ||
519 | error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, | ||
520 | XFS_BTNUM_INO); | ||
521 | if (error) | ||
522 | return error; | ||
523 | |||
524 | if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { | ||
525 | error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, | ||
526 | XFS_BTNUM_FINO); | ||
527 | if (error) | ||
528 | return error; | ||
529 | } | ||
530 | /* | ||
531 | * Log allocation group header fields | ||
532 | */ | ||
533 | xfs_ialloc_log_agi(tp, agbp, | ||
534 | XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO); | ||
535 | /* | ||
536 | * Modify/log superblock values for inode count and inode free count. | ||
537 | */ | ||
538 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen); | ||
539 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen); | ||
540 | *alloc = 1; | ||
541 | return 0; | ||
542 | } | ||
543 | |||
544 | STATIC xfs_agnumber_t | ||
545 | xfs_ialloc_next_ag( | ||
546 | xfs_mount_t *mp) | ||
547 | { | ||
548 | xfs_agnumber_t agno; | ||
549 | |||
550 | spin_lock(&mp->m_agirotor_lock); | ||
551 | agno = mp->m_agirotor; | ||
552 | if (++mp->m_agirotor >= mp->m_maxagi) | ||
553 | mp->m_agirotor = 0; | ||
554 | spin_unlock(&mp->m_agirotor_lock); | ||
555 | |||
556 | return agno; | ||
557 | } | ||
558 | |||
559 | /* | ||
560 | * Select an allocation group to look for a free inode in, based on the parent | ||
561 | * inode and the mode. Return the allocation group buffer. | ||
562 | */ | ||
563 | STATIC xfs_agnumber_t | ||
564 | xfs_ialloc_ag_select( | ||
565 | xfs_trans_t *tp, /* transaction pointer */ | ||
566 | xfs_ino_t parent, /* parent directory inode number */ | ||
567 | umode_t mode, /* bits set to indicate file type */ | ||
568 | int okalloc) /* ok to allocate more space */ | ||
569 | { | ||
570 | xfs_agnumber_t agcount; /* number of ag's in the filesystem */ | ||
571 | xfs_agnumber_t agno; /* current ag number */ | ||
572 | int flags; /* alloc buffer locking flags */ | ||
573 | xfs_extlen_t ineed; /* blocks needed for inode allocation */ | ||
574 | xfs_extlen_t longest = 0; /* longest extent available */ | ||
575 | xfs_mount_t *mp; /* mount point structure */ | ||
576 | int needspace; /* file mode implies space allocated */ | ||
577 | xfs_perag_t *pag; /* per allocation group data */ | ||
578 | xfs_agnumber_t pagno; /* parent (starting) ag number */ | ||
579 | int error; | ||
580 | |||
581 | /* | ||
582 | * Files of these types need at least one block if length > 0 | ||
583 | * (and they won't fit in the inode, but that's hard to figure out). | ||
584 | */ | ||
585 | needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); | ||
586 | mp = tp->t_mountp; | ||
587 | agcount = mp->m_maxagi; | ||
588 | if (S_ISDIR(mode)) | ||
589 | pagno = xfs_ialloc_next_ag(mp); | ||
590 | else { | ||
591 | pagno = XFS_INO_TO_AGNO(mp, parent); | ||
592 | if (pagno >= agcount) | ||
593 | pagno = 0; | ||
594 | } | ||
595 | |||
596 | ASSERT(pagno < agcount); | ||
597 | |||
598 | /* | ||
599 | * Loop through allocation groups, looking for one with a little | ||
600 | * free space in it. Note we don't look for free inodes, exactly. | ||
601 | * Instead, we include whether there is a need to allocate inodes | ||
602 | * to mean that blocks must be allocated for them, | ||
603 | * if none are currently free. | ||
604 | */ | ||
605 | agno = pagno; | ||
606 | flags = XFS_ALLOC_FLAG_TRYLOCK; | ||
607 | for (;;) { | ||
608 | pag = xfs_perag_get(mp, agno); | ||
609 | if (!pag->pagi_inodeok) { | ||
610 | xfs_ialloc_next_ag(mp); | ||
611 | goto nextag; | ||
612 | } | ||
613 | |||
614 | if (!pag->pagi_init) { | ||
615 | error = xfs_ialloc_pagi_init(mp, tp, agno); | ||
616 | if (error) | ||
617 | goto nextag; | ||
618 | } | ||
619 | |||
620 | if (pag->pagi_freecount) { | ||
621 | xfs_perag_put(pag); | ||
622 | return agno; | ||
623 | } | ||
624 | |||
625 | if (!okalloc) | ||
626 | goto nextag; | ||
627 | |||
628 | if (!pag->pagf_init) { | ||
629 | error = xfs_alloc_pagf_init(mp, tp, agno, flags); | ||
630 | if (error) | ||
631 | goto nextag; | ||
632 | } | ||
633 | |||
634 | /* | ||
635 | * Is there enough free space for the file plus a block of | ||
636 | * inodes? (if we need to allocate some)? | ||
637 | */ | ||
638 | ineed = mp->m_ialloc_blks; | ||
639 | longest = pag->pagf_longest; | ||
640 | if (!longest) | ||
641 | longest = pag->pagf_flcount > 0; | ||
642 | |||
643 | if (pag->pagf_freeblks >= needspace + ineed && | ||
644 | longest >= ineed) { | ||
645 | xfs_perag_put(pag); | ||
646 | return agno; | ||
647 | } | ||
648 | nextag: | ||
649 | xfs_perag_put(pag); | ||
650 | /* | ||
651 | * No point in iterating over the rest, if we're shutting | ||
652 | * down. | ||
653 | */ | ||
654 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
655 | return NULLAGNUMBER; | ||
656 | agno++; | ||
657 | if (agno >= agcount) | ||
658 | agno = 0; | ||
659 | if (agno == pagno) { | ||
660 | if (flags == 0) | ||
661 | return NULLAGNUMBER; | ||
662 | flags = 0; | ||
663 | } | ||
664 | } | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * Try to retrieve the next record to the left/right from the current one. | ||
669 | */ | ||
670 | STATIC int | ||
671 | xfs_ialloc_next_rec( | ||
672 | struct xfs_btree_cur *cur, | ||
673 | xfs_inobt_rec_incore_t *rec, | ||
674 | int *done, | ||
675 | int left) | ||
676 | { | ||
677 | int error; | ||
678 | int i; | ||
679 | |||
680 | if (left) | ||
681 | error = xfs_btree_decrement(cur, 0, &i); | ||
682 | else | ||
683 | error = xfs_btree_increment(cur, 0, &i); | ||
684 | |||
685 | if (error) | ||
686 | return error; | ||
687 | *done = !i; | ||
688 | if (i) { | ||
689 | error = xfs_inobt_get_rec(cur, rec, &i); | ||
690 | if (error) | ||
691 | return error; | ||
692 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
693 | } | ||
694 | |||
695 | return 0; | ||
696 | } | ||
697 | |||
698 | STATIC int | ||
699 | xfs_ialloc_get_rec( | ||
700 | struct xfs_btree_cur *cur, | ||
701 | xfs_agino_t agino, | ||
702 | xfs_inobt_rec_incore_t *rec, | ||
703 | int *done) | ||
704 | { | ||
705 | int error; | ||
706 | int i; | ||
707 | |||
708 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i); | ||
709 | if (error) | ||
710 | return error; | ||
711 | *done = !i; | ||
712 | if (i) { | ||
713 | error = xfs_inobt_get_rec(cur, rec, &i); | ||
714 | if (error) | ||
715 | return error; | ||
716 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
717 | } | ||
718 | |||
719 | return 0; | ||
720 | } | ||
721 | |||
722 | /* | ||
723 | * Allocate an inode using the inobt-only algorithm. | ||
724 | */ | ||
725 | STATIC int | ||
726 | xfs_dialloc_ag_inobt( | ||
727 | struct xfs_trans *tp, | ||
728 | struct xfs_buf *agbp, | ||
729 | xfs_ino_t parent, | ||
730 | xfs_ino_t *inop) | ||
731 | { | ||
732 | struct xfs_mount *mp = tp->t_mountp; | ||
733 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); | ||
734 | xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); | ||
735 | xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); | ||
736 | xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); | ||
737 | struct xfs_perag *pag; | ||
738 | struct xfs_btree_cur *cur, *tcur; | ||
739 | struct xfs_inobt_rec_incore rec, trec; | ||
740 | xfs_ino_t ino; | ||
741 | int error; | ||
742 | int offset; | ||
743 | int i, j; | ||
744 | |||
745 | pag = xfs_perag_get(mp, agno); | ||
746 | |||
747 | ASSERT(pag->pagi_init); | ||
748 | ASSERT(pag->pagi_inodeok); | ||
749 | ASSERT(pag->pagi_freecount > 0); | ||
750 | |||
751 | restart_pagno: | ||
752 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); | ||
753 | /* | ||
754 | * If pagino is 0 (this is the root inode allocation) use newino. | ||
755 | * This must work because we've just allocated some. | ||
756 | */ | ||
757 | if (!pagino) | ||
758 | pagino = be32_to_cpu(agi->agi_newino); | ||
759 | |||
760 | error = xfs_check_agi_freecount(cur, agi); | ||
761 | if (error) | ||
762 | goto error0; | ||
763 | |||
764 | /* | ||
765 | * If in the same AG as the parent, try to get near the parent. | ||
766 | */ | ||
767 | if (pagno == agno) { | ||
768 | int doneleft; /* done, to the left */ | ||
769 | int doneright; /* done, to the right */ | ||
770 | int searchdistance = 10; | ||
771 | |||
772 | error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); | ||
773 | if (error) | ||
774 | goto error0; | ||
775 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
776 | |||
777 | error = xfs_inobt_get_rec(cur, &rec, &j); | ||
778 | if (error) | ||
779 | goto error0; | ||
780 | XFS_WANT_CORRUPTED_GOTO(j == 1, error0); | ||
781 | |||
782 | if (rec.ir_freecount > 0) { | ||
783 | /* | ||
784 | * Found a free inode in the same chunk | ||
785 | * as the parent, done. | ||
786 | */ | ||
787 | goto alloc_inode; | ||
788 | } | ||
789 | |||
790 | |||
791 | /* | ||
792 | * In the same AG as parent, but parent's chunk is full. | ||
793 | */ | ||
794 | |||
795 | /* duplicate the cursor, search left & right simultaneously */ | ||
796 | error = xfs_btree_dup_cursor(cur, &tcur); | ||
797 | if (error) | ||
798 | goto error0; | ||
799 | |||
800 | /* | ||
801 | * Skip to last blocks looked up if same parent inode. | ||
802 | */ | ||
803 | if (pagino != NULLAGINO && | ||
804 | pag->pagl_pagino == pagino && | ||
805 | pag->pagl_leftrec != NULLAGINO && | ||
806 | pag->pagl_rightrec != NULLAGINO) { | ||
807 | error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, | ||
808 | &trec, &doneleft); | ||
809 | if (error) | ||
810 | goto error1; | ||
811 | |||
812 | error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, | ||
813 | &rec, &doneright); | ||
814 | if (error) | ||
815 | goto error1; | ||
816 | } else { | ||
817 | /* search left with tcur, back up 1 record */ | ||
818 | error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1); | ||
819 | if (error) | ||
820 | goto error1; | ||
821 | |||
822 | /* search right with cur, go forward 1 record. */ | ||
823 | error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0); | ||
824 | if (error) | ||
825 | goto error1; | ||
826 | } | ||
827 | |||
828 | /* | ||
829 | * Loop until we find an inode chunk with a free inode. | ||
830 | */ | ||
831 | while (!doneleft || !doneright) { | ||
832 | int useleft; /* using left inode chunk this time */ | ||
833 | |||
834 | if (!--searchdistance) { | ||
835 | /* | ||
836 | * Not in range - save last search | ||
837 | * location and allocate a new inode | ||
838 | */ | ||
839 | xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); | ||
840 | pag->pagl_leftrec = trec.ir_startino; | ||
841 | pag->pagl_rightrec = rec.ir_startino; | ||
842 | pag->pagl_pagino = pagino; | ||
843 | goto newino; | ||
844 | } | ||
845 | |||
846 | /* figure out the closer block if both are valid. */ | ||
847 | if (!doneleft && !doneright) { | ||
848 | useleft = pagino - | ||
849 | (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) < | ||
850 | rec.ir_startino - pagino; | ||
851 | } else { | ||
852 | useleft = !doneleft; | ||
853 | } | ||
854 | |||
855 | /* free inodes to the left? */ | ||
856 | if (useleft && trec.ir_freecount) { | ||
857 | rec = trec; | ||
858 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
859 | cur = tcur; | ||
860 | |||
861 | pag->pagl_leftrec = trec.ir_startino; | ||
862 | pag->pagl_rightrec = rec.ir_startino; | ||
863 | pag->pagl_pagino = pagino; | ||
864 | goto alloc_inode; | ||
865 | } | ||
866 | |||
867 | /* free inodes to the right? */ | ||
868 | if (!useleft && rec.ir_freecount) { | ||
869 | xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); | ||
870 | |||
871 | pag->pagl_leftrec = trec.ir_startino; | ||
872 | pag->pagl_rightrec = rec.ir_startino; | ||
873 | pag->pagl_pagino = pagino; | ||
874 | goto alloc_inode; | ||
875 | } | ||
876 | |||
877 | /* get next record to check */ | ||
878 | if (useleft) { | ||
879 | error = xfs_ialloc_next_rec(tcur, &trec, | ||
880 | &doneleft, 1); | ||
881 | } else { | ||
882 | error = xfs_ialloc_next_rec(cur, &rec, | ||
883 | &doneright, 0); | ||
884 | } | ||
885 | if (error) | ||
886 | goto error1; | ||
887 | } | ||
888 | |||
889 | /* | ||
890 | * We've reached the end of the btree. because | ||
891 | * we are only searching a small chunk of the | ||
892 | * btree each search, there is obviously free | ||
893 | * inodes closer to the parent inode than we | ||
894 | * are now. restart the search again. | ||
895 | */ | ||
896 | pag->pagl_pagino = NULLAGINO; | ||
897 | pag->pagl_leftrec = NULLAGINO; | ||
898 | pag->pagl_rightrec = NULLAGINO; | ||
899 | xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); | ||
900 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
901 | goto restart_pagno; | ||
902 | } | ||
903 | |||
904 | /* | ||
905 | * In a different AG from the parent. | ||
906 | * See if the most recently allocated block has any free. | ||
907 | */ | ||
908 | newino: | ||
909 | if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { | ||
910 | error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), | ||
911 | XFS_LOOKUP_EQ, &i); | ||
912 | if (error) | ||
913 | goto error0; | ||
914 | |||
915 | if (i == 1) { | ||
916 | error = xfs_inobt_get_rec(cur, &rec, &j); | ||
917 | if (error) | ||
918 | goto error0; | ||
919 | |||
920 | if (j == 1 && rec.ir_freecount > 0) { | ||
921 | /* | ||
922 | * The last chunk allocated in the group | ||
923 | * still has a free inode. | ||
924 | */ | ||
925 | goto alloc_inode; | ||
926 | } | ||
927 | } | ||
928 | } | ||
929 | |||
930 | /* | ||
931 | * None left in the last group, search the whole AG | ||
932 | */ | ||
933 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); | ||
934 | if (error) | ||
935 | goto error0; | ||
936 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
937 | |||
938 | for (;;) { | ||
939 | error = xfs_inobt_get_rec(cur, &rec, &i); | ||
940 | if (error) | ||
941 | goto error0; | ||
942 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
943 | if (rec.ir_freecount > 0) | ||
944 | break; | ||
945 | error = xfs_btree_increment(cur, 0, &i); | ||
946 | if (error) | ||
947 | goto error0; | ||
948 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
949 | } | ||
950 | |||
951 | alloc_inode: | ||
952 | offset = xfs_lowbit64(rec.ir_free); | ||
953 | ASSERT(offset >= 0); | ||
954 | ASSERT(offset < XFS_INODES_PER_CHUNK); | ||
955 | ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % | ||
956 | XFS_INODES_PER_CHUNK) == 0); | ||
957 | ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); | ||
958 | rec.ir_free &= ~XFS_INOBT_MASK(offset); | ||
959 | rec.ir_freecount--; | ||
960 | error = xfs_inobt_update(cur, &rec); | ||
961 | if (error) | ||
962 | goto error0; | ||
963 | be32_add_cpu(&agi->agi_freecount, -1); | ||
964 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); | ||
965 | pag->pagi_freecount--; | ||
966 | |||
967 | error = xfs_check_agi_freecount(cur, agi); | ||
968 | if (error) | ||
969 | goto error0; | ||
970 | |||
971 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
972 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); | ||
973 | xfs_perag_put(pag); | ||
974 | *inop = ino; | ||
975 | return 0; | ||
976 | error1: | ||
977 | xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); | ||
978 | error0: | ||
979 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
980 | xfs_perag_put(pag); | ||
981 | return error; | ||
982 | } | ||
983 | |||
984 | /* | ||
985 | * Use the free inode btree to allocate an inode based on distance from the | ||
986 | * parent. Note that the provided cursor may be deleted and replaced. | ||
987 | */ | ||
988 | STATIC int | ||
989 | xfs_dialloc_ag_finobt_near( | ||
990 | xfs_agino_t pagino, | ||
991 | struct xfs_btree_cur **ocur, | ||
992 | struct xfs_inobt_rec_incore *rec) | ||
993 | { | ||
994 | struct xfs_btree_cur *lcur = *ocur; /* left search cursor */ | ||
995 | struct xfs_btree_cur *rcur; /* right search cursor */ | ||
996 | struct xfs_inobt_rec_incore rrec; | ||
997 | int error; | ||
998 | int i, j; | ||
999 | |||
1000 | error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i); | ||
1001 | if (error) | ||
1002 | return error; | ||
1003 | |||
1004 | if (i == 1) { | ||
1005 | error = xfs_inobt_get_rec(lcur, rec, &i); | ||
1006 | if (error) | ||
1007 | return error; | ||
1008 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1009 | |||
1010 | /* | ||
1011 | * See if we've landed in the parent inode record. The finobt | ||
1012 | * only tracks chunks with at least one free inode, so record | ||
1013 | * existence is enough. | ||
1014 | */ | ||
1015 | if (pagino >= rec->ir_startino && | ||
1016 | pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK)) | ||
1017 | return 0; | ||
1018 | } | ||
1019 | |||
1020 | error = xfs_btree_dup_cursor(lcur, &rcur); | ||
1021 | if (error) | ||
1022 | return error; | ||
1023 | |||
1024 | error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j); | ||
1025 | if (error) | ||
1026 | goto error_rcur; | ||
1027 | if (j == 1) { | ||
1028 | error = xfs_inobt_get_rec(rcur, &rrec, &j); | ||
1029 | if (error) | ||
1030 | goto error_rcur; | ||
1031 | XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur); | ||
1032 | } | ||
1033 | |||
1034 | XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur); | ||
1035 | if (i == 1 && j == 1) { | ||
1036 | /* | ||
1037 | * Both the left and right records are valid. Choose the closer | ||
1038 | * inode chunk to the target. | ||
1039 | */ | ||
1040 | if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) > | ||
1041 | (rrec.ir_startino - pagino)) { | ||
1042 | *rec = rrec; | ||
1043 | xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); | ||
1044 | *ocur = rcur; | ||
1045 | } else { | ||
1046 | xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); | ||
1047 | } | ||
1048 | } else if (j == 1) { | ||
1049 | /* only the right record is valid */ | ||
1050 | *rec = rrec; | ||
1051 | xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); | ||
1052 | *ocur = rcur; | ||
1053 | } else if (i == 1) { | ||
1054 | /* only the left record is valid */ | ||
1055 | xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); | ||
1056 | } | ||
1057 | |||
1058 | return 0; | ||
1059 | |||
1060 | error_rcur: | ||
1061 | xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR); | ||
1062 | return error; | ||
1063 | } | ||
1064 | |||
1065 | /* | ||
1066 | * Use the free inode btree to find a free inode based on a newino hint. If | ||
1067 | * the hint is NULL, find the first free inode in the AG. | ||
1068 | */ | ||
1069 | STATIC int | ||
1070 | xfs_dialloc_ag_finobt_newino( | ||
1071 | struct xfs_agi *agi, | ||
1072 | struct xfs_btree_cur *cur, | ||
1073 | struct xfs_inobt_rec_incore *rec) | ||
1074 | { | ||
1075 | int error; | ||
1076 | int i; | ||
1077 | |||
1078 | if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { | ||
1079 | error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ, | ||
1080 | &i); | ||
1081 | if (error) | ||
1082 | return error; | ||
1083 | if (i == 1) { | ||
1084 | error = xfs_inobt_get_rec(cur, rec, &i); | ||
1085 | if (error) | ||
1086 | return error; | ||
1087 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1088 | |||
1089 | return 0; | ||
1090 | } | ||
1091 | } | ||
1092 | |||
1093 | /* | ||
1094 | * Find the first inode available in the AG. | ||
1095 | */ | ||
1096 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); | ||
1097 | if (error) | ||
1098 | return error; | ||
1099 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1100 | |||
1101 | error = xfs_inobt_get_rec(cur, rec, &i); | ||
1102 | if (error) | ||
1103 | return error; | ||
1104 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1105 | |||
1106 | return 0; | ||
1107 | } | ||
1108 | |||
1109 | /* | ||
1110 | * Update the inobt based on a modification made to the finobt. Also ensure that | ||
1111 | * the records from both trees are equivalent post-modification. | ||
1112 | */ | ||
1113 | STATIC int | ||
1114 | xfs_dialloc_ag_update_inobt( | ||
1115 | struct xfs_btree_cur *cur, /* inobt cursor */ | ||
1116 | struct xfs_inobt_rec_incore *frec, /* finobt record */ | ||
1117 | int offset) /* inode offset */ | ||
1118 | { | ||
1119 | struct xfs_inobt_rec_incore rec; | ||
1120 | int error; | ||
1121 | int i; | ||
1122 | |||
1123 | error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); | ||
1124 | if (error) | ||
1125 | return error; | ||
1126 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1127 | |||
1128 | error = xfs_inobt_get_rec(cur, &rec, &i); | ||
1129 | if (error) | ||
1130 | return error; | ||
1131 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1132 | ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % | ||
1133 | XFS_INODES_PER_CHUNK) == 0); | ||
1134 | |||
1135 | rec.ir_free &= ~XFS_INOBT_MASK(offset); | ||
1136 | rec.ir_freecount--; | ||
1137 | |||
1138 | XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && | ||
1139 | (rec.ir_freecount == frec->ir_freecount)); | ||
1140 | |||
1141 | error = xfs_inobt_update(cur, &rec); | ||
1142 | if (error) | ||
1143 | return error; | ||
1144 | |||
1145 | return 0; | ||
1146 | } | ||
1147 | |||
/*
 * Allocate an inode using the free inode btree, if available. Otherwise, fall
 * back to the inobt search algorithm.
 *
 * The caller selected an AG for us, and made sure that free inodes are
 * available.
 *
 * tp:     transaction the btree/AGI/superblock updates are made in
 * agbp:   buffer holding the AGI of the selected allocation group
 * parent: inode number of the parent, used as a locality hint
 * inop:   on success, receives the allocated inode number
 *
 * Returns 0 on success or the error reported by the failing btree operation
 * or freecount check. *inop is only written on success.
 */
STATIC int
xfs_dialloc_ag(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	xfs_ino_t		parent,
	xfs_ino_t		*inop)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_agi			*agi = XFS_BUF_TO_AGI(agbp);
	xfs_agnumber_t			agno = be32_to_cpu(agi->agi_seqno);
	xfs_agnumber_t			pagno = XFS_INO_TO_AGNO(mp, parent);
	xfs_agino_t			pagino = XFS_INO_TO_AGINO(mp, parent);
	struct xfs_perag		*pag;
	struct xfs_btree_cur		*cur;	/* finobt cursor */
	struct xfs_btree_cur		*icur;	/* inobt cursor */
	struct xfs_inobt_rec_incore	rec;
	xfs_ino_t			ino;
	int				error;
	int				offset;
	int				i;

	/* No free inode btree on this filesystem: use the inobt-only path. */
	if (!xfs_sb_version_hasfinobt(&mp->m_sb))
		return xfs_dialloc_ag_inobt(tp, agbp, parent, inop);

	/* Reference is dropped on every exit path below. */
	pag = xfs_perag_get(mp, agno);

	/*
	 * If pagino is 0 (this is the root inode allocation) use newino.
	 * This must work because we've just allocated some.
	 */
	if (!pagino)
		pagino = be32_to_cpu(agi->agi_newino);

	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);

	error = xfs_check_agi_freecount(cur, agi);
	if (error)
		goto error_cur;

	/*
	 * The search algorithm depends on whether we're in the same AG as the
	 * parent. If so, find the closest available inode to the parent. If
	 * not, consider the agi hint or find the first free inode in the AG.
	 */
	if (agno == pagno)
		error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
	else
		error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
	if (error)
		goto error_cur;

	/* Pick the lowest-numbered free inode in the chosen chunk record. */
	offset = xfs_lowbit64(rec.ir_free);
	ASSERT(offset >= 0);
	ASSERT(offset < XFS_INODES_PER_CHUNK);
	ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
				   XFS_INODES_PER_CHUNK) == 0);
	ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);

	/*
	 * Modify or remove the finobt record.
	 */
	rec.ir_free &= ~XFS_INOBT_MASK(offset);
	rec.ir_freecount--;
	/* A record with no free inodes left is deleted rather than updated. */
	if (rec.ir_freecount)
		error = xfs_inobt_update(cur, &rec);
	else
		error = xfs_btree_delete(cur, &i);
	if (error)
		goto error_cur;

	/*
	 * The finobt has now been updated appropriately. We haven't updated the
	 * agi and superblock yet, so we can create an inobt cursor and validate
	 * the original freecount. If all is well, make the equivalent update to
	 * the inobt using the finobt record and offset information.
	 */
	icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);

	error = xfs_check_agi_freecount(icur, agi);
	if (error)
		goto error_icur;

	error = xfs_dialloc_ag_update_inobt(icur, &rec, offset);
	if (error)
		goto error_icur;

	/*
	 * Both trees have now been updated. We must update the perag and
	 * superblock before we can check the freecount for each btree.
	 */
	be32_add_cpu(&agi->agi_freecount, -1);
	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
	pag->pagi_freecount--;

	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);

	error = xfs_check_agi_freecount(icur, agi);
	if (error)
		goto error_icur;
	error = xfs_check_agi_freecount(cur, agi);
	if (error)
		goto error_icur;

	xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	xfs_perag_put(pag);
	*inop = ino;
	return 0;

	/*
	 * error_icur falls through to error_cur: once the inobt cursor exists
	 * both cursors must be torn down; before that only the finobt cursor.
	 */
error_icur:
	xfs_btree_del_cursor(icur, XFS_BTREE_ERROR);
error_cur:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	xfs_perag_put(pag);
	return error;
}
1271 | |||
/*
 * Allocate an inode on disk.
 *
 * Mode is used to tell whether the new inode will need space, and whether it
 * is a directory.
 *
 * This function is designed to be called twice if it has to do an allocation
 * to make more free inodes. On the first call, *IO_agbp should be set to NULL.
 * If an inode is available without having to perform an allocation, an inode
 * number is returned. In this case, *IO_agbp is set to NULL. If an allocation
 * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp.
 * The caller should then commit the current transaction, allocate a
 * new transaction, and call xfs_dialloc() again, passing in the previous value
 * of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI
 * buffer is locked across the two calls, the second call is guaranteed to have
 * a free inode available.
 *
 * Once we successfully pick an inode its number is returned and the on-disk
 * data structures are updated. The inode itself is not read in, since doing so
 * would break ordering constraints with xfs_reclaim.
 *
 * Return value: 0 with *inop set to an inode number on success; 0 with *inop
 * set to NULLFSINO when no inode could be handed out on this call (including
 * the "call me again" case where *IO_agbp is non-NULL); -ENOSPC when every AG
 * was scanned without success and the inode count ceiling had been reached;
 * otherwise the error from the failing helper.
 */
int
xfs_dialloc(
	struct xfs_trans	*tp,
	xfs_ino_t		parent,
	umode_t			mode,
	int			okalloc,
	struct xfs_buf		**IO_agbp,
	xfs_ino_t		*inop)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_buf		*agbp;
	xfs_agnumber_t		agno;
	int			error;
	int			ialloced;
	int			noroom = 0;
	xfs_agnumber_t		start_agno;
	struct xfs_perag	*pag;

	if (*IO_agbp) {
		/*
		 * If the caller passes in a pointer to the AGI buffer,
		 * continue where we left off before.  In this case, we
		 * know that the allocation group has free inodes.
		 */
		agbp = *IO_agbp;
		goto out_alloc;
	}

	/*
	 * We do not have an agbp, so select an initial allocation
	 * group for inode allocation.
	 */
	start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
	if (start_agno == NULLAGNUMBER) {
		*inop = NULLFSINO;
		return 0;
	}

	/*
	 * If we have already hit the ceiling of inode blocks then clear
	 * okalloc so we scan all available agi structures for a free
	 * inode.
	 */
	if (mp->m_maxicount &&
	    mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) {
		noroom = 1;
		okalloc = 0;
	}

	/*
	 * Loop until we find an allocation group that either has free inodes
	 * or in which we can allocate some inodes.  Iterate through the
	 * allocation groups upward, wrapping at the end.
	 */
	agno = start_agno;
	for (;;) {
		/* Reference dropped at nextag, out_error, or before leaving. */
		pag = xfs_perag_get(mp, agno);
		if (!pag->pagi_inodeok) {
			xfs_ialloc_next_ag(mp);
			goto nextag;
		}

		if (!pag->pagi_init) {
			error = xfs_ialloc_pagi_init(mp, tp, agno);
			if (error)
				goto out_error;
		}

		/*
		 * Do a first racy fast path check if this AG is usable.
		 */
		if (!pag->pagi_freecount && !okalloc)
			goto nextag;

		/*
		 * Then read in the AGI buffer and recheck with the AGI buffer
		 * lock held.
		 */
		error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
		if (error)
			goto out_error;

		if (pag->pagi_freecount) {
			xfs_perag_put(pag);
			goto out_alloc;
		}

		if (!okalloc)
			goto nextag_relse_buffer;


		error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
		if (error) {
			xfs_trans_brelse(tp, agbp);

			if (error != -ENOSPC)
				goto out_error;

			/*
			 * -ENOSPC from the chunk allocation is not reported
			 * as an error: the caller sees NULLFSINO and 0.
			 */
			xfs_perag_put(pag);
			*inop = NULLFSINO;
			return 0;
		}

		if (ialloced) {
			/*
			 * We successfully allocated some inodes, return
			 * the current context to the caller so that it
			 * can commit the current transaction and call
			 * us again where we left off.
			 */
			ASSERT(pag->pagi_freecount > 0);
			xfs_perag_put(pag);

			*IO_agbp = agbp;
			*inop = NULLFSINO;
			return 0;
		}

nextag_relse_buffer:
		xfs_trans_brelse(tp, agbp);
nextag:
		xfs_perag_put(pag);
		/* Advance to the next AG, wrapping to 0 after the last one. */
		if (++agno == mp->m_sb.sb_agcount)
			agno = 0;
		/* Back at the starting AG: every AG has been tried. */
		if (agno == start_agno) {
			*inop = NULLFSINO;
			return noroom ? -ENOSPC : 0;
		}
	}

out_alloc:
	/* The AGI buffer is consumed here; tell the caller not to reuse it. */
	*IO_agbp = NULL;
	return xfs_dialloc_ag(tp, agbp, parent, inop);
out_error:
	xfs_perag_put(pag);
	return error;
}
1430 | |||
/*
 * Mark an inode free in the inode allocation btree.
 *
 * Looks up the chunk record covering agino, sets the inode's bit in the free
 * mask and bumps the record's free count. If that leaves the whole chunk free
 * and the filesystem is not mounted with XFS_MOUNT_IKEEP, the record is
 * deleted, the AGI/perag/superblock counters are adjusted and the chunk's
 * blocks are queued on flist to be freed; *deleted is set to 1 and *first_ino
 * to the first inode of the chunk. Otherwise the record is updated in place,
 * the free counters are incremented and *deleted is set to 0.
 *
 * On success the modified record is copied to *orec and 0 is returned. On
 * failure the error is returned and *orec is not written.
 */
STATIC int
xfs_difree_inobt(
	struct xfs_mount		*mp,
	struct xfs_trans		*tp,
	struct xfs_buf			*agbp,
	xfs_agino_t			agino,
	struct xfs_bmap_free		*flist,
	int				*deleted,
	xfs_ino_t			*first_ino,
	struct xfs_inobt_rec_incore	*orec)
{
	struct xfs_agi			*agi = XFS_BUF_TO_AGI(agbp);
	xfs_agnumber_t			agno = be32_to_cpu(agi->agi_seqno);
	struct xfs_perag		*pag;
	struct xfs_btree_cur		*cur;
	struct xfs_inobt_rec_incore	rec;
	int				ilen;
	int				error;
	int				i;
	int				off;

	ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
	ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length));

	/*
	 * Initialize the cursor.
	 */
	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);

	error = xfs_check_agi_freecount(cur, agi);
	if (error)
		goto error0;

	/*
	 * Look for the entry describing this inode.
	 */
	if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
		xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.",
			__func__, error);
		goto error0;
	}
	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
	error = xfs_inobt_get_rec(cur, &rec, &i);
	if (error) {
		xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
			__func__, error);
		goto error0;
	}
	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
	/*
	 * Get the offset in the inode chunk.
	 */
	off = agino - rec.ir_startino;
	ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
	/* The inode being freed must currently be marked allocated. */
	ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off)));
	/*
	 * Mark the inode free & increment the count.
	 */
	rec.ir_free |= XFS_INOBT_MASK(off);
	rec.ir_freecount++;

	/*
	 * When an inode cluster is free, it becomes eligible for removal
	 */
	if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
	    (rec.ir_freecount == mp->m_ialloc_inos)) {

		*deleted = 1;
		*first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);

		/*
		 * Remove the inode cluster from the AGI B+Tree, adjust the
		 * AGI and Superblock inode counts, and mark the disk space
		 * to be freed when the transaction is committed.
		 *
		 * The free counters drop by ilen - 1 rather than ilen: the
		 * inode being freed now was never added to them.
		 */
		ilen = mp->m_ialloc_inos;
		be32_add_cpu(&agi->agi_count, -ilen);
		be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
		xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
		pag = xfs_perag_get(mp, agno);
		pag->pagi_freecount -= ilen - 1;
		xfs_perag_put(pag);
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));

		if ((error = xfs_btree_delete(cur, &i))) {
			xfs_warn(mp, "%s: xfs_btree_delete returned error %d.",
				__func__, error);
			goto error0;
		}

		xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
				  XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),
				  mp->m_ialloc_blks, flist, mp);
	} else {
		*deleted = 0;

		error = xfs_inobt_update(cur, &rec);
		if (error) {
			xfs_warn(mp, "%s: xfs_inobt_update returned error %d.",
				__func__, error);
			goto error0;
		}

		/*
		 * Change the inode free counts and log the ag/sb changes.
		 */
		be32_add_cpu(&agi->agi_freecount, 1);
		xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
		pag = xfs_perag_get(mp, agno);
		pag->pagi_freecount++;
		xfs_perag_put(pag);
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
	}

	error = xfs_check_agi_freecount(cur, agi);
	if (error)
		goto error0;

	/* Hand the updated record back to the caller. */
	*orec = rec;
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	return 0;

error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}
1558 | |||
/*
 * Free an inode in the free inode btree.
 *
 * ibtrec is the inobt record for the chunk containing agino, as it looks
 * after the inode has already been marked free there. The finobt is brought
 * into agreement with it: a missing record is inserted, an existing record is
 * updated independently and cross-checked against ibtrec, and a record whose
 * chunk is now entirely free is deleted unless XFS_MOUNT_IKEEP is set.
 *
 * Returns 0 on success or the error from the failing btree operation or
 * consistency check.
 */
STATIC int
xfs_difree_finobt(
	struct xfs_mount		*mp,
	struct xfs_trans		*tp,
	struct xfs_buf			*agbp,
	xfs_agino_t			agino,
	struct xfs_inobt_rec_incore	*ibtrec) /* inobt record */
{
	struct xfs_agi			*agi = XFS_BUF_TO_AGI(agbp);
	xfs_agnumber_t			agno = be32_to_cpu(agi->agi_seqno);
	struct xfs_btree_cur		*cur;
	struct xfs_inobt_rec_incore	rec;
	int				offset = agino - ibtrec->ir_startino;
	int				error;
	int				i;

	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);

	error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
	if (error)
		goto error;
	if (i == 0) {
		/*
		 * If the record does not exist in the finobt, we must have just
		 * freed an inode in a previously fully allocated chunk. If not,
		 * something is out of sync.
		 */
		XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error);

		error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
					     ibtrec->ir_free, &i);
		if (error)
			goto error;
		ASSERT(i == 1);

		goto out;
	}

	/*
	 * Read and update the existing record. We could just copy the ibtrec
	 * across here, but that would defeat the purpose of having redundant
	 * metadata. By making the modifications independently, we can catch
	 * corruptions that we wouldn't see if we just copied from one record
	 * to another.
	 */
	error = xfs_inobt_get_rec(cur, &rec, &i);
	if (error)
		goto error;
	XFS_WANT_CORRUPTED_GOTO(i == 1, error);

	rec.ir_free |= XFS_INOBT_MASK(offset);
	rec.ir_freecount++;

	/* The independently updated record must now match the inobt's. */
	XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) &&
				(rec.ir_freecount == ibtrec->ir_freecount),
				error);

	/*
	 * The content of inobt records should always match between the inobt
	 * and finobt. The lifecycle of records in the finobt is different from
	 * the inobt in that the finobt only tracks records with at least one
	 * free inode. Hence, if all of the inodes are free and we aren't
	 * keeping inode chunks permanently on disk, remove the record.
	 * Otherwise, update the record with the new information.
	 */
	if (rec.ir_freecount == mp->m_ialloc_inos &&
	    !(mp->m_flags & XFS_MOUNT_IKEEP)) {
		error = xfs_btree_delete(cur, &i);
		if (error)
			goto error;
		ASSERT(i == 1);
	} else {
		error = xfs_inobt_update(cur, &rec);
		if (error)
			goto error;
	}

out:
	error = xfs_check_agi_freecount(cur, agi);
	if (error)
		goto error;

	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	return 0;

	/* Note: this label shares its name with the local variable. */
error:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}
1651 | |||
1652 | /* | ||
1653 | * Free disk inode. Carefully avoids touching the incore inode, all | ||
1654 | * manipulations incore are the caller's responsibility. | ||
1655 | * The on-disk inode is not changed by this operation, only the | ||
1656 | * btree (free inode mask) is changed. | ||
1657 | */ | ||
1658 | int | ||
1659 | xfs_difree( | ||
1660 | struct xfs_trans *tp, /* transaction pointer */ | ||
1661 | xfs_ino_t inode, /* inode to be freed */ | ||
1662 | struct xfs_bmap_free *flist, /* extents to free */ | ||
1663 | int *deleted,/* set if inode cluster was deleted */ | ||
1664 | xfs_ino_t *first_ino)/* first inode in deleted cluster */ | ||
1665 | { | ||
1666 | /* REFERENCED */ | ||
1667 | xfs_agblock_t agbno; /* block number containing inode */ | ||
1668 | struct xfs_buf *agbp; /* buffer for allocation group header */ | ||
1669 | xfs_agino_t agino; /* allocation group inode number */ | ||
1670 | xfs_agnumber_t agno; /* allocation group number */ | ||
1671 | int error; /* error return value */ | ||
1672 | struct xfs_mount *mp; /* mount structure for filesystem */ | ||
1673 | struct xfs_inobt_rec_incore rec;/* btree record */ | ||
1674 | |||
1675 | mp = tp->t_mountp; | ||
1676 | |||
1677 | /* | ||
1678 | * Break up inode number into its components. | ||
1679 | */ | ||
1680 | agno = XFS_INO_TO_AGNO(mp, inode); | ||
1681 | if (agno >= mp->m_sb.sb_agcount) { | ||
1682 | xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", | ||
1683 | __func__, agno, mp->m_sb.sb_agcount); | ||
1684 | ASSERT(0); | ||
1685 | return -EINVAL; | ||
1686 | } | ||
1687 | agino = XFS_INO_TO_AGINO(mp, inode); | ||
1688 | if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { | ||
1689 | xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", | ||
1690 | __func__, (unsigned long long)inode, | ||
1691 | (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); | ||
1692 | ASSERT(0); | ||
1693 | return -EINVAL; | ||
1694 | } | ||
1695 | agbno = XFS_AGINO_TO_AGBNO(mp, agino); | ||
1696 | if (agbno >= mp->m_sb.sb_agblocks) { | ||
1697 | xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", | ||
1698 | __func__, agbno, mp->m_sb.sb_agblocks); | ||
1699 | ASSERT(0); | ||
1700 | return -EINVAL; | ||
1701 | } | ||
1702 | /* | ||
1703 | * Get the allocation group header. | ||
1704 | */ | ||
1705 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); | ||
1706 | if (error) { | ||
1707 | xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", | ||
1708 | __func__, error); | ||
1709 | return error; | ||
1710 | } | ||
1711 | |||
1712 | /* | ||
1713 | * Fix up the inode allocation btree. | ||
1714 | */ | ||
1715 | error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino, | ||
1716 | &rec); | ||
1717 | if (error) | ||
1718 | goto error0; | ||
1719 | |||
1720 | /* | ||
1721 | * Fix up the free inode btree. | ||
1722 | */ | ||
1723 | if (xfs_sb_version_hasfinobt(&mp->m_sb)) { | ||
1724 | error = xfs_difree_finobt(mp, tp, agbp, agino, &rec); | ||
1725 | if (error) | ||
1726 | goto error0; | ||
1727 | } | ||
1728 | |||
1729 | return 0; | ||
1730 | |||
1731 | error0: | ||
1732 | return error; | ||
1733 | } | ||
1734 | |||
1735 | STATIC int | ||
1736 | xfs_imap_lookup( | ||
1737 | struct xfs_mount *mp, | ||
1738 | struct xfs_trans *tp, | ||
1739 | xfs_agnumber_t agno, | ||
1740 | xfs_agino_t agino, | ||
1741 | xfs_agblock_t agbno, | ||
1742 | xfs_agblock_t *chunk_agbno, | ||
1743 | xfs_agblock_t *offset_agbno, | ||
1744 | int flags) | ||
1745 | { | ||
1746 | struct xfs_inobt_rec_incore rec; | ||
1747 | struct xfs_btree_cur *cur; | ||
1748 | struct xfs_buf *agbp; | ||
1749 | int error; | ||
1750 | int i; | ||
1751 | |||
1752 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); | ||
1753 | if (error) { | ||
1754 | xfs_alert(mp, | ||
1755 | "%s: xfs_ialloc_read_agi() returned error %d, agno %d", | ||
1756 | __func__, error, agno); | ||
1757 | return error; | ||
1758 | } | ||
1759 | |||
1760 | /* | ||
1761 | * Lookup the inode record for the given agino. If the record cannot be | ||
1762 | * found, then it's an invalid inode number and we should abort. Once | ||
1763 | * we have a record, we need to ensure it contains the inode number | ||
1764 | * we are looking up. | ||
1765 | */ | ||
1766 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); | ||
1767 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); | ||
1768 | if (!error) { | ||
1769 | if (i) | ||
1770 | error = xfs_inobt_get_rec(cur, &rec, &i); | ||
1771 | if (!error && i == 0) | ||
1772 | error = -EINVAL; | ||
1773 | } | ||
1774 | |||
1775 | xfs_trans_brelse(tp, agbp); | ||
1776 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
1777 | if (error) | ||
1778 | return error; | ||
1779 | |||
1780 | /* check that the returned record contains the required inode */ | ||
1781 | if (rec.ir_startino > agino || | ||
1782 | rec.ir_startino + mp->m_ialloc_inos <= agino) | ||
1783 | return -EINVAL; | ||
1784 | |||
1785 | /* for untrusted inodes check it is allocated first */ | ||
1786 | if ((flags & XFS_IGET_UNTRUSTED) && | ||
1787 | (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) | ||
1788 | return -EINVAL; | ||
1789 | |||
1790 | *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino); | ||
1791 | *offset_agbno = agbno - *chunk_agbno; | ||
1792 | return 0; | ||
1793 | } | ||
1794 | |||
/*
 * Return the location of the inode in imap, for mapping it into a buffer.
 *
 * On success imap->im_blkno, im_len and im_boffset describe the buffer that
 * holds the inode and the inode's byte offset within it, and 0 is returned.
 * Returns -EINVAL if the inode number is malformed or maps beyond the end of
 * the filesystem, or the error from xfs_imap_lookup() when a btree lookup is
 * needed.
 */
int
xfs_imap(
	xfs_mount_t	 *mp,	/* file system mount structure */
	xfs_trans_t	 *tp,	/* transaction pointer */
	xfs_ino_t	ino,	/* inode to locate */
	struct xfs_imap	*imap,	/* location map structure */
	uint		flags)	/* flags for inode btree lookup */
{
	xfs_agblock_t	agbno;	/* block number of inode in the alloc group */
	xfs_agino_t	agino;	/* inode number within alloc group */
	xfs_agnumber_t	agno;	/* allocation group number */
	int		blks_per_cluster; /* num blocks per inode cluster */
	xfs_agblock_t	chunk_agbno;	/* first block in inode chunk */
	xfs_agblock_t	cluster_agbno;	/* first block in inode cluster */
	int		error;	/* error code */
	int		offset;	/* index of inode in its buffer */
	xfs_agblock_t	offset_agbno;	/* blks from chunk start to inode */

	ASSERT(ino != NULLFSINO);

	/*
	 * Split up the inode number into its parts.
	 */
	agno = XFS_INO_TO_AGNO(mp, ino);
	agino = XFS_INO_TO_AGINO(mp, ino);
	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
	if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
	    ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
#ifdef DEBUG
		/*
		 * Don't output diagnostic information for untrusted inodes
		 * as they can be invalid without implying corruption.
		 */
		if (flags & XFS_IGET_UNTRUSTED)
			return -EINVAL;
		if (agno >= mp->m_sb.sb_agcount) {
			xfs_alert(mp,
				"%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
				__func__, agno, mp->m_sb.sb_agcount);
		}
		if (agbno >= mp->m_sb.sb_agblocks) {
			xfs_alert(mp,
		"%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
				__func__, (unsigned long long)agbno,
				(unsigned long)mp->m_sb.sb_agblocks);
		}
		if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
			xfs_alert(mp,
		"%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
				__func__, ino,
				XFS_AGINO_TO_INO(mp, agno, agino));
		}
		xfs_stack_trace();
#endif /* DEBUG */
		return -EINVAL;
	}

	blks_per_cluster = xfs_icluster_size_fsb(mp);

	/*
	 * For bulkstat and handle lookups, we have an untrusted inode number
	 * that we have to verify is valid. We cannot do this just by reading
	 * the inode buffer as it may have been unlinked and removed leaving
	 * inodes in stale state on disk. Hence we have to do a btree lookup
	 * in all cases where an untrusted inode number is passed.
	 */
	if (flags & XFS_IGET_UNTRUSTED) {
		error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
					&chunk_agbno, &offset_agbno, flags);
		if (error)
			return error;
		goto out_map;
	}

	/*
	 * If the inode cluster size is the same as the blocksize or
	 * smaller we get to the buffer by simple arithmetics.
	 */
	if (blks_per_cluster == 1) {
		offset = XFS_INO_TO_OFFSET(mp, ino);
		ASSERT(offset < mp->m_sb.sb_inopblock);

		imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
		imap->im_len = XFS_FSB_TO_BB(mp, 1);
		/* Convert the inode index into a byte offset in the buffer. */
		imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
		return 0;
	}

	/*
	 * If the inode chunks are aligned then use simple maths to
	 * find the location. Otherwise we have to do a btree
	 * lookup to find the location.
	 */
	if (mp->m_inoalign_mask) {
		offset_agbno = agbno & mp->m_inoalign_mask;
		chunk_agbno = agbno - offset_agbno;
	} else {
		error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
					&chunk_agbno, &offset_agbno, flags);
		if (error)
			return error;
	}

out_map:
	ASSERT(agbno >= chunk_agbno);
	/* Round the inode's block down to the start of its cluster. */
	cluster_agbno = chunk_agbno +
		((offset_agbno / blks_per_cluster) * blks_per_cluster);
	/* Inode index within the cluster buffer. */
	offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
		XFS_INO_TO_OFFSET(mp, ino);

	imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
	imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
	imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);

	/*
	 * If the inode number maps to a block outside the bounds
	 * of the file system then return -EINVAL rather than calling
	 * read_buf and panicking when we get an error from the
	 * driver.
	 */
	if ((imap->im_blkno + imap->im_len) >
	    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
		xfs_alert(mp,
	"%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)",
			__func__, (unsigned long long) imap->im_blkno,
			(unsigned long long) imap->im_len,
			XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
		return -EINVAL;
	}
	return 0;
}
1929 | |||
1930 | /* | ||
1931 | * Compute and fill in value of m_in_maxlevels. | ||
1932 | */ | ||
1933 | void | ||
1934 | xfs_ialloc_compute_maxlevels( | ||
1935 | xfs_mount_t *mp) /* file system mount structure */ | ||
1936 | { | ||
1937 | int level; | ||
1938 | uint maxblocks; | ||
1939 | uint maxleafents; | ||
1940 | int minleafrecs; | ||
1941 | int minnoderecs; | ||
1942 | |||
1943 | maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >> | ||
1944 | XFS_INODES_PER_CHUNK_LOG; | ||
1945 | minleafrecs = mp->m_alloc_mnr[0]; | ||
1946 | minnoderecs = mp->m_alloc_mnr[1]; | ||
1947 | maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; | ||
1948 | for (level = 1; maxblocks > 1; level++) | ||
1949 | maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; | ||
1950 | mp->m_in_maxlevels = level; | ||
1951 | } | ||
1952 | |||
1953 | /* | ||
1954 | * Log specified fields for the ag hdr (inode section). The growth of the agi | ||
1955 | * structure over time requires that we interpret the buffer as two logical | ||
1956 | * regions delineated by the end of the unlinked list. This is due to the size | ||
1957 | * of the hash table and its location in the middle of the agi. | ||
1958 | * | ||
1959 | * For example, a request to log a field before agi_unlinked and a field after | ||
1960 | * agi_unlinked could cause us to log the entire hash table and use an excessive | ||
1961 | * amount of log space. To avoid this behavior, log the region up through | ||
1962 | * agi_unlinked in one call and the region after agi_unlinked through the end of | ||
1963 | * the structure in another. | ||
1964 | */ | ||
1965 | void | ||
1966 | xfs_ialloc_log_agi( | ||
1967 | xfs_trans_t *tp, /* transaction pointer */ | ||
1968 | xfs_buf_t *bp, /* allocation group header buffer */ | ||
1969 | int fields) /* bitmask of fields to log */ | ||
1970 | { | ||
1971 | int first; /* first byte number */ | ||
1972 | int last; /* last byte number */ | ||
1973 | static const short offsets[] = { /* field starting offsets */ | ||
1974 | /* keep in sync with bit definitions */ | ||
1975 | offsetof(xfs_agi_t, agi_magicnum), | ||
1976 | offsetof(xfs_agi_t, agi_versionnum), | ||
1977 | offsetof(xfs_agi_t, agi_seqno), | ||
1978 | offsetof(xfs_agi_t, agi_length), | ||
1979 | offsetof(xfs_agi_t, agi_count), | ||
1980 | offsetof(xfs_agi_t, agi_root), | ||
1981 | offsetof(xfs_agi_t, agi_level), | ||
1982 | offsetof(xfs_agi_t, agi_freecount), | ||
1983 | offsetof(xfs_agi_t, agi_newino), | ||
1984 | offsetof(xfs_agi_t, agi_dirino), | ||
1985 | offsetof(xfs_agi_t, agi_unlinked), | ||
1986 | offsetof(xfs_agi_t, agi_free_root), | ||
1987 | offsetof(xfs_agi_t, agi_free_level), | ||
1988 | sizeof(xfs_agi_t) | ||
1989 | }; | ||
1990 | #ifdef DEBUG | ||
1991 | xfs_agi_t *agi; /* allocation group header */ | ||
1992 | |||
1993 | agi = XFS_BUF_TO_AGI(bp); | ||
1994 | ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); | ||
1995 | #endif | ||
1996 | |||
1997 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); | ||
1998 | |||
1999 | /* | ||
2000 | * Compute byte offsets for the first and last fields in the first | ||
2001 | * region and log the agi buffer. This only logs up through | ||
2002 | * agi_unlinked. | ||
2003 | */ | ||
2004 | if (fields & XFS_AGI_ALL_BITS_R1) { | ||
2005 | xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1, | ||
2006 | &first, &last); | ||
2007 | xfs_trans_log_buf(tp, bp, first, last); | ||
2008 | } | ||
2009 | |||
2010 | /* | ||
2011 | * Mask off the bits in the first region and calculate the first and | ||
2012 | * last field offsets for any bits in the second region. | ||
2013 | */ | ||
2014 | fields &= ~XFS_AGI_ALL_BITS_R1; | ||
2015 | if (fields) { | ||
2016 | xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2, | ||
2017 | &first, &last); | ||
2018 | xfs_trans_log_buf(tp, bp, first, last); | ||
2019 | } | ||
2020 | } | ||
2021 | |||
#ifdef DEBUG
/*
 * Debug-only check: assert that every bucket of the AGI unlinked table holds
 * a non-zero value. Compiles away to nothing in non-DEBUG builds.
 */
STATIC void
xfs_check_agi_unlinked(
	struct xfs_agi		*agi)
{
	int			bucket = 0;

	while (bucket < XFS_AGI_UNLINKED_BUCKETS) {
		ASSERT(agi->agi_unlinked[bucket]);
		bucket++;
	}
}
#else
#define xfs_check_agi_unlinked(agi)
#endif
2035 | |||
2036 | static bool | ||
2037 | xfs_agi_verify( | ||
2038 | struct xfs_buf *bp) | ||
2039 | { | ||
2040 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
2041 | struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); | ||
2042 | |||
2043 | if (xfs_sb_version_hascrc(&mp->m_sb) && | ||
2044 | !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_uuid)) | ||
2045 | return false; | ||
2046 | /* | ||
2047 | * Validate the magic number of the agi block. | ||
2048 | */ | ||
2049 | if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC)) | ||
2050 | return false; | ||
2051 | if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) | ||
2052 | return false; | ||
2053 | |||
2054 | /* | ||
2055 | * during growfs operations, the perag is not fully initialised, | ||
2056 | * so we can't use it for any useful checking. growfs ensures we can't | ||
2057 | * use it by using uncached buffers that don't have the perag attached | ||
2058 | * so we can detect and avoid this problem. | ||
2059 | */ | ||
2060 | if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno) | ||
2061 | return false; | ||
2062 | |||
2063 | xfs_check_agi_unlinked(agi); | ||
2064 | return true; | ||
2065 | } | ||
2066 | |||
2067 | static void | ||
2068 | xfs_agi_read_verify( | ||
2069 | struct xfs_buf *bp) | ||
2070 | { | ||
2071 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
2072 | |||
2073 | if (xfs_sb_version_hascrc(&mp->m_sb) && | ||
2074 | !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) | ||
2075 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
2076 | else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, | ||
2077 | XFS_ERRTAG_IALLOC_READ_AGI, | ||
2078 | XFS_RANDOM_IALLOC_READ_AGI)) | ||
2079 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
2080 | |||
2081 | if (bp->b_error) | ||
2082 | xfs_verifier_error(bp); | ||
2083 | } | ||
2084 | |||
2085 | static void | ||
2086 | xfs_agi_write_verify( | ||
2087 | struct xfs_buf *bp) | ||
2088 | { | ||
2089 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
2090 | struct xfs_buf_log_item *bip = bp->b_fspriv; | ||
2091 | |||
2092 | if (!xfs_agi_verify(bp)) { | ||
2093 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
2094 | xfs_verifier_error(bp); | ||
2095 | return; | ||
2096 | } | ||
2097 | |||
2098 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
2099 | return; | ||
2100 | |||
2101 | if (bip) | ||
2102 | XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); | ||
2103 | xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF); | ||
2104 | } | ||
2105 | |||
2106 | const struct xfs_buf_ops xfs_agi_buf_ops = { | ||
2107 | .verify_read = xfs_agi_read_verify, | ||
2108 | .verify_write = xfs_agi_write_verify, | ||
2109 | }; | ||
2110 | |||
2111 | /* | ||
2112 | * Read in the allocation group header (inode allocation section) | ||
2113 | */ | ||
2114 | int | ||
2115 | xfs_read_agi( | ||
2116 | struct xfs_mount *mp, /* file system mount structure */ | ||
2117 | struct xfs_trans *tp, /* transaction pointer */ | ||
2118 | xfs_agnumber_t agno, /* allocation group number */ | ||
2119 | struct xfs_buf **bpp) /* allocation group hdr buf */ | ||
2120 | { | ||
2121 | int error; | ||
2122 | |||
2123 | trace_xfs_read_agi(mp, agno); | ||
2124 | |||
2125 | ASSERT(agno != NULLAGNUMBER); | ||
2126 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, | ||
2127 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), | ||
2128 | XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops); | ||
2129 | if (error) | ||
2130 | return error; | ||
2131 | |||
2132 | xfs_buf_set_ref(*bpp, XFS_AGI_REF); | ||
2133 | return 0; | ||
2134 | } | ||
2135 | |||
2136 | int | ||
2137 | xfs_ialloc_read_agi( | ||
2138 | struct xfs_mount *mp, /* file system mount structure */ | ||
2139 | struct xfs_trans *tp, /* transaction pointer */ | ||
2140 | xfs_agnumber_t agno, /* allocation group number */ | ||
2141 | struct xfs_buf **bpp) /* allocation group hdr buf */ | ||
2142 | { | ||
2143 | struct xfs_agi *agi; /* allocation group header */ | ||
2144 | struct xfs_perag *pag; /* per allocation group data */ | ||
2145 | int error; | ||
2146 | |||
2147 | trace_xfs_ialloc_read_agi(mp, agno); | ||
2148 | |||
2149 | error = xfs_read_agi(mp, tp, agno, bpp); | ||
2150 | if (error) | ||
2151 | return error; | ||
2152 | |||
2153 | agi = XFS_BUF_TO_AGI(*bpp); | ||
2154 | pag = xfs_perag_get(mp, agno); | ||
2155 | if (!pag->pagi_init) { | ||
2156 | pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); | ||
2157 | pag->pagi_count = be32_to_cpu(agi->agi_count); | ||
2158 | pag->pagi_init = 1; | ||
2159 | } | ||
2160 | |||
2161 | /* | ||
2162 | * It's possible for these to be out of sync if | ||
2163 | * we are in the middle of a forced shutdown. | ||
2164 | */ | ||
2165 | ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || | ||
2166 | XFS_FORCED_SHUTDOWN(mp)); | ||
2167 | xfs_perag_put(pag); | ||
2168 | return 0; | ||
2169 | } | ||
2170 | |||
2171 | /* | ||
2172 | * Read in the agi to initialise the per-ag data in the mount structure | ||
2173 | */ | ||
2174 | int | ||
2175 | xfs_ialloc_pagi_init( | ||
2176 | xfs_mount_t *mp, /* file system mount structure */ | ||
2177 | xfs_trans_t *tp, /* transaction pointer */ | ||
2178 | xfs_agnumber_t agno) /* allocation group number */ | ||
2179 | { | ||
2180 | xfs_buf_t *bp = NULL; | ||
2181 | int error; | ||
2182 | |||
2183 | error = xfs_ialloc_read_agi(mp, tp, agno, &bp); | ||
2184 | if (error) | ||
2185 | return error; | ||
2186 | if (bp) | ||
2187 | xfs_trans_brelse(tp, bp); | ||
2188 | return 0; | ||
2189 | } | ||
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h new file mode 100644 index 000000000000..95ad1c002d60 --- /dev/null +++ b/fs/xfs/libxfs/xfs_ialloc.h | |||
@@ -0,0 +1,163 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_IALLOC_H__ | ||
19 | #define __XFS_IALLOC_H__ | ||
20 | |||
21 | struct xfs_buf; | ||
22 | struct xfs_dinode; | ||
23 | struct xfs_imap; | ||
24 | struct xfs_mount; | ||
25 | struct xfs_trans; | ||
26 | struct xfs_btree_cur; | ||
27 | |||
28 | /* Move inodes in clusters of this size */ | ||
29 | #define XFS_INODE_BIG_CLUSTER_SIZE 8192 | ||
30 | |||
31 | /* Calculate and return the number of filesystem blocks per inode cluster */ | ||
32 | static inline int | ||
33 | xfs_icluster_size_fsb( | ||
34 | struct xfs_mount *mp) | ||
35 | { | ||
36 | if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size) | ||
37 | return 1; | ||
38 | return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog; | ||
39 | } | ||
40 | |||
41 | /* | ||
42 | * Make an inode pointer out of the buffer/offset. | ||
43 | */ | ||
44 | static inline struct xfs_dinode * | ||
45 | xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) | ||
46 | { | ||
47 | return (struct xfs_dinode *) | ||
48 | (xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog)); | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * Allocate an inode on disk. | ||
53 | * Mode is used to tell whether the new inode will need space, and whether | ||
54 | * it is a directory. | ||
55 | * | ||
56 | * To work within the constraint of one allocation per transaction, | ||
57 | * xfs_dialloc() is designed to be called twice if it has to do an | ||
58 | * allocation to make more free inodes. If an inode is | ||
59 | * available without an allocation, agbp would be set to the current | ||
60 | * agbp and alloc_done set to false. | ||
61 | * If an allocation needed to be done, agbp would be set to the | ||
62 | * inode header of the allocation group and alloc_done set to true. | ||
63 | * The caller should then commit the current transaction and allocate a new | ||
64 | * transaction. xfs_dialloc() should then be called again with | ||
65 | * the agbp value returned from the previous call. | ||
66 | * | ||
67 | * Once we successfully pick an inode its number is returned and the | ||
68 | * on-disk data structures are updated. The inode itself is not read | ||
69 | * in, since doing so would break ordering constraints with xfs_reclaim. | ||
70 | * | ||
71 | * *agbp should be set to NULL on the first call. (alloc_done is not a parameter of the prototype below.) | ||
72 | */ | ||
73 | int /* error */ | ||
74 | xfs_dialloc( | ||
75 | struct xfs_trans *tp, /* transaction pointer */ | ||
76 | xfs_ino_t parent, /* parent inode (directory) */ | ||
77 | umode_t mode, /* mode bits for new inode */ | ||
78 | int okalloc, /* ok to allocate more space */ | ||
79 | struct xfs_buf **agbp, /* buf for a.g. inode header */ | ||
80 | xfs_ino_t *inop); /* inode number allocated */ | ||
81 | |||
82 | /* | ||
83 | * Free disk inode. Carefully avoids touching the incore inode, all | ||
84 | * manipulations incore are the caller's responsibility. | ||
85 | * The on-disk inode is not changed by this operation, only the | ||
86 | * btree (free inode mask) is changed. | ||
87 | */ | ||
88 | int /* error */ | ||
89 | xfs_difree( | ||
90 | struct xfs_trans *tp, /* transaction pointer */ | ||
91 | xfs_ino_t inode, /* inode to be freed */ | ||
92 | struct xfs_bmap_free *flist, /* extents to free */ | ||
93 | int *deleted, /* set if inode cluster was deleted */ | ||
94 | xfs_ino_t *first_ino); /* first inode in deleted cluster */ | ||
95 | |||
96 | /* | ||
97 | * Return the location of the inode in imap, for mapping it into a buffer. | ||
98 | */ | ||
99 | int | ||
100 | xfs_imap( | ||
101 | struct xfs_mount *mp, /* file system mount structure */ | ||
102 | struct xfs_trans *tp, /* transaction pointer */ | ||
103 | xfs_ino_t ino, /* inode to locate */ | ||
104 | struct xfs_imap *imap, /* location map structure */ | ||
105 | uint flags); /* flags for inode btree lookup */ | ||
106 | |||
107 | /* | ||
108 | * Compute and fill in value of m_in_maxlevels. | ||
109 | */ | ||
110 | void | ||
111 | xfs_ialloc_compute_maxlevels( | ||
112 | struct xfs_mount *mp); /* file system mount structure */ | ||
113 | |||
114 | /* | ||
115 | * Log specified fields for the ag hdr (inode section) | ||
116 | */ | ||
117 | void | ||
118 | xfs_ialloc_log_agi( | ||
119 | struct xfs_trans *tp, /* transaction pointer */ | ||
120 | struct xfs_buf *bp, /* allocation group header buffer */ | ||
121 | int fields); /* bitmask of fields to log */ | ||
122 | |||
123 | /* | ||
124 | * Read in the allocation group header (inode allocation section) | ||
125 | */ | ||
126 | int /* error */ | ||
127 | xfs_ialloc_read_agi( | ||
128 | struct xfs_mount *mp, /* file system mount structure */ | ||
129 | struct xfs_trans *tp, /* transaction pointer */ | ||
130 | xfs_agnumber_t agno, /* allocation group number */ | ||
131 | struct xfs_buf **bpp); /* allocation group hdr buf */ | ||
132 | |||
133 | /* | ||
134 | * Read in the allocation group header to initialise the per-ag data | ||
135 | * in the mount structure | ||
136 | */ | ||
137 | int | ||
138 | xfs_ialloc_pagi_init( | ||
139 | struct xfs_mount *mp, /* file system mount structure */ | ||
140 | struct xfs_trans *tp, /* transaction pointer */ | ||
141 | xfs_agnumber_t agno); /* allocation group number */ | ||
142 | |||
143 | /* | ||
144 | * Lookup a record by ino in the btree given by cur. | ||
145 | */ | ||
146 | int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino, | ||
147 | xfs_lookup_t dir, int *stat); | ||
148 | |||
149 | /* | ||
150 | * Get the data from the pointed-to record. | ||
151 | */ | ||
152 | int xfs_inobt_get_rec(struct xfs_btree_cur *cur, | ||
153 | xfs_inobt_rec_incore_t *rec, int *stat); | ||
154 | |||
155 | /* | ||
156 | * Inode chunk initialisation routine | ||
157 | */ | ||
158 | int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, | ||
159 | struct list_head *buffer_list, | ||
160 | xfs_agnumber_t agno, xfs_agblock_t agbno, | ||
161 | xfs_agblock_t length, unsigned int gen); | ||
162 | |||
163 | #endif /* __XFS_IALLOC_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c new file mode 100644 index 000000000000..c9b06f30fe86 --- /dev/null +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c | |||
@@ -0,0 +1,422 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_shared.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_bit.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_inode.h" | ||
29 | #include "xfs_btree.h" | ||
30 | #include "xfs_ialloc.h" | ||
31 | #include "xfs_ialloc_btree.h" | ||
32 | #include "xfs_alloc.h" | ||
33 | #include "xfs_error.h" | ||
34 | #include "xfs_trace.h" | ||
35 | #include "xfs_cksum.h" | ||
36 | #include "xfs_trans.h" | ||
37 | |||
38 | |||
39 | STATIC int | ||
40 | xfs_inobt_get_minrecs( | ||
41 | struct xfs_btree_cur *cur, | ||
42 | int level) | ||
43 | { | ||
44 | return cur->bc_mp->m_inobt_mnr[level != 0]; | ||
45 | } | ||
46 | |||
47 | STATIC struct xfs_btree_cur * | ||
48 | xfs_inobt_dup_cursor( | ||
49 | struct xfs_btree_cur *cur) | ||
50 | { | ||
51 | return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, | ||
52 | cur->bc_private.a.agbp, cur->bc_private.a.agno, | ||
53 | cur->bc_btnum); | ||
54 | } | ||
55 | |||
56 | STATIC void | ||
57 | xfs_inobt_set_root( | ||
58 | struct xfs_btree_cur *cur, | ||
59 | union xfs_btree_ptr *nptr, | ||
60 | int inc) /* level change */ | ||
61 | { | ||
62 | struct xfs_buf *agbp = cur->bc_private.a.agbp; | ||
63 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); | ||
64 | |||
65 | agi->agi_root = nptr->s; | ||
66 | be32_add_cpu(&agi->agi_level, inc); | ||
67 | xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); | ||
68 | } | ||
69 | |||
70 | STATIC void | ||
71 | xfs_finobt_set_root( | ||
72 | struct xfs_btree_cur *cur, | ||
73 | union xfs_btree_ptr *nptr, | ||
74 | int inc) /* level change */ | ||
75 | { | ||
76 | struct xfs_buf *agbp = cur->bc_private.a.agbp; | ||
77 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); | ||
78 | |||
79 | agi->agi_free_root = nptr->s; | ||
80 | be32_add_cpu(&agi->agi_free_level, inc); | ||
81 | xfs_ialloc_log_agi(cur->bc_tp, agbp, | ||
82 | XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL); | ||
83 | } | ||
84 | |||
85 | STATIC int | ||
86 | xfs_inobt_alloc_block( | ||
87 | struct xfs_btree_cur *cur, | ||
88 | union xfs_btree_ptr *start, | ||
89 | union xfs_btree_ptr *new, | ||
90 | int *stat) | ||
91 | { | ||
92 | xfs_alloc_arg_t args; /* block allocation args */ | ||
93 | int error; /* error return value */ | ||
94 | xfs_agblock_t sbno = be32_to_cpu(start->s); | ||
95 | |||
96 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
97 | |||
98 | memset(&args, 0, sizeof(args)); | ||
99 | args.tp = cur->bc_tp; | ||
100 | args.mp = cur->bc_mp; | ||
101 | args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno); | ||
102 | args.minlen = 1; | ||
103 | args.maxlen = 1; | ||
104 | args.prod = 1; | ||
105 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | ||
106 | |||
107 | error = xfs_alloc_vextent(&args); | ||
108 | if (error) { | ||
109 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
110 | return error; | ||
111 | } | ||
112 | if (args.fsbno == NULLFSBLOCK) { | ||
113 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
114 | *stat = 0; | ||
115 | return 0; | ||
116 | } | ||
117 | ASSERT(args.len == 1); | ||
118 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
119 | |||
120 | new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno)); | ||
121 | *stat = 1; | ||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | STATIC int | ||
126 | xfs_inobt_free_block( | ||
127 | struct xfs_btree_cur *cur, | ||
128 | struct xfs_buf *bp) | ||
129 | { | ||
130 | xfs_fsblock_t fsbno; | ||
131 | int error; | ||
132 | |||
133 | fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)); | ||
134 | error = xfs_free_extent(cur->bc_tp, fsbno, 1); | ||
135 | if (error) | ||
136 | return error; | ||
137 | |||
138 | xfs_trans_binval(cur->bc_tp, bp); | ||
139 | return error; | ||
140 | } | ||
141 | |||
142 | STATIC int | ||
143 | xfs_inobt_get_maxrecs( | ||
144 | struct xfs_btree_cur *cur, | ||
145 | int level) | ||
146 | { | ||
147 | return cur->bc_mp->m_inobt_mxr[level != 0]; | ||
148 | } | ||
149 | |||
150 | STATIC void | ||
151 | xfs_inobt_init_key_from_rec( | ||
152 | union xfs_btree_key *key, | ||
153 | union xfs_btree_rec *rec) | ||
154 | { | ||
155 | key->inobt.ir_startino = rec->inobt.ir_startino; | ||
156 | } | ||
157 | |||
158 | STATIC void | ||
159 | xfs_inobt_init_rec_from_key( | ||
160 | union xfs_btree_key *key, | ||
161 | union xfs_btree_rec *rec) | ||
162 | { | ||
163 | rec->inobt.ir_startino = key->inobt.ir_startino; | ||
164 | } | ||
165 | |||
166 | STATIC void | ||
167 | xfs_inobt_init_rec_from_cur( | ||
168 | struct xfs_btree_cur *cur, | ||
169 | union xfs_btree_rec *rec) | ||
170 | { | ||
171 | rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino); | ||
172 | rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount); | ||
173 | rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free); | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * initial value of ptr for lookup | ||
178 | */ | ||
179 | STATIC void | ||
180 | xfs_inobt_init_ptr_from_cur( | ||
181 | struct xfs_btree_cur *cur, | ||
182 | union xfs_btree_ptr *ptr) | ||
183 | { | ||
184 | struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); | ||
185 | |||
186 | ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); | ||
187 | |||
188 | ptr->s = agi->agi_root; | ||
189 | } | ||
190 | |||
191 | STATIC void | ||
192 | xfs_finobt_init_ptr_from_cur( | ||
193 | struct xfs_btree_cur *cur, | ||
194 | union xfs_btree_ptr *ptr) | ||
195 | { | ||
196 | struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); | ||
197 | |||
198 | ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); | ||
199 | ptr->s = agi->agi_free_root; | ||
200 | } | ||
201 | |||
202 | STATIC __int64_t | ||
203 | xfs_inobt_key_diff( | ||
204 | struct xfs_btree_cur *cur, | ||
205 | union xfs_btree_key *key) | ||
206 | { | ||
207 | return (__int64_t)be32_to_cpu(key->inobt.ir_startino) - | ||
208 | cur->bc_rec.i.ir_startino; | ||
209 | } | ||
210 | |||
211 | static int | ||
212 | xfs_inobt_verify( | ||
213 | struct xfs_buf *bp) | ||
214 | { | ||
215 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
216 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); | ||
217 | struct xfs_perag *pag = bp->b_pag; | ||
218 | unsigned int level; | ||
219 | |||
220 | /* | ||
221 | * During growfs operations, we can't verify the exact owner as the | ||
222 | * perag is not fully initialised and hence not attached to the buffer. | ||
223 | * | ||
224 | * Similarly, during log recovery we will have a perag structure | ||
225 | * attached, but the agi information will not yet have been initialised | ||
226 | * from the on disk AGI. We don't currently use any of this information, | ||
227 | * but beware of the landmine (i.e. need to check pag->pagi_init) if we | ||
228 | * ever do. | ||
229 | */ | ||
230 | switch (block->bb_magic) { | ||
231 | case cpu_to_be32(XFS_IBT_CRC_MAGIC): | ||
232 | case cpu_to_be32(XFS_FIBT_CRC_MAGIC): | ||
233 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
234 | return false; | ||
235 | if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) | ||
236 | return false; | ||
237 | if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) | ||
238 | return false; | ||
239 | if (pag && | ||
240 | be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) | ||
241 | return false; | ||
242 | /* fall through */ | ||
243 | case cpu_to_be32(XFS_IBT_MAGIC): | ||
244 | case cpu_to_be32(XFS_FIBT_MAGIC): | ||
245 | break; | ||
246 | default: | ||
247 | return 0; | ||
248 | } | ||
249 | |||
250 | /* numrecs and level verification */ | ||
251 | level = be16_to_cpu(block->bb_level); | ||
252 | if (level >= mp->m_in_maxlevels) | ||
253 | return false; | ||
254 | if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0]) | ||
255 | return false; | ||
256 | |||
257 | /* sibling pointer verification */ | ||
258 | if (!block->bb_u.s.bb_leftsib || | ||
259 | (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && | ||
260 | block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) | ||
261 | return false; | ||
262 | if (!block->bb_u.s.bb_rightsib || | ||
263 | (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && | ||
264 | block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) | ||
265 | return false; | ||
266 | |||
267 | return true; | ||
268 | } | ||
269 | |||
270 | static void | ||
271 | xfs_inobt_read_verify( | ||
272 | struct xfs_buf *bp) | ||
273 | { | ||
274 | if (!xfs_btree_sblock_verify_crc(bp)) | ||
275 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
276 | else if (!xfs_inobt_verify(bp)) | ||
277 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
278 | |||
279 | if (bp->b_error) { | ||
280 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
281 | xfs_verifier_error(bp); | ||
282 | } | ||
283 | } | ||
284 | |||
285 | static void | ||
286 | xfs_inobt_write_verify( | ||
287 | struct xfs_buf *bp) | ||
288 | { | ||
289 | if (!xfs_inobt_verify(bp)) { | ||
290 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
291 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
292 | xfs_verifier_error(bp); | ||
293 | return; | ||
294 | } | ||
295 | xfs_btree_sblock_calc_crc(bp); | ||
296 | |||
297 | } | ||
298 | |||
299 | const struct xfs_buf_ops xfs_inobt_buf_ops = { | ||
300 | .verify_read = xfs_inobt_read_verify, | ||
301 | .verify_write = xfs_inobt_write_verify, | ||
302 | }; | ||
303 | |||
304 | #if defined(DEBUG) || defined(XFS_WARN) | ||
305 | STATIC int | ||
306 | xfs_inobt_keys_inorder( | ||
307 | struct xfs_btree_cur *cur, | ||
308 | union xfs_btree_key *k1, | ||
309 | union xfs_btree_key *k2) | ||
310 | { | ||
311 | return be32_to_cpu(k1->inobt.ir_startino) < | ||
312 | be32_to_cpu(k2->inobt.ir_startino); | ||
313 | } | ||
314 | |||
315 | STATIC int | ||
316 | xfs_inobt_recs_inorder( | ||
317 | struct xfs_btree_cur *cur, | ||
318 | union xfs_btree_rec *r1, | ||
319 | union xfs_btree_rec *r2) | ||
320 | { | ||
321 | return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <= | ||
322 | be32_to_cpu(r2->inobt.ir_startino); | ||
323 | } | ||
324 | #endif /* DEBUG */ | ||
325 | |||
326 | static const struct xfs_btree_ops xfs_inobt_ops = { | ||
327 | .rec_len = sizeof(xfs_inobt_rec_t), | ||
328 | .key_len = sizeof(xfs_inobt_key_t), | ||
329 | |||
330 | .dup_cursor = xfs_inobt_dup_cursor, | ||
331 | .set_root = xfs_inobt_set_root, | ||
332 | .alloc_block = xfs_inobt_alloc_block, | ||
333 | .free_block = xfs_inobt_free_block, | ||
334 | .get_minrecs = xfs_inobt_get_minrecs, | ||
335 | .get_maxrecs = xfs_inobt_get_maxrecs, | ||
336 | .init_key_from_rec = xfs_inobt_init_key_from_rec, | ||
337 | .init_rec_from_key = xfs_inobt_init_rec_from_key, | ||
338 | .init_rec_from_cur = xfs_inobt_init_rec_from_cur, | ||
339 | .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, | ||
340 | .key_diff = xfs_inobt_key_diff, | ||
341 | .buf_ops = &xfs_inobt_buf_ops, | ||
342 | #if defined(DEBUG) || defined(XFS_WARN) | ||
343 | .keys_inorder = xfs_inobt_keys_inorder, | ||
344 | .recs_inorder = xfs_inobt_recs_inorder, | ||
345 | #endif | ||
346 | }; | ||
347 | |||
348 | static const struct xfs_btree_ops xfs_finobt_ops = { | ||
349 | .rec_len = sizeof(xfs_inobt_rec_t), | ||
350 | .key_len = sizeof(xfs_inobt_key_t), | ||
351 | |||
352 | .dup_cursor = xfs_inobt_dup_cursor, | ||
353 | .set_root = xfs_finobt_set_root, | ||
354 | .alloc_block = xfs_inobt_alloc_block, | ||
355 | .free_block = xfs_inobt_free_block, | ||
356 | .get_minrecs = xfs_inobt_get_minrecs, | ||
357 | .get_maxrecs = xfs_inobt_get_maxrecs, | ||
358 | .init_key_from_rec = xfs_inobt_init_key_from_rec, | ||
359 | .init_rec_from_key = xfs_inobt_init_rec_from_key, | ||
360 | .init_rec_from_cur = xfs_inobt_init_rec_from_cur, | ||
361 | .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur, | ||
362 | .key_diff = xfs_inobt_key_diff, | ||
363 | .buf_ops = &xfs_inobt_buf_ops, | ||
364 | #if defined(DEBUG) || defined(XFS_WARN) | ||
365 | .keys_inorder = xfs_inobt_keys_inorder, | ||
366 | .recs_inorder = xfs_inobt_recs_inorder, | ||
367 | #endif | ||
368 | }; | ||
369 | |||
370 | /* | ||
371 | * Allocate a new inode btree cursor. | ||
372 | */ | ||
373 | struct xfs_btree_cur * /* new inode btree cursor */ | ||
374 | xfs_inobt_init_cursor( | ||
375 | struct xfs_mount *mp, /* file system mount point */ | ||
376 | struct xfs_trans *tp, /* transaction pointer */ | ||
377 | struct xfs_buf *agbp, /* buffer for agi structure */ | ||
378 | xfs_agnumber_t agno, /* allocation group number */ | ||
379 | xfs_btnum_t btnum) /* ialloc or free ino btree */ | ||
380 | { | ||
381 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); | ||
382 | struct xfs_btree_cur *cur; | ||
383 | |||
384 | cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); | ||
385 | |||
386 | cur->bc_tp = tp; | ||
387 | cur->bc_mp = mp; | ||
388 | cur->bc_btnum = btnum; | ||
389 | if (btnum == XFS_BTNUM_INO) { | ||
390 | cur->bc_nlevels = be32_to_cpu(agi->agi_level); | ||
391 | cur->bc_ops = &xfs_inobt_ops; | ||
392 | } else { | ||
393 | cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); | ||
394 | cur->bc_ops = &xfs_finobt_ops; | ||
395 | } | ||
396 | |||
397 | cur->bc_blocklog = mp->m_sb.sb_blocklog; | ||
398 | |||
399 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
400 | cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; | ||
401 | |||
402 | cur->bc_private.a.agbp = agbp; | ||
403 | cur->bc_private.a.agno = agno; | ||
404 | |||
405 | return cur; | ||
406 | } | ||
407 | |||
408 | /* | ||
409 | * Calculate number of records in an inobt btree block. | ||
410 | */ | ||
411 | int | ||
412 | xfs_inobt_maxrecs( | ||
413 | struct xfs_mount *mp, | ||
414 | int blocklen, | ||
415 | int leaf) | ||
416 | { | ||
417 | blocklen -= XFS_INOBT_BLOCK_LEN(mp); | ||
418 | |||
419 | if (leaf) | ||
420 | return blocklen / sizeof(xfs_inobt_rec_t); | ||
421 | return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); | ||
422 | } | ||
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h new file mode 100644 index 000000000000..d7ebea72c2d0 --- /dev/null +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h | |||
@@ -0,0 +1,65 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_IALLOC_BTREE_H__ | ||
19 | #define __XFS_IALLOC_BTREE_H__ | ||
20 | |||
21 | /* | ||
22 | * Inode map on-disk structures | ||
23 | */ | ||
24 | |||
25 | struct xfs_buf; | ||
26 | struct xfs_btree_cur; | ||
27 | struct xfs_mount; | ||
28 | |||
/*
 * Btree block header size depends on a superblock flag: filesystems with
 * CRCs enabled use the CRC short-form block header length.
 */
#define XFS_INOBT_BLOCK_LEN(mp) \
	(!xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
		XFS_BTREE_SBLOCK_LEN : XFS_BTREE_SBLOCK_CRC_LEN)
35 | |||
/*
 * Record, key, and pointer address macros for btree blocks.
 *
 * Each macro computes base + byte offset, where the offset is the block
 * header length plus the position of the 1-based @index entry.  In the
 * pointer macro the offset also skips @maxrecs key slots first.
 *
 * (note that some of these may appear unused, but they are used in userspace)
 */
#define XFS_INOBT_REC_ADDR(mp, block, index) \
	((xfs_inobt_rec_t *) \
		((char *)(block) + \
		 (XFS_INOBT_BLOCK_LEN(mp) + \
		  ((index) - 1) * sizeof(xfs_inobt_rec_t))))

#define XFS_INOBT_KEY_ADDR(mp, block, index) \
	((xfs_inobt_key_t *) \
		((char *)(block) + \
		 (XFS_INOBT_BLOCK_LEN(mp) + \
		  ((index) - 1) * sizeof(xfs_inobt_key_t))))

#define XFS_INOBT_PTR_ADDR(mp, block, index, maxrecs) \
	((xfs_inobt_ptr_t *) \
		((char *)(block) + \
		 (XFS_INOBT_BLOCK_LEN(mp) + \
		  (maxrecs) * sizeof(xfs_inobt_key_t) + \
		  ((index) - 1) * sizeof(xfs_inobt_ptr_t))))
59 | |||
60 | extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, | ||
61 | struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t, | ||
62 | xfs_btnum_t); | ||
63 | extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); | ||
64 | |||
65 | #endif /* __XFS_IALLOC_BTREE_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c new file mode 100644 index 000000000000..f18fd2da49f7 --- /dev/null +++ b/fs/xfs/libxfs/xfs_inode_buf.c | |||
@@ -0,0 +1,479 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_shared.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_sb.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_mount.h" | ||
27 | #include "xfs_inode.h" | ||
28 | #include "xfs_error.h" | ||
29 | #include "xfs_cksum.h" | ||
30 | #include "xfs_icache.h" | ||
31 | #include "xfs_trans.h" | ||
32 | #include "xfs_ialloc.h" | ||
33 | #include "xfs_dinode.h" | ||
34 | |||
/*
 * Debug-only check that none of the inodes in the buffer have a next
 * unlinked field of 0.  Each offending inode is reported with xfs_alert();
 * the buffer itself is not modified or marked in error.
 */
#if defined(DEBUG)
void
xfs_inobp_check(
	xfs_mount_t	*mp,
	xfs_buf_t	*bp)
{
	/* number of inodes in one inode cluster */
	int		ninodes = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
	int		idx;

	for (idx = 0; idx < ninodes; idx++) {
		xfs_dinode_t	*dip;

		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
					idx * mp->m_sb.sb_inodesize);
		if (dip->di_next_unlinked)
			continue;

		xfs_alert(mp,
	"Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
			idx, (long long)bp->b_bn);
	}
}
#endif
62 | |||
63 | /* | ||
64 | * If we are doing readahead on an inode buffer, we might be in log recovery | ||
65 | * reading an inode allocation buffer that hasn't yet been replayed, and hence | ||
66 | * has not had the inode cores stamped into it. Hence for readahead, the buffer | ||
67 | * may be potentially invalid. | ||
68 | * | ||
69 | * If the readahead buffer is invalid, we don't want to mark it with an error, | ||
70 | * but we do want to clear the DONE status of the buffer so that a followup read | ||
71 | * will re-read it from disk. This will ensure that we don't get an unnecessary | ||
72 | * warnings during log recovery and we don't get unnecssary panics on debug | ||
73 | * kernels. | ||
74 | */ | ||
75 | static void | ||
76 | xfs_inode_buf_verify( | ||
77 | struct xfs_buf *bp, | ||
78 | bool readahead) | ||
79 | { | ||
80 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
81 | int i; | ||
82 | int ni; | ||
83 | |||
84 | /* | ||
85 | * Validate the magic number and version of every inode in the buffer | ||
86 | */ | ||
87 | ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; | ||
88 | for (i = 0; i < ni; i++) { | ||
89 | int di_ok; | ||
90 | xfs_dinode_t *dip; | ||
91 | |||
92 | dip = (struct xfs_dinode *)xfs_buf_offset(bp, | ||
93 | (i << mp->m_sb.sb_inodelog)); | ||
94 | di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && | ||
95 | XFS_DINODE_GOOD_VERSION(dip->di_version); | ||
96 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, | ||
97 | XFS_ERRTAG_ITOBP_INOTOBP, | ||
98 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
99 | if (readahead) { | ||
100 | bp->b_flags &= ~XBF_DONE; | ||
101 | return; | ||
102 | } | ||
103 | |||
104 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
105 | xfs_verifier_error(bp); | ||
106 | #ifdef DEBUG | ||
107 | xfs_alert(mp, | ||
108 | "bad inode magic/vsn daddr %lld #%d (magic=%x)", | ||
109 | (unsigned long long)bp->b_bn, i, | ||
110 | be16_to_cpu(dip->di_magic)); | ||
111 | #endif | ||
112 | } | ||
113 | } | ||
114 | xfs_inobp_check(mp, bp); | ||
115 | } | ||
116 | |||
117 | |||
118 | static void | ||
119 | xfs_inode_buf_read_verify( | ||
120 | struct xfs_buf *bp) | ||
121 | { | ||
122 | xfs_inode_buf_verify(bp, false); | ||
123 | } | ||
124 | |||
125 | static void | ||
126 | xfs_inode_buf_readahead_verify( | ||
127 | struct xfs_buf *bp) | ||
128 | { | ||
129 | xfs_inode_buf_verify(bp, true); | ||
130 | } | ||
131 | |||
132 | static void | ||
133 | xfs_inode_buf_write_verify( | ||
134 | struct xfs_buf *bp) | ||
135 | { | ||
136 | xfs_inode_buf_verify(bp, false); | ||
137 | } | ||
138 | |||
139 | const struct xfs_buf_ops xfs_inode_buf_ops = { | ||
140 | .verify_read = xfs_inode_buf_read_verify, | ||
141 | .verify_write = xfs_inode_buf_write_verify, | ||
142 | }; | ||
143 | |||
144 | const struct xfs_buf_ops xfs_inode_buf_ra_ops = { | ||
145 | .verify_read = xfs_inode_buf_readahead_verify, | ||
146 | .verify_write = xfs_inode_buf_write_verify, | ||
147 | }; | ||
148 | |||
149 | |||
150 | /* | ||
151 | * This routine is called to map an inode to the buffer containing the on-disk | ||
152 | * version of the inode. It returns a pointer to the buffer containing the | ||
153 | * on-disk inode in the bpp parameter, and in the dipp parameter it returns a | ||
154 | * pointer to the on-disk inode within that buffer. | ||
155 | * | ||
156 | * If a non-zero error is returned, then the contents of bpp and dipp are | ||
157 | * undefined. | ||
158 | */ | ||
159 | int | ||
160 | xfs_imap_to_bp( | ||
161 | struct xfs_mount *mp, | ||
162 | struct xfs_trans *tp, | ||
163 | struct xfs_imap *imap, | ||
164 | struct xfs_dinode **dipp, | ||
165 | struct xfs_buf **bpp, | ||
166 | uint buf_flags, | ||
167 | uint iget_flags) | ||
168 | { | ||
169 | struct xfs_buf *bp; | ||
170 | int error; | ||
171 | |||
172 | buf_flags |= XBF_UNMAPPED; | ||
173 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, | ||
174 | (int)imap->im_len, buf_flags, &bp, | ||
175 | &xfs_inode_buf_ops); | ||
176 | if (error) { | ||
177 | if (error == -EAGAIN) { | ||
178 | ASSERT(buf_flags & XBF_TRYLOCK); | ||
179 | return error; | ||
180 | } | ||
181 | |||
182 | if (error == -EFSCORRUPTED && | ||
183 | (iget_flags & XFS_IGET_UNTRUSTED)) | ||
184 | return -EINVAL; | ||
185 | |||
186 | xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.", | ||
187 | __func__, error); | ||
188 | return error; | ||
189 | } | ||
190 | |||
191 | *bpp = bp; | ||
192 | *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset); | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | void | ||
197 | xfs_dinode_from_disk( | ||
198 | xfs_icdinode_t *to, | ||
199 | xfs_dinode_t *from) | ||
200 | { | ||
201 | to->di_magic = be16_to_cpu(from->di_magic); | ||
202 | to->di_mode = be16_to_cpu(from->di_mode); | ||
203 | to->di_version = from ->di_version; | ||
204 | to->di_format = from->di_format; | ||
205 | to->di_onlink = be16_to_cpu(from->di_onlink); | ||
206 | to->di_uid = be32_to_cpu(from->di_uid); | ||
207 | to->di_gid = be32_to_cpu(from->di_gid); | ||
208 | to->di_nlink = be32_to_cpu(from->di_nlink); | ||
209 | to->di_projid_lo = be16_to_cpu(from->di_projid_lo); | ||
210 | to->di_projid_hi = be16_to_cpu(from->di_projid_hi); | ||
211 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); | ||
212 | to->di_flushiter = be16_to_cpu(from->di_flushiter); | ||
213 | to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); | ||
214 | to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec); | ||
215 | to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec); | ||
216 | to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec); | ||
217 | to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec); | ||
218 | to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec); | ||
219 | to->di_size = be64_to_cpu(from->di_size); | ||
220 | to->di_nblocks = be64_to_cpu(from->di_nblocks); | ||
221 | to->di_extsize = be32_to_cpu(from->di_extsize); | ||
222 | to->di_nextents = be32_to_cpu(from->di_nextents); | ||
223 | to->di_anextents = be16_to_cpu(from->di_anextents); | ||
224 | to->di_forkoff = from->di_forkoff; | ||
225 | to->di_aformat = from->di_aformat; | ||
226 | to->di_dmevmask = be32_to_cpu(from->di_dmevmask); | ||
227 | to->di_dmstate = be16_to_cpu(from->di_dmstate); | ||
228 | to->di_flags = be16_to_cpu(from->di_flags); | ||
229 | to->di_gen = be32_to_cpu(from->di_gen); | ||
230 | |||
231 | if (to->di_version == 3) { | ||
232 | to->di_changecount = be64_to_cpu(from->di_changecount); | ||
233 | to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec); | ||
234 | to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec); | ||
235 | to->di_flags2 = be64_to_cpu(from->di_flags2); | ||
236 | to->di_ino = be64_to_cpu(from->di_ino); | ||
237 | to->di_lsn = be64_to_cpu(from->di_lsn); | ||
238 | memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); | ||
239 | uuid_copy(&to->di_uuid, &from->di_uuid); | ||
240 | } | ||
241 | } | ||
242 | |||
243 | void | ||
244 | xfs_dinode_to_disk( | ||
245 | xfs_dinode_t *to, | ||
246 | xfs_icdinode_t *from) | ||
247 | { | ||
248 | to->di_magic = cpu_to_be16(from->di_magic); | ||
249 | to->di_mode = cpu_to_be16(from->di_mode); | ||
250 | to->di_version = from ->di_version; | ||
251 | to->di_format = from->di_format; | ||
252 | to->di_onlink = cpu_to_be16(from->di_onlink); | ||
253 | to->di_uid = cpu_to_be32(from->di_uid); | ||
254 | to->di_gid = cpu_to_be32(from->di_gid); | ||
255 | to->di_nlink = cpu_to_be32(from->di_nlink); | ||
256 | to->di_projid_lo = cpu_to_be16(from->di_projid_lo); | ||
257 | to->di_projid_hi = cpu_to_be16(from->di_projid_hi); | ||
258 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); | ||
259 | to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); | ||
260 | to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); | ||
261 | to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); | ||
262 | to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec); | ||
263 | to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec); | ||
264 | to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec); | ||
265 | to->di_size = cpu_to_be64(from->di_size); | ||
266 | to->di_nblocks = cpu_to_be64(from->di_nblocks); | ||
267 | to->di_extsize = cpu_to_be32(from->di_extsize); | ||
268 | to->di_nextents = cpu_to_be32(from->di_nextents); | ||
269 | to->di_anextents = cpu_to_be16(from->di_anextents); | ||
270 | to->di_forkoff = from->di_forkoff; | ||
271 | to->di_aformat = from->di_aformat; | ||
272 | to->di_dmevmask = cpu_to_be32(from->di_dmevmask); | ||
273 | to->di_dmstate = cpu_to_be16(from->di_dmstate); | ||
274 | to->di_flags = cpu_to_be16(from->di_flags); | ||
275 | to->di_gen = cpu_to_be32(from->di_gen); | ||
276 | |||
277 | if (from->di_version == 3) { | ||
278 | to->di_changecount = cpu_to_be64(from->di_changecount); | ||
279 | to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); | ||
280 | to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); | ||
281 | to->di_flags2 = cpu_to_be64(from->di_flags2); | ||
282 | to->di_ino = cpu_to_be64(from->di_ino); | ||
283 | to->di_lsn = cpu_to_be64(from->di_lsn); | ||
284 | memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); | ||
285 | uuid_copy(&to->di_uuid, &from->di_uuid); | ||
286 | to->di_flushiter = 0; | ||
287 | } else { | ||
288 | to->di_flushiter = cpu_to_be16(from->di_flushiter); | ||
289 | } | ||
290 | } | ||
291 | |||
292 | static bool | ||
293 | xfs_dinode_verify( | ||
294 | struct xfs_mount *mp, | ||
295 | struct xfs_inode *ip, | ||
296 | struct xfs_dinode *dip) | ||
297 | { | ||
298 | if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) | ||
299 | return false; | ||
300 | |||
301 | /* only version 3 or greater inodes are extensively verified here */ | ||
302 | if (dip->di_version < 3) | ||
303 | return true; | ||
304 | |||
305 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
306 | return false; | ||
307 | if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, | ||
308 | XFS_DINODE_CRC_OFF)) | ||
309 | return false; | ||
310 | if (be64_to_cpu(dip->di_ino) != ip->i_ino) | ||
311 | return false; | ||
312 | if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid)) | ||
313 | return false; | ||
314 | return true; | ||
315 | } | ||
316 | |||
317 | void | ||
318 | xfs_dinode_calc_crc( | ||
319 | struct xfs_mount *mp, | ||
320 | struct xfs_dinode *dip) | ||
321 | { | ||
322 | __uint32_t crc; | ||
323 | |||
324 | if (dip->di_version < 3) | ||
325 | return; | ||
326 | |||
327 | ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); | ||
328 | crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, | ||
329 | XFS_DINODE_CRC_OFF); | ||
330 | dip->di_crc = xfs_end_cksum(crc); | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * Read the disk inode attributes into the in-core inode structure. | ||
335 | * | ||
336 | * For version 5 superblocks, if we are initialising a new inode and we are not | ||
337 | * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new | ||
338 | * inode core with a random generation number. If we are keeping inodes around, | ||
339 | * we need to read the inode cluster to get the existing generation number off | ||
340 | * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode | ||
341 | * format) then log recovery is dependent on the di_flushiter field being | ||
342 | * initialised from the current on-disk value and hence we must also read the | ||
343 | * inode off disk. | ||
344 | */ | ||
345 | int | ||
346 | xfs_iread( | ||
347 | xfs_mount_t *mp, | ||
348 | xfs_trans_t *tp, | ||
349 | xfs_inode_t *ip, | ||
350 | uint iget_flags) | ||
351 | { | ||
352 | xfs_buf_t *bp; | ||
353 | xfs_dinode_t *dip; | ||
354 | int error; | ||
355 | |||
356 | /* | ||
357 | * Fill in the location information in the in-core inode. | ||
358 | */ | ||
359 | error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); | ||
360 | if (error) | ||
361 | return error; | ||
362 | |||
363 | /* shortcut IO on inode allocation if possible */ | ||
364 | if ((iget_flags & XFS_IGET_CREATE) && | ||
365 | xfs_sb_version_hascrc(&mp->m_sb) && | ||
366 | !(mp->m_flags & XFS_MOUNT_IKEEP)) { | ||
367 | /* initialise the on-disk inode core */ | ||
368 | memset(&ip->i_d, 0, sizeof(ip->i_d)); | ||
369 | ip->i_d.di_magic = XFS_DINODE_MAGIC; | ||
370 | ip->i_d.di_gen = prandom_u32(); | ||
371 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
372 | ip->i_d.di_version = 3; | ||
373 | ip->i_d.di_ino = ip->i_ino; | ||
374 | uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid); | ||
375 | } else | ||
376 | ip->i_d.di_version = 2; | ||
377 | return 0; | ||
378 | } | ||
379 | |||
380 | /* | ||
381 | * Get pointers to the on-disk inode and the buffer containing it. | ||
382 | */ | ||
383 | error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags); | ||
384 | if (error) | ||
385 | return error; | ||
386 | |||
387 | /* even unallocated inodes are verified */ | ||
388 | if (!xfs_dinode_verify(mp, ip, dip)) { | ||
389 | xfs_alert(mp, "%s: validation failed for inode %lld failed", | ||
390 | __func__, ip->i_ino); | ||
391 | |||
392 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); | ||
393 | error = -EFSCORRUPTED; | ||
394 | goto out_brelse; | ||
395 | } | ||
396 | |||
397 | /* | ||
398 | * If the on-disk inode is already linked to a directory | ||
399 | * entry, copy all of the inode into the in-core inode. | ||
400 | * xfs_iformat_fork() handles copying in the inode format | ||
401 | * specific information. | ||
402 | * Otherwise, just get the truly permanent information. | ||
403 | */ | ||
404 | if (dip->di_mode) { | ||
405 | xfs_dinode_from_disk(&ip->i_d, dip); | ||
406 | error = xfs_iformat_fork(ip, dip); | ||
407 | if (error) { | ||
408 | #ifdef DEBUG | ||
409 | xfs_alert(mp, "%s: xfs_iformat() returned error %d", | ||
410 | __func__, error); | ||
411 | #endif /* DEBUG */ | ||
412 | goto out_brelse; | ||
413 | } | ||
414 | } else { | ||
415 | /* | ||
416 | * Partial initialisation of the in-core inode. Just the bits | ||
417 | * that xfs_ialloc won't overwrite or relies on being correct. | ||
418 | */ | ||
419 | ip->i_d.di_magic = be16_to_cpu(dip->di_magic); | ||
420 | ip->i_d.di_version = dip->di_version; | ||
421 | ip->i_d.di_gen = be32_to_cpu(dip->di_gen); | ||
422 | ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); | ||
423 | |||
424 | if (dip->di_version == 3) { | ||
425 | ip->i_d.di_ino = be64_to_cpu(dip->di_ino); | ||
426 | uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid); | ||
427 | } | ||
428 | |||
429 | /* | ||
430 | * Make sure to pull in the mode here as well in | ||
431 | * case the inode is released without being used. | ||
432 | * This ensures that xfs_inactive() will see that | ||
433 | * the inode is already free and not try to mess | ||
434 | * with the uninitialized part of it. | ||
435 | */ | ||
436 | ip->i_d.di_mode = 0; | ||
437 | } | ||
438 | |||
439 | /* | ||
440 | * Automatically convert version 1 inode formats in memory to version 2 | ||
441 | * inode format. If the inode is modified, it will get logged and | ||
442 | * rewritten as a version 2 inode. We can do this because we set the | ||
443 | * superblock feature bit for v2 inodes unconditionally during mount | ||
444 | * and it means the reast of the code can assume the inode version is 2 | ||
445 | * or higher. | ||
446 | */ | ||
447 | if (ip->i_d.di_version == 1) { | ||
448 | ip->i_d.di_version = 2; | ||
449 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); | ||
450 | ip->i_d.di_nlink = ip->i_d.di_onlink; | ||
451 | ip->i_d.di_onlink = 0; | ||
452 | xfs_set_projid(ip, 0); | ||
453 | } | ||
454 | |||
455 | ip->i_delayed_blks = 0; | ||
456 | |||
457 | /* | ||
458 | * Mark the buffer containing the inode as something to keep | ||
459 | * around for a while. This helps to keep recently accessed | ||
460 | * meta-data in-core longer. | ||
461 | */ | ||
462 | xfs_buf_set_ref(bp, XFS_INO_REF); | ||
463 | |||
464 | /* | ||
465 | * Use xfs_trans_brelse() to release the buffer containing the on-disk | ||
466 | * inode, because it was acquired with xfs_trans_read_buf() in | ||
467 | * xfs_imap_to_bp() above. If tp is NULL, this is just a normal | ||
468 | * brelse(). If we're within a transaction, then xfs_trans_brelse() | ||
469 | * will only release the buffer if it is not dirty within the | ||
470 | * transaction. It will be OK to release the buffer in this case, | ||
471 | * because inodes on disk are never destroyed and we will be locking the | ||
472 | * new in-core inode before putting it in the cache where other | ||
473 | * processes can find it. Thus we don't have to worry about the inode | ||
474 | * being changed just because we released the buffer. | ||
475 | */ | ||
476 | out_brelse: | ||
477 | xfs_trans_brelse(tp, bp); | ||
478 | return error; | ||
479 | } | ||
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h new file mode 100644 index 000000000000..9308c47f2a52 --- /dev/null +++ b/fs/xfs/libxfs/xfs_inode_buf.h | |||
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_INODE_BUF_H__ | ||
19 | #define __XFS_INODE_BUF_H__ | ||
20 | |||
21 | struct xfs_inode; | ||
22 | struct xfs_dinode; | ||
23 | struct xfs_icdinode; | ||
24 | |||
25 | /* | ||
26 | * Inode location information. Stored in the inode and passed to | ||
27 | * xfs_imap_to_bp() to get a buffer and dinode for a given inode. | ||
28 | */ | ||
29 | struct xfs_imap { | ||
30 | xfs_daddr_t im_blkno; /* starting BB of inode chunk */ | ||
31 | ushort im_len; /* length in BBs of inode chunk */ | ||
32 | ushort im_boffset; /* inode offset in block in bytes */ | ||
33 | }; | ||
34 | |||
35 | int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *, | ||
36 | struct xfs_imap *, struct xfs_dinode **, | ||
37 | struct xfs_buf **, uint, uint); | ||
38 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, | ||
39 | struct xfs_inode *, uint); | ||
40 | void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *); | ||
41 | void xfs_dinode_to_disk(struct xfs_dinode *to, struct xfs_icdinode *from); | ||
42 | void xfs_dinode_from_disk(struct xfs_icdinode *to, struct xfs_dinode *from); | ||
43 | |||
44 | #if defined(DEBUG) | ||
45 | void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); | ||
46 | #else | ||
47 | #define xfs_inobp_check(mp, bp) | ||
48 | #endif /* DEBUG */ | ||
49 | |||
50 | #endif /* __XFS_INODE_BUF_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c new file mode 100644 index 000000000000..8ac9411bcf2a --- /dev/null +++ b/fs/xfs/libxfs/xfs_inode_fork.c | |||
@@ -0,0 +1,1906 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include <linux/log2.h> | ||
19 | |||
20 | #include "xfs.h" | ||
21 | #include "xfs_fs.h" | ||
22 | #include "xfs_format.h" | ||
23 | #include "xfs_log_format.h" | ||
24 | #include "xfs_trans_resv.h" | ||
25 | #include "xfs_inum.h" | ||
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_mount.h" | ||
29 | #include "xfs_inode.h" | ||
30 | #include "xfs_trans.h" | ||
31 | #include "xfs_inode_item.h" | ||
32 | #include "xfs_bmap_btree.h" | ||
33 | #include "xfs_bmap.h" | ||
34 | #include "xfs_error.h" | ||
35 | #include "xfs_trace.h" | ||
36 | #include "xfs_attr_sf.h" | ||
37 | #include "xfs_dinode.h" | ||
38 | |||
39 | kmem_zone_t *xfs_ifork_zone; | ||
40 | |||
41 | STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); | ||
42 | STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); | ||
43 | STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); | ||
44 | |||
#ifdef DEBUG
/*
 * Debug-only walk over the first nrecs extent records of a fork.  Every
 * record is copied with unaligned loads and unpacked; when fmt is
 * XFS_EXTFMT_NOSTATE each extent is asserted to be in the XFS_EXT_NORM
 * state.  Compiles away to nothing on non-DEBUG builds.
 */
void
xfs_validate_extents(
	xfs_ifork_t		*ifp,
	int			nrecs,
	xfs_exntfmt_t		fmt)
{
	int			idx;

	for (idx = 0; idx < nrecs; idx++) {
		xfs_bmbt_rec_host_t	*ep = xfs_iext_get_ext(ifp, idx);
		xfs_bmbt_rec_host_t	packed;
		xfs_bmbt_irec_t		unpacked;

		packed.l0 = get_unaligned(&ep->l0);
		packed.l1 = get_unaligned(&ep->l1);
		xfs_bmbt_get_all(&packed, &unpacked);
		if (fmt == XFS_EXTFMT_NOSTATE)
			ASSERT(unpacked.br_state == XFS_EXT_NORM);
	}
}
#else /* DEBUG */
#define xfs_validate_extents(ifp, nrecs, fmt)
#endif /* DEBUG */
72 | |||
73 | |||
74 | /* | ||
75 | * Move inode type and inode format specific information from the | ||
76 | * on-disk inode to the in-core inode. For fifos, devs, and sockets | ||
77 | * this means set if_rdev to the proper value. For files, directories, | ||
78 | * and symlinks this means to bring in the in-line data or extent | ||
79 | * pointers. For a file in B-tree format, only the root is immediately | ||
80 | * brought in-core. The rest will be in-lined in if_extents when it | ||
81 | * is first referenced (see xfs_iread_extents()). | ||
82 | */ | ||
83 | int | ||
84 | xfs_iformat_fork( | ||
85 | xfs_inode_t *ip, | ||
86 | xfs_dinode_t *dip) | ||
87 | { | ||
88 | xfs_attr_shortform_t *atp; | ||
89 | int size; | ||
90 | int error = 0; | ||
91 | xfs_fsize_t di_size; | ||
92 | |||
93 | if (unlikely(be32_to_cpu(dip->di_nextents) + | ||
94 | be16_to_cpu(dip->di_anextents) > | ||
95 | be64_to_cpu(dip->di_nblocks))) { | ||
96 | xfs_warn(ip->i_mount, | ||
97 | "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", | ||
98 | (unsigned long long)ip->i_ino, | ||
99 | (int)(be32_to_cpu(dip->di_nextents) + | ||
100 | be16_to_cpu(dip->di_anextents)), | ||
101 | (unsigned long long) | ||
102 | be64_to_cpu(dip->di_nblocks)); | ||
103 | XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, | ||
104 | ip->i_mount, dip); | ||
105 | return -EFSCORRUPTED; | ||
106 | } | ||
107 | |||
108 | if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { | ||
109 | xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", | ||
110 | (unsigned long long)ip->i_ino, | ||
111 | dip->di_forkoff); | ||
112 | XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, | ||
113 | ip->i_mount, dip); | ||
114 | return -EFSCORRUPTED; | ||
115 | } | ||
116 | |||
117 | if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && | ||
118 | !ip->i_mount->m_rtdev_targp)) { | ||
119 | xfs_warn(ip->i_mount, | ||
120 | "corrupt dinode %Lu, has realtime flag set.", | ||
121 | ip->i_ino); | ||
122 | XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", | ||
123 | XFS_ERRLEVEL_LOW, ip->i_mount, dip); | ||
124 | return -EFSCORRUPTED; | ||
125 | } | ||
126 | |||
127 | switch (ip->i_d.di_mode & S_IFMT) { | ||
128 | case S_IFIFO: | ||
129 | case S_IFCHR: | ||
130 | case S_IFBLK: | ||
131 | case S_IFSOCK: | ||
132 | if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { | ||
133 | XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, | ||
134 | ip->i_mount, dip); | ||
135 | return -EFSCORRUPTED; | ||
136 | } | ||
137 | ip->i_d.di_size = 0; | ||
138 | ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); | ||
139 | break; | ||
140 | |||
141 | case S_IFREG: | ||
142 | case S_IFLNK: | ||
143 | case S_IFDIR: | ||
144 | switch (dip->di_format) { | ||
145 | case XFS_DINODE_FMT_LOCAL: | ||
146 | /* | ||
147 | * no local regular files yet | ||
148 | */ | ||
149 | if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) { | ||
150 | xfs_warn(ip->i_mount, | ||
151 | "corrupt inode %Lu (local format for regular file).", | ||
152 | (unsigned long long) ip->i_ino); | ||
153 | XFS_CORRUPTION_ERROR("xfs_iformat(4)", | ||
154 | XFS_ERRLEVEL_LOW, | ||
155 | ip->i_mount, dip); | ||
156 | return -EFSCORRUPTED; | ||
157 | } | ||
158 | |||
159 | di_size = be64_to_cpu(dip->di_size); | ||
160 | if (unlikely(di_size < 0 || | ||
161 | di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { | ||
162 | xfs_warn(ip->i_mount, | ||
163 | "corrupt inode %Lu (bad size %Ld for local inode).", | ||
164 | (unsigned long long) ip->i_ino, | ||
165 | (long long) di_size); | ||
166 | XFS_CORRUPTION_ERROR("xfs_iformat(5)", | ||
167 | XFS_ERRLEVEL_LOW, | ||
168 | ip->i_mount, dip); | ||
169 | return -EFSCORRUPTED; | ||
170 | } | ||
171 | |||
172 | size = (int)di_size; | ||
173 | error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); | ||
174 | break; | ||
175 | case XFS_DINODE_FMT_EXTENTS: | ||
176 | error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); | ||
177 | break; | ||
178 | case XFS_DINODE_FMT_BTREE: | ||
179 | error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); | ||
180 | break; | ||
181 | default: | ||
182 | XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, | ||
183 | ip->i_mount); | ||
184 | return -EFSCORRUPTED; | ||
185 | } | ||
186 | break; | ||
187 | |||
188 | default: | ||
189 | XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); | ||
190 | return -EFSCORRUPTED; | ||
191 | } | ||
192 | if (error) { | ||
193 | return error; | ||
194 | } | ||
195 | if (!XFS_DFORK_Q(dip)) | ||
196 | return 0; | ||
197 | |||
198 | ASSERT(ip->i_afp == NULL); | ||
199 | ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); | ||
200 | |||
201 | switch (dip->di_aformat) { | ||
202 | case XFS_DINODE_FMT_LOCAL: | ||
203 | atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); | ||
204 | size = be16_to_cpu(atp->hdr.totsize); | ||
205 | |||
206 | if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { | ||
207 | xfs_warn(ip->i_mount, | ||
208 | "corrupt inode %Lu (bad attr fork size %Ld).", | ||
209 | (unsigned long long) ip->i_ino, | ||
210 | (long long) size); | ||
211 | XFS_CORRUPTION_ERROR("xfs_iformat(8)", | ||
212 | XFS_ERRLEVEL_LOW, | ||
213 | ip->i_mount, dip); | ||
214 | return -EFSCORRUPTED; | ||
215 | } | ||
216 | |||
217 | error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); | ||
218 | break; | ||
219 | case XFS_DINODE_FMT_EXTENTS: | ||
220 | error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); | ||
221 | break; | ||
222 | case XFS_DINODE_FMT_BTREE: | ||
223 | error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); | ||
224 | break; | ||
225 | default: | ||
226 | error = -EFSCORRUPTED; | ||
227 | break; | ||
228 | } | ||
229 | if (error) { | ||
230 | kmem_zone_free(xfs_ifork_zone, ip->i_afp); | ||
231 | ip->i_afp = NULL; | ||
232 | xfs_idestroy_fork(ip, XFS_DATA_FORK); | ||
233 | } | ||
234 | return error; | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * The file is in-lined in the on-disk inode. | ||
239 | * If it fits into if_inline_data, then copy | ||
240 | * it there, otherwise allocate a buffer for it | ||
241 | * and copy the data there. Either way, set | ||
242 | * if_data to point at the data. | ||
243 | * If we allocate a buffer for the data, make | ||
244 | * sure that its size is a multiple of 4 and | ||
245 | * record the real size in i_real_bytes. | ||
246 | */ | ||
247 | STATIC int | ||
248 | xfs_iformat_local( | ||
249 | xfs_inode_t *ip, | ||
250 | xfs_dinode_t *dip, | ||
251 | int whichfork, | ||
252 | int size) | ||
253 | { | ||
254 | xfs_ifork_t *ifp; | ||
255 | int real_size; | ||
256 | |||
257 | /* | ||
258 | * If the size is unreasonable, then something | ||
259 | * is wrong and we just bail out rather than crash in | ||
260 | * kmem_alloc() or memcpy() below. | ||
261 | */ | ||
262 | if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { | ||
263 | xfs_warn(ip->i_mount, | ||
264 | "corrupt inode %Lu (bad size %d for local fork, size = %d).", | ||
265 | (unsigned long long) ip->i_ino, size, | ||
266 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); | ||
267 | XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, | ||
268 | ip->i_mount, dip); | ||
269 | return -EFSCORRUPTED; | ||
270 | } | ||
271 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
272 | real_size = 0; | ||
273 | if (size == 0) | ||
274 | ifp->if_u1.if_data = NULL; | ||
275 | else if (size <= sizeof(ifp->if_u2.if_inline_data)) | ||
276 | ifp->if_u1.if_data = ifp->if_u2.if_inline_data; | ||
277 | else { | ||
278 | real_size = roundup(size, 4); | ||
279 | ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); | ||
280 | } | ||
281 | ifp->if_bytes = size; | ||
282 | ifp->if_real_bytes = real_size; | ||
283 | if (size) | ||
284 | memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); | ||
285 | ifp->if_flags &= ~XFS_IFEXTENTS; | ||
286 | ifp->if_flags |= XFS_IFINLINE; | ||
287 | return 0; | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * The file consists of a set of extents all | ||
292 | * of which fit into the on-disk inode. | ||
293 | * If there are few enough extents to fit into | ||
294 | * the if_inline_ext, then copy them there. | ||
295 | * Otherwise allocate a buffer for them and copy | ||
296 | * them into it. Either way, set if_extents | ||
297 | * to point at the extents. | ||
298 | */ | ||
299 | STATIC int | ||
300 | xfs_iformat_extents( | ||
301 | xfs_inode_t *ip, | ||
302 | xfs_dinode_t *dip, | ||
303 | int whichfork) | ||
304 | { | ||
305 | xfs_bmbt_rec_t *dp; | ||
306 | xfs_ifork_t *ifp; | ||
307 | int nex; | ||
308 | int size; | ||
309 | int i; | ||
310 | |||
311 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
312 | nex = XFS_DFORK_NEXTENTS(dip, whichfork); | ||
313 | size = nex * (uint)sizeof(xfs_bmbt_rec_t); | ||
314 | |||
315 | /* | ||
316 | * If the number of extents is unreasonable, then something | ||
317 | * is wrong and we just bail out rather than crash in | ||
318 | * kmem_alloc() or memcpy() below. | ||
319 | */ | ||
320 | if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { | ||
321 | xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", | ||
322 | (unsigned long long) ip->i_ino, nex); | ||
323 | XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, | ||
324 | ip->i_mount, dip); | ||
325 | return -EFSCORRUPTED; | ||
326 | } | ||
327 | |||
328 | ifp->if_real_bytes = 0; | ||
329 | if (nex == 0) | ||
330 | ifp->if_u1.if_extents = NULL; | ||
331 | else if (nex <= XFS_INLINE_EXTS) | ||
332 | ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; | ||
333 | else | ||
334 | xfs_iext_add(ifp, 0, nex); | ||
335 | |||
336 | ifp->if_bytes = size; | ||
337 | if (size) { | ||
338 | dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); | ||
339 | xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); | ||
340 | for (i = 0; i < nex; i++, dp++) { | ||
341 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); | ||
342 | ep->l0 = get_unaligned_be64(&dp->l0); | ||
343 | ep->l1 = get_unaligned_be64(&dp->l1); | ||
344 | } | ||
345 | XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); | ||
346 | if (whichfork != XFS_DATA_FORK || | ||
347 | XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) | ||
348 | if (unlikely(xfs_check_nostate_extents( | ||
349 | ifp, 0, nex))) { | ||
350 | XFS_ERROR_REPORT("xfs_iformat_extents(2)", | ||
351 | XFS_ERRLEVEL_LOW, | ||
352 | ip->i_mount); | ||
353 | return -EFSCORRUPTED; | ||
354 | } | ||
355 | } | ||
356 | ifp->if_flags |= XFS_IFEXTENTS; | ||
357 | return 0; | ||
358 | } | ||
359 | |||
360 | /* | ||
361 | * The file has too many extents to fit into | ||
362 | * the inode, so they are in B-tree format. | ||
363 | * Allocate a buffer for the root of the B-tree | ||
364 | * and copy the root into it. The i_extents | ||
365 | * field will remain NULL until all of the | ||
366 | * extents are read in (when they are needed). | ||
367 | */ | ||
368 | STATIC int | ||
369 | xfs_iformat_btree( | ||
370 | xfs_inode_t *ip, | ||
371 | xfs_dinode_t *dip, | ||
372 | int whichfork) | ||
373 | { | ||
374 | struct xfs_mount *mp = ip->i_mount; | ||
375 | xfs_bmdr_block_t *dfp; | ||
376 | xfs_ifork_t *ifp; | ||
377 | /* REFERENCED */ | ||
378 | int nrecs; | ||
379 | int size; | ||
380 | |||
381 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
382 | dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); | ||
383 | size = XFS_BMAP_BROOT_SPACE(mp, dfp); | ||
384 | nrecs = be16_to_cpu(dfp->bb_numrecs); | ||
385 | |||
386 | /* | ||
387 | * blow out if -- fork has less extents than can fit in | ||
388 | * fork (fork shouldn't be a btree format), root btree | ||
389 | * block has more records than can fit into the fork, | ||
390 | * or the number of extents is greater than the number of | ||
391 | * blocks. | ||
392 | */ | ||
393 | if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= | ||
394 | XFS_IFORK_MAXEXT(ip, whichfork) || | ||
395 | XFS_BMDR_SPACE_CALC(nrecs) > | ||
396 | XFS_DFORK_SIZE(dip, mp, whichfork) || | ||
397 | XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { | ||
398 | xfs_warn(mp, "corrupt inode %Lu (btree).", | ||
399 | (unsigned long long) ip->i_ino); | ||
400 | XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, | ||
401 | mp, dip); | ||
402 | return -EFSCORRUPTED; | ||
403 | } | ||
404 | |||
405 | ifp->if_broot_bytes = size; | ||
406 | ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS); | ||
407 | ASSERT(ifp->if_broot != NULL); | ||
408 | /* | ||
409 | * Copy and convert from the on-disk structure | ||
410 | * to the in-memory structure. | ||
411 | */ | ||
412 | xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), | ||
413 | ifp->if_broot, size); | ||
414 | ifp->if_flags &= ~XFS_IFEXTENTS; | ||
415 | ifp->if_flags |= XFS_IFBROOT; | ||
416 | |||
417 | return 0; | ||
418 | } | ||
419 | |||
420 | /* | ||
421 | * Read in extents from a btree-format inode. | ||
422 | * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. | ||
423 | */ | ||
424 | int | ||
425 | xfs_iread_extents( | ||
426 | xfs_trans_t *tp, | ||
427 | xfs_inode_t *ip, | ||
428 | int whichfork) | ||
429 | { | ||
430 | int error; | ||
431 | xfs_ifork_t *ifp; | ||
432 | xfs_extnum_t nextents; | ||
433 | |||
434 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
435 | |||
436 | if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { | ||
437 | XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, | ||
438 | ip->i_mount); | ||
439 | return -EFSCORRUPTED; | ||
440 | } | ||
441 | nextents = XFS_IFORK_NEXTENTS(ip, whichfork); | ||
442 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
443 | |||
444 | /* | ||
445 | * We know that the size is valid (it's checked in iformat_btree) | ||
446 | */ | ||
447 | ifp->if_bytes = ifp->if_real_bytes = 0; | ||
448 | ifp->if_flags |= XFS_IFEXTENTS; | ||
449 | xfs_iext_add(ifp, 0, nextents); | ||
450 | error = xfs_bmap_read_extents(tp, ip, whichfork); | ||
451 | if (error) { | ||
452 | xfs_iext_destroy(ifp); | ||
453 | ifp->if_flags &= ~XFS_IFEXTENTS; | ||
454 | return error; | ||
455 | } | ||
456 | xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); | ||
457 | return 0; | ||
458 | } | ||
459 | /* | ||
460 | * Reallocate the space for if_broot based on the number of records | ||
461 | * being added or deleted as indicated in rec_diff. Move the records | ||
462 | * and pointers in if_broot to fit the new size. When shrinking this | ||
463 | * will eliminate holes between the records and pointers created by | ||
464 | * the caller. When growing this will create holes to be filled in | ||
465 | * by the caller. | ||
466 | * | ||
467 | * The caller must not request to add more records than would fit in | ||
468 | * the on-disk inode root. If the if_broot is currently NULL, then | ||
469 | * if we are adding records, one will be allocated. The caller must also | ||
470 | * not request that the number of records go below zero, although | ||
471 | * it can go to zero. | ||
472 | * | ||
473 | * ip -- the inode whose if_broot area is changing | ||
474 | * ext_diff -- the change in the number of records, positive or negative, | ||
475 | * requested for the if_broot array. | ||
476 | */ | ||
477 | void | ||
478 | xfs_iroot_realloc( | ||
479 | xfs_inode_t *ip, | ||
480 | int rec_diff, | ||
481 | int whichfork) | ||
482 | { | ||
483 | struct xfs_mount *mp = ip->i_mount; | ||
484 | int cur_max; | ||
485 | xfs_ifork_t *ifp; | ||
486 | struct xfs_btree_block *new_broot; | ||
487 | int new_max; | ||
488 | size_t new_size; | ||
489 | char *np; | ||
490 | char *op; | ||
491 | |||
492 | /* | ||
493 | * Handle the degenerate case quietly. | ||
494 | */ | ||
495 | if (rec_diff == 0) { | ||
496 | return; | ||
497 | } | ||
498 | |||
499 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
500 | if (rec_diff > 0) { | ||
501 | /* | ||
502 | * If there wasn't any memory allocated before, just | ||
503 | * allocate it now and get out. | ||
504 | */ | ||
505 | if (ifp->if_broot_bytes == 0) { | ||
506 | new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff); | ||
507 | ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); | ||
508 | ifp->if_broot_bytes = (int)new_size; | ||
509 | return; | ||
510 | } | ||
511 | |||
512 | /* | ||
513 | * If there is already an existing if_broot, then we need | ||
514 | * to realloc() it and shift the pointers to their new | ||
515 | * location. The records don't change location because | ||
516 | * they are kept butted up against the btree block header. | ||
517 | */ | ||
518 | cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); | ||
519 | new_max = cur_max + rec_diff; | ||
520 | new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); | ||
521 | ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, | ||
522 | XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max), | ||
523 | KM_SLEEP | KM_NOFS); | ||
524 | op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, | ||
525 | ifp->if_broot_bytes); | ||
526 | np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, | ||
527 | (int)new_size); | ||
528 | ifp->if_broot_bytes = (int)new_size; | ||
529 | ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= | ||
530 | XFS_IFORK_SIZE(ip, whichfork)); | ||
531 | memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); | ||
532 | return; | ||
533 | } | ||
534 | |||
535 | /* | ||
536 | * rec_diff is less than 0. In this case, we are shrinking the | ||
537 | * if_broot buffer. It must already exist. If we go to zero | ||
538 | * records, just get rid of the root and clear the status bit. | ||
539 | */ | ||
540 | ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); | ||
541 | cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); | ||
542 | new_max = cur_max + rec_diff; | ||
543 | ASSERT(new_max >= 0); | ||
544 | if (new_max > 0) | ||
545 | new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); | ||
546 | else | ||
547 | new_size = 0; | ||
548 | if (new_size > 0) { | ||
549 | new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); | ||
550 | /* | ||
551 | * First copy over the btree block header. | ||
552 | */ | ||
553 | memcpy(new_broot, ifp->if_broot, | ||
554 | XFS_BMBT_BLOCK_LEN(ip->i_mount)); | ||
555 | } else { | ||
556 | new_broot = NULL; | ||
557 | ifp->if_flags &= ~XFS_IFBROOT; | ||
558 | } | ||
559 | |||
560 | /* | ||
561 | * Only copy the records and pointers if there are any. | ||
562 | */ | ||
563 | if (new_max > 0) { | ||
564 | /* | ||
565 | * First copy the records. | ||
566 | */ | ||
567 | op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1); | ||
568 | np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1); | ||
569 | memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); | ||
570 | |||
571 | /* | ||
572 | * Then copy the pointers. | ||
573 | */ | ||
574 | op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, | ||
575 | ifp->if_broot_bytes); | ||
576 | np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, | ||
577 | (int)new_size); | ||
578 | memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); | ||
579 | } | ||
580 | kmem_free(ifp->if_broot); | ||
581 | ifp->if_broot = new_broot; | ||
582 | ifp->if_broot_bytes = (int)new_size; | ||
583 | if (ifp->if_broot) | ||
584 | ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= | ||
585 | XFS_IFORK_SIZE(ip, whichfork)); | ||
586 | return; | ||
587 | } | ||
588 | |||
589 | |||
590 | /* | ||
591 | * This is called when the amount of space needed for if_data | ||
592 | * is increased or decreased. The change in size is indicated by | ||
593 | * the number of bytes that need to be added or deleted in the | ||
594 | * byte_diff parameter. | ||
595 | * | ||
596 | * If the amount of space needed has decreased below the size of the | ||
597 | * inline buffer, then switch to using the inline buffer. Otherwise, | ||
598 | * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer | ||
599 | * to what is needed. | ||
600 | * | ||
601 | * ip -- the inode whose if_data area is changing | ||
602 | * byte_diff -- the change in the number of bytes, positive or negative, | ||
603 | * requested for the if_data array. | ||
604 | */ | ||
605 | void | ||
606 | xfs_idata_realloc( | ||
607 | xfs_inode_t *ip, | ||
608 | int byte_diff, | ||
609 | int whichfork) | ||
610 | { | ||
611 | xfs_ifork_t *ifp; | ||
612 | int new_size; | ||
613 | int real_size; | ||
614 | |||
615 | if (byte_diff == 0) { | ||
616 | return; | ||
617 | } | ||
618 | |||
619 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
620 | new_size = (int)ifp->if_bytes + byte_diff; | ||
621 | ASSERT(new_size >= 0); | ||
622 | |||
623 | if (new_size == 0) { | ||
624 | if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { | ||
625 | kmem_free(ifp->if_u1.if_data); | ||
626 | } | ||
627 | ifp->if_u1.if_data = NULL; | ||
628 | real_size = 0; | ||
629 | } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) { | ||
630 | /* | ||
631 | * If the valid extents/data can fit in if_inline_ext/data, | ||
632 | * copy them from the malloc'd vector and free it. | ||
633 | */ | ||
634 | if (ifp->if_u1.if_data == NULL) { | ||
635 | ifp->if_u1.if_data = ifp->if_u2.if_inline_data; | ||
636 | } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { | ||
637 | ASSERT(ifp->if_real_bytes != 0); | ||
638 | memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, | ||
639 | new_size); | ||
640 | kmem_free(ifp->if_u1.if_data); | ||
641 | ifp->if_u1.if_data = ifp->if_u2.if_inline_data; | ||
642 | } | ||
643 | real_size = 0; | ||
644 | } else { | ||
645 | /* | ||
646 | * Stuck with malloc/realloc. | ||
647 | * For inline data, the underlying buffer must be | ||
648 | * a multiple of 4 bytes in size so that it can be | ||
649 | * logged and stay on word boundaries. We enforce | ||
650 | * that here. | ||
651 | */ | ||
652 | real_size = roundup(new_size, 4); | ||
653 | if (ifp->if_u1.if_data == NULL) { | ||
654 | ASSERT(ifp->if_real_bytes == 0); | ||
655 | ifp->if_u1.if_data = kmem_alloc(real_size, | ||
656 | KM_SLEEP | KM_NOFS); | ||
657 | } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { | ||
658 | /* | ||
659 | * Only do the realloc if the underlying size | ||
660 | * is really changing. | ||
661 | */ | ||
662 | if (ifp->if_real_bytes != real_size) { | ||
663 | ifp->if_u1.if_data = | ||
664 | kmem_realloc(ifp->if_u1.if_data, | ||
665 | real_size, | ||
666 | ifp->if_real_bytes, | ||
667 | KM_SLEEP | KM_NOFS); | ||
668 | } | ||
669 | } else { | ||
670 | ASSERT(ifp->if_real_bytes == 0); | ||
671 | ifp->if_u1.if_data = kmem_alloc(real_size, | ||
672 | KM_SLEEP | KM_NOFS); | ||
673 | memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, | ||
674 | ifp->if_bytes); | ||
675 | } | ||
676 | } | ||
677 | ifp->if_real_bytes = real_size; | ||
678 | ifp->if_bytes = new_size; | ||
679 | ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); | ||
680 | } | ||
681 | |||
682 | void | ||
683 | xfs_idestroy_fork( | ||
684 | xfs_inode_t *ip, | ||
685 | int whichfork) | ||
686 | { | ||
687 | xfs_ifork_t *ifp; | ||
688 | |||
689 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
690 | if (ifp->if_broot != NULL) { | ||
691 | kmem_free(ifp->if_broot); | ||
692 | ifp->if_broot = NULL; | ||
693 | } | ||
694 | |||
695 | /* | ||
696 | * If the format is local, then we can't have an extents | ||
697 | * array so just look for an inline data array. If we're | ||
698 | * not local then we may or may not have an extents list, | ||
699 | * so check and free it up if we do. | ||
700 | */ | ||
701 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { | ||
702 | if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && | ||
703 | (ifp->if_u1.if_data != NULL)) { | ||
704 | ASSERT(ifp->if_real_bytes != 0); | ||
705 | kmem_free(ifp->if_u1.if_data); | ||
706 | ifp->if_u1.if_data = NULL; | ||
707 | ifp->if_real_bytes = 0; | ||
708 | } | ||
709 | } else if ((ifp->if_flags & XFS_IFEXTENTS) && | ||
710 | ((ifp->if_flags & XFS_IFEXTIREC) || | ||
711 | ((ifp->if_u1.if_extents != NULL) && | ||
712 | (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { | ||
713 | ASSERT(ifp->if_real_bytes != 0); | ||
714 | xfs_iext_destroy(ifp); | ||
715 | } | ||
716 | ASSERT(ifp->if_u1.if_extents == NULL || | ||
717 | ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); | ||
718 | ASSERT(ifp->if_real_bytes == 0); | ||
719 | if (whichfork == XFS_ATTR_FORK) { | ||
720 | kmem_zone_free(xfs_ifork_zone, ip->i_afp); | ||
721 | ip->i_afp = NULL; | ||
722 | } | ||
723 | } | ||
724 | |||
725 | /* | ||
726 | * Convert in-core extents to on-disk form | ||
727 | * | ||
728 | * For either the data or attr fork in extent format, we need to endian convert | ||
729 | * the in-core extent as we place them into the on-disk inode. | ||
730 | * | ||
731 | * In the case of the data fork, the in-core and on-disk fork sizes can be | ||
732 | * different due to delayed allocation extents. We only copy on-disk extents | ||
733 | * here, so callers must always use the physical fork size to determine the | ||
734 | * size of the buffer passed to this routine. We will return the size actually | ||
735 | * used. | ||
736 | */ | ||
737 | int | ||
738 | xfs_iextents_copy( | ||
739 | xfs_inode_t *ip, | ||
740 | xfs_bmbt_rec_t *dp, | ||
741 | int whichfork) | ||
742 | { | ||
743 | int copied; | ||
744 | int i; | ||
745 | xfs_ifork_t *ifp; | ||
746 | int nrecs; | ||
747 | xfs_fsblock_t start_block; | ||
748 | |||
749 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
750 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | ||
751 | ASSERT(ifp->if_bytes > 0); | ||
752 | |||
753 | nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
754 | XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); | ||
755 | ASSERT(nrecs > 0); | ||
756 | |||
757 | /* | ||
758 | * There are some delayed allocation extents in the | ||
759 | * inode, so copy the extents one at a time and skip | ||
760 | * the delayed ones. There must be at least one | ||
761 | * non-delayed extent. | ||
762 | */ | ||
763 | copied = 0; | ||
764 | for (i = 0; i < nrecs; i++) { | ||
765 | xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); | ||
766 | start_block = xfs_bmbt_get_startblock(ep); | ||
767 | if (isnullstartblock(start_block)) { | ||
768 | /* | ||
769 | * It's a delayed allocation extent, so skip it. | ||
770 | */ | ||
771 | continue; | ||
772 | } | ||
773 | |||
774 | /* Translate to on disk format */ | ||
775 | put_unaligned_be64(ep->l0, &dp->l0); | ||
776 | put_unaligned_be64(ep->l1, &dp->l1); | ||
777 | dp++; | ||
778 | copied++; | ||
779 | } | ||
780 | ASSERT(copied != 0); | ||
781 | xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip)); | ||
782 | |||
783 | return (copied * (uint)sizeof(xfs_bmbt_rec_t)); | ||
784 | } | ||
785 | |||
786 | /* | ||
787 | * Each of the following cases stores data into the same region | ||
788 | * of the on-disk inode, so only one of them can be valid at | ||
789 | * any given time. While it is possible to have conflicting formats | ||
790 | * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is | ||
791 | * in EXTENTS format, this can only happen when the fork has | ||
792 | * changed formats after being modified but before being flushed. | ||
793 | * In these cases, the format always takes precedence, because the | ||
794 | * format indicates the current state of the fork. | ||
795 | */ | ||
796 | void | ||
797 | xfs_iflush_fork( | ||
798 | xfs_inode_t *ip, | ||
799 | xfs_dinode_t *dip, | ||
800 | xfs_inode_log_item_t *iip, | ||
801 | int whichfork) | ||
802 | { | ||
803 | char *cp; | ||
804 | xfs_ifork_t *ifp; | ||
805 | xfs_mount_t *mp; | ||
806 | static const short brootflag[2] = | ||
807 | { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; | ||
808 | static const short dataflag[2] = | ||
809 | { XFS_ILOG_DDATA, XFS_ILOG_ADATA }; | ||
810 | static const short extflag[2] = | ||
811 | { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; | ||
812 | |||
813 | if (!iip) | ||
814 | return; | ||
815 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
816 | /* | ||
817 | * This can happen if we gave up in iformat in an error path, | ||
818 | * for the attribute fork. | ||
819 | */ | ||
820 | if (!ifp) { | ||
821 | ASSERT(whichfork == XFS_ATTR_FORK); | ||
822 | return; | ||
823 | } | ||
824 | cp = XFS_DFORK_PTR(dip, whichfork); | ||
825 | mp = ip->i_mount; | ||
826 | switch (XFS_IFORK_FORMAT(ip, whichfork)) { | ||
827 | case XFS_DINODE_FMT_LOCAL: | ||
828 | if ((iip->ili_fields & dataflag[whichfork]) && | ||
829 | (ifp->if_bytes > 0)) { | ||
830 | ASSERT(ifp->if_u1.if_data != NULL); | ||
831 | ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); | ||
832 | memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); | ||
833 | } | ||
834 | break; | ||
835 | |||
836 | case XFS_DINODE_FMT_EXTENTS: | ||
837 | ASSERT((ifp->if_flags & XFS_IFEXTENTS) || | ||
838 | !(iip->ili_fields & extflag[whichfork])); | ||
839 | if ((iip->ili_fields & extflag[whichfork]) && | ||
840 | (ifp->if_bytes > 0)) { | ||
841 | ASSERT(xfs_iext_get_ext(ifp, 0)); | ||
842 | ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); | ||
843 | (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, | ||
844 | whichfork); | ||
845 | } | ||
846 | break; | ||
847 | |||
848 | case XFS_DINODE_FMT_BTREE: | ||
849 | if ((iip->ili_fields & brootflag[whichfork]) && | ||
850 | (ifp->if_broot_bytes > 0)) { | ||
851 | ASSERT(ifp->if_broot != NULL); | ||
852 | ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= | ||
853 | XFS_IFORK_SIZE(ip, whichfork)); | ||
854 | xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, | ||
855 | (xfs_bmdr_block_t *)cp, | ||
856 | XFS_DFORK_SIZE(dip, mp, whichfork)); | ||
857 | } | ||
858 | break; | ||
859 | |||
860 | case XFS_DINODE_FMT_DEV: | ||
861 | if (iip->ili_fields & XFS_ILOG_DEV) { | ||
862 | ASSERT(whichfork == XFS_DATA_FORK); | ||
863 | xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); | ||
864 | } | ||
865 | break; | ||
866 | |||
867 | case XFS_DINODE_FMT_UUID: | ||
868 | if (iip->ili_fields & XFS_ILOG_UUID) { | ||
869 | ASSERT(whichfork == XFS_DATA_FORK); | ||
870 | memcpy(XFS_DFORK_DPTR(dip), | ||
871 | &ip->i_df.if_u2.if_uuid, | ||
872 | sizeof(uuid_t)); | ||
873 | } | ||
874 | break; | ||
875 | |||
876 | default: | ||
877 | ASSERT(0); | ||
878 | break; | ||
879 | } | ||
880 | } | ||
881 | |||
882 | /* | ||
883 | * Return a pointer to the extent record at file index idx. | ||
884 | */ | ||
885 | xfs_bmbt_rec_host_t * | ||
886 | xfs_iext_get_ext( | ||
887 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
888 | xfs_extnum_t idx) /* index of target extent */ | ||
889 | { | ||
890 | ASSERT(idx >= 0); | ||
891 | ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); | ||
892 | |||
893 | if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { | ||
894 | return ifp->if_u1.if_ext_irec->er_extbuf; | ||
895 | } else if (ifp->if_flags & XFS_IFEXTIREC) { | ||
896 | xfs_ext_irec_t *erp; /* irec pointer */ | ||
897 | int erp_idx = 0; /* irec index */ | ||
898 | xfs_extnum_t page_idx = idx; /* ext index in target list */ | ||
899 | |||
900 | erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); | ||
901 | return &erp->er_extbuf[page_idx]; | ||
902 | } else if (ifp->if_bytes) { | ||
903 | return &ifp->if_u1.if_extents[idx]; | ||
904 | } else { | ||
905 | return NULL; | ||
906 | } | ||
907 | } | ||
908 | |||
909 | /* | ||
910 | * Insert new item(s) into the extent records for incore inode | ||
911 | * fork 'ifp'. 'count' new items are inserted at index 'idx'. | ||
912 | */ | ||
913 | void | ||
914 | xfs_iext_insert( | ||
915 | xfs_inode_t *ip, /* incore inode pointer */ | ||
916 | xfs_extnum_t idx, /* starting index of new items */ | ||
917 | xfs_extnum_t count, /* number of inserted items */ | ||
918 | xfs_bmbt_irec_t *new, /* items to insert */ | ||
919 | int state) /* type of extent conversion */ | ||
920 | { | ||
921 | xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; | ||
922 | xfs_extnum_t i; /* extent record index */ | ||
923 | |||
924 | trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_); | ||
925 | |||
926 | ASSERT(ifp->if_flags & XFS_IFEXTENTS); | ||
927 | xfs_iext_add(ifp, idx, count); | ||
928 | for (i = idx; i < idx + count; i++, new++) | ||
929 | xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new); | ||
930 | } | ||
931 | |||
932 | /* | ||
933 | * This is called when the amount of space required for incore file | ||
934 | * extents needs to be increased. The ext_diff parameter stores the | ||
935 | * number of new extents being added and the idx parameter contains | ||
936 | * the extent index where the new extents will be added. If the new | ||
937 | * extents are being appended, then we just need to (re)allocate and | ||
938 | * initialize the space. Otherwise, if the new extents are being | ||
939 | * inserted into the middle of the existing entries, a bit more work | ||
940 | * is required to make room for the new extents to be inserted. The | ||
941 | * caller is responsible for filling in the new extent entries upon | ||
942 | * return. | ||
943 | */ | ||
944 | void | ||
945 | xfs_iext_add( | ||
946 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
947 | xfs_extnum_t idx, /* index to begin adding exts */ | ||
948 | int ext_diff) /* number of extents to add */ | ||
949 | { | ||
950 | int byte_diff; /* new bytes being added */ | ||
951 | int new_size; /* size of extents after adding */ | ||
952 | xfs_extnum_t nextents; /* number of extents in file */ | ||
953 | |||
954 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
955 | ASSERT((idx >= 0) && (idx <= nextents)); | ||
956 | byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); | ||
957 | new_size = ifp->if_bytes + byte_diff; | ||
958 | /* | ||
959 | * If the new number of extents (nextents + ext_diff) | ||
960 | * fits inside the inode, then continue to use the inline | ||
961 | * extent buffer. | ||
962 | */ | ||
963 | if (nextents + ext_diff <= XFS_INLINE_EXTS) { | ||
964 | if (idx < nextents) { | ||
965 | memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], | ||
966 | &ifp->if_u2.if_inline_ext[idx], | ||
967 | (nextents - idx) * sizeof(xfs_bmbt_rec_t)); | ||
968 | memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); | ||
969 | } | ||
970 | ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; | ||
971 | ifp->if_real_bytes = 0; | ||
972 | } | ||
973 | /* | ||
974 | * Otherwise use a linear (direct) extent list. | ||
975 | * If the extents are currently inside the inode, | ||
976 | * xfs_iext_realloc_direct will switch us from | ||
977 | * inline to direct extent allocation mode. | ||
978 | */ | ||
979 | else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { | ||
980 | xfs_iext_realloc_direct(ifp, new_size); | ||
981 | if (idx < nextents) { | ||
982 | memmove(&ifp->if_u1.if_extents[idx + ext_diff], | ||
983 | &ifp->if_u1.if_extents[idx], | ||
984 | (nextents - idx) * sizeof(xfs_bmbt_rec_t)); | ||
985 | memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); | ||
986 | } | ||
987 | } | ||
988 | /* Indirection array */ | ||
989 | else { | ||
990 | xfs_ext_irec_t *erp; | ||
991 | int erp_idx = 0; | ||
992 | int page_idx = idx; | ||
993 | |||
994 | ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); | ||
995 | if (ifp->if_flags & XFS_IFEXTIREC) { | ||
996 | erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); | ||
997 | } else { | ||
998 | xfs_iext_irec_init(ifp); | ||
999 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
1000 | erp = ifp->if_u1.if_ext_irec; | ||
1001 | } | ||
1002 | /* Extents fit in target extent page */ | ||
1003 | if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { | ||
1004 | if (page_idx < erp->er_extcount) { | ||
1005 | memmove(&erp->er_extbuf[page_idx + ext_diff], | ||
1006 | &erp->er_extbuf[page_idx], | ||
1007 | (erp->er_extcount - page_idx) * | ||
1008 | sizeof(xfs_bmbt_rec_t)); | ||
1009 | memset(&erp->er_extbuf[page_idx], 0, byte_diff); | ||
1010 | } | ||
1011 | erp->er_extcount += ext_diff; | ||
1012 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); | ||
1013 | } | ||
1014 | /* Insert a new extent page */ | ||
1015 | else if (erp) { | ||
1016 | xfs_iext_add_indirect_multi(ifp, | ||
1017 | erp_idx, page_idx, ext_diff); | ||
1018 | } | ||
1019 | /* | ||
1020 | * If extent(s) are being appended to the last page in | ||
1021 | * the indirection array and the new extent(s) don't fit | ||
1022 | * in the page, then erp is NULL and erp_idx is set to | ||
1023 | * the next index needed in the indirection array. | ||
1024 | */ | ||
1025 | else { | ||
1026 | uint count = ext_diff; | ||
1027 | |||
1028 | while (count) { | ||
1029 | erp = xfs_iext_irec_new(ifp, erp_idx); | ||
1030 | erp->er_extcount = min(count, XFS_LINEAR_EXTS); | ||
1031 | count -= erp->er_extcount; | ||
1032 | if (count) | ||
1033 | erp_idx++; | ||
1034 | } | ||
1035 | } | ||
1036 | } | ||
1037 | ifp->if_bytes = new_size; | ||
1038 | } | ||
1039 | |||
1040 | /* | ||
1041 | * This is called when incore extents are being added to the indirection | ||
1042 | * array and the new extents do not fit in the target extent list. The | ||
1043 | * erp_idx parameter contains the irec index for the target extent list | ||
1044 | * in the indirection array, and the idx parameter contains the extent | ||
1045 | * index within the list. The number of extents being added is stored | ||
1046 | * in the count parameter. | ||
1047 | * | ||
1048 | * |-------| |-------| | ||
1049 | * | | | | idx - number of extents before idx | ||
1050 | * | idx | | count | | ||
1051 | * | | | | count - number of extents being inserted at idx | ||
1052 | * |-------| |-------| | ||
1053 | * | count | | nex2 | nex2 - number of extents after idx + count | ||
1054 | * |-------| |-------| | ||
1055 | */ | ||
1056 | void | ||
1057 | xfs_iext_add_indirect_multi( | ||
1058 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1059 | int erp_idx, /* target extent irec index */ | ||
1060 | xfs_extnum_t idx, /* index within target list */ | ||
1061 | int count) /* new extents being added */ | ||
1062 | { | ||
1063 | int byte_diff; /* new bytes being added */ | ||
1064 | xfs_ext_irec_t *erp; /* pointer to irec entry */ | ||
1065 | xfs_extnum_t ext_diff; /* number of extents to add */ | ||
1066 | xfs_extnum_t ext_cnt; /* new extents still needed */ | ||
1067 | xfs_extnum_t nex2; /* extents after idx + count */ | ||
1068 | xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ | ||
1069 | int nlists; /* number of irec's (lists) */ | ||
1070 | |||
1071 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
1072 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
1073 | nex2 = erp->er_extcount - idx; | ||
1074 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
1075 | |||
1076 | /* | ||
1077 | * Save second part of target extent list | ||
1078 | * (all extents past */ | ||
1079 | if (nex2) { | ||
1080 | byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); | ||
1081 | nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS); | ||
1082 | memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); | ||
1083 | erp->er_extcount -= nex2; | ||
1084 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); | ||
1085 | memset(&erp->er_extbuf[idx], 0, byte_diff); | ||
1086 | } | ||
1087 | |||
1088 | /* | ||
1089 | * Add the new extents to the end of the target | ||
1090 | * list, then allocate new irec record(s) and | ||
1091 | * extent buffer(s) as needed to store the rest | ||
1092 | * of the new extents. | ||
1093 | */ | ||
1094 | ext_cnt = count; | ||
1095 | ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); | ||
1096 | if (ext_diff) { | ||
1097 | erp->er_extcount += ext_diff; | ||
1098 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); | ||
1099 | ext_cnt -= ext_diff; | ||
1100 | } | ||
1101 | while (ext_cnt) { | ||
1102 | erp_idx++; | ||
1103 | erp = xfs_iext_irec_new(ifp, erp_idx); | ||
1104 | ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); | ||
1105 | erp->er_extcount = ext_diff; | ||
1106 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); | ||
1107 | ext_cnt -= ext_diff; | ||
1108 | } | ||
1109 | |||
1110 | /* Add nex2 extents back to indirection array */ | ||
1111 | if (nex2) { | ||
1112 | xfs_extnum_t ext_avail; | ||
1113 | int i; | ||
1114 | |||
1115 | byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); | ||
1116 | ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; | ||
1117 | i = 0; | ||
1118 | /* | ||
1119 | * If nex2 extents fit in the current page, append | ||
1120 | * nex2_ep after the new extents. | ||
1121 | */ | ||
1122 | if (nex2 <= ext_avail) { | ||
1123 | i = erp->er_extcount; | ||
1124 | } | ||
1125 | /* | ||
1126 | * Otherwise, check if space is available in the | ||
1127 | * next page. | ||
1128 | */ | ||
1129 | else if ((erp_idx < nlists - 1) && | ||
1130 | (nex2 <= (ext_avail = XFS_LINEAR_EXTS - | ||
1131 | ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { | ||
1132 | erp_idx++; | ||
1133 | erp++; | ||
1134 | /* Create a hole for nex2 extents */ | ||
1135 | memmove(&erp->er_extbuf[nex2], erp->er_extbuf, | ||
1136 | erp->er_extcount * sizeof(xfs_bmbt_rec_t)); | ||
1137 | } | ||
1138 | /* | ||
1139 | * Final choice, create a new extent page for | ||
1140 | * nex2 extents. | ||
1141 | */ | ||
1142 | else { | ||
1143 | erp_idx++; | ||
1144 | erp = xfs_iext_irec_new(ifp, erp_idx); | ||
1145 | } | ||
1146 | memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); | ||
1147 | kmem_free(nex2_ep); | ||
1148 | erp->er_extcount += nex2; | ||
1149 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); | ||
1150 | } | ||
1151 | } | ||
1152 | |||
1153 | /* | ||
1154 | * This is called when the amount of space required for incore file | ||
1155 | * extents needs to be decreased. The ext_diff parameter stores the | ||
1156 | * number of extents to be removed and the idx parameter contains | ||
1157 | * the extent index where the extents will be removed from. | ||
1158 | * | ||
1159 | * If the amount of space needed has decreased below the linear | ||
1160 | * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous | ||
1161 | * extent array. Otherwise, use kmem_realloc() to adjust the | ||
1162 | * size to what is needed. | ||
1163 | */ | ||
1164 | void | ||
1165 | xfs_iext_remove( | ||
1166 | xfs_inode_t *ip, /* incore inode pointer */ | ||
1167 | xfs_extnum_t idx, /* index to begin removing exts */ | ||
1168 | int ext_diff, /* number of extents to remove */ | ||
1169 | int state) /* type of extent conversion */ | ||
1170 | { | ||
1171 | xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; | ||
1172 | xfs_extnum_t nextents; /* number of extents in file */ | ||
1173 | int new_size; /* size of extents after removal */ | ||
1174 | |||
1175 | trace_xfs_iext_remove(ip, idx, state, _RET_IP_); | ||
1176 | |||
1177 | ASSERT(ext_diff > 0); | ||
1178 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
1179 | new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); | ||
1180 | |||
1181 | if (new_size == 0) { | ||
1182 | xfs_iext_destroy(ifp); | ||
1183 | } else if (ifp->if_flags & XFS_IFEXTIREC) { | ||
1184 | xfs_iext_remove_indirect(ifp, idx, ext_diff); | ||
1185 | } else if (ifp->if_real_bytes) { | ||
1186 | xfs_iext_remove_direct(ifp, idx, ext_diff); | ||
1187 | } else { | ||
1188 | xfs_iext_remove_inline(ifp, idx, ext_diff); | ||
1189 | } | ||
1190 | ifp->if_bytes = new_size; | ||
1191 | } | ||
1192 | |||
1193 | /* | ||
1194 | * This removes ext_diff extents from the inline buffer, beginning | ||
1195 | * at extent index idx. | ||
1196 | */ | ||
1197 | void | ||
1198 | xfs_iext_remove_inline( | ||
1199 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1200 | xfs_extnum_t idx, /* index to begin removing exts */ | ||
1201 | int ext_diff) /* number of extents to remove */ | ||
1202 | { | ||
1203 | int nextents; /* number of extents in file */ | ||
1204 | |||
1205 | ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); | ||
1206 | ASSERT(idx < XFS_INLINE_EXTS); | ||
1207 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
1208 | ASSERT(((nextents - ext_diff) > 0) && | ||
1209 | (nextents - ext_diff) < XFS_INLINE_EXTS); | ||
1210 | |||
1211 | if (idx + ext_diff < nextents) { | ||
1212 | memmove(&ifp->if_u2.if_inline_ext[idx], | ||
1213 | &ifp->if_u2.if_inline_ext[idx + ext_diff], | ||
1214 | (nextents - (idx + ext_diff)) * | ||
1215 | sizeof(xfs_bmbt_rec_t)); | ||
1216 | memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], | ||
1217 | 0, ext_diff * sizeof(xfs_bmbt_rec_t)); | ||
1218 | } else { | ||
1219 | memset(&ifp->if_u2.if_inline_ext[idx], 0, | ||
1220 | ext_diff * sizeof(xfs_bmbt_rec_t)); | ||
1221 | } | ||
1222 | } | ||
1223 | |||
1224 | /* | ||
1225 | * This removes ext_diff extents from a linear (direct) extent list, | ||
1226 | * beginning at extent index idx. If the extents are being removed | ||
1227 | * from the end of the list (ie. truncate) then we just need to re- | ||
1228 | * allocate the list to remove the extra space. Otherwise, if the | ||
1229 | * extents are being removed from the middle of the existing extent | ||
1230 | * entries, then we first need to move the extent records beginning | ||
1231 | * at idx + ext_diff up in the list to overwrite the records being | ||
1232 | * removed, then remove the extra space via kmem_realloc. | ||
1233 | */ | ||
1234 | void | ||
1235 | xfs_iext_remove_direct( | ||
1236 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1237 | xfs_extnum_t idx, /* index to begin removing exts */ | ||
1238 | int ext_diff) /* number of extents to remove */ | ||
1239 | { | ||
1240 | xfs_extnum_t nextents; /* number of extents in file */ | ||
1241 | int new_size; /* size of extents after removal */ | ||
1242 | |||
1243 | ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); | ||
1244 | new_size = ifp->if_bytes - | ||
1245 | (ext_diff * sizeof(xfs_bmbt_rec_t)); | ||
1246 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
1247 | |||
1248 | if (new_size == 0) { | ||
1249 | xfs_iext_destroy(ifp); | ||
1250 | return; | ||
1251 | } | ||
1252 | /* Move extents up in the list (if needed) */ | ||
1253 | if (idx + ext_diff < nextents) { | ||
1254 | memmove(&ifp->if_u1.if_extents[idx], | ||
1255 | &ifp->if_u1.if_extents[idx + ext_diff], | ||
1256 | (nextents - (idx + ext_diff)) * | ||
1257 | sizeof(xfs_bmbt_rec_t)); | ||
1258 | } | ||
1259 | memset(&ifp->if_u1.if_extents[nextents - ext_diff], | ||
1260 | 0, ext_diff * sizeof(xfs_bmbt_rec_t)); | ||
1261 | /* | ||
1262 | * Reallocate the direct extent list. If the extents | ||
1263 | * will fit inside the inode then xfs_iext_realloc_direct | ||
1264 | * will switch from direct to inline extent allocation | ||
1265 | * mode for us. | ||
1266 | */ | ||
1267 | xfs_iext_realloc_direct(ifp, new_size); | ||
1268 | ifp->if_bytes = new_size; | ||
1269 | } | ||
1270 | |||
1271 | /* | ||
1272 | * This is called when incore extents are being removed from the | ||
1273 | * indirection array and the extents being removed span multiple extent | ||
1274 | * buffers. The idx parameter contains the file extent index where we | ||
1275 | * want to begin removing extents, and the count parameter contains | ||
1276 | * how many extents need to be removed. | ||
1277 | * | ||
1278 | * |-------| |-------| | ||
1279 | * | nex1 | | | nex1 - number of extents before idx | ||
1280 | * |-------| | count | | ||
1281 | * | | | | count - number of extents being removed at idx | ||
1282 | * | count | |-------| | ||
1283 | * | | | nex2 | nex2 - number of extents after idx + count | ||
1284 | * |-------| |-------| | ||
1285 | */ | ||
1286 | void | ||
1287 | xfs_iext_remove_indirect( | ||
1288 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1289 | xfs_extnum_t idx, /* index to begin removing extents */ | ||
1290 | int count) /* number of extents to remove */ | ||
1291 | { | ||
1292 | xfs_ext_irec_t *erp; /* indirection array pointer */ | ||
1293 | int erp_idx = 0; /* indirection array index */ | ||
1294 | xfs_extnum_t ext_cnt; /* extents left to remove */ | ||
1295 | xfs_extnum_t ext_diff; /* extents to remove in current list */ | ||
1296 | xfs_extnum_t nex1; /* number of extents before idx */ | ||
1297 | xfs_extnum_t nex2; /* extents after idx + count */ | ||
1298 | int page_idx = idx; /* index in target extent list */ | ||
1299 | |||
1300 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
1301 | erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); | ||
1302 | ASSERT(erp != NULL); | ||
1303 | nex1 = page_idx; | ||
1304 | ext_cnt = count; | ||
1305 | while (ext_cnt) { | ||
1306 | nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); | ||
1307 | ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); | ||
1308 | /* | ||
1309 | * Check for deletion of entire list; | ||
1310 | * xfs_iext_irec_remove() updates extent offsets. | ||
1311 | */ | ||
1312 | if (ext_diff == erp->er_extcount) { | ||
1313 | xfs_iext_irec_remove(ifp, erp_idx); | ||
1314 | ext_cnt -= ext_diff; | ||
1315 | nex1 = 0; | ||
1316 | if (ext_cnt) { | ||
1317 | ASSERT(erp_idx < ifp->if_real_bytes / | ||
1318 | XFS_IEXT_BUFSZ); | ||
1319 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
1320 | nex1 = 0; | ||
1321 | continue; | ||
1322 | } else { | ||
1323 | break; | ||
1324 | } | ||
1325 | } | ||
1326 | /* Move extents up (if needed) */ | ||
1327 | if (nex2) { | ||
1328 | memmove(&erp->er_extbuf[nex1], | ||
1329 | &erp->er_extbuf[nex1 + ext_diff], | ||
1330 | nex2 * sizeof(xfs_bmbt_rec_t)); | ||
1331 | } | ||
1332 | /* Zero out rest of page */ | ||
1333 | memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - | ||
1334 | ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); | ||
1335 | /* Update remaining counters */ | ||
1336 | erp->er_extcount -= ext_diff; | ||
1337 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); | ||
1338 | ext_cnt -= ext_diff; | ||
1339 | nex1 = 0; | ||
1340 | erp_idx++; | ||
1341 | erp++; | ||
1342 | } | ||
1343 | ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); | ||
1344 | xfs_iext_irec_compact(ifp); | ||
1345 | } | ||
1346 | |||
1347 | /* | ||
1348 | * Create, destroy, or resize a linear (direct) block of extents. | ||
1349 | */ | ||
1350 | void | ||
1351 | xfs_iext_realloc_direct( | ||
1352 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1353 | int new_size) /* new size of extents after adding */ | ||
1354 | { | ||
1355 | int rnew_size; /* real new size of extents */ | ||
1356 | |||
1357 | rnew_size = new_size; | ||
1358 | |||
1359 | ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || | ||
1360 | ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && | ||
1361 | (new_size != ifp->if_real_bytes))); | ||
1362 | |||
1363 | /* Free extent records */ | ||
1364 | if (new_size == 0) { | ||
1365 | xfs_iext_destroy(ifp); | ||
1366 | } | ||
1367 | /* Resize direct extent list and zero any new bytes */ | ||
1368 | else if (ifp->if_real_bytes) { | ||
1369 | /* Check if extents will fit inside the inode */ | ||
1370 | if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { | ||
1371 | xfs_iext_direct_to_inline(ifp, new_size / | ||
1372 | (uint)sizeof(xfs_bmbt_rec_t)); | ||
1373 | ifp->if_bytes = new_size; | ||
1374 | return; | ||
1375 | } | ||
1376 | if (!is_power_of_2(new_size)){ | ||
1377 | rnew_size = roundup_pow_of_two(new_size); | ||
1378 | } | ||
1379 | if (rnew_size != ifp->if_real_bytes) { | ||
1380 | ifp->if_u1.if_extents = | ||
1381 | kmem_realloc(ifp->if_u1.if_extents, | ||
1382 | rnew_size, | ||
1383 | ifp->if_real_bytes, KM_NOFS); | ||
1384 | } | ||
1385 | if (rnew_size > ifp->if_real_bytes) { | ||
1386 | memset(&ifp->if_u1.if_extents[ifp->if_bytes / | ||
1387 | (uint)sizeof(xfs_bmbt_rec_t)], 0, | ||
1388 | rnew_size - ifp->if_real_bytes); | ||
1389 | } | ||
1390 | } | ||
1391 | /* Switch from the inline extent buffer to a direct extent list */ | ||
1392 | else { | ||
1393 | if (!is_power_of_2(new_size)) { | ||
1394 | rnew_size = roundup_pow_of_two(new_size); | ||
1395 | } | ||
1396 | xfs_iext_inline_to_direct(ifp, rnew_size); | ||
1397 | } | ||
1398 | ifp->if_real_bytes = rnew_size; | ||
1399 | ifp->if_bytes = new_size; | ||
1400 | } | ||
1401 | |||
1402 | /* | ||
1403 | * Switch from linear (direct) extent records to inline buffer. | ||
1404 | */ | ||
1405 | void | ||
1406 | xfs_iext_direct_to_inline( | ||
1407 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1408 | xfs_extnum_t nextents) /* number of extents in file */ | ||
1409 | { | ||
1410 | ASSERT(ifp->if_flags & XFS_IFEXTENTS); | ||
1411 | ASSERT(nextents <= XFS_INLINE_EXTS); | ||
1412 | /* | ||
1413 | * The inline buffer was zeroed when we switched | ||
1414 | * from inline to direct extent allocation mode, | ||
1415 | * so we don't need to clear it here. | ||
1416 | */ | ||
1417 | memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, | ||
1418 | nextents * sizeof(xfs_bmbt_rec_t)); | ||
1419 | kmem_free(ifp->if_u1.if_extents); | ||
1420 | ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; | ||
1421 | ifp->if_real_bytes = 0; | ||
1422 | } | ||
1423 | |||
1424 | /* | ||
1425 | * Switch from inline buffer to linear (direct) extent records. | ||
1426 | * new_size should already be rounded up to the next power of 2 | ||
1427 | * by the caller (when appropriate), so use new_size as it is. | ||
1428 | * However, since new_size may be rounded up, we can't update | ||
1429 | * if_bytes here. It is the caller's responsibility to update | ||
1430 | * if_bytes upon return. | ||
1431 | */ | ||
1432 | void | ||
1433 | xfs_iext_inline_to_direct( | ||
1434 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1435 | int new_size) /* number of extents in file */ | ||
1436 | { | ||
1437 | ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); | ||
1438 | memset(ifp->if_u1.if_extents, 0, new_size); | ||
1439 | if (ifp->if_bytes) { | ||
1440 | memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, | ||
1441 | ifp->if_bytes); | ||
1442 | memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * | ||
1443 | sizeof(xfs_bmbt_rec_t)); | ||
1444 | } | ||
1445 | ifp->if_real_bytes = new_size; | ||
1446 | } | ||
1447 | |||
1448 | /* | ||
1449 | * Resize an extent indirection array to new_size bytes. | ||
1450 | */ | ||
1451 | STATIC void | ||
1452 | xfs_iext_realloc_indirect( | ||
1453 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1454 | int new_size) /* new indirection array size */ | ||
1455 | { | ||
1456 | int nlists; /* number of irec's (ex lists) */ | ||
1457 | int size; /* current indirection array size */ | ||
1458 | |||
1459 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
1460 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
1461 | size = nlists * sizeof(xfs_ext_irec_t); | ||
1462 | ASSERT(ifp->if_real_bytes); | ||
1463 | ASSERT((new_size >= 0) && (new_size != size)); | ||
1464 | if (new_size == 0) { | ||
1465 | xfs_iext_destroy(ifp); | ||
1466 | } else { | ||
1467 | ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) | ||
1468 | kmem_realloc(ifp->if_u1.if_ext_irec, | ||
1469 | new_size, size, KM_NOFS); | ||
1470 | } | ||
1471 | } | ||
1472 | |||
1473 | /* | ||
1474 | * Switch from indirection array to linear (direct) extent allocations. | ||
1475 | */ | ||
1476 | STATIC void | ||
1477 | xfs_iext_indirect_to_direct( | ||
1478 | xfs_ifork_t *ifp) /* inode fork pointer */ | ||
1479 | { | ||
1480 | xfs_bmbt_rec_host_t *ep; /* extent record pointer */ | ||
1481 | xfs_extnum_t nextents; /* number of extents in file */ | ||
1482 | int size; /* size of file extents */ | ||
1483 | |||
1484 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
1485 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
1486 | ASSERT(nextents <= XFS_LINEAR_EXTS); | ||
1487 | size = nextents * sizeof(xfs_bmbt_rec_t); | ||
1488 | |||
1489 | xfs_iext_irec_compact_pages(ifp); | ||
1490 | ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); | ||
1491 | |||
1492 | ep = ifp->if_u1.if_ext_irec->er_extbuf; | ||
1493 | kmem_free(ifp->if_u1.if_ext_irec); | ||
1494 | ifp->if_flags &= ~XFS_IFEXTIREC; | ||
1495 | ifp->if_u1.if_extents = ep; | ||
1496 | ifp->if_bytes = size; | ||
1497 | if (nextents < XFS_LINEAR_EXTS) { | ||
1498 | xfs_iext_realloc_direct(ifp, size); | ||
1499 | } | ||
1500 | } | ||
1501 | |||
1502 | /* | ||
1503 | * Free incore file extents. | ||
1504 | */ | ||
1505 | void | ||
1506 | xfs_iext_destroy( | ||
1507 | xfs_ifork_t *ifp) /* inode fork pointer */ | ||
1508 | { | ||
1509 | if (ifp->if_flags & XFS_IFEXTIREC) { | ||
1510 | int erp_idx; | ||
1511 | int nlists; | ||
1512 | |||
1513 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
1514 | for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { | ||
1515 | xfs_iext_irec_remove(ifp, erp_idx); | ||
1516 | } | ||
1517 | ifp->if_flags &= ~XFS_IFEXTIREC; | ||
1518 | } else if (ifp->if_real_bytes) { | ||
1519 | kmem_free(ifp->if_u1.if_extents); | ||
1520 | } else if (ifp->if_bytes) { | ||
1521 | memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * | ||
1522 | sizeof(xfs_bmbt_rec_t)); | ||
1523 | } | ||
1524 | ifp->if_u1.if_extents = NULL; | ||
1525 | ifp->if_real_bytes = 0; | ||
1526 | ifp->if_bytes = 0; | ||
1527 | } | ||
1528 | |||
1529 | /* | ||
1530 | * Return a pointer to the extent record for file system block bno. | ||
1531 | */ | ||
1532 | xfs_bmbt_rec_host_t * /* pointer to found extent record */ | ||
1533 | xfs_iext_bno_to_ext( | ||
1534 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1535 | xfs_fileoff_t bno, /* block number to search for */ | ||
1536 | xfs_extnum_t *idxp) /* index of target extent */ | ||
1537 | { | ||
1538 | xfs_bmbt_rec_host_t *base; /* pointer to first extent */ | ||
1539 | xfs_filblks_t blockcount = 0; /* number of blocks in extent */ | ||
1540 | xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */ | ||
1541 | xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ | ||
1542 | int high; /* upper boundary in search */ | ||
1543 | xfs_extnum_t idx = 0; /* index of target extent */ | ||
1544 | int low; /* lower boundary in search */ | ||
1545 | xfs_extnum_t nextents; /* number of file extents */ | ||
1546 | xfs_fileoff_t startoff = 0; /* start offset of extent */ | ||
1547 | |||
1548 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
1549 | if (nextents == 0) { | ||
1550 | *idxp = 0; | ||
1551 | return NULL; | ||
1552 | } | ||
1553 | low = 0; | ||
1554 | if (ifp->if_flags & XFS_IFEXTIREC) { | ||
1555 | /* Find target extent list */ | ||
1556 | int erp_idx = 0; | ||
1557 | erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); | ||
1558 | base = erp->er_extbuf; | ||
1559 | high = erp->er_extcount - 1; | ||
1560 | } else { | ||
1561 | base = ifp->if_u1.if_extents; | ||
1562 | high = nextents - 1; | ||
1563 | } | ||
1564 | /* Binary search extent records */ | ||
1565 | while (low <= high) { | ||
1566 | idx = (low + high) >> 1; | ||
1567 | ep = base + idx; | ||
1568 | startoff = xfs_bmbt_get_startoff(ep); | ||
1569 | blockcount = xfs_bmbt_get_blockcount(ep); | ||
1570 | if (bno < startoff) { | ||
1571 | high = idx - 1; | ||
1572 | } else if (bno >= startoff + blockcount) { | ||
1573 | low = idx + 1; | ||
1574 | } else { | ||
1575 | /* Convert back to file-based extent index */ | ||
1576 | if (ifp->if_flags & XFS_IFEXTIREC) { | ||
1577 | idx += erp->er_extoff; | ||
1578 | } | ||
1579 | *idxp = idx; | ||
1580 | return ep; | ||
1581 | } | ||
1582 | } | ||
1583 | /* Convert back to file-based extent index */ | ||
1584 | if (ifp->if_flags & XFS_IFEXTIREC) { | ||
1585 | idx += erp->er_extoff; | ||
1586 | } | ||
1587 | if (bno >= startoff + blockcount) { | ||
1588 | if (++idx == nextents) { | ||
1589 | ep = NULL; | ||
1590 | } else { | ||
1591 | ep = xfs_iext_get_ext(ifp, idx); | ||
1592 | } | ||
1593 | } | ||
1594 | *idxp = idx; | ||
1595 | return ep; | ||
1596 | } | ||
1597 | |||
1598 | /* | ||
1599 | * Return a pointer to the indirection array entry containing the | ||
1600 | * extent record for filesystem block bno. Store the index of the | ||
1601 | * target irec in *erp_idxp. | ||
1602 | */ | ||
1603 | xfs_ext_irec_t * /* pointer to found extent record */ | ||
1604 | xfs_iext_bno_to_irec( | ||
1605 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1606 | xfs_fileoff_t bno, /* block number to search for */ | ||
1607 | int *erp_idxp) /* irec index of target ext list */ | ||
1608 | { | ||
1609 | xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ | ||
1610 | xfs_ext_irec_t *erp_next; /* next indirection array entry */ | ||
1611 | int erp_idx; /* indirection array index */ | ||
1612 | int nlists; /* number of extent irec's (lists) */ | ||
1613 | int high; /* binary search upper limit */ | ||
1614 | int low; /* binary search lower limit */ | ||
1615 | |||
1616 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
1617 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
1618 | erp_idx = 0; | ||
1619 | low = 0; | ||
1620 | high = nlists - 1; | ||
1621 | while (low <= high) { | ||
1622 | erp_idx = (low + high) >> 1; | ||
1623 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
1624 | erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL; | ||
1625 | if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { | ||
1626 | high = erp_idx - 1; | ||
1627 | } else if (erp_next && bno >= | ||
1628 | xfs_bmbt_get_startoff(erp_next->er_extbuf)) { | ||
1629 | low = erp_idx + 1; | ||
1630 | } else { | ||
1631 | break; | ||
1632 | } | ||
1633 | } | ||
1634 | *erp_idxp = erp_idx; | ||
1635 | return erp; | ||
1636 | } | ||
1637 | |||
1638 | /* | ||
1639 | * Return a pointer to the indirection array entry containing the | ||
1640 | * extent record at file extent index *idxp. Store the index of the | ||
1641 | * target irec in *erp_idxp and store the page index of the target | ||
1642 | * extent record in *idxp. | ||
1643 | */ | ||
1644 | xfs_ext_irec_t * | ||
1645 | xfs_iext_idx_to_irec( | ||
1646 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1647 | xfs_extnum_t *idxp, /* extent index (file -> page) */ | ||
1648 | int *erp_idxp, /* pointer to target irec */ | ||
1649 | int realloc) /* new bytes were just added */ | ||
1650 | { | ||
1651 | xfs_ext_irec_t *prev; /* pointer to previous irec */ | ||
1652 | xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ | ||
1653 | int erp_idx; /* indirection array index */ | ||
1654 | int nlists; /* number of irec's (ex lists) */ | ||
1655 | int high; /* binary search upper limit */ | ||
1656 | int low; /* binary search lower limit */ | ||
1657 | xfs_extnum_t page_idx = *idxp; /* extent index in target list */ | ||
1658 | |||
1659 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
1660 | ASSERT(page_idx >= 0); | ||
1661 | ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); | ||
1662 | ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); | ||
1663 | |||
1664 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
1665 | erp_idx = 0; | ||
1666 | low = 0; | ||
1667 | high = nlists - 1; | ||
1668 | |||
1669 | /* Binary search extent irec's */ | ||
1670 | while (low <= high) { | ||
1671 | erp_idx = (low + high) >> 1; | ||
1672 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
1673 | prev = erp_idx > 0 ? erp - 1 : NULL; | ||
1674 | if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && | ||
1675 | realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { | ||
1676 | high = erp_idx - 1; | ||
1677 | } else if (page_idx > erp->er_extoff + erp->er_extcount || | ||
1678 | (page_idx == erp->er_extoff + erp->er_extcount && | ||
1679 | !realloc)) { | ||
1680 | low = erp_idx + 1; | ||
1681 | } else if (page_idx == erp->er_extoff + erp->er_extcount && | ||
1682 | erp->er_extcount == XFS_LINEAR_EXTS) { | ||
1683 | ASSERT(realloc); | ||
1684 | page_idx = 0; | ||
1685 | erp_idx++; | ||
1686 | erp = erp_idx < nlists ? erp + 1 : NULL; | ||
1687 | break; | ||
1688 | } else { | ||
1689 | page_idx -= erp->er_extoff; | ||
1690 | break; | ||
1691 | } | ||
1692 | } | ||
1693 | *idxp = page_idx; | ||
1694 | *erp_idxp = erp_idx; | ||
1695 | return erp; | ||
1696 | } | ||
1697 | |||
1698 | /* | ||
1699 | * Allocate and initialize an indirection array once the space needed | ||
1700 | * for incore extents increases above XFS_IEXT_BUFSZ. | ||
1701 | */ | ||
1702 | void | ||
1703 | xfs_iext_irec_init( | ||
1704 | xfs_ifork_t *ifp) /* inode fork pointer */ | ||
1705 | { | ||
1706 | xfs_ext_irec_t *erp; /* indirection array pointer */ | ||
1707 | xfs_extnum_t nextents; /* number of extents in file */ | ||
1708 | |||
1709 | ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); | ||
1710 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
1711 | ASSERT(nextents <= XFS_LINEAR_EXTS); | ||
1712 | |||
1713 | erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); | ||
1714 | |||
1715 | if (nextents == 0) { | ||
1716 | ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); | ||
1717 | } else if (!ifp->if_real_bytes) { | ||
1718 | xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); | ||
1719 | } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { | ||
1720 | xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); | ||
1721 | } | ||
1722 | erp->er_extbuf = ifp->if_u1.if_extents; | ||
1723 | erp->er_extcount = nextents; | ||
1724 | erp->er_extoff = 0; | ||
1725 | |||
1726 | ifp->if_flags |= XFS_IFEXTIREC; | ||
1727 | ifp->if_real_bytes = XFS_IEXT_BUFSZ; | ||
1728 | ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); | ||
1729 | ifp->if_u1.if_ext_irec = erp; | ||
1730 | |||
1731 | return; | ||
1732 | } | ||
1733 | |||
1734 | /* | ||
1735 | * Allocate and initialize a new entry in the indirection array. | ||
1736 | */ | ||
1737 | xfs_ext_irec_t * | ||
1738 | xfs_iext_irec_new( | ||
1739 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1740 | int erp_idx) /* index for new irec */ | ||
1741 | { | ||
1742 | xfs_ext_irec_t *erp; /* indirection array pointer */ | ||
1743 | int i; /* loop counter */ | ||
1744 | int nlists; /* number of irec's (ex lists) */ | ||
1745 | |||
1746 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
1747 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
1748 | |||
1749 | /* Resize indirection array */ | ||
1750 | xfs_iext_realloc_indirect(ifp, ++nlists * | ||
1751 | sizeof(xfs_ext_irec_t)); | ||
1752 | /* | ||
1753 | * Move records down in the array so the | ||
1754 | * new page can use erp_idx. | ||
1755 | */ | ||
1756 | erp = ifp->if_u1.if_ext_irec; | ||
1757 | for (i = nlists - 1; i > erp_idx; i--) { | ||
1758 | memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); | ||
1759 | } | ||
1760 | ASSERT(i == erp_idx); | ||
1761 | |||
1762 | /* Initialize new extent record */ | ||
1763 | erp = ifp->if_u1.if_ext_irec; | ||
1764 | erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); | ||
1765 | ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; | ||
1766 | memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); | ||
1767 | erp[erp_idx].er_extcount = 0; | ||
1768 | erp[erp_idx].er_extoff = erp_idx > 0 ? | ||
1769 | erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; | ||
1770 | return (&erp[erp_idx]); | ||
1771 | } | ||
1772 | |||
1773 | /* | ||
1774 | * Remove a record from the indirection array. | ||
1775 | */ | ||
1776 | void | ||
1777 | xfs_iext_irec_remove( | ||
1778 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1779 | int erp_idx) /* irec index to remove */ | ||
1780 | { | ||
1781 | xfs_ext_irec_t *erp; /* indirection array pointer */ | ||
1782 | int i; /* loop counter */ | ||
1783 | int nlists; /* number of irec's (ex lists) */ | ||
1784 | |||
1785 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
1786 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
1787 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
1788 | if (erp->er_extbuf) { | ||
1789 | xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, | ||
1790 | -erp->er_extcount); | ||
1791 | kmem_free(erp->er_extbuf); | ||
1792 | } | ||
1793 | /* Compact extent records */ | ||
1794 | erp = ifp->if_u1.if_ext_irec; | ||
1795 | for (i = erp_idx; i < nlists - 1; i++) { | ||
1796 | memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); | ||
1797 | } | ||
1798 | /* | ||
1799 | * Manually free the last extent record from the indirection | ||
1800 | * array. A call to xfs_iext_realloc_indirect() with a size | ||
1801 | * of zero would result in a call to xfs_iext_destroy() which | ||
1802 | * would in turn call this function again, creating a nasty | ||
1803 | * infinite loop. | ||
1804 | */ | ||
1805 | if (--nlists) { | ||
1806 | xfs_iext_realloc_indirect(ifp, | ||
1807 | nlists * sizeof(xfs_ext_irec_t)); | ||
1808 | } else { | ||
1809 | kmem_free(ifp->if_u1.if_ext_irec); | ||
1810 | } | ||
1811 | ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; | ||
1812 | } | ||
1813 | |||
1814 | /* | ||
1815 | * This is called to clean up large amounts of unused memory allocated | ||
1816 | * by the indirection array. Before compacting anything though, verify | ||
1817 | * that the indirection array is still needed and switch back to the | ||
1818 | * linear extent list (or even the inline buffer) if possible. The | ||
1819 | * compaction policy is as follows: | ||
1820 | * | ||
1821 | * Full Compaction: Extents fit into a single page (or inline buffer) | ||
1822 | * Partial Compaction: Extents occupy less than 50% of allocated space | ||
1823 | * No Compaction: Extents occupy at least 50% of allocated space | ||
1824 | */ | ||
1825 | void | ||
1826 | xfs_iext_irec_compact( | ||
1827 | xfs_ifork_t *ifp) /* inode fork pointer */ | ||
1828 | { | ||
1829 | xfs_extnum_t nextents; /* number of extents in file */ | ||
1830 | int nlists; /* number of irec's (ex lists) */ | ||
1831 | |||
1832 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
1833 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
1834 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | ||
1835 | |||
1836 | if (nextents == 0) { | ||
1837 | xfs_iext_destroy(ifp); | ||
1838 | } else if (nextents <= XFS_INLINE_EXTS) { | ||
1839 | xfs_iext_indirect_to_direct(ifp); | ||
1840 | xfs_iext_direct_to_inline(ifp, nextents); | ||
1841 | } else if (nextents <= XFS_LINEAR_EXTS) { | ||
1842 | xfs_iext_indirect_to_direct(ifp); | ||
1843 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { | ||
1844 | xfs_iext_irec_compact_pages(ifp); | ||
1845 | } | ||
1846 | } | ||
1847 | |||
1848 | /* | ||
1849 | * Combine extents from neighboring extent pages. | ||
1850 | */ | ||
1851 | void | ||
1852 | xfs_iext_irec_compact_pages( | ||
1853 | xfs_ifork_t *ifp) /* inode fork pointer */ | ||
1854 | { | ||
1855 | xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ | ||
1856 | int erp_idx = 0; /* indirection array index */ | ||
1857 | int nlists; /* number of irec's (ex lists) */ | ||
1858 | |||
1859 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
1860 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
1861 | while (erp_idx < nlists - 1) { | ||
1862 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
1863 | erp_next = erp + 1; | ||
1864 | if (erp_next->er_extcount <= | ||
1865 | (XFS_LINEAR_EXTS - erp->er_extcount)) { | ||
1866 | memcpy(&erp->er_extbuf[erp->er_extcount], | ||
1867 | erp_next->er_extbuf, erp_next->er_extcount * | ||
1868 | sizeof(xfs_bmbt_rec_t)); | ||
1869 | erp->er_extcount += erp_next->er_extcount; | ||
1870 | /* | ||
1871 | * Free page before removing extent record | ||
1872 | * so er_extoffs don't get modified in | ||
1873 | * xfs_iext_irec_remove. | ||
1874 | */ | ||
1875 | kmem_free(erp_next->er_extbuf); | ||
1876 | erp_next->er_extbuf = NULL; | ||
1877 | xfs_iext_irec_remove(ifp, erp_idx + 1); | ||
1878 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
1879 | } else { | ||
1880 | erp_idx++; | ||
1881 | } | ||
1882 | } | ||
1883 | } | ||
1884 | |||
1885 | /* | ||
1886 | * This is called to update the er_extoff field in the indirection | ||
1887 | * array when extents have been added or removed from one of the | ||
1888 | * extent lists. erp_idx contains the irec index to begin updating | ||
1889 | * at and ext_diff contains the number of extents that were added | ||
1890 | * or removed. | ||
1891 | */ | ||
1892 | void | ||
1893 | xfs_iext_irec_update_extoffs( | ||
1894 | xfs_ifork_t *ifp, /* inode fork pointer */ | ||
1895 | int erp_idx, /* irec index to update */ | ||
1896 | int ext_diff) /* number of new extents */ | ||
1897 | { | ||
1898 | int i; /* loop counter */ | ||
1899 | int nlists; /* number of irec's (ex lists */ | ||
1900 | |||
1901 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
1902 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
1903 | for (i = erp_idx; i < nlists; i++) { | ||
1904 | ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; | ||
1905 | } | ||
1906 | } | ||
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h new file mode 100644 index 000000000000..7d3b1ed6dcbe --- /dev/null +++ b/fs/xfs/libxfs/xfs_inode_fork.h | |||
@@ -0,0 +1,171 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_INODE_FORK_H__ | ||
19 | #define __XFS_INODE_FORK_H__ | ||
20 | |||
21 | struct xfs_inode_log_item; | ||
22 | struct xfs_dinode; | ||
23 | |||
24 | /* | ||
25 | * The following xfs_ext_irec_t struct introduces a second (top) level | ||
26 | * to the in-core extent allocation scheme. These structs are allocated | ||
27 | * in a contiguous block, creating an indirection array where each entry | ||
28 | * (irec) contains a pointer to a buffer of in-core extent records which | ||
29 | * it manages. Each extent buffer is 4k in size, since 4k is the system | ||
30 | * page size on Linux i386 and systems with larger page sizes don't seem | ||
31 | * to gain much, if anything, by using their native page size as the | ||
32 | * extent buffer size. Also, using 4k extent buffers everywhere provides | ||
33 | * a consistent interface for CXFS across different platforms. | ||
34 | * | ||
35 | * There is currently no limit on the number of irec's (extent lists) | ||
36 | * allowed, so heavily fragmented files may require an indirection array | ||
37 | * which spans multiple system pages of memory. The number of extents | ||
38 | * which would require this amount of contiguous memory is very large | ||
39 | * and should not cause problems in the foreseeable future. However, | ||
40 | * if the memory needed for the contiguous array ever becomes a problem, | ||
41 | * it is possible that a third level of indirection may be required. | ||
42 | */ | ||
43 | typedef struct xfs_ext_irec { | ||
44 | xfs_bmbt_rec_host_t *er_extbuf; /* block of extent records */ | ||
45 | xfs_extnum_t er_extoff; /* extent offset in file */ | ||
46 | xfs_extnum_t er_extcount; /* number of extents in page/block */ | ||
47 | } xfs_ext_irec_t; | ||
48 | |||
49 | /* | ||
50 | * File incore extent information, present for each of data & attr forks. | ||
51 | */ | ||
52 | #define XFS_IEXT_BUFSZ 4096 | ||
53 | #define XFS_LINEAR_EXTS (XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t)) | ||
54 | #define XFS_INLINE_EXTS 2 | ||
55 | #define XFS_INLINE_DATA 32 | ||
56 | typedef struct xfs_ifork { | ||
57 | int if_bytes; /* bytes in if_u1 */ | ||
58 | int if_real_bytes; /* bytes allocated in if_u1 */ | ||
59 | struct xfs_btree_block *if_broot; /* file's incore btree root */ | ||
60 | short if_broot_bytes; /* bytes allocated for root */ | ||
61 | unsigned char if_flags; /* per-fork flags */ | ||
62 | union { | ||
63 | xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ | ||
64 | xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ | ||
65 | char *if_data; /* inline file data */ | ||
66 | } if_u1; | ||
67 | union { | ||
68 | xfs_bmbt_rec_host_t if_inline_ext[XFS_INLINE_EXTS]; | ||
69 | /* very small file extents */ | ||
70 | char if_inline_data[XFS_INLINE_DATA]; | ||
71 | /* very small file data */ | ||
72 | xfs_dev_t if_rdev; /* dev number if special */ | ||
73 | uuid_t if_uuid; /* mount point value */ | ||
74 | } if_u2; | ||
75 | } xfs_ifork_t; | ||
76 | |||
77 | /* | ||
78 | * Per-fork incore inode flags. | ||
79 | */ | ||
80 | #define XFS_IFINLINE 0x01 /* Inline data is read in */ | ||
81 | #define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */ | ||
82 | #define XFS_IFBROOT 0x04 /* if_broot points to the bmap b-tree root */ | ||
83 | #define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ | ||
84 | |||
85 | /* | ||
86 | * Fork handling. | ||
87 | */ | ||
88 | |||
89 | #define XFS_IFORK_Q(ip) ((ip)->i_d.di_forkoff != 0) | ||
90 | #define XFS_IFORK_BOFF(ip) ((int)((ip)->i_d.di_forkoff << 3)) | ||
91 | |||
92 | #define XFS_IFORK_PTR(ip,w) \ | ||
93 | ((w) == XFS_DATA_FORK ? \ | ||
94 | &(ip)->i_df : \ | ||
95 | (ip)->i_afp) | ||
96 | #define XFS_IFORK_DSIZE(ip) \ | ||
97 | (XFS_IFORK_Q(ip) ? \ | ||
98 | XFS_IFORK_BOFF(ip) : \ | ||
99 | XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version)) | ||
100 | #define XFS_IFORK_ASIZE(ip) \ | ||
101 | (XFS_IFORK_Q(ip) ? \ | ||
102 | XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version) - \ | ||
103 | XFS_IFORK_BOFF(ip) : \ | ||
104 | 0) | ||
105 | #define XFS_IFORK_SIZE(ip,w) \ | ||
106 | ((w) == XFS_DATA_FORK ? \ | ||
107 | XFS_IFORK_DSIZE(ip) : \ | ||
108 | XFS_IFORK_ASIZE(ip)) | ||
109 | #define XFS_IFORK_FORMAT(ip,w) \ | ||
110 | ((w) == XFS_DATA_FORK ? \ | ||
111 | (ip)->i_d.di_format : \ | ||
112 | (ip)->i_d.di_aformat) | ||
113 | #define XFS_IFORK_FMT_SET(ip,w,n) \ | ||
114 | ((w) == XFS_DATA_FORK ? \ | ||
115 | ((ip)->i_d.di_format = (n)) : \ | ||
116 | ((ip)->i_d.di_aformat = (n))) | ||
117 | #define XFS_IFORK_NEXTENTS(ip,w) \ | ||
118 | ((w) == XFS_DATA_FORK ? \ | ||
119 | (ip)->i_d.di_nextents : \ | ||
120 | (ip)->i_d.di_anextents) | ||
121 | #define XFS_IFORK_NEXT_SET(ip,w,n) \ | ||
122 | ((w) == XFS_DATA_FORK ? \ | ||
123 | ((ip)->i_d.di_nextents = (n)) : \ | ||
124 | ((ip)->i_d.di_anextents = (n))) | ||
125 | #define XFS_IFORK_MAXEXT(ip, w) \ | ||
126 | (XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t)) | ||
127 | |||
128 | int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); | ||
129 | void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, | ||
130 | struct xfs_inode_log_item *, int); | ||
131 | void xfs_idestroy_fork(struct xfs_inode *, int); | ||
132 | void xfs_idata_realloc(struct xfs_inode *, int, int); | ||
133 | void xfs_iroot_realloc(struct xfs_inode *, int, int); | ||
134 | int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int); | ||
135 | int xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *, | ||
136 | int); | ||
137 | |||
138 | struct xfs_bmbt_rec_host * | ||
139 | xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t); | ||
140 | void xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t, | ||
141 | struct xfs_bmbt_irec *, int); | ||
142 | void xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int); | ||
143 | void xfs_iext_add_indirect_multi(struct xfs_ifork *, int, | ||
144 | xfs_extnum_t, int); | ||
145 | void xfs_iext_remove(struct xfs_inode *, xfs_extnum_t, int, int); | ||
146 | void xfs_iext_remove_inline(struct xfs_ifork *, xfs_extnum_t, int); | ||
147 | void xfs_iext_remove_direct(struct xfs_ifork *, xfs_extnum_t, int); | ||
148 | void xfs_iext_remove_indirect(struct xfs_ifork *, xfs_extnum_t, int); | ||
149 | void xfs_iext_realloc_direct(struct xfs_ifork *, int); | ||
150 | void xfs_iext_direct_to_inline(struct xfs_ifork *, xfs_extnum_t); | ||
151 | void xfs_iext_inline_to_direct(struct xfs_ifork *, int); | ||
152 | void xfs_iext_destroy(struct xfs_ifork *); | ||
153 | struct xfs_bmbt_rec_host * | ||
154 | xfs_iext_bno_to_ext(struct xfs_ifork *, xfs_fileoff_t, int *); | ||
155 | struct xfs_ext_irec * | ||
156 | xfs_iext_bno_to_irec(struct xfs_ifork *, xfs_fileoff_t, int *); | ||
157 | struct xfs_ext_irec * | ||
158 | xfs_iext_idx_to_irec(struct xfs_ifork *, xfs_extnum_t *, int *, | ||
159 | int); | ||
160 | void xfs_iext_irec_init(struct xfs_ifork *); | ||
161 | struct xfs_ext_irec * | ||
162 | xfs_iext_irec_new(struct xfs_ifork *, int); | ||
163 | void xfs_iext_irec_remove(struct xfs_ifork *, int); | ||
164 | void xfs_iext_irec_compact(struct xfs_ifork *); | ||
165 | void xfs_iext_irec_compact_pages(struct xfs_ifork *); | ||
166 | void xfs_iext_irec_compact_full(struct xfs_ifork *); | ||
167 | void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int); | ||
168 | |||
169 | extern struct kmem_zone *xfs_ifork_zone; | ||
170 | |||
171 | #endif /* __XFS_INODE_FORK_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_inum.h b/fs/xfs/libxfs/xfs_inum.h new file mode 100644 index 000000000000..90efdaf1706f --- /dev/null +++ b/fs/xfs/libxfs/xfs_inum.h | |||
@@ -0,0 +1,64 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_INUM_H__ | ||
19 | #define __XFS_INUM_H__ | ||
20 | |||
21 | /* | ||
22 | * Inode number format: | ||
23 | * low inopblog bits - offset in block | ||
24 | * next agblklog bits - block number in ag | ||
25 | * next agno_log bits - ag number | ||
26 | * high agno_log-agblklog-inopblog bits - 0 | ||
27 | */ | ||
28 | |||
29 | struct xfs_mount; | ||
30 | |||
/*
 * Inode number packing/unpacking helpers.
 *
 * Every macro below expands to a fully parenthesized expression so that it
 * can be combined with arbitrary operators at the call site, e.g.
 * XFS_INO_BITS(mp) * 2 multiplies the sum rather than only its first term.
 */

/* Mask with the low k bits set, truncated to 32 bits. */
#define	XFS_INO_MASK(k)			((__uint32_t)((1ULL << (k)) - 1))

/* Bit widths of the individual inode number fields, read from the mount. */
#define	XFS_INO_OFFSET_BITS(mp)		((mp)->m_sb.sb_inopblog)
#define	XFS_INO_AGBNO_BITS(mp)		((mp)->m_sb.sb_agblklog)
#define	XFS_INO_AGINO_BITS(mp)		((mp)->m_agino_log)
#define	XFS_INO_AGNO_BITS(mp)		((mp)->m_agno_log)

/* Total number of significant bits: AG number bits plus AG-inode bits. */
#define	XFS_INO_BITS(mp)		\
	(XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp))

/* Decompose an inode number (i) into its fields. */
#define	XFS_INO_TO_AGNO(mp,i)		\
	((xfs_agnumber_t)((i) >> XFS_INO_AGINO_BITS(mp)))
#define	XFS_INO_TO_AGINO(mp,i)		\
	((xfs_agino_t)(i) & XFS_INO_MASK(XFS_INO_AGINO_BITS(mp)))
#define	XFS_INO_TO_AGBNO(mp,i)		\
	(((xfs_agblock_t)(i) >> XFS_INO_OFFSET_BITS(mp)) & \
		XFS_INO_MASK(XFS_INO_AGBNO_BITS(mp)))
#define	XFS_INO_TO_OFFSET(mp,i)		\
	((int)(i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp)))
#define	XFS_INO_TO_FSB(mp,i)		\
	(XFS_AGB_TO_FSB(mp, XFS_INO_TO_AGNO(mp,i), XFS_INO_TO_AGBNO(mp,i)))

/* Build an inode number from an AG number (a) and an AG-relative inode (i). */
#define	XFS_AGINO_TO_INO(mp,a,i)	\
	(((xfs_ino_t)(a) << XFS_INO_AGINO_BITS(mp)) | (i))

/* Split an AG-relative inode number into block number and in-block offset. */
#define	XFS_AGINO_TO_AGBNO(mp,i)	((i) >> XFS_INO_OFFSET_BITS(mp))
#define	XFS_AGINO_TO_OFFSET(mp,i)	\
	((i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp)))

/* Combine an AG block number (b) and in-block offset (o) into an agino. */
#define	XFS_OFFBNO_TO_AGINO(mp,b,o)	\
	((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o)))
56 | |||
57 | #if XFS_BIG_INUMS | ||
58 | #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL)) | ||
59 | #else | ||
60 | #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL)) | ||
61 | #endif | ||
62 | #define XFS_MAXINUMBER_32 ((xfs_ino_t)((1ULL << 32) - 1ULL)) | ||
63 | |||
64 | #endif /* __XFS_INUM_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h new file mode 100644 index 000000000000..f0969c77bdbe --- /dev/null +++ b/fs/xfs/libxfs/xfs_log_format.h | |||
@@ -0,0 +1,679 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_LOG_FORMAT_H__ | ||
19 | #define __XFS_LOG_FORMAT_H__ | ||
20 | |||
21 | struct xfs_mount; | ||
22 | struct xfs_trans_res; | ||
23 | |||
24 | /* | ||
25 | * On-disk Log Format definitions. | ||
26 | * | ||
27 | * This file contains all the on-disk format definitions used within the log. It | ||
28 | * includes the physical log structure itself, as well as all the log item | ||
29 | * format structures that are written into the log and interpreted by log | ||
30 | * recovery. We start with the physical log format definitions, and then work | ||
31 | * through all the log items definitions and everything they encode into the | ||
32 | * log. | ||
33 | */ | ||
34 | typedef __uint32_t xlog_tid_t; | ||
35 | |||
36 | #define XLOG_MIN_ICLOGS 2 | ||
37 | #define XLOG_MAX_ICLOGS 8 | ||
38 | #define XLOG_HEADER_MAGIC_NUM 0xFEEDbabe /* Invalid cycle number */ | ||
39 | #define XLOG_VERSION_1 1 | ||
40 | #define XLOG_VERSION_2 2 /* Large IClogs, Log sunit */ | ||
41 | #define XLOG_VERSION_OKBITS (XLOG_VERSION_1 | XLOG_VERSION_2) | ||
42 | #define XLOG_MIN_RECORD_BSIZE (16*1024) /* eventually 32k */ | ||
43 | #define XLOG_BIG_RECORD_BSIZE (32*1024) /* 32k buffers */ | ||
44 | #define XLOG_MAX_RECORD_BSIZE (256*1024) | ||
45 | #define XLOG_HEADER_CYCLE_SIZE (32*1024) /* cycle data in header */ | ||
46 | #define XLOG_MIN_RECORD_BSHIFT 14 /* 16384 == 1 << 14 */ | ||
47 | #define XLOG_BIG_RECORD_BSHIFT 15 /* 32k == 1 << 15 */ | ||
48 | #define XLOG_MAX_RECORD_BSHIFT 18 /* 256k == 1 << 18 */ | ||
49 | #define XLOG_BTOLSUNIT(log, b) (((b)+(log)->l_mp->m_sb.sb_logsunit-1) / \ | ||
50 | (log)->l_mp->m_sb.sb_logsunit) | ||
51 | #define XLOG_LSUNITTOB(log, su) ((su) * (log)->l_mp->m_sb.sb_logsunit) | ||
52 | |||
53 | #define XLOG_HEADER_SIZE 512 | ||
54 | |||
55 | /* Minimum number of transactions that must fit in the log (defined by mkfs) */ | ||
56 | #define XFS_MIN_LOG_FACTOR 3 | ||
57 | |||
/*
 * Both macros pick a record size shift based on whether the superblock
 * reports a version 2 log (XLOG_MAX_RECORD_BSHIFT) or not
 * (XLOG_BIG_RECORD_BSHIFT), and pass the shifted value through BTOBB().
 * XLOG_REC_SHIFT shifts 1; XLOG_TOTAL_REC_SHIFT shifts XLOG_MAX_ICLOGS.
 *
 * The log argument is parenthesized so that any pointer-valued expression
 * (not just a plain identifier) can be passed safely.
 */
#define XLOG_REC_SHIFT(log) \
	BTOBB(1 << (xfs_sb_version_haslogv2(&(log)->l_mp->m_sb) ? \
	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
#define XLOG_TOTAL_REC_SHIFT(log) \
	BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&(log)->l_mp->m_sb) ? \
	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
64 | |||
65 | /* get lsn fields */ | ||
66 | #define CYCLE_LSN(lsn) ((uint)((lsn)>>32)) | ||
67 | #define BLOCK_LSN(lsn) ((uint)(lsn)) | ||
68 | |||
69 | /* this is used in a spot where we might otherwise double-endian-flip */ | ||
70 | #define CYCLE_LSN_DISK(lsn) (((__be32 *)&(lsn))[0]) | ||
71 | |||
72 | static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block) | ||
73 | { | ||
74 | return ((xfs_lsn_t)cycle << 32) | block; | ||
75 | } | ||
76 | |||
77 | static inline uint xlog_get_cycle(char *ptr) | ||
78 | { | ||
79 | if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) | ||
80 | return be32_to_cpu(*((__be32 *)ptr + 1)); | ||
81 | else | ||
82 | return be32_to_cpu(*(__be32 *)ptr); | ||
83 | } | ||
84 | |||
85 | /* Log Clients */ | ||
86 | #define XFS_TRANSACTION 0x69 | ||
87 | #define XFS_VOLUME 0x2 | ||
88 | #define XFS_LOG 0xaa | ||
89 | |||
90 | #define XLOG_UNMOUNT_TYPE 0x556e /* Un for Unmount */ | ||
91 | |||
92 | /* Region types for iovec's i_type */ | ||
93 | #define XLOG_REG_TYPE_BFORMAT 1 | ||
94 | #define XLOG_REG_TYPE_BCHUNK 2 | ||
95 | #define XLOG_REG_TYPE_EFI_FORMAT 3 | ||
96 | #define XLOG_REG_TYPE_EFD_FORMAT 4 | ||
97 | #define XLOG_REG_TYPE_IFORMAT 5 | ||
98 | #define XLOG_REG_TYPE_ICORE 6 | ||
99 | #define XLOG_REG_TYPE_IEXT 7 | ||
100 | #define XLOG_REG_TYPE_IBROOT 8 | ||
101 | #define XLOG_REG_TYPE_ILOCAL 9 | ||
102 | #define XLOG_REG_TYPE_IATTR_EXT 10 | ||
103 | #define XLOG_REG_TYPE_IATTR_BROOT 11 | ||
104 | #define XLOG_REG_TYPE_IATTR_LOCAL 12 | ||
105 | #define XLOG_REG_TYPE_QFORMAT 13 | ||
106 | #define XLOG_REG_TYPE_DQUOT 14 | ||
107 | #define XLOG_REG_TYPE_QUOTAOFF 15 | ||
108 | #define XLOG_REG_TYPE_LRHEADER 16 | ||
109 | #define XLOG_REG_TYPE_UNMOUNT 17 | ||
110 | #define XLOG_REG_TYPE_COMMIT 18 | ||
111 | #define XLOG_REG_TYPE_TRANSHDR 19 | ||
112 | #define XLOG_REG_TYPE_ICREATE 20 | ||
113 | #define XLOG_REG_TYPE_MAX 20 | ||
114 | |||
115 | /* | ||
116 | * Flags to log operation header | ||
117 | * | ||
118 | * The first write of a new transaction will be preceded with a start | ||
119 | * record, XLOG_START_TRANS. Once a transaction is committed, a commit | ||
120 | * record is written, XLOG_COMMIT_TRANS. If a single region can not fit into | ||
121 | * the remainder of the current active in-core log, it is split up into | ||
122 | * multiple regions. Each partial region will be marked with a | ||
123 | * XLOG_CONTINUE_TRANS until the last one, which gets marked with XLOG_END_TRANS. | ||
124 | * | ||
125 | */ | ||
126 | #define XLOG_START_TRANS 0x01 /* Start a new transaction */ | ||
127 | #define XLOG_COMMIT_TRANS 0x02 /* Commit this transaction */ | ||
128 | #define XLOG_CONTINUE_TRANS 0x04 /* Cont this trans into new region */ | ||
129 | #define XLOG_WAS_CONT_TRANS 0x08 /* Cont this trans into new region */ | ||
130 | #define XLOG_END_TRANS 0x10 /* End a continued transaction */ | ||
131 | #define XLOG_UNMOUNT_TRANS 0x20 /* Unmount a filesystem transaction */ | ||
132 | |||
133 | |||
134 | typedef struct xlog_op_header { | ||
135 | __be32 oh_tid; /* transaction id of operation : 4 b */ | ||
136 | __be32 oh_len; /* bytes in data region : 4 b */ | ||
137 | __u8 oh_clientid; /* who sent me this : 1 b */ | ||
138 | __u8 oh_flags; /* : 1 b */ | ||
139 | __u16 oh_res2; /* 32 bit align : 2 b */ | ||
140 | } xlog_op_header_t; | ||
141 | |||
142 | /* valid values for h_fmt */ | ||
143 | #define XLOG_FMT_UNKNOWN 0 | ||
144 | #define XLOG_FMT_LINUX_LE 1 | ||
145 | #define XLOG_FMT_LINUX_BE 2 | ||
146 | #define XLOG_FMT_IRIX_BE 3 | ||
147 | |||
148 | /* our fmt */ | ||
149 | #ifdef XFS_NATIVE_HOST | ||
150 | #define XLOG_FMT XLOG_FMT_LINUX_BE | ||
151 | #else | ||
152 | #define XLOG_FMT XLOG_FMT_LINUX_LE | ||
153 | #endif | ||
154 | |||
155 | typedef struct xlog_rec_header { | ||
156 | __be32 h_magicno; /* log record (LR) identifier : 4 */ | ||
157 | __be32 h_cycle; /* write cycle of log : 4 */ | ||
158 | __be32 h_version; /* LR version : 4 */ | ||
159 | __be32 h_len; /* len in bytes; should be 64-bit aligned: 4 */ | ||
160 | __be64 h_lsn; /* lsn of this LR : 8 */ | ||
161 | __be64 h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */ | ||
162 | __le32 h_crc; /* crc of log record : 4 */ | ||
163 | __be32 h_prev_block; /* block number to previous LR : 4 */ | ||
164 | __be32 h_num_logops; /* number of log operations in this LR : 4 */ | ||
165 | __be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; | ||
166 | /* new fields */ | ||
167 | __be32 h_fmt; /* format of log record : 4 */ | ||
168 | uuid_t h_fs_uuid; /* uuid of FS : 16 */ | ||
169 | __be32 h_size; /* iclog size : 4 */ | ||
170 | } xlog_rec_header_t; | ||
171 | |||
172 | typedef struct xlog_rec_ext_header { | ||
173 | __be32 xh_cycle; /* write cycle of log : 4 */ | ||
174 | __be32 xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */ | ||
175 | } xlog_rec_ext_header_t; | ||
176 | |||
177 | /* | ||
178 | * Quite misnamed, because this union lays out the actual on-disk log buffer. | ||
179 | */ | ||
180 | typedef union xlog_in_core2 { | ||
181 | xlog_rec_header_t hic_header; | ||
182 | xlog_rec_ext_header_t hic_xheader; | ||
183 | char hic_sector[XLOG_HEADER_SIZE]; | ||
184 | } xlog_in_core_2_t; | ||
185 | |||
186 | /* not an on-disk structure, but needed by log recovery in userspace */ | ||
187 | typedef struct xfs_log_iovec { | ||
188 | void *i_addr; /* beginning address of region */ | ||
189 | int i_len; /* length in bytes of region */ | ||
190 | uint i_type; /* type of region */ | ||
191 | } xfs_log_iovec_t; | ||
192 | |||
193 | |||
194 | /* | ||
195 | * Transaction Header definitions. | ||
196 | * | ||
197 | * This is the structure written in the log at the head of every transaction. It | ||
198 | * identifies the type and id of the transaction, and contains the number of | ||
199 | * items logged by the transaction so we know how many to expect during | ||
200 | * recovery. | ||
201 | * | ||
202 | * Do not change the below structure without redoing the code in | ||
203 | * xlog_recover_add_to_trans() and xlog_recover_add_to_cont_trans(). | ||
204 | */ | ||
205 | typedef struct xfs_trans_header { | ||
206 | uint th_magic; /* magic number */ | ||
207 | uint th_type; /* transaction type */ | ||
208 | __int32_t th_tid; /* transaction id (unused) */ | ||
209 | uint th_num_items; /* num items logged by trans */ | ||
210 | } xfs_trans_header_t; | ||
211 | |||
212 | #define XFS_TRANS_HEADER_MAGIC 0x5452414e /* TRAN */ | ||
213 | |||
214 | /* | ||
215 | * Log item types. | ||
216 | */ | ||
217 | #define XFS_LI_EFI 0x1236 | ||
218 | #define XFS_LI_EFD 0x1237 | ||
219 | #define XFS_LI_IUNLINK 0x1238 | ||
220 | #define XFS_LI_INODE 0x123b /* aligned ino chunks, var-size ibufs */ | ||
221 | #define XFS_LI_BUF 0x123c /* v2 bufs, variable sized inode bufs */ | ||
222 | #define XFS_LI_DQUOT 0x123d | ||
223 | #define XFS_LI_QUOTAOFF 0x123e | ||
224 | #define XFS_LI_ICREATE 0x123f | ||
225 | |||
226 | #define XFS_LI_TYPE_DESC \ | ||
227 | { XFS_LI_EFI, "XFS_LI_EFI" }, \ | ||
228 | { XFS_LI_EFD, "XFS_LI_EFD" }, \ | ||
229 | { XFS_LI_IUNLINK, "XFS_LI_IUNLINK" }, \ | ||
230 | { XFS_LI_INODE, "XFS_LI_INODE" }, \ | ||
231 | { XFS_LI_BUF, "XFS_LI_BUF" }, \ | ||
232 | { XFS_LI_DQUOT, "XFS_LI_DQUOT" }, \ | ||
233 | { XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" }, \ | ||
234 | { XFS_LI_ICREATE, "XFS_LI_ICREATE" } | ||
235 | |||
236 | /* | ||
237 | * Inode Log Item Format definitions. | ||
238 | * | ||
239 | * This is the structure used to lay out an inode log item in the | ||
240 | * log. The size of the inline data/extents/b-tree root to be logged | ||
241 | * (if any) is indicated in the ilf_dsize field. Changes to this structure | ||
242 | * must be added on to the end. | ||
243 | */ | ||
244 | typedef struct xfs_inode_log_format { | ||
245 | __uint16_t ilf_type; /* inode log item type */ | ||
246 | __uint16_t ilf_size; /* size of this item */ | ||
247 | __uint32_t ilf_fields; /* flags for fields logged */ | ||
248 | __uint16_t ilf_asize; /* size of attr d/ext/root */ | ||
249 | __uint16_t ilf_dsize; /* size of data/ext/root */ | ||
250 | __uint64_t ilf_ino; /* inode number */ | ||
251 | union { | ||
252 | __uint32_t ilfu_rdev; /* rdev value for dev inode*/ | ||
253 | uuid_t ilfu_uuid; /* mount point value */ | ||
254 | } ilf_u; | ||
255 | __int64_t ilf_blkno; /* blkno of inode buffer */ | ||
256 | __int32_t ilf_len; /* len of inode buffer */ | ||
257 | __int32_t ilf_boffset; /* off of inode in buffer */ | ||
258 | } xfs_inode_log_format_t; | ||
259 | |||
260 | typedef struct xfs_inode_log_format_32 { | ||
261 | __uint16_t ilf_type; /* inode log item type */ | ||
262 | __uint16_t ilf_size; /* size of this item */ | ||
263 | __uint32_t ilf_fields; /* flags for fields logged */ | ||
264 | __uint16_t ilf_asize; /* size of attr d/ext/root */ | ||
265 | __uint16_t ilf_dsize; /* size of data/ext/root */ | ||
266 | __uint64_t ilf_ino; /* inode number */ | ||
267 | union { | ||
268 | __uint32_t ilfu_rdev; /* rdev value for dev inode*/ | ||
269 | uuid_t ilfu_uuid; /* mount point value */ | ||
270 | } ilf_u; | ||
271 | __int64_t ilf_blkno; /* blkno of inode buffer */ | ||
272 | __int32_t ilf_len; /* len of inode buffer */ | ||
273 | __int32_t ilf_boffset; /* off of inode in buffer */ | ||
274 | } __attribute__((packed)) xfs_inode_log_format_32_t; | ||
275 | |||
276 | typedef struct xfs_inode_log_format_64 { | ||
277 | __uint16_t ilf_type; /* inode log item type */ | ||
278 | __uint16_t ilf_size; /* size of this item */ | ||
279 | __uint32_t ilf_fields; /* flags for fields logged */ | ||
280 | __uint16_t ilf_asize; /* size of attr d/ext/root */ | ||
281 | __uint16_t ilf_dsize; /* size of data/ext/root */ | ||
282 | __uint32_t ilf_pad; /* pad for 64 bit boundary */ | ||
283 | __uint64_t ilf_ino; /* inode number */ | ||
284 | union { | ||
285 | __uint32_t ilfu_rdev; /* rdev value for dev inode*/ | ||
286 | uuid_t ilfu_uuid; /* mount point value */ | ||
287 | } ilf_u; | ||
288 | __int64_t ilf_blkno; /* blkno of inode buffer */ | ||
289 | __int32_t ilf_len; /* len of inode buffer */ | ||
290 | __int32_t ilf_boffset; /* off of inode in buffer */ | ||
291 | } xfs_inode_log_format_64_t; | ||
292 | |||
293 | /* | ||
294 | * Flags for xfs_trans_log_inode flags field. | ||
295 | */ | ||
296 | #define XFS_ILOG_CORE 0x001 /* log standard inode fields */ | ||
297 | #define XFS_ILOG_DDATA 0x002 /* log i_df.if_data */ | ||
298 | #define XFS_ILOG_DEXT 0x004 /* log i_df.if_extents */ | ||
299 | #define XFS_ILOG_DBROOT 0x008 /* log i_df.if_broot */ | ||
300 | #define XFS_ILOG_DEV 0x010 /* log the dev field */ | ||
301 | #define XFS_ILOG_UUID 0x020 /* log the uuid field */ | ||
302 | #define XFS_ILOG_ADATA 0x040 /* log i_af.if_data */ | ||
303 | #define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */ | ||
304 | #define XFS_ILOG_ABROOT 0x100 /* log i_af.if_broot */ | ||
305 | #define XFS_ILOG_DOWNER 0x200 /* change the data fork owner on replay */ | ||
306 | #define XFS_ILOG_AOWNER 0x400 /* change the attr fork owner on replay */ | ||
307 | |||
308 | |||
309 | /* | ||
310 | * The timestamps are dirty, but not necessarily anything else in the inode | ||
311 | * core. Unlike the other fields above this one must never make it to disk | ||
312 | * in the ilf_fields of the inode_log_format, but is purely stored in-memory in | ||
313 | * ili_fields in the inode_log_item. | ||
314 | */ | ||
315 | #define XFS_ILOG_TIMESTAMP 0x4000 | ||
316 | |||
317 | #define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ | ||
318 | XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ | ||
319 | XFS_ILOG_UUID | XFS_ILOG_ADATA | \ | ||
320 | XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \ | ||
321 | XFS_ILOG_DOWNER | XFS_ILOG_AOWNER) | ||
322 | |||
323 | #define XFS_ILOG_DFORK (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ | ||
324 | XFS_ILOG_DBROOT) | ||
325 | |||
326 | #define XFS_ILOG_AFORK (XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ | ||
327 | XFS_ILOG_ABROOT) | ||
328 | |||
329 | #define XFS_ILOG_ALL (XFS_ILOG_CORE | XFS_ILOG_DDATA | \ | ||
330 | XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \ | ||
331 | XFS_ILOG_DEV | XFS_ILOG_UUID | \ | ||
332 | XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ | ||
333 | XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP | \ | ||
334 | XFS_ILOG_DOWNER | XFS_ILOG_AOWNER) | ||
335 | |||
336 | static inline int xfs_ilog_fbroot(int w) | ||
337 | { | ||
338 | return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT); | ||
339 | } | ||
340 | |||
341 | static inline int xfs_ilog_fext(int w) | ||
342 | { | ||
343 | return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT); | ||
344 | } | ||
345 | |||
346 | static inline int xfs_ilog_fdata(int w) | ||
347 | { | ||
348 | return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA); | ||
349 | } | ||
350 | |||
351 | /* | ||
352 | * Incore version of the on-disk inode core structures. We log this directly | ||
353 | * into the journal in host CPU format (for better or worse) and as such | ||
354 | * directly mirrors the xfs_dinode structure as it must contain all the same | ||
355 | * information. | ||
356 | */ | ||
357 | typedef struct xfs_ictimestamp { | ||
358 | __int32_t t_sec; /* timestamp seconds */ | ||
359 | __int32_t t_nsec; /* timestamp nanoseconds */ | ||
360 | } xfs_ictimestamp_t; | ||
361 | |||
362 | /* | ||
363 | * NOTE: This structure must be kept identical to struct xfs_dinode | ||
364 | * in xfs_dinode.h except for the endianness annotations. | ||
365 | */ | ||
366 | typedef struct xfs_icdinode { | ||
367 | __uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */ | ||
368 | __uint16_t di_mode; /* mode and type of file */ | ||
369 | __int8_t di_version; /* inode version */ | ||
370 | __int8_t di_format; /* format of di_c data */ | ||
371 | __uint16_t di_onlink; /* old number of links to file */ | ||
372 | __uint32_t di_uid; /* owner's user id */ | ||
373 | __uint32_t di_gid; /* owner's group id */ | ||
374 | __uint32_t di_nlink; /* number of links to file */ | ||
375 | __uint16_t di_projid_lo; /* lower part of owner's project id */ | ||
376 | __uint16_t di_projid_hi; /* higher part of owner's project id */ | ||
377 | __uint8_t di_pad[6]; /* unused, zeroed space */ | ||
378 | __uint16_t di_flushiter; /* incremented on flush */ | ||
379 | xfs_ictimestamp_t di_atime; /* time last accessed */ | ||
380 | xfs_ictimestamp_t di_mtime; /* time last modified */ | ||
381 | xfs_ictimestamp_t di_ctime; /* time created/inode modified */ | ||
382 | xfs_fsize_t di_size; /* number of bytes in file */ | ||
383 | xfs_drfsbno_t di_nblocks; /* # of direct & btree blocks used */ | ||
384 | xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ | ||
385 | xfs_extnum_t di_nextents; /* number of extents in data fork */ | ||
386 | xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/ | ||
387 | __uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */ | ||
388 | __int8_t di_aformat; /* format of attr fork's data */ | ||
389 | __uint32_t di_dmevmask; /* DMIG event mask */ | ||
390 | __uint16_t di_dmstate; /* DMIG state info */ | ||
391 | __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */ | ||
392 | __uint32_t di_gen; /* generation number */ | ||
393 | |||
394 | /* di_next_unlinked is the only non-core field in the old dinode */ | ||
395 | xfs_agino_t di_next_unlinked;/* agi unlinked list ptr */ | ||
396 | |||
397 | /* start of the extended dinode, writable fields */ | ||
398 | __uint32_t di_crc; /* CRC of the inode */ | ||
399 | __uint64_t di_changecount; /* number of attribute changes */ | ||
400 | xfs_lsn_t di_lsn; /* flush sequence */ | ||
401 | __uint64_t di_flags2; /* more random flags */ | ||
402 | __uint8_t di_pad2[16]; /* more padding for future expansion */ | ||
403 | |||
404 | /* fields only written to during inode creation */ | ||
405 | xfs_ictimestamp_t di_crtime; /* time created */ | ||
406 | xfs_ino_t di_ino; /* inode number */ | ||
407 | uuid_t di_uuid; /* UUID of the filesystem */ | ||
408 | |||
409 | /* structure must be padded to 64 bit alignment */ | ||
410 | } xfs_icdinode_t; | ||
411 | |||
412 | static inline uint xfs_icdinode_size(int version) | ||
413 | { | ||
414 | if (version == 3) | ||
415 | return sizeof(struct xfs_icdinode); | ||
416 | return offsetof(struct xfs_icdinode, di_next_unlinked); | ||
417 | } | ||
418 | |||
419 | /* | ||
420 | * Buffer Log Format definitions | ||
421 | * | ||
422 | * These are the physical dirty bitmap definitions for the log format structure. | ||
423 | */ | ||
424 | #define XFS_BLF_CHUNK 128 | ||
425 | #define XFS_BLF_SHIFT 7 | ||
426 | #define BIT_TO_WORD_SHIFT 5 | ||
427 | #define NBWORD (NBBY * sizeof(unsigned int)) | ||
428 | |||
429 | /* | ||
430 | * This flag indicates that the buffer contains on disk inodes | ||
431 | * and requires special recovery handling. | ||
432 | */ | ||
433 | #define XFS_BLF_INODE_BUF (1<<0) | ||
434 | |||
435 | /* | ||
436 | * This flag indicates that the buffer should not be replayed | ||
437 | * during recovery because its blocks are being freed. | ||
438 | */ | ||
439 | #define XFS_BLF_CANCEL (1<<1) | ||
440 | |||
441 | /* | ||
442 | * These flags indicate that the buffer contains on disk | ||
443 | * user, project or group dquots and may require special recovery handling. | ||
444 | */ | ||
445 | #define XFS_BLF_UDQUOT_BUF (1<<2) | ||
446 | #define XFS_BLF_PDQUOT_BUF (1<<3) | ||
447 | #define XFS_BLF_GDQUOT_BUF (1<<4) | ||
448 | |||
449 | /* | ||
450 | * This is the structure used to lay out a buf log item in the | ||
451 | * log. The data map describes which 128 byte chunks of the buffer | ||
452 | * have been logged. | ||
453 | */ | ||
454 | #define XFS_BLF_DATAMAP_SIZE ((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD) | ||
455 | |||
456 | typedef struct xfs_buf_log_format { | ||
457 | unsigned short blf_type; /* buf log item type indicator */ | ||
458 | unsigned short blf_size; /* size of this item */ | ||
459 | ushort blf_flags; /* misc state */ | ||
460 | ushort blf_len; /* number of blocks in this buf */ | ||
461 | __int64_t blf_blkno; /* starting blkno of this buf */ | ||
462 | unsigned int blf_map_size; /* used size of data bitmap in words */ | ||
463 | unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */ | ||
464 | } xfs_buf_log_format_t; | ||
465 | |||
466 | /* | ||
467 | * All buffers now need to tell recovery where the magic number | ||
468 | * is so that it can verify and calculate the CRCs on the buffer correctly | ||
469 | * once the changes have been replayed into the buffer. | ||
470 | * | ||
471 | * The type value is held in the upper 5 bits of the blf_flags field, which is | ||
472 | * an unsigned 16 bit field. Hence we need to shift it 11 bits up and down. | ||
473 | */ | ||
474 | #define XFS_BLFT_BITS 5 | ||
475 | #define XFS_BLFT_SHIFT 11 | ||
476 | #define XFS_BLFT_MASK (((1 << XFS_BLFT_BITS) - 1) << XFS_BLFT_SHIFT) | ||
477 | |||
478 | enum xfs_blft { | ||
479 | XFS_BLFT_UNKNOWN_BUF = 0, | ||
480 | XFS_BLFT_UDQUOT_BUF, | ||
481 | XFS_BLFT_PDQUOT_BUF, | ||
482 | XFS_BLFT_GDQUOT_BUF, | ||
483 | XFS_BLFT_BTREE_BUF, | ||
484 | XFS_BLFT_AGF_BUF, | ||
485 | XFS_BLFT_AGFL_BUF, | ||
486 | XFS_BLFT_AGI_BUF, | ||
487 | XFS_BLFT_DINO_BUF, | ||
488 | XFS_BLFT_SYMLINK_BUF, | ||
489 | XFS_BLFT_DIR_BLOCK_BUF, | ||
490 | XFS_BLFT_DIR_DATA_BUF, | ||
491 | XFS_BLFT_DIR_FREE_BUF, | ||
492 | XFS_BLFT_DIR_LEAF1_BUF, | ||
493 | XFS_BLFT_DIR_LEAFN_BUF, | ||
494 | XFS_BLFT_DA_NODE_BUF, | ||
495 | XFS_BLFT_ATTR_LEAF_BUF, | ||
496 | XFS_BLFT_ATTR_RMT_BUF, | ||
497 | XFS_BLFT_SB_BUF, | ||
498 | XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS), | ||
499 | }; | ||
500 | |||
501 | static inline void /* store a buffer type in the type bits of blf->blf_flags */ | ||
502 | xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type) | ||
503 | { | ||
504 | ASSERT(type > XFS_BLFT_UNKNOWN_BUF && type < XFS_BLFT_MAX_BUF); | ||
505 | blf->blf_flags &= ~XFS_BLFT_MASK; /* drop any type stored earlier; bits outside the mask are kept */ | ||
506 | blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK); /* shift the type up into the masked bits */ | ||
507 | } | ||
508 | |||
509 | static inline __uint16_t /* read back the buffer type held in blf->blf_flags */ | ||
510 | xfs_blft_from_flags(struct xfs_buf_log_format *blf) | ||
511 | { | ||
512 | return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT; /* isolate the type bits, then shift them back down */ | ||
513 | } | ||
514 | |||
515 | /* | ||
516 | * EFI/EFD log format definitions | ||
517 | */ | ||
518 | typedef struct xfs_extent { | ||
519 | xfs_dfsbno_t ext_start; | ||
520 | xfs_extlen_t ext_len; | ||
521 | } xfs_extent_t; | ||
522 | |||
523 | /* | ||
524 | * Since an xfs_extent_t has types (start:64, len: 32) | ||
525 | * there are different alignments on 32 bit and 64 bit kernels. | ||
526 | * So we provide the different variants for use by a | ||
527 | * conversion routine. | ||
528 | */ | ||
529 | typedef struct xfs_extent_32 { | ||
530 | __uint64_t ext_start; | ||
531 | __uint32_t ext_len; | ||
532 | } __attribute__((packed)) xfs_extent_32_t; | ||
533 | |||
534 | typedef struct xfs_extent_64 { | ||
535 | __uint64_t ext_start; | ||
536 | __uint32_t ext_len; | ||
537 | __uint32_t ext_pad; | ||
538 | } xfs_extent_64_t; | ||
539 | |||
540 | /* | ||
541 | * This is the structure used to lay out an efi log item in the | ||
542 | * log. The efi_extents field is a variable size array whose | ||
543 | * size is given by efi_nextents. | ||
544 | */ | ||
545 | typedef struct xfs_efi_log_format { | ||
546 | __uint16_t efi_type; /* efi log item type */ | ||
547 | __uint16_t efi_size; /* size of this item */ | ||
548 | __uint32_t efi_nextents; /* # extents to free */ | ||
549 | __uint64_t efi_id; /* efi identifier */ | ||
550 | xfs_extent_t efi_extents[1]; /* array of extents to free */ | ||
551 | } xfs_efi_log_format_t; | ||
552 | |||
553 | typedef struct xfs_efi_log_format_32 { | ||
554 | __uint16_t efi_type; /* efi log item type */ | ||
555 | __uint16_t efi_size; /* size of this item */ | ||
556 | __uint32_t efi_nextents; /* # extents to free */ | ||
557 | __uint64_t efi_id; /* efi identifier */ | ||
558 | xfs_extent_32_t efi_extents[1]; /* array of extents to free */ | ||
559 | } __attribute__((packed)) xfs_efi_log_format_32_t; | ||
560 | |||
561 | typedef struct xfs_efi_log_format_64 { | ||
562 | __uint16_t efi_type; /* efi log item type */ | ||
563 | __uint16_t efi_size; /* size of this item */ | ||
564 | __uint32_t efi_nextents; /* # extents to free */ | ||
565 | __uint64_t efi_id; /* efi identifier */ | ||
566 | xfs_extent_64_t efi_extents[1]; /* array of extents to free */ | ||
567 | } xfs_efi_log_format_64_t; | ||
568 | |||
569 | /* | ||
570 | * This is the structure used to lay out an efd log item in the | ||
571 | * log. The efd_extents array is a variable size array whose | ||
572 | * size is given by efd_nextents. | ||
573 | */ | ||
574 | typedef struct xfs_efd_log_format { | ||
575 | __uint16_t efd_type; /* efd log item type */ | ||
576 | __uint16_t efd_size; /* size of this item */ | ||
577 | __uint32_t efd_nextents; /* # of extents freed */ | ||
578 | __uint64_t efd_efi_id; /* id of corresponding efi */ | ||
579 | xfs_extent_t efd_extents[1]; /* array of extents freed */ | ||
580 | } xfs_efd_log_format_t; | ||
581 | |||
582 | typedef struct xfs_efd_log_format_32 { | ||
583 | __uint16_t efd_type; /* efd log item type */ | ||
584 | __uint16_t efd_size; /* size of this item */ | ||
585 | __uint32_t efd_nextents; /* # of extents freed */ | ||
586 | __uint64_t efd_efi_id; /* id of corresponding efi */ | ||
587 | xfs_extent_32_t efd_extents[1]; /* array of extents freed */ | ||
588 | } __attribute__((packed)) xfs_efd_log_format_32_t; | ||
589 | |||
590 | typedef struct xfs_efd_log_format_64 { | ||
591 | __uint16_t efd_type; /* efd log item type */ | ||
592 | __uint16_t efd_size; /* size of this item */ | ||
593 | __uint32_t efd_nextents; /* # of extents freed */ | ||
594 | __uint64_t efd_efi_id; /* id of corresponding efi */ | ||
595 | xfs_extent_64_t efd_extents[1]; /* array of extents freed */ | ||
596 | } xfs_efd_log_format_64_t; | ||
597 | |||
598 | /* | ||
599 | * Dquot Log format definitions. | ||
600 | * | ||
601 | * The first two fields must be the type and size fitting into | ||
602 | * 32 bits : log_recovery code assumes that. | ||
603 | */ | ||
604 | typedef struct xfs_dq_logformat { | ||
605 | __uint16_t qlf_type; /* dquot log item type */ | ||
606 | __uint16_t qlf_size; /* size of this item */ | ||
607 | xfs_dqid_t qlf_id; /* usr/grp/proj id : 32 bits */ | ||
608 | __int64_t qlf_blkno; /* blkno of dquot buffer */ | ||
609 | __int32_t qlf_len; /* len of dquot buffer */ | ||
610 | __uint32_t qlf_boffset; /* off of dquot in buffer */ | ||
611 | } xfs_dq_logformat_t; | ||
612 | |||
613 | /* | ||
614 | * log format struct for QUOTAOFF records. | ||
615 | * The first two fields must be the type and size fitting into | ||
616 | * 32 bits : log_recovery code assumes that. | ||
617 | * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer | ||
618 | * to the first and ensures that the first logitem is taken out of the AIL | ||
619 | * only when the last one is securely committed. | ||
620 | */ | ||
621 | typedef struct xfs_qoff_logformat { | ||
622 | unsigned short qf_type; /* quotaoff log item type */ | ||
623 | unsigned short qf_size; /* size of this item */ | ||
624 | unsigned int qf_flags; /* USR and/or GRP */ | ||
625 | char qf_pad[12]; /* padding for future */ | ||
626 | } xfs_qoff_logformat_t; | ||
627 | |||
628 | /* | ||
629 | * Disk quotas status in m_qflags, and also sb_qflags. 16 bits. | ||
630 | */ | ||
631 | #define XFS_UQUOTA_ACCT 0x0001 /* user quota accounting ON */ | ||
632 | #define XFS_UQUOTA_ENFD 0x0002 /* user quota limits enforced */ | ||
633 | #define XFS_UQUOTA_CHKD 0x0004 /* quotacheck run on usr quotas */ | ||
634 | #define XFS_PQUOTA_ACCT 0x0008 /* project quota accounting ON */ | ||
635 | #define XFS_OQUOTA_ENFD 0x0010 /* other (grp/prj) quota limits enforced */ | ||
636 | #define XFS_OQUOTA_CHKD 0x0020 /* quotacheck run on other (grp/prj) quotas */ | ||
637 | #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ | ||
638 | |||
639 | /* | ||
640 | * Conversion to and from the combined OQUOTA flag (if necessary) | ||
641 | * is done only in xfs_sb_qflags_to_disk() and xfs_sb_qflags_from_disk() | ||
642 | */ | ||
643 | #define XFS_GQUOTA_ENFD 0x0080 /* group quota limits enforced */ | ||
644 | #define XFS_GQUOTA_CHKD 0x0100 /* quotacheck run on group quotas */ | ||
645 | #define XFS_PQUOTA_ENFD 0x0200 /* project quota limits enforced */ | ||
646 | #define XFS_PQUOTA_CHKD 0x0400 /* quotacheck run on project quotas */ | ||
647 | |||
648 | #define XFS_ALL_QUOTA_ACCT \ | ||
649 | (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) | ||
650 | #define XFS_ALL_QUOTA_ENFD \ | ||
651 | (XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_ENFD) | ||
652 | #define XFS_ALL_QUOTA_CHKD \ | ||
653 | (XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD | XFS_PQUOTA_CHKD) | ||
654 | |||
655 | #define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ | ||
656 | XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ | ||
657 | XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD|\ | ||
658 | XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD|\ | ||
659 | XFS_PQUOTA_CHKD) | ||
660 | |||
661 | /* | ||
662 | * Inode create log item structure | ||
663 | * | ||
664 | * Log recovery assumes the first two entries are the type and size and they fit | ||
665 | * in 32 bits. Also in host order (ugh) so they have to be 32 bit aligned so | ||
666 | * decoding can be done correctly. | ||
667 | */ | ||
668 | struct xfs_icreate_log { | ||
669 | __uint16_t icl_type; /* type of log format structure */ | ||
670 | __uint16_t icl_size; /* size of log format structure */ | ||
671 | __be32 icl_ag; /* ag being allocated in */ | ||
672 | __be32 icl_agbno; /* start block of inode range */ | ||
673 | __be32 icl_count; /* number of inodes to initialise */ | ||
674 | __be32 icl_isize; /* size of inodes */ | ||
675 | __be32 icl_length; /* length of extent to initialise */ | ||
676 | __be32 icl_gen; /* inode generation number to use */ | ||
677 | }; | ||
678 | |||
679 | #endif /* __XFS_LOG_FORMAT_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h new file mode 100644 index 000000000000..1c55ccbb379d --- /dev/null +++ b/fs/xfs/libxfs/xfs_log_recover.h | |||
@@ -0,0 +1,66 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_LOG_RECOVER_H__ | ||
19 | #define __XFS_LOG_RECOVER_H__ | ||
20 | |||
21 | /* | ||
22 | * Macros, structures, prototypes for internal log manager use. | ||
23 | */ | ||
24 | |||
25 | #define XLOG_RHASH_BITS 4 | ||
26 | #define XLOG_RHASH_SIZE 16 | ||
27 | #define XLOG_RHASH_SHIFT 2 | ||
28 | #define XLOG_RHASH(tid) \ | ||
29 | ((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1)) | ||
30 | |||
31 | #define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK / 2 + 1) | ||
32 | |||
33 | |||
34 | /* | ||
35 | * item headers are in ri_buf[0]. Additional buffers follow. | ||
36 | */ | ||
37 | typedef struct xlog_recover_item { | ||
38 | struct list_head ri_list; | ||
39 | int ri_type; | ||
40 | int ri_cnt; /* count of regions found */ | ||
41 | int ri_total; /* total regions */ | ||
42 | xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */ | ||
43 | } xlog_recover_item_t; | ||
44 | |||
45 | struct xlog_tid; | ||
46 | typedef struct xlog_recover { | ||
47 | struct hlist_node r_list; | ||
48 | xlog_tid_t r_log_tid; /* log's transaction id */ | ||
49 | xfs_trans_header_t r_theader; /* trans header for partial */ | ||
50 | int r_state; /* not needed */ | ||
51 | xfs_lsn_t r_lsn; /* xact lsn */ | ||
52 | struct list_head r_itemq; /* q for items */ | ||
53 | } xlog_recover_t; | ||
54 | |||
55 | #define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr) | ||
56 | |||
57 | /* | ||
58 | * This is the number of entries in the l_buf_cancel_table used during | ||
59 | * recovery. | ||
60 | */ | ||
61 | #define XLOG_BC_TABLE_SIZE 64 | ||
62 | |||
63 | #define XLOG_RECOVER_PASS1 1 | ||
64 | #define XLOG_RECOVER_PASS2 2 | ||
65 | |||
66 | #endif /* __XFS_LOG_RECOVER_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c new file mode 100644 index 000000000000..ee7e0e80246b --- /dev/null +++ b/fs/xfs/libxfs/xfs_log_rlimit.c | |||
@@ -0,0 +1,150 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2013 Jie Liu. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_shared.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_ag.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_mount.h" | ||
27 | #include "xfs_da_format.h" | ||
28 | #include "xfs_trans_space.h" | ||
29 | #include "xfs_inode.h" | ||
30 | #include "xfs_da_btree.h" | ||
31 | #include "xfs_attr_leaf.h" | ||
32 | #include "xfs_bmap_btree.h" | ||
33 | |||
34 | /* | ||
35 | * Calculate the maximum length in bytes that would be required for a local | ||
36 | * attribute value as large attributes out of line are not logged. | ||
37 | */ | ||
38 | STATIC int /* returns a log reservation built from tr_attrsetm and tr_attrsetrt */ | ||
39 | xfs_log_calc_max_attrsetm_res( | ||
40 | struct xfs_mount *mp) | ||
41 | { | ||
42 | int size; /* largest local attr entry size, less MAXNAMELEN and 1 */ | ||
43 | int nblks; /* block count used to scale tr_attrsetrt below */ | ||
44 | |||
45 | size = xfs_attr_leaf_entsize_local_max(mp->m_attr_geo->blksize) - | ||
46 | MAXNAMELEN - 1; | ||
47 | nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); | ||
48 | nblks += XFS_B_TO_FSB(mp, size); | ||
49 | nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK); | ||
50 | |||
51 | return M_RES(mp)->tr_attrsetm.tr_logres + /* one tr_attrsetm reservation ... */ | ||
52 | M_RES(mp)->tr_attrsetrt.tr_logres * nblks; /* ... plus one tr_attrsetrt reservation per block */ | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * Iterate over the log space reservation table to figure out and return | ||
57 | * the maximum one in terms of the pre-calculated values which were done | ||
58 | * at mount time. | ||
59 | */ | ||
60 | STATIC void /* result is copied into *max_resp; nothing is returned */ | ||
61 | xfs_log_get_max_trans_res( | ||
62 | struct xfs_mount *mp, | ||
63 | struct xfs_trans_res *max_resp) | ||
64 | { | ||
65 | struct xfs_trans_res *resp; | ||
66 | struct xfs_trans_res *end_resp; | ||
67 | int log_space = 0; /* largest total seen so far */ | ||
68 | int attr_space; | ||
69 | |||
70 | attr_space = xfs_log_calc_max_attrsetm_res(mp); | ||
71 | |||
72 | resp = (struct xfs_trans_res *)M_RES(mp); /* walk the object M_RES(mp) points at as an array of xfs_trans_res */ | ||
73 | end_resp = (struct xfs_trans_res *)(M_RES(mp) + 1); /* one past the end of that object */ | ||
74 | for (; resp < end_resp; resp++) { | ||
75 | int tmp = resp->tr_logcount > 1 ? /* total is tr_logres * tr_logcount when the count exceeds 1 */ | ||
76 | resp->tr_logres * resp->tr_logcount : | ||
77 | resp->tr_logres; | ||
78 | if (log_space < tmp) { | ||
79 | log_space = tmp; | ||
80 | *max_resp = *resp; /* struct copy */ | ||
81 | } | ||
82 | } | ||
83 | |||
84 | if (attr_space > log_space) { /* the attr set figure computed above wins if it is larger */ | ||
85 | *max_resp = M_RES(mp)->tr_attrsetm; /* struct copy */ | ||
86 | max_resp->tr_logres = attr_space; /* override only tr_logres; other fields stay as copied */ | ||
87 | } | ||
88 | } | ||
89 | |||
90 | /* | ||
91 | * Calculate the minimum valid log size for the given superblock configuration. | ||
92 | * Used to calculate the minimum log size at mkfs time, and to determine if | ||
93 | * the log is large enough or not at mount time. Returns the minimum size in | ||
94 | * filesystem block size units. | ||
95 | */ | ||
96 | int | ||
97 | xfs_log_calc_minimum_size( | ||
98 | struct xfs_mount *mp) | ||
99 | { | ||
100 | struct xfs_trans_res tres = {0}; | ||
101 | int max_logres; | ||
102 | int min_logblks = 0; | ||
103 | int lsunit = 0; /* stays 0 unless a v2 log has sb_logsunit > 1 */ | ||
104 | |||
105 | xfs_log_get_max_trans_res(mp, &tres); | ||
106 | |||
107 | max_logres = xfs_log_calc_unit_res(mp, tres.tr_logres); | ||
108 | if (tres.tr_logcount > 1) | ||
109 | max_logres *= tres.tr_logcount; | ||
110 | |||
111 | if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1) | ||
112 | lsunit = BTOBB(mp->m_sb.sb_logsunit); | ||
113 | |||
114 | /* | ||
115 | * Two factors should be taken into account for calculating the minimum | ||
116 | * log space. | ||
117 | * 1) The fundamental limitation is that no single transaction can be | ||
118 | * larger than half size of the log. | ||
119 | * | ||
120 | * From mkfs.xfs, this is considered by the XFS_MIN_LOG_FACTOR | ||
121 | * define, which is set to 3. That means we can definitely fit | ||
122 | * maximally sized 2 transactions in the log. We'll use this same | ||
123 | * value here. | ||
124 | * | ||
125 | * 2) If the lsunit option is specified, a transaction requires 2 LSU | ||
126 | * for the reservation because there are two log writes that can | ||
127 | * require padding - the transaction data and the commit record which | ||
128 | * are written separately and both can require padding to the LSU. | ||
129 | * Consider that we can have an active CIL reservation holding 2*LSU, | ||
130 | * but the CIL is not over a push threshold, in this case, if we | ||
131 | * don't have enough log space for at least one new transaction, which | ||
132 | * includes another 2*LSU in the reservation, we will run into dead | ||
133 | * loop situation in log space grant procedure. i.e. | ||
134 | * xlog_grant_head_wait(). | ||
135 | * | ||
136 | * Hence the log size needs to be able to contain two maximally sized | ||
137 | * and padded transactions, which is (2 * (2 * LSU + maxlres)). | ||
138 | * | ||
139 | * Also, the log size should be a multiple of the log stripe unit, round | ||
140 | * it up to lsunit boundary if lsunit is specified. | ||
141 | */ | ||
142 | if (lsunit) { | ||
143 | min_logblks = roundup_64(BTOBB(max_logres), lsunit) + | ||
144 | 2 * lsunit; | ||
145 | } else | ||
146 | min_logblks = BTOBB(max_logres) + 2 * BBSIZE; /* NOTE(review): adds 2 * BBSIZE to a BTOBB() result, while the branch above adds 2 * lsunit where lsunit is itself a BTOBB() result - confirm the units are intended */ | ||
147 | min_logblks *= XFS_MIN_LOG_FACTOR; | ||
148 | |||
149 | return XFS_BB_TO_FSB(mp, min_logblks); | ||
150 | } | ||
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h new file mode 100644 index 000000000000..137e20937077 --- /dev/null +++ b/fs/xfs/libxfs/xfs_quota_defs.h | |||
@@ -0,0 +1,161 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_QUOTA_DEFS_H__ | ||
19 | #define __XFS_QUOTA_DEFS_H__ | ||
20 | |||
21 | /* | ||
22 | * Quota definitions shared between user and kernel source trees. | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * Even though users may not have quota limits occupying all 64-bits, | ||
27 | * they may need 64-bit accounting. Hence, 64-bit quota-counters, | ||
28 | * and quota-limits. This is a waste in the common case, but hey ... | ||
29 | */ | ||
30 | typedef __uint64_t xfs_qcnt_t; | ||
31 | typedef __uint16_t xfs_qwarncnt_t; | ||
32 | |||
33 | /* | ||
34 | * flags for q_flags field in the dquot. | ||
35 | */ | ||
36 | #define XFS_DQ_USER 0x0001 /* a user quota */ | ||
37 | #define XFS_DQ_PROJ 0x0002 /* project quota */ | ||
38 | #define XFS_DQ_GROUP 0x0004 /* a group quota */ | ||
39 | #define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */ | ||
40 | #define XFS_DQ_FREEING 0x0010 /* dquot is being torn down */ | ||
41 | |||
42 | #define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) | ||
43 | |||
44 | #define XFS_DQ_FLAGS \ | ||
45 | { XFS_DQ_USER, "USER" }, \ | ||
46 | { XFS_DQ_PROJ, "PROJ" }, \ | ||
47 | { XFS_DQ_GROUP, "GROUP" }, \ | ||
48 | { XFS_DQ_DIRTY, "DIRTY" }, \ | ||
49 | { XFS_DQ_FREEING, "FREEING" } | ||
50 | |||
51 | /* | ||
52 | * We have the possibility of all three quota types being active at once, and | ||
53 | * hence free space modification requires modification of all three current | ||
54 | * dquots in a single transaction. For this case we need to have a reservation | ||
55 | * of at least 3 dquots. | ||
56 | * | ||
57 | * However, a chown operation can change both UID and GID in a single | ||
58 | * transaction, resulting in requiring {old, new} x {uid, gid} dquots to be | ||
59 | * modified. Hence for this case we need to reserve space for at least 4 dquots. | ||
60 | * | ||
61 | * And in the worst case, there's a rename operation that can be modifying up to | ||
62 | * 4 inodes with dquots attached to them. In reality, the only inodes that can | ||
63 | * have their dquots modified are the source and destination directory inodes | ||
64 | * due to directory name creation and removal. That can require space allocation | ||
65 | * and/or freeing on both directory inodes, and hence all three dquots on each | ||
66 | * inode can be modified. And if the directories are world writeable, all the | ||
67 | * dquots can be unique and so 6 dquots can be modified.... | ||
68 | * | ||
69 | * And, of course, we also need to take into account the dquot log format item | ||
70 | * used to describe each dquot. | ||
71 | */ | ||
72 | #define XFS_DQUOT_LOGRES(mp) \ | ||
73 | ((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6) | ||
74 | |||
75 | #define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) | ||
76 | #define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) | ||
77 | #define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) | ||
78 | #define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) | ||
79 | #define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD) | ||
80 | #define XFS_IS_GQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_GQUOTA_ENFD) | ||
81 | #define XFS_IS_PQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_PQUOTA_ENFD) | ||
82 | |||
83 | /* | ||
84 | * Incore only flags for quotaoff - these bits get cleared when quota(s) | ||
85 | * are in the process of getting turned off. These flags are in m_qflags but | ||
86 | * never in sb_qflags. | ||
87 | */ | ||
88 | #define XFS_UQUOTA_ACTIVE 0x1000 /* uquotas are being turned off */ | ||
89 | #define XFS_GQUOTA_ACTIVE 0x2000 /* gquotas are being turned off */ | ||
90 | #define XFS_PQUOTA_ACTIVE 0x4000 /* pquotas are being turned off */ | ||
91 | #define XFS_ALL_QUOTA_ACTIVE \ | ||
92 | (XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE) | ||
93 | |||
94 | /* | ||
95 | * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees | ||
96 | * quota will not be switched off as long as that inode lock is held. | ||
97 | */ | ||
98 | #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \ | ||
99 | XFS_GQUOTA_ACTIVE | \ | ||
100 | XFS_PQUOTA_ACTIVE)) | ||
101 | #define XFS_IS_OQUOTA_ON(mp) ((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \ | ||
102 | XFS_PQUOTA_ACTIVE)) | ||
103 | #define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE) | ||
104 | #define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE) | ||
105 | #define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE) | ||
106 | |||
107 | /* | ||
108 | * Flags to tell various functions what to do. Not all of these are meaningful | ||
109 | * to a single function. None of these XFS_QMOPT_* flags are meant to have | ||
110 | * persistent values (ie. their values can and will change between versions) | ||
111 | */ | ||
112 | #define XFS_QMOPT_DQALLOC 0x0000002 /* alloc dquot ondisk if needed */ | ||
113 | #define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */ | ||
114 | #define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ | ||
115 | #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ | ||
116 | #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ | ||
117 | #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ | ||
118 | #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ | ||
119 | #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ | ||
120 | #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ | ||
121 | |||
122 | /* | ||
123 | * flags to xfs_trans_mod_dquot to indicate which field needs to be | ||
124 | * modified. | ||
125 | */ | ||
126 | #define XFS_QMOPT_RES_REGBLKS 0x0010000 | ||
127 | #define XFS_QMOPT_RES_RTBLKS 0x0020000 | ||
128 | #define XFS_QMOPT_BCOUNT 0x0040000 | ||
129 | #define XFS_QMOPT_ICOUNT 0x0080000 | ||
130 | #define XFS_QMOPT_RTBCOUNT 0x0100000 | ||
131 | #define XFS_QMOPT_DELBCOUNT 0x0200000 | ||
132 | #define XFS_QMOPT_DELRTBCOUNT 0x0400000 | ||
133 | #define XFS_QMOPT_RES_INOS 0x0800000 | ||
134 | |||
135 | /* | ||
136 | * flags for dqalloc. | ||
137 | */ | ||
138 | #define XFS_QMOPT_INHERIT 0x1000000 | ||
139 | |||
140 | /* | ||
141 | * flags to xfs_trans_mod_dquot. | ||
142 | */ | ||
143 | #define XFS_TRANS_DQ_RES_BLKS XFS_QMOPT_RES_REGBLKS | ||
144 | #define XFS_TRANS_DQ_RES_RTBLKS XFS_QMOPT_RES_RTBLKS | ||
145 | #define XFS_TRANS_DQ_RES_INOS XFS_QMOPT_RES_INOS | ||
146 | #define XFS_TRANS_DQ_BCOUNT XFS_QMOPT_BCOUNT | ||
147 | #define XFS_TRANS_DQ_DELBCOUNT XFS_QMOPT_DELBCOUNT | ||
148 | #define XFS_TRANS_DQ_ICOUNT XFS_QMOPT_ICOUNT | ||
149 | #define XFS_TRANS_DQ_RTBCOUNT XFS_QMOPT_RTBCOUNT | ||
150 | #define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT | ||
151 | |||
152 | |||
153 | #define XFS_QMOPT_QUOTALL \ | ||
154 | (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) | ||
155 | #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) | ||
156 | |||
157 | extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, | ||
158 | xfs_dqid_t id, uint type, uint flags, char *str); | ||
159 | extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); | ||
160 | |||
161 | #endif /* __XFS_QUOTA_DEFS_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c new file mode 100644 index 000000000000..f4dd697cac08 --- /dev/null +++ b/fs/xfs/libxfs/xfs_rtbitmap.c | |||
@@ -0,0 +1,973 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_shared.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_bit.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_inode.h" | ||
29 | #include "xfs_bmap.h" | ||
30 | #include "xfs_bmap_util.h" | ||
31 | #include "xfs_bmap_btree.h" | ||
32 | #include "xfs_alloc.h" | ||
33 | #include "xfs_error.h" | ||
34 | #include "xfs_trans.h" | ||
35 | #include "xfs_trans_space.h" | ||
36 | #include "xfs_trace.h" | ||
37 | #include "xfs_buf.h" | ||
38 | #include "xfs_icache.h" | ||
39 | #include "xfs_dinode.h" | ||
40 | #include "xfs_rtalloc.h" | ||
41 | |||
42 | |||
43 | /* | ||
44 | * Realtime allocator bitmap functions shared with userspace. | ||
45 | */ | ||
46 | |||
47 | /* | ||
48 | * Get a buffer for the bitmap (issum == 0) or summary (issum != 0) file block specified. | ||
49 | * The buffer is returned read and locked. Returns 0 with *bpp set, or the error from the mapping or read with *bpp untouched. | ||
50 | */ | ||
51 | int | ||
52 | xfs_rtbuf_get( | ||
53 | xfs_mount_t *mp, /* file system mount structure */ | ||
54 | xfs_trans_t *tp, /* transaction pointer */ | ||
55 | xfs_rtblock_t block, /* block number in bitmap or summary */ | ||
56 | int issum, /* is summary not bitmap */ | ||
57 | xfs_buf_t **bpp) /* output: buffer for the block */ | ||
58 | { | ||
59 | xfs_buf_t *bp; /* block buffer, result */ | ||
60 | xfs_inode_t *ip; /* bitmap or summary inode */ | ||
61 | xfs_bmbt_irec_t map; /* mapping of the one file block requested */ | ||
62 | int nmap = 1; /* in: room for one mapping */ | ||
63 | int error; /* error value */ | ||
64 | |||
65 | ip = issum ? mp->m_rsumip : mp->m_rbmip; | ||
66 | |||
67 | error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK); /* translate file block -> fs block */ | ||
68 | if (error) | ||
69 | return error; | ||
70 | |||
71 | ASSERT(map.br_startblock != NULLFSBLOCK); /* NOTE(review): only this debug ASSERT guards against an unmapped block and nmap is never checked - confirm a hole cannot occur here */ | ||
72 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, | ||
73 | XFS_FSB_TO_DADDR(mp, map.br_startblock), | ||
74 | mp->m_bsize, 0, &bp, NULL); | ||
75 | if (error) | ||
76 | return error; | ||
77 | *bpp = bp; | ||
78 | return 0; | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | * Searching backward from start down to limit, find where the run of blocks sharing start's allocated/free state begins. | ||
83 | * *rtblock is set to the lowest block of that run (one above the first differing block), or to limit if every block matched. | ||
84 | */ | ||
85 | int | ||
86 | xfs_rtfind_back( | ||
87 | xfs_mount_t *mp, /* file system mount point */ | ||
88 | xfs_trans_t *tp, /* transaction pointer */ | ||
89 | xfs_rtblock_t start, /* starting block to look at */ | ||
90 | xfs_rtblock_t limit, /* last block to look at */ | ||
91 | xfs_rtblock_t *rtblock) /* out: lowest block of the matching run */ | ||
92 | { | ||
93 | xfs_rtword_t *b; /* current word in buffer */ | ||
94 | int bit; /* bit number in the word */ | ||
95 | xfs_rtblock_t block; /* bitmap block number */ | ||
96 | xfs_buf_t *bp; /* buf for the block */ | ||
97 | xfs_rtword_t *bufp; /* starting word in buffer */ | ||
98 | int error; /* error value */ | ||
99 | xfs_rtblock_t firstbit; /* first useful bit in the word */ | ||
100 | xfs_rtblock_t i; /* number of matching bits seen so far, counted back from start */ | ||
101 | xfs_rtblock_t len; /* length of inspected area */ | ||
102 | xfs_rtword_t mask; /* mask of relevant bits for value */ | ||
103 | xfs_rtword_t want; /* mask for "good" values */ | ||
104 | xfs_rtword_t wdiff; /* difference from wanted value */ | ||
105 | int word; /* word number in the buffer */ | ||
106 | |||
107 | /* | ||
108 | * Compute and read in starting bitmap block for starting block. | ||
109 | */ | ||
110 | block = XFS_BITTOBLOCK(mp, start); | ||
111 | error = xfs_rtbuf_get(mp, tp, block, 0, &bp); | ||
112 | if (error) { | ||
113 | return error; | ||
114 | } | ||
115 | bufp = bp->b_addr; | ||
116 | /* | ||
117 | * Get the first word's index & point to it. | ||
118 | */ | ||
119 | word = XFS_BITTOWORD(mp, start); | ||
120 | b = &bufp[word]; | ||
121 | bit = (int)(start & (XFS_NBWORD - 1)); | ||
122 | len = start - limit + 1; | ||
123 | /* | ||
124 | * Compute match value, based on the bit at start: if 1 (free) | ||
125 | * then all-ones, else all-zeroes. | ||
126 | */ | ||
127 | want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; | ||
128 | /* | ||
129 | * If the starting position is not the top bit of its word, deal | ||
130 | * with the partial word first. | ||
131 | */ | ||
132 | if (bit < XFS_NBWORD - 1) { | ||
133 | /* | ||
134 | * Calculate first (leftmost) bit number to look at, | ||
135 | * and mask for all the relevant bits in this word. | ||
136 | */ | ||
137 | firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0); | ||
138 | mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) << | ||
139 | firstbit; | ||
140 | /* | ||
141 | * Calculate the difference between the value there | ||
142 | * and what we're looking for. | ||
143 | */ | ||
144 | if ((wdiff = (*b ^ want) & mask)) { | ||
145 | /* | ||
146 | * Different. The highest differing bit is the nearest mismatch below start. | ||
147 | */ | ||
148 | xfs_trans_brelse(tp, bp); | ||
149 | i = bit - XFS_RTHIBIT(wdiff); | ||
150 | *rtblock = start - i + 1; | ||
151 | return 0; | ||
152 | } | ||
153 | i = bit - firstbit + 1; | ||
154 | /* | ||
155 | * Go on to previous block if that's where the previous word is | ||
156 | * and we need the previous word. | ||
157 | */ | ||
158 | if (--word == -1 && i < len) { | ||
159 | /* | ||
160 | * If done with this block, get the previous one. | ||
161 | */ | ||
162 | xfs_trans_brelse(tp, bp); | ||
163 | error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); | ||
164 | if (error) { | ||
165 | return error; | ||
166 | } | ||
167 | bufp = bp->b_addr; | ||
168 | word = XFS_BLOCKWMASK(mp); | ||
169 | b = &bufp[word]; | ||
170 | } else { | ||
171 | /* | ||
172 | * Go on to the previous word in the buffer. | ||
173 | */ | ||
174 | b--; | ||
175 | } | ||
176 | } else { | ||
177 | /* | ||
178 | * Starting on a word boundary, no partial word. | ||
179 | */ | ||
180 | i = 0; | ||
181 | } | ||
182 | /* | ||
183 | * Loop over whole words in buffers. When we use up one buffer | ||
184 | * we move on to the previous one. | ||
185 | */ | ||
186 | while (len - i >= XFS_NBWORD) { | ||
187 | /* | ||
188 | * Compute difference between actual and desired value. | ||
189 | */ | ||
190 | if ((wdiff = *b ^ want)) { | ||
191 | /* | ||
192 | * Different, mark where we are and return. | ||
193 | */ | ||
194 | xfs_trans_brelse(tp, bp); | ||
195 | i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); | ||
196 | *rtblock = start - i + 1; | ||
197 | return 0; | ||
198 | } | ||
199 | i += XFS_NBWORD; | ||
200 | /* | ||
201 | * Go on to previous block if that's where the previous word is | ||
202 | * and we need the previous word. | ||
203 | */ | ||
204 | if (--word == -1 && i < len) { | ||
205 | /* | ||
206 | * If done with this block, get the previous one. | ||
207 | */ | ||
208 | xfs_trans_brelse(tp, bp); | ||
209 | error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); | ||
210 | if (error) { | ||
211 | return error; | ||
212 | } | ||
213 | bufp = bp->b_addr; | ||
214 | word = XFS_BLOCKWMASK(mp); | ||
215 | b = &bufp[word]; | ||
216 | } else { | ||
217 | /* | ||
218 | * Go on to the previous word in the buffer. | ||
219 | */ | ||
220 | b--; | ||
221 | } | ||
222 | } | ||
223 | /* | ||
224 | * If not ending on a word boundary, deal with the last | ||
225 | * (partial) word. | ||
226 | */ | ||
227 | if (len - i) { | ||
228 | /* | ||
229 | * Calculate first (leftmost) bit number to look at, | ||
230 | * and mask for all the relevant bits in this word. | ||
231 | */ | ||
232 | firstbit = XFS_NBWORD - (len - i); | ||
233 | mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit; | ||
234 | /* | ||
235 | * Compute difference between actual and desired value. | ||
236 | */ | ||
237 | if ((wdiff = (*b ^ want) & mask)) { | ||
238 | /* | ||
239 | * Different, mark where we are and return. | ||
240 | */ | ||
241 | xfs_trans_brelse(tp, bp); | ||
242 | i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); | ||
243 | *rtblock = start - i + 1; | ||
244 | return 0; | ||
245 | } else | ||
246 | i = len; | ||
247 | } | ||
248 | /* | ||
249 | * No mismatch: the whole area was scanned, so i == len and the result is limit. | ||
250 | */ | ||
251 | xfs_trans_brelse(tp, bp); | ||
252 | *rtblock = start - i + 1; | ||
253 | return 0; | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | * Searching forward from start up to limit, find where the run of blocks sharing start's allocated/free state ends. | ||
258 | * *rtblock is set to the highest block of that run (one below the first differing block), or to limit if every block matched. | ||
259 | */ | ||
260 | int | ||
261 | xfs_rtfind_forw( | ||
262 | xfs_mount_t *mp, /* file system mount point */ | ||
263 | xfs_trans_t *tp, /* transaction pointer */ | ||
264 | xfs_rtblock_t start, /* starting block to look at */ | ||
265 | xfs_rtblock_t limit, /* last block to look at */ | ||
266 | xfs_rtblock_t *rtblock) /* out: highest block of the matching run */ | ||
267 | { | ||
268 | xfs_rtword_t *b; /* current word in buffer */ | ||
269 | int bit; /* bit number in the word */ | ||
270 | xfs_rtblock_t block; /* bitmap block number */ | ||
271 | xfs_buf_t *bp; /* buf for the block */ | ||
272 | xfs_rtword_t *bufp; /* starting word in buffer */ | ||
273 | int error; /* error value */ | ||
274 | xfs_rtblock_t i; /* number of matching bits seen so far, counted forward from start */ | ||
275 | xfs_rtblock_t lastbit; /* last useful bit in the word */ | ||
276 | xfs_rtblock_t len; /* length of inspected area */ | ||
277 | xfs_rtword_t mask; /* mask of relevant bits for value */ | ||
278 | xfs_rtword_t want; /* mask for "good" values */ | ||
279 | xfs_rtword_t wdiff; /* difference from wanted value */ | ||
280 | int word; /* word number in the buffer */ | ||
281 | |||
282 | /* | ||
283 | * Compute and read in starting bitmap block for starting block. | ||
284 | */ | ||
285 | block = XFS_BITTOBLOCK(mp, start); | ||
286 | error = xfs_rtbuf_get(mp, tp, block, 0, &bp); | ||
287 | if (error) { | ||
288 | return error; | ||
289 | } | ||
290 | bufp = bp->b_addr; | ||
291 | /* | ||
292 | * Get the first word's index & point to it. | ||
293 | */ | ||
294 | word = XFS_BITTOWORD(mp, start); | ||
295 | b = &bufp[word]; | ||
296 | bit = (int)(start & (XFS_NBWORD - 1)); | ||
297 | len = limit - start + 1; | ||
298 | /* | ||
299 | * Compute match value, based on the bit at start: if 1 (free) | ||
300 | * then all-ones, else all-zeroes. | ||
301 | */ | ||
302 | want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; | ||
303 | /* | ||
304 | * If the starting position is not word-aligned, deal with the | ||
305 | * partial word. | ||
306 | */ | ||
307 | if (bit) { | ||
308 | /* | ||
309 | * Calculate last (rightmost) bit number to look at, | ||
310 | * and mask for all the relevant bits in this word. | ||
311 | */ | ||
312 | lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); | ||
313 | mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; | ||
314 | /* | ||
315 | * Calculate the difference between the value there | ||
316 | * and what we're looking for. | ||
317 | */ | ||
318 | if ((wdiff = (*b ^ want) & mask)) { | ||
319 | /* | ||
320 | * Different. The lowest differing bit is the nearest mismatch above start. | ||
321 | */ | ||
322 | xfs_trans_brelse(tp, bp); | ||
323 | i = XFS_RTLOBIT(wdiff) - bit; | ||
324 | *rtblock = start + i - 1; | ||
325 | return 0; | ||
326 | } | ||
327 | i = lastbit - bit; | ||
328 | /* | ||
329 | * Go on to next block if that's where the next word is | ||
330 | * and we need the next word. | ||
331 | */ | ||
332 | if (++word == XFS_BLOCKWSIZE(mp) && i < len) { | ||
333 | /* | ||
334 | * If done with this block, get the next one. | ||
335 | */ | ||
336 | xfs_trans_brelse(tp, bp); | ||
337 | error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); | ||
338 | if (error) { | ||
339 | return error; | ||
340 | } | ||
341 | b = bufp = bp->b_addr; | ||
342 | word = 0; | ||
343 | } else { | ||
344 | /* | ||
345 | * Go on to the next word in the buffer. | ||
346 | */ | ||
347 | b++; | ||
348 | } | ||
349 | } else { | ||
350 | /* | ||
351 | * Starting on a word boundary, no partial word. | ||
352 | */ | ||
353 | i = 0; | ||
354 | } | ||
355 | /* | ||
356 | * Loop over whole words in buffers. When we use up one buffer | ||
357 | * we move on to the next one. | ||
358 | */ | ||
359 | while (len - i >= XFS_NBWORD) { | ||
360 | /* | ||
361 | * Compute difference between actual and desired value. | ||
362 | */ | ||
363 | if ((wdiff = *b ^ want)) { | ||
364 | /* | ||
365 | * Different, mark where we are and return. | ||
366 | */ | ||
367 | xfs_trans_brelse(tp, bp); | ||
368 | i += XFS_RTLOBIT(wdiff); | ||
369 | *rtblock = start + i - 1; | ||
370 | return 0; | ||
371 | } | ||
372 | i += XFS_NBWORD; | ||
373 | /* | ||
374 | * Go on to next block if that's where the next word is | ||
375 | * and we need the next word. | ||
376 | */ | ||
377 | if (++word == XFS_BLOCKWSIZE(mp) && i < len) { | ||
378 | /* | ||
379 | * If done with this block, get the next one. | ||
380 | */ | ||
381 | xfs_trans_brelse(tp, bp); | ||
382 | error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); | ||
383 | if (error) { | ||
384 | return error; | ||
385 | } | ||
386 | b = bufp = bp->b_addr; | ||
387 | word = 0; | ||
388 | } else { | ||
389 | /* | ||
390 | * Go on to the next word in the buffer. | ||
391 | */ | ||
392 | b++; | ||
393 | } | ||
394 | } | ||
395 | /* | ||
396 | * If not ending on a word boundary, deal with the last | ||
397 | * (partial) word. | ||
398 | */ | ||
399 | if ((lastbit = len - i)) { | ||
400 | /* | ||
401 | * Calculate mask for all the relevant bits in this word. | ||
402 | */ | ||
403 | mask = ((xfs_rtword_t)1 << lastbit) - 1; | ||
404 | /* | ||
405 | * Compute difference between actual and desired value. | ||
406 | */ | ||
407 | if ((wdiff = (*b ^ want) & mask)) { | ||
408 | /* | ||
409 | * Different, mark where we are and return. | ||
410 | */ | ||
411 | xfs_trans_brelse(tp, bp); | ||
412 | i += XFS_RTLOBIT(wdiff); | ||
413 | *rtblock = start + i - 1; | ||
414 | return 0; | ||
415 | } else | ||
416 | i = len; | ||
417 | } | ||
418 | /* | ||
419 | * No mismatch: the whole area was scanned, so i == len and the result is limit. | ||
420 | */ | ||
421 | xfs_trans_brelse(tp, bp); | ||
422 | *rtblock = start + i - 1; | ||
423 | return 0; | ||
424 | } | ||
425 | |||
426 | /* | ||
427 | * Read and modify the summary information for a given extent size, | ||
428 | * bitmap block combination: add delta to that counter and log the change. | ||
429 | * Keeps track of a current summary block (via *rbpp / *rsb, when rbpp is non-NULL), | ||
430 | * so we don't keep reading it from the buffer cache. Returns 0 or the error from reading the block. | ||
431 | */ | ||
432 | int | ||
433 | xfs_rtmodify_summary( | ||
434 | xfs_mount_t *mp, /* file system mount point */ | ||
435 | xfs_trans_t *tp, /* transaction pointer */ | ||
436 | int log, /* log2 of extent size */ | ||
437 | xfs_rtblock_t bbno, /* bitmap block number */ | ||
438 | int delta, /* change to make to summary info */ | ||
439 | xfs_buf_t **rbpp, /* in/out: summary block buffer */ | ||
440 | xfs_fsblock_t *rsb) /* in/out: summary block number */ | ||
441 | { | ||
442 | xfs_buf_t *bp; /* buffer for the summary block */ | ||
443 | int error; /* error value */ | ||
444 | xfs_fsblock_t sb; /* summary fsblock */ | ||
445 | int so; /* index into the summary file */ | ||
446 | xfs_suminfo_t *sp; /* pointer to returned data */ | ||
447 | |||
448 | /* | ||
449 | * Compute entry number in the summary file. | ||
450 | */ | ||
451 | so = XFS_SUMOFFS(mp, log, bbno); | ||
452 | /* | ||
453 | * Compute the block number in the summary file. | ||
454 | */ | ||
455 | sb = XFS_SUMOFFSTOBLOCK(mp, so); | ||
456 | /* | ||
457 | * If we have an old buffer, and the block number matches, use that. *rsb is only read once *rbpp is known to be non-NULL. | ||
458 | */ | ||
459 | if (rbpp && *rbpp && *rsb == sb) | ||
460 | bp = *rbpp; | ||
461 | /* | ||
462 | * Otherwise we have to get the buffer. | ||
463 | */ | ||
464 | else { | ||
465 | /* | ||
466 | * If there was an old one, get rid of it first. | ||
467 | */ | ||
468 | if (rbpp && *rbpp) | ||
469 | xfs_trans_brelse(tp, *rbpp); | ||
470 | error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); | ||
471 | if (error) { | ||
472 | return error; /* NOTE(review): *rbpp still holds the buffer released above - confirm callers do not reuse it after an error */ | ||
473 | } | ||
474 | /* | ||
475 | * Remember this buffer and block for the next call. | ||
476 | */ | ||
477 | if (rbpp) { | ||
478 | *rbpp = bp; | ||
479 | *rsb = sb; | ||
480 | } | ||
481 | } | ||
482 | /* | ||
483 | * Point to the summary information, modify and log it (first and last byte offsets of the counter, inclusive). | ||
484 | */ | ||
485 | sp = XFS_SUMPTR(mp, bp, so); | ||
486 | *sp += delta; | ||
487 | xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr), | ||
488 | (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1)); | ||
489 | return 0; | ||
490 | } | ||
491 | |||
492 | /* | ||
493 | * Set the given range of bitmap bits (len bits beginning at start) to the given value. | ||
494 | * Do whatever I/O and logging is required; every word touched is logged in full. Returns 0 or the error from reading a bitmap block. | ||
495 | */ | ||
496 | int | ||
497 | xfs_rtmodify_range( | ||
498 | xfs_mount_t *mp, /* file system mount point */ | ||
499 | xfs_trans_t *tp, /* transaction pointer */ | ||
500 | xfs_rtblock_t start, /* starting block to modify */ | ||
501 | xfs_extlen_t len, /* length of extent to modify */ | ||
502 | int val) /* 1 for free, 0 for allocated */ | ||
503 | { | ||
504 | xfs_rtword_t *b; /* current word in buffer */ | ||
505 | int bit; /* bit number in the word */ | ||
506 | xfs_rtblock_t block; /* bitmap block number */ | ||
507 | xfs_buf_t *bp; /* buf for the block */ | ||
508 | xfs_rtword_t *bufp; /* starting word in buffer */ | ||
509 | int error; /* error value */ | ||
510 | xfs_rtword_t *first; /* first used word in the buffer */ | ||
511 | int i; /* current bit number rel. to start */ | ||
512 | int lastbit; /* last useful bit in word */ | ||
513 | xfs_rtword_t mask; /* mask of relevant bits for value */ | ||
514 | int word; /* word number in the buffer */ | ||
515 | |||
516 | /* | ||
517 | * Compute starting bitmap block number. | ||
518 | */ | ||
519 | block = XFS_BITTOBLOCK(mp, start); | ||
520 | /* | ||
521 | * Read the bitmap block, and point to its data. | ||
522 | */ | ||
523 | error = xfs_rtbuf_get(mp, tp, block, 0, &bp); | ||
524 | if (error) { | ||
525 | return error; | ||
526 | } | ||
527 | bufp = bp->b_addr; | ||
528 | /* | ||
529 | * Compute the starting word's address, and starting bit. | ||
530 | */ | ||
531 | word = XFS_BITTOWORD(mp, start); | ||
532 | first = b = &bufp[word]; | ||
533 | bit = (int)(start & (XFS_NBWORD - 1)); | ||
534 | /* | ||
535 | * 0 (allocated) => all zeroes; 1 (free) => all ones. | ||
536 | */ | ||
537 | val = -val; | ||
538 | /* | ||
539 | * If not starting on a word boundary, deal with the first | ||
540 | * (partial) word. | ||
541 | */ | ||
542 | if (bit) { | ||
543 | /* | ||
544 | * Compute first bit not changed and mask of relevant bits. | ||
545 | */ | ||
546 | lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); | ||
547 | mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; | ||
548 | /* | ||
549 | * Set/clear the active bits. | ||
550 | */ | ||
551 | if (val) | ||
552 | *b |= mask; | ||
553 | else | ||
554 | *b &= ~mask; | ||
555 | i = lastbit - bit; | ||
556 | /* | ||
557 | * Go on to the next block if that's where the next word is | ||
558 | * and we need the next word. | ||
559 | */ | ||
560 | if (++word == XFS_BLOCKWSIZE(mp) && i < len) { | ||
561 | /* | ||
562 | * Log the changed part of this block, up to and including the | ||
563 | * last byte of the word at b (not yet advanced). Get the next one. | ||
564 | */ | ||
565 | xfs_trans_log_buf(tp, bp, | ||
566 | (uint)((char *)first - (char *)bufp), | ||
567 | (uint)((char *)b - (char *)bufp + sizeof(*b) - 1)); | ||
568 | error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); | ||
569 | if (error) { | ||
570 | return error; | ||
571 | } | ||
572 | first = b = bufp = bp->b_addr; | ||
573 | word = 0; | ||
574 | } else { | ||
575 | /* | ||
576 | * Go on to the next word in the buffer | ||
577 | */ | ||
578 | b++; | ||
579 | } | ||
580 | } else { | ||
581 | /* | ||
582 | * Starting on a word boundary, no partial word. | ||
583 | */ | ||
584 | i = 0; | ||
585 | } | ||
586 | /* | ||
587 | * Loop over whole words in buffers. When we use up one buffer | ||
588 | * we move on to the next one. | ||
589 | */ | ||
590 | while (len - i >= XFS_NBWORD) { | ||
591 | /* | ||
592 | * Set the word value correctly. | ||
593 | */ | ||
594 | *b = val; | ||
595 | i += XFS_NBWORD; | ||
596 | /* | ||
597 | * Go on to the next block if that's where the next word is | ||
598 | * and we need the next word. | ||
599 | */ | ||
600 | if (++word == XFS_BLOCKWSIZE(mp) && i < len) { | ||
601 | /* | ||
602 | * Log the changed part of this block, up to and including the | ||
603 | * last byte of the word at b (not yet advanced). Get the next one. | ||
604 | */ | ||
605 | xfs_trans_log_buf(tp, bp, | ||
606 | (uint)((char *)first - (char *)bufp), | ||
607 | (uint)((char *)b - (char *)bufp + sizeof(*b) - 1)); | ||
608 | error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); | ||
609 | if (error) { | ||
610 | return error; | ||
611 | } | ||
612 | first = b = bufp = bp->b_addr; | ||
613 | word = 0; | ||
614 | } else { | ||
615 | /* | ||
616 | * Go on to the next word in the buffer | ||
617 | */ | ||
618 | b++; | ||
619 | } | ||
620 | } | ||
621 | /* | ||
622 | * If not ending on a word boundary, deal with the last | ||
623 | * (partial) word. | ||
624 | */ | ||
625 | if ((lastbit = len - i)) { | ||
626 | /* | ||
627 | * Compute a mask of relevant bits. | ||
628 | */ | ||
629 | bit = 0; | ||
630 | mask = ((xfs_rtword_t)1 << lastbit) - 1; | ||
631 | /* | ||
632 | * Set/clear the active bits. | ||
633 | */ | ||
634 | if (val) | ||
635 | *b |= mask; | ||
636 | else | ||
637 | *b &= ~mask; | ||
638 | b++; | ||
639 | } | ||
640 | /* | ||
641 | * Log any remaining changed bytes; b is one past the last modified word here, hence the - 1. | ||
642 | */ | ||
643 | if (b > first) | ||
644 | xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp), | ||
645 | (uint)((char *)b - (char *)bufp - 1)); | ||
646 | return 0; | ||
647 | } | ||
648 | |||
649 | /* | ||
650 | * Mark an extent specified by start and len freed. | ||
651 | * Updates all the summary information as well as the bitmap: free neighbours lose their entries, the merged free extent gains one. | ||
652 | */ | ||
653 | int | ||
654 | xfs_rtfree_range( | ||
655 | xfs_mount_t *mp, /* file system mount point */ | ||
656 | xfs_trans_t *tp, /* transaction pointer */ | ||
657 | xfs_rtblock_t start, /* starting block to free */ | ||
658 | xfs_extlen_t len, /* length to free */ | ||
659 | xfs_buf_t **rbpp, /* in/out: summary block buffer */ | ||
660 | xfs_fsblock_t *rsb) /* in/out: summary block number */ | ||
661 | { | ||
662 | xfs_rtblock_t end; /* end of the freed extent */ | ||
663 | int error; /* error value */ | ||
664 | xfs_rtblock_t postblock; /* last block of the free run at or after end */ | ||
665 | xfs_rtblock_t preblock; /* first block of the free run at or before start */ | ||
666 | |||
667 | end = start + len - 1; | ||
668 | /* | ||
669 | * Modify the bitmap to mark this extent freed. | ||
670 | */ | ||
671 | error = xfs_rtmodify_range(mp, tp, start, len, 1); | ||
672 | if (error) { | ||
673 | return error; | ||
674 | } | ||
675 | /* | ||
676 | * The bitmap now shows [start, end] as free, possibly joined to free | ||
677 | * neighbours. Find the beginning and end of that free run so we can | ||
678 | * properly update the summary. | ||
679 | */ | ||
680 | error = xfs_rtfind_back(mp, tp, start, 0, &preblock); | ||
681 | if (error) { | ||
682 | return error; | ||
683 | } | ||
684 | /* | ||
685 | * Find the last free block of the run, searching forward from end. | ||
686 | */ | ||
687 | error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, | ||
688 | &postblock); | ||
689 | if (error) | ||
690 | return error; | ||
691 | /* | ||
692 | * If a free extent already existed immediately before start, | ||
693 | * remove its summary entry; it is absorbed into the merged extent. | ||
694 | */ | ||
695 | if (preblock < start) { | ||
696 | error = xfs_rtmodify_summary(mp, tp, | ||
697 | XFS_RTBLOCKLOG(start - preblock), | ||
698 | XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); | ||
699 | if (error) { | ||
700 | return error; | ||
701 | } | ||
702 | } | ||
703 | /* | ||
704 | * If a free extent already existed immediately after end, | ||
705 | * remove its summary entry as well. | ||
706 | */ | ||
707 | if (postblock > end) { | ||
708 | error = xfs_rtmodify_summary(mp, tp, | ||
709 | XFS_RTBLOCKLOG(postblock - end), | ||
710 | XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb); | ||
711 | if (error) { | ||
712 | return error; | ||
713 | } | ||
714 | } | ||
715 | /* | ||
716 | * Increment the summary information corresponding to the entire | ||
717 | * (new) free extent, preblock through postblock. | ||
718 | */ | ||
719 | error = xfs_rtmodify_summary(mp, tp, | ||
720 | XFS_RTBLOCKLOG(postblock + 1 - preblock), | ||
721 | XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); | ||
722 | return error; | ||
723 | } | ||
724 | |||
725 | /* | ||
726 | * Check that the given range is either all allocated (val = 0) or | ||
727 | * all free (val = 1). On return *stat is 1 and *new is start + len if it is; otherwise *stat is 0 and *new is the first block that differs. | ||
728 | */ | ||
729 | int | ||
730 | xfs_rtcheck_range( | ||
731 | xfs_mount_t *mp, /* file system mount point */ | ||
732 | xfs_trans_t *tp, /* transaction pointer */ | ||
733 | xfs_rtblock_t start, /* starting block number of extent */ | ||
734 | xfs_extlen_t len, /* length of extent */ | ||
735 | int val, /* 1 for free, 0 for allocated */ | ||
736 | xfs_rtblock_t *new, /* out: first block not matching */ | ||
737 | int *stat) /* out: 1 for matches, 0 for not */ | ||
738 | { | ||
739 | xfs_rtword_t *b; /* current word in buffer */ | ||
740 | int bit; /* bit number in the word */ | ||
741 | xfs_rtblock_t block; /* bitmap block number */ | ||
742 | xfs_buf_t *bp; /* buf for the block */ | ||
743 | xfs_rtword_t *bufp; /* starting word in buffer */ | ||
744 | int error; /* error value */ | ||
745 | xfs_rtblock_t i; /* current bit number rel. to start */ | ||
746 | xfs_rtblock_t lastbit; /* last useful bit in word */ | ||
747 | xfs_rtword_t mask; /* mask of relevant bits for value */ | ||
748 | xfs_rtword_t wdiff; /* difference from wanted value */ | ||
749 | int word; /* word number in the buffer */ | ||
750 | |||
751 | /* | ||
752 | * Compute starting bitmap block number | ||
753 | */ | ||
754 | block = XFS_BITTOBLOCK(mp, start); | ||
755 | /* | ||
756 | * Read the bitmap block. | ||
757 | */ | ||
758 | error = xfs_rtbuf_get(mp, tp, block, 0, &bp); | ||
759 | if (error) { | ||
760 | return error; | ||
761 | } | ||
762 | bufp = bp->b_addr; | ||
763 | /* | ||
764 | * Compute the starting word's address, and starting bit. | ||
765 | */ | ||
766 | word = XFS_BITTOWORD(mp, start); | ||
767 | b = &bufp[word]; | ||
768 | bit = (int)(start & (XFS_NBWORD - 1)); | ||
769 | /* | ||
770 | * 0 (allocated) => all zeroes; 1 (free) => all ones. | ||
771 | */ | ||
772 | val = -val; | ||
773 | /* | ||
774 | * If not starting on a word boundary, deal with the first | ||
775 | * (partial) word. | ||
776 | */ | ||
777 | if (bit) { | ||
778 | /* | ||
779 | * Compute first bit not examined. | ||
780 | */ | ||
781 | lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); | ||
782 | /* | ||
783 | * Mask of relevant bits. | ||
784 | */ | ||
785 | mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; | ||
786 | /* | ||
787 | * Compute difference between actual and desired value. | ||
788 | */ | ||
789 | if ((wdiff = (*b ^ val) & mask)) { | ||
790 | /* | ||
791 | * Different, compute first wrong bit and return. | ||
792 | */ | ||
793 | xfs_trans_brelse(tp, bp); | ||
794 | i = XFS_RTLOBIT(wdiff) - bit; | ||
795 | *new = start + i; | ||
796 | *stat = 0; | ||
797 | return 0; | ||
798 | } | ||
799 | i = lastbit - bit; | ||
800 | /* | ||
801 | * Go on to next block if that's where the next word is | ||
802 | * and we need the next word. | ||
803 | */ | ||
804 | if (++word == XFS_BLOCKWSIZE(mp) && i < len) { | ||
805 | /* | ||
806 | * If done with this block, get the next one. | ||
807 | */ | ||
808 | xfs_trans_brelse(tp, bp); | ||
809 | error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); | ||
810 | if (error) { | ||
811 | return error; | ||
812 | } | ||
813 | b = bufp = bp->b_addr; | ||
814 | word = 0; | ||
815 | } else { | ||
816 | /* | ||
817 | * Go on to the next word in the buffer. | ||
818 | */ | ||
819 | b++; | ||
820 | } | ||
821 | } else { | ||
822 | /* | ||
823 | * Starting on a word boundary, no partial word. | ||
824 | */ | ||
825 | i = 0; | ||
826 | } | ||
827 | /* | ||
828 | * Loop over whole words in buffers. When we use up one buffer | ||
829 | * we move on to the next one. | ||
830 | */ | ||
831 | while (len - i >= XFS_NBWORD) { | ||
832 | /* | ||
833 | * Compute difference between actual and desired value. | ||
834 | */ | ||
835 | if ((wdiff = *b ^ val)) { | ||
836 | /* | ||
837 | * Different, compute first wrong bit and return. | ||
838 | */ | ||
839 | xfs_trans_brelse(tp, bp); | ||
840 | i += XFS_RTLOBIT(wdiff); | ||
841 | *new = start + i; | ||
842 | *stat = 0; | ||
843 | return 0; | ||
844 | } | ||
845 | i += XFS_NBWORD; | ||
846 | /* | ||
847 | * Go on to next block if that's where the next word is | ||
848 | * and we need the next word. | ||
849 | */ | ||
850 | if (++word == XFS_BLOCKWSIZE(mp) && i < len) { | ||
851 | /* | ||
852 | * If done with this block, get the next one. | ||
853 | */ | ||
854 | xfs_trans_brelse(tp, bp); | ||
855 | error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); | ||
856 | if (error) { | ||
857 | return error; | ||
858 | } | ||
859 | b = bufp = bp->b_addr; | ||
860 | word = 0; | ||
861 | } else { | ||
862 | /* | ||
863 | * Go on to the next word in the buffer. | ||
864 | */ | ||
865 | b++; | ||
866 | } | ||
867 | } | ||
868 | /* | ||
869 | * If not ending on a word boundary, deal with the last | ||
870 | * (partial) word. | ||
871 | */ | ||
872 | if ((lastbit = len - i)) { | ||
873 | /* | ||
874 | * Mask of relevant bits. | ||
875 | */ | ||
876 | mask = ((xfs_rtword_t)1 << lastbit) - 1; | ||
877 | /* | ||
878 | * Compute difference between actual and desired value. | ||
879 | */ | ||
880 | if ((wdiff = (*b ^ val) & mask)) { | ||
881 | /* | ||
882 | * Different, compute first wrong bit and return. | ||
883 | */ | ||
884 | xfs_trans_brelse(tp, bp); | ||
885 | i += XFS_RTLOBIT(wdiff); | ||
886 | *new = start + i; | ||
887 | *stat = 0; | ||
888 | return 0; | ||
889 | } else | ||
890 | i = len; | ||
891 | } | ||
892 | /* | ||
893 | * Successful: every bit matched, so i == len here. | ||
894 | */ | ||
895 | xfs_trans_brelse(tp, bp); | ||
896 | *new = start + i; | ||
897 | *stat = 1; | ||
898 | return 0; | ||
899 | } | ||
900 | |||
901 | #ifdef DEBUG | ||
902 | /* | ||
903 | * Check that the given extent (block range) is allocated already: ASSERTs if any block in it is free. Returns 0, or the error from reading the bitmap. | ||
904 | */ | ||
905 | STATIC int /* error */ | ||
906 | xfs_rtcheck_alloc_range( | ||
907 | xfs_mount_t *mp, /* file system mount point */ | ||
908 | xfs_trans_t *tp, /* transaction pointer */ | ||
909 | xfs_rtblock_t bno, /* starting block number of extent */ | ||
910 | xfs_extlen_t len) /* length of extent */ | ||
911 | { | ||
912 | xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */ | ||
913 | int stat; /* 1 if the whole range is allocated */ | ||
914 | int error; /* error value */ | ||
915 | |||
916 | error = xfs_rtcheck_range(mp, tp, bno, len, 0, &new, &stat); /* val 0: expect every bit clear (allocated) */ | ||
917 | if (error) | ||
918 | return error; | ||
919 | ASSERT(stat); | ||
920 | return 0; | ||
921 | } | ||
922 | #else | ||
923 | #define xfs_rtcheck_alloc_range(m,t,b,l) (0) /* non-DEBUG builds: no check, always succeeds */ | ||
924 | #endif | ||
925 | /* | ||
926 | * Free an extent in the realtime subvolume. Length is expressed in | ||
927 | * realtime extents, as is the block number. Updates bitmap, summary and the superblock free-extent count within tp. | ||
928 | */ | ||
929 | int /* error */ | ||
930 | xfs_rtfree_extent( | ||
931 | xfs_trans_t *tp, /* transaction pointer */ | ||
932 | xfs_rtblock_t bno, /* starting block number to free */ | ||
933 | xfs_extlen_t len) /* length of extent freed */ | ||
934 | { | ||
935 | int error; /* error value */ | ||
936 | xfs_mount_t *mp; /* file system mount structure */ | ||
937 | xfs_fsblock_t sb; /* summary file block number; passed uninitialised, xfs_rtmodify_summary only reads it once sumbp is non-NULL */ | ||
938 | xfs_buf_t *sumbp = NULL; /* summary file block buffer */ | ||
939 | |||
940 | mp = tp->t_mountp; | ||
941 | |||
942 | ASSERT(mp->m_rbmip->i_itemp != NULL); | ||
943 | ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); | ||
944 | |||
945 | error = xfs_rtcheck_alloc_range(mp, tp, bno, len); /* debug-only sanity check; expands to 0 otherwise */ | ||
946 | if (error) | ||
947 | return error; | ||
948 | |||
949 | /* | ||
950 | * Free the range of realtime blocks. | ||
951 | */ | ||
952 | error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb); | ||
953 | if (error) { | ||
954 | return error; | ||
955 | } | ||
956 | /* | ||
957 | * Mark more blocks free in the superblock. | ||
958 | */ | ||
959 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len); | ||
960 | /* | ||
961 | * If we've now freed all the blocks, reset the file sequence | ||
962 | * number to 0. | ||
963 | */ | ||
964 | if (tp->t_frextents_delta + mp->m_sb.sb_frextents == | ||
965 | mp->m_sb.sb_rextents) { | ||
966 | if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) | ||
967 | mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; | ||
968 | *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0; /* NOTE(review): type-punned 64-bit store over di_atime - presumably the sequence number is kept there; confirm layout and aliasing safety */ | ||
969 | xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); | ||
970 | } | ||
971 | return 0; | ||
972 | } | ||
973 | |||
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c new file mode 100644 index 000000000000..6e93b5ef0a6b --- /dev/null +++ b/fs/xfs/libxfs/xfs_sb.c | |||
@@ -0,0 +1,836 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_shared.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_trans_resv.h" | ||
24 | #include "xfs_bit.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_inode.h" | ||
29 | #include "xfs_ialloc.h" | ||
30 | #include "xfs_alloc.h" | ||
31 | #include "xfs_error.h" | ||
32 | #include "xfs_trace.h" | ||
33 | #include "xfs_cksum.h" | ||
34 | #include "xfs_trans.h" | ||
35 | #include "xfs_buf_item.h" | ||
36 | #include "xfs_dinode.h" | ||
37 | #include "xfs_bmap_btree.h" | ||
38 | #include "xfs_alloc_btree.h" | ||
39 | #include "xfs_ialloc_btree.h" | ||
40 | |||
41 | /* | ||
42 | * Physical superblock buffer manipulations. Shared with libxfs in userspace. | ||
43 | */ | ||
44 | |||
45 | static const struct { /* one entry per superblock field; xfs_sb_to_disk and xfs_mod_sb index it by field bit number */ | ||
46 | short offset; /* byte offset of the field within xfs_sb_t */ | ||
47 | short type; /* 0 = integer (xfs_sb_to_disk converts 2/4/8 byte values to big-endian) | ||
48 | * 1 = binary / string (no translation) | ||
49 | */ | ||
50 | } xfs_sb_info[] = { | ||
51 | { offsetof(xfs_sb_t, sb_magicnum), 0 }, | ||
52 | { offsetof(xfs_sb_t, sb_blocksize), 0 }, | ||
53 | { offsetof(xfs_sb_t, sb_dblocks), 0 }, | ||
54 | { offsetof(xfs_sb_t, sb_rblocks), 0 }, | ||
55 | { offsetof(xfs_sb_t, sb_rextents), 0 }, | ||
56 | { offsetof(xfs_sb_t, sb_uuid), 1 }, | ||
57 | { offsetof(xfs_sb_t, sb_logstart), 0 }, | ||
58 | { offsetof(xfs_sb_t, sb_rootino), 0 }, | ||
59 | { offsetof(xfs_sb_t, sb_rbmino), 0 }, | ||
60 | { offsetof(xfs_sb_t, sb_rsumino), 0 }, | ||
61 | { offsetof(xfs_sb_t, sb_rextsize), 0 }, | ||
62 | { offsetof(xfs_sb_t, sb_agblocks), 0 }, | ||
63 | { offsetof(xfs_sb_t, sb_agcount), 0 }, | ||
64 | { offsetof(xfs_sb_t, sb_rbmblocks), 0 }, | ||
65 | { offsetof(xfs_sb_t, sb_logblocks), 0 }, | ||
66 | { offsetof(xfs_sb_t, sb_versionnum), 0 }, | ||
67 | { offsetof(xfs_sb_t, sb_sectsize), 0 }, | ||
68 | { offsetof(xfs_sb_t, sb_inodesize), 0 }, | ||
69 | { offsetof(xfs_sb_t, sb_inopblock), 0 }, | ||
70 | { offsetof(xfs_sb_t, sb_fname[0]), 1 }, | ||
71 | { offsetof(xfs_sb_t, sb_blocklog), 0 }, | ||
72 | { offsetof(xfs_sb_t, sb_sectlog), 0 }, | ||
73 | { offsetof(xfs_sb_t, sb_inodelog), 0 }, | ||
74 | { offsetof(xfs_sb_t, sb_inopblog), 0 }, | ||
75 | { offsetof(xfs_sb_t, sb_agblklog), 0 }, | ||
76 | { offsetof(xfs_sb_t, sb_rextslog), 0 }, | ||
77 | { offsetof(xfs_sb_t, sb_inprogress), 0 }, | ||
78 | { offsetof(xfs_sb_t, sb_imax_pct), 0 }, | ||
79 | { offsetof(xfs_sb_t, sb_icount), 0 }, | ||
80 | { offsetof(xfs_sb_t, sb_ifree), 0 }, | ||
81 | { offsetof(xfs_sb_t, sb_fdblocks), 0 }, | ||
82 | { offsetof(xfs_sb_t, sb_frextents), 0 }, | ||
83 | { offsetof(xfs_sb_t, sb_uquotino), 0 }, | ||
84 | { offsetof(xfs_sb_t, sb_gquotino), 0 }, | ||
85 | { offsetof(xfs_sb_t, sb_qflags), 0 }, | ||
86 | { offsetof(xfs_sb_t, sb_flags), 0 }, | ||
87 | { offsetof(xfs_sb_t, sb_shared_vn), 0 }, | ||
88 | { offsetof(xfs_sb_t, sb_inoalignmt), 0 }, | ||
89 | { offsetof(xfs_sb_t, sb_unit), 0 }, | ||
90 | { offsetof(xfs_sb_t, sb_width), 0 }, | ||
91 | { offsetof(xfs_sb_t, sb_dirblklog), 0 }, | ||
92 | { offsetof(xfs_sb_t, sb_logsectlog), 0 }, | ||
93 | { offsetof(xfs_sb_t, sb_logsectsize), 0 }, | ||
94 | { offsetof(xfs_sb_t, sb_logsunit), 0 }, | ||
95 | { offsetof(xfs_sb_t, sb_features2), 0 }, | ||
96 | { offsetof(xfs_sb_t, sb_bad_features2), 0 }, | ||
97 | { offsetof(xfs_sb_t, sb_features_compat), 0 }, | ||
98 | { offsetof(xfs_sb_t, sb_features_ro_compat), 0 }, | ||
99 | { offsetof(xfs_sb_t, sb_features_incompat), 0 }, | ||
100 | { offsetof(xfs_sb_t, sb_features_log_incompat), 0 }, | ||
101 | { offsetof(xfs_sb_t, sb_crc), 0 }, | ||
102 | { offsetof(xfs_sb_t, sb_pad), 0 }, | ||
103 | { offsetof(xfs_sb_t, sb_pquotino), 0 }, | ||
104 | { offsetof(xfs_sb_t, sb_lsn), 0 }, | ||
105 | { sizeof(xfs_sb_t), 0 } /* end marker: users compute a field's size as [f + 1].offset - [f].offset */ | ||
106 | }; | ||
107 | |||
108 | /* | ||
109 | * Reference counting access wrappers to the perag structures. | ||
110 | * Because we never free per-ag structures, the only thing we | ||
111 | * have to protect against changes is the tree structure itself. | ||
112 | */ | ||
113 | struct xfs_perag * /* perag with pag_ref incremented, or NULL if agno is not in the tree */ | ||
114 | xfs_perag_get( | ||
115 | struct xfs_mount *mp, /* mount whose m_perag_tree is searched */ | ||
116 | xfs_agnumber_t agno) /* tree index to look up */ | ||
117 | { | ||
118 | struct xfs_perag *pag; /* lookup result, may be NULL */ | ||
119 | int ref = 0; /* reference count after the increment; stays 0 when nothing was found */ | ||
120 | |||
121 | rcu_read_lock(); /* the lookup and the increment both happen inside the RCU read section */ | ||
122 | pag = radix_tree_lookup(&mp->m_perag_tree, agno); | ||
123 | if (pag) { | ||
124 | ASSERT(atomic_read(&pag->pag_ref) >= 0); | ||
125 | ref = atomic_inc_return(&pag->pag_ref); | ||
126 | } | ||
127 | rcu_read_unlock(); | ||
128 | trace_xfs_perag_get(mp, agno, ref, _RET_IP_); /* traced even when the lookup failed (ref == 0) */ | ||
129 | return pag; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * Search from @first to find the next perag with the given tag set. A reference is taken on the perag returned; NULL is returned, with no reference taken and nothing traced, when the gang lookup finds no entry. | ||
134 | */ | ||
135 | struct xfs_perag * | ||
136 | xfs_perag_get_tag( | ||
137 | struct xfs_mount *mp, /* mount whose m_perag_tree is searched */ | ||
138 | xfs_agnumber_t first, /* tree index at which the search starts */ | ||
139 | int tag) /* radix tree tag the entry must carry */ | ||
140 | { | ||
141 | struct xfs_perag *pag; /* filled in by the gang lookup when found > 0 */ | ||
142 | int found; /* number of entries returned by the lookup (at most 1 is requested) */ | ||
143 | int ref; /* reference count after the increment, used for tracing only */ | ||
144 | |||
145 | rcu_read_lock(); | ||
146 | found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, | ||
147 | (void **)&pag, first, 1, tag); | ||
148 | if (found <= 0) { | ||
149 | rcu_read_unlock(); | ||
150 | return NULL; | ||
151 | } | ||
152 | ref = atomic_inc_return(&pag->pag_ref); | ||
153 | rcu_read_unlock(); | ||
154 | trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_); | ||
155 | return pag; | ||
156 | } | ||
157 | |||
158 | void /* drops one pag_ref reference; nothing is freed here */ | ||
159 | xfs_perag_put( | ||
160 | struct xfs_perag *pag) /* perag whose reference is being dropped; must not be NULL (dereferenced unconditionally) */ | ||
161 | { | ||
162 | int ref; /* reference count after the decrement, used for tracing only */ | ||
163 | |||
164 | ASSERT(atomic_read(&pag->pag_ref) > 0); /* caller must actually hold a reference */ | ||
165 | ref = atomic_dec_return(&pag->pag_ref); | ||
166 | trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * Check the validity of the SB found. Returns 0 if every check passes, otherwise the negative error code of the first check that fails. | ||
171 | */ | ||
172 | STATIC int | ||
173 | xfs_mount_validate_sb( | ||
174 | xfs_mount_t *mp, /* mount: used for messages, m_flags and the log/data device targets */ | ||
175 | xfs_sb_t *sbp, /* in-core superblock to validate */ | ||
176 | bool check_inprogress, /* when true, a set sb_inprogress is an error */ | ||
177 | bool check_version) /* when true, run the V5 feature mask checks */ | ||
178 | { | ||
179 | |||
180 | /* | ||
181 | * Basic identity checks come first: the magic number and a | ||
182 | * supported version. | ||
183 | * The sb_logstart checks further down rely on this rule: if the log | ||
184 | * device and data device are the same, the log is internal and | ||
185 | * sb_logstart should be non-zero (and zero for an external log). | ||
186 | */ | ||
187 | if (sbp->sb_magicnum != XFS_SB_MAGIC) { | ||
188 | xfs_warn(mp, "bad magic number"); | ||
189 | return -EWRONGFS; | ||
190 | } | ||
191 | |||
192 | |||
193 | if (!xfs_sb_good_version(sbp)) { | ||
194 | xfs_warn(mp, "bad version"); | ||
195 | return -EWRONGFS; | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * Version 5 superblock feature mask validation. Reject combinations the | ||
200 | * kernel cannot support up front before checking anything else. For | ||
201 | * write validation, we don't need to check feature masks. | ||
202 | */ | ||
203 | if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) { | ||
204 | if (xfs_sb_has_compat_feature(sbp, | ||
205 | XFS_SB_FEAT_COMPAT_UNKNOWN)) { | ||
206 | xfs_warn(mp, | ||
207 | "Superblock has unknown compatible features (0x%x) enabled.\n" | ||
208 | "Using a more recent kernel is recommended.", | ||
209 | (sbp->sb_features_compat & | ||
210 | XFS_SB_FEAT_COMPAT_UNKNOWN)); | ||
211 | } | ||
212 | |||
213 | if (xfs_sb_has_ro_compat_feature(sbp, | ||
214 | XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { | ||
215 | xfs_alert(mp, | ||
216 | "Superblock has unknown read-only compatible features (0x%x) enabled.", | ||
217 | (sbp->sb_features_ro_compat & | ||
218 | XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); | ||
219 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { | ||
220 | xfs_warn(mp, | ||
221 | "Attempted to mount read-only compatible filesystem read-write.\n" | ||
222 | "Filesystem can only be safely mounted read only."); | ||
223 | return -EINVAL; | ||
224 | } | ||
225 | } | ||
226 | if (xfs_sb_has_incompat_feature(sbp, | ||
227 | XFS_SB_FEAT_INCOMPAT_UNKNOWN)) { | ||
228 | xfs_warn(mp, | ||
229 | "Superblock has unknown incompatible features (0x%x) enabled.\n" | ||
230 | "Filesystem can not be safely mounted by this kernel.", | ||
231 | (sbp->sb_features_incompat & | ||
232 | XFS_SB_FEAT_INCOMPAT_UNKNOWN)); | ||
233 | return -EINVAL; | ||
234 | } | ||
235 | } | ||
236 | |||
237 | if (xfs_sb_version_has_pquotino(sbp)) { | ||
238 | if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) { | ||
239 | xfs_notice(mp, | ||
240 | "Version 5 of Super block has XFS_OQUOTA bits."); | ||
241 | return -EFSCORRUPTED; | ||
242 | } | ||
243 | } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | | ||
244 | XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { | ||
245 | xfs_notice(mp, | ||
246 | "Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits."); | ||
247 | return -EFSCORRUPTED; | ||
248 | } | ||
249 | |||
250 | if (unlikely( | ||
251 | sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { | ||
252 | xfs_warn(mp, | ||
253 | "filesystem is marked as having an external log; " | ||
254 | "specify logdev on the mount command line."); | ||
255 | return -EINVAL; | ||
256 | } | ||
257 | |||
258 | if (unlikely( | ||
259 | sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { | ||
260 | xfs_warn(mp, | ||
261 | "filesystem is marked as having an internal log; " | ||
262 | "do not specify logdev on the mount command line."); | ||
263 | return -EINVAL; | ||
264 | } | ||
265 | |||
266 | /* | ||
267 | * More sanity checking. Most of these were stolen directly from | ||
268 | * xfs_repair. NOTE(review): the sb_rextsize * sb_blocksize products are evaluated in the operands' own width - confirm they cannot wrap before the range comparison. | ||
269 | */ | ||
270 | if (unlikely( | ||
271 | sbp->sb_agcount <= 0 || | ||
272 | sbp->sb_sectsize < XFS_MIN_SECTORSIZE || | ||
273 | sbp->sb_sectsize > XFS_MAX_SECTORSIZE || | ||
274 | sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || | ||
275 | sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || | ||
276 | sbp->sb_sectsize != (1 << sbp->sb_sectlog) || | ||
277 | sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || | ||
278 | sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || | ||
279 | sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || | ||
280 | sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || | ||
281 | sbp->sb_blocksize != (1 << sbp->sb_blocklog) || | ||
282 | sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || | ||
283 | sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || | ||
284 | sbp->sb_inodelog < XFS_DINODE_MIN_LOG || | ||
285 | sbp->sb_inodelog > XFS_DINODE_MAX_LOG || | ||
286 | sbp->sb_inodesize != (1 << sbp->sb_inodelog) || | ||
287 | sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || | ||
288 | (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || | ||
289 | (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || | ||
290 | (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || | ||
291 | (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) || | ||
292 | sbp->sb_dblocks == 0 || | ||
293 | sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || | ||
294 | sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp) || | ||
295 | sbp->sb_shared_vn != 0)) { | ||
296 | xfs_notice(mp, "SB sanity check failed"); | ||
297 | return -EFSCORRUPTED; | ||
298 | } | ||
299 | |||
300 | /* | ||
301 | * Until this is fixed only page-sized or smaller data blocks work. | ||
302 | */ | ||
303 | if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { | ||
304 | xfs_warn(mp, | ||
305 | "File system with blocksize %d bytes. " | ||
306 | "Only pagesize (%ld) or less will currently work.", | ||
307 | sbp->sb_blocksize, PAGE_SIZE); | ||
308 | return -ENOSYS; | ||
309 | } | ||
310 | |||
311 | /* | ||
312 | * Currently only very few inode sizes are supported. | ||
313 | */ | ||
314 | switch (sbp->sb_inodesize) { | ||
315 | case 256: | ||
316 | case 512: | ||
317 | case 1024: | ||
318 | case 2048: | ||
319 | break; | ||
320 | default: | ||
321 | xfs_warn(mp, "inode size of %d bytes not supported", | ||
322 | sbp->sb_inodesize); | ||
323 | return -ENOSYS; | ||
324 | } | ||
325 | |||
326 | if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || | ||
327 | xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { | ||
328 | xfs_warn(mp, | ||
329 | "file system too large to be mounted on this system."); | ||
330 | return -EFBIG; | ||
331 | } | ||
332 | |||
333 | if (check_inprogress && sbp->sb_inprogress) { | ||
334 | xfs_warn(mp, "Offline file system operation in progress!"); | ||
335 | return -EFSCORRUPTED; | ||
336 | } | ||
337 | return 0; | ||
338 | } | ||
339 | |||
340 | void /* converts the quota inode and quota flag fields of an in-core superblock to their in-core conventions */ | ||
341 | xfs_sb_quota_from_disk(struct xfs_sb *sbp) /* sbp: in-core superblock, modified in place */ | ||
342 | { | ||
343 | /* | ||
344 | * older mkfs doesn't initialize quota inodes to NULLFSINO. This | ||
345 | * leads to in-core values having two different values for a quota | ||
346 | * inode to be invalid: 0 and NULLFSINO. Change it to a single value | ||
347 | * NULLFSINO. | ||
348 | * | ||
349 | * Note that this change affects only the in-core values. These | ||
350 | * values are not written back to disk unless any quota information | ||
351 | * is written to the disk. Even in that case, sb_pquotino field is | ||
352 | * not written to disk unless the superblock supports pquotino. | ||
353 | */ | ||
354 | if (sbp->sb_uquotino == 0) | ||
355 | sbp->sb_uquotino = NULLFSINO; | ||
356 | if (sbp->sb_gquotino == 0) | ||
357 | sbp->sb_gquotino = NULLFSINO; | ||
358 | if (sbp->sb_pquotino == 0) | ||
359 | sbp->sb_pquotino = NULLFSINO; | ||
360 | |||
361 | /* | ||
362 | * We need to do these manipulations only if we are working | ||
363 | * with an older version of on-disk superblock. | ||
364 | */ | ||
365 | if (xfs_sb_version_has_pquotino(sbp)) | ||
366 | return; | ||
367 | |||
368 | if (sbp->sb_qflags & XFS_OQUOTA_ENFD) /* map the shared OQUOTA bit to the project or group bit, chosen by XFS_PQUOTA_ACCT */ | ||
369 | sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? | ||
370 | XFS_PQUOTA_ENFD : XFS_GQUOTA_ENFD; | ||
371 | if (sbp->sb_qflags & XFS_OQUOTA_CHKD) | ||
372 | sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? | ||
373 | XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD; | ||
374 | sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD); /* the OQUOTA bits are never kept in-core */ | ||
375 | |||
376 | if (sbp->sb_qflags & XFS_PQUOTA_ACCT) { | ||
377 | /* | ||
378 | * In older version of superblock, on-disk superblock only | ||
379 | * has sb_gquotino, and in-core superblock has both sb_gquotino | ||
380 | * and sb_pquotino. But, only one of them is supported at any | ||
381 | * point of time. So, if PQUOTA is set in disk superblock, | ||
382 | * copy over sb_gquotino to sb_pquotino. | ||
383 | */ | ||
384 | sbp->sb_pquotino = sbp->sb_gquotino; | ||
385 | sbp->sb_gquotino = NULLFSINO; | ||
386 | } | ||
387 | } | ||
388 | |||
389 | void /* fills an in-core superblock from an on-disk one, converting multi-byte integers from big-endian */ | ||
390 | xfs_sb_from_disk( | ||
391 | struct xfs_sb *to, /* in-core superblock to fill in */ | ||
392 | xfs_dsb_t *from) /* on-disk superblock to read */ | ||
393 | { | ||
394 | to->sb_magicnum = be32_to_cpu(from->sb_magicnum); | ||
395 | to->sb_blocksize = be32_to_cpu(from->sb_blocksize); | ||
396 | to->sb_dblocks = be64_to_cpu(from->sb_dblocks); | ||
397 | to->sb_rblocks = be64_to_cpu(from->sb_rblocks); | ||
398 | to->sb_rextents = be64_to_cpu(from->sb_rextents); | ||
399 | memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid)); /* copied as raw bytes, no conversion */ | ||
400 | to->sb_logstart = be64_to_cpu(from->sb_logstart); | ||
401 | to->sb_rootino = be64_to_cpu(from->sb_rootino); | ||
402 | to->sb_rbmino = be64_to_cpu(from->sb_rbmino); | ||
403 | to->sb_rsumino = be64_to_cpu(from->sb_rsumino); | ||
404 | to->sb_rextsize = be32_to_cpu(from->sb_rextsize); | ||
405 | to->sb_agblocks = be32_to_cpu(from->sb_agblocks); | ||
406 | to->sb_agcount = be32_to_cpu(from->sb_agcount); | ||
407 | to->sb_rbmblocks = be32_to_cpu(from->sb_rbmblocks); | ||
408 | to->sb_logblocks = be32_to_cpu(from->sb_logblocks); | ||
409 | to->sb_versionnum = be16_to_cpu(from->sb_versionnum); | ||
410 | to->sb_sectsize = be16_to_cpu(from->sb_sectsize); | ||
411 | to->sb_inodesize = be16_to_cpu(from->sb_inodesize); | ||
412 | to->sb_inopblock = be16_to_cpu(from->sb_inopblock); | ||
413 | memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname)); /* copied as raw bytes, no conversion */ | ||
414 | to->sb_blocklog = from->sb_blocklog; /* this and the other plain assignments below are copied without conversion */ | ||
415 | to->sb_sectlog = from->sb_sectlog; | ||
416 | to->sb_inodelog = from->sb_inodelog; | ||
417 | to->sb_inopblog = from->sb_inopblog; | ||
418 | to->sb_agblklog = from->sb_agblklog; | ||
419 | to->sb_rextslog = from->sb_rextslog; | ||
420 | to->sb_inprogress = from->sb_inprogress; | ||
421 | to->sb_imax_pct = from->sb_imax_pct; | ||
422 | to->sb_icount = be64_to_cpu(from->sb_icount); | ||
423 | to->sb_ifree = be64_to_cpu(from->sb_ifree); | ||
424 | to->sb_fdblocks = be64_to_cpu(from->sb_fdblocks); | ||
425 | to->sb_frextents = be64_to_cpu(from->sb_frextents); | ||
426 | to->sb_uquotino = be64_to_cpu(from->sb_uquotino); | ||
427 | to->sb_gquotino = be64_to_cpu(from->sb_gquotino); | ||
428 | to->sb_qflags = be16_to_cpu(from->sb_qflags); | ||
429 | to->sb_flags = from->sb_flags; | ||
430 | to->sb_shared_vn = from->sb_shared_vn; | ||
431 | to->sb_inoalignmt = be32_to_cpu(from->sb_inoalignmt); | ||
432 | to->sb_unit = be32_to_cpu(from->sb_unit); | ||
433 | to->sb_width = be32_to_cpu(from->sb_width); | ||
434 | to->sb_dirblklog = from->sb_dirblklog; | ||
435 | to->sb_logsectlog = from->sb_logsectlog; | ||
436 | to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize); | ||
437 | to->sb_logsunit = be32_to_cpu(from->sb_logsunit); | ||
438 | to->sb_features2 = be32_to_cpu(from->sb_features2); | ||
439 | to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2); | ||
440 | to->sb_features_compat = be32_to_cpu(from->sb_features_compat); | ||
441 | to->sb_features_ro_compat = be32_to_cpu(from->sb_features_ro_compat); | ||
442 | to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat); | ||
443 | to->sb_features_log_incompat = | ||
444 | be32_to_cpu(from->sb_features_log_incompat); | ||
445 | to->sb_pad = 0; /* not read from disk, always zeroed; note that sb_crc is not assigned by this function */ | ||
446 | to->sb_pquotino = be64_to_cpu(from->sb_pquotino); | ||
447 | to->sb_lsn = be64_to_cpu(from->sb_lsn); | ||
448 | } | ||
449 | |||
450 | static inline void /* writes the quota fields that need translation for superblocks without pquotino */ | ||
451 | xfs_sb_quota_to_disk( | ||
452 | xfs_dsb_t *to, /* on-disk superblock being updated */ | ||
453 | xfs_sb_t *from, /* in-core superblock */ | ||
454 | __int64_t *fields) /* in/out: mask of fields to write; the bits handled here are cleared */ | ||
455 | { | ||
456 | __uint16_t qflags = from->sb_qflags; | ||
457 | |||
458 | /* | ||
459 | * We need to do these manipulations only if we are working | ||
460 | * with an older version of on-disk superblock. (On this early return *fields is left untouched.) | ||
461 | */ | ||
462 | if (xfs_sb_version_has_pquotino(from)) | ||
463 | return; | ||
464 | |||
465 | if (*fields & XFS_SB_QFLAGS) { | ||
466 | /* | ||
467 | * The in-core version of sb_qflags do not have | ||
468 | * XFS_OQUOTA_* flags, whereas the on-disk version | ||
469 | * does. So, convert incore XFS_{PG}QUOTA_* flags | ||
470 | * to on-disk XFS_OQUOTA_* flags. | ||
471 | */ | ||
472 | qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | | ||
473 | XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); | ||
474 | |||
475 | if (from->sb_qflags & | ||
476 | (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD)) | ||
477 | qflags |= XFS_OQUOTA_ENFD; | ||
478 | if (from->sb_qflags & | ||
479 | (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) | ||
480 | qflags |= XFS_OQUOTA_CHKD; | ||
481 | to->sb_qflags = cpu_to_be16(qflags); | ||
482 | *fields &= ~XFS_SB_QFLAGS; | ||
483 | } | ||
484 | |||
485 | /* | ||
486 | * GQUOTINO and PQUOTINO cannot be used together in versions of | ||
487 | * superblock that do not have pquotino. from->sb_qflags tells us which | ||
488 | * quota is active and should be copied to disk. If neither are active, | ||
489 | * make sure we write NULLFSINO to the sb_gquotino field as a quota | ||
490 | * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature | ||
491 | * bit is set. | ||
492 | * | ||
493 | * Note that we don't need to handle the sb_uquotino or sb_pquotino here | ||
494 | * as they do not require any translation. Hence the main sb field loop | ||
495 | * will write them appropriately from the in-core superblock. | ||
496 | */ | ||
497 | if ((*fields & XFS_SB_GQUOTINO) && | ||
498 | (from->sb_qflags & XFS_GQUOTA_ACCT)) | ||
499 | to->sb_gquotino = cpu_to_be64(from->sb_gquotino); | ||
500 | else if ((*fields & XFS_SB_PQUOTINO) && | ||
501 | (from->sb_qflags & XFS_PQUOTA_ACCT)) | ||
502 | to->sb_gquotino = cpu_to_be64(from->sb_pquotino); /* the project quota inode is stored in the on-disk sb_gquotino field */ | ||
503 | else { | ||
504 | /* | ||
505 | * We can't rely on just the fields being logged to tell us | ||
506 | * that it is safe to write NULLFSINO - we should only do that | ||
507 | * if quotas are not actually enabled. Hence only write | ||
508 | * NULLFSINO if both in-core quota inodes are NULL. | ||
509 | */ | ||
510 | if (from->sb_gquotino == NULLFSINO && | ||
511 | from->sb_pquotino == NULLFSINO) | ||
512 | to->sb_gquotino = cpu_to_be64(NULLFSINO); | ||
513 | } | ||
514 | |||
515 | *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO); /* both bits are consumed here whichever branch ran */ | ||
516 | } | ||
517 | |||
518 | /* | ||
519 | * Copy in core superblock to ondisk one. | ||
520 | * | ||
521 | * The fields argument is mask of superblock fields to copy. Quota fields needing translation are handled by xfs_sb_quota_to_disk() first; every remaining bit is copied using the offsets in xfs_sb_info[]. | ||
522 | */ | ||
523 | void | ||
524 | xfs_sb_to_disk( | ||
525 | xfs_dsb_t *to, /* on-disk superblock to update */ | ||
526 | xfs_sb_t *from, /* in-core superblock to copy from */ | ||
527 | __int64_t fields) /* mask of fields to copy; zero is asserted against and otherwise ignored */ | ||
528 | { | ||
529 | xfs_caddr_t to_ptr = (xfs_caddr_t)to; | ||
530 | xfs_caddr_t from_ptr = (xfs_caddr_t)from; | ||
531 | xfs_sb_field_t f; /* index of the field being copied (its bit number in fields) */ | ||
532 | int first; /* byte offset of the field */ | ||
533 | int size; /* field size, taken as the distance to the next table entry */ | ||
534 | |||
535 | ASSERT(fields); | ||
536 | if (!fields) | ||
537 | return; | ||
538 | |||
539 | xfs_sb_quota_to_disk(to, from, &fields); /* may clear bits in fields */ | ||
540 | while (fields) { | ||
541 | f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); | ||
542 | first = xfs_sb_info[f].offset; | ||
543 | size = xfs_sb_info[f + 1].offset - first; | ||
544 | |||
545 | ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1); | ||
546 | |||
547 | if (size == 1 || xfs_sb_info[f].type == 1) { /* single bytes and binary/string fields are copied untranslated */ | ||
548 | memcpy(to_ptr + first, from_ptr + first, size); | ||
549 | } else { | ||
550 | switch (size) { /* integer fields are stored big-endian */ | ||
551 | case 2: | ||
552 | *(__be16 *)(to_ptr + first) = | ||
553 | cpu_to_be16(*(__u16 *)(from_ptr + first)); | ||
554 | break; | ||
555 | case 4: | ||
556 | *(__be32 *)(to_ptr + first) = | ||
557 | cpu_to_be32(*(__u32 *)(from_ptr + first)); | ||
558 | break; | ||
559 | case 8: | ||
560 | *(__be64 *)(to_ptr + first) = | ||
561 | cpu_to_be64(*(__u64 *)(from_ptr + first)); | ||
562 | break; | ||
563 | default: | ||
564 | ASSERT(0); /* an integer field of any other size is not expected; nothing is copied for it */ | ||
565 | } | ||
566 | } | ||
567 | |||
568 | fields &= ~(1LL << f); /* this field is done */ | ||
569 | } | ||
570 | } | ||
571 | |||
572 | static int /* 0 or the error returned by xfs_mount_validate_sb() */ | ||
573 | xfs_sb_verify( | ||
574 | struct xfs_buf *bp, /* buffer holding the on-disk superblock */ | ||
575 | bool check_version) /* passed through to xfs_mount_validate_sb() */ | ||
576 | { | ||
577 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
578 | struct xfs_sb sb; /* in-core copy converted from the buffer; the buffer itself is not modified here */ | ||
579 | |||
580 | xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp)); | ||
581 | |||
582 | /* | ||
583 | * Only check the in progress field for the primary superblock as | ||
584 | * mkfs.xfs doesn't clear it from secondary superblocks. | ||
585 | */ | ||
586 | return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR, | ||
587 | check_version); | ||
588 | } | ||
589 | |||
590 | /* | ||
591 | * If the superblock has the CRC feature bit set or the CRC field is non-null, | ||
592 | * check that the CRC is valid. We check the CRC field is non-null because a | ||
593 | * single bit error could clear the feature bit and unused parts of the | ||
594 | * superblock are supposed to be zero. Hence a non-null crc field indicates that | ||
595 | * we've potentially lost a feature bit and we should check it anyway. | ||
596 | * | ||
597 | * However, past bugs (i.e. in growfs) left non-zeroed regions beyond the | ||
598 | * last field in V4 secondary superblocks. So for secondary superblocks, | ||
599 | * we are more forgiving, and ignore CRC failures if the primary doesn't | ||
600 | * indicate that the fs version is V5. | ||
601 | */ | ||
602 | static void /* failures are reported by setting an error on bp, not by a return value */ | ||
603 | xfs_sb_read_verify( | ||
604 | struct xfs_buf *bp) | ||
605 | { | ||
606 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
607 | struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); | ||
608 | int error; | ||
609 | |||
610 | /* | ||
611 | * open code the version check to avoid needing to convert the entire | ||
612 | * superblock from disk order just to check the version number | ||
613 | */ | ||
614 | if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC) && | ||
615 | (((be16_to_cpu(dsb->sb_versionnum) & XFS_SB_VERSION_NUMBITS) == | ||
616 | XFS_SB_VERSION_5) || | ||
617 | dsb->sb_crc != 0)) { | ||
618 | |||
619 | if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) { | ||
620 | /* Only fail bad secondaries on a known V5 filesystem */ | ||
621 | if (bp->b_bn == XFS_SB_DADDR || | ||
622 | xfs_sb_version_hascrc(&mp->m_sb)) { | ||
623 | error = -EFSBADCRC; | ||
624 | goto out_error; | ||
625 | } | ||
626 | } | ||
627 | } | ||
628 | error = xfs_sb_verify(bp, true); /* also reached when a CRC failure was ignored above */ | ||
629 | |||
630 | out_error: | ||
631 | if (error) { /* record the error on the buffer; only corruption and CRC errors are also passed to xfs_verifier_error() */ | ||
632 | xfs_buf_ioerror(bp, error); | ||
633 | if (error == -EFSCORRUPTED || error == -EFSBADCRC) | ||
634 | xfs_verifier_error(bp); | ||
635 | } | ||
636 | } | ||
637 | |||
638 | /* | ||
639 | * We may be probed for a filesystem match, so we may not want to emit | ||
640 | * messages when the superblock buffer is not actually an XFS superblock. | ||
641 | * If we find an XFS superblock, then run a normal, noisy mount because we are | ||
642 | * really going to mount it and want to know about errors. | ||
643 | */ | ||
644 | static void | ||
645 | xfs_sb_quiet_read_verify( | ||
646 | struct xfs_buf *bp) /* buffer to verify; any error is set on it */ | ||
647 | { | ||
648 | struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); | ||
649 | |||
650 | if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) { /* compared in disk (big-endian) order */ | ||
651 | /* XFS filesystem, verify noisily! */ | ||
652 | xfs_sb_read_verify(bp); | ||
653 | return; | ||
654 | } | ||
655 | /* quietly fail: no XFS magic, so just set -EWRONGFS on the buffer */ | ||
656 | xfs_buf_ioerror(bp, -EWRONGFS); | ||
657 | } | ||
658 | |||
659 | static void /* validates the superblock buffer before write; on CRC filesystems also stamps the LSN and updates the checksum */ | ||
660 | xfs_sb_write_verify( | ||
661 | struct xfs_buf *bp) | ||
662 | { | ||
663 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
664 | struct xfs_buf_log_item *bip = bp->b_fspriv; /* may be NULL; checked before use below */ | ||
665 | int error; | ||
666 | |||
667 | error = xfs_sb_verify(bp, false); /* false: the V5 feature mask checks are skipped on write */ | ||
668 | if (error) { | ||
669 | xfs_buf_ioerror(bp, error); | ||
670 | xfs_verifier_error(bp); | ||
671 | return; | ||
672 | } | ||
673 | |||
674 | if (!xfs_sb_version_hascrc(&mp->m_sb)) /* no LSN or checksum update without the CRC feature */ | ||
675 | return; | ||
676 | |||
677 | if (bip) | ||
678 | XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); /* written before the checksum is recomputed */ | ||
679 | |||
680 | xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF); | ||
681 | } | ||
682 | |||
683 | const struct xfs_buf_ops xfs_sb_buf_ops = { /* superblock buffer verifiers; the read side always verifies noisily */ | ||
684 | .verify_read = xfs_sb_read_verify, | ||
685 | .verify_write = xfs_sb_write_verify, | ||
686 | }; | ||
687 | |||
688 | const struct xfs_buf_ops xfs_sb_quiet_buf_ops = { /* same write verifier; the read verifier fails quietly when the XFS magic is absent */ | ||
689 | .verify_read = xfs_sb_quiet_read_verify, | ||
690 | .verify_write = xfs_sb_write_verify, | ||
691 | }; | ||
692 | |||
693 | /* | ||
694 | * xfs_sb_mount_common | ||
695 | * | ||
696 | * Mount initialization code establishing various mount | ||
697 | * fields from the superblock associated with the given | ||
698 | * mount structure | ||
699 | */ | ||
700 | void | ||
701 | xfs_sb_mount_common( | ||
702 | struct xfs_mount *mp, /* mount structure whose derived fields are set */ | ||
703 | struct xfs_sb *sbp) /* superblock the values are derived from */ | ||
704 | { | ||
705 | mp->m_agfrotor = mp->m_agirotor = 0; | ||
706 | spin_lock_init(&mp->m_agirotor_lock); | ||
707 | mp->m_maxagi = mp->m_sb.sb_agcount; /* NOTE(review): reads mp->m_sb rather than sbp - confirm this is intended */ | ||
708 | mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; | ||
709 | mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; | ||
710 | mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; | ||
711 | mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; | ||
712 | mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; | ||
713 | mp->m_blockmask = sbp->sb_blocksize - 1; | ||
714 | mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; | ||
715 | mp->m_blockwmask = mp->m_blockwsize - 1; | ||
716 | |||
717 | mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); /* for each btree below: maximum records, then minimum = half the maximum */ | ||
718 | mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); | ||
719 | mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; | ||
720 | mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; | ||
721 | |||
722 | mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); | ||
723 | mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); | ||
724 | mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2; | ||
725 | mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2; | ||
726 | |||
727 | mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1); | ||
728 | mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0); | ||
729 | mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; | ||
730 | mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2; | ||
731 | |||
732 | mp->m_bsize = XFS_FSB_TO_BB(mp, 1); | ||
733 | mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, | ||
734 | sbp->sb_inopblock); /* the larger of XFS_INODES_PER_CHUNK and the inodes-per-block count */ | ||
735 | mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; | ||
736 | } | ||
737 | |||
738 | /* | ||
739 | * xfs_initialize_perag_data | ||
740 | * | ||
741 | * Read in each per-ag structure so we can count up the number of | ||
742 | * allocated inodes, free inodes and used filesystem blocks as this | ||
743 | * information is no longer persistent in the superblock. Once we have | ||
744 | * this information, write it into the in-core superblock structure. Returns 0, or the first error from reading an AGF or AGI; on error the in-core counters are left unchanged. | ||
745 | */ | ||
746 | int | ||
747 | xfs_initialize_perag_data( | ||
748 | struct xfs_mount *mp, /* mount whose in-core superblock counters are rebuilt */ | ||
749 | xfs_agnumber_t agcount) /* number of AGs to scan, starting from AG 0 */ | ||
750 | { | ||
751 | xfs_agnumber_t index; | ||
752 | xfs_perag_t *pag; | ||
753 | xfs_sb_t *sbp = &mp->m_sb; | ||
754 | uint64_t ifree = 0; /* sum of pagi_freecount */ | ||
755 | uint64_t ialloc = 0; /* sum of pagi_count */ | ||
756 | uint64_t bfree = 0; /* sum of pagf_freeblks */ | ||
757 | uint64_t bfreelst = 0; /* sum of pagf_flcount */ | ||
758 | uint64_t btree = 0; /* sum of pagf_btreeblks */ | ||
759 | int error; | ||
760 | |||
761 | for (index = 0; index < agcount; index++) { | ||
762 | /* | ||
763 | * read the agf, then the agi. This gets us | ||
764 | * all the information we need and populates the | ||
765 | * per-ag structures for us. | ||
766 | */ | ||
767 | error = xfs_alloc_pagf_init(mp, NULL, index, 0); | ||
768 | if (error) | ||
769 | return error; | ||
770 | |||
771 | error = xfs_ialloc_pagi_init(mp, NULL, index); | ||
772 | if (error) | ||
773 | return error; | ||
774 | pag = xfs_perag_get(mp, index); /* NOTE(review): xfs_perag_get() can return NULL and pag is dereferenced unchecked; presumably the init calls above guarantee the perag exists - confirm */ | ||
775 | ifree += pag->pagi_freecount; | ||
776 | ialloc += pag->pagi_count; | ||
777 | bfree += pag->pagf_freeblks; | ||
778 | bfreelst += pag->pagf_flcount; | ||
779 | btree += pag->pagf_btreeblks; | ||
780 | xfs_perag_put(pag); | ||
781 | } | ||
782 | /* | ||
783 | * Overwrite incore superblock counters with just-read data | ||
784 | */ | ||
785 | spin_lock(&mp->m_sb_lock); | ||
786 | sbp->sb_ifree = ifree; | ||
787 | sbp->sb_icount = ialloc; | ||
788 | sbp->sb_fdblocks = bfree + bfreelst + btree; /* free blocks + free-list blocks + btree blocks */ | ||
789 | spin_unlock(&mp->m_sb_lock); | ||
790 | |||
791 | /* Fixup the per-cpu counters as well. */ | ||
792 | xfs_icsb_reinit_counters(mp); | ||
793 | |||
794 | return 0; | ||
795 | } | ||
796 | |||
797 | /* | ||
798 | * xfs_mod_sb() can be used to copy arbitrary changes to the | ||
799 | * in-core superblock into the superblock buffer to be logged. | ||
800 | * It does not provide the higher level of locking that is | ||
801 | * needed to protect the in-core superblock from concurrent | ||
802 | * access. | ||
803 | */ | ||
804 | void | ||
805 | xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) /* tp: transaction the buffer is logged in; fields: mask of superblock fields to copy and log */ | ||
806 | { | ||
807 | xfs_buf_t *bp; /* superblock buffer obtained through the transaction */ | ||
808 | int first; /* first byte of the range to log */ | ||
809 | int last; /* last byte of the range to log */ | ||
810 | xfs_mount_t *mp; | ||
811 | xfs_sb_field_t f; /* field index used to look up offsets in xfs_sb_info[] */ | ||
812 | |||
813 | ASSERT(fields); | ||
814 | if (!fields) | ||
815 | return; | ||
816 | mp = tp->t_mountp; | ||
817 | bp = xfs_trans_getsb(tp, mp, 0); | ||
818 | first = sizeof(xfs_sb_t); /* initial value only; reassigned below before it is used */ | ||
819 | last = 0; /* initial value only; reassigned below before it is used */ | ||
820 | |||
821 | /* translate/copy the selected in-core fields into the superblock buffer */ | ||
822 | |||
823 | xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields); | ||
824 | |||
825 | /* find modified range: from the first byte of the lowest-numbered field to the last byte of the highest-numbered one */ | ||
826 | f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields); | ||
827 | ASSERT((1LL << f) & XFS_SB_MOD_BITS); | ||
828 | last = xfs_sb_info[f + 1].offset - 1; | ||
829 | |||
830 | f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); | ||
831 | ASSERT((1LL << f) & XFS_SB_MOD_BITS); | ||
832 | first = xfs_sb_info[f].offset; | ||
833 | |||
834 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); | ||
835 | xfs_trans_log_buf(tp, bp, first, last); /* everything between first and last is logged, including fields not in the mask */ | ||
836 | } | ||
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h new file mode 100644 index 000000000000..c43c2d609a24 --- /dev/null +++ b/fs/xfs/libxfs/xfs_sb.h | |||
@@ -0,0 +1,621 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_SB_H__ | ||
19 | #define __XFS_SB_H__ | ||
20 | |||
21 | /* | ||
22 | * Super block | ||
23 | * Fits into a sector-sized buffer at address 0 of each allocation group. | ||
24 | * Only the first of these is ever updated except during growfs. | ||
25 | */ | ||
26 | |||
27 | struct xfs_buf; | ||
28 | struct xfs_mount; | ||
29 | struct xfs_trans; | ||
30 | |||
/*
 * Superblock magic number and the layout of sb_versionnum.
 *
 * The low four bits of sb_versionnum (XFS_SB_VERSION_NUMBITS) hold the
 * version number; the upper twelve bits (XFS_SB_VERSION_ALLFBITS) are
 * individual feature flags.
 */
#define	XFS_SB_MAGIC		0x58465342	/* 'XFSB' */
#define	XFS_SB_VERSION_1	1		/* 5.3, 6.0.1, 6.1 */
#define	XFS_SB_VERSION_2	2		/* 6.2 - attributes */
#define	XFS_SB_VERSION_3	3		/* 6.2 - new inode version */
#define	XFS_SB_VERSION_4	4		/* 6.2+ - bitmask version */
#define	XFS_SB_VERSION_5	5		/* CRC enabled filesystem */
#define	XFS_SB_VERSION_NUMBITS		0x000f
#define	XFS_SB_VERSION_ALLFBITS		0xfff0
#define	XFS_SB_VERSION_ATTRBIT		0x0010
#define	XFS_SB_VERSION_NLINKBIT		0x0020
#define	XFS_SB_VERSION_QUOTABIT		0x0040
#define	XFS_SB_VERSION_ALIGNBIT		0x0080
#define	XFS_SB_VERSION_DALIGNBIT	0x0100
#define	XFS_SB_VERSION_SHAREDBIT	0x0200
#define	XFS_SB_VERSION_LOGV2BIT		0x0400
#define	XFS_SB_VERSION_SECTORBIT	0x0800
#define	XFS_SB_VERSION_EXTFLGBIT	0x1000
#define	XFS_SB_VERSION_DIRV2BIT		0x2000
#define	XFS_SB_VERSION_BORGBIT		0x4000	/* ASCII only case-insens. */
#define	XFS_SB_VERSION_MOREBITSBIT	0x8000

/*
 * Supported feature bit list is just all bits in the versionnum field because
 * we've used them all up and understand them all. Except, of course, for the
 * shared superblock bit, which nobody knows what it does and so is unsupported.
 *
 * xfs_sb_good_v4_features() rejects a v4 superblock whose sb_versionnum has
 * any bit set outside this mask.
 */
#define	XFS_SB_VERSION_OKBITS		\
	((XFS_SB_VERSION_NUMBITS | XFS_SB_VERSION_ALLFBITS) & \
		~XFS_SB_VERSION_SHAREDBIT)
60 | |||
/*
 * There are two words to hold XFS "feature" bits: the original
 * word, sb_versionnum, and sb_features2.  Whenever a bit is set in
 * sb_features2, the feature bit XFS_SB_VERSION_MOREBITSBIT must be set.
 *
 * These defines represent bits in sb_features2.
 */
#define XFS_SB_VERSION2_RESERVED1BIT	0x00000001
#define XFS_SB_VERSION2_LAZYSBCOUNTBIT	0x00000002	/* Superblk counters */
#define XFS_SB_VERSION2_RESERVED4BIT	0x00000004
#define XFS_SB_VERSION2_ATTR2BIT	0x00000008	/* Inline attr rework */
#define XFS_SB_VERSION2_PARENTBIT	0x00000010	/* parent pointers */
#define XFS_SB_VERSION2_PROJID32BIT	0x00000080	/* 32 bit project id */
#define XFS_SB_VERSION2_CRCBIT		0x00000100	/* metadata CRCs */
#define XFS_SB_VERSION2_FTYPE		0x00000200	/* inode type in dir */

/*
 * sb_features2 bits accepted on a v4 superblock.  The reserved bits,
 * XFS_SB_VERSION2_PARENTBIT and XFS_SB_VERSION2_CRCBIT are not part of this
 * mask, so xfs_sb_good_v4_features() rejects a v4 superblock that has
 * XFS_SB_VERSION_MOREBITSBIT set together with any of them.
 */
#define	XFS_SB_VERSION2_OKBITS		\
	(XFS_SB_VERSION2_LAZYSBCOUNTBIT	| \
	 XFS_SB_VERSION2_ATTR2BIT	| \
	 XFS_SB_VERSION2_PROJID32BIT	| \
	 XFS_SB_VERSION2_FTYPE)
82 | |||
/*
 * Superblock - in core version.  Must match the ondisk version below.
 * Must be padded to 64 bit alignment.
 *
 * The members mirror struct xfs_dsb one for one, but are declared with
 * CPU-native integer types instead of the fixed-endian __be/__le types used
 * by the on-disk structure.
 */
typedef struct xfs_sb {
	__uint32_t	sb_magicnum;	/* magic number == XFS_SB_MAGIC */
	__uint32_t	sb_blocksize;	/* logical block size, bytes */
	xfs_drfsbno_t	sb_dblocks;	/* number of data blocks */
	xfs_drfsbno_t	sb_rblocks;	/* number of realtime blocks */
	xfs_drtbno_t	sb_rextents;	/* number of realtime extents */
	uuid_t		sb_uuid;	/* file system unique id */
	xfs_dfsbno_t	sb_logstart;	/* starting block of log if internal */
	xfs_ino_t	sb_rootino;	/* root inode number */
	xfs_ino_t	sb_rbmino;	/* bitmap inode for realtime extents */
	xfs_ino_t	sb_rsumino;	/* summary inode for rt bitmap */
	xfs_agblock_t	sb_rextsize;	/* realtime extent size, blocks */
	xfs_agblock_t	sb_agblocks;	/* size of an allocation group */
	xfs_agnumber_t	sb_agcount;	/* number of allocation groups */
	xfs_extlen_t	sb_rbmblocks;	/* number of rt bitmap blocks */
	xfs_extlen_t	sb_logblocks;	/* number of log blocks */
	__uint16_t	sb_versionnum;	/* header version == XFS_SB_VERSION */
	__uint16_t	sb_sectsize;	/* volume sector size, bytes */
	__uint16_t	sb_inodesize;	/* inode size, bytes */
	__uint16_t	sb_inopblock;	/* inodes per block */
	char		sb_fname[12];	/* file system name */
	__uint8_t	sb_blocklog;	/* log2 of sb_blocksize */
	__uint8_t	sb_sectlog;	/* log2 of sb_sectsize */
	__uint8_t	sb_inodelog;	/* log2 of sb_inodesize */
	__uint8_t	sb_inopblog;	/* log2 of sb_inopblock */
	__uint8_t	sb_agblklog;	/* log2 of sb_agblocks (rounded up) */
	__uint8_t	sb_rextslog;	/* log2 of sb_rextents */
	__uint8_t	sb_inprogress;	/* mkfs is in progress, don't mount */
	__uint8_t	sb_imax_pct;	/* max % of fs for inode space */
					/* statistics */
	/*
	 * These fields must remain contiguous.  If you really
	 * want to change their layout, make sure you fix the
	 * code in xfs_trans_apply_sb_deltas().
	 */
	__uint64_t	sb_icount;	/* allocated inodes */
	__uint64_t	sb_ifree;	/* free inodes */
	__uint64_t	sb_fdblocks;	/* free data blocks */
	__uint64_t	sb_frextents;	/* free realtime extents */
	/*
	 * End contiguous fields.
	 */
	xfs_ino_t	sb_uquotino;	/* user quota inode */
	xfs_ino_t	sb_gquotino;	/* group quota inode */
	__uint16_t	sb_qflags;	/* quota flags */
	__uint8_t	sb_flags;	/* misc. flags */
	__uint8_t	sb_shared_vn;	/* shared version number */
	xfs_extlen_t	sb_inoalignmt;	/* inode chunk alignment, fsblocks */
	__uint32_t	sb_unit;	/* stripe or raid unit */
	__uint32_t	sb_width;	/* stripe or raid width */
	__uint8_t	sb_dirblklog;	/* log2 of dir block size (fsbs) */
	__uint8_t	sb_logsectlog;	/* log2 of the log sector size */
	__uint16_t	sb_logsectsize;	/* sector size for the log, bytes */
	__uint32_t	sb_logsunit;	/* stripe unit size for the log */
	__uint32_t	sb_features2;	/* additional feature bits */

	/*
	 * bad features2 field as a result of failing to pad the sb
	 * structure to 64 bits. Some machines will be using this field
	 * for features2 bits. Easiest just to mark it bad and not use
	 * it for anything else.
	 */
	__uint32_t	sb_bad_features2;

	/* version 5 superblock fields start here */

	/* feature masks */
	__uint32_t	sb_features_compat;
	__uint32_t	sb_features_ro_compat;
	__uint32_t	sb_features_incompat;
	__uint32_t	sb_features_log_incompat;

	__uint32_t	sb_crc;		/* superblock crc */
	__uint32_t	sb_pad;

	xfs_ino_t	sb_pquotino;	/* project quota inode */
	xfs_lsn_t	sb_lsn;		/* last write sequence */

	/* must be padded to 64 bit alignment */
} xfs_sb_t;

/* Byte offset of the sb_crc member within struct xfs_sb. */
#define XFS_SB_CRC_OFF	offsetof(struct xfs_sb, sb_crc)
169 | |||
/*
 * Superblock - on disk version.  Must match the in core version above.
 * Must be padded to 64 bit alignment.
 *
 * Every multi-byte integer member is declared big-endian (__be16/32/64)
 * except sb_crc, which is declared little-endian (__le32).
 */
typedef struct xfs_dsb {
	__be32		sb_magicnum;	/* magic number == XFS_SB_MAGIC */
	__be32		sb_blocksize;	/* logical block size, bytes */
	__be64		sb_dblocks;	/* number of data blocks */
	__be64		sb_rblocks;	/* number of realtime blocks */
	__be64		sb_rextents;	/* number of realtime extents */
	uuid_t		sb_uuid;	/* file system unique id */
	__be64		sb_logstart;	/* starting block of log if internal */
	__be64		sb_rootino;	/* root inode number */
	__be64		sb_rbmino;	/* bitmap inode for realtime extents */
	__be64		sb_rsumino;	/* summary inode for rt bitmap */
	__be32		sb_rextsize;	/* realtime extent size, blocks */
	__be32		sb_agblocks;	/* size of an allocation group */
	__be32		sb_agcount;	/* number of allocation groups */
	__be32		sb_rbmblocks;	/* number of rt bitmap blocks */
	__be32		sb_logblocks;	/* number of log blocks */
	__be16		sb_versionnum;	/* header version == XFS_SB_VERSION */
	__be16		sb_sectsize;	/* volume sector size, bytes */
	__be16		sb_inodesize;	/* inode size, bytes */
	__be16		sb_inopblock;	/* inodes per block */
	char		sb_fname[12];	/* file system name */
	__u8		sb_blocklog;	/* log2 of sb_blocksize */
	__u8		sb_sectlog;	/* log2 of sb_sectsize */
	__u8		sb_inodelog;	/* log2 of sb_inodesize */
	__u8		sb_inopblog;	/* log2 of sb_inopblock */
	__u8		sb_agblklog;	/* log2 of sb_agblocks (rounded up) */
	__u8		sb_rextslog;	/* log2 of sb_rextents */
	__u8		sb_inprogress;	/* mkfs is in progress, don't mount */
	__u8		sb_imax_pct;	/* max % of fs for inode space */
					/* statistics */
	/*
	 * These fields must remain contiguous.  If you really
	 * want to change their layout, make sure you fix the
	 * code in xfs_trans_apply_sb_deltas().
	 */
	__be64		sb_icount;	/* allocated inodes */
	__be64		sb_ifree;	/* free inodes */
	__be64		sb_fdblocks;	/* free data blocks */
	__be64		sb_frextents;	/* free realtime extents */
	/*
	 * End contiguous fields.
	 */
	__be64		sb_uquotino;	/* user quota inode */
	__be64		sb_gquotino;	/* group quota inode */
	__be16		sb_qflags;	/* quota flags */
	__u8		sb_flags;	/* misc. flags */
	__u8		sb_shared_vn;	/* shared version number */
	__be32		sb_inoalignmt;	/* inode chunk alignment, fsblocks */
	__be32		sb_unit;	/* stripe or raid unit */
	__be32		sb_width;	/* stripe or raid width */
	__u8		sb_dirblklog;	/* log2 of dir block size (fsbs) */
	__u8		sb_logsectlog;	/* log2 of the log sector size */
	__be16		sb_logsectsize;	/* sector size for the log, bytes */
	__be32		sb_logsunit;	/* stripe unit size for the log */
	__be32		sb_features2;	/* additional feature bits */
	/*
	 * bad features2 field as a result of failing to pad the sb
	 * structure to 64 bits. Some machines will be using this field
	 * for features2 bits. Easiest just to mark it bad and not use
	 * it for anything else.
	 */
	__be32		sb_bad_features2;

	/* version 5 superblock fields start here */

	/* feature masks */
	__be32		sb_features_compat;
	__be32		sb_features_ro_compat;
	__be32		sb_features_incompat;
	__be32		sb_features_log_incompat;

	__le32		sb_crc;		/* superblock crc (little-endian) */
	__be32		sb_pad;

	__be64		sb_pquotino;	/* project quota inode */
	__be64		sb_lsn;		/* last write sequence */

	/* must be padded to 64 bit alignment */
} xfs_dsb_t;
253 | |||
/*
 * Sequence number values for the fields.
 *
 * There is one enumerator per member of struct xfs_sb, listed in the same
 * order as the members are declared; XFS_SBS_FIELDCOUNT comes last and so
 * equals the number of fields.  The values are used as bit positions by
 * XFS_SB_MVAL() (a 1LL shift, so they must stay below 64) and as indices
 * into xfs_sb_info[] by xfs_mod_sb().
 */
typedef enum {
	XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS,
	XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO,
	XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS,
	XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS,
	XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE,
	XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG,
	XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG,
	XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT,
	XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO,
	XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
	XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
	XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
	XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT,
	XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT,
	XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD,
	XFS_SBS_PQUOTINO, XFS_SBS_LSN,
	XFS_SBS_FIELDCOUNT
} xfs_sb_field_t;
276 | |||
/*
 * Mask values, defined based on the xfs_sb_field_t values.
 * Only define the ones we're using.
 *
 * XFS_SB_MVAL(x) is the single-bit 64-bit mask for field XFS_SBS_x.
 */
#define XFS_SB_MVAL(x)		(1LL << XFS_SBS_ ## x)
#define XFS_SB_UUID		XFS_SB_MVAL(UUID)
#define XFS_SB_FNAME		XFS_SB_MVAL(FNAME)
#define XFS_SB_ROOTINO		XFS_SB_MVAL(ROOTINO)
#define XFS_SB_RBMINO		XFS_SB_MVAL(RBMINO)
#define XFS_SB_RSUMINO		XFS_SB_MVAL(RSUMINO)
#define XFS_SB_VERSIONNUM	XFS_SB_MVAL(VERSIONNUM)
#define XFS_SB_UQUOTINO		XFS_SB_MVAL(UQUOTINO)
#define XFS_SB_GQUOTINO		XFS_SB_MVAL(GQUOTINO)
#define XFS_SB_QFLAGS		XFS_SB_MVAL(QFLAGS)
#define XFS_SB_SHARED_VN	XFS_SB_MVAL(SHARED_VN)
#define XFS_SB_UNIT		XFS_SB_MVAL(UNIT)
#define XFS_SB_WIDTH		XFS_SB_MVAL(WIDTH)
#define XFS_SB_ICOUNT		XFS_SB_MVAL(ICOUNT)
#define XFS_SB_IFREE		XFS_SB_MVAL(IFREE)
#define XFS_SB_FDBLOCKS		XFS_SB_MVAL(FDBLOCKS)
#define XFS_SB_FEATURES2	XFS_SB_MVAL(FEATURES2)
#define XFS_SB_BAD_FEATURES2	XFS_SB_MVAL(BAD_FEATURES2)
#define XFS_SB_FEATURES_COMPAT	XFS_SB_MVAL(FEATURES_COMPAT)
#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT)
#define XFS_SB_CRC		XFS_SB_MVAL(CRC)
#define XFS_SB_PQUOTINO		XFS_SB_MVAL(PQUOTINO)
/* Number of superblock fields, i.e. number of valid mask bits. */
#define	XFS_SB_NUM_BITS		((int)XFS_SBS_FIELDCOUNT)
/* Mask with the bit of every superblock field set. */
#define	XFS_SB_ALL_BITS		((1LL << XFS_SB_NUM_BITS) - 1)
/*
 * Field bits that xfs_mod_sb() asserts its lowest and highest requested
 * field against.  XFS_SB_FNAME and XFS_SB_CRC are defined above but are
 * not part of this set.
 */
#define	XFS_SB_MOD_BITS		\
	(XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
	 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
	 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
	 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
	 XFS_SB_BAD_FEATURES2 | XFS_SB_FEATURES_COMPAT | \
	 XFS_SB_FEATURES_RO_COMPAT | XFS_SB_FEATURES_INCOMPAT | \
	 XFS_SB_FEATURES_LOG_INCOMPAT | XFS_SB_PQUOTINO)
315 | |||
316 | |||
317 | /* | ||
318 | * Misc. Flags - warning - these will be cleared by xfs_repair unless | ||
319 | * a feature bit is set when the flag is used. | ||
320 | */ | ||
321 | #define XFS_SBF_NOFLAGS 0x00 /* no flags set */ | ||
322 | #define XFS_SBF_READONLY 0x01 /* only read-only mounts allowed */ | ||
323 | |||
324 | /* | ||
325 | * define max. shared version we can interoperate with | ||
326 | */ | ||
327 | #define XFS_SB_MAX_SHARED_VN 0 | ||
328 | |||
/* Extract the version number (low four bits) from sbp->sb_versionnum. */
#define	XFS_SB_VERSION_NUM(sbp)	((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
330 | |||
331 | /* | ||
332 | * The first XFS version we support is a v4 superblock with V2 directories. | ||
333 | */ | ||
334 | static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp) | ||
335 | { | ||
336 | if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) | ||
337 | return false; | ||
338 | |||
339 | /* check for unknown features in the fs */ | ||
340 | if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || | ||
341 | ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && | ||
342 | (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) | ||
343 | return false; | ||
344 | |||
345 | return true; | ||
346 | } | ||
347 | |||
348 | static inline bool xfs_sb_good_version(struct xfs_sb *sbp) | ||
349 | { | ||
350 | if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) | ||
351 | return true; | ||
352 | if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) | ||
353 | return xfs_sb_good_v4_features(sbp); | ||
354 | return false; | ||
355 | } | ||
356 | |||
357 | /* | ||
358 | * Detect a mismatched features2 field. Older kernels read/wrote | ||
359 | * this into the wrong slot, so to be safe we keep them in sync. | ||
360 | */ | ||
361 | static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp) | ||
362 | { | ||
363 | return sbp->sb_bad_features2 != sbp->sb_features2; | ||
364 | } | ||
365 | |||
366 | static inline bool xfs_sb_version_hasattr(struct xfs_sb *sbp) | ||
367 | { | ||
368 | return (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT); | ||
369 | } | ||
370 | |||
371 | static inline void xfs_sb_version_addattr(struct xfs_sb *sbp) | ||
372 | { | ||
373 | sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT; | ||
374 | } | ||
375 | |||
376 | static inline bool xfs_sb_version_hasquota(struct xfs_sb *sbp) | ||
377 | { | ||
378 | return (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT); | ||
379 | } | ||
380 | |||
381 | static inline void xfs_sb_version_addquota(struct xfs_sb *sbp) | ||
382 | { | ||
383 | sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT; | ||
384 | } | ||
385 | |||
386 | static inline bool xfs_sb_version_hasalign(struct xfs_sb *sbp) | ||
387 | { | ||
388 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || | ||
389 | (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT)); | ||
390 | } | ||
391 | |||
392 | static inline bool xfs_sb_version_hasdalign(struct xfs_sb *sbp) | ||
393 | { | ||
394 | return (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); | ||
395 | } | ||
396 | |||
397 | static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp) | ||
398 | { | ||
399 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || | ||
400 | (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); | ||
401 | } | ||
402 | |||
403 | static inline bool xfs_sb_version_hasextflgbit(struct xfs_sb *sbp) | ||
404 | { | ||
405 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || | ||
406 | (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT); | ||
407 | } | ||
408 | |||
409 | static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp) | ||
410 | { | ||
411 | return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT); | ||
412 | } | ||
413 | |||
414 | static inline bool xfs_sb_version_hasasciici(struct xfs_sb *sbp) | ||
415 | { | ||
416 | return (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); | ||
417 | } | ||
418 | |||
419 | static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp) | ||
420 | { | ||
421 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || | ||
422 | (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); | ||
423 | } | ||
424 | |||
425 | /* | ||
426 | * sb_features2 bit version macros. | ||
427 | */ | ||
428 | static inline bool xfs_sb_version_haslazysbcount(struct xfs_sb *sbp) | ||
429 | { | ||
430 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || | ||
431 | (xfs_sb_version_hasmorebits(sbp) && | ||
432 | (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)); | ||
433 | } | ||
434 | |||
435 | static inline bool xfs_sb_version_hasattr2(struct xfs_sb *sbp) | ||
436 | { | ||
437 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || | ||
438 | (xfs_sb_version_hasmorebits(sbp) && | ||
439 | (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT)); | ||
440 | } | ||
441 | |||
442 | static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp) | ||
443 | { | ||
444 | sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; | ||
445 | sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT; | ||
446 | sbp->sb_bad_features2 |= XFS_SB_VERSION2_ATTR2BIT; | ||
447 | } | ||
448 | |||
449 | static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp) | ||
450 | { | ||
451 | sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; | ||
452 | sbp->sb_bad_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; | ||
453 | if (!sbp->sb_features2) | ||
454 | sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; | ||
455 | } | ||
456 | |||
457 | static inline bool xfs_sb_version_hasprojid32bit(struct xfs_sb *sbp) | ||
458 | { | ||
459 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || | ||
460 | (xfs_sb_version_hasmorebits(sbp) && | ||
461 | (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT)); | ||
462 | } | ||
463 | |||
464 | static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp) | ||
465 | { | ||
466 | sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; | ||
467 | sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT; | ||
468 | sbp->sb_bad_features2 |= XFS_SB_VERSION2_PROJID32BIT; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * Extended v5 superblock feature masks. These are to be used for new v5 | ||
473 | * superblock features only. | ||
474 | * | ||
475 | * Compat features are new features that old kernels will not notice or affect | ||
476 | * and so can mount read-write without issues. | ||
477 | * | ||
478 | * RO-Compat (read only) are features that old kernels can read but will break | ||
479 | * if they write. Hence only read-only mounts of such filesystems are allowed on | ||
480 | * kernels that don't support the feature bit. | ||
481 | * | ||
482 | * InCompat features are features which old kernels will not understand and so | ||
483 | * must not mount. | ||
484 | * | ||
485 | * Log-InCompat features are for changes to log formats or new transactions that | ||
486 | * can't be replayed on older kernels. The fields are set when the filesystem is | ||
487 | * mounted, and a clean unmount clears the fields. | ||
488 | */ | ||
489 | #define XFS_SB_FEAT_COMPAT_ALL 0 | ||
490 | #define XFS_SB_FEAT_COMPAT_UNKNOWN ~XFS_SB_FEAT_COMPAT_ALL | ||
491 | static inline bool | ||
492 | xfs_sb_has_compat_feature( | ||
493 | struct xfs_sb *sbp, | ||
494 | __uint32_t feature) | ||
495 | { | ||
496 | return (sbp->sb_features_compat & feature) != 0; | ||
497 | } | ||
498 | |||
499 | #define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ | ||
500 | #define XFS_SB_FEAT_RO_COMPAT_ALL \ | ||
501 | (XFS_SB_FEAT_RO_COMPAT_FINOBT) | ||
502 | #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL | ||
503 | static inline bool | ||
504 | xfs_sb_has_ro_compat_feature( | ||
505 | struct xfs_sb *sbp, | ||
506 | __uint32_t feature) | ||
507 | { | ||
508 | return (sbp->sb_features_ro_compat & feature) != 0; | ||
509 | } | ||
510 | |||
511 | #define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */ | ||
512 | #define XFS_SB_FEAT_INCOMPAT_ALL \ | ||
513 | (XFS_SB_FEAT_INCOMPAT_FTYPE) | ||
514 | |||
515 | #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL | ||
516 | static inline bool | ||
517 | xfs_sb_has_incompat_feature( | ||
518 | struct xfs_sb *sbp, | ||
519 | __uint32_t feature) | ||
520 | { | ||
521 | return (sbp->sb_features_incompat & feature) != 0; | ||
522 | } | ||
523 | |||
524 | #define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0 | ||
525 | #define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL | ||
526 | static inline bool | ||
527 | xfs_sb_has_incompat_log_feature( | ||
528 | struct xfs_sb *sbp, | ||
529 | __uint32_t feature) | ||
530 | { | ||
531 | return (sbp->sb_features_log_incompat & feature) != 0; | ||
532 | } | ||
533 | |||
534 | /* | ||
535 | * V5 superblock specific feature checks | ||
536 | */ | ||
537 | static inline int xfs_sb_version_hascrc(struct xfs_sb *sbp) | ||
538 | { | ||
539 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; | ||
540 | } | ||
541 | |||
542 | static inline int xfs_sb_version_has_pquotino(struct xfs_sb *sbp) | ||
543 | { | ||
544 | return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; | ||
545 | } | ||
546 | |||
547 | static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp) | ||
548 | { | ||
549 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && | ||
550 | xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_FTYPE)) || | ||
551 | (xfs_sb_version_hasmorebits(sbp) && | ||
552 | (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)); | ||
553 | } | ||
554 | |||
555 | static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp) | ||
556 | { | ||
557 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && | ||
558 | (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT); | ||
559 | } | ||
560 | |||
561 | /* | ||
562 | * end of superblock version macros | ||
563 | */ | ||
564 | |||
565 | static inline bool | ||
566 | xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) | ||
567 | { | ||
568 | return (ino == sbp->sb_uquotino || | ||
569 | ino == sbp->sb_gquotino || | ||
570 | ino == sbp->sb_pquotino); | ||
571 | } | ||
572 | |||
#define	XFS_SB_DADDR		((xfs_daddr_t)0) /* daddr in filesystem/ag */
#define	XFS_SB_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_SB_DADDR)
/* View a buffer's data (bp->b_addr) as an on-disk superblock. */
#define XFS_BUF_TO_SBP(bp)	((xfs_dsb_t *)((bp)->b_addr))

/* Basic-block address @d converted to a filesystem block, truncating. */
#define	XFS_HDR_BLOCK(mp,d)	((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d))
/* Disk address <-> filesystem block, going through (agno, agbno) pairs. */
#define	XFS_DADDR_TO_FSB(mp,d)	XFS_AGB_TO_FSB(mp, \
			xfs_daddr_to_agno(mp,d), xfs_daddr_to_agbno(mp,d))
#define	XFS_FSB_TO_DADDR(mp,fsbno)	XFS_AGB_TO_DADDR(mp, \
			XFS_FSB_TO_AGNO(mp,fsbno), XFS_FSB_TO_AGBNO(mp,fsbno))

/*
 * File system sector to basic block conversions.
 */
#define XFS_FSS_TO_BB(mp,sec)	((sec) << (mp)->m_sectbb_log)

/*
 * File system block to basic block conversions.
 *
 * XFS_BB_TO_FSB rounds a partial block up; XFS_BB_TO_FSBT truncates.
 * Note that the shifts are performed in the type of the argument.
 */
#define	XFS_FSB_TO_BB(mp,fsbno)	((fsbno) << (mp)->m_blkbb_log)
#define	XFS_BB_TO_FSB(mp,bb)	\
	(((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log)
#define	XFS_BB_TO_FSBT(mp,bb)	((bb) >> (mp)->m_blkbb_log)

/*
 * File system block to byte conversions.
 *
 * XFS_B_TO_FSB adds m_blockmask before shifting and so rounds up (assuming
 * m_blockmask is blocksize - 1 - TODO confirm where it is initialised);
 * XFS_B_TO_FSBT truncates.  XFS_B_FSB_OFFSET masks @b with m_blockmask.
 */
#define XFS_FSB_TO_B(mp,fsbno)	((xfs_fsize_t)(fsbno) << (mp)->m_sb.sb_blocklog)
#define XFS_B_TO_FSB(mp,b)	\
	((((__uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog)
#define XFS_B_TO_FSBT(mp,b)	(((__uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
#define XFS_B_FSB_OFFSET(mp,b)	((b) & (mp)->m_blockmask)
604 | |||
605 | /* | ||
606 | * perag get/put wrappers for ref counting | ||
607 | */ | ||
608 | extern struct xfs_perag *xfs_perag_get(struct xfs_mount *, xfs_agnumber_t); | ||
609 | extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t, | ||
610 | int tag); | ||
611 | extern void xfs_perag_put(struct xfs_perag *pag); | ||
612 | extern int xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t); | ||
613 | |||
614 | extern void xfs_sb_calc_crc(struct xfs_buf *); | ||
615 | extern void xfs_mod_sb(struct xfs_trans *, __int64_t); | ||
616 | extern void xfs_sb_mount_common(struct xfs_mount *, struct xfs_sb *); | ||
617 | extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); | ||
618 | extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); | ||
619 | extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); | ||
620 | |||
621 | #endif /* __XFS_SB_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h new file mode 100644 index 000000000000..82404da2ca67 --- /dev/null +++ b/fs/xfs/libxfs/xfs_shared.h | |||
@@ -0,0 +1,246 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2013 Red Hat, Inc. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #ifndef __XFS_SHARED_H__ | ||
20 | #define __XFS_SHARED_H__ | ||
21 | |||
22 | /* | ||
23 | * Definitions shared between kernel and userspace that don't fit into any other | ||
24 | * header file that is shared with userspace. | ||
25 | */ | ||
26 | struct xfs_ifork; | ||
27 | struct xfs_buf; | ||
28 | struct xfs_buf_ops; | ||
29 | struct xfs_mount; | ||
30 | struct xfs_trans; | ||
31 | struct xfs_inode; | ||
32 | |||
/*
 * Buffer verifier operations are widely used, including userspace tools.
 *
 * Each verifier ops table is declared exactly once here; the defining
 * translation unit provides the single definition.
 */
extern const struct xfs_buf_ops xfs_agf_buf_ops;
extern const struct xfs_buf_ops xfs_agfl_buf_ops;
extern const struct xfs_buf_ops xfs_agi_buf_ops;
extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
extern const struct xfs_buf_ops xfs_da3_node_buf_ops;
extern const struct xfs_buf_ops xfs_dquot_buf_ops;
extern const struct xfs_buf_ops xfs_inobt_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
extern const struct xfs_buf_ops xfs_sb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops;
55 | |||
/*
 * Transaction types.  Used to distinguish types of buffers.  These never reach
 * the log.
 */
#define XFS_TRANS_SETATTR_NOT_SIZE	1
#define XFS_TRANS_SETATTR_SIZE		2
#define XFS_TRANS_INACTIVE		3
#define XFS_TRANS_CREATE		4
#define XFS_TRANS_CREATE_TRUNC		5
#define XFS_TRANS_TRUNCATE_FILE		6
#define XFS_TRANS_REMOVE		7
#define XFS_TRANS_LINK			8
#define XFS_TRANS_RENAME		9
#define XFS_TRANS_MKDIR			10
#define XFS_TRANS_RMDIR			11
#define XFS_TRANS_SYMLINK		12
#define XFS_TRANS_SET_DMATTRS		13
#define XFS_TRANS_GROWFS		14
#define XFS_TRANS_STRAT_WRITE		15
#define XFS_TRANS_DIOSTRAT		16
/* 17 was XFS_TRANS_WRITE_SYNC */
#define XFS_TRANS_WRITEID		18
#define XFS_TRANS_ADDAFORK		19
#define XFS_TRANS_ATTRINVAL		20
#define XFS_TRANS_ATRUNCATE		21
#define XFS_TRANS_ATTR_SET		22
#define XFS_TRANS_ATTR_RM		23
#define XFS_TRANS_ATTR_FLAG		24
#define XFS_TRANS_CLEAR_AGI_BUCKET	25
#define XFS_TRANS_QM_SBCHANGE		26
/*
 * Dummy entries since we use the transaction type to index into the
 * trans_type[] in xlog_recover_print_trans_head()
 */
#define XFS_TRANS_DUMMY1		27
#define XFS_TRANS_DUMMY2		28
#define XFS_TRANS_QM_QUOTAOFF		29
#define XFS_TRANS_QM_DQALLOC		30
#define XFS_TRANS_QM_SETQLIM		31
#define XFS_TRANS_QM_DQCLUSTER		32
#define XFS_TRANS_QM_QINOCREATE		33
#define XFS_TRANS_QM_QUOTAOFF_END	34
#define XFS_TRANS_SB_UNIT		35
#define XFS_TRANS_FSYNC_TS		36
#define XFS_TRANS_GROWFSRT_ALLOC	37
#define XFS_TRANS_GROWFSRT_ZERO		38
#define XFS_TRANS_GROWFSRT_FREE		39
#define XFS_TRANS_SWAPEXT		40
#define XFS_TRANS_SB_COUNT		41
#define XFS_TRANS_CHECKPOINT		42
#define XFS_TRANS_ICREATE		43
#define XFS_TRANS_CREATE_TMPFILE	44
#define XFS_TRANS_TYPE_MAX		44
/* new transaction types need to be reflected in xfs_logprint(8) */

/*
 * { value, name } initializer list covering every transaction type defined
 * above, followed by the XLOG_UNMOUNT_REC_TYPE pseudo-type.  Every
 * XFS_TRANS_* type value must have an entry here so that consumers of the
 * list can always resolve a type to its symbolic name.
 */
#define XFS_TRANS_TYPES \
	{ XFS_TRANS_SETATTR_NOT_SIZE,	"SETATTR_NOT_SIZE" }, \
	{ XFS_TRANS_SETATTR_SIZE,	"SETATTR_SIZE" }, \
	{ XFS_TRANS_INACTIVE,		"INACTIVE" }, \
	{ XFS_TRANS_CREATE,		"CREATE" }, \
	{ XFS_TRANS_CREATE_TMPFILE,	"CREATE_TMPFILE" }, \
	{ XFS_TRANS_CREATE_TRUNC,	"CREATE_TRUNC" }, \
	{ XFS_TRANS_TRUNCATE_FILE,	"TRUNCATE_FILE" }, \
	{ XFS_TRANS_REMOVE,		"REMOVE" }, \
	{ XFS_TRANS_LINK,		"LINK" }, \
	{ XFS_TRANS_RENAME,		"RENAME" }, \
	{ XFS_TRANS_MKDIR,		"MKDIR" }, \
	{ XFS_TRANS_RMDIR,		"RMDIR" }, \
	{ XFS_TRANS_SYMLINK,		"SYMLINK" }, \
	{ XFS_TRANS_SET_DMATTRS,	"SET_DMATTRS" }, \
	{ XFS_TRANS_GROWFS,		"GROWFS" }, \
	{ XFS_TRANS_STRAT_WRITE,	"STRAT_WRITE" }, \
	{ XFS_TRANS_DIOSTRAT,		"DIOSTRAT" }, \
	{ XFS_TRANS_WRITEID,		"WRITEID" }, \
	{ XFS_TRANS_ADDAFORK,		"ADDAFORK" }, \
	{ XFS_TRANS_ATTRINVAL,		"ATTRINVAL" }, \
	{ XFS_TRANS_ATRUNCATE,		"ATRUNCATE" }, \
	{ XFS_TRANS_ATTR_SET,		"ATTR_SET" }, \
	{ XFS_TRANS_ATTR_RM,		"ATTR_RM" }, \
	{ XFS_TRANS_ATTR_FLAG,		"ATTR_FLAG" }, \
	{ XFS_TRANS_CLEAR_AGI_BUCKET,	"CLEAR_AGI_BUCKET" }, \
	{ XFS_TRANS_QM_SBCHANGE,	"QM_SBCHANGE" }, \
	{ XFS_TRANS_QM_QUOTAOFF,	"QM_QUOTAOFF" }, \
	{ XFS_TRANS_QM_DQALLOC,		"QM_DQALLOC" }, \
	{ XFS_TRANS_QM_SETQLIM,		"QM_SETQLIM" }, \
	{ XFS_TRANS_QM_DQCLUSTER,	"QM_DQCLUSTER" }, \
	{ XFS_TRANS_QM_QINOCREATE,	"QM_QINOCREATE" }, \
	{ XFS_TRANS_QM_QUOTAOFF_END,	"QM_QOFF_END" }, \
	{ XFS_TRANS_SB_UNIT,		"SB_UNIT" }, \
	{ XFS_TRANS_FSYNC_TS,		"FSYNC_TS" }, \
	{ XFS_TRANS_GROWFSRT_ALLOC,	"GROWFSRT_ALLOC" }, \
	{ XFS_TRANS_GROWFSRT_ZERO,	"GROWFSRT_ZERO" }, \
	{ XFS_TRANS_GROWFSRT_FREE,	"GROWFSRT_FREE" }, \
	{ XFS_TRANS_SWAPEXT,		"SWAPEXT" }, \
	{ XFS_TRANS_SB_COUNT,		"SB_COUNT" }, \
	{ XFS_TRANS_CHECKPOINT,		"CHECKPOINT" }, \
	{ XFS_TRANS_ICREATE,		"ICREATE" }, \
	{ XFS_TRANS_DUMMY1,		"DUMMY1" }, \
	{ XFS_TRANS_DUMMY2,		"DUMMY2" }, \
	{ XLOG_UNMOUNT_REC_TYPE,	"UNMOUNT" }
155 | |||
156 | /* | ||
157 | * This structure is used to track log items associated with | ||
158 | * a transaction. It points to the log item and keeps some | ||
159 | * flags to track the state of the log item. It also tracks | ||
160 | * the amount of space needed to log the item it describes | ||
161 | * once we get to commit processing (see xfs_trans_commit()). | ||
162 | */ | ||
163 | struct xfs_log_item_desc { | ||
164 | struct xfs_log_item *lid_item; | ||
165 | struct list_head lid_trans; | ||
166 | unsigned char lid_flags; | ||
167 | }; | ||
168 | |||
169 | #define XFS_LID_DIRTY 0x1 | ||
170 | |||
171 | /* log size calculation functions */ | ||
172 | int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); | ||
173 | int xfs_log_calc_minimum_size(struct xfs_mount *); | ||
174 | |||
175 | |||
176 | /* | ||
177 | * Values for t_flags. | ||
178 | */ | ||
179 | #define XFS_TRANS_DIRTY 0x01 /* something needs to be logged */ | ||
180 | #define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */ | ||
181 | #define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */ | ||
182 | #define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ | ||
183 | #define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ | ||
184 | #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ | ||
185 | #define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer | ||
186 | count in superblock */ | ||
187 | /* | ||
188 | * Values for call flags parameter. | ||
189 | */ | ||
190 | #define XFS_TRANS_RELEASE_LOG_RES 0x4 | ||
191 | #define XFS_TRANS_ABORT 0x8 | ||
192 | |||
193 | /* | ||
194 | * Field values for xfs_trans_mod_sb. | ||
195 | */ | ||
196 | #define XFS_TRANS_SB_ICOUNT 0x00000001 | ||
197 | #define XFS_TRANS_SB_IFREE 0x00000002 | ||
198 | #define XFS_TRANS_SB_FDBLOCKS 0x00000004 | ||
199 | #define XFS_TRANS_SB_RES_FDBLOCKS 0x00000008 | ||
200 | #define XFS_TRANS_SB_FREXTENTS 0x00000010 | ||
201 | #define XFS_TRANS_SB_RES_FREXTENTS 0x00000020 | ||
202 | #define XFS_TRANS_SB_DBLOCKS 0x00000040 | ||
203 | #define XFS_TRANS_SB_AGCOUNT 0x00000080 | ||
204 | #define XFS_TRANS_SB_IMAXPCT 0x00000100 | ||
205 | #define XFS_TRANS_SB_REXTSIZE 0x00000200 | ||
206 | #define XFS_TRANS_SB_RBMBLOCKS 0x00000400 | ||
207 | #define XFS_TRANS_SB_RBLOCKS 0x00000800 | ||
208 | #define XFS_TRANS_SB_REXTENTS 0x00001000 | ||
209 | #define XFS_TRANS_SB_REXTSLOG 0x00002000 | ||
210 | |||
211 | /* | ||
212 | * Here we centralize the specification of XFS meta-data buffer reference count | ||
213 | * values. This determines how hard the buffer cache tries to hold onto the | ||
214 | * buffer. | ||
215 | */ | ||
216 | #define XFS_AGF_REF 4 | ||
217 | #define XFS_AGI_REF 4 | ||
218 | #define XFS_AGFL_REF 3 | ||
219 | #define XFS_INO_BTREE_REF 3 | ||
220 | #define XFS_ALLOC_BTREE_REF 2 | ||
221 | #define XFS_BMAP_BTREE_REF 2 | ||
222 | #define XFS_DIR_BTREE_REF 2 | ||
223 | #define XFS_INO_REF 2 | ||
224 | #define XFS_ATTR_BTREE_REF 1 | ||
225 | #define XFS_DQUOT_REF 1 | ||
226 | |||
227 | /* | ||
228 | * Flags for xfs_trans_ichgtime(). | ||
229 | */ | ||
230 | #define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ | ||
231 | #define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ | ||
232 | #define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */ | ||
233 | |||
234 | |||
235 | /* | ||
236 | * Symlink decoding/encoding functions | ||
237 | */ | ||
238 | int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen); | ||
239 | int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, | ||
240 | uint32_t size, struct xfs_buf *bp); | ||
241 | bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset, | ||
242 | uint32_t size, struct xfs_buf *bp); | ||
243 | void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, | ||
244 | struct xfs_inode *ip, struct xfs_ifork *ifp); | ||
245 | |||
246 | #endif /* __XFS_SHARED_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c new file mode 100644 index 000000000000..5782f037eab4 --- /dev/null +++ b/fs/xfs/libxfs/xfs_symlink_remote.c | |||
@@ -0,0 +1,201 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. | ||
3 | * Copyright (c) 2012-2013 Red Hat, Inc. | ||
4 | * All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #include "xfs.h" | ||
20 | #include "xfs_fs.h" | ||
21 | #include "xfs_format.h" | ||
22 | #include "xfs_log_format.h" | ||
23 | #include "xfs_shared.h" | ||
24 | #include "xfs_trans_resv.h" | ||
25 | #include "xfs_ag.h" | ||
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_bmap_btree.h" | ||
29 | #include "xfs_inode.h" | ||
30 | #include "xfs_error.h" | ||
31 | #include "xfs_trace.h" | ||
32 | #include "xfs_symlink.h" | ||
33 | #include "xfs_cksum.h" | ||
34 | #include "xfs_trans.h" | ||
35 | #include "xfs_buf_item.h" | ||
36 | |||
37 | |||
38 | /* | ||
39 | * Each contiguous block has a header, so it is not just a simple pathlen | ||
40 | * to FSB conversion. | ||
41 | */ | ||
42 | int | ||
43 | xfs_symlink_blocks( | ||
44 | struct xfs_mount *mp, | ||
45 | int pathlen) | ||
46 | { | ||
47 | int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize); | ||
48 | |||
49 | return (pathlen + buflen - 1) / buflen; | ||
50 | } | ||
51 | |||
52 | int | ||
53 | xfs_symlink_hdr_set( | ||
54 | struct xfs_mount *mp, | ||
55 | xfs_ino_t ino, | ||
56 | uint32_t offset, | ||
57 | uint32_t size, | ||
58 | struct xfs_buf *bp) | ||
59 | { | ||
60 | struct xfs_dsymlink_hdr *dsl = bp->b_addr; | ||
61 | |||
62 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
63 | return 0; | ||
64 | |||
65 | dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC); | ||
66 | dsl->sl_offset = cpu_to_be32(offset); | ||
67 | dsl->sl_bytes = cpu_to_be32(size); | ||
68 | uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_uuid); | ||
69 | dsl->sl_owner = cpu_to_be64(ino); | ||
70 | dsl->sl_blkno = cpu_to_be64(bp->b_bn); | ||
71 | bp->b_ops = &xfs_symlink_buf_ops; | ||
72 | |||
73 | return sizeof(struct xfs_dsymlink_hdr); | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * Checking of the symlink header is split into two parts. the verifier does | ||
78 | * CRC, location and bounds checking, the unpacking function checks the path | ||
79 | * parameters and owner. | ||
80 | */ | ||
81 | bool | ||
82 | xfs_symlink_hdr_ok( | ||
83 | xfs_ino_t ino, | ||
84 | uint32_t offset, | ||
85 | uint32_t size, | ||
86 | struct xfs_buf *bp) | ||
87 | { | ||
88 | struct xfs_dsymlink_hdr *dsl = bp->b_addr; | ||
89 | |||
90 | if (offset != be32_to_cpu(dsl->sl_offset)) | ||
91 | return false; | ||
92 | if (size != be32_to_cpu(dsl->sl_bytes)) | ||
93 | return false; | ||
94 | if (ino != be64_to_cpu(dsl->sl_owner)) | ||
95 | return false; | ||
96 | |||
97 | /* ok */ | ||
98 | return true; | ||
99 | } | ||
100 | |||
101 | static bool | ||
102 | xfs_symlink_verify( | ||
103 | struct xfs_buf *bp) | ||
104 | { | ||
105 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
106 | struct xfs_dsymlink_hdr *dsl = bp->b_addr; | ||
107 | |||
108 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
109 | return false; | ||
110 | if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC)) | ||
111 | return false; | ||
112 | if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_uuid)) | ||
113 | return false; | ||
114 | if (bp->b_bn != be64_to_cpu(dsl->sl_blkno)) | ||
115 | return false; | ||
116 | if (be32_to_cpu(dsl->sl_offset) + | ||
117 | be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN) | ||
118 | return false; | ||
119 | if (dsl->sl_owner == 0) | ||
120 | return false; | ||
121 | |||
122 | return true; | ||
123 | } | ||
124 | |||
125 | static void | ||
126 | xfs_symlink_read_verify( | ||
127 | struct xfs_buf *bp) | ||
128 | { | ||
129 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
130 | |||
131 | /* no verification of non-crc buffers */ | ||
132 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
133 | return; | ||
134 | |||
135 | if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) | ||
136 | xfs_buf_ioerror(bp, -EFSBADCRC); | ||
137 | else if (!xfs_symlink_verify(bp)) | ||
138 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
139 | |||
140 | if (bp->b_error) | ||
141 | xfs_verifier_error(bp); | ||
142 | } | ||
143 | |||
144 | static void | ||
145 | xfs_symlink_write_verify( | ||
146 | struct xfs_buf *bp) | ||
147 | { | ||
148 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
149 | struct xfs_buf_log_item *bip = bp->b_fspriv; | ||
150 | |||
151 | /* no verification of non-crc buffers */ | ||
152 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
153 | return; | ||
154 | |||
155 | if (!xfs_symlink_verify(bp)) { | ||
156 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
157 | xfs_verifier_error(bp); | ||
158 | return; | ||
159 | } | ||
160 | |||
161 | if (bip) { | ||
162 | struct xfs_dsymlink_hdr *dsl = bp->b_addr; | ||
163 | dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn); | ||
164 | } | ||
165 | xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF); | ||
166 | } | ||
167 | |||
168 | const struct xfs_buf_ops xfs_symlink_buf_ops = { | ||
169 | .verify_read = xfs_symlink_read_verify, | ||
170 | .verify_write = xfs_symlink_write_verify, | ||
171 | }; | ||
172 | |||
173 | void | ||
174 | xfs_symlink_local_to_remote( | ||
175 | struct xfs_trans *tp, | ||
176 | struct xfs_buf *bp, | ||
177 | struct xfs_inode *ip, | ||
178 | struct xfs_ifork *ifp) | ||
179 | { | ||
180 | struct xfs_mount *mp = ip->i_mount; | ||
181 | char *buf; | ||
182 | |||
183 | if (!xfs_sb_version_hascrc(&mp->m_sb)) { | ||
184 | bp->b_ops = NULL; | ||
185 | memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); | ||
186 | return; | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * As this symlink fits in an inode literal area, it must also fit in | ||
191 | * the smallest buffer the filesystem supports. | ||
192 | */ | ||
193 | ASSERT(BBTOB(bp->b_length) >= | ||
194 | ifp->if_bytes + sizeof(struct xfs_dsymlink_hdr)); | ||
195 | |||
196 | bp->b_ops = &xfs_symlink_buf_ops; | ||
197 | |||
198 | buf = bp->b_addr; | ||
199 | buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp); | ||
200 | memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes); | ||
201 | } | ||
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c new file mode 100644 index 000000000000..f2bda7c76b8a --- /dev/null +++ b/fs/xfs/libxfs/xfs_trans_resv.c | |||
@@ -0,0 +1,894 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * Copyright (C) 2010 Red Hat, Inc. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it would be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write the Free Software Foundation, | ||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | */ | ||
19 | #include "xfs.h" | ||
20 | #include "xfs_fs.h" | ||
21 | #include "xfs_shared.h" | ||
22 | #include "xfs_format.h" | ||
23 | #include "xfs_log_format.h" | ||
24 | #include "xfs_trans_resv.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_da_format.h" | ||
29 | #include "xfs_da_btree.h" | ||
30 | #include "xfs_inode.h" | ||
31 | #include "xfs_bmap_btree.h" | ||
32 | #include "xfs_ialloc.h" | ||
33 | #include "xfs_quota.h" | ||
34 | #include "xfs_trans.h" | ||
35 | #include "xfs_qm.h" | ||
36 | #include "xfs_trans_space.h" | ||
37 | #include "xfs_trace.h" | ||
38 | |||
39 | /* | ||
40 | * A buffer has a format structure overhead in the log in addition | ||
41 | * to the data, so we need to take this into account when reserving | ||
42 | * space in a transaction for a buffer. Round the space required up | ||
43 | * to a multiple of 128 bytes so that we don't change the historical | ||
44 | * reservation that has been used for this overhead. | ||
45 | */ | ||
46 | STATIC uint | ||
47 | xfs_buf_log_overhead(void) | ||
48 | { | ||
49 | return round_up(sizeof(struct xlog_op_header) + | ||
50 | sizeof(struct xfs_buf_log_format), 128); | ||
51 | } | ||
52 | |||
53 | /* | ||
54 | * Calculate out transaction log reservation per item in bytes. | ||
55 | * | ||
56 | * The nbufs argument is used to indicate the number of items that | ||
57 | * will be changed in a transaction. size is used to tell how many | ||
58 | * bytes should be reserved per item. | ||
59 | */ | ||
60 | STATIC uint | ||
61 | xfs_calc_buf_res( | ||
62 | uint nbufs, | ||
63 | uint size) | ||
64 | { | ||
65 | return nbufs * (size + xfs_buf_log_overhead()); | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * Logging inodes is really tricksy. They are logged in memory format, | ||
70 | * which means that what we write into the log doesn't directly translate into | ||
71 | * the amount of space they use on disk. | ||
72 | * | ||
73 | * Case in point - btree format forks in memory format use more space than the | ||
74 | * on-disk format. In memory, the buffer contains a normal btree block header so | ||
75 | * the btree code can treat it as though it is just another generic buffer. | ||
76 | * However, when we write it to the inode fork, we don't write all of this | ||
77 | * header as it isn't needed. e.g. the root is only ever in the inode, so | ||
78 | * there's no need for sibling pointers which would waste 16 bytes of space. | ||
79 | * | ||
80 | * Hence when we have an inode with a maximally sized btree format fork, then | ||
81 | * amount of information we actually log is greater than the size of the inode | ||
82 | * on disk. Hence we need an inode reservation function that calculates all this | ||
83 | * correctly. So, we log: | ||
84 | * | ||
85 | * - 4 log op headers for object | ||
86 | * - for the ilf, the inode core and 2 forks | ||
87 | * - inode log format object | ||
88 | * - the inode core | ||
89 | * - two inode forks containing bmap btree root blocks. | ||
90 | * - the btree data contained by both forks will fit into the inode size, | ||
91 | * hence when combined with the inode core above, we have a total of the | ||
92 | * actual inode size. | ||
93 | * - the BMBT headers need to be accounted separately, as they are | ||
94 | * additional to the records and pointers that fit inside the inode | ||
95 | * forks. | ||
96 | */ | ||
97 | STATIC uint | ||
98 | xfs_calc_inode_res( | ||
99 | struct xfs_mount *mp, | ||
100 | uint ninodes) | ||
101 | { | ||
102 | return ninodes * | ||
103 | (4 * sizeof(struct xlog_op_header) + | ||
104 | sizeof(struct xfs_inode_log_format) + | ||
105 | mp->m_sb.sb_inodesize + | ||
106 | 2 * XFS_BMBT_BLOCK_LEN(mp)); | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * The free inode btree is a conditional feature and the log reservation | ||
111 | * requirements differ slightly from that of the traditional inode allocation | ||
112 | * btree. The finobt tracks records for inode chunks with at least one free | ||
113 | * inode. A record can be removed from the tree for an inode allocation | ||
114 | * or free and thus the finobt reservation is unconditional across: | ||
115 | * | ||
116 | * - inode allocation | ||
117 | * - inode free | ||
118 | * - inode chunk allocation | ||
119 | * | ||
120 | * The 'modify' param indicates to include the record modification scenario. The | ||
121 | * 'alloc' param indicates to include the reservation for free space btree | ||
122 | * modifications on behalf of finobt modifications. This is required only for | ||
123 | * transactions that do not already account for free space btree modifications. | ||
124 | * | ||
125 | * the free inode btree: max depth * block size | ||
126 | * the allocation btrees: 2 trees * (max depth - 1) * block size | ||
127 | * the free inode btree entry: block size | ||
128 | */ | ||
129 | STATIC uint | ||
130 | xfs_calc_finobt_res( | ||
131 | struct xfs_mount *mp, | ||
132 | int alloc, | ||
133 | int modify) | ||
134 | { | ||
135 | uint res; | ||
136 | |||
137 | if (!xfs_sb_version_hasfinobt(&mp->m_sb)) | ||
138 | return 0; | ||
139 | |||
140 | res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); | ||
141 | if (alloc) | ||
142 | res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
143 | XFS_FSB_TO_B(mp, 1)); | ||
144 | if (modify) | ||
145 | res += (uint)XFS_FSB_TO_B(mp, 1); | ||
146 | |||
147 | return res; | ||
148 | } | ||
149 | |||
150 | /* | ||
151 | * Various log reservation values. | ||
152 | * | ||
153 | * These are based on the size of the file system block because that is what | ||
154 | * most transactions manipulate. Each adds in an additional 128 bytes per | ||
155 | * item logged to try to account for the overhead of the transaction mechanism. | ||
156 | * | ||
157 | * Note: Most of the reservations underestimate the number of allocation | ||
158 | * groups into which they could free extents in the xfs_bmap_finish() call. | ||
159 | * This is because the number in the worst case is quite high and quite | ||
160 | * unusual. In order to fix this we need to change xfs_bmap_finish() to free | ||
161 | * extents in only a single AG at a time. This will require changes to the | ||
162 | * EFI code as well, however, so that the EFI for the extents not freed is | ||
163 | * logged again in each transaction. See SGI PV #261917. | ||
164 | * | ||
165 | * Reservation functions here avoid a huge stack in xfs_trans_init due to | ||
166 | * register overflow from temporaries in the calculations. | ||
167 | */ | ||
168 | |||
169 | |||
170 | /* | ||
171 | * In a write transaction we can allocate a maximum of 2 | ||
172 | * extents. This gives: | ||
173 | * the inode getting the new extents: inode size | ||
174 | * the inode's bmap btree: max depth * block size | ||
175 | * the agfs of the ags from which the extents are allocated: 2 * sector | ||
176 | * the superblock free block counter: sector size | ||
177 | * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size | ||
178 | * And the bmap_finish transaction can free bmap blocks in a join: | ||
179 | * the agfs of the ags containing the blocks: 2 * sector size | ||
180 | * the agfls of the ags containing the blocks: 2 * sector size | ||
181 | * the super block free block counter: sector size | ||
182 | * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size | ||
183 | */ | ||
184 | STATIC uint | ||
185 | xfs_calc_write_reservation( | ||
186 | struct xfs_mount *mp) | ||
187 | { | ||
188 | return XFS_DQUOT_LOGRES(mp) + | ||
189 | MAX((xfs_calc_inode_res(mp, 1) + | ||
190 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), | ||
191 | XFS_FSB_TO_B(mp, 1)) + | ||
192 | xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + | ||
193 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), | ||
194 | XFS_FSB_TO_B(mp, 1))), | ||
195 | (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + | ||
196 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), | ||
197 | XFS_FSB_TO_B(mp, 1)))); | ||
198 | } | ||
199 | |||
200 | /* | ||
201 | * In truncating a file we free up to two extents at once. We can modify: | ||
202 | * the inode being truncated: inode size | ||
203 | * the inode's bmap btree: (max depth + 1) * block size | ||
204 | * And the bmap_finish transaction can free the blocks and bmap blocks: | ||
205 | * the agf for each of the ags: 4 * sector size | ||
206 | * the agfl for each of the ags: 4 * sector size | ||
207 | * the super block to reflect the freed blocks: sector size | ||
208 | * worst case split in allocation btrees per extent assuming 4 extents: | ||
209 | * 4 exts * 2 trees * (2 * max depth - 1) * block size | ||
210 | * the inode btree: max depth * blocksize | ||
211 | * the allocation btrees: 2 trees * (max depth - 1) * block size | ||
212 | */ | ||
213 | STATIC uint | ||
214 | xfs_calc_itruncate_reservation( | ||
215 | struct xfs_mount *mp) | ||
216 | { | ||
217 | return XFS_DQUOT_LOGRES(mp) + | ||
218 | MAX((xfs_calc_inode_res(mp, 1) + | ||
219 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, | ||
220 | XFS_FSB_TO_B(mp, 1))), | ||
221 | (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + | ||
222 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), | ||
223 | XFS_FSB_TO_B(mp, 1)) + | ||
224 | xfs_calc_buf_res(5, 0) + | ||
225 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
226 | XFS_FSB_TO_B(mp, 1)) + | ||
227 | xfs_calc_buf_res(2 + mp->m_ialloc_blks + | ||
228 | mp->m_in_maxlevels, 0))); | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * In renaming a files we can modify: | ||
233 | * the four inodes involved: 4 * inode size | ||
234 | * the two directory btrees: 2 * (max depth + v2) * dir block size | ||
235 | * the two directory bmap btrees: 2 * max depth * block size | ||
236 | * And the bmap_finish transaction can free dir and bmap blocks (two sets | ||
237 | * of bmap blocks) giving: | ||
238 | * the agf for the ags in which the blocks live: 3 * sector size | ||
239 | * the agfl for the ags in which the blocks live: 3 * sector size | ||
240 | * the superblock for the free block count: sector size | ||
241 | * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size | ||
242 | */ | ||
243 | STATIC uint | ||
244 | xfs_calc_rename_reservation( | ||
245 | struct xfs_mount *mp) | ||
246 | { | ||
247 | return XFS_DQUOT_LOGRES(mp) + | ||
248 | MAX((xfs_calc_inode_res(mp, 4) + | ||
249 | xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), | ||
250 | XFS_FSB_TO_B(mp, 1))), | ||
251 | (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + | ||
252 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3), | ||
253 | XFS_FSB_TO_B(mp, 1)))); | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | * For removing an inode from unlinked list at first, we can modify: | ||
258 | * the agi hash list and counters: sector size | ||
259 | * the on disk inode before ours in the agi hash list: inode cluster size | ||
260 | */ | ||
261 | STATIC uint | ||
262 | xfs_calc_iunlink_remove_reservation( | ||
263 | struct xfs_mount *mp) | ||
264 | { | ||
265 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + | ||
266 | max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * For creating a link to an inode: | ||
271 | * the parent directory inode: inode size | ||
272 | * the linked inode: inode size | ||
273 | * the directory btree could split: (max depth + v2) * dir block size | ||
274 | * the directory bmap btree could join or split: (max depth + v2) * blocksize | ||
275 | * And the bmap_finish transaction can free some bmap blocks giving: | ||
276 | * the agf for the ag in which the blocks live: sector size | ||
277 | * the agfl for the ag in which the blocks live: sector size | ||
278 | * the superblock for the free block count: sector size | ||
279 | * the allocation btrees: 2 trees * (2 * max depth - 1) * block size | ||
280 | */ | ||
281 | STATIC uint | ||
282 | xfs_calc_link_reservation( | ||
283 | struct xfs_mount *mp) | ||
284 | { | ||
285 | return XFS_DQUOT_LOGRES(mp) + | ||
286 | xfs_calc_iunlink_remove_reservation(mp) + | ||
287 | MAX((xfs_calc_inode_res(mp, 2) + | ||
288 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), | ||
289 | XFS_FSB_TO_B(mp, 1))), | ||
290 | (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + | ||
291 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
292 | XFS_FSB_TO_B(mp, 1)))); | ||
293 | } | ||
294 | |||
295 | /* | ||
296 | * For adding an inode to unlinked list we can modify: | ||
297 | * the agi hash list: sector size | ||
298 | * the unlinked inode: inode size | ||
299 | */ | ||
300 | STATIC uint | ||
301 | xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) | ||
302 | { | ||
303 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + | ||
304 | xfs_calc_inode_res(mp, 1); | ||
305 | } | ||
306 | |||
307 | /* | ||
308 | * For removing a directory entry we can modify: | ||
309 | * the parent directory inode: inode size | ||
310 | * the removed inode: inode size | ||
311 | * the directory btree could join: (max depth + v2) * dir block size | ||
312 | * the directory bmap btree could join or split: (max depth + v2) * blocksize | ||
313 | * And the bmap_finish transaction can free the dir and bmap blocks giving: | ||
314 | * the agf for the ag in which the blocks live: 2 * sector size | ||
315 | * the agfl for the ag in which the blocks live: 2 * sector size | ||
316 | * the superblock for the free block count: sector size | ||
317 | * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size | ||
318 | */ | ||
319 | STATIC uint | ||
320 | xfs_calc_remove_reservation( | ||
321 | struct xfs_mount *mp) | ||
322 | { | ||
323 | return XFS_DQUOT_LOGRES(mp) + | ||
324 | xfs_calc_iunlink_add_reservation(mp) + /* agi sector + one inode, always added */ | ||
325 | MAX((xfs_calc_inode_res(mp, 1) + /* one inode here; presumably the second is the one counted on the row above */ | ||
326 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), | ||
327 | XFS_FSB_TO_B(mp, 1))), | ||
328 | (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) + /* NOTE(review): the list above adds up to 5 sectors - confirm 4 is intended */ | ||
329 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), | ||
330 | XFS_FSB_TO_B(mp, 1)))); | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * For create, break it into the two cases that the transaction | ||
335 | * covers. We start with the modify case - allocation done by modification | ||
336 | * of the state of existing inodes - and then the allocation case. | ||
337 | */ | ||
338 | |||
339 | /* | ||
340 | * For create (modify case) we can modify: | ||
341 | * the parent directory inode: inode size | ||
342 | * the new inode: inode size | ||
343 | * the inode btree entry: block size | ||
344 | * the superblock for the nlink flag: sector size | ||
345 | * the directory btree: (max depth + v2) * dir block size | ||
346 | * the directory inode's bmap btree: (max depth + v2) * block size | ||
347 | * the finobt (record modification and allocation btrees) | ||
348 | */ | ||
349 | STATIC uint | ||
350 | xfs_calc_create_resv_modify( | ||
351 | struct xfs_mount *mp) | ||
352 | { | ||
353 | return xfs_calc_inode_res(mp, 2) + /* parent directory inode + new inode */ | ||
354 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + /* superblock */ | ||
355 | (uint)XFS_FSB_TO_B(mp, 1) + /* inode btree entry: raw block size, not passed through xfs_calc_buf_res() */ | ||
356 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + | ||
357 | xfs_calc_finobt_res(mp, 1, 1); | ||
358 | } | ||
359 | |||
360 | /* | ||
361 | * For create (allocation case) we can allocate some inodes giving: | ||
362 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize | ||
363 | * the superblock for the nlink flag: sector size | ||
364 | * the inode blocks allocated: mp->m_ialloc_blks * blocksize | ||
365 | * the inode btree: max depth * blocksize | ||
366 | * the allocation btrees: 2 trees * (2 * max depth - 1) * block size | ||
367 | */ | ||
368 | STATIC uint | ||
369 | xfs_calc_create_resv_alloc( | ||
370 | struct xfs_mount *mp) | ||
371 | { | ||
372 | return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + /* agi + agf */ | ||
373 | mp->m_sb.sb_sectsize + /* superblock: raw sector size, not passed through xfs_calc_buf_res() */ | ||
374 | xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + | ||
375 | xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + | ||
376 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
377 | XFS_FSB_TO_B(mp, 1)); | ||
378 | } | ||
379 | |||
380 | STATIC uint /* create reservation used when xfs_sb_version_hascrc() is not set */ | ||
381 | __xfs_calc_create_reservation( | ||
382 | struct xfs_mount *mp) | ||
383 | { | ||
384 | return XFS_DQUOT_LOGRES(mp) + | ||
385 | MAX(xfs_calc_create_resv_alloc(mp), /* larger of the allocation and modify cases */ | ||
386 | xfs_calc_create_resv_modify(mp)); | ||
387 | } | ||
388 | |||
389 | /* | ||
390 | * For icreate we can allocate some inodes giving: | ||
391 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize | ||
392 | * the superblock for the nlink flag: sector size | ||
393 | * the inode btree: max depth * blocksize | ||
394 | * the allocation btrees: 2 trees * (2 * max depth - 1) * block size | ||
395 | * the finobt (record insertion) | ||
396 | */ | ||
397 | STATIC uint | ||
398 | xfs_calc_icreate_resv_alloc( | ||
399 | struct xfs_mount *mp) | ||
400 | { | ||
401 | return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + /* agi + agf */ | ||
402 | mp->m_sb.sb_sectsize + /* superblock: raw sector size, not passed through xfs_calc_buf_res() */ | ||
403 | xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + | ||
404 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
405 | XFS_FSB_TO_B(mp, 1)) + | ||
406 | xfs_calc_finobt_res(mp, 0, 0); | ||
407 | } | ||
408 | |||
409 | STATIC uint /* create reservation used when xfs_sb_version_hascrc() is set */ | ||
410 | xfs_calc_icreate_reservation(xfs_mount_t *mp) | ||
411 | { | ||
412 | return XFS_DQUOT_LOGRES(mp) + | ||
413 | MAX(xfs_calc_icreate_resv_alloc(mp), /* larger of the icreate allocation and modify cases */ | ||
414 | xfs_calc_create_resv_modify(mp)); | ||
415 | } | ||
416 | |||
417 | STATIC uint /* pick the create reservation variant from xfs_sb_version_hascrc() */ | ||
418 | xfs_calc_create_reservation( | ||
419 | struct xfs_mount *mp) | ||
420 | { | ||
421 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
422 | return xfs_calc_icreate_reservation(mp); | ||
423 | return __xfs_calc_create_reservation(mp); | ||
424 | |||
425 | } | ||
426 | |||
427 | STATIC uint /* O_TMPFILE create: inode allocation case plus an unlinked-list add */ | ||
428 | xfs_calc_create_tmpfile_reservation( | ||
429 | struct xfs_mount *mp) | ||
430 | { | ||
431 | uint res = XFS_DQUOT_LOGRES(mp); | ||
432 | |||
433 | if (xfs_sb_version_hascrc(&mp->m_sb)) /* same variant choice as xfs_calc_create_reservation() */ | ||
434 | res += xfs_calc_icreate_resv_alloc(mp); | ||
435 | else | ||
436 | res += xfs_calc_create_resv_alloc(mp); | ||
437 | |||
438 | return res + xfs_calc_iunlink_add_reservation(mp); | ||
439 | } | ||
440 | |||
441 | /* | ||
442 | * Making a new directory reserves the same log space as creating a new file. | ||
443 | */ | ||
444 | STATIC uint | ||
445 | xfs_calc_mkdir_reservation( | ||
446 | struct xfs_mount *mp) | ||
447 | { | ||
448 | return xfs_calc_create_reservation(mp); | ||
449 | } | ||
450 | |||
451 | |||
452 | /* | ||
453 | * Making a new symlink is the same as creating a new file, but | ||
454 | * with the added blocks for remote symlink data which can be up to 1kB in | ||
455 | * length (MAXPATHLEN). | ||
456 | */ | ||
457 | STATIC uint | ||
458 | xfs_calc_symlink_reservation( | ||
459 | struct xfs_mount *mp) | ||
460 | { | ||
461 | return xfs_calc_create_reservation(mp) + | ||
462 | xfs_calc_buf_res(1, MAXPATHLEN); /* the remote symlink data */ | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * In freeing an inode we can modify: | ||
467 | * the inode being freed: inode size | ||
468 | * the super block free inode counter: sector size | ||
469 | * the agi hash list and counters: sector size | ||
470 | * the inode btree entry: block size | ||
471 | * the on disk inode before ours in the agi hash list: inode cluster size | ||
472 | * the inode btree: max depth * blocksize | ||
473 | * the allocation btrees: 2 trees * (2 * max depth - 1) * block size | ||
474 | * the finobt (record insertion, removal or modification) | ||
475 | */ | ||
476 | STATIC uint | ||
477 | xfs_calc_ifree_reservation( | ||
478 | struct xfs_mount *mp) | ||
479 | { | ||
480 | return XFS_DQUOT_LOGRES(mp) + | ||
481 | xfs_calc_inode_res(mp, 1) + /* the inode being freed */ | ||
482 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + | ||
483 | xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + /* the inode btree entry */ | ||
484 | xfs_calc_iunlink_remove_reservation(mp) + /* agi sector + inode cluster */ | ||
485 | xfs_calc_buf_res(1, 0) + /* NOTE(review): size 0 - presumably only per-buffer overhead is reserved; confirm in xfs_calc_buf_res() */ | ||
486 | xfs_calc_buf_res(2 + mp->m_ialloc_blks + | ||
487 | mp->m_in_maxlevels, 0) + /* also size 0, see the note above */ | ||
488 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
489 | XFS_FSB_TO_B(mp, 1)) + | ||
490 | xfs_calc_finobt_res(mp, 0, 1); | ||
491 | } | ||
492 | |||
493 | /* | ||
494 | * When only changing the inode we log the inode and possibly the superblock. | ||
495 | * We also add a bit of slop for the transaction stuff. | ||
496 | */ | ||
497 | STATIC uint | ||
498 | xfs_calc_ichange_reservation( | ||
499 | struct xfs_mount *mp) | ||
500 | { | ||
501 | return XFS_DQUOT_LOGRES(mp) + | ||
502 | xfs_calc_inode_res(mp, 1) + /* the inode */ | ||
503 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); /* the superblock */ | ||
504 | |||
505 | } | ||
506 | |||
507 | /* | ||
508 | * Growing the data section of the filesystem. | ||
509 | * superblock: sector size | ||
510 | * agi and agf: 2 * sector size | ||
511 | * allocation btrees: 2 trees * (2 * max depth - 1) * block size | ||
512 | */ | ||
513 | STATIC uint | ||
514 | xfs_calc_growdata_reservation( | ||
515 | struct xfs_mount *mp) | ||
516 | { | ||
517 | return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + /* superblock + agi + agf */ | ||
518 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
519 | XFS_FSB_TO_B(mp, 1)); | ||
520 | } | ||
521 | |||
522 | /* | ||
523 | * Growing the rt section of the filesystem. | ||
524 | * In the first set of transactions (ALLOC) we allocate space to the | ||
525 | * bitmap or summary files. | ||
526 | * superblock: sector size | ||
527 | * agf of the ag from which the extent is allocated: sector size | ||
528 | * bmap btree for bitmap/summary inode: max depth * blocksize | ||
529 | * bitmap/summary inode: inode size | ||
530 | * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize | ||
531 | */ | ||
532 | STATIC uint | ||
533 | xfs_calc_growrtalloc_reservation( | ||
534 | struct xfs_mount *mp) | ||
535 | { | ||
536 | return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + /* superblock + agf */ | ||
537 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), | ||
538 | XFS_FSB_TO_B(mp, 1)) + | ||
539 | xfs_calc_inode_res(mp, 1) + /* bitmap/summary inode */ | ||
540 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
541 | XFS_FSB_TO_B(mp, 1)); | ||
542 | } | ||
543 | |||
544 | /* | ||
545 | * Growing the rt section of the filesystem. | ||
546 | * In the second set of transactions (ZERO) we zero the new metadata blocks. | ||
547 | * one bitmap/summary block: blocksize | ||
548 | */ | ||
549 | STATIC uint | ||
550 | xfs_calc_growrtzero_reservation( | ||
551 | struct xfs_mount *mp) | ||
552 | { | ||
553 | return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize); /* one block per transaction */ | ||
554 | } | ||
555 | |||
556 | /* | ||
557 | * Growing the rt section of the filesystem. | ||
558 | * In the third set of transactions (FREE) we update metadata without | ||
559 | * allocating any new blocks. | ||
560 | * superblock: sector size | ||
561 | * bitmap inode: inode size | ||
562 | * summary inode: inode size | ||
563 | * one bitmap block: blocksize | ||
564 | * summary blocks: new summary size | ||
565 | */ | ||
566 | STATIC uint | ||
567 | xfs_calc_growrtfree_reservation( | ||
568 | struct xfs_mount *mp) | ||
569 | { | ||
570 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + /* superblock */ | ||
571 | xfs_calc_inode_res(mp, 2) + /* bitmap inode + summary inode */ | ||
572 | xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) + /* one bitmap block */ | ||
573 | xfs_calc_buf_res(1, mp->m_rsumsize); /* summary blocks */ | ||
574 | } | ||
575 | |||
576 | /* | ||
577 | * Logging the inode modification timestamp on a synchronous write. | ||
578 | * the inode: inode size | ||
579 | */ | ||
580 | STATIC uint /* result is stored in tr_fsyncts by xfs_trans_resv_calc() */ | ||
581 | xfs_calc_swrite_reservation( | ||
582 | struct xfs_mount *mp) | ||
583 | { | ||
584 | return xfs_calc_inode_res(mp, 1); | ||
585 | } | ||
586 | |||
587 | /* | ||
588 | * Logging the inode mode bits when writing a setuid/setgid file. | ||
589 | * the inode: inode size | ||
590 | */ | ||
591 | STATIC uint | ||
592 | xfs_calc_writeid_reservation( | ||
593 | struct xfs_mount *mp) | ||
594 | { | ||
595 | return xfs_calc_inode_res(mp, 1); | ||
596 | } | ||
597 | |||
598 | /* | ||
599 | * Converting the inode from non-attributed to attributed. | ||
600 | * the inode being converted: inode size | ||
601 | * agf block and superblock (for block allocation): 2 * sector size | ||
602 | * the new block (directory sized): dir block size | ||
603 | * bmap blocks for the new directory block | ||
604 | * allocation btrees: 2 trees * (2 * max depth - 1) * block size | ||
605 | */ | ||
606 | STATIC uint | ||
607 | xfs_calc_addafork_reservation( | ||
608 | struct xfs_mount *mp) | ||
609 | { | ||
610 | return XFS_DQUOT_LOGRES(mp) + | ||
611 | xfs_calc_inode_res(mp, 1) + /* the inode being converted */ | ||
612 | xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + /* agf + superblock */ | ||
613 | xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + /* the new block */ | ||
614 | xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1, | ||
615 | XFS_FSB_TO_B(mp, 1)) + | ||
616 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
617 | XFS_FSB_TO_B(mp, 1)); | ||
618 | } | ||
619 | |||
620 | /* | ||
621 | * Removing the attribute fork of a file | ||
622 | * the inode being truncated: inode size | ||
623 | * the inode's bmap btree: max depth * block size | ||
624 | * And the bmap_finish transaction can free the blocks and bmap blocks: | ||
625 | * the agf for each of the ags: 4 * sector size | ||
626 | * the agfl for each of the ags: 4 * sector size | ||
627 | * the super block to reflect the freed blocks: sector size | ||
628 | * worst case split in allocation btrees per extent assuming 4 extents: | ||
629 | * 4 exts * 2 trees * (2 * max depth - 1) * block size | ||
630 | */ | ||
631 | STATIC uint | ||
632 | xfs_calc_attrinval_reservation( | ||
633 | struct xfs_mount *mp) | ||
634 | { | ||
635 | return MAX((xfs_calc_inode_res(mp, 1) + /* case 1: inode + its attr fork bmap btree */ | ||
636 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), | ||
637 | XFS_FSB_TO_B(mp, 1))), | ||
638 | (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + /* case 2: 4 agf + 4 agfl + superblock */ | ||
639 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), | ||
640 | XFS_FSB_TO_B(mp, 1)))); | ||
641 | } | ||
642 | |||
643 | /* | ||
644 | * Setting an attribute at mount time. | ||
645 | * the inode getting the attribute | ||
646 | * the superblock for allocations | ||
647 | * the agfs extents are allocated from | ||
648 | * the attribute btree * max depth | ||
649 | * the inode allocation btree | ||
650 | * Since attribute transaction space is dependent on the size of the attribute, | ||
651 | * the calculation is done partially at mount time and partially at runtime (see | ||
652 | * below). | ||
653 | */ | ||
654 | STATIC uint | ||
655 | xfs_calc_attrsetm_reservation( | ||
656 | struct xfs_mount *mp) | ||
657 | { | ||
658 | return XFS_DQUOT_LOGRES(mp) + | ||
659 | xfs_calc_inode_res(mp, 1) + /* the inode getting the attribute */ | ||
660 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + /* one sector; NOTE(review): the agf and inode allocation btree items listed above have no separate term here - confirm */ | ||
661 | xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)); /* the attribute btree */ | ||
662 | } | ||
663 | |||
664 | /* | ||
665 | * Setting an attribute at runtime, transaction space unit per block. | ||
666 | * the superblock for allocations: sector size | ||
667 | * the inode bmap btree could join or split: max depth * block size | ||
668 | * Since the runtime attribute transaction space is dependent on the total | ||
669 | * blocks needed for the 1st bmap, here we calculate the space unit for | ||
670 | * one block so that the caller can figure out the total space according | ||
671 | * to the attribute extent length in blocks by: | ||
672 | * ext * M_RES(mp)->tr_attrsetrt.tr_logres | ||
673 | */ | ||
674 | STATIC uint | ||
675 | xfs_calc_attrsetrt_reservation( | ||
676 | struct xfs_mount *mp) | ||
677 | { | ||
678 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + /* the superblock */ | ||
679 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), | ||
680 | XFS_FSB_TO_B(mp, 1)); | ||
681 | } | ||
682 | |||
683 | /* | ||
684 | * Removing an attribute. | ||
685 | * the inode: inode size | ||
686 | * the attribute btree could join: max depth * block size | ||
687 | * the inode bmap btree could join or split: max depth * block size | ||
688 | * And the bmap_finish transaction can free the attr blocks freed giving: | ||
689 | * the agf for the ag in which the blocks live: 2 * sector size | ||
690 | * the agfl for the ag in which the blocks live: 2 * sector size | ||
691 | * the superblock for the free block count: sector size | ||
692 | * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size | ||
693 | */ | ||
694 | STATIC uint | ||
695 | xfs_calc_attrrm_reservation( | ||
696 | struct xfs_mount *mp) | ||
697 | { | ||
698 | return XFS_DQUOT_LOGRES(mp) + | ||
699 | MAX((xfs_calc_inode_res(mp, 1) + /* case 1: the attribute removal itself */ | ||
700 | xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, | ||
701 | XFS_FSB_TO_B(mp, 1)) + | ||
702 | (uint)XFS_FSB_TO_B(mp, /* raw bytes for the attr fork bmap btree levels */ | ||
703 | XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + | ||
704 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)), /* NOTE(review): size 0 and XFS_DATA_FORK here - confirm intent */ | ||
705 | (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + /* case 2: 2 agf + 2 agfl + superblock */ | ||
706 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), | ||
707 | XFS_FSB_TO_B(mp, 1)))); | ||
708 | } | ||
709 | |||
710 | /* | ||
711 | * Clearing a bad agino number in an agi hash bucket: one sector-sized buffer. | ||
712 | */ | ||
713 | STATIC uint | ||
714 | xfs_calc_clear_agi_bucket_reservation( | ||
715 | struct xfs_mount *mp) | ||
716 | { | ||
717 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); | ||
718 | } | ||
719 | |||
720 | /* | ||
721 | * Changing the quota flags in the superblock. | ||
722 | * the super block for changing quota flags: sector size | ||
723 | */ | ||
724 | STATIC uint | ||
725 | xfs_calc_qm_sbchange_reservation( | ||
726 | struct xfs_mount *mp) | ||
727 | { | ||
728 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); | ||
729 | } | ||
730 | |||
731 | /* | ||
732 | * Adjusting quota limits. | ||
733 | * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot) | ||
734 | */ | ||
735 | STATIC uint | ||
736 | xfs_calc_qm_setqlim_reservation( | ||
737 | struct xfs_mount *mp) /* not read by this function */ | ||
738 | { | ||
739 | return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot)); | ||
740 | } | ||
741 | |||
742 | /* | ||
743 | * Allocating quota on disk if needed. | ||
744 | * the write transaction log space for quota file extent allocation | ||
745 | * the unit of quota allocation: one system block size | ||
746 | */ | ||
747 | STATIC uint | ||
748 | xfs_calc_qm_dqalloc_reservation( | ||
749 | struct xfs_mount *mp) | ||
750 | { | ||
751 | return xfs_calc_write_reservation(mp) + /* the quota file extent allocation */ | ||
752 | xfs_calc_buf_res(1, | ||
753 | XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1); /* NOTE(review): one byte less than the dquot cluster size - confirm the "- 1" */ | ||
754 | } | ||
755 | |||
756 | /* | ||
757 | * Turning off quotas. | ||
758 | * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 | ||
759 | * the superblock for the quota flags: sector size | ||
760 | */ | ||
761 | STATIC uint | ||
762 | xfs_calc_qm_quotaoff_reservation( | ||
763 | struct xfs_mount *mp) | ||
764 | { | ||
765 | return sizeof(struct xfs_qoff_logitem) * 2 + /* the two quotaoff log items */ | ||
766 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); /* the superblock */ | ||
767 | } | ||
768 | |||
769 | /* | ||
770 | * End of turning off quotas. | ||
771 | * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 | ||
772 | */ | ||
773 | STATIC uint | ||
774 | xfs_calc_qm_quotaoff_end_reservation( | ||
775 | struct xfs_mount *mp) /* not read by this function */ | ||
776 | { | ||
777 | return sizeof(struct xfs_qoff_logitem) * 2; | ||
778 | } | ||
779 | |||
780 | /* | ||
781 | * Syncing the in-core super block changes to disk. | ||
782 | * the super block to reflect the changes: sector size | ||
783 | */ | ||
784 | STATIC uint | ||
785 | xfs_calc_sb_reservation( | ||
786 | struct xfs_mount *mp) | ||
787 | { | ||
788 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); | ||
789 | } | ||
790 | |||
791 | void /* fill *resp with the log reservation of every transaction type for mp */ | ||
792 | xfs_trans_resv_calc( | ||
793 | struct xfs_mount *mp, | ||
794 | struct xfs_trans_resv *resp) | ||
795 | { | ||
796 | /* | ||
797 | * The following transactions are logged in physical format and | ||
798 | * require a permanent reservation on space. | ||
799 | */ | ||
800 | resp->tr_write.tr_logres = xfs_calc_write_reservation(mp); | ||
801 | resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT; | ||
802 | resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
803 | |||
804 | resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp); | ||
805 | resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT; | ||
806 | resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
807 | |||
808 | resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp); | ||
809 | resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT; | ||
810 | resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
811 | |||
812 | resp->tr_link.tr_logres = xfs_calc_link_reservation(mp); | ||
813 | resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT; | ||
814 | resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
815 | |||
816 | resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp); | ||
817 | resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT; | ||
818 | resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
819 | |||
820 | resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp); | ||
821 | resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT; | ||
822 | resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
823 | |||
824 | resp->tr_create.tr_logres = xfs_calc_create_reservation(mp); | ||
825 | resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; | ||
826 | resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
827 | |||
828 | resp->tr_create_tmpfile.tr_logres = | ||
829 | xfs_calc_create_tmpfile_reservation(mp); | ||
830 | resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT; | ||
831 | resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
832 | |||
833 | resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp); | ||
834 | resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT; | ||
835 | resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
836 | |||
837 | resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp); | ||
838 | resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT; | ||
839 | resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
840 | |||
841 | resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp); | ||
842 | resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT; | ||
843 | resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
844 | |||
845 | resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp); | ||
846 | resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT; | ||
847 | resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
848 | |||
849 | resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp); | ||
850 | resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT; | ||
851 | resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
852 | |||
853 | resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp); | ||
854 | resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT; | ||
855 | resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
856 | |||
857 | resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp); | ||
858 | resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT; | ||
859 | resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
860 | |||
861 | resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp); | ||
862 | resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT; | ||
863 | resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
864 | |||
865 | /* | ||
866 | * The following transactions are logged in logical format with | ||
867 | * a default log count; tr_logflags is not touched for them. | ||
868 | */ | ||
869 | resp->tr_qm_sbchange.tr_logres = xfs_calc_qm_sbchange_reservation(mp); | ||
870 | resp->tr_qm_sbchange.tr_logcount = XFS_DEFAULT_LOG_COUNT; | ||
871 | |||
872 | resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp); | ||
873 | resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT; | ||
874 | |||
875 | resp->tr_qm_quotaoff.tr_logres = xfs_calc_qm_quotaoff_reservation(mp); | ||
876 | resp->tr_qm_quotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT; | ||
877 | |||
878 | resp->tr_qm_equotaoff.tr_logres = | ||
879 | xfs_calc_qm_quotaoff_end_reservation(mp); | ||
880 | resp->tr_qm_equotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT; | ||
881 | |||
882 | resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp); | ||
883 | resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT; | ||
884 | |||
885 | /* The following transactions are logged in logical format; only tr_logres is set for them here */ | ||
886 | resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp); | ||
887 | resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp); | ||
888 | resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp); | ||
889 | resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp); | ||
890 | resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp); | ||
891 | resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp); | ||
892 | resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp); | ||
893 | resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp); | ||
894 | } | ||
diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h new file mode 100644 index 000000000000..1097d14cd583 --- /dev/null +++ b/fs/xfs/libxfs/xfs_trans_resv.h | |||
@@ -0,0 +1,117 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_TRANS_RESV_H__ | ||
19 | #define __XFS_TRANS_RESV_H__ | ||
20 | |||
21 | struct xfs_mount; | ||
22 | |||
23 | /* | ||
24 | * Pre-calculated log reservation for a single transaction type. | ||
25 | */ | ||
26 | struct xfs_trans_res { | ||
27 | uint tr_logres; /* log space unit in bytes per log ticket */ | ||
28 | int tr_logcount; /* number of log operations per log ticket */ | ||
29 | int tr_logflags; /* log flags, currently only used for indicating | ||
30 | * whether a reservation request is permanent */ | ||
31 | }; | ||
32 | |||
33 | struct xfs_trans_resv { /* one entry per transaction type; filled in by xfs_trans_resv_calc() */ | ||
34 | struct xfs_trans_res tr_write; /* extent alloc trans */ | ||
35 | struct xfs_trans_res tr_itruncate; /* truncate trans */ | ||
36 | struct xfs_trans_res tr_rename; /* rename trans */ | ||
37 | struct xfs_trans_res tr_link; /* link trans */ | ||
38 | struct xfs_trans_res tr_remove; /* unlink trans */ | ||
39 | struct xfs_trans_res tr_symlink; /* symlink trans */ | ||
40 | struct xfs_trans_res tr_create; /* create trans */ | ||
41 | struct xfs_trans_res tr_create_tmpfile; /* create O_TMPFILE trans */ | ||
42 | struct xfs_trans_res tr_mkdir; /* mkdir trans */ | ||
43 | struct xfs_trans_res tr_ifree; /* inode free trans */ | ||
44 | struct xfs_trans_res tr_ichange; /* inode update trans */ | ||
45 | struct xfs_trans_res tr_growdata; /* fs data section grow trans */ | ||
46 | struct xfs_trans_res tr_addafork; /* add inode attr fork trans */ | ||
47 | struct xfs_trans_res tr_writeid; /* write setuid/setgid file */ | ||
48 | struct xfs_trans_res tr_attrinval; /* attr fork buffer | ||
49 | * invalidation */ | ||
50 | struct xfs_trans_res tr_attrsetm; /* set/create an attribute: part | ||
51 | * calculated at mount time */ | ||
52 | struct xfs_trans_res tr_attrsetrt; /* set/create an attribute: per-block | ||
53 | * unit scaled at runtime */ | ||
54 | struct xfs_trans_res tr_attrrm; /* remove an attribute */ | ||
55 | struct xfs_trans_res tr_clearagi; /* clear agi unlinked bucket */ | ||
56 | struct xfs_trans_res tr_growrtalloc; /* grow realtime allocations */ | ||
57 | struct xfs_trans_res tr_growrtzero; /* grow realtime zeroing */ | ||
58 | struct xfs_trans_res tr_growrtfree; /* grow realtime freeing */ | ||
59 | struct xfs_trans_res tr_qm_sbchange; /* change quota flags */ | ||
60 | struct xfs_trans_res tr_qm_setqlim; /* adjust quota limits */ | ||
61 | struct xfs_trans_res tr_qm_dqalloc; /* allocate quota on disk */ | ||
62 | struct xfs_trans_res tr_qm_quotaoff; /* turn quota off */ | ||
63 | struct xfs_trans_res tr_qm_equotaoff;/* end of turn quota off */ | ||
64 | struct xfs_trans_res tr_sb; /* modify superblock */ | ||
65 | struct xfs_trans_res tr_fsyncts; /* update timestamps on fsync */ | ||
66 | }; | ||
67 | |||
68 | /* shorthand for a pointer to the mount's reservation structure (mp->m_resv) */ | ||
69 | #define M_RES(mp) (&(mp)->m_resv) | ||
70 | |||
71 | /* | ||
72 | * Per-extent log reservation for the allocation btree changes | ||
73 | * involved in freeing or allocating an extent (nx = number of extents). | ||
74 | * 2 trees * (2 blocks/level * max depth - 1) * block size; _COUNT is in blocks | ||
75 | */ | ||
76 | #define XFS_ALLOCFREE_LOG_RES(mp,nx) \ | ||
77 | ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1))) | ||
78 | #define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \ | ||
79 | ((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1))) | ||
80 | |||
81 | /* | ||
82 | * Per-directory log reservation for any directory change (_COUNT is in blocks). | ||
83 | * dir blocks: (1 btree block per level + data block + free block) * dblock size | ||
84 | * bmap btree: (levels + 2) * max depth * block size | ||
85 | * v2 directory blocks can be fragmented below the dirblksize down to the fsb | ||
86 | * size, so account for that in the DAENTER macros. | ||
87 | */ | ||
88 | #define XFS_DIROP_LOG_RES(mp) \ | ||
89 | (XFS_FSB_TO_B(mp, XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK)) + \ | ||
90 | (XFS_FSB_TO_B(mp, XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1))) | ||
91 | #define XFS_DIROP_LOG_COUNT(mp) \ | ||
92 | (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \ | ||
93 | XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1) | ||
94 | |||
95 | /* | ||
96 | * Various log count values, assigned to tr_logcount by xfs_trans_resv_calc(). | ||
97 | */ | ||
98 | #define XFS_DEFAULT_LOG_COUNT 1 | ||
99 | #define XFS_DEFAULT_PERM_LOG_COUNT 2 | ||
100 | #define XFS_ITRUNCATE_LOG_COUNT 2 | ||
101 | #define XFS_INACTIVE_LOG_COUNT 2 | ||
102 | #define XFS_CREATE_LOG_COUNT 2 | ||
103 | #define XFS_CREATE_TMPFILE_LOG_COUNT 2 | ||
104 | #define XFS_MKDIR_LOG_COUNT 3 | ||
105 | #define XFS_SYMLINK_LOG_COUNT 3 | ||
106 | #define XFS_REMOVE_LOG_COUNT 2 | ||
107 | #define XFS_LINK_LOG_COUNT 2 | ||
108 | #define XFS_RENAME_LOG_COUNT 2 | ||
109 | #define XFS_WRITE_LOG_COUNT 2 | ||
110 | #define XFS_ADDAFORK_LOG_COUNT 2 | ||
111 | #define XFS_ATTRINVAL_LOG_COUNT 1 | ||
112 | #define XFS_ATTRSET_LOG_COUNT 3 | ||
113 | #define XFS_ATTRRM_LOG_COUNT 3 | ||
114 | |||
115 | void xfs_trans_resv_calc(struct xfs_mount *mp, struct xfs_trans_resv *resp); /* fills *resp from mp */ | ||
116 | |||
117 | #endif /* __XFS_TRANS_RESV_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h new file mode 100644 index 000000000000..bf9c4579334d --- /dev/null +++ b/fs/xfs/libxfs/xfs_trans_space.h | |||
@@ -0,0 +1,92 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_TRANS_SPACE_H__ | ||
19 | #define __XFS_TRANS_SPACE_H__ | ||
20 | |||
21 | /* | ||
22 | * Components of space reservations. | ||
23 | */ | ||
/*
 * XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp): the difference between the first
 * entries of the mount's m_alloc_mxr[] and m_alloc_mnr[] arrays.
 * NOTE(review): presumably max minus min records per block - confirm
 * against the definition of those fields.
 */
24 | #define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) \ | ||
25 | (((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0])) | ||
/*
 * XFS_EXTENTADD_SPACE_RES(mp,w): one less than XFS_BM_MAXLEVELS() for
 * fork w.
 */
26 | #define XFS_EXTENTADD_SPACE_RES(mp,w) (XFS_BM_MAXLEVELS(mp,w) - 1) | ||
/*
 * Space needed to add b extents to fork w.
 *
 * b is rounded up to a whole number of XFS_MAX_CONTIG_EXTENTS_PER_BLOCK()
 * groups, and each group costs one XFS_EXTENTADD_SPACE_RES().
 *
 * b is parenthesized so that an expression argument (a shift, a conditional,
 * a bitwise-or, ...) is evaluated as a whole before it takes part in the
 * round-up addition; without the parentheses "1 << 2" would be expanded to
 * "1 << (2 + n - 1)".
 */
#define XFS_NEXTENTADD_SPACE_RES(mp,b,w)\
	((((b) + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
	  XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
	  XFS_EXTENTADD_SPACE_RES(mp,w))
/*
 * Building blocks for directory/attribute entry reservations; w selects the
 * fork.
 *
 *   XFS_DAENTER_1B         m_dir_geo->fsbcount for XFS_DATA_FORK, else 1
 *   XFS_DAENTER_DBS        XFS_DA_NODE_MAXDEPTH, plus 2 for XFS_DATA_FORK
 *   XFS_DAENTER_BLOCKS     XFS_DAENTER_1B * XFS_DAENTER_DBS
 *   XFS_DAENTER_BMAP1B     XFS_NEXTENTADD_SPACE_RES() for XFS_DAENTER_1B
 *                          extents
 *   XFS_DAENTER_BMAPS      XFS_DAENTER_DBS * XFS_DAENTER_BMAP1B
 *   XFS_DAENTER_SPACE_RES  XFS_DAENTER_BLOCKS + XFS_DAENTER_BMAPS
 *   XFS_DAREMOVE_SPACE_RES XFS_DAENTER_BMAPS only
 *
 * The XFS_DIR* variants apply the above to XFS_DATA_FORK.
 * XFS_DIRENTER_MAX_SPLIT ignores both arguments and is always 1, so
 * XFS_DIRENTER_SPACE_RES currently equals
 * XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK) and its nl argument has no effect.
 */
31 | #define XFS_DAENTER_1B(mp,w) \ | ||
32 | ((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1) | ||
33 | #define XFS_DAENTER_DBS(mp,w) \ | ||
34 | (XFS_DA_NODE_MAXDEPTH + (((w) == XFS_DATA_FORK) ? 2 : 0)) | ||
35 | #define XFS_DAENTER_BLOCKS(mp,w) \ | ||
36 | (XFS_DAENTER_1B(mp,w) * XFS_DAENTER_DBS(mp,w)) | ||
37 | #define XFS_DAENTER_BMAP1B(mp,w) \ | ||
38 | XFS_NEXTENTADD_SPACE_RES(mp, XFS_DAENTER_1B(mp, w), w) | ||
39 | #define XFS_DAENTER_BMAPS(mp,w) \ | ||
40 | (XFS_DAENTER_DBS(mp,w) * XFS_DAENTER_BMAP1B(mp,w)) | ||
41 | #define XFS_DAENTER_SPACE_RES(mp,w) \ | ||
42 | (XFS_DAENTER_BLOCKS(mp,w) + XFS_DAENTER_BMAPS(mp,w)) | ||
43 | #define XFS_DAREMOVE_SPACE_RES(mp,w) XFS_DAENTER_BMAPS(mp,w) | ||
44 | #define XFS_DIRENTER_MAX_SPLIT(mp,nl) 1 | ||
45 | #define XFS_DIRENTER_SPACE_RES(mp,nl) \ | ||
46 | (XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK) * \ | ||
47 | XFS_DIRENTER_MAX_SPLIT(mp,nl)) | ||
48 | #define XFS_DIRREMOVE_SPACE_RES(mp) \ | ||
49 | XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) | ||
/*
 * Space needed for an inode allocation: m_ialloc_blks plus
 * (m_in_maxlevels - 1) blocks, counted twice when the superblock reports the
 * finobt feature and once otherwise (presumably one allowance per inode
 * btree that may be modified - confirm).
 *
 * The conditional carries its own parentheses on purpose. "*" binds tighter
 * than "?:", so "c ? 2 : 1 * x" means "c ? 2 : (1 * x)" and would yield a
 * flat 2 in the finobt case instead of 2 * x.
 *
 * mp is parenthesized everywhere so that any pointer expression can be
 * passed, not just a plain identifier.
 */
#define XFS_IALLOC_SPACE_RES(mp)	\
	((mp)->m_ialloc_blks + \
	 ((xfs_sb_version_hasfinobt(&(mp)->m_sb) ? 2 : 1) * \
	  ((mp)->m_in_maxlevels - 1)))
54 | |||
55 | /* | ||
56 | * Space reservation values for various transactions. | ||
57 | */ | ||
/*
 * How the values below are composed from the component macros:
 *   - XFS_CREATE_SPACE_RES and XFS_MKDIR_SPACE_RES expand to the same
 *     expression: XFS_IALLOC_SPACE_RES plus XFS_DIRENTER_SPACE_RES.
 *   - XFS_SYMLINK_SPACE_RES is that same sum plus the caller-supplied b.
 *   - XFS_LINK_SPACE_RES, XFS_REMOVE_SPACE_RES and
 *     XFS_QM_QINOCREATE_SPACE_RES are plain aliases for
 *     XFS_DIRENTER_SPACE_RES, XFS_DIRREMOVE_SPACE_RES and
 *     XFS_IALLOC_SPACE_RES respectively.
 *   - XFS_RENAME_SPACE_RES is one XFS_DIRREMOVE_SPACE_RES plus one
 *     XFS_DIRENTER_SPACE_RES.
 *   - XFS_DIOSTRAT_SPACE_RES, XFS_GROWFSRT_SPACE_RES and
 *     XFS_QM_DQALLOC_SPACE_RES each add a size term to the data-fork
 *     XFS_EXTENTADD_SPACE_RES.
 *   - XFS_ADDAFORK_SPACE_RES is built from data-fork quantities:
 *     m_dir_geo->fsbcount and XFS_DAENTER_BMAP1B for XFS_DATA_FORK.
 *   - XFS_ATTRRM_SPACE_RES and XFS_ATTRSET_SPACE_RES use the XFS_ATTR_FORK
 *     flavour of the XFS_DAREMOVE/XFS_DAENTER macros.
 *   - Wherever an nl argument is accepted it is only forwarded to
 *     XFS_DIRENTER_SPACE_RES.
 */
58 | #define XFS_ADDAFORK_SPACE_RES(mp) \ | ||
59 | ((mp)->m_dir_geo->fsbcount + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK)) | ||
60 | #define XFS_ATTRRM_SPACE_RES(mp) \ | ||
61 | XFS_DAREMOVE_SPACE_RES(mp, XFS_ATTR_FORK) | ||
62 | /* This macro is not used - see inline code in xfs_attr_set */ | ||
63 | #define XFS_ATTRSET_SPACE_RES(mp, v) \ | ||
64 | (XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK) + XFS_B_TO_FSB(mp, v)) | ||
65 | #define XFS_CREATE_SPACE_RES(mp,nl) \ | ||
66 | (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) | ||
67 | #define XFS_DIOSTRAT_SPACE_RES(mp, v) \ | ||
68 | (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v)) | ||
69 | #define XFS_GROWFS_SPACE_RES(mp) \ | ||
70 | (2 * XFS_AG_MAXLEVELS(mp)) | ||
71 | #define XFS_GROWFSRT_SPACE_RES(mp,b) \ | ||
72 | ((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK)) | ||
73 | #define XFS_LINK_SPACE_RES(mp,nl) \ | ||
74 | XFS_DIRENTER_SPACE_RES(mp,nl) | ||
75 | #define XFS_MKDIR_SPACE_RES(mp,nl) \ | ||
76 | (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) | ||
77 | #define XFS_QM_DQALLOC_SPACE_RES(mp) \ | ||
78 | (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + \ | ||
79 | XFS_DQUOT_CLUSTER_SIZE_FSB) | ||
80 | #define XFS_QM_QINOCREATE_SPACE_RES(mp) \ | ||
81 | XFS_IALLOC_SPACE_RES(mp) | ||
82 | #define XFS_REMOVE_SPACE_RES(mp) \ | ||
83 | XFS_DIRREMOVE_SPACE_RES(mp) | ||
84 | #define XFS_RENAME_SPACE_RES(mp,nl) \ | ||
85 | (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) | ||
86 | #define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ | ||
87 | (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) | ||
/*
 * Space needed when freeing an inode: m_in_maxlevels blocks when the
 * superblock reports the finobt feature, nothing otherwise.
 *
 * mp is parenthesized in both places it is used so that the macro accepts
 * any pointer expression (for example "&mount"), not only a bare identifier.
 */
#define XFS_IFREE_SPACE_RES(mp)		\
	(xfs_sb_version_hasfinobt(&(mp)->m_sb) ? (mp)->m_in_maxlevels : 0)
90 | |||
91 | |||
92 | #endif /* __XFS_TRANS_SPACE_H__ */ | ||