diff options
author | Dave Chinner <david@fromorbit.com> | 2014-05-14 19:37:44 -0400 |
---|---|---|
committer | Dave Chinner <david@fromorbit.com> | 2014-05-14 19:37:44 -0400 |
commit | b76769294ba400415fc44038c21cc2df86f9a28b (patch) | |
tree | a1ca6152391c86ae024e09d69e7b0ad75a5c61d5 | |
parent | 232c2f5c65dd80055e7163a5c82e3816119330e6 (diff) | |
parent | 53801fd97ae000793f51187b122b9475102199a8 (diff) |
Merge branch 'xfs-free-inode-btree' into for-next
-rw-r--r-- | fs/xfs/xfs_ag.h | 36 | ||||
-rw-r--r-- | fs/xfs/xfs_btree.c | 6 | ||||
-rw-r--r-- | fs/xfs/xfs_btree.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_format.h | 14 | ||||
-rw-r--r-- | fs/xfs/xfs_fs.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_fsops.c | 36 | ||||
-rw-r--r-- | fs/xfs/xfs_ialloc.c | 695 | ||||
-rw-r--r-- | fs/xfs/xfs_ialloc_btree.c | 68 | ||||
-rw-r--r-- | fs/xfs/xfs_ialloc_btree.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 28 | ||||
-rw-r--r-- | fs/xfs/xfs_itable.c | 6 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_sb.h | 10 | ||||
-rw-r--r-- | fs/xfs/xfs_stats.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_stats.h | 18 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_resv.c | 53 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_space.h | 7 | ||||
-rw-r--r-- | fs/xfs/xfs_types.h | 2 |
18 files changed, 852 insertions, 137 deletions
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 0fdd4109c624..6e247a99f5db 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -160,30 +160,38 @@ typedef struct xfs_agi { | |||
160 | * still being referenced. | 160 | * still being referenced. |
161 | */ | 161 | */ |
162 | __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; | 162 | __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; |
163 | 163 | /* | |
164 | * This marks the end of logging region 1 and start of logging region 2. | ||
165 | */ | ||
164 | uuid_t agi_uuid; /* uuid of filesystem */ | 166 | uuid_t agi_uuid; /* uuid of filesystem */ |
165 | __be32 agi_crc; /* crc of agi sector */ | 167 | __be32 agi_crc; /* crc of agi sector */ |
166 | __be32 agi_pad32; | 168 | __be32 agi_pad32; |
167 | __be64 agi_lsn; /* last write sequence */ | 169 | __be64 agi_lsn; /* last write sequence */ |
168 | 170 | ||
171 | __be32 agi_free_root; /* root of the free inode btree */ | ||
172 | __be32 agi_free_level;/* levels in free inode btree */ | ||
173 | |||
169 | /* structure must be padded to 64 bit alignment */ | 174 | /* structure must be padded to 64 bit alignment */ |
170 | } xfs_agi_t; | 175 | } xfs_agi_t; |
171 | 176 | ||
172 | #define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) | 177 | #define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) |
173 | 178 | ||
174 | #define XFS_AGI_MAGICNUM 0x00000001 | 179 | #define XFS_AGI_MAGICNUM (1 << 0) |
175 | #define XFS_AGI_VERSIONNUM 0x00000002 | 180 | #define XFS_AGI_VERSIONNUM (1 << 1) |
176 | #define XFS_AGI_SEQNO 0x00000004 | 181 | #define XFS_AGI_SEQNO (1 << 2) |
177 | #define XFS_AGI_LENGTH 0x00000008 | 182 | #define XFS_AGI_LENGTH (1 << 3) |
178 | #define XFS_AGI_COUNT 0x00000010 | 183 | #define XFS_AGI_COUNT (1 << 4) |
179 | #define XFS_AGI_ROOT 0x00000020 | 184 | #define XFS_AGI_ROOT (1 << 5) |
180 | #define XFS_AGI_LEVEL 0x00000040 | 185 | #define XFS_AGI_LEVEL (1 << 6) |
181 | #define XFS_AGI_FREECOUNT 0x00000080 | 186 | #define XFS_AGI_FREECOUNT (1 << 7) |
182 | #define XFS_AGI_NEWINO 0x00000100 | 187 | #define XFS_AGI_NEWINO (1 << 8) |
183 | #define XFS_AGI_DIRINO 0x00000200 | 188 | #define XFS_AGI_DIRINO (1 << 9) |
184 | #define XFS_AGI_UNLINKED 0x00000400 | 189 | #define XFS_AGI_UNLINKED (1 << 10) |
185 | #define XFS_AGI_NUM_BITS 11 | 190 | #define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */ |
186 | #define XFS_AGI_ALL_BITS ((1 << XFS_AGI_NUM_BITS) - 1) | 191 | #define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1) |
192 | #define XFS_AGI_FREE_ROOT (1 << 11) | ||
193 | #define XFS_AGI_FREE_LEVEL (1 << 12) | ||
194 | #define XFS_AGI_NUM_BITS_R2 13 | ||
187 | 195 | ||
188 | /* disk block (xfs_daddr_t) in the AG */ | 196 | /* disk block (xfs_daddr_t) in the AG */ |
189 | #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) | 197 | #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) |
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index c13d650fdb99..182bac2bb276 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
@@ -43,9 +43,10 @@ kmem_zone_t *xfs_btree_cur_zone; | |||
43 | * Btree magic numbers. | 43 | * Btree magic numbers. |
44 | */ | 44 | */ |
45 | static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { | 45 | static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { |
46 | { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }, | 46 | { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, |
47 | XFS_FIBT_MAGIC }, | ||
47 | { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, | 48 | { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, |
48 | XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC } | 49 | XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC } |
49 | }; | 50 | }; |
50 | #define xfs_btree_magic(cur) \ | 51 | #define xfs_btree_magic(cur) \ |
51 | xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] | 52 | xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] |
@@ -1115,6 +1116,7 @@ xfs_btree_set_refs( | |||
1115 | xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); | 1116 | xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); |
1116 | break; | 1117 | break; |
1117 | case XFS_BTNUM_INO: | 1118 | case XFS_BTNUM_INO: |
1119 | case XFS_BTNUM_FINO: | ||
1118 | xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); | 1120 | xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); |
1119 | break; | 1121 | break; |
1120 | case XFS_BTNUM_BMAP: | 1122 | case XFS_BTNUM_BMAP: |
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 875f6876ff48..a04b69422f67 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h | |||
@@ -62,6 +62,7 @@ union xfs_btree_rec { | |||
62 | #define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) | 62 | #define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) |
63 | #define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) | 63 | #define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) |
64 | #define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) | 64 | #define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) |
65 | #define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi) | ||
65 | 66 | ||
66 | /* | 67 | /* |
67 | * For logging record fields. | 68 | * For logging record fields. |
@@ -92,6 +93,7 @@ do { \ | |||
92 | case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ | 93 | case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ |
93 | case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ | 94 | case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ |
94 | case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ | 95 | case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ |
96 | case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \ | ||
95 | case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ | 97 | case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ |
96 | } \ | 98 | } \ |
97 | } while (0) | 99 | } while (0) |
@@ -105,6 +107,7 @@ do { \ | |||
105 | case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ | 107 | case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ |
106 | case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ | 108 | case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ |
107 | case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ | 109 | case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ |
110 | case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \ | ||
108 | case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ | 111 | case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ |
109 | } \ | 112 | } \ |
110 | } while (0) | 113 | } while (0) |
diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h index 9898f31d05d8..34d85aca3058 100644 --- a/fs/xfs/xfs_format.h +++ b/fs/xfs/xfs_format.h | |||
@@ -202,6 +202,8 @@ typedef __be32 xfs_alloc_ptr_t; | |||
202 | */ | 202 | */ |
203 | #define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ | 203 | #define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ |
204 | #define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */ | 204 | #define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */ |
205 | #define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */ | ||
206 | #define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */ | ||
205 | 207 | ||
206 | typedef __uint64_t xfs_inofree_t; | 208 | typedef __uint64_t xfs_inofree_t; |
207 | #define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) | 209 | #define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) |
@@ -244,7 +246,17 @@ typedef __be32 xfs_inobt_ptr_t; | |||
244 | * block numbers in the AG. | 246 | * block numbers in the AG. |
245 | */ | 247 | */ |
246 | #define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) | 248 | #define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) |
247 | #define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) | 249 | #define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) |
250 | |||
251 | /* | ||
252 | * The first data block of an AG depends on whether the filesystem was formatted | ||
253 | * with the finobt feature. If so, account for the finobt reserved root btree | ||
254 | * block. | ||
255 | */ | ||
256 | #define XFS_PREALLOC_BLOCKS(mp) \ | ||
257 | (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \ | ||
258 | XFS_FIBT_BLOCK(mp) + 1 : \ | ||
259 | XFS_IBT_BLOCK(mp) + 1) | ||
248 | 260 | ||
249 | 261 | ||
250 | 262 | ||
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index c5fc116dfaa3..d34703dbcb42 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h | |||
@@ -238,6 +238,7 @@ typedef struct xfs_fsop_resblks { | |||
238 | #define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ | 238 | #define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ |
239 | #define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */ | 239 | #define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */ |
240 | #define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ | 240 | #define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ |
241 | #define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */ | ||
241 | 242 | ||
242 | /* | 243 | /* |
243 | * Minimum and maximum sizes needed for growth checks. | 244 | * Minimum and maximum sizes needed for growth checks. |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 02fb943cbf22..3445ead7c1fc 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -104,7 +104,9 @@ xfs_fs_geometry( | |||
104 | (xfs_sb_version_hascrc(&mp->m_sb) ? | 104 | (xfs_sb_version_hascrc(&mp->m_sb) ? |
105 | XFS_FSOP_GEOM_FLAGS_V5SB : 0) | | 105 | XFS_FSOP_GEOM_FLAGS_V5SB : 0) | |
106 | (xfs_sb_version_hasftype(&mp->m_sb) ? | 106 | (xfs_sb_version_hasftype(&mp->m_sb) ? |
107 | XFS_FSOP_GEOM_FLAGS_FTYPE : 0); | 107 | XFS_FSOP_GEOM_FLAGS_FTYPE : 0) | |
108 | (xfs_sb_version_hasfinobt(&mp->m_sb) ? | ||
109 | XFS_FSOP_GEOM_FLAGS_FINOBT : 0); | ||
108 | geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? | 110 | geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? |
109 | mp->m_sb.sb_logsectsize : BBSIZE; | 111 | mp->m_sb.sb_logsectsize : BBSIZE; |
110 | geo->rtsectsize = mp->m_sb.sb_blocksize; | 112 | geo->rtsectsize = mp->m_sb.sb_blocksize; |
@@ -316,6 +318,10 @@ xfs_growfs_data_private( | |||
316 | agi->agi_dirino = cpu_to_be32(NULLAGINO); | 318 | agi->agi_dirino = cpu_to_be32(NULLAGINO); |
317 | if (xfs_sb_version_hascrc(&mp->m_sb)) | 319 | if (xfs_sb_version_hascrc(&mp->m_sb)) |
318 | uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid); | 320 | uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid); |
321 | if (xfs_sb_version_hasfinobt(&mp->m_sb)) { | ||
322 | agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp)); | ||
323 | agi->agi_free_level = cpu_to_be32(1); | ||
324 | } | ||
319 | for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) | 325 | for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) |
320 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); | 326 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); |
321 | 327 | ||
@@ -407,6 +413,34 @@ xfs_growfs_data_private( | |||
407 | xfs_buf_relse(bp); | 413 | xfs_buf_relse(bp); |
408 | if (error) | 414 | if (error) |
409 | goto error0; | 415 | goto error0; |
416 | |||
417 | /* | ||
418 | * FINO btree root block | ||
419 | */ | ||
420 | if (xfs_sb_version_hasfinobt(&mp->m_sb)) { | ||
421 | bp = xfs_growfs_get_hdr_buf(mp, | ||
422 | XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)), | ||
423 | BTOBB(mp->m_sb.sb_blocksize), 0, | ||
424 | &xfs_inobt_buf_ops); | ||
425 | if (!bp) { | ||
426 | error = ENOMEM; | ||
427 | goto error0; | ||
428 | } | ||
429 | |||
430 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
431 | xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC, | ||
432 | 0, 0, agno, | ||
433 | XFS_BTREE_CRC_BLOCKS); | ||
434 | else | ||
435 | xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0, | ||
436 | 0, agno, 0); | ||
437 | |||
438 | error = xfs_bwrite(bp); | ||
439 | xfs_buf_relse(bp); | ||
440 | if (error) | ||
441 | goto error0; | ||
442 | } | ||
443 | |||
410 | } | 444 | } |
411 | xfs_trans_agblocks_delta(tp, nfree); | 445 | xfs_trans_agblocks_delta(tp, nfree); |
412 | /* | 446 | /* |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 8f711db61a0c..6ac0c2986c32 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -112,6 +112,66 @@ xfs_inobt_get_rec( | |||
112 | } | 112 | } |
113 | 113 | ||
114 | /* | 114 | /* |
115 | * Insert a single inobt record. Cursor must already point to desired location. | ||
116 | */ | ||
117 | STATIC int | ||
118 | xfs_inobt_insert_rec( | ||
119 | struct xfs_btree_cur *cur, | ||
120 | __int32_t freecount, | ||
121 | xfs_inofree_t free, | ||
122 | int *stat) | ||
123 | { | ||
124 | cur->bc_rec.i.ir_freecount = freecount; | ||
125 | cur->bc_rec.i.ir_free = free; | ||
126 | return xfs_btree_insert(cur, stat); | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * Insert records describing a newly allocated inode chunk into the inobt. | ||
131 | */ | ||
132 | STATIC int | ||
133 | xfs_inobt_insert( | ||
134 | struct xfs_mount *mp, | ||
135 | struct xfs_trans *tp, | ||
136 | struct xfs_buf *agbp, | ||
137 | xfs_agino_t newino, | ||
138 | xfs_agino_t newlen, | ||
139 | xfs_btnum_t btnum) | ||
140 | { | ||
141 | struct xfs_btree_cur *cur; | ||
142 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); | ||
143 | xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); | ||
144 | xfs_agino_t thisino; | ||
145 | int i; | ||
146 | int error; | ||
147 | |||
148 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); | ||
149 | |||
150 | for (thisino = newino; | ||
151 | thisino < newino + newlen; | ||
152 | thisino += XFS_INODES_PER_CHUNK) { | ||
153 | error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i); | ||
154 | if (error) { | ||
155 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
156 | return error; | ||
157 | } | ||
158 | ASSERT(i == 0); | ||
159 | |||
160 | error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK, | ||
161 | XFS_INOBT_ALL_FREE, &i); | ||
162 | if (error) { | ||
163 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
164 | return error; | ||
165 | } | ||
166 | ASSERT(i == 1); | ||
167 | } | ||
168 | |||
169 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
170 | |||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | /* | ||
115 | * Verify that the number of free inodes in the AGI is correct. | 175 | * Verify that the number of free inodes in the AGI is correct. |
116 | */ | 176 | */ |
117 | #ifdef DEBUG | 177 | #ifdef DEBUG |
@@ -303,13 +363,10 @@ xfs_ialloc_ag_alloc( | |||
303 | { | 363 | { |
304 | xfs_agi_t *agi; /* allocation group header */ | 364 | xfs_agi_t *agi; /* allocation group header */ |
305 | xfs_alloc_arg_t args; /* allocation argument structure */ | 365 | xfs_alloc_arg_t args; /* allocation argument structure */ |
306 | xfs_btree_cur_t *cur; /* inode btree cursor */ | ||
307 | xfs_agnumber_t agno; | 366 | xfs_agnumber_t agno; |
308 | int error; | 367 | int error; |
309 | int i; | ||
310 | xfs_agino_t newino; /* new first inode's number */ | 368 | xfs_agino_t newino; /* new first inode's number */ |
311 | xfs_agino_t newlen; /* new number of inodes */ | 369 | xfs_agino_t newlen; /* new number of inodes */ |
312 | xfs_agino_t thisino; /* current inode number, for loop */ | ||
313 | int isaligned = 0; /* inode allocation at stripe unit */ | 370 | int isaligned = 0; /* inode allocation at stripe unit */ |
314 | /* boundary */ | 371 | /* boundary */ |
315 | struct xfs_perag *pag; | 372 | struct xfs_perag *pag; |
@@ -459,29 +516,19 @@ xfs_ialloc_ag_alloc( | |||
459 | agi->agi_newino = cpu_to_be32(newino); | 516 | agi->agi_newino = cpu_to_be32(newino); |
460 | 517 | ||
461 | /* | 518 | /* |
462 | * Insert records describing the new inode chunk into the btree. | 519 | * Insert records describing the new inode chunk into the btrees. |
463 | */ | 520 | */ |
464 | cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno); | 521 | error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, |
465 | for (thisino = newino; | 522 | XFS_BTNUM_INO); |
466 | thisino < newino + newlen; | 523 | if (error) |
467 | thisino += XFS_INODES_PER_CHUNK) { | 524 | return error; |
468 | cur->bc_rec.i.ir_startino = thisino; | 525 | |
469 | cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK; | 526 | if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { |
470 | cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE; | 527 | error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, |
471 | error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i); | 528 | XFS_BTNUM_FINO); |
472 | if (error) { | 529 | if (error) |
473 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
474 | return error; | ||
475 | } | ||
476 | ASSERT(i == 0); | ||
477 | error = xfs_btree_insert(cur, &i); | ||
478 | if (error) { | ||
479 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
480 | return error; | 530 | return error; |
481 | } | ||
482 | ASSERT(i == 1); | ||
483 | } | 531 | } |
484 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
485 | /* | 532 | /* |
486 | * Log allocation group header fields | 533 | * Log allocation group header fields |
487 | */ | 534 | */ |
@@ -675,13 +722,10 @@ xfs_ialloc_get_rec( | |||
675 | } | 722 | } |
676 | 723 | ||
677 | /* | 724 | /* |
678 | * Allocate an inode. | 725 | * Allocate an inode using the inobt-only algorithm. |
679 | * | ||
680 | * The caller selected an AG for us, and made sure that free inodes are | ||
681 | * available. | ||
682 | */ | 726 | */ |
683 | STATIC int | 727 | STATIC int |
684 | xfs_dialloc_ag( | 728 | xfs_dialloc_ag_inobt( |
685 | struct xfs_trans *tp, | 729 | struct xfs_trans *tp, |
686 | struct xfs_buf *agbp, | 730 | struct xfs_buf *agbp, |
687 | xfs_ino_t parent, | 731 | xfs_ino_t parent, |
@@ -707,7 +751,7 @@ xfs_dialloc_ag( | |||
707 | ASSERT(pag->pagi_freecount > 0); | 751 | ASSERT(pag->pagi_freecount > 0); |
708 | 752 | ||
709 | restart_pagno: | 753 | restart_pagno: |
710 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); | 754 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); |
711 | /* | 755 | /* |
712 | * If pagino is 0 (this is the root inode allocation) use newino. | 756 | * If pagino is 0 (this is the root inode allocation) use newino. |
713 | * This must work because we've just allocated some. | 757 | * This must work because we've just allocated some. |
@@ -940,6 +984,294 @@ error0: | |||
940 | } | 984 | } |
941 | 985 | ||
942 | /* | 986 | /* |
987 | * Use the free inode btree to allocate an inode based on distance from the | ||
988 | * parent. Note that the provided cursor may be deleted and replaced. | ||
989 | */ | ||
990 | STATIC int | ||
991 | xfs_dialloc_ag_finobt_near( | ||
992 | xfs_agino_t pagino, | ||
993 | struct xfs_btree_cur **ocur, | ||
994 | struct xfs_inobt_rec_incore *rec) | ||
995 | { | ||
996 | struct xfs_btree_cur *lcur = *ocur; /* left search cursor */ | ||
997 | struct xfs_btree_cur *rcur; /* right search cursor */ | ||
998 | struct xfs_inobt_rec_incore rrec; | ||
999 | int error; | ||
1000 | int i, j; | ||
1001 | |||
1002 | error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i); | ||
1003 | if (error) | ||
1004 | return error; | ||
1005 | |||
1006 | if (i == 1) { | ||
1007 | error = xfs_inobt_get_rec(lcur, rec, &i); | ||
1008 | if (error) | ||
1009 | return error; | ||
1010 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1011 | |||
1012 | /* | ||
1013 | * See if we've landed in the parent inode record. The finobt | ||
1014 | * only tracks chunks with at least one free inode, so record | ||
1015 | * existence is enough. | ||
1016 | */ | ||
1017 | if (pagino >= rec->ir_startino && | ||
1018 | pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK)) | ||
1019 | return 0; | ||
1020 | } | ||
1021 | |||
1022 | error = xfs_btree_dup_cursor(lcur, &rcur); | ||
1023 | if (error) | ||
1024 | return error; | ||
1025 | |||
1026 | error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j); | ||
1027 | if (error) | ||
1028 | goto error_rcur; | ||
1029 | if (j == 1) { | ||
1030 | error = xfs_inobt_get_rec(rcur, &rrec, &j); | ||
1031 | if (error) | ||
1032 | goto error_rcur; | ||
1033 | XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur); | ||
1034 | } | ||
1035 | |||
1036 | XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur); | ||
1037 | if (i == 1 && j == 1) { | ||
1038 | /* | ||
1039 | * Both the left and right records are valid. Choose the closer | ||
1040 | * inode chunk to the target. | ||
1041 | */ | ||
1042 | if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) > | ||
1043 | (rrec.ir_startino - pagino)) { | ||
1044 | *rec = rrec; | ||
1045 | xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); | ||
1046 | *ocur = rcur; | ||
1047 | } else { | ||
1048 | xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); | ||
1049 | } | ||
1050 | } else if (j == 1) { | ||
1051 | /* only the right record is valid */ | ||
1052 | *rec = rrec; | ||
1053 | xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); | ||
1054 | *ocur = rcur; | ||
1055 | } else if (i == 1) { | ||
1056 | /* only the left record is valid */ | ||
1057 | xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); | ||
1058 | } | ||
1059 | |||
1060 | return 0; | ||
1061 | |||
1062 | error_rcur: | ||
1063 | xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR); | ||
1064 | return error; | ||
1065 | } | ||
1066 | |||
1067 | /* | ||
1068 | * Use the free inode btree to find a free inode based on a newino hint. If | ||
1069 | * the hint is NULL or not in the finobt, find the first free inode in the AG. | ||
1070 | */ | ||
1071 | STATIC int | ||
1072 | xfs_dialloc_ag_finobt_newino( | ||
1073 | struct xfs_agi *agi, | ||
1074 | struct xfs_btree_cur *cur, | ||
1075 | struct xfs_inobt_rec_incore *rec) | ||
1076 | { | ||
1077 | int error; | ||
1078 | int i; | ||
1079 | |||
1080 | if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { | ||
1081 | error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ, | ||
1082 | &i); | ||
1083 | if (error) | ||
1084 | return error; | ||
1085 | if (i == 1) { | ||
1086 | error = xfs_inobt_get_rec(cur, rec, &i); | ||
1087 | if (error) | ||
1088 | return error; | ||
1089 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1090 | |||
1091 | return 0; | ||
1092 | } | ||
1093 | } | ||
1094 | |||
1095 | /* | ||
1096 | * Find the first inode available in the AG. | ||
1097 | */ | ||
1098 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); | ||
1099 | if (error) | ||
1100 | return error; | ||
1101 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1102 | |||
1103 | error = xfs_inobt_get_rec(cur, rec, &i); | ||
1104 | if (error) | ||
1105 | return error; | ||
1106 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1107 | |||
1108 | return 0; | ||
1109 | } | ||
1110 | |||
1111 | /* | ||
1112 | * Update the inobt based on a modification made to the finobt. Also ensure that | ||
1113 | * the records from both trees are equivalent post-modification. | ||
1114 | */ | ||
1115 | STATIC int | ||
1116 | xfs_dialloc_ag_update_inobt( | ||
1117 | struct xfs_btree_cur *cur, /* inobt cursor */ | ||
1118 | struct xfs_inobt_rec_incore *frec, /* finobt record */ | ||
1119 | int offset) /* inode offset */ | ||
1120 | { | ||
1121 | struct xfs_inobt_rec_incore rec; | ||
1122 | int error; | ||
1123 | int i; | ||
1124 | |||
1125 | error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); | ||
1126 | if (error) | ||
1127 | return error; | ||
1128 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1129 | |||
1130 | error = xfs_inobt_get_rec(cur, &rec, &i); | ||
1131 | if (error) | ||
1132 | return error; | ||
1133 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1134 | ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % | ||
1135 | XFS_INODES_PER_CHUNK) == 0); | ||
1136 | |||
1137 | rec.ir_free &= ~XFS_INOBT_MASK(offset); | ||
1138 | rec.ir_freecount--; | ||
1139 | |||
1140 | XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && | ||
1141 | (rec.ir_freecount == frec->ir_freecount)); | ||
1142 | |||
1143 | error = xfs_inobt_update(cur, &rec); | ||
1144 | if (error) | ||
1145 | return error; | ||
1146 | |||
1147 | return 0; | ||
1148 | } | ||
1149 | |||
1150 | /* | ||
1151 | * Allocate an inode using the free inode btree, if available. Otherwise, fall | ||
1152 | * back to the inobt search algorithm. | ||
1153 | * | ||
1154 | * The caller selected an AG for us, and made sure that free inodes are | ||
1155 | * available. | ||
1156 | */ | ||
1157 | STATIC int | ||
1158 | xfs_dialloc_ag( | ||
1159 | struct xfs_trans *tp, | ||
1160 | struct xfs_buf *agbp, | ||
1161 | xfs_ino_t parent, | ||
1162 | xfs_ino_t *inop) | ||
1163 | { | ||
1164 | struct xfs_mount *mp = tp->t_mountp; | ||
1165 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); | ||
1166 | xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); | ||
1167 | xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); | ||
1168 | xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); | ||
1169 | struct xfs_perag *pag; | ||
1170 | struct xfs_btree_cur *cur; /* finobt cursor */ | ||
1171 | struct xfs_btree_cur *icur; /* inobt cursor */ | ||
1172 | struct xfs_inobt_rec_incore rec; | ||
1173 | xfs_ino_t ino; | ||
1174 | int error; | ||
1175 | int offset; | ||
1176 | int i; | ||
1177 | |||
1178 | if (!xfs_sb_version_hasfinobt(&mp->m_sb)) | ||
1179 | return xfs_dialloc_ag_inobt(tp, agbp, parent, inop); | ||
1180 | |||
1181 | pag = xfs_perag_get(mp, agno); | ||
1182 | |||
1183 | /* | ||
1184 | * If pagino is 0 (this is the root inode allocation) use newino. | ||
1185 | * This must work because we've just allocated some. | ||
1186 | */ | ||
1187 | if (!pagino) | ||
1188 | pagino = be32_to_cpu(agi->agi_newino); | ||
1189 | |||
1190 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); | ||
1191 | |||
1192 | error = xfs_check_agi_freecount(cur, agi); | ||
1193 | if (error) | ||
1194 | goto error_cur; | ||
1195 | |||
1196 | /* | ||
1197 | * The search algorithm depends on whether we're in the same AG as the | ||
1198 | * parent. If so, find the closest available inode to the parent. If | ||
1199 | * not, consider the agi hint or find the first free inode in the AG. | ||
1200 | */ | ||
1201 | if (agno == pagno) | ||
1202 | error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec); | ||
1203 | else | ||
1204 | error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec); | ||
1205 | if (error) | ||
1206 | goto error_cur; | ||
1207 | |||
1208 | offset = xfs_lowbit64(rec.ir_free); | ||
1209 | ASSERT(offset >= 0); | ||
1210 | ASSERT(offset < XFS_INODES_PER_CHUNK); | ||
1211 | ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % | ||
1212 | XFS_INODES_PER_CHUNK) == 0); | ||
1213 | ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); | ||
1214 | |||
1215 | /* | ||
1216 | * Modify or remove the finobt record. | ||
1217 | */ | ||
1218 | rec.ir_free &= ~XFS_INOBT_MASK(offset); | ||
1219 | rec.ir_freecount--; | ||
1220 | if (rec.ir_freecount) | ||
1221 | error = xfs_inobt_update(cur, &rec); | ||
1222 | else | ||
1223 | error = xfs_btree_delete(cur, &i); | ||
1224 | if (error) | ||
1225 | goto error_cur; | ||
1226 | |||
1227 | /* | ||
1228 | * The finobt has now been updated appropriately. We haven't updated the | ||
1229 | * agi and superblock yet, so we can create an inobt cursor and validate | ||
1230 | * the original freecount. If all is well, make the equivalent update to | ||
1231 | * the inobt using the finobt record and offset information. | ||
1232 | */ | ||
1233 | icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); | ||
1234 | |||
1235 | error = xfs_check_agi_freecount(icur, agi); | ||
1236 | if (error) | ||
1237 | goto error_icur; | ||
1238 | |||
1239 | error = xfs_dialloc_ag_update_inobt(icur, &rec, offset); | ||
1240 | if (error) | ||
1241 | goto error_icur; | ||
1242 | |||
1243 | /* | ||
1244 | * Both trees have now been updated. We must update the perag and | ||
1245 | * superblock before we can check the freecount for each btree. | ||
1246 | */ | ||
1247 | be32_add_cpu(&agi->agi_freecount, -1); | ||
1248 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); | ||
1249 | pag->pagi_freecount--; | ||
1250 | |||
1251 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); | ||
1252 | |||
1253 | error = xfs_check_agi_freecount(icur, agi); | ||
1254 | if (error) | ||
1255 | goto error_icur; | ||
1256 | error = xfs_check_agi_freecount(cur, agi); | ||
1257 | if (error) | ||
1258 | goto error_icur; | ||
1259 | |||
1260 | xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR); | ||
1261 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
1262 | xfs_perag_put(pag); | ||
1263 | *inop = ino; | ||
1264 | return 0; | ||
1265 | |||
1266 | error_icur: | ||
1267 | xfs_btree_del_cursor(icur, XFS_BTREE_ERROR); | ||
1268 | error_cur: | ||
1269 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
1270 | xfs_perag_put(pag); | ||
1271 | return error; | ||
1272 | } | ||
1273 | |||
1274 | /* | ||
943 | * Allocate an inode on disk. | 1275 | * Allocate an inode on disk. |
944 | * | 1276 | * |
945 | * Mode is used to tell whether the new inode will need space, and whether it | 1277 | * Mode is used to tell whether the new inode will need space, and whether it |
@@ -1098,78 +1430,34 @@ out_error: | |||
1098 | return XFS_ERROR(error); | 1430 | return XFS_ERROR(error); |
1099 | } | 1431 | } |
1100 | 1432 | ||
1101 | /* | 1433 | STATIC int |
1102 | * Free disk inode. Carefully avoids touching the incore inode, all | 1434 | xfs_difree_inobt( |
1103 | * manipulations incore are the caller's responsibility. | 1435 | struct xfs_mount *mp, |
1104 | * The on-disk inode is not changed by this operation, only the | 1436 | struct xfs_trans *tp, |
1105 | * btree (free inode mask) is changed. | 1437 | struct xfs_buf *agbp, |
1106 | */ | 1438 | xfs_agino_t agino, |
1107 | int | 1439 | struct xfs_bmap_free *flist, |
1108 | xfs_difree( | 1440 | int *delete, |
1109 | xfs_trans_t *tp, /* transaction pointer */ | 1441 | xfs_ino_t *first_ino, |
1110 | xfs_ino_t inode, /* inode to be freed */ | 1442 | struct xfs_inobt_rec_incore *orec) |
1111 | xfs_bmap_free_t *flist, /* extents to free */ | ||
1112 | int *delete, /* set if inode cluster was deleted */ | ||
1113 | xfs_ino_t *first_ino) /* first inode in deleted cluster */ | ||
1114 | { | 1443 | { |
1115 | /* REFERENCED */ | 1444 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); |
1116 | xfs_agblock_t agbno; /* block number containing inode */ | 1445 | xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); |
1117 | xfs_buf_t *agbp; /* buffer containing allocation group header */ | 1446 | struct xfs_perag *pag; |
1118 | xfs_agino_t agino; /* inode number relative to allocation group */ | 1447 | struct xfs_btree_cur *cur; |
1119 | xfs_agnumber_t agno; /* allocation group number */ | 1448 | struct xfs_inobt_rec_incore rec; |
1120 | xfs_agi_t *agi; /* allocation group header */ | 1449 | int ilen; |
1121 | xfs_btree_cur_t *cur; /* inode btree cursor */ | 1450 | int error; |
1122 | int error; /* error return value */ | 1451 | int i; |
1123 | int i; /* result code */ | 1452 | int off; |
1124 | int ilen; /* inodes in an inode cluster */ | ||
1125 | xfs_mount_t *mp; /* mount structure for filesystem */ | ||
1126 | int off; /* offset of inode in inode chunk */ | ||
1127 | xfs_inobt_rec_incore_t rec; /* btree record */ | ||
1128 | struct xfs_perag *pag; | ||
1129 | |||
1130 | mp = tp->t_mountp; | ||
1131 | 1453 | ||
1132 | /* | ||
1133 | * Break up inode number into its components. | ||
1134 | */ | ||
1135 | agno = XFS_INO_TO_AGNO(mp, inode); | ||
1136 | if (agno >= mp->m_sb.sb_agcount) { | ||
1137 | xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", | ||
1138 | __func__, agno, mp->m_sb.sb_agcount); | ||
1139 | ASSERT(0); | ||
1140 | return XFS_ERROR(EINVAL); | ||
1141 | } | ||
1142 | agino = XFS_INO_TO_AGINO(mp, inode); | ||
1143 | if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { | ||
1144 | xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", | ||
1145 | __func__, (unsigned long long)inode, | ||
1146 | (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); | ||
1147 | ASSERT(0); | ||
1148 | return XFS_ERROR(EINVAL); | ||
1149 | } | ||
1150 | agbno = XFS_AGINO_TO_AGBNO(mp, agino); | ||
1151 | if (agbno >= mp->m_sb.sb_agblocks) { | ||
1152 | xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", | ||
1153 | __func__, agbno, mp->m_sb.sb_agblocks); | ||
1154 | ASSERT(0); | ||
1155 | return XFS_ERROR(EINVAL); | ||
1156 | } | ||
1157 | /* | ||
1158 | * Get the allocation group header. | ||
1159 | */ | ||
1160 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); | ||
1161 | if (error) { | ||
1162 | xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", | ||
1163 | __func__, error); | ||
1164 | return error; | ||
1165 | } | ||
1166 | agi = XFS_BUF_TO_AGI(agbp); | ||
1167 | ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); | 1454 | ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); |
1168 | ASSERT(agbno < be32_to_cpu(agi->agi_length)); | 1455 | ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length)); |
1456 | |||
1169 | /* | 1457 | /* |
1170 | * Initialize the cursor. | 1458 | * Initialize the cursor. |
1171 | */ | 1459 | */ |
1172 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); | 1460 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); |
1173 | 1461 | ||
1174 | error = xfs_check_agi_freecount(cur, agi); | 1462 | error = xfs_check_agi_freecount(cur, agi); |
1175 | if (error) | 1463 | if (error) |
@@ -1261,6 +1549,7 @@ xfs_difree( | |||
1261 | if (error) | 1549 | if (error) |
1262 | goto error0; | 1550 | goto error0; |
1263 | 1551 | ||
1552 | *orec = rec; | ||
1264 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 1553 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
1265 | return 0; | 1554 | return 0; |
1266 | 1555 | ||
@@ -1269,6 +1558,182 @@ error0: | |||
1269 | return error; | 1558 | return error; |
1270 | } | 1559 | } |
1271 | 1560 | ||
1561 | /* | ||
1562 | * Free an inode in the free inode btree. | ||
1563 | */ | ||
1564 | STATIC int | ||
1565 | xfs_difree_finobt( | ||
1566 | struct xfs_mount *mp, | ||
1567 | struct xfs_trans *tp, | ||
1568 | struct xfs_buf *agbp, | ||
1569 | xfs_agino_t agino, | ||
1570 | struct xfs_inobt_rec_incore *ibtrec) /* inobt record */ | ||
1571 | { | ||
1572 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); | ||
1573 | xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); | ||
1574 | struct xfs_btree_cur *cur; | ||
1575 | struct xfs_inobt_rec_incore rec; | ||
1576 | int offset = agino - ibtrec->ir_startino; | ||
1577 | int error; | ||
1578 | int i; | ||
1579 | |||
1580 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); | ||
1581 | |||
1582 | error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i); | ||
1583 | if (error) | ||
1584 | goto error; | ||
1585 | if (i == 0) { | ||
1586 | /* | ||
1587 | * If the record does not exist in the finobt, we must have just | ||
1588 | * freed an inode in a previously fully allocated chunk. If not, | ||
1589 | * something is out of sync. | ||
1590 | */ | ||
1591 | XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error); | ||
1592 | |||
1593 | error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, | ||
1594 | ibtrec->ir_free, &i); | ||
1595 | if (error) | ||
1596 | goto error; | ||
1597 | ASSERT(i == 1); | ||
1598 | |||
1599 | goto out; | ||
1600 | } | ||
1601 | |||
1602 | /* | ||
1603 | * Read and update the existing record. We could just copy the ibtrec | ||
1604 | * across here, but that would defeat the purpose of having redundant | ||
1605 | * metadata. By making the modifications independently, we can catch | ||
1606 | * corruptions that we wouldn't see if we just copied from one record | ||
1607 | * to another. | ||
1608 | */ | ||
1609 | error = xfs_inobt_get_rec(cur, &rec, &i); | ||
1610 | if (error) | ||
1611 | goto error; | ||
1612 | XFS_WANT_CORRUPTED_GOTO(i == 1, error); | ||
1613 | |||
1614 | rec.ir_free |= XFS_INOBT_MASK(offset); | ||
1615 | rec.ir_freecount++; | ||
1616 | |||
1617 | XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) && | ||
1618 | (rec.ir_freecount == ibtrec->ir_freecount), | ||
1619 | error); | ||
1620 | |||
1621 | /* | ||
1622 | * The content of inobt records should always match between the inobt | ||
1623 | * and finobt. The lifecycle of records in the finobt is different from | ||
1624 | * the inobt in that the finobt only tracks records with at least one | ||
1625 | * free inode. Hence, if all of the inodes are free and we aren't | ||
1626 | * keeping inode chunks permanently on disk, remove the record. | ||
1627 | * Otherwise, update the record with the new information. | ||
1628 | */ | ||
1629 | if (rec.ir_freecount == mp->m_ialloc_inos && | ||
1630 | !(mp->m_flags & XFS_MOUNT_IKEEP)) { | ||
1631 | error = xfs_btree_delete(cur, &i); | ||
1632 | if (error) | ||
1633 | goto error; | ||
1634 | ASSERT(i == 1); | ||
1635 | } else { | ||
1636 | error = xfs_inobt_update(cur, &rec); | ||
1637 | if (error) | ||
1638 | goto error; | ||
1639 | } | ||
1640 | |||
1641 | out: | ||
1642 | error = xfs_check_agi_freecount(cur, agi); | ||
1643 | if (error) | ||
1644 | goto error; | ||
1645 | |||
1646 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
1647 | return 0; | ||
1648 | |||
1649 | error: | ||
1650 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
1651 | return error; | ||
1652 | } | ||
1653 | |||
1654 | /* | ||
1655 | * Free disk inode. Carefully avoids touching the incore inode, all | ||
1656 | * manipulations incore are the caller's responsibility. | ||
1657 | * The on-disk inode is not changed by this operation, only the | ||
1658 | * btree (free inode mask) is changed. | ||
1659 | */ | ||
1660 | int | ||
1661 | xfs_difree( | ||
1662 | struct xfs_trans *tp, /* transaction pointer */ | ||
1663 | xfs_ino_t inode, /* inode to be freed */ | ||
1664 | struct xfs_bmap_free *flist, /* extents to free */ | ||
1665 | int *delete,/* set if inode cluster was deleted */ | ||
1666 | xfs_ino_t *first_ino)/* first inode in deleted cluster */ | ||
1667 | { | ||
1668 | /* REFERENCED */ | ||
1669 | xfs_agblock_t agbno; /* block number containing inode */ | ||
1670 | struct xfs_buf *agbp; /* buffer for allocation group header */ | ||
1671 | xfs_agino_t agino; /* allocation group inode number */ | ||
1672 | xfs_agnumber_t agno; /* allocation group number */ | ||
1673 | int error; /* error return value */ | ||
1674 | struct xfs_mount *mp; /* mount structure for filesystem */ | ||
1675 | struct xfs_inobt_rec_incore rec;/* btree record */ | ||
1676 | |||
1677 | mp = tp->t_mountp; | ||
1678 | |||
1679 | /* | ||
1680 | * Break up inode number into its components. | ||
1681 | */ | ||
1682 | agno = XFS_INO_TO_AGNO(mp, inode); | ||
1683 | if (agno >= mp->m_sb.sb_agcount) { | ||
1684 | xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", | ||
1685 | __func__, agno, mp->m_sb.sb_agcount); | ||
1686 | ASSERT(0); | ||
1687 | return XFS_ERROR(EINVAL); | ||
1688 | } | ||
1689 | agino = XFS_INO_TO_AGINO(mp, inode); | ||
1690 | if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { | ||
1691 | xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", | ||
1692 | __func__, (unsigned long long)inode, | ||
1693 | (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); | ||
1694 | ASSERT(0); | ||
1695 | return XFS_ERROR(EINVAL); | ||
1696 | } | ||
1697 | agbno = XFS_AGINO_TO_AGBNO(mp, agino); | ||
1698 | if (agbno >= mp->m_sb.sb_agblocks) { | ||
1699 | xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", | ||
1700 | __func__, agbno, mp->m_sb.sb_agblocks); | ||
1701 | ASSERT(0); | ||
1702 | return XFS_ERROR(EINVAL); | ||
1703 | } | ||
1704 | /* | ||
1705 | * Get the allocation group header. | ||
1706 | */ | ||
1707 | error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); | ||
1708 | if (error) { | ||
1709 | xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", | ||
1710 | __func__, error); | ||
1711 | return error; | ||
1712 | } | ||
1713 | |||
1714 | /* | ||
1715 | * Fix up the inode allocation btree. | ||
1716 | */ | ||
1717 | error = xfs_difree_inobt(mp, tp, agbp, agino, flist, delete, first_ino, | ||
1718 | &rec); | ||
1719 | if (error) | ||
1720 | goto error0; | ||
1721 | |||
1722 | /* | ||
1723 | * Fix up the free inode btree. | ||
1724 | */ | ||
1725 | if (xfs_sb_version_hasfinobt(&mp->m_sb)) { | ||
1726 | error = xfs_difree_finobt(mp, tp, agbp, agino, &rec); | ||
1727 | if (error) | ||
1728 | goto error0; | ||
1729 | } | ||
1730 | |||
1731 | return 0; | ||
1732 | |||
1733 | error0: | ||
1734 | return error; | ||
1735 | } | ||
1736 | |||
1272 | STATIC int | 1737 | STATIC int |
1273 | xfs_imap_lookup( | 1738 | xfs_imap_lookup( |
1274 | struct xfs_mount *mp, | 1739 | struct xfs_mount *mp, |
@@ -1300,7 +1765,7 @@ xfs_imap_lookup( | |||
1300 | * we have a record, we need to ensure it contains the inode number | 1765 | * we have a record, we need to ensure it contains the inode number |
1301 | * we are looking up. | 1766 | * we are looking up. |
1302 | */ | 1767 | */ |
1303 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); | 1768 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); |
1304 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); | 1769 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); |
1305 | if (!error) { | 1770 | if (!error) { |
1306 | if (i) | 1771 | if (i) |
@@ -1488,7 +1953,16 @@ xfs_ialloc_compute_maxlevels( | |||
1488 | } | 1953 | } |
1489 | 1954 | ||
1490 | /* | 1955 | /* |
1491 | * Log specified fields for the ag hdr (inode section) | 1956 | * Log specified fields for the ag hdr (inode section). The growth of the agi |
1957 | * structure over time requires that we interpret the buffer as two logical | ||
1958 | * regions delineated by the end of the unlinked list. This is due to the size | ||
1959 | * of the hash table and its location in the middle of the agi. | ||
1960 | * | ||
1961 | * For example, a request to log a field before agi_unlinked and a field after | ||
1962 | * agi_unlinked could cause us to log the entire hash table and use an excessive | ||
1963 | * amount of log space. To avoid this behavior, log the region up through | ||
1964 | * agi_unlinked in one call and the region after agi_unlinked through the end of | ||
1965 | * the structure in another. | ||
1492 | */ | 1966 | */ |
1493 | void | 1967 | void |
1494 | xfs_ialloc_log_agi( | 1968 | xfs_ialloc_log_agi( |
@@ -1511,6 +1985,8 @@ xfs_ialloc_log_agi( | |||
1511 | offsetof(xfs_agi_t, agi_newino), | 1985 | offsetof(xfs_agi_t, agi_newino), |
1512 | offsetof(xfs_agi_t, agi_dirino), | 1986 | offsetof(xfs_agi_t, agi_dirino), |
1513 | offsetof(xfs_agi_t, agi_unlinked), | 1987 | offsetof(xfs_agi_t, agi_unlinked), |
1988 | offsetof(xfs_agi_t, agi_free_root), | ||
1989 | offsetof(xfs_agi_t, agi_free_level), | ||
1514 | sizeof(xfs_agi_t) | 1990 | sizeof(xfs_agi_t) |
1515 | }; | 1991 | }; |
1516 | #ifdef DEBUG | 1992 | #ifdef DEBUG |
@@ -1519,15 +1995,30 @@ xfs_ialloc_log_agi( | |||
1519 | agi = XFS_BUF_TO_AGI(bp); | 1995 | agi = XFS_BUF_TO_AGI(bp); |
1520 | ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); | 1996 | ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); |
1521 | #endif | 1997 | #endif |
1998 | |||
1999 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); | ||
2000 | |||
1522 | /* | 2001 | /* |
1523 | * Compute byte offsets for the first and last fields. | 2002 | * Compute byte offsets for the first and last fields in the first |
2003 | * region and log the agi buffer. This only logs up through | ||
2004 | * agi_unlinked. | ||
1524 | */ | 2005 | */ |
1525 | xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); | 2006 | if (fields & XFS_AGI_ALL_BITS_R1) { |
2007 | xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1, | ||
2008 | &first, &last); | ||
2009 | xfs_trans_log_buf(tp, bp, first, last); | ||
2010 | } | ||
2011 | |||
1526 | /* | 2012 | /* |
1527 | * Log the allocation group inode header buffer. | 2013 | * Mask off the bits in the first region and calculate the first and |
2014 | * last field offsets for any bits in the second region. | ||
1528 | */ | 2015 | */ |
1529 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); | 2016 | fields &= ~XFS_AGI_ALL_BITS_R1; |
1530 | xfs_trans_log_buf(tp, bp, first, last); | 2017 | if (fields) { |
2018 | xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2, | ||
2019 | &first, &last); | ||
2020 | xfs_trans_log_buf(tp, bp, first, last); | ||
2021 | } | ||
1531 | } | 2022 | } |
1532 | 2023 | ||
1533 | #ifdef DEBUG | 2024 | #ifdef DEBUG |
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index f1630ff619dd..726f83a681a5 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c | |||
@@ -49,7 +49,8 @@ xfs_inobt_dup_cursor( | |||
49 | struct xfs_btree_cur *cur) | 49 | struct xfs_btree_cur *cur) |
50 | { | 50 | { |
51 | return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, | 51 | return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, |
52 | cur->bc_private.a.agbp, cur->bc_private.a.agno); | 52 | cur->bc_private.a.agbp, cur->bc_private.a.agno, |
53 | cur->bc_btnum); | ||
53 | } | 54 | } |
54 | 55 | ||
55 | STATIC void | 56 | STATIC void |
@@ -66,6 +67,21 @@ xfs_inobt_set_root( | |||
66 | xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); | 67 | xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); |
67 | } | 68 | } |
68 | 69 | ||
70 | STATIC void | ||
71 | xfs_finobt_set_root( | ||
72 | struct xfs_btree_cur *cur, | ||
73 | union xfs_btree_ptr *nptr, | ||
74 | int inc) /* level change */ | ||
75 | { | ||
76 | struct xfs_buf *agbp = cur->bc_private.a.agbp; | ||
77 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); | ||
78 | |||
79 | agi->agi_free_root = nptr->s; | ||
80 | be32_add_cpu(&agi->agi_free_level, inc); | ||
81 | xfs_ialloc_log_agi(cur->bc_tp, agbp, | ||
82 | XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL); | ||
83 | } | ||
84 | |||
69 | STATIC int | 85 | STATIC int |
70 | xfs_inobt_alloc_block( | 86 | xfs_inobt_alloc_block( |
71 | struct xfs_btree_cur *cur, | 87 | struct xfs_btree_cur *cur, |
@@ -172,6 +188,17 @@ xfs_inobt_init_ptr_from_cur( | |||
172 | ptr->s = agi->agi_root; | 188 | ptr->s = agi->agi_root; |
173 | } | 189 | } |
174 | 190 | ||
191 | STATIC void | ||
192 | xfs_finobt_init_ptr_from_cur( | ||
193 | struct xfs_btree_cur *cur, | ||
194 | union xfs_btree_ptr *ptr) | ||
195 | { | ||
196 | struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); | ||
197 | |||
198 | ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); | ||
199 | ptr->s = agi->agi_free_root; | ||
200 | } | ||
201 | |||
175 | STATIC __int64_t | 202 | STATIC __int64_t |
176 | xfs_inobt_key_diff( | 203 | xfs_inobt_key_diff( |
177 | struct xfs_btree_cur *cur, | 204 | struct xfs_btree_cur *cur, |
@@ -202,6 +229,7 @@ xfs_inobt_verify( | |||
202 | */ | 229 | */ |
203 | switch (block->bb_magic) { | 230 | switch (block->bb_magic) { |
204 | case cpu_to_be32(XFS_IBT_CRC_MAGIC): | 231 | case cpu_to_be32(XFS_IBT_CRC_MAGIC): |
232 | case cpu_to_be32(XFS_FIBT_CRC_MAGIC): | ||
205 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | 233 | if (!xfs_sb_version_hascrc(&mp->m_sb)) |
206 | return false; | 234 | return false; |
207 | if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) | 235 | if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) |
@@ -213,6 +241,7 @@ xfs_inobt_verify( | |||
213 | return false; | 241 | return false; |
214 | /* fall through */ | 242 | /* fall through */ |
215 | case cpu_to_be32(XFS_IBT_MAGIC): | 243 | case cpu_to_be32(XFS_IBT_MAGIC): |
244 | case cpu_to_be32(XFS_FIBT_MAGIC): | ||
216 | break; | 245 | break; |
217 | default: | 246 | default: |
218 | return 0; | 247 | return 0; |
@@ -316,6 +345,28 @@ static const struct xfs_btree_ops xfs_inobt_ops = { | |||
316 | #endif | 345 | #endif |
317 | }; | 346 | }; |
318 | 347 | ||
348 | static const struct xfs_btree_ops xfs_finobt_ops = { | ||
349 | .rec_len = sizeof(xfs_inobt_rec_t), | ||
350 | .key_len = sizeof(xfs_inobt_key_t), | ||
351 | |||
352 | .dup_cursor = xfs_inobt_dup_cursor, | ||
353 | .set_root = xfs_finobt_set_root, | ||
354 | .alloc_block = xfs_inobt_alloc_block, | ||
355 | .free_block = xfs_inobt_free_block, | ||
356 | .get_minrecs = xfs_inobt_get_minrecs, | ||
357 | .get_maxrecs = xfs_inobt_get_maxrecs, | ||
358 | .init_key_from_rec = xfs_inobt_init_key_from_rec, | ||
359 | .init_rec_from_key = xfs_inobt_init_rec_from_key, | ||
360 | .init_rec_from_cur = xfs_inobt_init_rec_from_cur, | ||
361 | .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur, | ||
362 | .key_diff = xfs_inobt_key_diff, | ||
363 | .buf_ops = &xfs_inobt_buf_ops, | ||
364 | #if defined(DEBUG) || defined(XFS_WARN) | ||
365 | .keys_inorder = xfs_inobt_keys_inorder, | ||
366 | .recs_inorder = xfs_inobt_recs_inorder, | ||
367 | #endif | ||
368 | }; | ||
369 | |||
319 | /* | 370 | /* |
320 | * Allocate a new inode btree cursor. | 371 | * Allocate a new inode btree cursor. |
321 | */ | 372 | */ |
@@ -324,7 +375,8 @@ xfs_inobt_init_cursor( | |||
324 | struct xfs_mount *mp, /* file system mount point */ | 375 | struct xfs_mount *mp, /* file system mount point */ |
325 | struct xfs_trans *tp, /* transaction pointer */ | 376 | struct xfs_trans *tp, /* transaction pointer */ |
326 | struct xfs_buf *agbp, /* buffer for agi structure */ | 377 | struct xfs_buf *agbp, /* buffer for agi structure */ |
327 | xfs_agnumber_t agno) /* allocation group number */ | 378 | xfs_agnumber_t agno, /* allocation group number */ |
379 | xfs_btnum_t btnum) /* ialloc or free ino btree */ | ||
328 | { | 380 | { |
329 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); | 381 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); |
330 | struct xfs_btree_cur *cur; | 382 | struct xfs_btree_cur *cur; |
@@ -333,11 +385,17 @@ xfs_inobt_init_cursor( | |||
333 | 385 | ||
334 | cur->bc_tp = tp; | 386 | cur->bc_tp = tp; |
335 | cur->bc_mp = mp; | 387 | cur->bc_mp = mp; |
336 | cur->bc_nlevels = be32_to_cpu(agi->agi_level); | 388 | cur->bc_btnum = btnum; |
337 | cur->bc_btnum = XFS_BTNUM_INO; | 389 | if (btnum == XFS_BTNUM_INO) { |
390 | cur->bc_nlevels = be32_to_cpu(agi->agi_level); | ||
391 | cur->bc_ops = &xfs_inobt_ops; | ||
392 | } else { | ||
393 | cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); | ||
394 | cur->bc_ops = &xfs_finobt_ops; | ||
395 | } | ||
396 | |||
338 | cur->bc_blocklog = mp->m_sb.sb_blocklog; | 397 | cur->bc_blocklog = mp->m_sb.sb_blocklog; |
339 | 398 | ||
340 | cur->bc_ops = &xfs_inobt_ops; | ||
341 | if (xfs_sb_version_hascrc(&mp->m_sb)) | 399 | if (xfs_sb_version_hascrc(&mp->m_sb)) |
342 | cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; | 400 | cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; |
343 | 401 | ||
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index f38b22011c4e..d7ebea72c2d0 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h | |||
@@ -58,7 +58,8 @@ struct xfs_mount; | |||
58 | ((index) - 1) * sizeof(xfs_inobt_ptr_t))) | 58 | ((index) - 1) * sizeof(xfs_inobt_ptr_t))) |
59 | 59 | ||
60 | extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, | 60 | extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, |
61 | struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); | 61 | struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t, |
62 | xfs_btnum_t); | ||
62 | extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); | 63 | extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); |
63 | 64 | ||
64 | #endif /* __XFS_IALLOC_BTREE_H__ */ | 65 | #endif /* __XFS_IALLOC_BTREE_H__ */ |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 6bbfcf0b3bb2..6d6b44a508f9 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -1811,9 +1811,33 @@ xfs_inactive_ifree( | |||
1811 | int error; | 1811 | int error; |
1812 | 1812 | ||
1813 | tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); | 1813 | tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); |
1814 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0); | 1814 | |
1815 | /* | ||
1816 | * The ifree transaction might need to allocate blocks for record | ||
1817 | * insertion to the finobt. We don't want to fail here at ENOSPC, so | ||
1818 | * allow ifree to dip into the reserved block pool if necessary. | ||
1819 | * | ||
1820 | * Freeing large sets of inodes generally means freeing inode chunks, | ||
1821 | * directory and file data blocks, so this should be relatively safe. | ||
1822 | * Only under severe circumstances should it be possible to free enough | ||
1823 | * inodes to exhaust the reserve block pool via finobt expansion while | ||
1824 | * at the same time not creating free space in the filesystem. | ||
1825 | * | ||
1826 | * Send a warning if the reservation does happen to fail, as the inode | ||
1827 | * now remains allocated and sits on the unlinked list until the fs is | ||
1828 | * repaired. | ||
1829 | */ | ||
1830 | tp->t_flags |= XFS_TRANS_RESERVE; | ||
1831 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, | ||
1832 | XFS_IFREE_SPACE_RES(mp), 0); | ||
1815 | if (error) { | 1833 | if (error) { |
1816 | ASSERT(XFS_FORCED_SHUTDOWN(mp)); | 1834 | if (error == ENOSPC) { |
1835 | xfs_warn_ratelimited(mp, | ||
1836 | "Failed to remove inode(s) from unlinked list. " | ||
1837 | "Please free space, unmount and run xfs_repair."); | ||
1838 | } else { | ||
1839 | ASSERT(XFS_FORCED_SHUTDOWN(mp)); | ||
1840 | } | ||
1817 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); | 1841 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); |
1818 | return error; | 1842 | return error; |
1819 | } | 1843 | } |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f46338285152..cb64f222d607 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -270,7 +270,8 @@ xfs_bulkstat( | |||
270 | /* | 270 | /* |
271 | * Allocate and initialize a btree cursor for ialloc btree. | 271 | * Allocate and initialize a btree cursor for ialloc btree. |
272 | */ | 272 | */ |
273 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); | 273 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, |
274 | XFS_BTNUM_INO); | ||
274 | irbp = irbuf; | 275 | irbp = irbuf; |
275 | irbufend = irbuf + nirbuf; | 276 | irbufend = irbuf + nirbuf; |
276 | end_of_ag = 0; | 277 | end_of_ag = 0; |
@@ -621,7 +622,8 @@ xfs_inumbers( | |||
621 | agino = 0; | 622 | agino = 0; |
622 | continue; | 623 | continue; |
623 | } | 624 | } |
624 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); | 625 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, |
626 | XFS_BTNUM_INO); | ||
625 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, | 627 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, |
626 | &tmp); | 628 | &tmp); |
627 | if (error) { | 629 | if (error) { |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 690a64d4444e..981af0f6504b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -2138,7 +2138,9 @@ xlog_recover_validate_buf_type( | |||
2138 | bp->b_ops = &xfs_allocbt_buf_ops; | 2138 | bp->b_ops = &xfs_allocbt_buf_ops; |
2139 | break; | 2139 | break; |
2140 | case XFS_IBT_CRC_MAGIC: | 2140 | case XFS_IBT_CRC_MAGIC: |
2141 | case XFS_FIBT_CRC_MAGIC: | ||
2141 | case XFS_IBT_MAGIC: | 2142 | case XFS_IBT_MAGIC: |
2143 | case XFS_FIBT_MAGIC: | ||
2142 | bp->b_ops = &xfs_inobt_buf_ops; | 2144 | bp->b_ops = &xfs_inobt_buf_ops; |
2143 | break; | 2145 | break; |
2144 | case XFS_BMAP_CRC_MAGIC: | 2146 | case XFS_BMAP_CRC_MAGIC: |
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index f7b2fe77c5a5..950d1ea058b2 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h | |||
@@ -587,7 +587,9 @@ xfs_sb_has_compat_feature( | |||
587 | return (sbp->sb_features_compat & feature) != 0; | 587 | return (sbp->sb_features_compat & feature) != 0; |
588 | } | 588 | } |
589 | 589 | ||
590 | #define XFS_SB_FEAT_RO_COMPAT_ALL 0 | 590 | #define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ |
591 | #define XFS_SB_FEAT_RO_COMPAT_ALL \ | ||
592 | (XFS_SB_FEAT_RO_COMPAT_FINOBT) | ||
591 | #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL | 593 | #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL |
592 | static inline bool | 594 | static inline bool |
593 | xfs_sb_has_ro_compat_feature( | 595 | xfs_sb_has_ro_compat_feature( |
@@ -641,6 +643,12 @@ static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp) | |||
641 | (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)); | 643 | (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)); |
642 | } | 644 | } |
643 | 645 | ||
646 | static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp) | ||
647 | { | ||
648 | return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && | ||
649 | (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT); | ||
650 | } | ||
651 | |||
644 | /* | 652 | /* |
645 | * end of superblock version macros | 653 | * end of superblock version macros |
646 | */ | 654 | */ |
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index ce372b7d5644..f2240383d4bb 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c | |||
@@ -59,6 +59,7 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v) | |||
59 | { "abtc2", XFSSTAT_END_ABTC_V2 }, | 59 | { "abtc2", XFSSTAT_END_ABTC_V2 }, |
60 | { "bmbt2", XFSSTAT_END_BMBT_V2 }, | 60 | { "bmbt2", XFSSTAT_END_BMBT_V2 }, |
61 | { "ibt2", XFSSTAT_END_IBT_V2 }, | 61 | { "ibt2", XFSSTAT_END_IBT_V2 }, |
62 | { "fibt2", XFSSTAT_END_FIBT_V2 }, | ||
62 | /* we print both series of quota information together */ | 63 | /* we print both series of quota information together */ |
63 | { "qm", XFSSTAT_END_QM }, | 64 | { "qm", XFSSTAT_END_QM }, |
64 | }; | 65 | }; |
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index c03ad38ceaeb..c8f238b8299a 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h | |||
@@ -183,7 +183,23 @@ struct xfsstats { | |||
183 | __uint32_t xs_ibt_2_alloc; | 183 | __uint32_t xs_ibt_2_alloc; |
184 | __uint32_t xs_ibt_2_free; | 184 | __uint32_t xs_ibt_2_free; |
185 | __uint32_t xs_ibt_2_moves; | 185 | __uint32_t xs_ibt_2_moves; |
186 | #define XFSSTAT_END_XQMSTAT (XFSSTAT_END_IBT_V2+6) | 186 | #define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2+15) |
187 | __uint32_t xs_fibt_2_lookup; | ||
188 | __uint32_t xs_fibt_2_compare; | ||
189 | __uint32_t xs_fibt_2_insrec; | ||
190 | __uint32_t xs_fibt_2_delrec; | ||
191 | __uint32_t xs_fibt_2_newroot; | ||
192 | __uint32_t xs_fibt_2_killroot; | ||
193 | __uint32_t xs_fibt_2_increment; | ||
194 | __uint32_t xs_fibt_2_decrement; | ||
195 | __uint32_t xs_fibt_2_lshift; | ||
196 | __uint32_t xs_fibt_2_rshift; | ||
197 | __uint32_t xs_fibt_2_split; | ||
198 | __uint32_t xs_fibt_2_join; | ||
199 | __uint32_t xs_fibt_2_alloc; | ||
200 | __uint32_t xs_fibt_2_free; | ||
201 | __uint32_t xs_fibt_2_moves; | ||
202 | #define XFSSTAT_END_XQMSTAT (XFSSTAT_END_FIBT_V2+6) | ||
187 | __uint32_t xs_qm_dqreclaims; | 203 | __uint32_t xs_qm_dqreclaims; |
188 | __uint32_t xs_qm_dqreclaim_misses; | 204 | __uint32_t xs_qm_dqreclaim_misses; |
189 | __uint32_t xs_qm_dquot_dups; | 205 | __uint32_t xs_qm_dquot_dups; |
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index ae368165244d..52b6c3e3203e 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c | |||
@@ -106,6 +106,47 @@ xfs_calc_inode_res( | |||
106 | } | 106 | } |
107 | 107 | ||
108 | /* | 108 | /* |
109 | * The free inode btree is a conditional feature and the log reservation | ||
110 | * requirements differ slightly from that of the traditional inode allocation | ||
111 | * btree. The finobt tracks records for inode chunks with at least one free | ||
112 | * inode. A record can be removed from the tree for an inode allocation | ||
113 | * or free and thus the finobt reservation is unconditional across: | ||
114 | * | ||
115 | * - inode allocation | ||
116 | * - inode free | ||
117 | * - inode chunk allocation | ||
118 | * | ||
119 | * The 'modify' param indicates to include the record modification scenario. The | ||
120 | * 'alloc' param indicates to include the reservation for free space btree | ||
121 | * modifications on behalf of finobt modifications. This is required only for | ||
122 | * transactions that do not already account for free space btree modifications. | ||
123 | * | ||
124 | * the free inode btree: max depth * block size | ||
125 | * the allocation btrees: 2 trees * (max depth - 1) * block size | ||
126 | * the free inode btree entry: block size | ||
127 | */ | ||
128 | STATIC uint | ||
129 | xfs_calc_finobt_res( | ||
130 | struct xfs_mount *mp, | ||
131 | int alloc, | ||
132 | int modify) | ||
133 | { | ||
134 | uint res; | ||
135 | |||
136 | if (!xfs_sb_version_hasfinobt(&mp->m_sb)) | ||
137 | return 0; | ||
138 | |||
139 | res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); | ||
140 | if (alloc) | ||
141 | res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
142 | XFS_FSB_TO_B(mp, 1)); | ||
143 | if (modify) | ||
144 | res += (uint)XFS_FSB_TO_B(mp, 1); | ||
145 | |||
146 | return res; | ||
147 | } | ||
148 | |||
149 | /* | ||
109 | * Various log reservation values. | 150 | * Various log reservation values. |
110 | * | 151 | * |
111 | * These are based on the size of the file system block because that is what | 152 | * These are based on the size of the file system block because that is what |
@@ -302,6 +343,7 @@ xfs_calc_remove_reservation( | |||
302 | * the superblock for the nlink flag: sector size | 343 | * the superblock for the nlink flag: sector size |
303 | * the directory btree: (max depth + v2) * dir block size | 344 | * the directory btree: (max depth + v2) * dir block size |
304 | * the directory inode's bmap btree: (max depth + v2) * block size | 345 | * the directory inode's bmap btree: (max depth + v2) * block size |
346 | * the finobt (record modification and allocation btrees) | ||
305 | */ | 347 | */ |
306 | STATIC uint | 348 | STATIC uint |
307 | xfs_calc_create_resv_modify( | 349 | xfs_calc_create_resv_modify( |
@@ -310,7 +352,8 @@ xfs_calc_create_resv_modify( | |||
310 | return xfs_calc_inode_res(mp, 2) + | 352 | return xfs_calc_inode_res(mp, 2) + |
311 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + | 353 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + |
312 | (uint)XFS_FSB_TO_B(mp, 1) + | 354 | (uint)XFS_FSB_TO_B(mp, 1) + |
313 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)); | 355 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + |
356 | xfs_calc_finobt_res(mp, 1, 1); | ||
314 | } | 357 | } |
315 | 358 | ||
316 | /* | 359 | /* |
@@ -348,6 +391,7 @@ __xfs_calc_create_reservation( | |||
348 | * the superblock for the nlink flag: sector size | 391 | * the superblock for the nlink flag: sector size |
349 | * the inode btree: max depth * blocksize | 392 | * the inode btree: max depth * blocksize |
350 | * the allocation btrees: 2 trees * (max depth - 1) * block size | 393 | * the allocation btrees: 2 trees * (max depth - 1) * block size |
394 | * the finobt (record insertion) | ||
351 | */ | 395 | */ |
352 | STATIC uint | 396 | STATIC uint |
353 | xfs_calc_icreate_resv_alloc( | 397 | xfs_calc_icreate_resv_alloc( |
@@ -357,7 +401,8 @@ xfs_calc_icreate_resv_alloc( | |||
357 | mp->m_sb.sb_sectsize + | 401 | mp->m_sb.sb_sectsize + |
358 | xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + | 402 | xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + |
359 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | 403 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), |
360 | XFS_FSB_TO_B(mp, 1)); | 404 | XFS_FSB_TO_B(mp, 1)) + |
405 | xfs_calc_finobt_res(mp, 0, 0); | ||
361 | } | 406 | } |
362 | 407 | ||
363 | STATIC uint | 408 | STATIC uint |
@@ -425,6 +470,7 @@ xfs_calc_symlink_reservation( | |||
425 | * the on disk inode before ours in the agi hash list: inode cluster size | 470 | * the on disk inode before ours in the agi hash list: inode cluster size |
426 | * the inode btree: max depth * blocksize | 471 | * the inode btree: max depth * blocksize |
427 | * the allocation btrees: 2 trees * (max depth - 1) * block size | 472 | * the allocation btrees: 2 trees * (max depth - 1) * block size |
473 | * the finobt (record insertion, removal or modification) | ||
428 | */ | 474 | */ |
429 | STATIC uint | 475 | STATIC uint |
430 | xfs_calc_ifree_reservation( | 476 | xfs_calc_ifree_reservation( |
@@ -439,7 +485,8 @@ xfs_calc_ifree_reservation( | |||
439 | xfs_calc_buf_res(2 + mp->m_ialloc_blks + | 485 | xfs_calc_buf_res(2 + mp->m_ialloc_blks + |
440 | mp->m_in_maxlevels, 0) + | 486 | mp->m_in_maxlevels, 0) + |
441 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | 487 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), |
442 | XFS_FSB_TO_B(mp, 1)); | 488 | XFS_FSB_TO_B(mp, 1)) + |
489 | xfs_calc_finobt_res(mp, 0, 1); | ||
443 | } | 490 | } |
444 | 491 | ||
445 | /* | 492 | /* |
diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h index af5dbe06cb65..df4c1f81884c 100644 --- a/fs/xfs/xfs_trans_space.h +++ b/fs/xfs/xfs_trans_space.h | |||
@@ -47,7 +47,9 @@ | |||
47 | #define XFS_DIRREMOVE_SPACE_RES(mp) \ | 47 | #define XFS_DIRREMOVE_SPACE_RES(mp) \ |
48 | XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) | 48 | XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) |
/*
 * Block reservation for an inode chunk allocation: the chunk's own blocks
 * (m_ialloc_blks) plus (m_in_maxlevels - 1) blocks, counted once normally
 * and twice when the filesystem has a free inode btree.
 *
 * The conditional must be parenthesized: '?:' binds more loosely than '*',
 * so without the parentheses the finobt case would add a constant 2 rather
 * than 2 * (m_in_maxlevels - 1).
 */
#define XFS_IALLOC_SPACE_RES(mp)	\
	((mp)->m_ialloc_blks + \
	 ((xfs_sb_version_hasfinobt(&(mp)->m_sb) ? 2 : 1) * \
	  ((mp)->m_in_maxlevels - 1)))
51 | 53 | ||
52 | /* | 54 | /* |
53 | * Space reservation values for various transactions. | 55 | * Space reservation values for various transactions. |
@@ -82,5 +84,8 @@ | |||
82 | (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) | 84 | (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) |
83 | #define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ | 85 | #define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ |
84 | (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) | 86 | (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) |
/*
 * Block reservation for freeing an inode: m_in_maxlevels blocks when the
 * filesystem has a free inode btree, otherwise none.
 *
 * The argument is parenthesized so that any pointer expression, not just a
 * plain identifier, can be passed as 'mp'.
 */
#define XFS_IFREE_SPACE_RES(mp)	\
	(xfs_sb_version_hasfinobt(&(mp)->m_sb) ? (mp)->m_in_maxlevels : 0)
89 | |||
85 | 90 | ||
86 | #endif /* __XFS_TRANS_SPACE_H__ */ | 91 | #endif /* __XFS_TRANS_SPACE_H__ */ |
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 82bbc34d54a3..65c6e6650b1a 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h | |||
@@ -134,7 +134,7 @@ typedef enum { | |||
134 | 134 | ||
/*
 * Btree type identifiers. XFS_BTNUM_MAX is not a btree type; it must stay
 * last so that its value equals the number of identifiers before it.
 */
typedef enum {
	XFS_BTNUM_BNOi,
	XFS_BTNUM_CNTi,
	XFS_BTNUM_BMAPi,
	XFS_BTNUM_INOi,
	XFS_BTNUM_FINOi,
	XFS_BTNUM_MAX
} xfs_btnum_t;
139 | 139 | ||
140 | struct xfs_name { | 140 | struct xfs_name { |