summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Dave Chinner <david@fromorbit.com> 2014-05-14 19:37:44 -0400
committer Dave Chinner <david@fromorbit.com> 2014-05-14 19:37:44 -0400
commit b76769294ba400415fc44038c21cc2df86f9a28b (patch)
tree a1ca6152391c86ae024e09d69e7b0ad75a5c61d5
parent 232c2f5c65dd80055e7163a5c82e3816119330e6 (diff)
parent 53801fd97ae000793f51187b122b9475102199a8 (diff)
Merge branch 'xfs-free-inode-btree' into for-next
-rw-r--r-- fs/xfs/xfs_ag.h 36
-rw-r--r-- fs/xfs/xfs_btree.c 6
-rw-r--r-- fs/xfs/xfs_btree.h 3
-rw-r--r-- fs/xfs/xfs_format.h 14
-rw-r--r-- fs/xfs/xfs_fs.h 1
-rw-r--r-- fs/xfs/xfs_fsops.c 36
-rw-r--r-- fs/xfs/xfs_ialloc.c 695
-rw-r--r-- fs/xfs/xfs_ialloc_btree.c 68
-rw-r--r-- fs/xfs/xfs_ialloc_btree.h 3
-rw-r--r-- fs/xfs/xfs_inode.c 28
-rw-r--r-- fs/xfs/xfs_itable.c 6
-rw-r--r-- fs/xfs/xfs_log_recover.c 2
-rw-r--r-- fs/xfs/xfs_sb.h 10
-rw-r--r-- fs/xfs/xfs_stats.c 1
-rw-r--r-- fs/xfs/xfs_stats.h 18
-rw-r--r-- fs/xfs/xfs_trans_resv.c 53
-rw-r--r-- fs/xfs/xfs_trans_space.h 7
-rw-r--r-- fs/xfs/xfs_types.h 2
18 files changed, 852 insertions, 137 deletions
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 0fdd4109c624..6e247a99f5db 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -160,30 +160,38 @@ typedef struct xfs_agi {
160 * still being referenced. 160 * still being referenced.
161 */ 161 */
162 __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; 162 __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
163 163 /*
164 * This marks the end of logging region 1 and start of logging region 2.
165 */
164 uuid_t agi_uuid; /* uuid of filesystem */ 166 uuid_t agi_uuid; /* uuid of filesystem */
165 __be32 agi_crc; /* crc of agi sector */ 167 __be32 agi_crc; /* crc of agi sector */
166 __be32 agi_pad32; 168 __be32 agi_pad32;
167 __be64 agi_lsn; /* last write sequence */ 169 __be64 agi_lsn; /* last write sequence */
168 170
171 __be32 agi_free_root; /* root of the free inode btree */
172 __be32 agi_free_level;/* levels in free inode btree */
173
169 /* structure must be padded to 64 bit alignment */ 174 /* structure must be padded to 64 bit alignment */
170} xfs_agi_t; 175} xfs_agi_t;
171 176
172#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) 177#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc)
173 178
174#define XFS_AGI_MAGICNUM 0x00000001 179#define XFS_AGI_MAGICNUM (1 << 0)
175#define XFS_AGI_VERSIONNUM 0x00000002 180#define XFS_AGI_VERSIONNUM (1 << 1)
176#define XFS_AGI_SEQNO 0x00000004 181#define XFS_AGI_SEQNO (1 << 2)
177#define XFS_AGI_LENGTH 0x00000008 182#define XFS_AGI_LENGTH (1 << 3)
178#define XFS_AGI_COUNT 0x00000010 183#define XFS_AGI_COUNT (1 << 4)
179#define XFS_AGI_ROOT 0x00000020 184#define XFS_AGI_ROOT (1 << 5)
180#define XFS_AGI_LEVEL 0x00000040 185#define XFS_AGI_LEVEL (1 << 6)
181#define XFS_AGI_FREECOUNT 0x00000080 186#define XFS_AGI_FREECOUNT (1 << 7)
182#define XFS_AGI_NEWINO 0x00000100 187#define XFS_AGI_NEWINO (1 << 8)
183#define XFS_AGI_DIRINO 0x00000200 188#define XFS_AGI_DIRINO (1 << 9)
184#define XFS_AGI_UNLINKED 0x00000400 189#define XFS_AGI_UNLINKED (1 << 10)
185#define XFS_AGI_NUM_BITS 11 190#define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */
186#define XFS_AGI_ALL_BITS ((1 << XFS_AGI_NUM_BITS) - 1) 191#define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1)
192#define XFS_AGI_FREE_ROOT (1 << 11)
193#define XFS_AGI_FREE_LEVEL (1 << 12)
194#define XFS_AGI_NUM_BITS_R2 13
187 195
188/* disk block (xfs_daddr_t) in the AG */ 196/* disk block (xfs_daddr_t) in the AG */
189#define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) 197#define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index c13d650fdb99..182bac2bb276 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -43,9 +43,10 @@ kmem_zone_t *xfs_btree_cur_zone;
43 * Btree magic numbers. 43 * Btree magic numbers.
44 */ 44 */
45static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { 45static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
46 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }, 46 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
47 XFS_FIBT_MAGIC },
47 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, 48 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
48 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC } 49 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
49}; 50};
50#define xfs_btree_magic(cur) \ 51#define xfs_btree_magic(cur) \
51 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] 52 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
@@ -1115,6 +1116,7 @@ xfs_btree_set_refs(
1115 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); 1116 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
1116 break; 1117 break;
1117 case XFS_BTNUM_INO: 1118 case XFS_BTNUM_INO:
1119 case XFS_BTNUM_FINO:
1118 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); 1120 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
1119 break; 1121 break;
1120 case XFS_BTNUM_BMAP: 1122 case XFS_BTNUM_BMAP:
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 875f6876ff48..a04b69422f67 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -62,6 +62,7 @@ union xfs_btree_rec {
62#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) 62#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi)
63#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) 63#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi)
64#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) 64#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
65#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi)
65 66
66/* 67/*
67 * For logging record fields. 68 * For logging record fields.
@@ -92,6 +93,7 @@ do { \
92 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ 93 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \
93 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ 94 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \
94 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ 95 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
96 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \
95 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 97 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
96 } \ 98 } \
97} while (0) 99} while (0)
@@ -105,6 +107,7 @@ do { \
105 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ 107 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \
106 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ 108 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
107 case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ 109 case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
110 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \
108 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 111 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
109 } \ 112 } \
110} while (0) 113} while (0)
diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h
index 9898f31d05d8..34d85aca3058 100644
--- a/fs/xfs/xfs_format.h
+++ b/fs/xfs/xfs_format.h
@@ -202,6 +202,8 @@ typedef __be32 xfs_alloc_ptr_t;
202 */ 202 */
203#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ 203#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */
204#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */ 204#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */
205#define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */
206#define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */
205 207
206typedef __uint64_t xfs_inofree_t; 208typedef __uint64_t xfs_inofree_t;
207#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) 209#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t))
@@ -244,7 +246,17 @@ typedef __be32 xfs_inobt_ptr_t;
244 * block numbers in the AG. 246 * block numbers in the AG.
245 */ 247 */
246#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) 248#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
247#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) 249#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
250
251/*
252 * The first data block of an AG depends on whether the filesystem was formatted
253 * with the finobt feature. If so, account for the finobt reserved root btree
254 * block.
255 */
256#define XFS_PREALLOC_BLOCKS(mp) \
257 (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
258 XFS_FIBT_BLOCK(mp) + 1 : \
259 XFS_IBT_BLOCK(mp) + 1)
248 260
249 261
250 262
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index c5fc116dfaa3..d34703dbcb42 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -238,6 +238,7 @@ typedef struct xfs_fsop_resblks {
238#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ 238#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
239#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */ 239#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */
240#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ 240#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
241#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */
241 242
242/* 243/*
243 * Minimum and maximum sizes need for growth checks. 244 * Minimum and maximum sizes need for growth checks.
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 02fb943cbf22..3445ead7c1fc 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -104,7 +104,9 @@ xfs_fs_geometry(
104 (xfs_sb_version_hascrc(&mp->m_sb) ? 104 (xfs_sb_version_hascrc(&mp->m_sb) ?
105 XFS_FSOP_GEOM_FLAGS_V5SB : 0) | 105 XFS_FSOP_GEOM_FLAGS_V5SB : 0) |
106 (xfs_sb_version_hasftype(&mp->m_sb) ? 106 (xfs_sb_version_hasftype(&mp->m_sb) ?
107 XFS_FSOP_GEOM_FLAGS_FTYPE : 0); 107 XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
108 (xfs_sb_version_hasfinobt(&mp->m_sb) ?
109 XFS_FSOP_GEOM_FLAGS_FINOBT : 0);
108 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? 110 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
109 mp->m_sb.sb_logsectsize : BBSIZE; 111 mp->m_sb.sb_logsectsize : BBSIZE;
110 geo->rtsectsize = mp->m_sb.sb_blocksize; 112 geo->rtsectsize = mp->m_sb.sb_blocksize;
@@ -316,6 +318,10 @@ xfs_growfs_data_private(
316 agi->agi_dirino = cpu_to_be32(NULLAGINO); 318 agi->agi_dirino = cpu_to_be32(NULLAGINO);
317 if (xfs_sb_version_hascrc(&mp->m_sb)) 319 if (xfs_sb_version_hascrc(&mp->m_sb))
318 uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid); 320 uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid);
321 if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
322 agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
323 agi->agi_free_level = cpu_to_be32(1);
324 }
319 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) 325 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
320 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); 326 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
321 327
@@ -407,6 +413,34 @@ xfs_growfs_data_private(
407 xfs_buf_relse(bp); 413 xfs_buf_relse(bp);
408 if (error) 414 if (error)
409 goto error0; 415 goto error0;
416
417 /*
418 * FINO btree root block
419 */
420 if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
421 bp = xfs_growfs_get_hdr_buf(mp,
422 XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
423 BTOBB(mp->m_sb.sb_blocksize), 0,
424 &xfs_inobt_buf_ops);
425 if (!bp) {
426 error = ENOMEM;
427 goto error0;
428 }
429
430 if (xfs_sb_version_hascrc(&mp->m_sb))
431 xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC,
432 0, 0, agno,
433 XFS_BTREE_CRC_BLOCKS);
434 else
435 xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0,
436 0, agno, 0);
437
438 error = xfs_bwrite(bp);
439 xfs_buf_relse(bp);
440 if (error)
441 goto error0;
442 }
443
410 } 444 }
411 xfs_trans_agblocks_delta(tp, nfree); 445 xfs_trans_agblocks_delta(tp, nfree);
412 /* 446 /*
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 8f711db61a0c..6ac0c2986c32 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -112,6 +112,66 @@ xfs_inobt_get_rec(
112} 112}
113 113
114/* 114/*
115 * Insert a single inobt record. Cursor must already point to desired location.
116 */
117STATIC int
118xfs_inobt_insert_rec(
119 struct xfs_btree_cur *cur,
120 __int32_t freecount,
121 xfs_inofree_t free,
122 int *stat)
123{
124 cur->bc_rec.i.ir_freecount = freecount;
125 cur->bc_rec.i.ir_free = free;
126 return xfs_btree_insert(cur, stat);
127}
128
129/*
130 * Insert records describing a newly allocated inode chunk into the inobt.
131 */
132STATIC int
133xfs_inobt_insert(
134 struct xfs_mount *mp,
135 struct xfs_trans *tp,
136 struct xfs_buf *agbp,
137 xfs_agino_t newino,
138 xfs_agino_t newlen,
139 xfs_btnum_t btnum)
140{
141 struct xfs_btree_cur *cur;
142 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
143 xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
144 xfs_agino_t thisino;
145 int i;
146 int error;
147
148 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
149
150 for (thisino = newino;
151 thisino < newino + newlen;
152 thisino += XFS_INODES_PER_CHUNK) {
153 error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i);
154 if (error) {
155 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
156 return error;
157 }
158 ASSERT(i == 0);
159
160 error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,
161 XFS_INOBT_ALL_FREE, &i);
162 if (error) {
163 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
164 return error;
165 }
166 ASSERT(i == 1);
167 }
168
169 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
170
171 return 0;
172}
173
174/*
115 * Verify that the number of free inodes in the AGI is correct. 175 * Verify that the number of free inodes in the AGI is correct.
116 */ 176 */
117#ifdef DEBUG 177#ifdef DEBUG
@@ -303,13 +363,10 @@ xfs_ialloc_ag_alloc(
303{ 363{
304 xfs_agi_t *agi; /* allocation group header */ 364 xfs_agi_t *agi; /* allocation group header */
305 xfs_alloc_arg_t args; /* allocation argument structure */ 365 xfs_alloc_arg_t args; /* allocation argument structure */
306 xfs_btree_cur_t *cur; /* inode btree cursor */
307 xfs_agnumber_t agno; 366 xfs_agnumber_t agno;
308 int error; 367 int error;
309 int i;
310 xfs_agino_t newino; /* new first inode's number */ 368 xfs_agino_t newino; /* new first inode's number */
311 xfs_agino_t newlen; /* new number of inodes */ 369 xfs_agino_t newlen; /* new number of inodes */
312 xfs_agino_t thisino; /* current inode number, for loop */
313 int isaligned = 0; /* inode allocation at stripe unit */ 370 int isaligned = 0; /* inode allocation at stripe unit */
314 /* boundary */ 371 /* boundary */
315 struct xfs_perag *pag; 372 struct xfs_perag *pag;
@@ -459,29 +516,19 @@ xfs_ialloc_ag_alloc(
459 agi->agi_newino = cpu_to_be32(newino); 516 agi->agi_newino = cpu_to_be32(newino);
460 517
461 /* 518 /*
462 * Insert records describing the new inode chunk into the btree. 519 * Insert records describing the new inode chunk into the btrees.
463 */ 520 */
464 cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno); 521 error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
465 for (thisino = newino; 522 XFS_BTNUM_INO);
466 thisino < newino + newlen; 523 if (error)
467 thisino += XFS_INODES_PER_CHUNK) { 524 return error;
468 cur->bc_rec.i.ir_startino = thisino; 525
469 cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK; 526 if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
470 cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE; 527 error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
471 error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i); 528 XFS_BTNUM_FINO);
472 if (error) { 529 if (error)
473 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
474 return error;
475 }
476 ASSERT(i == 0);
477 error = xfs_btree_insert(cur, &i);
478 if (error) {
479 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
480 return error; 530 return error;
481 }
482 ASSERT(i == 1);
483 } 531 }
484 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
485 /* 532 /*
486 * Log allocation group header fields 533 * Log allocation group header fields
487 */ 534 */
@@ -675,13 +722,10 @@ xfs_ialloc_get_rec(
675} 722}
676 723
677/* 724/*
678 * Allocate an inode. 725 * Allocate an inode using the inobt-only algorithm.
679 *
680 * The caller selected an AG for us, and made sure that free inodes are
681 * available.
682 */ 726 */
683STATIC int 727STATIC int
684xfs_dialloc_ag( 728xfs_dialloc_ag_inobt(
685 struct xfs_trans *tp, 729 struct xfs_trans *tp,
686 struct xfs_buf *agbp, 730 struct xfs_buf *agbp,
687 xfs_ino_t parent, 731 xfs_ino_t parent,
@@ -707,7 +751,7 @@ xfs_dialloc_ag(
707 ASSERT(pag->pagi_freecount > 0); 751 ASSERT(pag->pagi_freecount > 0);
708 752
709 restart_pagno: 753 restart_pagno:
710 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 754 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
711 /* 755 /*
712 * If pagino is 0 (this is the root inode allocation) use newino. 756 * If pagino is 0 (this is the root inode allocation) use newino.
713 * This must work because we've just allocated some. 757 * This must work because we've just allocated some.
@@ -940,6 +984,294 @@ error0:
940} 984}
941 985
942/* 986/*
987 * Use the free inode btree to allocate an inode based on distance from the
988 * parent. Note that the provided cursor may be deleted and replaced.
989 */
990STATIC int
991xfs_dialloc_ag_finobt_near(
992 xfs_agino_t pagino,
993 struct xfs_btree_cur **ocur,
994 struct xfs_inobt_rec_incore *rec)
995{
996 struct xfs_btree_cur *lcur = *ocur; /* left search cursor */
997 struct xfs_btree_cur *rcur; /* right search cursor */
998 struct xfs_inobt_rec_incore rrec;
999 int error;
1000 int i, j;
1001
1002 error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i);
1003 if (error)
1004 return error;
1005
1006 if (i == 1) {
1007 error = xfs_inobt_get_rec(lcur, rec, &i);
1008 if (error)
1009 return error;
1010 XFS_WANT_CORRUPTED_RETURN(i == 1);
1011
1012 /*
1013 * See if we've landed in the parent inode record. The finobt
1014 * only tracks chunks with at least one free inode, so record
1015 * existence is enough.
1016 */
1017 if (pagino >= rec->ir_startino &&
1018 pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK))
1019 return 0;
1020 }
1021
1022 error = xfs_btree_dup_cursor(lcur, &rcur);
1023 if (error)
1024 return error;
1025
1026 error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j);
1027 if (error)
1028 goto error_rcur;
1029 if (j == 1) {
1030 error = xfs_inobt_get_rec(rcur, &rrec, &j);
1031 if (error)
1032 goto error_rcur;
1033 XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur);
1034 }
1035
1036 XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur);
1037 if (i == 1 && j == 1) {
1038 /*
1039 * Both the left and right records are valid. Choose the closer
1040 * inode chunk to the target.
1041 */
1042 if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) >
1043 (rrec.ir_startino - pagino)) {
1044 *rec = rrec;
1045 xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
1046 *ocur = rcur;
1047 } else {
1048 xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
1049 }
1050 } else if (j == 1) {
1051 /* only the right record is valid */
1052 *rec = rrec;
1053 xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
1054 *ocur = rcur;
1055 } else if (i == 1) {
1056 /* only the left record is valid */
1057 xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
1058 }
1059
1060 return 0;
1061
1062error_rcur:
1063 xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR);
1064 return error;
1065}
1066
1067/*
1068 * Use the free inode btree to find a free inode based on a newino hint. If
1069 * the hint is NULL, find the first free inode in the AG.
1070 */
1071STATIC int
1072xfs_dialloc_ag_finobt_newino(
1073 struct xfs_agi *agi,
1074 struct xfs_btree_cur *cur,
1075 struct xfs_inobt_rec_incore *rec)
1076{
1077 int error;
1078 int i;
1079
1080 if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
1081 error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ,
1082 &i);
1083 if (error)
1084 return error;
1085 if (i == 1) {
1086 error = xfs_inobt_get_rec(cur, rec, &i);
1087 if (error)
1088 return error;
1089 XFS_WANT_CORRUPTED_RETURN(i == 1);
1090
1091 return 0;
1092 }
1093 }
1094
1095 /*
1096 * Find the first inode available in the AG.
1097 */
1098 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
1099 if (error)
1100 return error;
1101 XFS_WANT_CORRUPTED_RETURN(i == 1);
1102
1103 error = xfs_inobt_get_rec(cur, rec, &i);
1104 if (error)
1105 return error;
1106 XFS_WANT_CORRUPTED_RETURN(i == 1);
1107
1108 return 0;
1109}
1110
1111/*
1112 * Update the inobt based on a modification made to the finobt. Also ensure that
1113 * the records from both trees are equivalent post-modification.
1114 */
1115STATIC int
1116xfs_dialloc_ag_update_inobt(
1117 struct xfs_btree_cur *cur, /* inobt cursor */
1118 struct xfs_inobt_rec_incore *frec, /* finobt record */
1119 int offset) /* inode offset */
1120{
1121 struct xfs_inobt_rec_incore rec;
1122 int error;
1123 int i;
1124
1125 error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
1126 if (error)
1127 return error;
1128 XFS_WANT_CORRUPTED_RETURN(i == 1);
1129
1130 error = xfs_inobt_get_rec(cur, &rec, &i);
1131 if (error)
1132 return error;
1133 XFS_WANT_CORRUPTED_RETURN(i == 1);
1134 ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
1135 XFS_INODES_PER_CHUNK) == 0);
1136
1137 rec.ir_free &= ~XFS_INOBT_MASK(offset);
1138 rec.ir_freecount--;
1139
1140 XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) &&
1141 (rec.ir_freecount == frec->ir_freecount));
1142
1143 error = xfs_inobt_update(cur, &rec);
1144 if (error)
1145 return error;
1146
1147 return 0;
1148}
1149
1150/*
1151 * Allocate an inode using the free inode btree, if available. Otherwise, fall
1152 * back to the inobt search algorithm.
1153 *
1154 * The caller selected an AG for us, and made sure that free inodes are
1155 * available.
1156 */
1157STATIC int
1158xfs_dialloc_ag(
1159 struct xfs_trans *tp,
1160 struct xfs_buf *agbp,
1161 xfs_ino_t parent,
1162 xfs_ino_t *inop)
1163{
1164 struct xfs_mount *mp = tp->t_mountp;
1165 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
1166 xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
1167 xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent);
1168 xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent);
1169 struct xfs_perag *pag;
1170 struct xfs_btree_cur *cur; /* finobt cursor */
1171 struct xfs_btree_cur *icur; /* inobt cursor */
1172 struct xfs_inobt_rec_incore rec;
1173 xfs_ino_t ino;
1174 int error;
1175 int offset;
1176 int i;
1177
1178 if (!xfs_sb_version_hasfinobt(&mp->m_sb))
1179 return xfs_dialloc_ag_inobt(tp, agbp, parent, inop);
1180
1181 pag = xfs_perag_get(mp, agno);
1182
1183 /*
1184 * If pagino is 0 (this is the root inode allocation) use newino.
1185 * This must work because we've just allocated some.
1186 */
1187 if (!pagino)
1188 pagino = be32_to_cpu(agi->agi_newino);
1189
1190 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
1191
1192 error = xfs_check_agi_freecount(cur, agi);
1193 if (error)
1194 goto error_cur;
1195
1196 /*
1197 * The search algorithm depends on whether we're in the same AG as the
1198 * parent. If so, find the closest available inode to the parent. If
1199 * not, consider the agi hint or find the first free inode in the AG.
1200 */
1201 if (agno == pagno)
1202 error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
1203 else
1204 error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
1205 if (error)
1206 goto error_cur;
1207
1208 offset = xfs_lowbit64(rec.ir_free);
1209 ASSERT(offset >= 0);
1210 ASSERT(offset < XFS_INODES_PER_CHUNK);
1211 ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
1212 XFS_INODES_PER_CHUNK) == 0);
1213 ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
1214
1215 /*
1216 * Modify or remove the finobt record.
1217 */
1218 rec.ir_free &= ~XFS_INOBT_MASK(offset);
1219 rec.ir_freecount--;
1220 if (rec.ir_freecount)
1221 error = xfs_inobt_update(cur, &rec);
1222 else
1223 error = xfs_btree_delete(cur, &i);
1224 if (error)
1225 goto error_cur;
1226
1227 /*
1228 * The finobt has now been updated appropriately. We haven't updated the
1229 * agi and superblock yet, so we can create an inobt cursor and validate
1230 * the original freecount. If all is well, make the equivalent update to
1231 * the inobt using the finobt record and offset information.
1232 */
1233 icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
1234
1235 error = xfs_check_agi_freecount(icur, agi);
1236 if (error)
1237 goto error_icur;
1238
1239 error = xfs_dialloc_ag_update_inobt(icur, &rec, offset);
1240 if (error)
1241 goto error_icur;
1242
1243 /*
1244 * Both trees have now been updated. We must update the perag and
1245 * superblock before we can check the freecount for each btree.
1246 */
1247 be32_add_cpu(&agi->agi_freecount, -1);
1248 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1249 pag->pagi_freecount--;
1250
1251 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
1252
1253 error = xfs_check_agi_freecount(icur, agi);
1254 if (error)
1255 goto error_icur;
1256 error = xfs_check_agi_freecount(cur, agi);
1257 if (error)
1258 goto error_icur;
1259
1260 xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR);
1261 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1262 xfs_perag_put(pag);
1263 *inop = ino;
1264 return 0;
1265
1266error_icur:
1267 xfs_btree_del_cursor(icur, XFS_BTREE_ERROR);
1268error_cur:
1269 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1270 xfs_perag_put(pag);
1271 return error;
1272}
1273
1274/*
943 * Allocate an inode on disk. 1275 * Allocate an inode on disk.
944 * 1276 *
945 * Mode is used to tell whether the new inode will need space, and whether it 1277 * Mode is used to tell whether the new inode will need space, and whether it
@@ -1098,78 +1430,34 @@ out_error:
1098 return XFS_ERROR(error); 1430 return XFS_ERROR(error);
1099} 1431}
1100 1432
1101/* 1433STATIC int
1102 * Free disk inode. Carefully avoids touching the incore inode, all 1434xfs_difree_inobt(
1103 * manipulations incore are the caller's responsibility. 1435 struct xfs_mount *mp,
1104 * The on-disk inode is not changed by this operation, only the 1436 struct xfs_trans *tp,
1105 * btree (free inode mask) is changed. 1437 struct xfs_buf *agbp,
1106 */ 1438 xfs_agino_t agino,
1107int 1439 struct xfs_bmap_free *flist,
1108xfs_difree( 1440 int *delete,
1109 xfs_trans_t *tp, /* transaction pointer */ 1441 xfs_ino_t *first_ino,
1110 xfs_ino_t inode, /* inode to be freed */ 1442 struct xfs_inobt_rec_incore *orec)
1111 xfs_bmap_free_t *flist, /* extents to free */
1112 int *delete, /* set if inode cluster was deleted */
1113 xfs_ino_t *first_ino) /* first inode in deleted cluster */
1114{ 1443{
1115 /* REFERENCED */ 1444 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
1116 xfs_agblock_t agbno; /* block number containing inode */ 1445 xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
1117 xfs_buf_t *agbp; /* buffer containing allocation group header */ 1446 struct xfs_perag *pag;
1118 xfs_agino_t agino; /* inode number relative to allocation group */ 1447 struct xfs_btree_cur *cur;
1119 xfs_agnumber_t agno; /* allocation group number */ 1448 struct xfs_inobt_rec_incore rec;
1120 xfs_agi_t *agi; /* allocation group header */ 1449 int ilen;
1121 xfs_btree_cur_t *cur; /* inode btree cursor */ 1450 int error;
1122 int error; /* error return value */ 1451 int i;
1123 int i; /* result code */ 1452 int off;
1124 int ilen; /* inodes in an inode cluster */
1125 xfs_mount_t *mp; /* mount structure for filesystem */
1126 int off; /* offset of inode in inode chunk */
1127 xfs_inobt_rec_incore_t rec; /* btree record */
1128 struct xfs_perag *pag;
1129
1130 mp = tp->t_mountp;
1131 1453
1132 /*
1133 * Break up inode number into its components.
1134 */
1135 agno = XFS_INO_TO_AGNO(mp, inode);
1136 if (agno >= mp->m_sb.sb_agcount) {
1137 xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
1138 __func__, agno, mp->m_sb.sb_agcount);
1139 ASSERT(0);
1140 return XFS_ERROR(EINVAL);
1141 }
1142 agino = XFS_INO_TO_AGINO(mp, inode);
1143 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
1144 xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
1145 __func__, (unsigned long long)inode,
1146 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
1147 ASSERT(0);
1148 return XFS_ERROR(EINVAL);
1149 }
1150 agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1151 if (agbno >= mp->m_sb.sb_agblocks) {
1152 xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
1153 __func__, agbno, mp->m_sb.sb_agblocks);
1154 ASSERT(0);
1155 return XFS_ERROR(EINVAL);
1156 }
1157 /*
1158 * Get the allocation group header.
1159 */
1160 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1161 if (error) {
1162 xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
1163 __func__, error);
1164 return error;
1165 }
1166 agi = XFS_BUF_TO_AGI(agbp);
1167 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 1454 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
1168 ASSERT(agbno < be32_to_cpu(agi->agi_length)); 1455 ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length));
1456
1169 /* 1457 /*
1170 * Initialize the cursor. 1458 * Initialize the cursor.
1171 */ 1459 */
1172 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1460 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
1173 1461
1174 error = xfs_check_agi_freecount(cur, agi); 1462 error = xfs_check_agi_freecount(cur, agi);
1175 if (error) 1463 if (error)
@@ -1261,6 +1549,7 @@ xfs_difree(
1261 if (error) 1549 if (error)
1262 goto error0; 1550 goto error0;
1263 1551
1552 *orec = rec;
1264 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1553 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1265 return 0; 1554 return 0;
1266 1555
@@ -1269,6 +1558,182 @@ error0:
1269 return error; 1558 return error;
1270} 1559}
1271 1560
1561/*
1562 * Free an inode in the free inode btree.
1563 */
1564STATIC int
1565xfs_difree_finobt(
1566 struct xfs_mount *mp,
1567 struct xfs_trans *tp,
1568 struct xfs_buf *agbp,
1569 xfs_agino_t agino,
1570 struct xfs_inobt_rec_incore *ibtrec) /* inobt record */
1571{
1572 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
1573 xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
1574 struct xfs_btree_cur *cur;
1575 struct xfs_inobt_rec_incore rec;
1576 int offset = agino - ibtrec->ir_startino;
1577 int error;
1578 int i;
1579
1580 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
1581
1582 error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
1583 if (error)
1584 goto error;
1585 if (i == 0) {
1586 /*
1587 * If the record does not exist in the finobt, we must have just
1588 * freed an inode in a previously fully allocated chunk. If not,
1589 * something is out of sync.
1590 */
1591 XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error);
1592
1593 error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
1594 ibtrec->ir_free, &i);
1595 if (error)
1596 goto error;
1597 ASSERT(i == 1);
1598
1599 goto out;
1600 }
1601
1602 /*
1603 * Read and update the existing record. We could just copy the ibtrec
1604 * across here, but that would defeat the purpose of having redundant
1605 * metadata. By making the modifications independently, we can catch
1606 * corruptions that we wouldn't see if we just copied from one record
1607 * to another.
1608 */
1609 error = xfs_inobt_get_rec(cur, &rec, &i);
1610 if (error)
1611 goto error;
1612 XFS_WANT_CORRUPTED_GOTO(i == 1, error);
1613
1614 rec.ir_free |= XFS_INOBT_MASK(offset);
1615 rec.ir_freecount++;
1616
1617 XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) &&
1618 (rec.ir_freecount == ibtrec->ir_freecount),
1619 error);
1620
1621 /*
1622 * The content of inobt records should always match between the inobt
1623 * and finobt. The lifecycle of records in the finobt is different from
1624 * the inobt in that the finobt only tracks records with at least one
1625 * free inode. Hence, if all of the inodes are free and we aren't
1626 * keeping inode chunks permanently on disk, remove the record.
1627 * Otherwise, update the record with the new information.
1628 */
1629 if (rec.ir_freecount == mp->m_ialloc_inos &&
1630 !(mp->m_flags & XFS_MOUNT_IKEEP)) {
1631 error = xfs_btree_delete(cur, &i);
1632 if (error)
1633 goto error;
1634 ASSERT(i == 1);
1635 } else {
1636 error = xfs_inobt_update(cur, &rec);
1637 if (error)
1638 goto error;
1639 }
1640
1641out:
1642 error = xfs_check_agi_freecount(cur, agi);
1643 if (error)
1644 goto error;
1645
1646 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1647 return 0;
1648
1649error:
1650 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1651 return error;
1652}
1653
1654/*
1655 * Free disk inode. Carefully avoids touching the incore inode, all
1656 * manipulations incore are the caller's responsibility.
1657 * The on-disk inode is not changed by this operation, only the
1658 * btree (free inode mask) is changed.
1659 */
1660int
1661xfs_difree(
1662 struct xfs_trans *tp, /* transaction pointer */
1663 xfs_ino_t inode, /* inode to be freed */
1664 struct xfs_bmap_free *flist, /* extents to free */
1665 int *delete,/* set if inode cluster was deleted */
1666 xfs_ino_t *first_ino)/* first inode in deleted cluster */
1667{
1668 /* REFERENCED */
1669 xfs_agblock_t agbno; /* block number containing inode */
1670 struct xfs_buf *agbp; /* buffer for allocation group header */
1671 xfs_agino_t agino; /* allocation group inode number */
1672 xfs_agnumber_t agno; /* allocation group number */
1673 int error; /* error return value */
1674 struct xfs_mount *mp; /* mount structure for filesystem */
1675 struct xfs_inobt_rec_incore rec;/* btree record */
1676
1677 mp = tp->t_mountp;
1678
1679 /*
1680 * Break up inode number into its components.
1681 */
1682 agno = XFS_INO_TO_AGNO(mp, inode);
1683 if (agno >= mp->m_sb.sb_agcount) {
1684 xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
1685 __func__, agno, mp->m_sb.sb_agcount);
1686 ASSERT(0);
1687 return XFS_ERROR(EINVAL);
1688 }
1689 agino = XFS_INO_TO_AGINO(mp, inode);
1690 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
1691 xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
1692 __func__, (unsigned long long)inode,
1693 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
1694 ASSERT(0);
1695 return XFS_ERROR(EINVAL);
1696 }
1697 agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1698 if (agbno >= mp->m_sb.sb_agblocks) {
1699 xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
1700 __func__, agbno, mp->m_sb.sb_agblocks);
1701 ASSERT(0);
1702 return XFS_ERROR(EINVAL);
1703 }
1704 /*
1705 * Get the allocation group header.
1706 */
1707 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1708 if (error) {
1709 xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
1710 __func__, error);
1711 return error;
1712 }
1713
1714 /*
1715 * Fix up the inode allocation btree.
1716 */
1717 error = xfs_difree_inobt(mp, tp, agbp, agino, flist, delete, first_ino,
1718 &rec);
1719 if (error)
1720 goto error0;
1721
1722 /*
1723 * Fix up the free inode btree.
1724 */
1725 if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
1726 error = xfs_difree_finobt(mp, tp, agbp, agino, &rec);
1727 if (error)
1728 goto error0;
1729 }
1730
1731 return 0;
1732
1733error0:
1734 return error;
1735}
1736
1272STATIC int 1737STATIC int
1273xfs_imap_lookup( 1738xfs_imap_lookup(
1274 struct xfs_mount *mp, 1739 struct xfs_mount *mp,
@@ -1300,7 +1765,7 @@ xfs_imap_lookup(
1300 * we have a record, we need to ensure it contains the inode number 1765 * we have a record, we need to ensure it contains the inode number
1301 * we are looking up. 1766 * we are looking up.
1302 */ 1767 */
1303 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1768 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
1304 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); 1769 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
1305 if (!error) { 1770 if (!error) {
1306 if (i) 1771 if (i)
@@ -1488,7 +1953,16 @@ xfs_ialloc_compute_maxlevels(
1488} 1953}
1489 1954
1490/* 1955/*
1491 * Log specified fields for the ag hdr (inode section) 1956 * Log specified fields for the ag hdr (inode section). The growth of the agi
1957 * structure over time requires that we interpret the buffer as two logical
1958 * regions delineated by the end of the unlinked list. This is due to the size
1959 * of the hash table and its location in the middle of the agi.
1960 *
1961 * For example, a request to log a field before agi_unlinked and a field after
1962 * agi_unlinked could cause us to log the entire hash table and use an excessive
1963 * amount of log space. To avoid this behavior, log the region up through
1964 * agi_unlinked in one call and the region after agi_unlinked through the end of
1965 * the structure in another.
1492 */ 1966 */
1493void 1967void
1494xfs_ialloc_log_agi( 1968xfs_ialloc_log_agi(
@@ -1511,6 +1985,8 @@ xfs_ialloc_log_agi(
1511 offsetof(xfs_agi_t, agi_newino), 1985 offsetof(xfs_agi_t, agi_newino),
1512 offsetof(xfs_agi_t, agi_dirino), 1986 offsetof(xfs_agi_t, agi_dirino),
1513 offsetof(xfs_agi_t, agi_unlinked), 1987 offsetof(xfs_agi_t, agi_unlinked),
1988 offsetof(xfs_agi_t, agi_free_root),
1989 offsetof(xfs_agi_t, agi_free_level),
1514 sizeof(xfs_agi_t) 1990 sizeof(xfs_agi_t)
1515 }; 1991 };
1516#ifdef DEBUG 1992#ifdef DEBUG
@@ -1519,15 +1995,30 @@ xfs_ialloc_log_agi(
1519 agi = XFS_BUF_TO_AGI(bp); 1995 agi = XFS_BUF_TO_AGI(bp);
1520 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); 1996 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
1521#endif 1997#endif
1998
1999 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
2000
1522 /* 2001 /*
1523 * Compute byte offsets for the first and last fields. 2002 * Compute byte offsets for the first and last fields in the first
2003 * region and log the agi buffer. This only logs up through
2004 * agi_unlinked.
1524 */ 2005 */
1525 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); 2006 if (fields & XFS_AGI_ALL_BITS_R1) {
2007 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1,
2008 &first, &last);
2009 xfs_trans_log_buf(tp, bp, first, last);
2010 }
2011
1526 /* 2012 /*
1527 * Log the allocation group inode header buffer. 2013 * Mask off the bits in the first region and calculate the first and
2014 * last field offsets for any bits in the second region.
1528 */ 2015 */
1529 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); 2016 fields &= ~XFS_AGI_ALL_BITS_R1;
1530 xfs_trans_log_buf(tp, bp, first, last); 2017 if (fields) {
2018 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2,
2019 &first, &last);
2020 xfs_trans_log_buf(tp, bp, first, last);
2021 }
1531} 2022}
1532 2023
1533#ifdef DEBUG 2024#ifdef DEBUG
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index f1630ff619dd..726f83a681a5 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -49,7 +49,8 @@ xfs_inobt_dup_cursor(
49 struct xfs_btree_cur *cur) 49 struct xfs_btree_cur *cur)
50{ 50{
51 return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, 51 return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
52 cur->bc_private.a.agbp, cur->bc_private.a.agno); 52 cur->bc_private.a.agbp, cur->bc_private.a.agno,
53 cur->bc_btnum);
53} 54}
54 55
55STATIC void 56STATIC void
@@ -66,6 +67,21 @@ xfs_inobt_set_root(
66 xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); 67 xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
67} 68}
68 69
70STATIC void
71xfs_finobt_set_root(
72 struct xfs_btree_cur *cur,
73 union xfs_btree_ptr *nptr,
74 int inc) /* level change */
75{
76 struct xfs_buf *agbp = cur->bc_private.a.agbp;
77 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
78
79 agi->agi_free_root = nptr->s;
80 be32_add_cpu(&agi->agi_free_level, inc);
81 xfs_ialloc_log_agi(cur->bc_tp, agbp,
82 XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL);
83}
84
69STATIC int 85STATIC int
70xfs_inobt_alloc_block( 86xfs_inobt_alloc_block(
71 struct xfs_btree_cur *cur, 87 struct xfs_btree_cur *cur,
@@ -172,6 +188,17 @@ xfs_inobt_init_ptr_from_cur(
172 ptr->s = agi->agi_root; 188 ptr->s = agi->agi_root;
173} 189}
174 190
191STATIC void
192xfs_finobt_init_ptr_from_cur(
193 struct xfs_btree_cur *cur,
194 union xfs_btree_ptr *ptr)
195{
196 struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
197
198 ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
199 ptr->s = agi->agi_free_root;
200}
201
175STATIC __int64_t 202STATIC __int64_t
176xfs_inobt_key_diff( 203xfs_inobt_key_diff(
177 struct xfs_btree_cur *cur, 204 struct xfs_btree_cur *cur,
@@ -202,6 +229,7 @@ xfs_inobt_verify(
202 */ 229 */
203 switch (block->bb_magic) { 230 switch (block->bb_magic) {
204 case cpu_to_be32(XFS_IBT_CRC_MAGIC): 231 case cpu_to_be32(XFS_IBT_CRC_MAGIC):
232 case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
205 if (!xfs_sb_version_hascrc(&mp->m_sb)) 233 if (!xfs_sb_version_hascrc(&mp->m_sb))
206 return false; 234 return false;
207 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) 235 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
@@ -213,6 +241,7 @@ xfs_inobt_verify(
213 return false; 241 return false;
214 /* fall through */ 242 /* fall through */
215 case cpu_to_be32(XFS_IBT_MAGIC): 243 case cpu_to_be32(XFS_IBT_MAGIC):
244 case cpu_to_be32(XFS_FIBT_MAGIC):
216 break; 245 break;
217 default: 246 default:
218 return 0; 247 return 0;
@@ -316,6 +345,28 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
316#endif 345#endif
317}; 346};
318 347
348static const struct xfs_btree_ops xfs_finobt_ops = {
349 .rec_len = sizeof(xfs_inobt_rec_t),
350 .key_len = sizeof(xfs_inobt_key_t),
351
352 .dup_cursor = xfs_inobt_dup_cursor,
353 .set_root = xfs_finobt_set_root,
354 .alloc_block = xfs_inobt_alloc_block,
355 .free_block = xfs_inobt_free_block,
356 .get_minrecs = xfs_inobt_get_minrecs,
357 .get_maxrecs = xfs_inobt_get_maxrecs,
358 .init_key_from_rec = xfs_inobt_init_key_from_rec,
359 .init_rec_from_key = xfs_inobt_init_rec_from_key,
360 .init_rec_from_cur = xfs_inobt_init_rec_from_cur,
361 .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur,
362 .key_diff = xfs_inobt_key_diff,
363 .buf_ops = &xfs_inobt_buf_ops,
364#if defined(DEBUG) || defined(XFS_WARN)
365 .keys_inorder = xfs_inobt_keys_inorder,
366 .recs_inorder = xfs_inobt_recs_inorder,
367#endif
368};
369
319/* 370/*
320 * Allocate a new inode btree cursor. 371 * Allocate a new inode btree cursor.
321 */ 372 */
@@ -324,7 +375,8 @@ xfs_inobt_init_cursor(
324 struct xfs_mount *mp, /* file system mount point */ 375 struct xfs_mount *mp, /* file system mount point */
325 struct xfs_trans *tp, /* transaction pointer */ 376 struct xfs_trans *tp, /* transaction pointer */
326 struct xfs_buf *agbp, /* buffer for agi structure */ 377 struct xfs_buf *agbp, /* buffer for agi structure */
327 xfs_agnumber_t agno) /* allocation group number */ 378 xfs_agnumber_t agno, /* allocation group number */
379 xfs_btnum_t btnum) /* ialloc or free ino btree */
328{ 380{
329 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); 381 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
330 struct xfs_btree_cur *cur; 382 struct xfs_btree_cur *cur;
@@ -333,11 +385,17 @@ xfs_inobt_init_cursor(
333 385
334 cur->bc_tp = tp; 386 cur->bc_tp = tp;
335 cur->bc_mp = mp; 387 cur->bc_mp = mp;
336 cur->bc_nlevels = be32_to_cpu(agi->agi_level); 388 cur->bc_btnum = btnum;
337 cur->bc_btnum = XFS_BTNUM_INO; 389 if (btnum == XFS_BTNUM_INO) {
390 cur->bc_nlevels = be32_to_cpu(agi->agi_level);
391 cur->bc_ops = &xfs_inobt_ops;
392 } else {
393 cur->bc_nlevels = be32_to_cpu(agi->agi_free_level);
394 cur->bc_ops = &xfs_finobt_ops;
395 }
396
338 cur->bc_blocklog = mp->m_sb.sb_blocklog; 397 cur->bc_blocklog = mp->m_sb.sb_blocklog;
339 398
340 cur->bc_ops = &xfs_inobt_ops;
341 if (xfs_sb_version_hascrc(&mp->m_sb)) 399 if (xfs_sb_version_hascrc(&mp->m_sb))
342 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; 400 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
343 401
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index f38b22011c4e..d7ebea72c2d0 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -58,7 +58,8 @@ struct xfs_mount;
58 ((index) - 1) * sizeof(xfs_inobt_ptr_t))) 58 ((index) - 1) * sizeof(xfs_inobt_ptr_t)))
59 59
60extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, 60extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
61 struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); 61 struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t,
62 xfs_btnum_t);
62extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); 63extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
63 64
64#endif /* __XFS_IALLOC_BTREE_H__ */ 65#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 6bbfcf0b3bb2..6d6b44a508f9 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1811,9 +1811,33 @@ xfs_inactive_ifree(
1811 int error; 1811 int error;
1812 1812
1813 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1813 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
1814 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0); 1814
1815 /*
1816 * The ifree transaction might need to allocate blocks for record
1817 * insertion to the finobt. We don't want to fail here at ENOSPC, so
1818 * allow ifree to dip into the reserved block pool if necessary.
1819 *
1820 * Freeing large sets of inodes generally means freeing inode chunks,
1821 * directory and file data blocks, so this should be relatively safe.
1822 * Only under severe circumstances should it be possible to free enough
1823 * inodes to exhaust the reserve block pool via finobt expansion while
1824 * at the same time not creating free space in the filesystem.
1825 *
1826 * Send a warning if the reservation does happen to fail, as the inode
1827 * now remains allocated and sits on the unlinked list until the fs is
1828 * repaired.
1829 */
1830 tp->t_flags |= XFS_TRANS_RESERVE;
1831 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree,
1832 XFS_IFREE_SPACE_RES(mp), 0);
1815 if (error) { 1833 if (error) {
1816 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1834 if (error == ENOSPC) {
1835 xfs_warn_ratelimited(mp,
1836 "Failed to remove inode(s) from unlinked list. "
1837 "Please free space, unmount and run xfs_repair.");
1838 } else {
1839 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1840 }
1817 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); 1841 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
1818 return error; 1842 return error;
1819 } 1843 }
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f46338285152..cb64f222d607 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -270,7 +270,8 @@ xfs_bulkstat(
270 /* 270 /*
271 * Allocate and initialize a btree cursor for ialloc btree. 271 * Allocate and initialize a btree cursor for ialloc btree.
272 */ 272 */
273 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); 273 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
274 XFS_BTNUM_INO);
274 irbp = irbuf; 275 irbp = irbuf;
275 irbufend = irbuf + nirbuf; 276 irbufend = irbuf + nirbuf;
276 end_of_ag = 0; 277 end_of_ag = 0;
@@ -621,7 +622,8 @@ xfs_inumbers(
621 agino = 0; 622 agino = 0;
622 continue; 623 continue;
623 } 624 }
624 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); 625 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
626 XFS_BTNUM_INO);
625 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, 627 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
626 &tmp); 628 &tmp);
627 if (error) { 629 if (error) {
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 690a64d4444e..981af0f6504b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2138,7 +2138,9 @@ xlog_recover_validate_buf_type(
2138 bp->b_ops = &xfs_allocbt_buf_ops; 2138 bp->b_ops = &xfs_allocbt_buf_ops;
2139 break; 2139 break;
2140 case XFS_IBT_CRC_MAGIC: 2140 case XFS_IBT_CRC_MAGIC:
2141 case XFS_FIBT_CRC_MAGIC:
2141 case XFS_IBT_MAGIC: 2142 case XFS_IBT_MAGIC:
2143 case XFS_FIBT_MAGIC:
2142 bp->b_ops = &xfs_inobt_buf_ops; 2144 bp->b_ops = &xfs_inobt_buf_ops;
2143 break; 2145 break;
2144 case XFS_BMAP_CRC_MAGIC: 2146 case XFS_BMAP_CRC_MAGIC:
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index f7b2fe77c5a5..950d1ea058b2 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -587,7 +587,9 @@ xfs_sb_has_compat_feature(
587 return (sbp->sb_features_compat & feature) != 0; 587 return (sbp->sb_features_compat & feature) != 0;
588} 588}
589 589
590#define XFS_SB_FEAT_RO_COMPAT_ALL 0 590#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */
591#define XFS_SB_FEAT_RO_COMPAT_ALL \
592 (XFS_SB_FEAT_RO_COMPAT_FINOBT)
591#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL 593#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
592static inline bool 594static inline bool
593xfs_sb_has_ro_compat_feature( 595xfs_sb_has_ro_compat_feature(
@@ -641,6 +643,12 @@ static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp)
641 (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE)); 643 (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
642} 644}
643 645
646static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
647{
648 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
649 (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
650}
651
644/* 652/*
645 * end of superblock version macros 653 * end of superblock version macros
646 */ 654 */
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index ce372b7d5644..f2240383d4bb 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -59,6 +59,7 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
59 { "abtc2", XFSSTAT_END_ABTC_V2 }, 59 { "abtc2", XFSSTAT_END_ABTC_V2 },
60 { "bmbt2", XFSSTAT_END_BMBT_V2 }, 60 { "bmbt2", XFSSTAT_END_BMBT_V2 },
61 { "ibt2", XFSSTAT_END_IBT_V2 }, 61 { "ibt2", XFSSTAT_END_IBT_V2 },
62 { "fibt2", XFSSTAT_END_FIBT_V2 },
62 /* we print both series of quota information together */ 63 /* we print both series of quota information together */
63 { "qm", XFSSTAT_END_QM }, 64 { "qm", XFSSTAT_END_QM },
64 }; 65 };
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index c03ad38ceaeb..c8f238b8299a 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -183,7 +183,23 @@ struct xfsstats {
183 __uint32_t xs_ibt_2_alloc; 183 __uint32_t xs_ibt_2_alloc;
184 __uint32_t xs_ibt_2_free; 184 __uint32_t xs_ibt_2_free;
185 __uint32_t xs_ibt_2_moves; 185 __uint32_t xs_ibt_2_moves;
186#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_IBT_V2+6) 186#define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2+15)
187 __uint32_t xs_fibt_2_lookup;
188 __uint32_t xs_fibt_2_compare;
189 __uint32_t xs_fibt_2_insrec;
190 __uint32_t xs_fibt_2_delrec;
191 __uint32_t xs_fibt_2_newroot;
192 __uint32_t xs_fibt_2_killroot;
193 __uint32_t xs_fibt_2_increment;
194 __uint32_t xs_fibt_2_decrement;
195 __uint32_t xs_fibt_2_lshift;
196 __uint32_t xs_fibt_2_rshift;
197 __uint32_t xs_fibt_2_split;
198 __uint32_t xs_fibt_2_join;
199 __uint32_t xs_fibt_2_alloc;
200 __uint32_t xs_fibt_2_free;
201 __uint32_t xs_fibt_2_moves;
202#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_FIBT_V2+6)
187 __uint32_t xs_qm_dqreclaims; 203 __uint32_t xs_qm_dqreclaims;
188 __uint32_t xs_qm_dqreclaim_misses; 204 __uint32_t xs_qm_dqreclaim_misses;
189 __uint32_t xs_qm_dquot_dups; 205 __uint32_t xs_qm_dquot_dups;
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c
index ae368165244d..52b6c3e3203e 100644
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/xfs_trans_resv.c
@@ -106,6 +106,47 @@ xfs_calc_inode_res(
106} 106}
107 107
108/* 108/*
109 * The free inode btree is a conditional feature and the log reservation
110 * requirements differ slightly from that of the traditional inode allocation
111 * btree. The finobt tracks records for inode chunks with at least one free
112 * inode. A record can be removed from the tree for an inode allocation
113 * or free and thus the finobt reservation is unconditional across:
114 *
115 * - inode allocation
116 * - inode free
117 * - inode chunk allocation
118 *
119 * The 'modify' param indicates to include the record modification scenario. The
120 * 'alloc' param indicates to include the reservation for free space btree
121 * modifications on behalf of finobt modifications. This is required only for
122 * transactions that do not already account for free space btree modifications.
123 *
124 * the free inode btree: max depth * block size
125 * the allocation btrees: 2 trees * (max depth - 1) * block size
126 * the free inode btree entry: block size
127 */
128STATIC uint
129xfs_calc_finobt_res(
130 struct xfs_mount *mp,
131 int alloc,
132 int modify)
133{
134 uint res;
135
136 if (!xfs_sb_version_hasfinobt(&mp->m_sb))
137 return 0;
138
139 res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
140 if (alloc)
141 res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
142 XFS_FSB_TO_B(mp, 1));
143 if (modify)
144 res += (uint)XFS_FSB_TO_B(mp, 1);
145
146 return res;
147}
148
149/*
109 * Various log reservation values. 150 * Various log reservation values.
110 * 151 *
111 * These are based on the size of the file system block because that is what 152 * These are based on the size of the file system block because that is what
@@ -302,6 +343,7 @@ xfs_calc_remove_reservation(
302 * the superblock for the nlink flag: sector size 343 * the superblock for the nlink flag: sector size
303 * the directory btree: (max depth + v2) * dir block size 344 * the directory btree: (max depth + v2) * dir block size
304 * the directory inode's bmap btree: (max depth + v2) * block size 345 * the directory inode's bmap btree: (max depth + v2) * block size
346 * the finobt (record modification and allocation btrees)
305 */ 347 */
306STATIC uint 348STATIC uint
307xfs_calc_create_resv_modify( 349xfs_calc_create_resv_modify(
@@ -310,7 +352,8 @@ xfs_calc_create_resv_modify(
310 return xfs_calc_inode_res(mp, 2) + 352 return xfs_calc_inode_res(mp, 2) +
311 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 353 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
312 (uint)XFS_FSB_TO_B(mp, 1) + 354 (uint)XFS_FSB_TO_B(mp, 1) +
313 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)); 355 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
356 xfs_calc_finobt_res(mp, 1, 1);
314} 357}
315 358
316/* 359/*
@@ -348,6 +391,7 @@ __xfs_calc_create_reservation(
348 * the superblock for the nlink flag: sector size 391 * the superblock for the nlink flag: sector size
349 * the inode btree: max depth * blocksize 392 * the inode btree: max depth * blocksize
350 * the allocation btrees: 2 trees * (max depth - 1) * block size 393 * the allocation btrees: 2 trees * (max depth - 1) * block size
394 * the finobt (record insertion)
351 */ 395 */
352STATIC uint 396STATIC uint
353xfs_calc_icreate_resv_alloc( 397xfs_calc_icreate_resv_alloc(
@@ -357,7 +401,8 @@ xfs_calc_icreate_resv_alloc(
357 mp->m_sb.sb_sectsize + 401 mp->m_sb.sb_sectsize +
358 xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 402 xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
359 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 403 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
360 XFS_FSB_TO_B(mp, 1)); 404 XFS_FSB_TO_B(mp, 1)) +
405 xfs_calc_finobt_res(mp, 0, 0);
361} 406}
362 407
363STATIC uint 408STATIC uint
@@ -425,6 +470,7 @@ xfs_calc_symlink_reservation(
425 * the on disk inode before ours in the agi hash list: inode cluster size 470 * the on disk inode before ours in the agi hash list: inode cluster size
426 * the inode btree: max depth * blocksize 471 * the inode btree: max depth * blocksize
427 * the allocation btrees: 2 trees * (max depth - 1) * block size 472 * the allocation btrees: 2 trees * (max depth - 1) * block size
473 * the finobt (record insertion, removal or modification)
428 */ 474 */
429STATIC uint 475STATIC uint
430xfs_calc_ifree_reservation( 476xfs_calc_ifree_reservation(
@@ -439,7 +485,8 @@ xfs_calc_ifree_reservation(
439 xfs_calc_buf_res(2 + mp->m_ialloc_blks + 485 xfs_calc_buf_res(2 + mp->m_ialloc_blks +
440 mp->m_in_maxlevels, 0) + 486 mp->m_in_maxlevels, 0) +
441 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 487 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
442 XFS_FSB_TO_B(mp, 1)); 488 XFS_FSB_TO_B(mp, 1)) +
489 xfs_calc_finobt_res(mp, 0, 1);
443} 490}
444 491
445/* 492/*
diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h
index af5dbe06cb65..df4c1f81884c 100644
--- a/fs/xfs/xfs_trans_space.h
+++ b/fs/xfs/xfs_trans_space.h
@@ -47,7 +47,9 @@
47#define XFS_DIRREMOVE_SPACE_RES(mp) \ 47#define XFS_DIRREMOVE_SPACE_RES(mp) \
48 XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) 48 XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
/*
 * Block reservation for an inode chunk allocation: the chunk itself
 * (m_ialloc_blks) plus (m_in_maxlevels - 1) blocks for every inode btree
 * that is updated -- two trees when the free inode btree is enabled, one
 * otherwise.
 *
 * The tree count needs its own parentheses: "?:" binds more loosely than
 * "*", so "cond ? 2 : 1 * x" evaluates to plain 2 whenever cond is true
 * instead of 2 * x.  The macro argument is parenthesised at every use so
 * that any pointer expression can be passed.
 */
#define XFS_IALLOC_SPACE_RES(mp)	\
	((mp)->m_ialloc_blks + \
	 ((xfs_sb_version_hasfinobt(&(mp)->m_sb) ? 2 : 1) * \
	  ((mp)->m_in_maxlevels - 1)))
51 53
52/* 54/*
53 * Space reservation values for various transactions. 55 * Space reservation values for various transactions.
@@ -82,5 +84,8 @@
82 (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) 84 (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
83#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ 85#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \
84 (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) 86 (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
/*
 * Block reservation for freeing an inode: m_in_maxlevels blocks when the
 * free inode btree is enabled (the free may have to insert a finobt
 * record), nothing otherwise.  The macro argument is parenthesised at
 * every use so that any pointer expression can be passed.
 */
#define XFS_IFREE_SPACE_RES(mp)		\
	(xfs_sb_version_hasfinobt(&(mp)->m_sb) ? (mp)->m_in_maxlevels : 0)
89
85 90
86#endif /* __XFS_TRANS_SPACE_H__ */ 91#endif /* __XFS_TRANS_SPACE_H__ */
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 82bbc34d54a3..65c6e6650b1a 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -134,7 +134,7 @@ typedef enum {
134 134
135typedef enum { 135typedef enum {
136 XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi, 136 XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi,
137 XFS_BTNUM_MAX 137 XFS_BTNUM_FINOi, XFS_BTNUM_MAX
138} xfs_btnum_t; 138} xfs_btnum_t;
139 139
140struct xfs_name { 140struct xfs_name {