path: root/fs/xfs
author     Dave Chinner <david@fromorbit.com>        2013-06-27 02:04:55 -0400
committer  Ben Myers <bpm@sgi.com>                   2013-06-27 15:26:21 -0400
commit     28c8e41af693e4b5cd2d68218f144cf40ce15781
tree       79bea3212f87c81509af8acbf92b004f91e4a3eb  /fs/xfs
parent     b8402b4729495ac719a3f532c2e33ac653b222a8
xfs: Inode create item recovery
When we find an icreate transaction, we need to get and initialise the buffers
in the range that has been passed. Extract and verify the information in the
item record, then loop over the range initialising the buffers and issuing
them as delayed writes.

Support an arbitrary size range to initialise so that in future, when we
allocate inodes in much larger chunks, all kernels that understand this
transaction can still recover them.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
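For reference, the "information in the item record" that recovery extracts is the inode create log format structure carried by the XFS_LI_ICREATE item. A minimal sketch of its shape is given below; the structure name and icl_* field names come from the accesses made in this patch (and the structure itself lives in xfs_icreate_item.h, added by the earlier icreate item patches), but the exact field widths, ordering and comments shown here are assumptions for illustration only.

/*
 * Sketch only: approximate shape of the icreate log format structure
 * decoded by xlog_recover_do_icreate_pass2(). Field names match the
 * icl_* fields referenced in the patch; widths and ordering are assumed.
 */
struct xfs_icreate_log {
    __uint16_t  icl_type;    /* log item type, expected XFS_LI_ICREATE */
    __uint16_t  icl_size;    /* size of the format structure, expected 1 */
    __be32      icl_ag;      /* AG the inodes are being allocated in */
    __be32      icl_agbno;   /* start block of the inode chunk */
    __be32      icl_count;   /* number of inodes to initialise */
    __be32      icl_isize;   /* on-disk size of the inodes */
    __be32      icl_length;  /* length of the extent to initialise */
    __be32      icl_gen;     /* inode generation number to stamp in */
};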
Diffstat (limited to 'fs/xfs')
 -rw-r--r--  fs/xfs/xfs_ialloc.c      |  37
 -rw-r--r--  fs/xfs/xfs_ialloc.h      |   8
 -rw-r--r--  fs/xfs/xfs_log_recover.c | 114
 3 files changed, 145 insertions(+), 14 deletions(-)
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 4345c53c729c..4411565b718b 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -150,12 +150,16 @@ xfs_check_agi_freecount(
 #endif
 
 /*
- * Initialise a new set of inodes.
+ * Initialise a new set of inodes. When called without a transaction context
+ * (e.g. from recovery) we initiate a delayed write of the inode buffers rather
+ * than logging them (which in a transaction context puts them into the AIL
+ * for writeback rather than the xfsbufd queue).
  */
 STATIC int
 xfs_ialloc_inode_init(
     struct xfs_mount    *mp,
     struct xfs_trans    *tp,
+    struct list_head    *buffer_list,
     xfs_agnumber_t      agno,
     xfs_agblock_t       agbno,
     xfs_agblock_t       length,
@@ -247,18 +251,33 @@ xfs_ialloc_inode_init(
                 ino++;
                 uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
                 xfs_dinode_calc_crc(mp, free);
-            } else {
+            } else if (tp) {
                 /* just log the inode core */
                 xfs_trans_log_buf(tp, fbuf, ioffset,
                           ioffset + isize - 1);
             }
         }
-        if (version == 3) {
-            /* need to log the entire buffer */
-            xfs_trans_log_buf(tp, fbuf, 0,
-                      BBTOB(fbuf->b_length) - 1);
+
+        if (tp) {
+            /*
+             * Mark the buffer as an inode allocation buffer so it
+             * sticks in the AIL at the point of this allocation
+             * transaction. This ensures they are on disk before
+             * the tail of the log can be moved past this
+             * transaction (i.e. by preventing relogging from moving
+             * it forward in the log).
+             */
+            xfs_trans_inode_alloc_buf(tp, fbuf);
+            if (version == 3) {
+                /* need to log the entire buffer */
+                xfs_trans_log_buf(tp, fbuf, 0,
+                          BBTOB(fbuf->b_length) - 1);
+            }
+        } else {
+            fbuf->b_flags |= XBF_DONE;
+            xfs_buf_delwri_queue(fbuf, buffer_list);
+            xfs_buf_relse(fbuf);
         }
-        xfs_trans_inode_alloc_buf(tp, fbuf);
     }
     return 0;
 }
@@ -303,7 +322,7 @@ xfs_ialloc_ag_alloc(
     * First try to allocate inodes contiguous with the last-allocated
     * chunk of inodes. If the filesystem is striped, this will fill
     * an entire stripe unit with inodes.
-     */ 
+     */
     agi = XFS_BUF_TO_AGI(agbp);
     newino = be32_to_cpu(agi->agi_newino);
     agno = be32_to_cpu(agi->agi_seqno);
@@ -402,7 +421,7 @@ xfs_ialloc_ag_alloc(
     * rather than a linear progression to prevent the next generation
     * number from being easily guessable.
     */
-    error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno,
+    error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno,
             args.len, prandom_u32());
 
     if (error)
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index c8da3df271e6..68c07320f096 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -150,6 +150,14 @@ int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino,
 int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
         xfs_inobt_rec_incore_t *rec, int *stat);
 
+/*
+ * Inode chunk initialisation routine
+ */
+int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
+        struct list_head *buffer_list,
+        xfs_agnumber_t agno, xfs_agblock_t agbno,
+        xfs_agblock_t length, unsigned int gen);
+
 extern const struct xfs_buf_ops xfs_agi_buf_ops;
 
 #endif  /* __XFS_IALLOC_H__ */
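The header change above exports xfs_ialloc_inode_init() with its two completion mechanisms: log the buffers through the supplied transaction, or queue them on the caller's delayed-write list when no transaction is given. The sketch below illustrates the two calling conventions; it is not part of the patch. example_init_inode_chunk() is a made-up wrapper, and while LIST_HEAD() and xfs_buf_delwri_submit() are existing kernel interfaces, in real recovery the delwri list is the one threaded through the pass2 handlers and flushed later rather than submitted on the spot.

/* Illustrative sketch only -- not part of this patch. */
static int
example_init_inode_chunk(
    struct xfs_mount    *mp,
    struct xfs_trans    *tp,    /* NULL in a recovery context */
    xfs_agnumber_t      agno,
    xfs_agblock_t       agbno,
    xfs_agblock_t       length,
    unsigned int        gen)
{
    int    error;

    /* runtime allocation: buffers are logged in the transaction */
    if (tp)
        return xfs_ialloc_inode_init(mp, tp, NULL, agno, agbno,
                         length, gen);

    /* recovery: queue initialised buffers for delayed write, then flush */
    {
        LIST_HEAD(buffer_list);

        error = xfs_ialloc_inode_init(mp, NULL, &buffer_list, agno,
                          agbno, length, gen);
        if (!error)
            error = xfs_buf_delwri_submit(&buffer_list);
        return error;
    }
}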
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 7cf5e4eafe28..6fcc910a50b9 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -45,6 +45,7 @@
 #include "xfs_cksum.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
+#include "xfs_icreate_item.h"
 
 /* Need all the magic numbers and buffer ops structures from these headers */
 #include "xfs_symlink.h"
@@ -1617,7 +1618,10 @@ xlog_recover_add_to_trans(
  *    from the cancelled buffer table. Hence they have to be done last.
  *
  * 3. Inode allocation buffers must be replayed before inode items that
- *    read the buffer and replay changes into it.
+ *    read the buffer and replay changes into it. For filesystems using the
+ *    ICREATE transactions, this means XFS_LI_ICREATE objects need to get
+ *    treated the same as inode allocation buffers as they create and
+ *    initialise the buffers directly.
  *
  * 4. Inode unlink buffers must be replayed after inode items are replayed.
  *    This ensures that inodes are completely flushed to the inode buffer
@@ -1632,10 +1636,17 @@ xlog_recover_add_to_trans(
  *    from all the other buffers and move them to last.
  *
  * Hence, 4 lists, in order from head to tail:
- *	- buffer_list for all buffers except cancelled/inode unlink buffers
- *	- item_list for all non-buffer items
- *	- inode_buffer_list for inode unlink buffers
- *	- cancel_list for the cancelled buffers
+ *	- buffer_list for all buffers except cancelled/inode unlink buffers
+ *	- item_list for all non-buffer items
+ *	- inode_buffer_list for inode unlink buffers
+ *	- cancel_list for the cancelled buffers
+ *
+ * Note that we add objects to the tail of the lists so that first-to-last
+ * ordering is preserved within the lists. Adding objects to the head of the
+ * list means when we traverse from the head we walk them in last-to-first
+ * order. For cancelled buffers and inode unlink buffers this doesn't matter,
+ * but for all other items there may be specific ordering that we need to
+ * preserve.
  */
 STATIC int
 xlog_recover_reorder_trans(
@@ -1655,6 +1666,9 @@ xlog_recover_reorder_trans(
         xfs_buf_log_format_t    *buf_f = item->ri_buf[0].i_addr;
 
         switch (ITEM_TYPE(item)) {
+        case XFS_LI_ICREATE:
+            list_move_tail(&item->ri_list, &buffer_list);
+            break;
         case XFS_LI_BUF:
             if (buf_f->blf_flags & XFS_BLF_CANCEL) {
                 trace_xfs_log_recover_item_reorder_head(log,
@@ -2982,6 +2996,93 @@ xlog_recover_efd_pass2(
 }
 
 /*
+ * This routine is called when an inode create format structure is found in a
+ * committed transaction in the log. Its purpose is to initialise the inodes
+ * being allocated on disk. This requires us to get inode cluster buffers that
+ * match the range to be initialised, stamped with inode templates and written
+ * by delayed write so that subsequent modifications will hit the cached buffer
+ * and only need writing out at the end of recovery.
+ */
+STATIC int
+xlog_recover_do_icreate_pass2(
+    struct xlog         *log,
+    struct list_head    *buffer_list,
+    xlog_recover_item_t *item)
+{
+    struct xfs_mount        *mp = log->l_mp;
+    struct xfs_icreate_log  *icl;
+    xfs_agnumber_t          agno;
+    xfs_agblock_t           agbno;
+    unsigned int            count;
+    unsigned int            isize;
+    xfs_agblock_t           length;
+
+    icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
+    if (icl->icl_type != XFS_LI_ICREATE) {
+        xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
+        return EINVAL;
+    }
+
+    if (icl->icl_size != 1) {
+        xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
+        return EINVAL;
+    }
+
+    agno = be32_to_cpu(icl->icl_ag);
+    if (agno >= mp->m_sb.sb_agcount) {
+        xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
+        return EINVAL;
+    }
+    agbno = be32_to_cpu(icl->icl_agbno);
+    if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
+        xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
+        return EINVAL;
+    }
+    isize = be32_to_cpu(icl->icl_isize);
+    if (isize != mp->m_sb.sb_inodesize) {
+        xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
+        return EINVAL;
+    }
+    count = be32_to_cpu(icl->icl_count);
+    if (!count) {
+        xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
+        return EINVAL;
+    }
+    length = be32_to_cpu(icl->icl_length);
+    if (!length || length >= mp->m_sb.sb_agblocks) {
+        xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
+        return EINVAL;
+    }
+
+    /* existing allocation is fixed value */
+    ASSERT(count == XFS_IALLOC_INODES(mp));
+    ASSERT(length == XFS_IALLOC_BLOCKS(mp));
+    if (count != XFS_IALLOC_INODES(mp) ||
+        length != XFS_IALLOC_BLOCKS(mp)) {
+        xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2");
+        return EINVAL;
+    }
+
+    /*
+     * Inode buffers can be freed. Do not replay the inode initialisation as
+     * we could be overwriting something written after this inode buffer was
+     * cancelled.
+     *
+     * XXX: we need to iterate all buffers and only init those that are not
+     * cancelled. I think that a more fine grained factoring of
+     * xfs_ialloc_inode_init may be appropriate here to enable this to be
+     * done easily.
+     */
+    if (xlog_check_buffer_cancelled(log,
+            XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0))
+        return 0;
+
+    xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length,
+                  be32_to_cpu(icl->icl_gen));
+    return 0;
+}
+
+/*
  * Free up any resources allocated by the transaction
  *
  * Remember that EFIs, EFDs, and IUNLINKs are handled later.
@@ -3023,6 +3124,7 @@ xlog_recover_commit_pass1(
     case XFS_LI_EFI:
     case XFS_LI_EFD:
     case XFS_LI_DQUOT:
+    case XFS_LI_ICREATE:
         /* nothing to do in pass 1 */
         return 0;
     default:
@@ -3053,6 +3155,8 @@ xlog_recover_commit_pass2(
         return xlog_recover_efd_pass2(log, item);
     case XFS_LI_DQUOT:
         return xlog_recover_dquot_pass2(log, buffer_list, item);
+    case XFS_LI_ICREATE:
+        return xlog_recover_do_icreate_pass2(log, buffer_list, item);
     case XFS_LI_QUOTAOFF:
         /* nothing to do in pass2 */
         return 0;