diff options
-rw-r--r-- | fs/xfs/xfs_bmap_btree.c | 26 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_btree.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 14 | ||||
-rw-r--r-- | fs/xfs/xfs_btree.c | 32 | ||||
-rw-r--r-- | fs/xfs/xfs_btree.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.h | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_inode_buf.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_inode_buf.h | 18 | ||||
-rw-r--r-- | fs/xfs/xfs_log_format.h | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 123 |
11 files changed, 171 insertions, 67 deletions
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index aa2eadd41bab..531b0206cce6 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c | |||
@@ -932,30 +932,40 @@ xfs_bmdr_maxrecs( | |||
932 | * we switch forks between inodes. The operation that the caller is doing will | 932 | * we switch forks between inodes. The operation that the caller is doing will |
933 | * determine whether it needs to change owner before or after the switch. | 933 | * determine whether it needs to change owner before or after the switch. |
934 | * | 934 | * |
935 | * For demand paged modification, the fork switch should be done after reading | 935 | * For demand paged transactional modification, the fork switch should be done |
936 | * in all the blocks, modifying them and pinning them in the transaction. For | 936 | * after reading in all the blocks, modifying them and pinning them in the |
937 | * modification when the buffers are already pinned in memory, the fork switch | 937 | * transaction. For modification when the buffers are already pinned in memory, |
938 | * can be done before changing the owner as we won't need to validate the owner | 938 | * the fork switch can be done before changing the owner as we won't need to |
939 | * until the btree buffers are unpinned and writes can occur again. | 939 | * validate the owner until the btree buffers are unpinned and writes can occur |
940 | * again. | ||
941 | * | ||
942 | * For recovery based ownership change, there is no transactional context and | ||
943 | * so a buffer list must be supplied so that we can record the buffers that we | ||
944 | * modified for the caller to issue IO on. | ||
940 | */ | 945 | */ |
941 | int | 946 | int |
942 | xfs_bmbt_change_owner( | 947 | xfs_bmbt_change_owner( |
943 | struct xfs_trans *tp, | 948 | struct xfs_trans *tp, |
944 | struct xfs_inode *ip, | 949 | struct xfs_inode *ip, |
945 | int whichfork, | 950 | int whichfork, |
946 | xfs_ino_t new_owner) | 951 | xfs_ino_t new_owner, |
952 | struct list_head *buffer_list) | ||
947 | { | 953 | { |
948 | struct xfs_btree_cur *cur; | 954 | struct xfs_btree_cur *cur; |
949 | int error; | 955 | int error; |
950 | 956 | ||
957 | ASSERT(tp || buffer_list); | ||
958 | ASSERT(!(tp && buffer_list)); | ||
951 | if (whichfork == XFS_DATA_FORK) | 959 | if (whichfork == XFS_DATA_FORK) |
952 | ASSERT(ip->i_d.di_format = XFS_DINODE_FMT_BTREE); | 960 | ASSERT(ip->i_d.di_format = XFS_DINODE_FMT_BTREE); |
953 | else | 961 | else |
954 | ASSERT(ip->i_d.di_aformat = XFS_DINODE_FMT_BTREE); | 962 | ASSERT(ip->i_d.di_aformat = XFS_DINODE_FMT_BTREE); |
955 | 963 | ||
956 | cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); | 964 | cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); |
957 | error = xfs_btree_change_owner(cur, new_owner); | 965 | if (!cur) |
966 | return ENOMEM; | ||
967 | |||
968 | error = xfs_btree_change_owner(cur, new_owner, buffer_list); | ||
958 | xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | 969 | xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); |
959 | return error; | 970 | return error; |
960 | } | 971 | } |
961 | |||
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index bceac7affa27..e367461a638e 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h | |||
@@ -237,7 +237,8 @@ extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf); | |||
237 | extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf); | 237 | extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf); |
238 | 238 | ||
239 | extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, | 239 | extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, |
240 | int whichfork, xfs_ino_t new_owner); | 240 | int whichfork, xfs_ino_t new_owner, |
241 | struct list_head *buffer_list); | ||
241 | 242 | ||
242 | extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, | 243 | extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, |
243 | struct xfs_trans *, struct xfs_inode *, int); | 244 | struct xfs_trans *, struct xfs_inode *, int); |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index ad8a91d2e011..c6dc55142cbe 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -1932,16 +1932,18 @@ xfs_swap_extents( | |||
1932 | target_log_flags = XFS_ILOG_CORE; | 1932 | target_log_flags = XFS_ILOG_CORE; |
1933 | if (ip->i_d.di_version == 3 && | 1933 | if (ip->i_d.di_version == 3 && |
1934 | ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { | 1934 | ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { |
1935 | target_log_flags |= XFS_ILOG_OWNER; | 1935 | target_log_flags |= XFS_ILOG_DOWNER; |
1936 | error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, tip->i_ino); | 1936 | error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, |
1937 | tip->i_ino, NULL); | ||
1937 | if (error) | 1938 | if (error) |
1938 | goto out_trans_cancel; | 1939 | goto out_trans_cancel; |
1939 | } | 1940 | } |
1940 | 1941 | ||
1941 | if (tip->i_d.di_version == 3 && | 1942 | if (tip->i_d.di_version == 3 && |
1942 | tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { | 1943 | tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { |
1943 | src_log_flags |= XFS_ILOG_OWNER; | 1944 | src_log_flags |= XFS_ILOG_DOWNER; |
1944 | error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK, ip->i_ino); | 1945 | error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK, |
1946 | ip->i_ino, NULL); | ||
1945 | if (error) | 1947 | if (error) |
1946 | goto out_trans_cancel; | 1948 | goto out_trans_cancel; |
1947 | } | 1949 | } |
@@ -1997,7 +1999,7 @@ xfs_swap_extents( | |||
1997 | break; | 1999 | break; |
1998 | case XFS_DINODE_FMT_BTREE: | 2000 | case XFS_DINODE_FMT_BTREE: |
1999 | ASSERT(ip->i_d.di_version < 3 || | 2001 | ASSERT(ip->i_d.di_version < 3 || |
2000 | (src_log_flags & XFS_ILOG_OWNER)); | 2002 | (src_log_flags & XFS_ILOG_DOWNER)); |
2001 | src_log_flags |= XFS_ILOG_DBROOT; | 2003 | src_log_flags |= XFS_ILOG_DBROOT; |
2002 | break; | 2004 | break; |
2003 | } | 2005 | } |
@@ -2017,7 +2019,7 @@ xfs_swap_extents( | |||
2017 | case XFS_DINODE_FMT_BTREE: | 2019 | case XFS_DINODE_FMT_BTREE: |
2018 | target_log_flags |= XFS_ILOG_DBROOT; | 2020 | target_log_flags |= XFS_ILOG_DBROOT; |
2019 | ASSERT(tip->i_d.di_version < 3 || | 2021 | ASSERT(tip->i_d.di_version < 3 || |
2020 | (target_log_flags & XFS_ILOG_OWNER)); | 2022 | (target_log_flags & XFS_ILOG_DOWNER)); |
2021 | break; | 2023 | break; |
2022 | } | 2024 | } |
2023 | 2025 | ||
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 047573f02702..5690e102243d 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
@@ -3907,13 +3907,16 @@ xfs_btree_get_rec( | |||
3907 | * buffer as an ordered buffer and log it appropriately. We need to ensure that | 3907 | * buffer as an ordered buffer and log it appropriately. We need to ensure that |
3908 | * we mark the region we change dirty so that if the buffer is relogged in | 3908 | * we mark the region we change dirty so that if the buffer is relogged in |
3909 | * a subsequent transaction the changes we make here as an ordered buffer are | 3909 | * a subsequent transaction the changes we make here as an ordered buffer are |
3910 | * correctly relogged in that transaction. | 3910 | * correctly relogged in that transaction. If we are in recovery context, then |
3911 | * just queue the modified buffer as a delayed write buffer so the transaction | ||
3912 | * recovery completion writes the changes to disk. | ||
3911 | */ | 3913 | */ |
3912 | static int | 3914 | static int |
3913 | xfs_btree_block_change_owner( | 3915 | xfs_btree_block_change_owner( |
3914 | struct xfs_btree_cur *cur, | 3916 | struct xfs_btree_cur *cur, |
3915 | int level, | 3917 | int level, |
3916 | __uint64_t new_owner) | 3918 | __uint64_t new_owner, |
3919 | struct list_head *buffer_list) | ||
3917 | { | 3920 | { |
3918 | struct xfs_btree_block *block; | 3921 | struct xfs_btree_block *block; |
3919 | struct xfs_buf *bp; | 3922 | struct xfs_buf *bp; |
@@ -3930,16 +3933,19 @@ xfs_btree_block_change_owner( | |||
3930 | block->bb_u.s.bb_owner = cpu_to_be32(new_owner); | 3933 | block->bb_u.s.bb_owner = cpu_to_be32(new_owner); |
3931 | 3934 | ||
3932 | /* | 3935 | /* |
3933 | * Log owner change as an ordered buffer. If the block is a root block | 3936 | * If the block is a root block hosted in an inode, we might not have a |
3934 | * hosted in an inode, we might not have a buffer pointer here and we | 3937 | * buffer pointer here and we shouldn't attempt to log the change as the |
3935 | * shouldn't attempt to log the change as the information is already | 3938 | * information is already held in the inode and discarded when the root |
3936 | * held in the inode and discarded when the root block is formatted into | 3939 | * block is formatted into the on-disk inode fork. We still change it, |
3937 | * the on-disk inode fork. We still change it, though, so everything is | 3940 | * though, so everything is consistent in memory. |
3938 | * consistent in memory. | ||
3939 | */ | 3941 | */ |
3940 | if (bp) { | 3942 | if (bp) { |
3941 | xfs_trans_ordered_buf(cur->bc_tp, bp); | 3943 | if (cur->bc_tp) { |
3942 | xfs_btree_log_block(cur, bp, XFS_BB_OWNER); | 3944 | xfs_trans_ordered_buf(cur->bc_tp, bp); |
3945 | xfs_btree_log_block(cur, bp, XFS_BB_OWNER); | ||
3946 | } else { | ||
3947 | xfs_buf_delwri_queue(bp, buffer_list); | ||
3948 | } | ||
3943 | } else { | 3949 | } else { |
3944 | ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); | 3950 | ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); |
3945 | ASSERT(level == cur->bc_nlevels - 1); | 3951 | ASSERT(level == cur->bc_nlevels - 1); |
@@ -3956,7 +3962,8 @@ xfs_btree_block_change_owner( | |||
3956 | int | 3962 | int |
3957 | xfs_btree_change_owner( | 3963 | xfs_btree_change_owner( |
3958 | struct xfs_btree_cur *cur, | 3964 | struct xfs_btree_cur *cur, |
3959 | __uint64_t new_owner) | 3965 | __uint64_t new_owner, |
3966 | struct list_head *buffer_list) | ||
3960 | { | 3967 | { |
3961 | union xfs_btree_ptr lptr; | 3968 | union xfs_btree_ptr lptr; |
3962 | int level; | 3969 | int level; |
@@ -3986,7 +3993,8 @@ xfs_btree_change_owner( | |||
3986 | /* for each buffer in the level */ | 3993 | /* for each buffer in the level */ |
3987 | do { | 3994 | do { |
3988 | error = xfs_btree_block_change_owner(cur, level, | 3995 | error = xfs_btree_block_change_owner(cur, level, |
3989 | new_owner); | 3996 | new_owner, |
3997 | buffer_list); | ||
3990 | } while (!error); | 3998 | } while (!error); |
3991 | 3999 | ||
3992 | if (error != ENOENT) | 4000 | if (error != ENOENT) |
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 544b209e0256..06729b67ad58 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h | |||
@@ -445,7 +445,8 @@ int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); | |||
445 | int xfs_btree_insert(struct xfs_btree_cur *, int *); | 445 | int xfs_btree_insert(struct xfs_btree_cur *, int *); |
446 | int xfs_btree_delete(struct xfs_btree_cur *, int *); | 446 | int xfs_btree_delete(struct xfs_btree_cur *, int *); |
447 | int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); | 447 | int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); |
448 | int xfs_btree_change_owner(struct xfs_btree_cur *cur, __uint64_t new_owner); | 448 | int xfs_btree_change_owner(struct xfs_btree_cur *cur, __uint64_t new_owner, |
449 | struct list_head *buffer_list); | ||
449 | 450 | ||
450 | /* | 451 | /* |
451 | * btree block CRC helpers | 452 | * btree block CRC helpers |
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 16219b9c6790..7942432d9f77 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c | |||
@@ -48,7 +48,7 @@ STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, | |||
48 | /* | 48 | /* |
49 | * Allocate and initialise an xfs_inode. | 49 | * Allocate and initialise an xfs_inode. |
50 | */ | 50 | */ |
51 | STATIC struct xfs_inode * | 51 | struct xfs_inode * |
52 | xfs_inode_alloc( | 52 | xfs_inode_alloc( |
53 | struct xfs_mount *mp, | 53 | struct xfs_mount *mp, |
54 | xfs_ino_t ino) | 54 | xfs_ino_t ino) |
@@ -98,7 +98,7 @@ xfs_inode_free_callback( | |||
98 | kmem_zone_free(xfs_inode_zone, ip); | 98 | kmem_zone_free(xfs_inode_zone, ip); |
99 | } | 99 | } |
100 | 100 | ||
101 | STATIC void | 101 | void |
102 | xfs_inode_free( | 102 | xfs_inode_free( |
103 | struct xfs_inode *ip) | 103 | struct xfs_inode *ip) |
104 | { | 104 | { |
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 8a89f7d791bd..458e6bc22cc4 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h | |||
@@ -42,6 +42,10 @@ struct xfs_eofblocks { | |||
42 | int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, | 42 | int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, |
43 | uint flags, uint lock_flags, xfs_inode_t **ipp); | 43 | uint flags, uint lock_flags, xfs_inode_t **ipp); |
44 | 44 | ||
45 | /* recovery needs direct inode allocation capability */ | ||
46 | struct xfs_inode * xfs_inode_alloc(struct xfs_mount *mp, xfs_ino_t ino); | ||
47 | void xfs_inode_free(struct xfs_inode *ip); | ||
48 | |||
45 | void xfs_reclaim_worker(struct work_struct *work); | 49 | void xfs_reclaim_worker(struct work_struct *work); |
46 | 50 | ||
47 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); | 51 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); |
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c index e011d597f12f..3d25c9a5f6bc 100644 --- a/fs/xfs/xfs_inode_buf.c +++ b/fs/xfs/xfs_inode_buf.c | |||
@@ -196,7 +196,7 @@ xfs_imap_to_bp( | |||
196 | return 0; | 196 | return 0; |
197 | } | 197 | } |
198 | 198 | ||
199 | STATIC void | 199 | void |
200 | xfs_dinode_from_disk( | 200 | xfs_dinode_from_disk( |
201 | xfs_icdinode_t *to, | 201 | xfs_icdinode_t *to, |
202 | xfs_dinode_t *from) | 202 | xfs_dinode_t *from) |
diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/xfs_inode_buf.h index 599e6c0ca2a9..abba0ae8cf2d 100644 --- a/fs/xfs/xfs_inode_buf.h +++ b/fs/xfs/xfs_inode_buf.h | |||
@@ -32,17 +32,17 @@ struct xfs_imap { | |||
32 | ushort im_boffset; /* inode offset in block in bytes */ | 32 | ushort im_boffset; /* inode offset in block in bytes */ |
33 | }; | 33 | }; |
34 | 34 | ||
35 | int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *, | 35 | int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *, |
36 | struct xfs_imap *, struct xfs_dinode **, | 36 | struct xfs_imap *, struct xfs_dinode **, |
37 | struct xfs_buf **, uint, uint); | 37 | struct xfs_buf **, uint, uint); |
38 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, | 38 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, |
39 | struct xfs_inode *, uint); | 39 | struct xfs_inode *, uint); |
40 | void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *); | 40 | void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *); |
41 | void xfs_dinode_to_disk(struct xfs_dinode *, | 41 | void xfs_dinode_to_disk(struct xfs_dinode *to, struct xfs_icdinode *from); |
42 | struct xfs_icdinode *); | 42 | void xfs_dinode_from_disk(struct xfs_icdinode *to, struct xfs_dinode *from); |
43 | 43 | ||
44 | #if defined(DEBUG) | 44 | #if defined(DEBUG) |
45 | void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); | 45 | void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); |
46 | #else | 46 | #else |
47 | #define xfs_inobp_check(mp, bp) | 47 | #define xfs_inobp_check(mp, bp) |
48 | #endif /* DEBUG */ | 48 | #endif /* DEBUG */ |
diff --git a/fs/xfs/xfs_log_format.h b/fs/xfs/xfs_log_format.h index 08a6fbe03bb6..ca7e28a8ed31 100644 --- a/fs/xfs/xfs_log_format.h +++ b/fs/xfs/xfs_log_format.h | |||
@@ -474,7 +474,8 @@ typedef struct xfs_inode_log_format_64 { | |||
474 | #define XFS_ILOG_ADATA 0x040 /* log i_af.if_data */ | 474 | #define XFS_ILOG_ADATA 0x040 /* log i_af.if_data */ |
475 | #define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */ | 475 | #define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */ |
476 | #define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */ | 476 | #define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */ |
477 | #define XFS_ILOG_OWNER 0x200 /* change the extent tree owner on replay */ | 477 | #define XFS_ILOG_DOWNER 0x200 /* change the data fork owner on replay */ |
478 | #define XFS_ILOG_AOWNER 0x400 /* change the attr fork owner on replay */ | ||
478 | 479 | ||
479 | 480 | ||
480 | /* | 481 | /* |
@@ -488,7 +489,8 @@ typedef struct xfs_inode_log_format_64 { | |||
488 | #define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ | 489 | #define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ |
489 | XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ | 490 | XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ |
490 | XFS_ILOG_UUID | XFS_ILOG_ADATA | \ | 491 | XFS_ILOG_UUID | XFS_ILOG_ADATA | \ |
491 | XFS_ILOG_AEXT | XFS_ILOG_ABROOT) | 492 | XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \ |
493 | XFS_ILOG_DOWNER | XFS_ILOG_AOWNER) | ||
492 | 494 | ||
493 | #define XFS_ILOG_DFORK (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ | 495 | #define XFS_ILOG_DFORK (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ |
494 | XFS_ILOG_DBROOT) | 496 | XFS_ILOG_DBROOT) |
@@ -500,7 +502,8 @@ typedef struct xfs_inode_log_format_64 { | |||
500 | XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \ | 502 | XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \ |
501 | XFS_ILOG_DEV | XFS_ILOG_UUID | \ | 503 | XFS_ILOG_DEV | XFS_ILOG_UUID | \ |
502 | XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ | 504 | XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ |
503 | XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP) | 505 | XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP | \ |
506 | XFS_ILOG_DOWNER | XFS_ILOG_AOWNER) | ||
504 | 507 | ||
505 | static inline int xfs_ilog_fbroot(int w) | 508 | static inline int xfs_ilog_fbroot(int w) |
506 | { | 509 | { |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 1728c7c016a6..1c3b0c9c9aac 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -2629,6 +2629,82 @@ out_release: | |||
2629 | return error; | 2629 | return error; |
2630 | } | 2630 | } |
2631 | 2631 | ||
2632 | /* | ||
2633 | * Inode fork owner changes | ||
2634 | * | ||
2635 | * If we have been told that we have to reparent the inode fork, it's because an | ||
2636 | * extent swap operation on a CRC enabled filesystem has been done and we are | ||
2637 | * replaying it. We need to walk the BMBT of the appropriate fork and change the | ||
2638 | * owners of it. | ||
2639 | * | ||
2640 | * The complexity here is that we don't have an inode context to work with, so | ||
2641 | * after we've replayed the inode we need to instantiate one. This is where the | ||
2642 | * fun begins. | ||
2643 | * | ||
2644 | * We are in the middle of log recovery, so we can't run transactions. That | ||
2645 | * means we cannot use cache coherent inode instantiation via xfs_iget(), as | ||
2646 | * that will result in the corresponding iput() running the inode through | ||
2647 | * xfs_inactive(). If we've just replayed an inode core that changes the link | ||
2648 | * count to zero (i.e. it's been unlinked), then xfs_inactive() will run | ||
2649 | * transactions (bad!). | ||
2650 | * | ||
2651 | * So, to avoid this, we instantiate an inode directly from the inode core we've | ||
2652 | * just recovered. We have the buffer still locked, and all we really need to | ||
2653 | * instantiate is the inode core and the forks being modified. We can do this | ||
2654 | * manually, then run the inode btree owner change, and then tear down the | ||
2655 | * xfs_inode without having to run any transactions at all. | ||
2656 | * | ||
2657 | * Also, because we don't have a transaction context available here but need to | ||
2658 | * gather all the buffers we modify for writeback, we pass the buffer_list | ||
2659 | * instead for the operation to use. | ||
2660 | */ | ||
2661 | |||
2662 | STATIC int | ||
2663 | xfs_recover_inode_owner_change( | ||
2664 | struct xfs_mount *mp, | ||
2665 | struct xfs_dinode *dip, | ||
2666 | struct xfs_inode_log_format *in_f, | ||
2667 | struct list_head *buffer_list) | ||
2668 | { | ||
2669 | struct xfs_inode *ip; | ||
2670 | int error; | ||
2671 | |||
2672 | ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)); | ||
2673 | |||
2674 | ip = xfs_inode_alloc(mp, in_f->ilf_ino); | ||
2675 | if (!ip) | ||
2676 | return ENOMEM; | ||
2677 | |||
2678 | /* instantiate the inode */ | ||
2679 | xfs_dinode_from_disk(&ip->i_d, dip); | ||
2680 | ASSERT(ip->i_d.di_version >= 3); | ||
2681 | |||
2682 | error = xfs_iformat_fork(ip, dip); | ||
2683 | if (error) | ||
2684 | goto out_free_ip; | ||
2685 | |||
2686 | |||
2687 | if (in_f->ilf_fields & XFS_ILOG_DOWNER) { | ||
2688 | ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT); | ||
2689 | error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK, | ||
2690 | ip->i_ino, buffer_list); | ||
2691 | if (error) | ||
2692 | goto out_free_ip; | ||
2693 | } | ||
2694 | |||
2695 | if (in_f->ilf_fields & XFS_ILOG_AOWNER) { | ||
2696 | ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT); | ||
2697 | error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK, | ||
2698 | ip->i_ino, buffer_list); | ||
2699 | if (error) | ||
2700 | goto out_free_ip; | ||
2701 | } | ||
2702 | |||
2703 | out_free_ip: | ||
2704 | xfs_inode_free(ip); | ||
2705 | return error; | ||
2706 | } | ||
2707 | |||
2632 | STATIC int | 2708 | STATIC int |
2633 | xlog_recover_inode_pass2( | 2709 | xlog_recover_inode_pass2( |
2634 | struct xlog *log, | 2710 | struct xlog *log, |
@@ -2681,8 +2757,7 @@ xlog_recover_inode_pass2( | |||
2681 | error = bp->b_error; | 2757 | error = bp->b_error; |
2682 | if (error) { | 2758 | if (error) { |
2683 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)"); | 2759 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)"); |
2684 | xfs_buf_relse(bp); | 2760 | goto out_release; |
2685 | goto error; | ||
2686 | } | 2761 | } |
2687 | ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); | 2762 | ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); |
2688 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); | 2763 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); |
@@ -2692,30 +2767,31 @@ xlog_recover_inode_pass2( | |||
2692 | * like an inode! | 2767 | * like an inode! |
2693 | */ | 2768 | */ |
2694 | if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { | 2769 | if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { |
2695 | xfs_buf_relse(bp); | ||
2696 | xfs_alert(mp, | 2770 | xfs_alert(mp, |
2697 | "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", | 2771 | "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", |
2698 | __func__, dip, bp, in_f->ilf_ino); | 2772 | __func__, dip, bp, in_f->ilf_ino); |
2699 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", | 2773 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", |
2700 | XFS_ERRLEVEL_LOW, mp); | 2774 | XFS_ERRLEVEL_LOW, mp); |
2701 | error = EFSCORRUPTED; | 2775 | error = EFSCORRUPTED; |
2702 | goto error; | 2776 | goto out_release; |
2703 | } | 2777 | } |
2704 | dicp = item->ri_buf[1].i_addr; | 2778 | dicp = item->ri_buf[1].i_addr; |
2705 | if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { | 2779 | if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { |
2706 | xfs_buf_relse(bp); | ||
2707 | xfs_alert(mp, | 2780 | xfs_alert(mp, |
2708 | "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", | 2781 | "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", |
2709 | __func__, item, in_f->ilf_ino); | 2782 | __func__, item, in_f->ilf_ino); |
2710 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", | 2783 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", |
2711 | XFS_ERRLEVEL_LOW, mp); | 2784 | XFS_ERRLEVEL_LOW, mp); |
2712 | error = EFSCORRUPTED; | 2785 | error = EFSCORRUPTED; |
2713 | goto error; | 2786 | goto out_release; |
2714 | } | 2787 | } |
2715 | 2788 | ||
2716 | /* | 2789 | /* |
2717 | * If the inode has an LSN in it, recover the inode only if it's less | 2790 | * If the inode has an LSN in it, recover the inode only if it's less |
2718 | * than the lsn of the transaction we are replaying. | 2791 | * than the lsn of the transaction we are replaying. Note: we still |
2792 | * need to replay an owner change even though the inode is more recent | ||
2793 | * than the transaction as there is no guarantee that all the btree | ||
2794 | * blocks are more recent than this transaction, too. | ||
2719 | */ | 2795 | */ |
2720 | if (dip->di_version >= 3) { | 2796 | if (dip->di_version >= 3) { |
2721 | xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn); | 2797 | xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn); |
@@ -2723,7 +2799,7 @@ xlog_recover_inode_pass2( | |||
2723 | if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { | 2799 | if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { |
2724 | trace_xfs_log_recover_inode_skip(log, in_f); | 2800 | trace_xfs_log_recover_inode_skip(log, in_f); |
2725 | error = 0; | 2801 | error = 0; |
2726 | goto out_release; | 2802 | goto out_owner_change; |
2727 | } | 2803 | } |
2728 | } | 2804 | } |
2729 | 2805 | ||
@@ -2745,10 +2821,9 @@ xlog_recover_inode_pass2( | |||
2745 | dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { | 2821 | dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { |
2746 | /* do nothing */ | 2822 | /* do nothing */ |
2747 | } else { | 2823 | } else { |
2748 | xfs_buf_relse(bp); | ||
2749 | trace_xfs_log_recover_inode_skip(log, in_f); | 2824 | trace_xfs_log_recover_inode_skip(log, in_f); |
2750 | error = 0; | 2825 | error = 0; |
2751 | goto error; | 2826 | goto out_release; |
2752 | } | 2827 | } |
2753 | } | 2828 | } |
2754 | 2829 | ||
@@ -2760,13 +2835,12 @@ xlog_recover_inode_pass2( | |||
2760 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { | 2835 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { |
2761 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", | 2836 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", |
2762 | XFS_ERRLEVEL_LOW, mp, dicp); | 2837 | XFS_ERRLEVEL_LOW, mp, dicp); |
2763 | xfs_buf_relse(bp); | ||
2764 | xfs_alert(mp, | 2838 | xfs_alert(mp, |
2765 | "%s: Bad regular inode log record, rec ptr 0x%p, " | 2839 | "%s: Bad regular inode log record, rec ptr 0x%p, " |
2766 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2840 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
2767 | __func__, item, dip, bp, in_f->ilf_ino); | 2841 | __func__, item, dip, bp, in_f->ilf_ino); |
2768 | error = EFSCORRUPTED; | 2842 | error = EFSCORRUPTED; |
2769 | goto error; | 2843 | goto out_release; |
2770 | } | 2844 | } |
2771 | } else if (unlikely(S_ISDIR(dicp->di_mode))) { | 2845 | } else if (unlikely(S_ISDIR(dicp->di_mode))) { |
2772 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && | 2846 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && |
@@ -2774,19 +2848,17 @@ xlog_recover_inode_pass2( | |||
2774 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { | 2848 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { |
2775 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", | 2849 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", |
2776 | XFS_ERRLEVEL_LOW, mp, dicp); | 2850 | XFS_ERRLEVEL_LOW, mp, dicp); |
2777 | xfs_buf_relse(bp); | ||
2778 | xfs_alert(mp, | 2851 | xfs_alert(mp, |
2779 | "%s: Bad dir inode log record, rec ptr 0x%p, " | 2852 | "%s: Bad dir inode log record, rec ptr 0x%p, " |
2780 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2853 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
2781 | __func__, item, dip, bp, in_f->ilf_ino); | 2854 | __func__, item, dip, bp, in_f->ilf_ino); |
2782 | error = EFSCORRUPTED; | 2855 | error = EFSCORRUPTED; |
2783 | goto error; | 2856 | goto out_release; |
2784 | } | 2857 | } |
2785 | } | 2858 | } |
2786 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ | 2859 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ |
2787 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", | 2860 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", |
2788 | XFS_ERRLEVEL_LOW, mp, dicp); | 2861 | XFS_ERRLEVEL_LOW, mp, dicp); |
2789 | xfs_buf_relse(bp); | ||
2790 | xfs_alert(mp, | 2862 | xfs_alert(mp, |
2791 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " | 2863 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " |
2792 | "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", | 2864 | "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", |
@@ -2794,29 +2866,27 @@ xlog_recover_inode_pass2( | |||
2794 | dicp->di_nextents + dicp->di_anextents, | 2866 | dicp->di_nextents + dicp->di_anextents, |
2795 | dicp->di_nblocks); | 2867 | dicp->di_nblocks); |
2796 | error = EFSCORRUPTED; | 2868 | error = EFSCORRUPTED; |
2797 | goto error; | 2869 | goto out_release; |
2798 | } | 2870 | } |
2799 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { | 2871 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { |
2800 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", | 2872 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", |
2801 | XFS_ERRLEVEL_LOW, mp, dicp); | 2873 | XFS_ERRLEVEL_LOW, mp, dicp); |
2802 | xfs_buf_relse(bp); | ||
2803 | xfs_alert(mp, | 2874 | xfs_alert(mp, |
2804 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " | 2875 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " |
2805 | "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, | 2876 | "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, |
2806 | item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); | 2877 | item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); |
2807 | error = EFSCORRUPTED; | 2878 | error = EFSCORRUPTED; |
2808 | goto error; | 2879 | goto out_release; |
2809 | } | 2880 | } |
2810 | isize = xfs_icdinode_size(dicp->di_version); | 2881 | isize = xfs_icdinode_size(dicp->di_version); |
2811 | if (unlikely(item->ri_buf[1].i_len > isize)) { | 2882 | if (unlikely(item->ri_buf[1].i_len > isize)) { |
2812 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", | 2883 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", |
2813 | XFS_ERRLEVEL_LOW, mp, dicp); | 2884 | XFS_ERRLEVEL_LOW, mp, dicp); |
2814 | xfs_buf_relse(bp); | ||
2815 | xfs_alert(mp, | 2885 | xfs_alert(mp, |
2816 | "%s: Bad inode log record length %d, rec ptr 0x%p", | 2886 | "%s: Bad inode log record length %d, rec ptr 0x%p", |
2817 | __func__, item->ri_buf[1].i_len, item); | 2887 | __func__, item->ri_buf[1].i_len, item); |
2818 | error = EFSCORRUPTED; | 2888 | error = EFSCORRUPTED; |
2819 | goto error; | 2889 | goto out_release; |
2820 | } | 2890 | } |
2821 | 2891 | ||
2822 | /* The core is in in-core format */ | 2892 | /* The core is in in-core format */ |
@@ -2842,7 +2912,7 @@ xlog_recover_inode_pass2( | |||
2842 | } | 2912 | } |
2843 | 2913 | ||
2844 | if (in_f->ilf_size == 2) | 2914 | if (in_f->ilf_size == 2) |
2845 | goto write_inode_buffer; | 2915 | goto out_owner_change; |
2846 | len = item->ri_buf[2].i_len; | 2916 | len = item->ri_buf[2].i_len; |
2847 | src = item->ri_buf[2].i_addr; | 2917 | src = item->ri_buf[2].i_addr; |
2848 | ASSERT(in_f->ilf_size <= 4); | 2918 | ASSERT(in_f->ilf_size <= 4); |
@@ -2903,13 +2973,15 @@ xlog_recover_inode_pass2( | |||
2903 | default: | 2973 | default: |
2904 | xfs_warn(log->l_mp, "%s: Invalid flag", __func__); | 2974 | xfs_warn(log->l_mp, "%s: Invalid flag", __func__); |
2905 | ASSERT(0); | 2975 | ASSERT(0); |
2906 | xfs_buf_relse(bp); | ||
2907 | error = EIO; | 2976 | error = EIO; |
2908 | goto error; | 2977 | goto out_release; |
2909 | } | 2978 | } |
2910 | } | 2979 | } |
2911 | 2980 | ||
2912 | write_inode_buffer: | 2981 | out_owner_change: |
2982 | if (in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) | ||
2983 | error = xfs_recover_inode_owner_change(mp, dip, in_f, | ||
2984 | buffer_list); | ||
2913 | /* re-generate the checksum. */ | 2985 | /* re-generate the checksum. */ |
2914 | xfs_dinode_calc_crc(log->l_mp, dip); | 2986 | xfs_dinode_calc_crc(log->l_mp, dip); |
2915 | 2987 | ||
@@ -2923,6 +2995,9 @@ error: | |||
2923 | if (need_free) | 2995 | if (need_free) |
2924 | kmem_free(in_f); | 2996 | kmem_free(in_f); |
2925 | return XFS_ERROR(error); | 2997 | return XFS_ERROR(error); |
2998 | |||
2999 | xfs_buf_relse(bp); | ||
3000 | goto error; | ||
2926 | } | 3001 | } |
2927 | 3002 | ||
2928 | /* | 3003 | /* |