diff options
author | Christoph Hellwig <hch@lst.de> | 2013-04-03 01:11:17 -0400 |
---|---|---|
committer | Ben Myers <bpm@sgi.com> | 2013-04-21 16:03:33 -0400 |
commit | 93848a999cf9b9e4f4f77dba843a48c393f33c59 (patch) | |
tree | 92c2fb4e741a8d70e70f9d31df308d1a30ecaef6 | |
parent | 3fe58f30b4fc3f8a9084b035a02bc0c67bee8d00 (diff) |
xfs: add version 3 inode format with CRCs
Add a new inode version with a larger core. The primary objective is
to allow for a crc of the inode, and location information (uuid and ino)
to verify it was written in the right place. We also extend it by:
  - a creation time (for Samba);
  - a changecount (for NFSv4);
  - a flush sequence (in LSN format for recovery);
  - an additional inode flags field; and
  - some additional padding.
These additional fields are not implemented yet, but already laid
out in the structure.
[dchinner@redhat.com] Added LSN and flags field, some factoring and rework to
capture all the necessary information in the crc calculation.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Ben Myers <bpm@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
-rw-r--r-- | fs/xfs/xfs_buf_item.h | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_dinode.h | 33 | ||||
-rw-r--r-- | fs/xfs/xfs_ialloc.c | 50 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 179 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 26 | ||||
-rw-r--r-- | fs/xfs/xfs_inode_item.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 32 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_buf.c | 5 |
8 files changed, 254 insertions, 77 deletions
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index c25660691e08..abae8c8c4ec4 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h | |||
@@ -48,6 +48,7 @@ extern kmem_zone_t *xfs_buf_item_zone; | |||
48 | #define XFS_BLF_AGF_BUF (1<<6) | 48 | #define XFS_BLF_AGF_BUF (1<<6) |
49 | #define XFS_BLF_AGFL_BUF (1<<7) | 49 | #define XFS_BLF_AGFL_BUF (1<<7) |
50 | #define XFS_BLF_AGI_BUF (1<<8) | 50 | #define XFS_BLF_AGI_BUF (1<<8) |
51 | #define XFS_BLF_DINO_BUF (1<<9) | ||
51 | 52 | ||
52 | #define XFS_BLF_TYPE_MASK \ | 53 | #define XFS_BLF_TYPE_MASK \ |
53 | (XFS_BLF_UDQUOT_BUF | \ | 54 | (XFS_BLF_UDQUOT_BUF | \ |
@@ -56,7 +57,8 @@ extern kmem_zone_t *xfs_buf_item_zone; | |||
56 | XFS_BLF_BTREE_BUF | \ | 57 | XFS_BLF_BTREE_BUF | \ |
57 | XFS_BLF_AGF_BUF | \ | 58 | XFS_BLF_AGF_BUF | \ |
58 | XFS_BLF_AGFL_BUF | \ | 59 | XFS_BLF_AGFL_BUF | \ |
59 | XFS_BLF_AGI_BUF) | 60 | XFS_BLF_AGI_BUF | \ |
61 | XFS_BLF_DINO_BUF) | ||
60 | 62 | ||
61 | #define XFS_BLF_CHUNK 128 | 63 | #define XFS_BLF_CHUNK 128 |
62 | #define XFS_BLF_SHIFT 7 | 64 | #define XFS_BLF_SHIFT 7 |
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index 6b5bd1745dbe..f7a0e95d197a 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h | |||
@@ -19,7 +19,7 @@ | |||
19 | #define __XFS_DINODE_H__ | 19 | #define __XFS_DINODE_H__ |
20 | 20 | ||
21 | #define XFS_DINODE_MAGIC 0x494e /* 'IN' */ | 21 | #define XFS_DINODE_MAGIC 0x494e /* 'IN' */ |
22 | #define XFS_DINODE_GOOD_VERSION(v) (((v) == 1 || (v) == 2)) | 22 | #define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3) |
23 | 23 | ||
24 | typedef struct xfs_timestamp { | 24 | typedef struct xfs_timestamp { |
25 | __be32 t_sec; /* timestamp seconds */ | 25 | __be32 t_sec; /* timestamp seconds */ |
@@ -70,11 +70,36 @@ typedef struct xfs_dinode { | |||
70 | 70 | ||
71 | /* di_next_unlinked is the only non-core field in the old dinode */ | 71 | /* di_next_unlinked is the only non-core field in the old dinode */ |
72 | __be32 di_next_unlinked;/* agi unlinked list ptr */ | 72 | __be32 di_next_unlinked;/* agi unlinked list ptr */ |
73 | } __attribute__((packed)) xfs_dinode_t; | 73 | |
74 | /* start of the extended dinode, writable fields */ | ||
75 | __le32 di_crc; /* CRC of the inode */ | ||
76 | __be64 di_changecount; /* number of attribute changes */ | ||
77 | __be64 di_lsn; /* flush sequence */ | ||
78 | __be64 di_flags2; /* more random flags */ | ||
79 | __u8 di_pad2[16]; /* more padding for future expansion */ | ||
80 | |||
81 | /* fields only written to during inode creation */ | ||
82 | xfs_timestamp_t di_crtime; /* time created */ | ||
83 | __be64 di_ino; /* inode number */ | ||
84 | uuid_t di_uuid; /* UUID of the filesystem */ | ||
85 | |||
86 | /* structure must be padded to 64 bit alignment */ | ||
87 | } xfs_dinode_t; | ||
74 | 88 | ||
75 | #define DI_MAX_FLUSH 0xffff | 89 | #define DI_MAX_FLUSH 0xffff |
76 | 90 | ||
77 | /* | 91 | /* |
92 | * Size of the core inode on disk. Version 1 and 2 inodes have | ||
93 | * the same size, but version 3 has grown a few additional fields. | ||
94 | */ | ||
95 | static inline uint xfs_dinode_size(int version) | ||
96 | { | ||
97 | if (version == 3) | ||
98 | return sizeof(struct xfs_dinode); | ||
99 | return offsetof(struct xfs_dinode, di_crc); | ||
100 | } | ||
101 | |||
102 | /* | ||
78 | * The 32 bit link count in the inode theoretically maxes out at UINT_MAX. | 103 | * The 32 bit link count in the inode theoretically maxes out at UINT_MAX. |
79 | * Since the pathconf interface is signed, we use 2^31 - 1 instead. | 104 | * Since the pathconf interface is signed, we use 2^31 - 1 instead. |
80 | * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX. | 105 | * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX. |
@@ -105,7 +130,7 @@ typedef enum xfs_dinode_fmt { | |||
105 | * Inode size for given fs. | 130 | * Inode size for given fs. |
106 | */ | 131 | */ |
107 | #define XFS_LITINO(mp, version) \ | 132 | #define XFS_LITINO(mp, version) \ |
108 | ((int)(((mp)->m_sb.sb_inodesize) - sizeof(struct xfs_dinode))) | 133 | ((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version))) |
109 | 134 | ||
110 | #define XFS_BROOT_SIZE_ADJ(ip) \ | 135 | #define XFS_BROOT_SIZE_ADJ(ip) \ |
111 | (XFS_BMBT_BLOCK_LEN((ip)->i_mount) - sizeof(xfs_bmdr_block_t)) | 136 | (XFS_BMBT_BLOCK_LEN((ip)->i_mount) - sizeof(xfs_bmdr_block_t)) |
@@ -133,7 +158,7 @@ typedef enum xfs_dinode_fmt { | |||
133 | * Return pointers to the data or attribute forks. | 158 | * Return pointers to the data or attribute forks. |
134 | */ | 159 | */ |
135 | #define XFS_DFORK_DPTR(dip) \ | 160 | #define XFS_DFORK_DPTR(dip) \ |
136 | ((char *)(dip) + sizeof(struct xfs_dinode)) | 161 | ((char *)dip + xfs_dinode_size(dip->di_version)) |
137 | #define XFS_DFORK_APTR(dip) \ | 162 | #define XFS_DFORK_APTR(dip) \ |
138 | (XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip)) | 163 | (XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip)) |
139 | #define XFS_DFORK_PTR(dip,w) \ | 164 | #define XFS_DFORK_PTR(dip,w) \ |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 6d0a4954aa8c..3039f829c96a 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -167,6 +167,7 @@ xfs_ialloc_inode_init( | |||
167 | int version; | 167 | int version; |
168 | int i, j; | 168 | int i, j; |
169 | xfs_daddr_t d; | 169 | xfs_daddr_t d; |
170 | xfs_ino_t ino = 0; | ||
170 | 171 | ||
171 | /* | 172 | /* |
172 | * Loop over the new block(s), filling in the inodes. | 173 | * Loop over the new block(s), filling in the inodes. |
@@ -185,13 +186,29 @@ xfs_ialloc_inode_init( | |||
185 | } | 186 | } |
186 | 187 | ||
187 | /* | 188 | /* |
188 | * Figure out what version number to use in the inodes we create. | 189 | * Figure out what version number to use in the inodes we create. If |
189 | * If the superblock version has caught up to the one that supports | 190 | * the superblock version has caught up to the one that supports the new |
190 | * the new inode format, then use the new inode version. Otherwise | 191 | * inode format, then use the new inode version. Otherwise use the old |
191 | * use the old version so that old kernels will continue to be | 192 | * version so that old kernels will continue to be able to use the file |
192 | * able to use the file system. | 193 | * system. |
194 | * | ||
195 | * For v3 inodes, we also need to write the inode number into the inode, | ||
196 | * so calculate the first inode number of the chunk here as | ||
197 | * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not | ||
198 | * across multiple filesystem blocks (such as a cluster) and so cannot | ||
199 | * be used in the cluster buffer loop below. | ||
200 | * | ||
201 | * Further, because we are writing the inode directly into the buffer | ||
203 | * and calculating a CRC on the entire inode, we have to log the entire | ||
203 | * inode so that the entire range the CRC covers is present in the log. | ||
204 | * That means for v3 inode we log the entire buffer rather than just the | ||
205 | * inode cores. | ||
193 | */ | 206 | */ |
194 | if (xfs_sb_version_hasnlink(&mp->m_sb)) | 207 | if (xfs_sb_version_hascrc(&mp->m_sb)) { |
208 | version = 3; | ||
209 | ino = XFS_AGINO_TO_INO(mp, agno, | ||
210 | XFS_OFFBNO_TO_AGINO(mp, agbno, 0)); | ||
211 | } else if (xfs_sb_version_hasnlink(&mp->m_sb)) | ||
195 | version = 2; | 212 | version = 2; |
196 | else | 213 | else |
197 | version = 1; | 214 | version = 1; |
@@ -214,17 +231,32 @@ xfs_ialloc_inode_init( | |||
214 | * individual transactions causing a lot of log traffic. | 231 | * individual transactions causing a lot of log traffic. |
215 | */ | 232 | */ |
216 | fbuf->b_ops = &xfs_inode_buf_ops; | 233 | fbuf->b_ops = &xfs_inode_buf_ops; |
217 | xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); | 234 | xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); |
218 | for (i = 0; i < ninodes; i++) { | 235 | for (i = 0; i < ninodes; i++) { |
219 | int ioffset = i << mp->m_sb.sb_inodelog; | 236 | int ioffset = i << mp->m_sb.sb_inodelog; |
220 | uint isize = sizeof(struct xfs_dinode); | 237 | uint isize = xfs_dinode_size(version); |
221 | 238 | ||
222 | free = xfs_make_iptr(mp, fbuf, i); | 239 | free = xfs_make_iptr(mp, fbuf, i); |
223 | free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); | 240 | free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); |
224 | free->di_version = version; | 241 | free->di_version = version; |
225 | free->di_gen = cpu_to_be32(gen); | 242 | free->di_gen = cpu_to_be32(gen); |
226 | free->di_next_unlinked = cpu_to_be32(NULLAGINO); | 243 | free->di_next_unlinked = cpu_to_be32(NULLAGINO); |
227 | xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); | 244 | |
245 | if (version == 3) { | ||
246 | free->di_ino = cpu_to_be64(ino); | ||
247 | ino++; | ||
248 | uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid); | ||
249 | xfs_dinode_calc_crc(mp, free); | ||
250 | } else { | ||
251 | /* just log the inode core */ | ||
252 | xfs_trans_log_buf(tp, fbuf, ioffset, | ||
253 | ioffset + isize - 1); | ||
254 | } | ||
255 | } | ||
256 | if (version == 3) { | ||
257 | /* need to log the entire buffer */ | ||
258 | xfs_trans_log_buf(tp, fbuf, 0, | ||
259 | BBTOB(fbuf->b_length) - 1); | ||
228 | } | 260 | } |
229 | xfs_trans_inode_alloc_buf(tp, fbuf); | 261 | xfs_trans_inode_alloc_buf(tp, fbuf); |
230 | } | 262 | } |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 202ce37e66cb..558ef4947206 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include "xfs_quota.h" | 44 | #include "xfs_quota.h" |
45 | #include "xfs_filestream.h" | 45 | #include "xfs_filestream.h" |
46 | #include "xfs_vnodeops.h" | 46 | #include "xfs_vnodeops.h" |
47 | #include "xfs_cksum.h" | ||
47 | #include "xfs_trace.h" | 48 | #include "xfs_trace.h" |
48 | #include "xfs_icache.h" | 49 | #include "xfs_icache.h" |
49 | 50 | ||
@@ -866,6 +867,17 @@ xfs_dinode_from_disk( | |||
866 | to->di_dmstate = be16_to_cpu(from->di_dmstate); | 867 | to->di_dmstate = be16_to_cpu(from->di_dmstate); |
867 | to->di_flags = be16_to_cpu(from->di_flags); | 868 | to->di_flags = be16_to_cpu(from->di_flags); |
868 | to->di_gen = be32_to_cpu(from->di_gen); | 869 | to->di_gen = be32_to_cpu(from->di_gen); |
870 | |||
871 | if (to->di_version == 3) { | ||
872 | to->di_changecount = be64_to_cpu(from->di_changecount); | ||
873 | to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec); | ||
874 | to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec); | ||
875 | to->di_flags2 = be64_to_cpu(from->di_flags2); | ||
876 | to->di_ino = be64_to_cpu(from->di_ino); | ||
877 | to->di_lsn = be64_to_cpu(from->di_lsn); | ||
878 | memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); | ||
879 | uuid_copy(&to->di_uuid, &from->di_uuid); | ||
880 | } | ||
869 | } | 881 | } |
870 | 882 | ||
871 | void | 883 | void |
@@ -902,6 +914,17 @@ xfs_dinode_to_disk( | |||
902 | to->di_dmstate = cpu_to_be16(from->di_dmstate); | 914 | to->di_dmstate = cpu_to_be16(from->di_dmstate); |
903 | to->di_flags = cpu_to_be16(from->di_flags); | 915 | to->di_flags = cpu_to_be16(from->di_flags); |
904 | to->di_gen = cpu_to_be32(from->di_gen); | 916 | to->di_gen = cpu_to_be32(from->di_gen); |
917 | |||
918 | if (from->di_version == 3) { | ||
919 | to->di_changecount = cpu_to_be64(from->di_changecount); | ||
920 | to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); | ||
921 | to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); | ||
922 | to->di_flags2 = cpu_to_be64(from->di_flags2); | ||
923 | to->di_ino = cpu_to_be64(from->di_ino); | ||
924 | to->di_lsn = cpu_to_be64(from->di_lsn); | ||
925 | memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); | ||
926 | uuid_copy(&to->di_uuid, &from->di_uuid); | ||
927 | } | ||
905 | } | 928 | } |
906 | 929 | ||
907 | STATIC uint | 930 | STATIC uint |
@@ -962,6 +985,47 @@ xfs_dic2xflags( | |||
962 | (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); | 985 | (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); |
963 | } | 986 | } |
964 | 987 | ||
988 | static bool | ||
989 | xfs_dinode_verify( | ||
990 | struct xfs_mount *mp, | ||
991 | struct xfs_inode *ip, | ||
992 | struct xfs_dinode *dip) | ||
993 | { | ||
994 | if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) | ||
995 | return false; | ||
996 | |||
997 | /* only version 3 or greater inodes are extensively verified here */ | ||
998 | if (dip->di_version < 3) | ||
999 | return true; | ||
1000 | |||
1001 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
1002 | return false; | ||
1003 | if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, | ||
1004 | offsetof(struct xfs_dinode, di_crc))) | ||
1005 | return false; | ||
1006 | if (be64_to_cpu(dip->di_ino) != ip->i_ino) | ||
1007 | return false; | ||
1008 | if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid)) | ||
1009 | return false; | ||
1010 | return true; | ||
1011 | } | ||
1012 | |||
1013 | void | ||
1014 | xfs_dinode_calc_crc( | ||
1015 | struct xfs_mount *mp, | ||
1016 | struct xfs_dinode *dip) | ||
1017 | { | ||
1018 | __uint32_t crc; | ||
1019 | |||
1020 | if (dip->di_version < 3) | ||
1021 | return; | ||
1022 | |||
1023 | ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); | ||
1024 | crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, | ||
1025 | offsetof(struct xfs_dinode, di_crc)); | ||
1026 | dip->di_crc = xfs_end_cksum(crc); | ||
1027 | } | ||
1028 | |||
965 | /* | 1029 | /* |
966 | * Read the disk inode attributes into the in-core inode structure. | 1030 | * Read the disk inode attributes into the in-core inode structure. |
967 | */ | 1031 | */ |
@@ -990,17 +1054,13 @@ xfs_iread( | |||
990 | if (error) | 1054 | if (error) |
991 | return error; | 1055 | return error; |
992 | 1056 | ||
993 | /* | 1057 | /* even unallocated inodes are verified */ |
994 | * If we got something that isn't an inode it means someone | 1058 | if (!xfs_dinode_verify(mp, ip, dip)) { |
995 | * (nfs or dmi) has a stale handle. | 1059 | xfs_alert(mp, "%s: validation failed for inode %lld failed", |
996 | */ | 1060 | __func__, ip->i_ino); |
997 | if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) { | 1061 | |
998 | #ifdef DEBUG | 1062 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); |
999 | xfs_alert(mp, | 1063 | error = XFS_ERROR(EFSCORRUPTED); |
1000 | "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)", | ||
1001 | __func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC); | ||
1002 | #endif /* DEBUG */ | ||
1003 | error = XFS_ERROR(EINVAL); | ||
1004 | goto out_brelse; | 1064 | goto out_brelse; |
1005 | } | 1065 | } |
1006 | 1066 | ||
@@ -1022,10 +1082,20 @@ xfs_iread( | |||
1022 | goto out_brelse; | 1082 | goto out_brelse; |
1023 | } | 1083 | } |
1024 | } else { | 1084 | } else { |
1085 | /* | ||
1086 | * Partial initialisation of the in-core inode. Just the bits | ||
1087 | * that xfs_ialloc won't overwrite or relies on being correct. | ||
1088 | */ | ||
1025 | ip->i_d.di_magic = be16_to_cpu(dip->di_magic); | 1089 | ip->i_d.di_magic = be16_to_cpu(dip->di_magic); |
1026 | ip->i_d.di_version = dip->di_version; | 1090 | ip->i_d.di_version = dip->di_version; |
1027 | ip->i_d.di_gen = be32_to_cpu(dip->di_gen); | 1091 | ip->i_d.di_gen = be32_to_cpu(dip->di_gen); |
1028 | ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); | 1092 | ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); |
1093 | |||
1094 | if (dip->di_version == 3) { | ||
1095 | ip->i_d.di_ino = be64_to_cpu(dip->di_ino); | ||
1096 | uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid); | ||
1097 | } | ||
1098 | |||
1029 | /* | 1099 | /* |
1030 | * Make sure to pull in the mode here as well in | 1100 | * Make sure to pull in the mode here as well in |
1031 | * case the inode is released without being used. | 1101 | * case the inode is released without being used. |
@@ -1161,6 +1231,7 @@ xfs_ialloc( | |||
1161 | xfs_buf_t **ialloc_context, | 1231 | xfs_buf_t **ialloc_context, |
1162 | xfs_inode_t **ipp) | 1232 | xfs_inode_t **ipp) |
1163 | { | 1233 | { |
1234 | struct xfs_mount *mp = tp->t_mountp; | ||
1164 | xfs_ino_t ino; | 1235 | xfs_ino_t ino; |
1165 | xfs_inode_t *ip; | 1236 | xfs_inode_t *ip; |
1166 | uint flags; | 1237 | uint flags; |
@@ -1187,7 +1258,7 @@ xfs_ialloc( | |||
1187 | * This is because we're setting fields here we need | 1258 | * This is because we're setting fields here we need |
1188 | * to prevent others from looking at until we're done. | 1259 | * to prevent others from looking at until we're done. |
1189 | */ | 1260 | */ |
1190 | error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE, | 1261 | error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, |
1191 | XFS_ILOCK_EXCL, &ip); | 1262 | XFS_ILOCK_EXCL, &ip); |
1192 | if (error) | 1263 | if (error) |
1193 | return error; | 1264 | return error; |
@@ -1208,7 +1279,7 @@ xfs_ialloc( | |||
1208 | * the inode version number now. This way we only do the conversion | 1279 | * the inode version number now. This way we only do the conversion |
1209 | * here rather than here and in the flush/logging code. | 1280 | * here rather than here and in the flush/logging code. |
1210 | */ | 1281 | */ |
1211 | if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) && | 1282 | if (xfs_sb_version_hasnlink(&mp->m_sb) && |
1212 | ip->i_d.di_version == 1) { | 1283 | ip->i_d.di_version == 1) { |
1213 | ip->i_d.di_version = 2; | 1284 | ip->i_d.di_version = 2; |
1214 | /* | 1285 | /* |
@@ -1258,6 +1329,19 @@ xfs_ialloc( | |||
1258 | ip->i_d.di_dmevmask = 0; | 1329 | ip->i_d.di_dmevmask = 0; |
1259 | ip->i_d.di_dmstate = 0; | 1330 | ip->i_d.di_dmstate = 0; |
1260 | ip->i_d.di_flags = 0; | 1331 | ip->i_d.di_flags = 0; |
1332 | |||
1333 | if (ip->i_d.di_version == 3) { | ||
1334 | ASSERT(ip->i_d.di_ino == ino); | ||
1335 | ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid)); | ||
1336 | ip->i_d.di_crc = 0; | ||
1337 | ip->i_d.di_changecount = 1; | ||
1338 | ip->i_d.di_lsn = 0; | ||
1339 | ip->i_d.di_flags2 = 0; | ||
1340 | memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2)); | ||
1341 | ip->i_d.di_crtime = ip->i_d.di_mtime; | ||
1342 | } | ||
1343 | |||
1344 | |||
1261 | flags = XFS_ILOG_CORE; | 1345 | flags = XFS_ILOG_CORE; |
1262 | switch (mode & S_IFMT) { | 1346 | switch (mode & S_IFMT) { |
1263 | case S_IFIFO: | 1347 | case S_IFIFO: |
@@ -2716,20 +2800,18 @@ abort_out: | |||
2716 | 2800 | ||
2717 | STATIC int | 2801 | STATIC int |
2718 | xfs_iflush_int( | 2802 | xfs_iflush_int( |
2719 | xfs_inode_t *ip, | 2803 | struct xfs_inode *ip, |
2720 | xfs_buf_t *bp) | 2804 | struct xfs_buf *bp) |
2721 | { | 2805 | { |
2722 | xfs_inode_log_item_t *iip; | 2806 | struct xfs_inode_log_item *iip = ip->i_itemp; |
2723 | xfs_dinode_t *dip; | 2807 | struct xfs_dinode *dip; |
2724 | xfs_mount_t *mp; | 2808 | struct xfs_mount *mp = ip->i_mount; |
2725 | 2809 | ||
2726 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 2810 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
2727 | ASSERT(xfs_isiflocked(ip)); | 2811 | ASSERT(xfs_isiflocked(ip)); |
2728 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || | 2812 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
2729 | ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); | 2813 | ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); |
2730 | 2814 | ASSERT(iip != NULL && iip->ili_fields != 0); | |
2731 | iip = ip->i_itemp; | ||
2732 | mp = ip->i_mount; | ||
2733 | 2815 | ||
2734 | /* set *dip = inode's place in the buffer */ | 2816 | /* set *dip = inode's place in the buffer */ |
2735 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); | 2817 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); |
@@ -2790,9 +2872,9 @@ xfs_iflush_int( | |||
2790 | } | 2872 | } |
2791 | /* | 2873 | /* |
2792 | * bump the flush iteration count, used to detect flushes which | 2874 | * bump the flush iteration count, used to detect flushes which |
2793 | * postdate a log record during recovery. | 2875 | * postdate a log record during recovery. This is redundant as we now |
2876 | * log every change and hence this can't happen. Still, it doesn't hurt. | ||
2794 | */ | 2877 | */ |
2795 | |||
2796 | ip->i_d.di_flushiter++; | 2878 | ip->i_d.di_flushiter++; |
2797 | 2879 | ||
2798 | /* | 2880 | /* |
@@ -2868,41 +2950,30 @@ xfs_iflush_int( | |||
2868 | * need the AIL lock, because it is a 64 bit value that cannot be read | 2950 | * need the AIL lock, because it is a 64 bit value that cannot be read |
2869 | * atomically. | 2951 | * atomically. |
2870 | */ | 2952 | */ |
2871 | if (iip != NULL && iip->ili_fields != 0) { | 2953 | iip->ili_last_fields = iip->ili_fields; |
2872 | iip->ili_last_fields = iip->ili_fields; | 2954 | iip->ili_fields = 0; |
2873 | iip->ili_fields = 0; | 2955 | iip->ili_logged = 1; |
2874 | iip->ili_logged = 1; | ||
2875 | 2956 | ||
2876 | xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, | 2957 | xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, |
2877 | &iip->ili_item.li_lsn); | 2958 | &iip->ili_item.li_lsn); |
2878 | 2959 | ||
2879 | /* | 2960 | /* |
2880 | * Attach the function xfs_iflush_done to the inode's | 2961 | * Attach the function xfs_iflush_done to the inode's |
2881 | * buffer. This will remove the inode from the AIL | 2962 | * buffer. This will remove the inode from the AIL |
2882 | * and unlock the inode's flush lock when the inode is | 2963 | * and unlock the inode's flush lock when the inode is |
2883 | * completely written to disk. | 2964 | * completely written to disk. |
2884 | */ | 2965 | */ |
2885 | xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); | 2966 | xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); |
2886 | 2967 | ||
2887 | ASSERT(bp->b_fspriv != NULL); | 2968 | /* update the lsn in the on disk inode if required */ |
2888 | ASSERT(bp->b_iodone != NULL); | 2969 | if (ip->i_d.di_version == 3) |
2889 | } else { | 2970 | dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn); |
2890 | /* | 2971 | |
2891 | * We're flushing an inode which is not in the AIL and has | 2972 | /* generate the checksum. */ |
2892 | * not been logged. For this case we can immediately drop | 2973 | xfs_dinode_calc_crc(mp, dip); |
2893 | * the inode flush lock because we can avoid the whole | ||
2894 | * AIL state thing. It's OK to drop the flush lock now, | ||
2895 | * because we've already locked the buffer and to do anything | ||
2896 | * you really need both. | ||
2897 | */ | ||
2898 | if (iip != NULL) { | ||
2899 | ASSERT(iip->ili_logged == 0); | ||
2900 | ASSERT(iip->ili_last_fields == 0); | ||
2901 | ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0); | ||
2902 | } | ||
2903 | xfs_ifunlock(ip); | ||
2904 | } | ||
2905 | 2974 | ||
2975 | ASSERT(bp->b_fspriv != NULL); | ||
2976 | ASSERT(bp->b_iodone != NULL); | ||
2906 | return 0; | 2977 | return 0; |
2907 | 2978 | ||
2908 | corrupt_out: | 2979 | corrupt_out: |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index b8520b5c3a18..91129794aaec 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -150,13 +150,38 @@ typedef struct xfs_icdinode { | |||
150 | __uint16_t di_dmstate; /* DMIG state info */ | 150 | __uint16_t di_dmstate; /* DMIG state info */ |
151 | __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */ | 151 | __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */ |
152 | __uint32_t di_gen; /* generation number */ | 152 | __uint32_t di_gen; /* generation number */ |
153 | |||
154 | /* di_next_unlinked is the only non-core field in the old dinode */ | ||
155 | xfs_agino_t di_next_unlinked;/* agi unlinked list ptr */ | ||
156 | |||
157 | /* start of the extended dinode, writable fields */ | ||
158 | __uint32_t di_crc; /* CRC of the inode */ | ||
159 | __uint64_t di_changecount; /* number of attribute changes */ | ||
160 | xfs_lsn_t di_lsn; /* flush sequence */ | ||
161 | __uint64_t di_flags2; /* more random flags */ | ||
162 | __uint8_t di_pad2[16]; /* more padding for future expansion */ | ||
163 | |||
164 | /* fields only written to during inode creation */ | ||
165 | xfs_ictimestamp_t di_crtime; /* time created */ | ||
166 | xfs_ino_t di_ino; /* inode number */ | ||
167 | uuid_t di_uuid; /* UUID of the filesystem */ | ||
168 | |||
169 | /* structure must be padded to 64 bit alignment */ | ||
153 | } xfs_icdinode_t; | 170 | } xfs_icdinode_t; |
154 | 171 | ||
172 | static inline uint xfs_icdinode_size(int version) | ||
173 | { | ||
174 | if (version == 3) | ||
175 | return sizeof(struct xfs_icdinode); | ||
176 | return offsetof(struct xfs_icdinode, di_next_unlinked); | ||
177 | } | ||
178 | |||
155 | /* | 179 | /* |
156 | * Flags for xfs_ichgtime(). | 180 | * Flags for xfs_ichgtime(). |
157 | */ | 181 | */ |
158 | #define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ | 182 | #define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ |
159 | #define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ | 183 | #define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ |
184 | #define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */ | ||
160 | 185 | ||
161 | /* | 186 | /* |
162 | * Per-fork incore inode flags. | 187 | * Per-fork incore inode flags. |
@@ -556,6 +581,7 @@ int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *, | |||
556 | struct xfs_buf **, uint, uint); | 581 | struct xfs_buf **, uint, uint); |
557 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, | 582 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, |
558 | struct xfs_inode *, uint); | 583 | struct xfs_inode *, uint); |
584 | void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *); | ||
559 | void xfs_dinode_to_disk(struct xfs_dinode *, | 585 | void xfs_dinode_to_disk(struct xfs_dinode *, |
560 | struct xfs_icdinode *); | 586 | struct xfs_icdinode *); |
561 | void xfs_idestroy_fork(struct xfs_inode *, int); | 587 | void xfs_idestroy_fork(struct xfs_inode *, int); |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index f034bd1652f0..f76ff52e43c0 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -179,7 +179,7 @@ xfs_inode_item_format( | |||
179 | nvecs = 1; | 179 | nvecs = 1; |
180 | 180 | ||
181 | vecp->i_addr = &ip->i_d; | 181 | vecp->i_addr = &ip->i_d; |
182 | vecp->i_len = sizeof(struct xfs_icdinode); | 182 | vecp->i_len = xfs_icdinode_size(ip->i_d.di_version); |
183 | vecp->i_type = XLOG_REG_TYPE_ICORE; | 183 | vecp->i_type = XLOG_REG_TYPE_ICORE; |
184 | vecp++; | 184 | vecp++; |
185 | nvecs++; | 185 | nvecs++; |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 27b3ec214a67..287878219af7 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -1786,6 +1786,7 @@ xlog_recover_do_inode_buffer( | |||
1786 | xfs_agino_t *buffer_nextp; | 1786 | xfs_agino_t *buffer_nextp; |
1787 | 1787 | ||
1788 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); | 1788 | trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); |
1789 | bp->b_ops = &xfs_inode_buf_ops; | ||
1789 | 1790 | ||
1790 | inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; | 1791 | inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; |
1791 | for (i = 0; i < inodes_per_buf; i++) { | 1792 | for (i = 0; i < inodes_per_buf; i++) { |
@@ -1989,6 +1990,18 @@ xlog_recover_do_reg_buffer( | |||
1989 | } | 1990 | } |
1990 | bp->b_ops = &xfs_dquot_buf_ops; | 1991 | bp->b_ops = &xfs_dquot_buf_ops; |
1991 | break; | 1992 | break; |
1993 | case XFS_BLF_DINO_BUF: | ||
1994 | /* | ||
1995 | * we get here with inode allocation buffers, not buffers that | ||
1996 | * track unlinked list changes. | ||
1997 | */ | ||
1998 | if (*(__be16 *)bp->b_addr != cpu_to_be16(XFS_DINODE_MAGIC)) { | ||
1999 | xfs_warn(mp, "Bad INODE block magic!"); | ||
2000 | ASSERT(0); | ||
2001 | break; | ||
2002 | } | ||
2003 | bp->b_ops = &xfs_inode_buf_ops; | ||
2004 | break; | ||
1992 | default: | 2005 | default: |
1993 | break; | 2006 | break; |
1994 | } | 2007 | } |
@@ -2277,6 +2290,7 @@ xlog_recover_inode_pass2( | |||
2277 | int attr_index; | 2290 | int attr_index; |
2278 | uint fields; | 2291 | uint fields; |
2279 | xfs_icdinode_t *dicp; | 2292 | xfs_icdinode_t *dicp; |
2293 | uint isize; | ||
2280 | int need_free = 0; | 2294 | int need_free = 0; |
2281 | 2295 | ||
2282 | if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { | 2296 | if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { |
@@ -2302,7 +2316,7 @@ xlog_recover_inode_pass2( | |||
2302 | trace_xfs_log_recover_inode_recover(log, in_f); | 2316 | trace_xfs_log_recover_inode_recover(log, in_f); |
2303 | 2317 | ||
2304 | bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0, | 2318 | bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0, |
2305 | NULL); | 2319 | &xfs_inode_buf_ops); |
2306 | if (!bp) { | 2320 | if (!bp) { |
2307 | error = ENOMEM; | 2321 | error = ENOMEM; |
2308 | goto error; | 2322 | goto error; |
@@ -2413,7 +2427,8 @@ xlog_recover_inode_pass2( | |||
2413 | error = EFSCORRUPTED; | 2427 | error = EFSCORRUPTED; |
2414 | goto error; | 2428 | goto error; |
2415 | } | 2429 | } |
2416 | if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { | 2430 | isize = xfs_icdinode_size(dicp->di_version); |
2431 | if (unlikely(item->ri_buf[1].i_len > isize)) { | ||
2417 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", | 2432 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", |
2418 | XFS_ERRLEVEL_LOW, mp, dicp); | 2433 | XFS_ERRLEVEL_LOW, mp, dicp); |
2419 | xfs_buf_relse(bp); | 2434 | xfs_buf_relse(bp); |
@@ -2425,13 +2440,13 @@ xlog_recover_inode_pass2( | |||
2425 | } | 2440 | } |
2426 | 2441 | ||
2427 | /* The core is in in-core format */ | 2442 | /* The core is in in-core format */ |
2428 | xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr); | 2443 | xfs_dinode_to_disk(dip, dicp); |
2429 | 2444 | ||
2430 | /* the rest is in on-disk format */ | 2445 | /* the rest is in on-disk format */ |
2431 | if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { | 2446 | if (item->ri_buf[1].i_len > isize) { |
2432 | memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode), | 2447 | memcpy((char *)dip + isize, |
2433 | item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode), | 2448 | item->ri_buf[1].i_addr + isize, |
2434 | item->ri_buf[1].i_len - sizeof(struct xfs_icdinode)); | 2449 | item->ri_buf[1].i_len - isize); |
2435 | } | 2450 | } |
2436 | 2451 | ||
2437 | fields = in_f->ilf_fields; | 2452 | fields = in_f->ilf_fields; |
@@ -2515,6 +2530,9 @@ xlog_recover_inode_pass2( | |||
2515 | } | 2530 | } |
2516 | 2531 | ||
2517 | write_inode_buffer: | 2532 | write_inode_buffer: |
2533 | /* re-generate the checksum. */ | ||
2534 | xfs_dinode_calc_crc(log->l_mp, dip); | ||
2535 | |||
2518 | ASSERT(bp->b_target->bt_mount == mp); | 2536 | ASSERT(bp->b_target->bt_mount == mp); |
2519 | bp->b_iodone = xlog_recover_iodone; | 2537 | bp->b_iodone = xlog_recover_iodone; |
2520 | xfs_buf_delwri_queue(bp, buffer_list); | 2538 | xfs_buf_delwri_queue(bp, buffer_list); |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index f950edd0d537..8a0f6af51206 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -704,12 +704,13 @@ xfs_trans_inode_buf( | |||
704 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 704 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
705 | 705 | ||
706 | bip->bli_flags |= XFS_BLI_INODE_BUF; | 706 | bip->bli_flags |= XFS_BLI_INODE_BUF; |
707 | xfs_trans_buf_set_type(tp, bp, XFS_BLF_DINO_BUF); | ||
707 | } | 708 | } |
708 | 709 | ||
709 | /* | 710 | /* |
710 | * This call is used to indicate that the buffer is going to | 711 | * This call is used to indicate that the buffer is going to |
711 | * be staled and was an inode buffer. This means it gets | 712 | * be staled and was an inode buffer. This means it gets |
712 | * special processing during unpin - where any inodes | 713 | * special processing during unpin - where any inodes |
713 | * associated with the buffer should be removed from ail. | 714 | * associated with the buffer should be removed from ail. |
714 | * There is also special processing during recovery, | 715 | * There is also special processing during recovery, |
715 | * any replay of the inodes in the buffer needs to be | 716 | * any replay of the inodes in the buffer needs to be |
@@ -728,6 +729,7 @@ xfs_trans_stale_inode_buf( | |||
728 | 729 | ||
729 | bip->bli_flags |= XFS_BLI_STALE_INODE; | 730 | bip->bli_flags |= XFS_BLI_STALE_INODE; |
730 | bip->bli_item.li_cb = xfs_buf_iodone; | 731 | bip->bli_item.li_cb = xfs_buf_iodone; |
732 | xfs_trans_buf_set_type(tp, bp, XFS_BLF_DINO_BUF); | ||
731 | } | 733 | } |
732 | 734 | ||
733 | /* | 735 | /* |
@@ -751,6 +753,7 @@ xfs_trans_inode_alloc_buf( | |||
751 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 753 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
752 | 754 | ||
753 | bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; | 755 | bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; |
756 | xfs_trans_buf_set_type(tp, bp, XFS_BLF_DINO_BUF); | ||
754 | } | 757 | } |
755 | 758 | ||
756 | /* | 759 | /* |