diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-26 14:22:54 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-26 14:22:54 -0400 |
commit | 6c4155a9cd6bd07d3cb70ca5eeb6f2751de0931c (patch) | |
tree | e2fbc72ab180b3c6b0d0fcee89c29a04f050d0a1 /fs | |
parent | f315cf5e02d9910b7be893f5b5afdcc2e49d352d (diff) | |
parent | e1b4271ac261b290fdab51446996fb13e68a57be (diff) |
Merge tag 'for-linus-v3.11-rc3' of git://oss.sgi.com/xfs/xfs
Pull xfs fix from Ben Myers:
"Fix for regression in commit cca9f93a52d2 ("xfs: don't do IO when
creating an new inode"), recovery causing filesystem corruption after
a crash"
* tag 'for-linus-v3.11-rc3' of git://oss.sgi.com/xfs/xfs:
xfs: di_flushiter considered harmful
Diffstat (limited to 'fs')
-rw-r--r-- | fs/xfs/xfs_dinode.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 31 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 13 |
3 files changed, 36 insertions, 11 deletions
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index 07d735a80a0f..e5869b50dc41 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h | |||
@@ -39,6 +39,9 @@ typedef struct xfs_timestamp { | |||
39 | * There is a very similar struct icdinode in xfs_inode which matches the | 39 | * There is a very similar struct icdinode in xfs_inode which matches the |
40 | * layout of the first 96 bytes of this structure, but is kept in native | 40 | * layout of the first 96 bytes of this structure, but is kept in native |
41 | * format instead of big endian. | 41 | * format instead of big endian. |
42 | * | ||
43 | * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed | ||
44 | * padding field for v3 inodes. | ||
42 | */ | 45 | */ |
43 | typedef struct xfs_dinode { | 46 | typedef struct xfs_dinode { |
44 | __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ | 47 | __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b78481f99d9d..bb262c25c8de 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -896,7 +896,6 @@ xfs_dinode_to_disk( | |||
896 | to->di_projid_lo = cpu_to_be16(from->di_projid_lo); | 896 | to->di_projid_lo = cpu_to_be16(from->di_projid_lo); |
897 | to->di_projid_hi = cpu_to_be16(from->di_projid_hi); | 897 | to->di_projid_hi = cpu_to_be16(from->di_projid_hi); |
898 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); | 898 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); |
899 | to->di_flushiter = cpu_to_be16(from->di_flushiter); | ||
900 | to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); | 899 | to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); |
901 | to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); | 900 | to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); |
902 | to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); | 901 | to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); |
@@ -924,6 +923,9 @@ xfs_dinode_to_disk( | |||
924 | to->di_lsn = cpu_to_be64(from->di_lsn); | 923 | to->di_lsn = cpu_to_be64(from->di_lsn); |
925 | memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); | 924 | memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); |
926 | uuid_copy(&to->di_uuid, &from->di_uuid); | 925 | uuid_copy(&to->di_uuid, &from->di_uuid); |
926 | to->di_flushiter = 0; | ||
927 | } else { | ||
928 | to->di_flushiter = cpu_to_be16(from->di_flushiter); | ||
927 | } | 929 | } |
928 | } | 930 | } |
929 | 931 | ||
@@ -1029,10 +1031,14 @@ xfs_dinode_calc_crc( | |||
1029 | /* | 1031 | /* |
1030 | * Read the disk inode attributes into the in-core inode structure. | 1032 | * Read the disk inode attributes into the in-core inode structure. |
1031 | * | 1033 | * |
1032 | * If we are initialising a new inode and we are not utilising the | 1034 | * For version 5 superblocks, if we are initialising a new inode and we are not |
1033 | * XFS_MOUNT_IKEEP inode cluster mode, we can simply build the new inode core | 1035 | * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simply build the new |
1034 | * with a random generation number. If we are keeping inodes around, we need to | 1036 | * inode core with a random generation number. If we are keeping inodes around, |
1035 | * read the inode cluster to get the existing generation number off disk. | 1037 | * we need to read the inode cluster to get the existing generation number off |
1038 | * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode | ||
1039 | * format) then log recovery is dependent on the di_flushiter field being | ||
1040 | * initialised from the current on-disk value and hence we must also read the | ||
1041 | * inode off disk. | ||
1036 | */ | 1042 | */ |
1037 | int | 1043 | int |
1038 | xfs_iread( | 1044 | xfs_iread( |
@@ -1054,6 +1060,7 @@ xfs_iread( | |||
1054 | 1060 | ||
1055 | /* shortcut IO on inode allocation if possible */ | 1061 | /* shortcut IO on inode allocation if possible */ |
1056 | if ((iget_flags & XFS_IGET_CREATE) && | 1062 | if ((iget_flags & XFS_IGET_CREATE) && |
1063 | xfs_sb_version_hascrc(&mp->m_sb) && | ||
1057 | !(mp->m_flags & XFS_MOUNT_IKEEP)) { | 1064 | !(mp->m_flags & XFS_MOUNT_IKEEP)) { |
1058 | /* initialise the on-disk inode core */ | 1065 | /* initialise the on-disk inode core */ |
1059 | memset(&ip->i_d, 0, sizeof(ip->i_d)); | 1066 | memset(&ip->i_d, 0, sizeof(ip->i_d)); |
@@ -2882,12 +2889,18 @@ xfs_iflush_int( | |||
2882 | __func__, ip->i_ino, ip->i_d.di_forkoff, ip); | 2889 | __func__, ip->i_ino, ip->i_d.di_forkoff, ip); |
2883 | goto corrupt_out; | 2890 | goto corrupt_out; |
2884 | } | 2891 | } |
2892 | |||
2885 | /* | 2893 | /* |
2886 | * bump the flush iteration count, used to detect flushes which | 2894 | * Inode item log recovery for v1/v2 inodes is dependent on the |
2887 | * postdate a log record during recovery. This is redundant as we now | 2895 | * di_flushiter count for correct sequencing. We bump the flush |
2888 | * log every change and hence this can't happen. Still, it doesn't hurt. | 2896 | * iteration count so we can detect flushes which postdate a log record |
2897 | * during recovery. This is redundant as we now log every change and | ||
2898 | * hence this can't happen but we need to still do it to ensure | ||
2899 | * backwards compatibility with old kernels that predate logging all | ||
2900 | * inode changes. | ||
2889 | */ | 2901 | */ |
2890 | ip->i_d.di_flushiter++; | 2902 | if (ip->i_d.di_version < 3) |
2903 | ip->i_d.di_flushiter++; | ||
2891 | 2904 | ||
2892 | /* | 2905 | /* |
2893 | * Copy the dirty parts of the inode into the on-disk | 2906 | * Copy the dirty parts of the inode into the on-disk |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 6fcc910a50b9..7681b19aa5dc 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -2592,8 +2592,16 @@ xlog_recover_inode_pass2( | |||
2592 | goto error; | 2592 | goto error; |
2593 | } | 2593 | } |
2594 | 2594 | ||
2595 | /* Skip replay when the on disk inode is newer than the log one */ | 2595 | /* |
2596 | if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { | 2596 | * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes |
2597 | * are transactional and if ordering is necessary we can determine that | ||
2598 | * more accurately by the LSN field in the V3 inode core. Don't trust | ||
2599 | * the inode versions, as we might be changing them here - use the
2600 | * superblock flag to determine whether we need to look at di_flushiter
2601 | * to skip replay when the on disk inode is newer than the log one.
2602 | */ | ||
2603 | if (!xfs_sb_version_hascrc(&mp->m_sb) && | ||
2604 | dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { | ||
2597 | /* | 2605 | /* |
2598 | * Deal with the wrap case, DI_MAX_FLUSH is less | 2606 | * Deal with the wrap case, DI_MAX_FLUSH is less |
2599 | * than smaller numbers | 2607 | * than smaller numbers |
@@ -2608,6 +2616,7 @@ xlog_recover_inode_pass2( | |||
2608 | goto error; | 2616 | goto error; |
2609 | } | 2617 | } |
2610 | } | 2618 | } |
2619 | |||
2611 | /* Take the opportunity to reset the flush iteration count */ | 2620 | /* Take the opportunity to reset the flush iteration count */ |
2612 | dicp->di_flushiter = 0; | 2621 | dicp->di_flushiter = 0; |
2613 | 2622 | ||