aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-07-26 14:22:54 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-07-26 14:22:54 -0400
commit6c4155a9cd6bd07d3cb70ca5eeb6f2751de0931c (patch)
treee2fbc72ab180b3c6b0d0fcee89c29a04f050d0a1 /fs
parentf315cf5e02d9910b7be893f5b5afdcc2e49d352d (diff)
parente1b4271ac261b290fdab51446996fb13e68a57be (diff)
Merge tag 'for-linus-v3.11-rc3' of git://oss.sgi.com/xfs/xfs
Pull xfs fix from Ben Myers: "Fix for regression in commit cca9f93a52d2 ("xfs: don't do IO when creating an new inode"), recovery causing filesystem corruption after a crash" * tag 'for-linus-v3.11-rc3' of git://oss.sgi.com/xfs/xfs: xfs: di_flushiter considered harmful
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/xfs_dinode.h3
-rw-r--r--fs/xfs/xfs_inode.c31
-rw-r--r--fs/xfs/xfs_log_recover.c13
3 files changed, 36 insertions, 11 deletions
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index 07d735a80a0f..e5869b50dc41 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -39,6 +39,9 @@ typedef struct xfs_timestamp {
39 * There is a very similar struct icdinode in xfs_inode which matches the 39 * There is a very similar struct icdinode in xfs_inode which matches the
40 * layout of the first 96 bytes of this structure, but is kept in native 40 * layout of the first 96 bytes of this structure, but is kept in native
41 * format instead of big endian. 41 * format instead of big endian.
42 *
43 * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed
44 * padding field for v3 inodes.
42 */ 45 */
43typedef struct xfs_dinode { 46typedef struct xfs_dinode {
44 __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ 47 __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b78481f99d9d..bb262c25c8de 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -896,7 +896,6 @@ xfs_dinode_to_disk(
896 to->di_projid_lo = cpu_to_be16(from->di_projid_lo); 896 to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
897 to->di_projid_hi = cpu_to_be16(from->di_projid_hi); 897 to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
898 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); 898 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
899 to->di_flushiter = cpu_to_be16(from->di_flushiter);
900 to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); 899 to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
901 to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); 900 to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
902 to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); 901 to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
@@ -924,6 +923,9 @@ xfs_dinode_to_disk(
924 to->di_lsn = cpu_to_be64(from->di_lsn); 923 to->di_lsn = cpu_to_be64(from->di_lsn);
925 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); 924 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
926 uuid_copy(&to->di_uuid, &from->di_uuid); 925 uuid_copy(&to->di_uuid, &from->di_uuid);
926 to->di_flushiter = 0;
927 } else {
928 to->di_flushiter = cpu_to_be16(from->di_flushiter);
927 } 929 }
928} 930}
929 931
@@ -1029,10 +1031,14 @@ xfs_dinode_calc_crc(
1029/* 1031/*
1030 * Read the disk inode attributes into the in-core inode structure. 1032 * Read the disk inode attributes into the in-core inode structure.
1031 * 1033 *
1032 * If we are initialising a new inode and we are not utilising the 1034 * For version 5 superblocks, if we are initialising a new inode and we are not
1033 * XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core 1035 * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
1034 * with a random generation number. If we are keeping inodes around, we need to 1036 * inode core with a random generation number. If we are keeping inodes around,
1035 * read the inode cluster to get the existing generation number off disk. 1037 * we need to read the inode cluster to get the existing generation number off
1038 * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
1039 * format) then log recovery is dependent on the di_flushiter field being
1040 * initialised from the current on-disk value and hence we must also read the
1041 * inode off disk.
1036 */ 1042 */
1037int 1043int
1038xfs_iread( 1044xfs_iread(
@@ -1054,6 +1060,7 @@ xfs_iread(
1054 1060
1055 /* shortcut IO on inode allocation if possible */ 1061 /* shortcut IO on inode allocation if possible */
1056 if ((iget_flags & XFS_IGET_CREATE) && 1062 if ((iget_flags & XFS_IGET_CREATE) &&
1063 xfs_sb_version_hascrc(&mp->m_sb) &&
1057 !(mp->m_flags & XFS_MOUNT_IKEEP)) { 1064 !(mp->m_flags & XFS_MOUNT_IKEEP)) {
1058 /* initialise the on-disk inode core */ 1065 /* initialise the on-disk inode core */
1059 memset(&ip->i_d, 0, sizeof(ip->i_d)); 1066 memset(&ip->i_d, 0, sizeof(ip->i_d));
@@ -2882,12 +2889,18 @@ xfs_iflush_int(
2882 __func__, ip->i_ino, ip->i_d.di_forkoff, ip); 2889 __func__, ip->i_ino, ip->i_d.di_forkoff, ip);
2883 goto corrupt_out; 2890 goto corrupt_out;
2884 } 2891 }
2892
2885 /* 2893 /*
2886 * bump the flush iteration count, used to detect flushes which 2894 * Inode item log recovery for v1/v2 inodes are dependent on the
2887 * postdate a log record during recovery. This is redundant as we now 2895 * di_flushiter count for correct sequencing. We bump the flush
2888 * log every change and hence this can't happen. Still, it doesn't hurt. 2896 * iteration count so we can detect flushes which postdate a log record
2897 * during recovery. This is redundant as we now log every change and
2898 * hence this can't happen but we need to still do it to ensure
2899 * backwards compatibility with old kernels that predate logging all
2900 * inode changes.
2889 */ 2901 */
2890 ip->i_d.di_flushiter++; 2902 if (ip->i_d.di_version < 3)
2903 ip->i_d.di_flushiter++;
2891 2904
2892 /* 2905 /*
2893 * Copy the dirty parts of the inode into the on-disk 2906 * Copy the dirty parts of the inode into the on-disk
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 6fcc910a50b9..7681b19aa5dc 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2592,8 +2592,16 @@ xlog_recover_inode_pass2(
2592 goto error; 2592 goto error;
2593 } 2593 }
2594 2594
2595 /* Skip replay when the on disk inode is newer than the log one */ 2595 /*
2596 if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { 2596 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
2597 * are transactional and if ordering is necessary we can determine that
2598 * more accurately by the LSN field in the V3 inode core. Don't trust
2599 * the inode versions we might be changing them here - use the
2600 * superblock flag to determine whether we need to look at di_flushiter
2601 * to skip replay when the on disk inode is newer than the log one
2602 */
2603 if (!xfs_sb_version_hascrc(&mp->m_sb) &&
2604 dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
2597 /* 2605 /*
2598 * Deal with the wrap case, DI_MAX_FLUSH is less 2606 * Deal with the wrap case, DI_MAX_FLUSH is less
2599 * than smaller numbers 2607 * than smaller numbers
@@ -2608,6 +2616,7 @@ xlog_recover_inode_pass2(
2608 goto error; 2616 goto error;
2609 } 2617 }
2610 } 2618 }
2619
2611 /* Take the opportunity to reset the flush iteration count */ 2620 /* Take the opportunity to reset the flush iteration count */
2612 dicp->di_flushiter = 0; 2621 dicp->di_flushiter = 0;
2613 2622