aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_vnodeops.c
diff options
context:
space:
mode:
authorDavid Chinner <dgc@sgi.com>2008-05-19 02:29:46 -0400
committerLachlan McIlroy <lachlan@redback.melbourne.sgi.com>2008-05-23 01:25:25 -0400
commit978b7237123d007b9fa983af6e0e2fa8f97f9934 (patch)
treeed4c8af42502efeb7ae79b166bb5890347b3de93 /fs/xfs/xfs_vnodeops.c
parentc1e554aeea12d2dab5183e011c27dee6142dc927 (diff)
[XFS] Fix fsync() b0rkage.
xfs_fsync() fails to wait for data I/O completion before checking if the inode is dirty or clean to decide whether to log the inode or not. This misses inode size updates when the data flushed by the fsync() is extending the file. Hence, like fdatasync(), we need to wait for I/o completion first, then check the inode for cleanliness. Doing so makes the behaviour of xfs_fsync() identical for fsync and fdatasync and we *always* use synchronous semantics if the inode is dirty. Therefore also kill the differences and remove the unused flags from the xfs_fsync function and callers. SGI-PV: 981296 SGI-Modid: xfs-linux-melb:xfs-kern:31033a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Christoph Hellwig <hch@infradead.org> Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_vnodeops.c')
-rw-r--r--fs/xfs/xfs_vnodeops.c112
1 files changed, 42 insertions, 70 deletions
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 70702a60b4bb..e475e3717eb3 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -856,18 +856,14 @@ xfs_readlink(
856/* 856/*
857 * xfs_fsync 857 * xfs_fsync
858 * 858 *
859 * This is called to sync the inode and its data out to disk. 859 * This is called to sync the inode and its data out to disk. We need to hold
860 * We need to hold the I/O lock while flushing the data, and 860 * the I/O lock while flushing the data, and the inode lock while flushing the
861 * the inode lock while flushing the inode. The inode lock CANNOT 861 * inode. The inode lock CANNOT be held while flushing the data, so acquire
862 * be held while flushing the data, so acquire after we're done 862 * after we're done with that.
863 * with that.
864 */ 863 */
865int 864int
866xfs_fsync( 865xfs_fsync(
867 xfs_inode_t *ip, 866 xfs_inode_t *ip)
868 int flag,
869 xfs_off_t start,
870 xfs_off_t stop)
871{ 867{
872 xfs_trans_t *tp; 868 xfs_trans_t *tp;
873 int error; 869 int error;
@@ -875,103 +871,79 @@ xfs_fsync(
875 871
876 xfs_itrace_entry(ip); 872 xfs_itrace_entry(ip);
877 873
878 ASSERT(start >= 0 && stop >= -1);
879
880 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 874 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
881 return XFS_ERROR(EIO); 875 return XFS_ERROR(EIO);
882 876
883 if (flag & FSYNC_DATA) 877 /* capture size updates in I/O completion before writing the inode. */
884 filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); 878 error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
879 if (error)
880 return XFS_ERROR(error);
885 881
886 /* 882 /*
887 * We always need to make sure that the required inode state 883 * We always need to make sure that the required inode state is safe on
888 * is safe on disk. The vnode might be clean but because 884 * disk. The vnode might be clean but we still might need to force the
889 * of committed transactions that haven't hit the disk yet. 885 * log because of committed transactions that haven't hit the disk yet.
890 * Likewise, there could be unflushed non-transactional 886 * Likewise, there could be unflushed non-transactional changes to the
891 * changes to the inode core that have to go to disk. 887 * inode core that have to go to disk and this requires us to issue
888 * a synchronous transaction to capture these changes correctly.
892 * 889 *
893 * The following code depends on one assumption: that 890 * This code relies on the assumption that if the update_* fields
894 * any transaction that changes an inode logs the core 891 * of the inode are clear and the inode is unpinned then it is clean
895 * because it has to change some field in the inode core 892 * and no action is required.
896 * (typically nextents or nblocks). That assumption
897 * implies that any transactions against an inode will
898 * catch any non-transactional updates. If inode-altering
899 * transactions exist that violate this assumption, the
900 * code breaks. Right now, it figures that if the involved
901 * update_* field is clear and the inode is unpinned, the
902 * inode is clean. Either it's been flushed or it's been
903 * committed and the commit has hit the disk unpinning the inode.
904 * (Note that xfs_inode_item_format() called at commit clears
905 * the update_* fields.)
906 */ 893 */
907 xfs_ilock(ip, XFS_ILOCK_SHARED); 894 xfs_ilock(ip, XFS_ILOCK_SHARED);
908 895
909 /* If we are flushing data then we care about update_size 896 if (!(ip->i_update_size || ip->i_update_core)) {
910 * being set, otherwise we care about update_core
911 */
912 if ((flag & FSYNC_DATA) ?
913 (ip->i_update_size == 0) :
914 (ip->i_update_core == 0)) {
915 /* 897 /*
916 * Timestamps/size haven't changed since last inode 898 * Timestamps/size haven't changed since last inode flush or
917 * flush or inode transaction commit. That means 899 * inode transaction commit. That means either nothing got
918 * either nothing got written or a transaction 900 * written or a transaction committed which caught the updates.
919 * committed which caught the updates. If the 901 * If the latter happened and the transaction hasn't hit the
920 * latter happened and the transaction hasn't 902 * disk yet, the inode will be still be pinned. If it is,
921 * hit the disk yet, the inode will be still 903 * force the log.
922 * be pinned. If it is, force the log.
923 */ 904 */
924 905
925 xfs_iunlock(ip, XFS_ILOCK_SHARED); 906 xfs_iunlock(ip, XFS_ILOCK_SHARED);
926 907
927 if (xfs_ipincount(ip)) { 908 if (xfs_ipincount(ip)) {
928 _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, 909 error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
929 XFS_LOG_FORCE | 910 XFS_LOG_FORCE | XFS_LOG_SYNC,
930 ((flag & FSYNC_WAIT)
931 ? XFS_LOG_SYNC : 0),
932 &log_flushed); 911 &log_flushed);
933 } else { 912 } else {
934 /* 913 /*
935 * If the inode is not pinned and nothing 914 * If the inode is not pinned and nothing has changed
936 * has changed we don't need to flush the 915 * we don't need to flush the cache.
937 * cache.
938 */ 916 */
939 changed = 0; 917 changed = 0;
940 } 918 }
941 error = 0;
942 } else { 919 } else {
943 /* 920 /*
944 * Kick off a transaction to log the inode 921 * Kick off a transaction to log the inode core to get the
945 * core to get the updates. Make it 922 * updates. The sync transaction will also force the log.
946 * sync if FSYNC_WAIT is passed in (which
947 * is done by everybody but specfs). The
948 * sync transaction will also force the log.
949 */ 923 */
950 xfs_iunlock(ip, XFS_ILOCK_SHARED); 924 xfs_iunlock(ip, XFS_ILOCK_SHARED);
951 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); 925 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
952 if ((error = xfs_trans_reserve(tp, 0, 926 error = xfs_trans_reserve(tp, 0,
953 XFS_FSYNC_TS_LOG_RES(ip->i_mount), 927 XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
954 0, 0, 0))) { 928 if (error) {
955 xfs_trans_cancel(tp, 0); 929 xfs_trans_cancel(tp, 0);
956 return error; 930 return error;
957 } 931 }
958 xfs_ilock(ip, XFS_ILOCK_EXCL); 932 xfs_ilock(ip, XFS_ILOCK_EXCL);
959 933
960 /* 934 /*
961 * Note - it's possible that we might have pushed 935 * Note - it's possible that we might have pushed ourselves out
962 * ourselves out of the way during trans_reserve 936 * of the way during trans_reserve which would flush the inode.
963 * which would flush the inode. But there's no 937 * But there's no guarantee that the inode buffer has actually
964 * guarantee that the inode buffer has actually 938 * gone out yet (it's delwri). Plus the buffer could be pinned
965 * gone out yet (it's delwri). Plus the buffer 939 * anyway if it's part of an inode in another recent
966 * could be pinned anyway if it's part of an 940 * transaction. So we play it safe and fire off the
967 * inode in another recent transaction. So we 941 * transaction anyway.
968 * play it safe and fire off the transaction anyway.
969 */ 942 */
970 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 943 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
971 xfs_trans_ihold(tp, ip); 944 xfs_trans_ihold(tp, ip);
972 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 945 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
973 if (flag & FSYNC_WAIT) 946 xfs_trans_set_sync(tp);
974 xfs_trans_set_sync(tp);
975 error = _xfs_trans_commit(tp, 0, &log_flushed); 947 error = _xfs_trans_commit(tp, 0, &log_flushed);
976 948
977 xfs_iunlock(ip, XFS_ILOCK_EXCL); 949 xfs_iunlock(ip, XFS_ILOCK_EXCL);