diff options
-rw-r--r-- | fs/xfs/linux-2.6/xfs_file.c | 17 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_vnode.h | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_vnodeops.c | 112 | ||||
-rw-r--r-- | fs/xfs/xfs_vnodeops.h | 3 |
4 files changed, 54 insertions, 86 deletions
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 65e78c13d4ae..5f60363b9343 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -184,19 +184,24 @@ xfs_file_release( | |||
184 | return -xfs_release(XFS_I(inode)); | 184 | return -xfs_release(XFS_I(inode)); |
185 | } | 185 | } |
186 | 186 | ||
187 | /* | ||
188 | * We ignore the datasync flag here because a datasync is effectively | ||
189 | * identical to an fsync. That is, datasync implies that we need to write | ||
190 | * only the metadata needed to be able to access the data that is written | ||
191 | * if we crash after the call completes. Hence if we are writing beyond | ||
192 | * EOF we have to log the inode size change as well, which makes it a | ||
193 | * full fsync. If we don't write beyond EOF, the inode core will be | ||
194 | * clean in memory and so we don't need to log the inode, just like | ||
195 | * fsync. | ||
196 | */ | ||
187 | STATIC int | 197 | STATIC int |
188 | xfs_file_fsync( | 198 | xfs_file_fsync( |
189 | struct file *filp, | 199 | struct file *filp, |
190 | struct dentry *dentry, | 200 | struct dentry *dentry, |
191 | int datasync) | 201 | int datasync) |
192 | { | 202 | { |
193 | int flags = FSYNC_WAIT; | ||
194 | |||
195 | if (datasync) | ||
196 | flags |= FSYNC_DATA; | ||
197 | xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); | 203 | xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); |
198 | return -xfs_fsync(XFS_I(dentry->d_inode), flags, | 204 | return -xfs_fsync(XFS_I(dentry->d_inode)); |
199 | (xfs_off_t)0, (xfs_off_t)-1); | ||
200 | } | 205 | } |
201 | 206 | ||
202 | /* | 207 | /* |
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 9d73cb5c0fc7..25eb2a9e8d9b 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h | |||
@@ -230,14 +230,6 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) | |||
230 | #define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */ | 230 | #define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */ |
231 | 231 | ||
232 | /* | 232 | /* |
233 | * Flags to vop_fsync/reclaim. | ||
234 | */ | ||
235 | #define FSYNC_NOWAIT 0 /* asynchronous flush */ | ||
236 | #define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */ | ||
237 | #define FSYNC_INVAL 0x2 /* flush and invalidate cached data */ | ||
238 | #define FSYNC_DATA 0x4 /* synchronous fsync of data only */ | ||
239 | |||
240 | /* | ||
241 | * Tracking vnode activity. | 233 | * Tracking vnode activity. |
242 | */ | 234 | */ |
243 | #if defined(XFS_INODE_TRACE) | 235 | #if defined(XFS_INODE_TRACE) |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 70702a60b4bb..e475e3717eb3 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -856,18 +856,14 @@ xfs_readlink( | |||
856 | /* | 856 | /* |
857 | * xfs_fsync | 857 | * xfs_fsync |
858 | * | 858 | * |
859 | * This is called to sync the inode and its data out to disk. | 859 | * This is called to sync the inode and its data out to disk. We need to hold |
860 | * We need to hold the I/O lock while flushing the data, and | 860 | * the I/O lock while flushing the data, and the inode lock while flushing the |
861 | * the inode lock while flushing the inode. The inode lock CANNOT | 861 | * inode. The inode lock CANNOT be held while flushing the data, so acquire |
862 | * be held while flushing the data, so acquire after we're done | 862 | * after we're done with that. |
863 | * with that. | ||
864 | */ | 863 | */ |
865 | int | 864 | int |
866 | xfs_fsync( | 865 | xfs_fsync( |
867 | xfs_inode_t *ip, | 866 | xfs_inode_t *ip) |
868 | int flag, | ||
869 | xfs_off_t start, | ||
870 | xfs_off_t stop) | ||
871 | { | 867 | { |
872 | xfs_trans_t *tp; | 868 | xfs_trans_t *tp; |
873 | int error; | 869 | int error; |
@@ -875,103 +871,79 @@ xfs_fsync( | |||
875 | 871 | ||
876 | xfs_itrace_entry(ip); | 872 | xfs_itrace_entry(ip); |
877 | 873 | ||
878 | ASSERT(start >= 0 && stop >= -1); | ||
879 | |||
880 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 874 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
881 | return XFS_ERROR(EIO); | 875 | return XFS_ERROR(EIO); |
882 | 876 | ||
883 | if (flag & FSYNC_DATA) | 877 | /* capture size updates in I/O completion before writing the inode. */ |
884 | filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); | 878 | error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); |
879 | if (error) | ||
880 | return XFS_ERROR(error); | ||
885 | 881 | ||
886 | /* | 882 | /* |
887 | * We always need to make sure that the required inode state | 883 | * We always need to make sure that the required inode state is safe on |
888 | * is safe on disk. The vnode might be clean but because | 884 | * disk. The vnode might be clean but we still might need to force the |
889 | * of committed transactions that haven't hit the disk yet. | 885 | * log because of committed transactions that haven't hit the disk yet. |
890 | * Likewise, there could be unflushed non-transactional | 886 | * Likewise, there could be unflushed non-transactional changes to the |
891 | * changes to the inode core that have to go to disk. | 887 | * inode core that have to go to disk and this requires us to issue |
888 | * a synchronous transaction to capture these changes correctly. | ||
892 | * | 889 | * |
893 | * The following code depends on one assumption: that | 890 | * This code relies on the assumption that if the update_* fields |
894 | * any transaction that changes an inode logs the core | 891 | * of the inode are clear and the inode is unpinned then it is clean |
895 | * because it has to change some field in the inode core | 892 | * and no action is required. |
896 | * (typically nextents or nblocks). That assumption | ||
897 | * implies that any transactions against an inode will | ||
898 | * catch any non-transactional updates. If inode-altering | ||
899 | * transactions exist that violate this assumption, the | ||
900 | * code breaks. Right now, it figures that if the involved | ||
901 | * update_* field is clear and the inode is unpinned, the | ||
902 | * inode is clean. Either it's been flushed or it's been | ||
903 | * committed and the commit has hit the disk unpinning the inode. | ||
904 | * (Note that xfs_inode_item_format() called at commit clears | ||
905 | * the update_* fields.) | ||
906 | */ | 893 | */ |
907 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 894 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
908 | 895 | ||
909 | /* If we are flushing data then we care about update_size | 896 | if (!(ip->i_update_size || ip->i_update_core)) { |
910 | * being set, otherwise we care about update_core | ||
911 | */ | ||
912 | if ((flag & FSYNC_DATA) ? | ||
913 | (ip->i_update_size == 0) : | ||
914 | (ip->i_update_core == 0)) { | ||
915 | /* | 897 | /* |
916 | * Timestamps/size haven't changed since last inode | 898 | * Timestamps/size haven't changed since last inode flush or |
917 | * flush or inode transaction commit. That means | 899 | * inode transaction commit. That means either nothing got |
918 | * either nothing got written or a transaction | 900 | * written or a transaction committed which caught the updates. |
919 | * committed which caught the updates. If the | 901 | * If the latter happened and the transaction hasn't hit the |
920 | * latter happened and the transaction hasn't | 902 | * disk yet, the inode will be still be pinned. If it is, |
921 | * hit the disk yet, the inode will be still | 903 | * force the log. |
922 | * be pinned. If it is, force the log. | ||
923 | */ | 904 | */ |
924 | 905 | ||
925 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 906 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
926 | 907 | ||
927 | if (xfs_ipincount(ip)) { | 908 | if (xfs_ipincount(ip)) { |
928 | _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, | 909 | error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, |
929 | XFS_LOG_FORCE | | 910 | XFS_LOG_FORCE | XFS_LOG_SYNC, |
930 | ((flag & FSYNC_WAIT) | ||
931 | ? XFS_LOG_SYNC : 0), | ||
932 | &log_flushed); | 911 | &log_flushed); |
933 | } else { | 912 | } else { |
934 | /* | 913 | /* |
935 | * If the inode is not pinned and nothing | 914 | * If the inode is not pinned and nothing has changed |
936 | * has changed we don't need to flush the | 915 | * we don't need to flush the cache. |
937 | * cache. | ||
938 | */ | 916 | */ |
939 | changed = 0; | 917 | changed = 0; |
940 | } | 918 | } |
941 | error = 0; | ||
942 | } else { | 919 | } else { |
943 | /* | 920 | /* |
944 | * Kick off a transaction to log the inode | 921 | * Kick off a transaction to log the inode core to get the |
945 | * core to get the updates. Make it | 922 | * updates. The sync transaction will also force the log. |
946 | * sync if FSYNC_WAIT is passed in (which | ||
947 | * is done by everybody but specfs). The | ||
948 | * sync transaction will also force the log. | ||
949 | */ | 923 | */ |
950 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 924 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
951 | tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); | 925 | tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); |
952 | if ((error = xfs_trans_reserve(tp, 0, | 926 | error = xfs_trans_reserve(tp, 0, |
953 | XFS_FSYNC_TS_LOG_RES(ip->i_mount), | 927 | XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); |
954 | 0, 0, 0))) { | 928 | if (error) { |
955 | xfs_trans_cancel(tp, 0); | 929 | xfs_trans_cancel(tp, 0); |
956 | return error; | 930 | return error; |
957 | } | 931 | } |
958 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 932 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
959 | 933 | ||
960 | /* | 934 | /* |
961 | * Note - it's possible that we might have pushed | 935 | * Note - it's possible that we might have pushed ourselves out |
962 | * ourselves out of the way during trans_reserve | 936 | * of the way during trans_reserve which would flush the inode. |
963 | * which would flush the inode. But there's no | 937 | * But there's no guarantee that the inode buffer has actually |
964 | * guarantee that the inode buffer has actually | 938 | * gone out yet (it's delwri). Plus the buffer could be pinned |
965 | * gone out yet (it's delwri). Plus the buffer | 939 | * anyway if it's part of an inode in another recent |
966 | * could be pinned anyway if it's part of an | 940 | * transaction. So we play it safe and fire off the |
967 | * inode in another recent transaction. So we | 941 | * transaction anyway. |
968 | * play it safe and fire off the transaction anyway. | ||
969 | */ | 942 | */ |
970 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | 943 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
971 | xfs_trans_ihold(tp, ip); | 944 | xfs_trans_ihold(tp, ip); |
972 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 945 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
973 | if (flag & FSYNC_WAIT) | 946 | xfs_trans_set_sync(tp); |
974 | xfs_trans_set_sync(tp); | ||
975 | error = _xfs_trans_commit(tp, 0, &log_flushed); | 947 | error = _xfs_trans_commit(tp, 0, &log_flushed); |
976 | 948 | ||
977 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 949 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 8abe8f186e20..57335ba4ce53 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h | |||
@@ -18,8 +18,7 @@ int xfs_open(struct xfs_inode *ip); | |||
18 | int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, | 18 | int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, |
19 | struct cred *credp); | 19 | struct cred *credp); |
20 | int xfs_readlink(struct xfs_inode *ip, char *link); | 20 | int xfs_readlink(struct xfs_inode *ip, char *link); |
21 | int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start, | 21 | int xfs_fsync(struct xfs_inode *ip); |
22 | xfs_off_t stop); | ||
23 | int xfs_release(struct xfs_inode *ip); | 22 | int xfs_release(struct xfs_inode *ip); |
24 | int xfs_inactive(struct xfs_inode *ip); | 23 | int xfs_inactive(struct xfs_inode *ip); |
25 | int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, | 24 | int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, |