diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-05-23 11:13:39 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-05-23 11:13:39 -0400 |
commit | 6483d152acffb83442b90dad1517fde8a7b1e12d (patch) | |
tree | 21ba8f033a80c9c312bb18438990feb1d3ec5ebb /fs/xfs/xfs_vnodeops.c | |
parent | cb618965bc2073267b7f9345066f502515fcfdf5 (diff) | |
parent | 6ab455eeaff6893cd06da33843e840d888cdc04a (diff) |
Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6
* 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6:
[XFS] Fix memory corruption with small buffer reads
[XFS] Fix inode list allocation size in writeback.
[XFS] Don't allow memory reclaim to wait on the filesystem in inode
[XFS] Fix fsync() b0rkage.
[XFS] Include linux/random.h in all builds, not just debug builds.
Diffstat (limited to 'fs/xfs/xfs_vnodeops.c')
-rw-r--r-- | fs/xfs/xfs_vnodeops.c | 112 |
1 files changed, 42 insertions, 70 deletions
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 70702a60b4bb..e475e3717eb3 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -856,18 +856,14 @@ xfs_readlink( | |||
856 | /* | 856 | /* |
857 | * xfs_fsync | 857 | * xfs_fsync |
858 | * | 858 | * |
859 | * This is called to sync the inode and its data out to disk. | 859 | * This is called to sync the inode and its data out to disk. We need to hold |
860 | * We need to hold the I/O lock while flushing the data, and | 860 | * the I/O lock while flushing the data, and the inode lock while flushing the |
861 | * the inode lock while flushing the inode. The inode lock CANNOT | 861 | * inode. The inode lock CANNOT be held while flushing the data, so acquire |
862 | * be held while flushing the data, so acquire after we're done | 862 | * after we're done with that. |
863 | * with that. | ||
864 | */ | 863 | */ |
865 | int | 864 | int |
866 | xfs_fsync( | 865 | xfs_fsync( |
867 | xfs_inode_t *ip, | 866 | xfs_inode_t *ip) |
868 | int flag, | ||
869 | xfs_off_t start, | ||
870 | xfs_off_t stop) | ||
871 | { | 867 | { |
872 | xfs_trans_t *tp; | 868 | xfs_trans_t *tp; |
873 | int error; | 869 | int error; |
@@ -875,103 +871,79 @@ xfs_fsync( | |||
875 | 871 | ||
876 | xfs_itrace_entry(ip); | 872 | xfs_itrace_entry(ip); |
877 | 873 | ||
878 | ASSERT(start >= 0 && stop >= -1); | ||
879 | |||
880 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 874 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
881 | return XFS_ERROR(EIO); | 875 | return XFS_ERROR(EIO); |
882 | 876 | ||
883 | if (flag & FSYNC_DATA) | 877 | /* capture size updates in I/O completion before writing the inode. */ |
884 | filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); | 878 | error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); |
879 | if (error) | ||
880 | return XFS_ERROR(error); | ||
885 | 881 | ||
886 | /* | 882 | /* |
887 | * We always need to make sure that the required inode state | 883 | * We always need to make sure that the required inode state is safe on |
888 | * is safe on disk. The vnode might be clean but because | 884 | * disk. The vnode might be clean but we still might need to force the |
889 | * of committed transactions that haven't hit the disk yet. | 885 | * log because of committed transactions that haven't hit the disk yet. |
890 | * Likewise, there could be unflushed non-transactional | 886 | * Likewise, there could be unflushed non-transactional changes to the |
891 | * changes to the inode core that have to go to disk. | 887 | * inode core that have to go to disk and this requires us to issue |
888 | * a synchronous transaction to capture these changes correctly. | ||
892 | * | 889 | * |
893 | * The following code depends on one assumption: that | 890 | * This code relies on the assumption that if the update_* fields |
894 | * any transaction that changes an inode logs the core | 891 | * of the inode are clear and the inode is unpinned then it is clean |
895 | * because it has to change some field in the inode core | 892 | * and no action is required. |
896 | * (typically nextents or nblocks). That assumption | ||
897 | * implies that any transactions against an inode will | ||
898 | * catch any non-transactional updates. If inode-altering | ||
899 | * transactions exist that violate this assumption, the | ||
900 | * code breaks. Right now, it figures that if the involved | ||
901 | * update_* field is clear and the inode is unpinned, the | ||
902 | * inode is clean. Either it's been flushed or it's been | ||
903 | * committed and the commit has hit the disk unpinning the inode. | ||
904 | * (Note that xfs_inode_item_format() called at commit clears | ||
905 | * the update_* fields.) | ||
906 | */ | 893 | */ |
907 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 894 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
908 | 895 | ||
909 | /* If we are flushing data then we care about update_size | 896 | if (!(ip->i_update_size || ip->i_update_core)) { |
910 | * being set, otherwise we care about update_core | ||
911 | */ | ||
912 | if ((flag & FSYNC_DATA) ? | ||
913 | (ip->i_update_size == 0) : | ||
914 | (ip->i_update_core == 0)) { | ||
915 | /* | 897 | /* |
916 | * Timestamps/size haven't changed since last inode | 898 | * Timestamps/size haven't changed since last inode flush or |
917 | * flush or inode transaction commit. That means | 899 | * inode transaction commit. That means either nothing got |
918 | * either nothing got written or a transaction | 900 | * written or a transaction committed which caught the updates. |
919 | * committed which caught the updates. If the | 901 | * If the latter happened and the transaction hasn't hit the |
920 | * latter happened and the transaction hasn't | 902 | * disk yet, the inode will be still be pinned. If it is, |
921 | * hit the disk yet, the inode will be still | 903 | * force the log. |
922 | * be pinned. If it is, force the log. | ||
923 | */ | 904 | */ |
924 | 905 | ||
925 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 906 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
926 | 907 | ||
927 | if (xfs_ipincount(ip)) { | 908 | if (xfs_ipincount(ip)) { |
928 | _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, | 909 | error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, |
929 | XFS_LOG_FORCE | | 910 | XFS_LOG_FORCE | XFS_LOG_SYNC, |
930 | ((flag & FSYNC_WAIT) | ||
931 | ? XFS_LOG_SYNC : 0), | ||
932 | &log_flushed); | 911 | &log_flushed); |
933 | } else { | 912 | } else { |
934 | /* | 913 | /* |
935 | * If the inode is not pinned and nothing | 914 | * If the inode is not pinned and nothing has changed |
936 | * has changed we don't need to flush the | 915 | * we don't need to flush the cache. |
937 | * cache. | ||
938 | */ | 916 | */ |
939 | changed = 0; | 917 | changed = 0; |
940 | } | 918 | } |
941 | error = 0; | ||
942 | } else { | 919 | } else { |
943 | /* | 920 | /* |
944 | * Kick off a transaction to log the inode | 921 | * Kick off a transaction to log the inode core to get the |
945 | * core to get the updates. Make it | 922 | * updates. The sync transaction will also force the log. |
946 | * sync if FSYNC_WAIT is passed in (which | ||
947 | * is done by everybody but specfs). The | ||
948 | * sync transaction will also force the log. | ||
949 | */ | 923 | */ |
950 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 924 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
951 | tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); | 925 | tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); |
952 | if ((error = xfs_trans_reserve(tp, 0, | 926 | error = xfs_trans_reserve(tp, 0, |
953 | XFS_FSYNC_TS_LOG_RES(ip->i_mount), | 927 | XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); |
954 | 0, 0, 0))) { | 928 | if (error) { |
955 | xfs_trans_cancel(tp, 0); | 929 | xfs_trans_cancel(tp, 0); |
956 | return error; | 930 | return error; |
957 | } | 931 | } |
958 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 932 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
959 | 933 | ||
960 | /* | 934 | /* |
961 | * Note - it's possible that we might have pushed | 935 | * Note - it's possible that we might have pushed ourselves out |
962 | * ourselves out of the way during trans_reserve | 936 | * of the way during trans_reserve which would flush the inode. |
963 | * which would flush the inode. But there's no | 937 | * But there's no guarantee that the inode buffer has actually |
964 | * guarantee that the inode buffer has actually | 938 | * gone out yet (it's delwri). Plus the buffer could be pinned |
965 | * gone out yet (it's delwri). Plus the buffer | 939 | * anyway if it's part of an inode in another recent |
966 | * could be pinned anyway if it's part of an | 940 | * transaction. So we play it safe and fire off the |
967 | * inode in another recent transaction. So we | 941 | * transaction anyway. |
968 | * play it safe and fire off the transaction anyway. | ||
969 | */ | 942 | */ |
970 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | 943 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
971 | xfs_trans_ihold(tp, ip); | 944 | xfs_trans_ihold(tp, ip); |
972 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 945 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
973 | if (flag & FSYNC_WAIT) | 946 | xfs_trans_set_sync(tp); |
974 | xfs_trans_set_sync(tp); | ||
975 | error = _xfs_trans_commit(tp, 0, &log_flushed); | 947 | error = _xfs_trans_commit(tp, 0, &log_flushed); |
976 | 948 | ||
977 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 949 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |