aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfsd/vfs.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfsd/vfs.c')
-rw-r--r--fs/nfsd/vfs.c93
1 files changed, 50 insertions, 43 deletions
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 99f835753596..4145083dcf88 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -966,6 +966,43 @@ static void kill_suid(struct dentry *dentry)
966 mutex_unlock(&dentry->d_inode->i_mutex); 966 mutex_unlock(&dentry->d_inode->i_mutex);
967} 967}
968 968
969/*
970 * Gathered writes: if the inode's i_writecount is above one, or the
971 * previous call here was for the same inode on the same device, there's a
972 * high chance another nfsd is writing to the file (triggered by a bulk
973 * write from a client's biod). Rather than syncing the file with each
974 * write request, we first sleep for 10 msec, and then sync only if the
975 * inode is still marked I_DIRTY. This is only a rough approximation of
976 * C. Juszczak's idea of gathered writes, but it is simple and seems to work.
977 *
978 * Returns 0 when no sync was needed, otherwise the result of nfsd_sync().
979 * Note: we do this only in the NFSv2 case, since v3 and higher have a
980 * better tool (separate unstable writes and commits) for solving this
981 * problem.
982 */
983static int wait_for_concurrent_writes(struct file *file)
984{
985 struct inode *inode = file->f_path.dentry->d_inode;
986 static ino_t last_ino; /* inode number seen by the previous call (function-static, shared by all callers) */
987 static dev_t last_dev; /* device of the previous call; NOTE(review): no lock is taken here - presumably a tolerated heuristic race, confirm */
988 int err = 0;
989
990 if (atomic_read(&inode->i_writecount) > 1 /* more than one writer counted on this inode */
991 || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { /* or same file as the previous call */
992 dprintk("nfsd: write defer %d\n", task_pid_nr(current));
993 msleep(10); /* give the other writes a chance to land before syncing */
994 dprintk("nfsd: write resume %d\n", task_pid_nr(current));
995 }
996
997 if (inode->i_state & I_DIRTY) { /* skip the sync when no dirty bit is set on the inode */
998 dprintk("nfsd: write sync %d\n", task_pid_nr(current));
999 err = nfsd_sync(file);
1000 }
1001 last_ino = inode->i_ino; /* remember this file for the next call's same-file check */
1002 last_dev = inode->i_sb->s_dev;
1003 return err;
1004}
1005
969static __be32 1006static __be32
970nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 1007nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
971 loff_t offset, struct kvec *vec, int vlen, 1008 loff_t offset, struct kvec *vec, int vlen,
@@ -978,6 +1015,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
978 __be32 err = 0; 1015 __be32 err = 0;
979 int host_err; 1016 int host_err;
980 int stable = *stablep; 1017 int stable = *stablep;
1018 int use_wgather;
981 1019
982#ifdef MSNFS 1020#ifdef MSNFS
983 err = nfserr_perm; 1021 err = nfserr_perm;
@@ -996,9 +1034,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
996 * - the sync export option has been set, or 1034 * - the sync export option has been set, or
997 * - the client requested O_SYNC behavior (NFSv3 feature). 1035 * - the client requested O_SYNC behavior (NFSv3 feature).
998 * - The file system doesn't support fsync(). 1036 * - The file system doesn't support fsync().
999 * When gathered writes have been configured for this volume, 1037 * When NFSv2 gathered writes have been configured for this volume,
1000 * flushing the data to disk is handled separately below. 1038 * flushing the data to disk is handled separately below.
1001 */ 1039 */
1040 use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
1002 1041
1003 if (!file->f_op->fsync) {/* COMMIT3 cannot work */ 1042 if (!file->f_op->fsync) {/* COMMIT3 cannot work */
1004 stable = 2; 1043 stable = 2;
@@ -1007,7 +1046,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1007 1046
1008 if (!EX_ISSYNC(exp)) 1047 if (!EX_ISSYNC(exp))
1009 stable = 0; 1048 stable = 0;
1010 if (stable && !EX_WGATHER(exp)) { 1049 if (stable && !use_wgather) {
1011 spin_lock(&file->f_lock); 1050 spin_lock(&file->f_lock);
1012 file->f_flags |= O_SYNC; 1051 file->f_flags |= O_SYNC;
1013 spin_unlock(&file->f_lock); 1052 spin_unlock(&file->f_lock);
@@ -1017,52 +1056,20 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1017 oldfs = get_fs(); set_fs(KERNEL_DS); 1056 oldfs = get_fs(); set_fs(KERNEL_DS);
1018 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); 1057 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
1019 set_fs(oldfs); 1058 set_fs(oldfs);
1020 if (host_err >= 0) { 1059 if (host_err < 0)
1021 *cnt = host_err; 1060 goto out_nfserr;
1022 nfsdstats.io_write += host_err; 1061 *cnt = host_err;
1023 fsnotify_modify(file->f_path.dentry); 1062 nfsdstats.io_write += host_err;
1024 } 1063 fsnotify_modify(file->f_path.dentry);
1025 1064
1026 /* clear setuid/setgid flag after write */ 1065 /* clear setuid/setgid flag after write */
1027 if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) 1066 if (inode->i_mode & (S_ISUID | S_ISGID))
1028 kill_suid(dentry); 1067 kill_suid(dentry);
1029 1068
1030 if (host_err >= 0 && stable) { 1069 if (stable && use_wgather)
1031 static ino_t last_ino; 1070 host_err = wait_for_concurrent_writes(file);
1032 static dev_t last_dev;
1033
1034 /*
1035 * Gathered writes: If another process is currently
1036 * writing to the file, there's a high chance
1037 * this is another nfsd (triggered by a bulk write
1038 * from a client's biod). Rather than syncing the
1039 * file with each write request, we sleep for 10 msec.
1040 *
1041 * I don't know if this roughly approximates
1042 * C. Juszak's idea of gathered writes, but it's a
1043 * nice and simple solution (IMHO), and it seems to
1044 * work:-)
1045 */
1046 if (EX_WGATHER(exp)) {
1047 if (atomic_read(&inode->i_writecount) > 1
1048 || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
1049 dprintk("nfsd: write defer %d\n", task_pid_nr(current));
1050 msleep(10);
1051 dprintk("nfsd: write resume %d\n", task_pid_nr(current));
1052 }
1053
1054 if (inode->i_state & I_DIRTY) {
1055 dprintk("nfsd: write sync %d\n", task_pid_nr(current));
1056 host_err=nfsd_sync(file);
1057 }
1058#if 0
1059 wake_up(&inode->i_wait);
1060#endif
1061 }
1062 last_ino = inode->i_ino;
1063 last_dev = inode->i_sb->s_dev;
1064 }
1065 1071
1072out_nfserr:
1066 dprintk("nfsd: write complete host_err=%d\n", host_err); 1073 dprintk("nfsd: write complete host_err=%d\n", host_err);
1067 if (host_err >= 0) 1074 if (host_err >= 0)
1068 err = 0; 1075 err = 0;