diff options
Diffstat (limited to 'fs/nfsd/vfs.c')
-rw-r--r-- | fs/nfsd/vfs.c | 93 |
1 files changed, 50 insertions, 43 deletions
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 99f835753596..4145083dcf88 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -966,6 +966,43 @@ static void kill_suid(struct dentry *dentry) | |||
966 | mutex_unlock(&dentry->d_inode->i_mutex); | 966 | mutex_unlock(&dentry->d_inode->i_mutex); |
967 | } | 967 | } |
968 | 968 | ||
969 | /* | ||
970 | * Gathered writes: If another process is currently writing to the file, | ||
971 | * there's a high chance this is another nfsd (triggered by a bulk write | ||
972 | * from a client's biod). Rather than syncing the file with each write | ||
973 | * request, we sleep for 10 msec. | ||
974 | * | ||
975 | * I don't know if this roughly approximates C. Juszczak's idea of | ||
976 | * gathered writes, but it's a nice and simple solution (IMHO), and it | ||
977 | * seems to work:-) | ||
978 | * | ||
979 | * Note: we do this only in the NFSv2 case, since v3 and higher have a | ||
980 | * better tool (separate unstable writes and commits) for solving this | ||
981 | * problem. | ||
982 | */ | ||
983 | static int wait_for_concurrent_writes(struct file *file) | ||
984 | { | ||
985 | struct inode *inode = file->f_path.dentry->d_inode; | ||
986 | static ino_t last_ino; | ||
987 | static dev_t last_dev; | ||
988 | int err = 0; | ||
989 | |||
990 | if (atomic_read(&inode->i_writecount) > 1 | ||
991 | || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { | ||
992 | dprintk("nfsd: write defer %d\n", task_pid_nr(current)); | ||
993 | msleep(10); | ||
994 | dprintk("nfsd: write resume %d\n", task_pid_nr(current)); | ||
995 | } | ||
996 | |||
997 | if (inode->i_state & I_DIRTY) { | ||
998 | dprintk("nfsd: write sync %d\n", task_pid_nr(current)); | ||
999 | err = nfsd_sync(file); | ||
1000 | } | ||
1001 | last_ino = inode->i_ino; | ||
1002 | last_dev = inode->i_sb->s_dev; | ||
1003 | return err; | ||
1004 | } | ||
1005 | |||
969 | static __be32 | 1006 | static __be32 |
970 | nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | 1007 | nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, |
971 | loff_t offset, struct kvec *vec, int vlen, | 1008 | loff_t offset, struct kvec *vec, int vlen, |
@@ -978,6 +1015,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
978 | __be32 err = 0; | 1015 | __be32 err = 0; |
979 | int host_err; | 1016 | int host_err; |
980 | int stable = *stablep; | 1017 | int stable = *stablep; |
1018 | int use_wgather; | ||
981 | 1019 | ||
982 | #ifdef MSNFS | 1020 | #ifdef MSNFS |
983 | err = nfserr_perm; | 1021 | err = nfserr_perm; |
@@ -996,9 +1034,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
996 | * - the sync export option has been set, or | 1034 | * - the sync export option has been set, or |
997 | * - the client requested O_SYNC behavior (NFSv3 feature). | 1035 | * - the client requested O_SYNC behavior (NFSv3 feature). |
998 | * - The file system doesn't support fsync(). | 1036 | * - The file system doesn't support fsync(). |
999 | * When gathered writes have been configured for this volume, | 1037 | * When NFSv2 gathered writes have been configured for this volume, |
1000 | * flushing the data to disk is handled separately below. | 1038 | * flushing the data to disk is handled separately below. |
1001 | */ | 1039 | */ |
1040 | use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); | ||
1002 | 1041 | ||
1003 | if (!file->f_op->fsync) {/* COMMIT3 cannot work */ | 1042 | if (!file->f_op->fsync) {/* COMMIT3 cannot work */ |
1004 | stable = 2; | 1043 | stable = 2; |
@@ -1007,7 +1046,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
1007 | 1046 | ||
1008 | if (!EX_ISSYNC(exp)) | 1047 | if (!EX_ISSYNC(exp)) |
1009 | stable = 0; | 1048 | stable = 0; |
1010 | if (stable && !EX_WGATHER(exp)) { | 1049 | if (stable && !use_wgather) { |
1011 | spin_lock(&file->f_lock); | 1050 | spin_lock(&file->f_lock); |
1012 | file->f_flags |= O_SYNC; | 1051 | file->f_flags |= O_SYNC; |
1013 | spin_unlock(&file->f_lock); | 1052 | spin_unlock(&file->f_lock); |
@@ -1017,52 +1056,20 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
1017 | oldfs = get_fs(); set_fs(KERNEL_DS); | 1056 | oldfs = get_fs(); set_fs(KERNEL_DS); |
1018 | host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); | 1057 | host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); |
1019 | set_fs(oldfs); | 1058 | set_fs(oldfs); |
1020 | if (host_err >= 0) { | 1059 | if (host_err < 0) |
1021 | *cnt = host_err; | 1060 | goto out_nfserr; |
1022 | nfsdstats.io_write += host_err; | 1061 | *cnt = host_err; |
1023 | fsnotify_modify(file->f_path.dentry); | 1062 | nfsdstats.io_write += host_err; |
1024 | } | 1063 | fsnotify_modify(file->f_path.dentry); |
1025 | 1064 | ||
1026 | /* clear setuid/setgid flag after write */ | 1065 | /* clear setuid/setgid flag after write */ |
1027 | if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) | 1066 | if (inode->i_mode & (S_ISUID | S_ISGID)) |
1028 | kill_suid(dentry); | 1067 | kill_suid(dentry); |
1029 | 1068 | ||
1030 | if (host_err >= 0 && stable) { | 1069 | if (stable && use_wgather) |
1031 | static ino_t last_ino; | 1070 | host_err = wait_for_concurrent_writes(file); |
1032 | static dev_t last_dev; | ||
1033 | |||
1034 | /* | ||
1035 | * Gathered writes: If another process is currently | ||
1036 | * writing to the file, there's a high chance | ||
1037 | * this is another nfsd (triggered by a bulk write | ||
1038 | * from a client's biod). Rather than syncing the | ||
1039 | * file with each write request, we sleep for 10 msec. | ||
1040 | * | ||
1041 | * I don't know if this roughly approximates | ||
1042 | * C. Juszczak's idea of gathered writes, but it's a | ||
1043 | * nice and simple solution (IMHO), and it seems to | ||
1044 | * work:-) | ||
1045 | */ | ||
1046 | if (EX_WGATHER(exp)) { | ||
1047 | if (atomic_read(&inode->i_writecount) > 1 | ||
1048 | || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { | ||
1049 | dprintk("nfsd: write defer %d\n", task_pid_nr(current)); | ||
1050 | msleep(10); | ||
1051 | dprintk("nfsd: write resume %d\n", task_pid_nr(current)); | ||
1052 | } | ||
1053 | |||
1054 | if (inode->i_state & I_DIRTY) { | ||
1055 | dprintk("nfsd: write sync %d\n", task_pid_nr(current)); | ||
1056 | host_err=nfsd_sync(file); | ||
1057 | } | ||
1058 | #if 0 | ||
1059 | wake_up(&inode->i_wait); | ||
1060 | #endif | ||
1061 | } | ||
1062 | last_ino = inode->i_ino; | ||
1063 | last_dev = inode->i_sb->s_dev; | ||
1064 | } | ||
1065 | 1071 | ||
1072 | out_nfserr: | ||
1066 | dprintk("nfsd: write complete host_err=%d\n", host_err); | 1073 | dprintk("nfsd: write complete host_err=%d\n", host_err); |
1067 | if (host_err >= 0) | 1074 | if (host_err >= 0) |
1068 | err = 0; | 1075 | err = 0; |