Diffstat (limited to 'fs')
-rw-r--r--  fs/Kconfig                 6
-rw-r--r--  fs/cifs/CHANGES            6
-rw-r--r--  fs/cifs/README             8
-rw-r--r--  fs/cifs/cifsfs.c          99
-rw-r--r--  fs/cifs/cifssmb.c          2
-rw-r--r--  fs/cifs/connect.c          6
-rw-r--r--  fs/cifs/dir.c             18
-rw-r--r--  fs/cifs/fcntl.c            2
-rw-r--r--  fs/cifs/file.c            34
-rw-r--r--  fs/cifs/inode.c            6
-rw-r--r--  fs/cifs/link.c             6
-rw-r--r--  fs/cifs/ntlmssp.c         14
-rw-r--r--  fs/cifs/readdir.c         45
-rw-r--r--  fs/cifs/xattr.c            8
-rw-r--r--  fs/compat.c                4
-rw-r--r--  fs/exec.c                  2
-rw-r--r--  fs/ext3/ioctl.c           18
-rw-r--r--  fs/ext3/resize.c           2
-rw-r--r--  fs/fuse/dev.c             35
-rw-r--r--  fs/fuse/fuse_i.h          12
-rw-r--r--  fs/fuse/inode.c           40
-rw-r--r--  fs/lockd/svclock.c         2
-rw-r--r--  fs/locks.c                 9
-rw-r--r--  fs/nfs/dir.c               5
-rw-r--r--  fs/nfs/direct.c            8
-rw-r--r--  fs/nfs/file.c              5
-rw-r--r--  fs/nfs/inode.c             5
-rw-r--r--  fs/nfs/nfs4proc.c         10
-rw-r--r--  fs/open.c                 24
-rw-r--r--  fs/proc/base.c            21
-rw-r--r--  fs/reiserfs/xattr_acl.c    5
-rw-r--r--  fs/splice.c              529
32 files changed, 682 insertions, 314 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 2524629dc835..f9b5842c8d2d 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -842,6 +842,12 @@ config TMPFS
 config HUGETLBFS
 	bool "HugeTLB file system support"
 	depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
+	help
+	  hugetlbfs is a filesystem backing for HugeTLB pages, based on
+	  ramfs. For architectures that support it, say Y here and read
+	  <file:Documentation/vm/hugetlbpage.txt> for details.
+
+	  If unsure, say N.
 
 config HUGETLB_PAGE
 	def_bool HUGETLBFS
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 8a2de038882e..1a27ecb46c9a 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,7 +1,11 @@
 Version 1.42
 ------------
 Fix slow oplock break when mounted to different servers at the same time and
-the tids match and we try to find matching fid on wrong server.
+the tids match and we try to find matching fid on wrong server. Fix read
+looping when signing required by server (2.6.16 kernel only). Fix readdir
+vs. rename race which could cause each to hang. Return . and .. even
+if server does not. Allow searches to skip first three entries and
+begin at any location. Fix oops in find_writeable_file.
 
 Version 1.41
 ------------
diff --git a/fs/cifs/README b/fs/cifs/README
index b2b4d0803761..0355003f4f0a 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -511,6 +511,14 @@ LinuxExtensionsEnabled If set to one then the client will attempt to
 		support and want to map the uid and gid fields
 		to values supplied at mount (rather than the
 		actual values, then set this to zero. (default 1)
+Experimental		When set to 1 used to enable certain experimental
+		features (currently enables multipage writes
+		when signing is enabled, the multipage write
+		performance enhancement was disabled when
+		signing turned on in case buffer was modified
+		just before it was sent, also this flag will
+		be used to use the new experimental sessionsetup
+		code).
 
 These experimental features and tracing can be enabled by changing flags in
 /proc/fs/cifs (after the cifs module has been installed or built into the
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d4b713e5affb..c262d8874ce9 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -33,6 +33,7 @@
 #include <linux/vfs.h>
 #include <linux/mempool.h>
 #include <linux/delay.h>
+#include <linux/kthread.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
 #define DECLARE_GLOBALS_HERE
@@ -75,9 +76,6 @@ unsigned int cifs_max_pending = CIFS_MAX_REQ;
 module_param(cifs_max_pending, int, 0);
 MODULE_PARM_DESC(cifs_max_pending,"Simultaneous requests to server. Default: 50 Range: 2 to 256");
 
-static DECLARE_COMPLETION(cifs_oplock_exited);
-static DECLARE_COMPLETION(cifs_dnotify_exited);
-
 extern mempool_t *cifs_sm_req_poolp;
 extern mempool_t *cifs_req_poolp;
 extern mempool_t *cifs_mid_poolp;
@@ -841,10 +839,6 @@ static int cifs_oplock_thread(void * dummyarg)
 	__u16 netfid;
 	int rc;
 
-	daemonize("cifsoplockd");
-	allow_signal(SIGTERM);
-
-	oplockThread = current;
 	do {
 		if (try_to_freeze())
 			continue;
@@ -900,9 +894,9 @@ static int cifs_oplock_thread(void * dummyarg)
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout(1);  /* yield in case q were corrupt */
 		}
-	} while(!signal_pending(current));
-	oplockThread = NULL;
-	complete_and_exit (&cifs_oplock_exited, 0);
+	} while (!kthread_should_stop());
+
+	return 0;
 }
 
 static int cifs_dnotify_thread(void * dummyarg)
@@ -910,10 +904,6 @@ static int cifs_dnotify_thread(void * dummyarg)
 	struct list_head *tmp;
 	struct cifsSesInfo *ses;
 
-	daemonize("cifsdnotifyd");
-	allow_signal(SIGTERM);
-
-	dnotifyThread = current;
 	do {
 		if(try_to_freeze())
 			continue;
@@ -931,8 +921,9 @@ static int cifs_dnotify_thread(void * dummyarg)
 			wake_up_all(&ses->server->response_q);
 		}
 		read_unlock(&GlobalSMBSeslock);
-	} while(!signal_pending(current));
-	complete_and_exit (&cifs_dnotify_exited, 0);
+	} while (!kthread_should_stop());
+
+	return 0;
 }
 
 static int __init
@@ -982,32 +973,48 @@ init_cifs(void)
 	}
 
 	rc = cifs_init_inodecache();
-	if (!rc) {
-		rc = cifs_init_mids();
-		if (!rc) {
-			rc = cifs_init_request_bufs();
-			if (!rc) {
-				rc = register_filesystem(&cifs_fs_type);
-				if (!rc) {
-					rc = (int)kernel_thread(cifs_oplock_thread, NULL,
-						CLONE_FS | CLONE_FILES | CLONE_VM);
-					if(rc > 0) {
-						rc = (int)kernel_thread(cifs_dnotify_thread, NULL,
-							CLONE_FS | CLONE_FILES | CLONE_VM);
-						if(rc > 0)
-							return 0;
-						else
-							cERROR(1,("error %d create dnotify thread", rc));
-					} else {
-						cERROR(1,("error %d create oplock thread",rc));
-					}
-				}
-				cifs_destroy_request_bufs();
-			}
-			cifs_destroy_mids();
-		}
-		cifs_destroy_inodecache();
+	if (rc)
+		goto out_clean_proc;
+
+	rc = cifs_init_mids();
+	if (rc)
+		goto out_destroy_inodecache;
+
+	rc = cifs_init_request_bufs();
+	if (rc)
+		goto out_destroy_mids;
+
+	rc = register_filesystem(&cifs_fs_type);
+	if (rc)
+		goto out_destroy_request_bufs;
+
+	oplockThread = kthread_run(cifs_oplock_thread, NULL, "cifsoplockd");
+	if (IS_ERR(oplockThread)) {
+		rc = PTR_ERR(oplockThread);
+		cERROR(1,("error %d create oplock thread", rc));
+		goto out_unregister_filesystem;
 	}
+
+	dnotifyThread = kthread_run(cifs_dnotify_thread, NULL, "cifsdnotifyd");
+	if (IS_ERR(dnotifyThread)) {
+		rc = PTR_ERR(dnotifyThread);
+		cERROR(1,("error %d create dnotify thread", rc));
+		goto out_stop_oplock_thread;
+	}
+
+	return 0;
+
+ out_stop_oplock_thread:
+	kthread_stop(oplockThread);
+ out_unregister_filesystem:
+	unregister_filesystem(&cifs_fs_type);
+ out_destroy_request_bufs:
+	cifs_destroy_request_bufs();
+ out_destroy_mids:
+	cifs_destroy_mids();
+ out_destroy_inodecache:
+	cifs_destroy_inodecache();
+ out_clean_proc:
 #ifdef CONFIG_PROC_FS
 	cifs_proc_clean();
 #endif
@@ -1025,14 +1032,8 @@ exit_cifs(void)
 	cifs_destroy_inodecache();
 	cifs_destroy_mids();
 	cifs_destroy_request_bufs();
-	if(oplockThread) {
-		send_sig(SIGTERM, oplockThread, 1);
-		wait_for_completion(&cifs_oplock_exited);
-	}
-	if(dnotifyThread) {
-		send_sig(SIGTERM, dnotifyThread, 1);
-		wait_for_completion(&cifs_dnotify_exited);
-	}
+	kthread_stop(oplockThread);
+	kthread_stop(dnotifyThread);
 }
 
 MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>");
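For reference, the pattern the cifsfs.c changes above adopt is the standard kthread API: create the thread with kthread_run(), loop until kthread_should_stop() becomes true, and tear it down with kthread_stop(), which waits for the thread function to return. A minimal sketch of that lifecycle follows; the thread function and names ("worker", "exampled") are hypothetical, not the cifs code itself.

#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/err.h>

static struct task_struct *worker;	/* hypothetical thread handle */

static int worker_fn(void *unused)
{
	/* loop until kthread_stop() asks us to exit */
	while (!kthread_should_stop()) {
		/* ... do periodic work ... */
		msleep_interruptible(1000);
	}
	return 0;	/* value is handed back to kthread_stop() */
}

static int start_worker(void)
{
	worker = kthread_run(worker_fn, NULL, "exampled");
	if (IS_ERR(worker))
		return PTR_ERR(worker);	/* no daemonize()/signal plumbing needed */
	return 0;
}

static void stop_worker(void)
{
	kthread_stop(worker);	/* blocks until worker_fn() returns */
}

Compared with the old kernel_thread()/SIGTERM/completion scheme, the error handling collapses to an IS_ERR() check and shutdown becomes a single synchronous call.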
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index d705500aa283..fd36892eda55 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -3119,7 +3119,7 @@ findFirstRetry:
 		psrch_inf->endOfSearch = FALSE;
 
 		psrch_inf->entries_in_buffer = le16_to_cpu(parms->SearchCount);
-		psrch_inf->index_of_last_entry =
+		psrch_inf->index_of_last_entry = 2 /* skip . and .. */ +
 			psrch_inf->entries_in_buffer;
 		*pnetfid = parms->SearchHandle;
 	} else {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0b86d5ca9014..d2ec806a4f32 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3447,6 +3447,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 			pSesInfo->server->secMode,
 			pSesInfo->server->capabilities,
 			pSesInfo->server->timeZone));
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+		if(experimEnabled > 1)
+			rc = CIFS_SessSetup(xid, pSesInfo, CIFS_NTLM /* type */,
+				&ntlmv2_flag, nls_info);
+		else
+#endif
 		if (extended_security
 			&& (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
 			&& (pSesInfo->server->secType == NTLMSSP)) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 1d0ca3eaaca5..82315edc77d7 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -139,9 +139,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -316,9 +314,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
 	if(full_path == NULL)
 		rc = -ENOMEM;
 	else if (pTcon->ses->capabilities & CAP_UNIX) {
@@ -440,6 +436,20 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct name
 	cifs_sb = CIFS_SB(parent_dir_inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
+	/*
+	 * Don't allow the separator character in a path component.
+	 * The VFS will not allow "/", but "\" is allowed by posix.
+	 */
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
+		int i;
+		for (i = 0; i < direntry->d_name.len; i++)
+			if (direntry->d_name.name[i] == '\\') {
+				cFYI(1, ("Invalid file name"));
+				FreeXid(xid);
+				return ERR_PTR(-EINVAL);
+			}
+	}
+
 	/* can not grab the rename sem here since it would
 	deadlock in the cases (beginning of sys_rename itself)
 	in which we already have the sb rename sem */
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index ec4dfe9bf5ef..633a93811328 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -86,9 +86,7 @@ int cifs_dir_notify(struct file * file, unsigned long arg)
 	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(file->f_dentry);
-	mutex_unlock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
 
 	if(full_path == NULL) {
 		rc = -ENOMEM;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5c497c529772..e152bf6afa60 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -203,9 +203,7 @@ int cifs_open(struct inode *inode, struct file *file)
 		}
 	}
 
-	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(file->f_dentry);
-	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -906,8 +904,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
 			if (rc != 0)
 				break;
 		}
-		/* BB FIXME We can not sign across two buffers yet */
-		if((pTcon->ses->server->secMode &
+		if(experimEnabled || (pTcon->ses->server->secMode &
 			(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) == 0) {
 			struct kvec iov[2];
 			unsigned int len;
@@ -923,13 +920,13 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
 					*poffset, &bytes_written,
 					iov, 1, long_op);
 			} else
-			/* BB FIXME fixup indentation of line below */
-			rc = CIFSSMBWrite(xid, pTcon,
-				 open_file->netfid,
-				 min_t(const int, cifs_sb->wsize,
-					write_size - total_written),
-				 *poffset, &bytes_written,
-				 write_data + total_written, NULL, long_op);
+				rc = CIFSSMBWrite(xid, pTcon,
+					 open_file->netfid,
+					 min_t(const int, cifs_sb->wsize,
+						write_size - total_written),
+					 *poffset, &bytes_written,
+					 write_data + total_written,
+					 NULL, long_op);
 		}
 		if (rc || (bytes_written == 0)) {
 			if (total_written)
@@ -968,6 +965,16 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
 	struct cifsFileInfo *open_file;
 	int rc;
 
+	/* Having a null inode here (because mapping->host was set to zero by
+	the VFS or MM) should not happen but we had reports of on oops (due to
+	it being zero) during stress testcases so we need to check for it */
+
+	if(cifs_inode == NULL) {
+		cERROR(1,("Null inode passed to cifs_writeable_file"));
+		dump_stack();
+		return NULL;
+	}
+
 	read_lock(&GlobalSMBSeslock);
 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
 		if (open_file->closePend)
@@ -1093,12 +1100,11 @@ static int cifs_writepages(struct address_space *mapping,
 	if (cifs_sb->wsize < PAGE_CACHE_SIZE)
 		return generic_writepages(mapping, wbc);
 
-	/* BB FIXME we do not have code to sign across multiple buffers yet,
-	   so go to older writepage style write which we can sign if needed */
 	if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
 		if(cifs_sb->tcon->ses->server->secMode &
 			(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
-			return generic_writepages(mapping, wbc);
+			if(!experimEnabled)
+				return generic_writepages(mapping, wbc);
 
 	/*
 	 * BB: Is this meaningful for a non-block-device file system?
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 957ddd1571c6..4093764ef461 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -722,9 +722,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -807,9 +805,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -1141,9 +1137,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 		rc = 0;
 	}
 
-	mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
 	if (full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 9562f5bba65c..2ec99f833142 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -48,10 +48,8 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
 /* No need to check for cross device links since server will do that
    BB note DFS case in future though (when we may have to check) */
 
-	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	fromName = build_path_from_dentry(old_file);
 	toName = build_path_from_dentry(direntry);
-	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 	if((fromName == NULL) || (toName == NULL)) {
 		rc = -ENOMEM;
 		goto cifs_hl_exit;
@@ -103,9 +101,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
 
 	xid = GetXid();
 
-	mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);
 
 	if (!full_path)
 		goto out_no_free;
@@ -164,9 +160,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
 	cifs_sb = CIFS_SB(inode->i_sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&inode->i_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);
 
 	if(full_path == NULL) {
 		FreeXid(xid);
diff --git a/fs/cifs/ntlmssp.c b/fs/cifs/ntlmssp.c
index 78866f925747..115359cc7a32 100644
--- a/fs/cifs/ntlmssp.c
+++ b/fs/cifs/ntlmssp.c
@@ -121,6 +121,20 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, const int type,
 	}
 
 
+	/* copy session key */
+
+	/* if Unicode, align strings to two byte boundary */
+
+	/* copy user name */ /* BB Do we need to special case null user name? */
+
+	/* copy domain name */
+
+	/* copy Linux version */
+
+	/* copy network operating system name */
+
+	/* update bcc and smb buffer length */
+
 /*	rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buf_type, 0); */
 	/* SMB request buf freed in SendReceive2 */
 
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 2f6e2825571e..b689c5035124 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -404,9 +404,7 @@ static int initiate_cifs_search(const int xid, struct file *file)
 	if(pTcon == NULL)
 		return -EINVAL;
 
-	mutex_lock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(file->f_dentry);
-	mutex_unlock(&file->f_dentry->d_sb->s_vfs_rename_mutex);
 
 	if(full_path == NULL) {
 		return -ENOMEM;
@@ -592,6 +590,13 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
 	first_entry_in_buffer =
 		cifsFile->srch_inf.index_of_last_entry -
 			cifsFile->srch_inf.entries_in_buffer;
+
+	/* if first entry in buf is zero then is first buffer
+	in search response data which means it is likely . and ..
+	will be in this buffer, although some servers do not return
+	. and .. for the root of a drive and for those we need
+	to start two entries earlier */
+
 /*	dump_cifs_file_struct(file, "In fce ");*/
 	if(((index_to_find < cifsFile->srch_inf.index_of_last_entry) &&
 	   is_dir_changed(file)) ||
@@ -634,23 +639,14 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
 		char * end_of_smb = cifsFile->srch_inf.ntwrk_buf_start +
 			smbCalcSize((struct smb_hdr *)
 				cifsFile->srch_inf.ntwrk_buf_start);
+
+		current_entry = cifsFile->srch_inf.srch_entries_start;
 		first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry
 					- cifsFile->srch_inf.entries_in_buffer;
 		pos_in_buf = index_to_find - first_entry_in_buffer;
 		cFYI(1,("found entry - pos_in_buf %d",pos_in_buf));
-		current_entry = cifsFile->srch_inf.srch_entries_start;
 		for(i=0;(i<(pos_in_buf)) && (current_entry != NULL);i++) {
 			/* go entry by entry figuring out which is first */
-			/* if( . or ..)
-				skip */
-			rc = cifs_entry_is_dot(current_entry,cifsFile);
-			if(rc == 1) /* is . or .. so skip */ {
-				cFYI(1,("Entry is .")); /* BB removeme BB */
-				/* continue; */
-			} else if (rc == 2 ) {
-				cFYI(1,("Entry is ..")); /* BB removeme BB */
-				/* continue; */
-			}
 			current_entry = nxt_dir_entry(current_entry,end_of_smb);
 		}
 		if((current_entry == NULL) && (i < pos_in_buf)) {
@@ -770,6 +766,11 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
 	if(file->f_dentry == NULL)
 		return -ENOENT;
 
+	rc = cifs_entry_is_dot(pfindEntry,pCifsF);
+	/* skip . and .. since we added them first */
+	if(rc != 0)
+		return 0;
+
 	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
 
 	qstring.name = scratch_buf;
@@ -898,22 +899,22 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 
 	switch ((int) file->f_pos) {
 	case 0:
-		/*if (filldir(direntry, ".", 1, file->f_pos,
+		if (filldir(direntry, ".", 1, file->f_pos,
 		     file->f_dentry->d_inode->i_ino, DT_DIR) < 0) {
-			cERROR(1, ("Filldir for current dir failed "));
+			cERROR(1, ("Filldir for current dir failed"));
 			rc = -ENOMEM;
 			break;
 		}
-		file->f_pos++; */
+		file->f_pos++;
 	case 1:
-		/* if (filldir(direntry, "..", 2, file->f_pos,
+		if (filldir(direntry, "..", 2, file->f_pos,
 		     file->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) {
 			cERROR(1, ("Filldir for parent dir failed "));
 			rc = -ENOMEM;
 			break;
 		}
-		file->f_pos++; */
-	case 2:
+		file->f_pos++;
+	default:
 		/* 1) If search is active,
 			is in current search buffer?
 			if it before then restart search
@@ -927,7 +928,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 				return rc;
 			}
 		}
-	default:
 		if(file->private_data == NULL) {
 			rc = -EINVAL;
 			FreeXid(xid);
@@ -947,8 +947,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 			kfree(cifsFile->search_resume_name);
 			cifsFile->search_resume_name = NULL; */
 
-		/* BB account for . and .. in f_pos as special case */
-
 		rc = find_cifs_entry(xid,pTcon, file,
 				&current_entry,&num_to_fill);
 		if(rc) {
@@ -977,7 +975,8 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 					num_to_fill, i));
 				break;
 			}
-
+			/* if buggy server returns . and .. late do
+			we want to check for that here? */
 			rc = cifs_filldir(current_entry, file,
 					filldir, direntry,tmp_buf);
 			file->f_pos++;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 3938444d87b2..7754d641775e 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -62,9 +62,7 @@ int cifs_removexattr(struct dentry * direntry, const char * ea_name)
 	cifs_sb = CIFS_SB(sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -116,9 +114,7 @@ int cifs_setxattr(struct dentry * direntry, const char * ea_name,
 	cifs_sb = CIFS_SB(sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -223,9 +219,7 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name,
 	cifs_sb = CIFS_SB(sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
@@ -341,9 +335,7 @@ ssize_t cifs_listxattr(struct dentry * direntry, char * data, size_t buf_size)
 	cifs_sb = CIFS_SB(sb);
 	pTcon = cifs_sb->tcon;
 
-	mutex_lock(&sb->s_vfs_rename_mutex);
 	full_path = build_path_from_dentry(direntry);
-	mutex_unlock(&sb->s_vfs_rename_mutex);
 	if(full_path == NULL) {
 		FreeXid(xid);
 		return -ENOMEM;
diff --git a/fs/compat.c b/fs/compat.c
index 7f8e26ea427c..2e32bd340474 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1217,6 +1217,10 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 	if (ret < 0)
 		goto out;
 
+	ret = security_file_permission(file, type == READ ? MAY_READ:MAY_WRITE);
+	if (ret)
+		goto out;
+
 	fnv = NULL;
 	if (type == READ) {
 		fn = file->f_op->read;
diff --git a/fs/exec.c b/fs/exec.c
index 4121bb559739..3a79d97ac234 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -712,7 +712,7 @@ static int de_thread(struct task_struct *tsk)
 	attach_pid(current, PIDTYPE_PID,  current->pid);
 	attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
 	attach_pid(current, PIDTYPE_SID,  current->signal->session);
-	list_add_tail(&current->tasks, &init_task.tasks);
+	list_add_tail_rcu(&current->tasks, &init_task.tasks);
 
 	current->group_leader = current;
 	leader->group_leader = current;
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index aaf1da17b6d4..8c22aa9a7fbb 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -48,6 +48,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		if (!S_ISDIR(inode->i_mode))
 			flags &= ~EXT3_DIRSYNC_FL;
 
+		mutex_lock(&inode->i_mutex);
 		oldflags = ei->i_flags;
 
 		/* The JOURNAL_DATA flag is modifiable only by root */
@@ -60,8 +61,10 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		 * This test looks nicer. Thanks to Pauline Middelink
 		 */
 		if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
-			if (!capable(CAP_LINUX_IMMUTABLE))
+			if (!capable(CAP_LINUX_IMMUTABLE)) {
+				mutex_unlock(&inode->i_mutex);
 				return -EPERM;
+			}
 		}
 
 		/*
@@ -69,14 +72,18 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		 * the relevant capability.
 		 */
 		if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
-			if (!capable(CAP_SYS_RESOURCE))
+			if (!capable(CAP_SYS_RESOURCE)) {
+				mutex_unlock(&inode->i_mutex);
 				return -EPERM;
+			}
 		}
 
 
 		handle = ext3_journal_start(inode, 1);
-		if (IS_ERR(handle))
+		if (IS_ERR(handle)) {
+			mutex_unlock(&inode->i_mutex);
 			return PTR_ERR(handle);
+		}
 		if (IS_SYNC(inode))
 			handle->h_sync = 1;
 		err = ext3_reserve_inode_write(handle, inode, &iloc);
@@ -93,11 +100,14 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		err = ext3_mark_iloc_dirty(handle, inode, &iloc);
 flags_err:
 		ext3_journal_stop(handle);
-		if (err)
+		if (err) {
+			mutex_unlock(&inode->i_mutex);
 			return err;
+		}
 
 		if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL))
 			err = ext3_change_inode_journal_flag(inode, jflag);
+		mutex_unlock(&inode->i_mutex);
 		return err;
 	}
 	case EXT3_IOC_GETVERSION:
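The ext3_ioctl() change above follows a common locking discipline: take inode->i_mutex before reading and modifying the flags, and release it on every exit path, including the early error returns. A condensed sketch of the same pattern, using a single unlock point via goto; the helper name and simplified checks are hypothetical, not the ext3 code.

#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/capability.h>
#include <linux/sched.h>

/* hypothetical: update inode flags while holding i_mutex,
 * unlocking on every return path */
static int update_flags_locked(struct inode *inode, unsigned int flags)
{
	int err = 0;

	mutex_lock(&inode->i_mutex);

	if (!capable(CAP_LINUX_IMMUTABLE)) {
		err = -EPERM;
		goto out;		/* error path still unlocks */
	}

	/* ... modify the on-disk flags under the mutex ... */

out:
	mutex_unlock(&inode->i_mutex);
	return err;
}

The patch itself unlocks at each return site instead of funneling through one label; either style works as long as no path leaves the mutex held.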
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index c5ffa8523968..8aac5334680d 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -213,7 +213,7 @@ static int setup_new_group_blocks(struct super_block *sb,
 			goto exit_bh;
 		}
 		lock_buffer(bh);
-		memcpy(gdb->b_data, sbi->s_group_desc[i], bh->b_size);
+		memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size);
 		set_buffer_uptodate(gdb);
 		unlock_buffer(bh);
 		ext3_journal_dirty_metadata(handle, gdb);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cc750c68fe70..104a62dadb94 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -128,14 +128,24 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
 	}
 }
 
-void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req)
+/*
+ * Called with sbput_sem held for read (request_end) or write
+ * (fuse_put_super). By the time fuse_put_super() is finished, all
+ * inodes belonging to background requests must be released, so the
+ * iputs have to be done within the locked region.
+ */
+void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req)
 {
-	list_del_init(&req->bg_entry);
+	iput(req->inode);
+	iput(req->inode2);
+	spin_lock(&fc->lock);
+	list_del(&req->bg_entry);
 	if (fc->num_background == FUSE_MAX_BACKGROUND) {
 		fc->blocked = 0;
 		wake_up_all(&fc->blocked_waitq);
 	}
 	fc->num_background--;
+	spin_unlock(&fc->lock);
 }
 
 /*
@@ -165,27 +175,22 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 		wake_up(&req->waitq);
 		fuse_put_request(fc, req);
 	} else {
-		struct inode *inode = req->inode;
-		struct inode *inode2 = req->inode2;
-		struct file *file = req->file;
 		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
 		req->end = NULL;
-		req->inode = NULL;
-		req->inode2 = NULL;
-		req->file = NULL;
-		if (!list_empty(&req->bg_entry))
-			fuse_remove_background(fc, req);
 		spin_unlock(&fc->lock);
+		down_read(&fc->sbput_sem);
+		if (fc->mounted)
+			fuse_release_background(fc, req);
+		up_read(&fc->sbput_sem);
+
+		/* fput must go outside sbput_sem, otherwise it can deadlock */
+		if (req->file)
+			fput(req->file);
 
 		if (end)
 			end(fc, req);
 		else
 			fuse_put_request(fc, req);
-
-		if (file)
-			fput(file);
-		iput(inode);
-		iput(inode2);
 	}
 }
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 59661c481d9d..0474202cb5dc 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -258,9 +258,15 @@ struct fuse_conn {
 	/** waitq for blocked connection */
 	wait_queue_head_t blocked_waitq;
 
+	/** RW semaphore for exclusion with fuse_put_super() */
+	struct rw_semaphore sbput_sem;
+
 	/** The next unique request id */
 	u64 reqctr;
 
+	/** Mount is active */
+	unsigned mounted;
+
 	/** Connection established, cleared on umount, connection
 	    abort and device release */
 	unsigned connected;
@@ -471,11 +477,11 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
 void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
 
 /**
- * Remove request from the the background list
+ * Release inodes and file associated with background request
  */
-void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req);
+void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req);
 
-/** Abort all requests */
+/* Abort all requests */
 void fuse_abort_conn(struct fuse_conn *fc);
 
 /**
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 43a6fc0db8a7..7627022446b2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -204,26 +204,17 @@ static void fuse_put_super(struct super_block *sb)
 {
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
 
+	down_write(&fc->sbput_sem);
+	while (!list_empty(&fc->background))
+		fuse_release_background(fc,
+					list_entry(fc->background.next,
+						   struct fuse_req, bg_entry));
+
 	spin_lock(&fc->lock);
+	fc->mounted = 0;
 	fc->connected = 0;
-	while (!list_empty(&fc->background)) {
-		struct fuse_req *req = list_entry(fc->background.next,
-						  struct fuse_req, bg_entry);
-		struct inode *inode = req->inode;
-		struct inode *inode2 = req->inode2;
-
-		/* File would hold a reference to vfsmount */
-		BUG_ON(req->file);
-		req->inode = NULL;
-		req->inode2 = NULL;
-		fuse_remove_background(fc, req);
-
-		spin_unlock(&fc->lock);
-		iput(inode);
-		iput(inode2);
-		spin_lock(&fc->lock);
-	}
 	spin_unlock(&fc->lock);
+	up_write(&fc->sbput_sem);
 	/* Flush all readers on this fs */
 	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
 	wake_up_all(&fc->waitq);
@@ -395,6 +386,7 @@ static struct fuse_conn *new_conn(void)
 		INIT_LIST_HEAD(&fc->processing);
 		INIT_LIST_HEAD(&fc->io);
 		INIT_LIST_HEAD(&fc->background);
+		init_rwsem(&fc->sbput_sem);
 		kobj_set_kset_s(fc, connections_subsys);
 		kobject_init(&fc->kobj);
 		atomic_set(&fc->num_waiting, 0);
@@ -508,11 +500,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (file->f_op != &fuse_dev_operations)
 		return -EINVAL;
 
-	/* Setting file->private_data can't race with other mount()
-	   instances, since BKL is held for ->get_sb() */
-	if (file->private_data)
-		return -EINVAL;
-
 	fc = new_conn();
 	if (!fc)
 		return -ENOMEM;
@@ -548,7 +535,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (err)
 		goto err_free_req;
 
+	/* Setting file->private_data can't race with other mount()
+	   instances, since BKL is held for ->get_sb() */
+	err = -EINVAL;
+	if (file->private_data)
+		goto err_kobject_del;
+
 	sb->s_root = root_dentry;
+	fc->mounted = 1;
 	fc->connected = 1;
 	kobject_get(&fc->kobj);
 	file->private_data = fc;
@@ -563,6 +557,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
 	return 0;
 
+ err_kobject_del:
+	kobject_del(&fc->kobj);
  err_free_req:
 	fuse_request_free(init_req);
  err_put_root:
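The fuse changes above coordinate request completion with unmount through an rw_semaphore: request_end() takes sbput_sem shared while releasing the inodes of a background request, and fuse_put_super() takes it exclusively so that all such releases have finished (and no new ones can start against a dead mount) before the superblock goes away. A minimal sketch of that reader/writer split; the names and the simplified bodies are hypothetical, not the fuse code.

#include <linux/rwsem.h>

static DECLARE_RWSEM(sbput_sem);	/* hypothetical: guards teardown */
static int mounted = 1;

/* completion path: many of these may run concurrently */
static void finish_request(void)
{
	down_read(&sbput_sem);
	if (mounted) {
		/* drop per-request inode references here */
	}
	up_read(&sbput_sem);
}

/* unmount path: excludes all finish_request() callers */
static void put_super(void)
{
	down_write(&sbput_sem);
	/* release any remaining background requests */
	mounted = 0;
	up_write(&sbput_sem);
}

Note the patch's comment that fput() is deliberately kept outside the semaphore, since dropping the file reference can itself trigger work that would otherwise deadlock against the write-locked teardown path.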
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index d2b66bad7d50..3ef739120dff 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -650,7 +650,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 	svc_wake_up(block->b_daemon);
 }
 
-void nlmsvc_grant_release(void *data)
+static void nlmsvc_grant_release(void *data)
 {
 	struct nlm_rqst *call = data;
 
diff --git a/fs/locks.c b/fs/locks.c
index dda83d6cd48b..efad798824dc 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2230,7 +2230,12 @@ void steal_locks(fl_owner_t from)
 
 	lock_kernel();
 	j = 0;
-	rcu_read_lock();
+
+	/*
+	 * We are not taking a ref to the file structures, so
+	 * we need to acquire ->file_lock.
+	 */
+	spin_lock(&files->file_lock);
 	fdt = files_fdtable(files);
 	for (;;) {
 		unsigned long set;
@@ -2248,7 +2253,7 @@ void steal_locks(fl_owner_t from)
 			set >>= 1;
 		}
 	}
-	rcu_read_unlock();
+	spin_unlock(&files->file_lock);
 	unlock_kernel();
 }
 EXPORT_SYMBOL(steal_locks);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a23f34894167..cae74dd4c7f5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -128,15 +128,14 @@ struct inode_operations nfs4_dir_inode_operations = {
 static int
 nfs_opendir(struct inode *inode, struct file *filp)
 {
-	int res = 0;
+	int res;
 
 	dfprintk(VFS, "NFS: opendir(%s/%ld)\n",
 			inode->i_sb->s_id, inode->i_ino);
 
 	lock_kernel();
 	/* Call generic open code in order to cache credentials */
-	if (!res)
-		res = nfs_open(inode, filp);
+	res = nfs_open(inode, filp);
 	unlock_kernel();
 	return res;
 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0f583cb16ddb..3c72b0c07283 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -112,10 +112,9 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
  */
 ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
 {
-	struct dentry *dentry = iocb->ki_filp->f_dentry;
-
 	dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
-			dentry->d_name.name, (long long) pos, nr_segs);
+			iocb->ki_filp->f_dentry->d_name.name,
+			(long long) pos, nr_segs);
 
 	return -EINVAL;
 }
@@ -468,7 +467,6 @@ static const struct rpc_call_ops nfs_commit_direct_ops = {
 static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 {
 	struct nfs_write_data *data = dreq->commit_data;
-	struct rpc_task *task = &data->task;
 
 	data->inode = dreq->inode;
 	data->cred = dreq->ctx->cred;
@@ -489,7 +487,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
 	dreq->commit_data = NULL;
 
-	dprintk("NFS: %5u initiated commit call\n", task->tk_pid);
+	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
 
 	lock_kernel();
 	rpc_execute(&data->task);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f1df2c8d9259..fade02c15e6e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -534,10 +534,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
  */
 static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
 {
-	struct inode * inode = filp->f_mapping->host;
-
 	dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
-			inode->i_sb->s_id, inode->i_ino,
+			filp->f_dentry->d_inode->i_sb->s_id,
+			filp->f_dentry->d_inode->i_ino,
 			fl->fl_type, fl->fl_flags);
 
 	/*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 2f7656b911b6..d0b991a92327 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -700,12 +700,9 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 	/*
 	 * Display superblock I/O counters
 	 */
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_possible_cpu(cpu) {
 		struct nfs_iostats *stats;
 
-		if (!cpu_possible(cpu))
-			continue;
-
 		preempt_disable();
 		stats = per_cpu_ptr(nfss->io_stats, cpu);
 
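The nfs_show_stats() change swaps an open-coded NR_CPUS loop with a cpu_possible() test for the for_each_possible_cpu() iterator, which walks only CPUs that can ever come online. A small sketch of summing a per-CPU allocation with that iterator; the struct and helper are hypothetical, not the nfs code.

#include <linux/percpu.h>
#include <linux/cpumask.h>

/* hypothetical per-CPU counters being summed for display */
struct io_stats {
	unsigned long long bytes_read;
};

static unsigned long long total_bytes(struct io_stats *stats)
{
	unsigned long long sum = 0;
	int cpu;

	/* visits only possible CPUs; no explicit cpu_possible() check needed */
	for_each_possible_cpu(cpu)
		sum += per_cpu_ptr(stats, cpu)->bytes_read;

	return sum;
}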
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 47ece1dd3c67..d86c0db7b1e8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1218,7 +1218,7 @@ out:
 	return status;
 }
 
-static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
+static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
 {
 	struct file *filp;
 
@@ -1227,8 +1227,10 @@ static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, st
 		struct nfs_open_context *ctx;
 		ctx = (struct nfs_open_context *)filp->private_data;
 		ctx->state = state;
-	} else
-		nfs4_close_state(state, nd->intent.open.flags);
+		return 0;
+	}
+	nfs4_close_state(state, nd->intent.open.flags);
+	return PTR_ERR(filp);
 }
 
 struct dentry *
@@ -1835,7 +1837,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		nfs_setattr_update_inode(state->inode, sattr);
 	}
 	if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
-		nfs4_intent_set_file(nd, dentry, state);
+		status = nfs4_intent_set_file(nd, dentry, state);
 	else
 		nfs4_close_state(state, flags);
 out:
diff --git a/fs/open.c b/fs/open.c
index c32c89d6d8db..53ec28c36777 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -331,7 +331,10 @@ out:
 
 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length)
 {
-	return do_sys_ftruncate(fd, length, 1);
+	long ret = do_sys_ftruncate(fd, length, 1);
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 
 /* LFS versions of truncate are only needed on 32 bit machines */
@@ -343,7 +346,10 @@ asmlinkage long sys_truncate64(const char __user * path, loff_t length)
 
 asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
 {
-	return do_sys_ftruncate(fd, length, 0);
+	long ret = do_sys_ftruncate(fd, length, 0);
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 #endif
 
@@ -1093,20 +1099,30 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
 
 asmlinkage long sys_open(const char __user *filename, int flags, int mode)
 {
+	long ret;
+
 	if (force_o_largefile())
 		flags |= O_LARGEFILE;
 
-	return do_sys_open(AT_FDCWD, filename, flags, mode);
+	ret = do_sys_open(AT_FDCWD, filename, flags, mode);
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(sys_open);
 
 asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
 			   int mode)
 {
+	long ret;
+
 	if (force_o_largefile())
 		flags |= O_LARGEFILE;
 
-	return do_sys_open(dfd, filename, flags, mode);
+	ret = do_sys_open(dfd, filename, flags, mode);
+	/* avoid REGPARM breakage on x86: */
+	prevent_tail_call(ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(sys_openat);
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a3a3eecef689..6cc77dc3f3ff 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -297,16 +297,20 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
 
 	files = get_files_struct(task);
 	if (files) {
-		rcu_read_lock();
+		/*
+		 * We are not taking a ref to the file structure, so we must
+		 * hold ->file_lock.
+		 */
+		spin_lock(&files->file_lock);
 		file = fcheck_files(files, fd);
 		if (file) {
 			*mnt = mntget(file->f_vfsmnt);
 			*dentry = dget(file->f_dentry);
-			rcu_read_unlock();
+			spin_unlock(&files->file_lock);
 			put_files_struct(files);
 			return 0;
 		}
-		rcu_read_unlock();
+		spin_unlock(&files->file_lock);
 		put_files_struct(files);
 	}
 	return -ENOENT;
@@ -1523,7 +1527,12 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
 	if (!files)
 		goto out_unlock;
 	inode->i_mode = S_IFLNK;
-	rcu_read_lock();
+
+	/*
+	 * We are not taking a ref to the file structure, so we must
+	 * hold ->file_lock.
+	 */
+	spin_lock(&files->file_lock);
 	file = fcheck_files(files, fd);
 	if (!file)
 		goto out_unlock2;
@@ -1531,7 +1540,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
 		inode->i_mode |= S_IRUSR | S_IXUSR;
 	if (file->f_mode & 2)
 		inode->i_mode |= S_IWUSR | S_IXUSR;
-	rcu_read_unlock();
+	spin_unlock(&files->file_lock);
 	put_files_struct(files);
 	inode->i_op = &proc_pid_link_inode_operations;
 	inode->i_size = 64;
@@ -1541,7 +1550,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
 	return NULL;
 
 out_unlock2:
-	rcu_read_unlock();
+	spin_unlock(&files->file_lock);
 	put_files_struct(files);
 out_unlock:
 	iput(inode);
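Both the locks.c and proc/base.c hunks replace rcu_read_lock() with files->file_lock around fcheck_files(): the callers never take a reference on the struct file with get_file(), so RCU alone does not keep the file from being freed and reused while it is examined, and the descriptor-table spinlock is held instead. A stripped-down sketch of that lookup pattern; the function is hypothetical, not the proc code.

#include <linux/fs.h>
#include <linux/file.h>
#include <linux/dcache.h>
#include <linux/spinlock.h>

/* hypothetical: peek at an fd's struct file without taking a file reference */
static struct dentry *peek_fd_dentry(struct files_struct *files, unsigned int fd)
{
	struct file *file;
	struct dentry *dentry = NULL;

	/* not calling get_file(), so hold ->file_lock across the whole access */
	spin_lock(&files->file_lock);
	file = fcheck_files(files, fd);
	if (file)
		dentry = dget(file->f_dentry);	/* pin only what we keep */
	spin_unlock(&files->file_lock);

	return dentry;
}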
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 58c418fbca2c..97ae1b92bc47 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -408,8 +408,9 @@ int reiserfs_cache_default_acl(struct inode *inode)
 		acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);
 		reiserfs_read_unlock_xattrs(inode->i_sb);
 		reiserfs_read_unlock_xattr_i(inode);
-		ret = acl ? 1 : 0;
-		posix_acl_release(acl);
+		ret = (acl && !IS_ERR(acl));
+		if (ret)
+			posix_acl_release(acl);
 	}
 
 	return ret;
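The reiserfs fix above guards against reiserfs_get_acl() returning an ERR_PTR() value: a pointer that is non-NULL but encodes an errno must not be counted as a cached ACL, nor passed to posix_acl_release(). The general three-way distinction is sketched below with a hypothetical helper, not the reiserfs code.

#include <linux/err.h>

/* hypothetical helper: p may be NULL, a valid object, or ERR_PTR(-errno) */
static int classify_result(void *p)
{
	if (IS_ERR(p))
		return PTR_ERR(p);	/* pointer encodes a negative errno */
	if (!p)
		return 0;		/* nothing there, but not an error */
	/* ... only real objects reach this point; use and release p ... */
	return 1;
}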
diff --git a/fs/splice.c b/fs/splice.c
index 8d57e89924a6..447ebc0a37f3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -27,15 +27,22 @@
 #include <linux/buffer_head.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
+#include <linux/uio.h>
+
+struct partial_page {
+	unsigned int offset;
+	unsigned int len;
+};
 
 /*
- * Passed to the actors
+ * Passed to splice_to_pipe
  */
-struct splice_desc {
-	unsigned int len, total_len;	/* current and remaining length */
+struct splice_pipe_desc {
+	struct page **pages;		/* page map */
+	struct partial_page *partial;	/* pages[] may not be contig */
+	int nr_pages;			/* number of pages in map */
 	unsigned int flags;		/* splice flags */
-	struct file *file;		/* file to read/write */
-	loff_t pos;			/* file position */
+	struct pipe_buf_operations *ops;/* ops associated with output pipe */
 };
 
 /*
@@ -50,7 +57,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
 	struct page *page = buf->page;
 	struct address_space *mapping = page_mapping(page);
 
-	WARN_ON(!PageLocked(page));
+	lock_page(page);
+
 	WARN_ON(!PageUptodate(page));
 
 	/*
@@ -65,8 +73,10 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
65 if (PagePrivate(page)) 73 if (PagePrivate(page))
66 try_to_release_page(page, mapping_gfp_mask(mapping)); 74 try_to_release_page(page, mapping_gfp_mask(mapping));
67 75
68 if (!remove_mapping(mapping, page)) 76 if (!remove_mapping(mapping, page)) {
77 unlock_page(page);
69 return 1; 78 return 1;
79 }
70 80
71 buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; 81 buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU;
72 return 0; 82 return 0;
@@ -125,6 +135,19 @@ static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
125 kunmap(buf->page); 135 kunmap(buf->page);
126} 136}
127 137
138static void *user_page_pipe_buf_map(struct file *file,
139 struct pipe_inode_info *pipe,
140 struct pipe_buffer *buf)
141{
142 return kmap(buf->page);
143}
144
145static void user_page_pipe_buf_unmap(struct pipe_inode_info *pipe,
146 struct pipe_buffer *buf)
147{
148 kunmap(buf->page);
149}
150
128static void page_cache_pipe_buf_get(struct pipe_inode_info *info, 151static void page_cache_pipe_buf_get(struct pipe_inode_info *info,
129 struct pipe_buffer *buf) 152 struct pipe_buffer *buf)
130{ 153{
@@ -140,19 +163,33 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = {
140 .get = page_cache_pipe_buf_get, 163 .get = page_cache_pipe_buf_get,
141}; 164};
142 165
166static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
167 struct pipe_buffer *buf)
168{
169 return 1;
170}
171
172static struct pipe_buf_operations user_page_pipe_buf_ops = {
173 .can_merge = 0,
174 .map = user_page_pipe_buf_map,
175 .unmap = user_page_pipe_buf_unmap,
176 .release = page_cache_pipe_buf_release,
177 .steal = user_page_pipe_buf_steal,
178 .get = page_cache_pipe_buf_get,
179};
180
143/* 181/*
144 * Pipe output worker. This sets up our pipe format with the page cache 182 * Pipe output worker. This sets up our pipe format with the page cache
145 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 183 * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
146 */ 184 */
147static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, 185static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
148 int nr_pages, unsigned long offset, 186 struct splice_pipe_desc *spd)
149 unsigned long len, unsigned int flags)
150{ 187{
151 int ret, do_wakeup, i; 188 int ret, do_wakeup, page_nr;
152 189
153 ret = 0; 190 ret = 0;
154 do_wakeup = 0; 191 do_wakeup = 0;
155 i = 0; 192 page_nr = 0;
156 193
157 if (pipe->inode) 194 if (pipe->inode)
158 mutex_lock(&pipe->inode->i_mutex); 195 mutex_lock(&pipe->inode->i_mutex);
@@ -168,27 +205,19 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
168 if (pipe->nrbufs < PIPE_BUFFERS) { 205 if (pipe->nrbufs < PIPE_BUFFERS) {
169 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); 206 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
170 struct pipe_buffer *buf = pipe->bufs + newbuf; 207 struct pipe_buffer *buf = pipe->bufs + newbuf;
171 struct page *page = pages[i++];
172 unsigned long this_len;
173
174 this_len = PAGE_CACHE_SIZE - offset;
175 if (this_len > len)
176 this_len = len;
177 208
178 buf->page = page; 209 buf->page = spd->pages[page_nr];
179 buf->offset = offset; 210 buf->offset = spd->partial[page_nr].offset;
180 buf->len = this_len; 211 buf->len = spd->partial[page_nr].len;
181 buf->ops = &page_cache_pipe_buf_ops; 212 buf->ops = spd->ops;
182 pipe->nrbufs++; 213 pipe->nrbufs++;
214 page_nr++;
215 ret += buf->len;
216
183 if (pipe->inode) 217 if (pipe->inode)
184 do_wakeup = 1; 218 do_wakeup = 1;
185 219
186 ret += this_len; 220 if (!--spd->nr_pages)
187 len -= this_len;
188 offset = 0;
189 if (!--nr_pages)
190 break;
191 if (!len)
192 break; 221 break;
193 if (pipe->nrbufs < PIPE_BUFFERS) 222 if (pipe->nrbufs < PIPE_BUFFERS)
194 continue; 223 continue;
@@ -196,7 +225,7 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
196 break; 225 break;
197 } 226 }
198 227
199 if (flags & SPLICE_F_NONBLOCK) { 228 if (spd->flags & SPLICE_F_NONBLOCK) {
200 if (!ret) 229 if (!ret)
201 ret = -EAGAIN; 230 ret = -EAGAIN;
202 break; 231 break;
@@ -231,8 +260,8 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
231 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 260 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
232 } 261 }
233 262
234 while (i < nr_pages) 263 while (page_nr < spd->nr_pages)
235 page_cache_release(pages[i++]); 264 page_cache_release(spd->pages[page_nr++]);
236 265
237 return ret; 266 return ret;
238} 267}
@@ -243,15 +272,24 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
243 unsigned int flags) 272 unsigned int flags)
244{ 273{
245 struct address_space *mapping = in->f_mapping; 274 struct address_space *mapping = in->f_mapping;
246 unsigned int offset, nr_pages; 275 unsigned int loff, nr_pages;
247 struct page *pages[PIPE_BUFFERS]; 276 struct page *pages[PIPE_BUFFERS];
277 struct partial_page partial[PIPE_BUFFERS];
248 struct page *page; 278 struct page *page;
249 pgoff_t index; 279 pgoff_t index, end_index;
250 int i, error; 280 loff_t isize;
281 size_t total_len;
282 int error;
283 struct splice_pipe_desc spd = {
284 .pages = pages,
285 .partial = partial,
286 .flags = flags,
287 .ops = &page_cache_pipe_buf_ops,
288 };
251 289
252 index = *ppos >> PAGE_CACHE_SHIFT; 290 index = *ppos >> PAGE_CACHE_SHIFT;
253 offset = *ppos & ~PAGE_CACHE_MASK; 291 loff = *ppos & ~PAGE_CACHE_MASK;
254 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 292 nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
255 293
256 if (nr_pages > PIPE_BUFFERS) 294 if (nr_pages > PIPE_BUFFERS)
257 nr_pages = PIPE_BUFFERS; 295 nr_pages = PIPE_BUFFERS;
@@ -261,14 +299,24 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
261 * read-ahead if this is a non-zero offset (we are likely doing small 299 * read-ahead if this is a non-zero offset (we are likely doing small
262 * chunk splice and the page is already there) for a single page. 300 * chunk splice and the page is already there) for a single page.
263 */ 301 */
264 if (!offset || nr_pages > 1) 302 if (!loff || spd.nr_pages > 1)
265 do_page_cache_readahead(mapping, in, index, nr_pages); 303 do_page_cache_readahead(mapping, in, index, spd.nr_pages);
266 304
267 /* 305 /*
268 * Now fill in the holes: 306 * Now fill in the holes:
269 */ 307 */
270 error = 0; 308 error = 0;
271 for (i = 0; i < nr_pages; i++, index++) { 309 total_len = 0;
310 for (spd.nr_pages = 0; spd.nr_pages < nr_pages; spd.nr_pages++, index++) {
311 unsigned int this_len;
312
313 if (!len)
314 break;
315
316 /*
317 * this_len is the max we'll use from this page
318 */
319 this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
272find_page: 320find_page:
273 /* 321 /*
274 * lookup the page for this index 322 * lookup the page for this index
@@ -276,14 +324,6 @@ find_page:
276 page = find_get_page(mapping, index); 324 page = find_get_page(mapping, index);
277 if (!page) { 325 if (!page) {
278 /* 326 /*
279 * If in nonblock mode then dont block on
280 * readpage (we've kicked readahead so there
281 * will be asynchronous progress):
282 */
283 if (flags & SPLICE_F_NONBLOCK)
284 break;
285
286 /*
287 * page didn't exist, allocate one 327 * page didn't exist, allocate one
288 */ 328 */
289 page = page_cache_alloc_cold(mapping); 329 page = page_cache_alloc_cold(mapping);
@@ -304,6 +344,13 @@ find_page:
304 * If the page isn't uptodate, we may need to start io on it 344 * If the page isn't uptodate, we may need to start io on it
305 */ 345 */
306 if (!PageUptodate(page)) { 346 if (!PageUptodate(page)) {
347 /*
 348 * If in nonblock mode then don't block waiting
 349 * for a page with I/O already in flight
350 */
351 if (flags & SPLICE_F_NONBLOCK)
352 break;
353
307 lock_page(page); 354 lock_page(page);
308 355
309 /* 356 /*
@@ -336,13 +383,46 @@ readpage:
336 goto find_page; 383 goto find_page;
337 break; 384 break;
338 } 385 }
386
387 /*
388 * i_size must be checked after ->readpage().
389 */
390 isize = i_size_read(mapping->host);
391 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
392 if (unlikely(!isize || index > end_index)) {
393 page_cache_release(page);
394 break;
395 }
396
397 /*
398 * if this is the last page, see if we need to shrink
399 * the length and stop
400 */
401 if (end_index == index) {
402 loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK);
403 if (total_len + loff > isize) {
404 page_cache_release(page);
405 break;
406 }
407 /*
408 * force quit after adding this page
409 */
410 nr_pages = spd.nr_pages;
411 this_len = min(this_len, loff);
412 loff = 0;
413 }
339 } 414 }
340fill_it: 415fill_it:
341 pages[i] = page; 416 pages[spd.nr_pages] = page;
417 partial[spd.nr_pages].offset = loff;
418 partial[spd.nr_pages].len = this_len;
419 len -= this_len;
420 total_len += this_len;
421 loff = 0;
342 } 422 }
343 423
344 if (i) 424 if (spd.nr_pages)
345 return move_to_pipe(pipe, pages, i, offset, len, flags); 425 return splice_to_pipe(pipe, &spd);
346 426
347 return error; 427 return error;
348} 428}
@@ -369,17 +449,20 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
369 while (len) { 449 while (len) {
370 ret = __generic_file_splice_read(in, ppos, pipe, len, flags); 450 ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
371 451
372 if (ret <= 0) 452 if (ret < 0)
373 break; 453 break;
454 else if (!ret) {
455 if (spliced)
456 break;
457 if (flags & SPLICE_F_NONBLOCK) {
458 ret = -EAGAIN;
459 break;
460 }
461 }
374 462
375 *ppos += ret; 463 *ppos += ret;
376 len -= ret; 464 len -= ret;
377 spliced += ret; 465 spliced += ret;
378
379 if (!(flags & SPLICE_F_NONBLOCK))
380 continue;
381 ret = -EAGAIN;
382 break;
383 } 466 }
384 467
385 if (spliced) 468 if (spliced)
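After this change, generic_file_splice_read() returns -EAGAIN for a non-blocking call only when nothing at all was spliced; a short transfer simply ends the loop and reports how much was moved. A minimal user-space caller is sketched below (sketch only: the input path is illustrative, and the splice(2) wrapper assumes a 2.6.17+ kernel and a glibc that exposes it, otherwise syscall(__NR_splice, ...) would be needed).

/* Hedged sketch: splice a regular file into a pipe, stopping on EOF or
 * when the non-blocking pipe would fill up. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
        int pfd[2];
        int fd = open("/etc/hostname", O_RDONLY);       /* illustrative input */
        loff_t off = 0;

        if (fd < 0 || pipe(pfd) < 0)
                return 1;

        for (;;) {
                ssize_t n = splice(fd, &off, pfd[1], NULL, 4096,
                                   SPLICE_F_NONBLOCK);
                if (n > 0) {
                        fprintf(stderr, "spliced %zd bytes, offset now %lld\n",
                                n, (long long)off);
                        continue;
                }
                if (n == 0)
                        break;                          /* end of file */
                if (errno == EAGAIN)
                        break;                          /* pipe full, would block */
                perror("splice");
                return 1;
        }
        return 0;
}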
@@ -392,14 +475,13 @@ EXPORT_SYMBOL(generic_file_splice_read);
392 475
393/* 476/*
394 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 477 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
395 * using sendpage(). 478 * using sendpage(). Return the number of bytes sent.
396 */ 479 */
397static int pipe_to_sendpage(struct pipe_inode_info *info, 480static int pipe_to_sendpage(struct pipe_inode_info *info,
398 struct pipe_buffer *buf, struct splice_desc *sd) 481 struct pipe_buffer *buf, struct splice_desc *sd)
399{ 482{
400 struct file *file = sd->file; 483 struct file *file = sd->file;
401 loff_t pos = sd->pos; 484 loff_t pos = sd->pos;
402 unsigned int offset;
403 ssize_t ret; 485 ssize_t ret;
404 void *ptr; 486 void *ptr;
405 int more; 487 int more;
@@ -414,16 +496,13 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
414 if (IS_ERR(ptr)) 496 if (IS_ERR(ptr))
415 return PTR_ERR(ptr); 497 return PTR_ERR(ptr);
416 498
417 offset = pos & ~PAGE_CACHE_MASK;
418 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 499 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
419 500
420 ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); 501 ret = file->f_op->sendpage(file, buf->page, buf->offset, sd->len,
502 &pos, more);
421 503
422 buf->ops->unmap(info, buf); 504 buf->ops->unmap(info, buf);
423 if (ret == sd->len) 505 return ret;
424 return 0;
425
426 return -EIO;
427} 506}
428 507
429/* 508/*
@@ -452,7 +531,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
452 struct file *file = sd->file; 531 struct file *file = sd->file;
453 struct address_space *mapping = file->f_mapping; 532 struct address_space *mapping = file->f_mapping;
454 gfp_t gfp_mask = mapping_gfp_mask(mapping); 533 gfp_t gfp_mask = mapping_gfp_mask(mapping);
455 unsigned int offset; 534 unsigned int offset, this_len;
456 struct page *page; 535 struct page *page;
457 pgoff_t index; 536 pgoff_t index;
458 char *src; 537 char *src;
@@ -468,20 +547,22 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
468 index = sd->pos >> PAGE_CACHE_SHIFT; 547 index = sd->pos >> PAGE_CACHE_SHIFT;
469 offset = sd->pos & ~PAGE_CACHE_MASK; 548 offset = sd->pos & ~PAGE_CACHE_MASK;
470 549
550 this_len = sd->len;
551 if (this_len + offset > PAGE_CACHE_SIZE)
552 this_len = PAGE_CACHE_SIZE - offset;
553
471 /* 554 /*
472 * Reuse buf page, if SPLICE_F_MOVE is set. 555 * Reuse buf page, if SPLICE_F_MOVE is set.
473 */ 556 */
474 if (sd->flags & SPLICE_F_MOVE) { 557 if (sd->flags & SPLICE_F_MOVE) {
475 /* 558 /*
476 * If steal succeeds, buf->page is now pruned from the vm 559 * If steal succeeds, buf->page is now pruned from the vm
477 * side (LRU and page cache) and we can reuse it. 560 * side (LRU and page cache) and we can reuse it. The page
 561 * will also be locked on successful return.
478 */ 562 */
479 if (buf->ops->steal(info, buf)) 563 if (buf->ops->steal(info, buf))
480 goto find_page; 564 goto find_page;
481 565
482 /*
483 * this will also set the page locked
484 */
485 page = buf->page; 566 page = buf->page;
486 if (add_to_page_cache(page, mapping, index, gfp_mask)) 567 if (add_to_page_cache(page, mapping, index, gfp_mask))
487 goto find_page; 568 goto find_page;
@@ -490,18 +571,30 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
490 lru_cache_add(page); 571 lru_cache_add(page);
491 } else { 572 } else {
492find_page: 573find_page:
493 ret = -ENOMEM; 574 page = find_lock_page(mapping, index);
494 page = find_or_create_page(mapping, index, gfp_mask); 575 if (!page) {
495 if (!page) 576 ret = -ENOMEM;
496 goto out_nomem; 577 page = page_cache_alloc_cold(mapping);
578 if (unlikely(!page))
579 goto out_nomem;
580
581 /*
582 * This will also lock the page
583 */
584 ret = add_to_page_cache_lru(page, mapping, index,
585 gfp_mask);
586 if (unlikely(ret))
587 goto out;
588 }
497 589
498 /* 590 /*
499 * If the page is uptodate, it is also locked. If it isn't 591 * We get here with the page locked. If the page is also
500 * uptodate, we can mark it uptodate if we are filling the 592 * uptodate, we don't need to do more. If it isn't, we
501 * full page. Otherwise we need to read it in first... 593 * may need to bring it in if we are not going to overwrite
594 * the full page.
502 */ 595 */
503 if (!PageUptodate(page)) { 596 if (!PageUptodate(page)) {
504 if (sd->len < PAGE_CACHE_SIZE) { 597 if (this_len < PAGE_CACHE_SIZE) {
505 ret = mapping->a_ops->readpage(file, page); 598 ret = mapping->a_ops->readpage(file, page);
506 if (unlikely(ret)) 599 if (unlikely(ret))
507 goto out; 600 goto out;
@@ -520,14 +613,12 @@ find_page:
520 ret = -EIO; 613 ret = -EIO;
521 goto out; 614 goto out;
522 } 615 }
523 } else { 616 } else
524 WARN_ON(!PageLocked(page));
525 SetPageUptodate(page); 617 SetPageUptodate(page);
526 }
527 } 618 }
528 } 619 }
529 620
530 ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); 621 ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
531 if (ret == AOP_TRUNCATED_PAGE) { 622 if (ret == AOP_TRUNCATED_PAGE) {
532 page_cache_release(page); 623 page_cache_release(page);
533 goto find_page; 624 goto find_page;
@@ -537,41 +628,42 @@ find_page:
537 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 628 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
538 char *dst = kmap_atomic(page, KM_USER0); 629 char *dst = kmap_atomic(page, KM_USER0);
539 630
540 memcpy(dst + offset, src + buf->offset, sd->len); 631 memcpy(dst + offset, src + buf->offset, this_len);
541 flush_dcache_page(page); 632 flush_dcache_page(page);
542 kunmap_atomic(dst, KM_USER0); 633 kunmap_atomic(dst, KM_USER0);
543 } 634 }
544 635
545 ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 636 ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
546 if (ret == AOP_TRUNCATED_PAGE) { 637 if (ret == AOP_TRUNCATED_PAGE) {
547 page_cache_release(page); 638 page_cache_release(page);
548 goto find_page; 639 goto find_page;
549 } else if (ret) 640 } else if (ret)
550 goto out; 641 goto out;
551 642
643 /*
644 * Return the number of bytes written.
645 */
646 ret = this_len;
552 mark_page_accessed(page); 647 mark_page_accessed(page);
553 balance_dirty_pages_ratelimited(mapping); 648 balance_dirty_pages_ratelimited(mapping);
554out: 649out:
555 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 650 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN))
556 page_cache_release(page); 651 page_cache_release(page);
557 unlock_page(page); 652
558 } 653 unlock_page(page);
559out_nomem: 654out_nomem:
560 buf->ops->unmap(info, buf); 655 buf->ops->unmap(info, buf);
561 return ret; 656 return ret;
562} 657}
563 658
564typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
565 struct splice_desc *);
566
567/* 659/*
568 * Pipe input worker. Most of this logic works like a regular pipe, the 660 * Pipe input worker. Most of this logic works like a regular pipe, the
569 * key here is the 'actor' worker passed in that actually moves the data 661 * key here is the 'actor' worker passed in that actually moves the data
570 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 662 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
571 */ 663 */
572static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, 664ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
573 loff_t *ppos, size_t len, unsigned int flags, 665 loff_t *ppos, size_t len, unsigned int flags,
574 splice_actor *actor) 666 splice_actor *actor)
575{ 667{
576 int ret, do_wakeup, err; 668 int ret, do_wakeup, err;
577 struct splice_desc sd; 669 struct splice_desc sd;
@@ -597,16 +689,22 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
597 sd.len = sd.total_len; 689 sd.len = sd.total_len;
598 690
599 err = actor(pipe, buf, &sd); 691 err = actor(pipe, buf, &sd);
600 if (err) { 692 if (err <= 0) {
601 if (!ret && err != -ENODATA) 693 if (!ret && err != -ENODATA)
602 ret = err; 694 ret = err;
603 695
604 break; 696 break;
605 } 697 }
606 698
607 ret += sd.len; 699 ret += err;
608 buf->offset += sd.len; 700 buf->offset += err;
609 buf->len -= sd.len; 701 buf->len -= err;
702
703 sd.len -= err;
704 sd.pos += err;
705 sd.total_len -= err;
706 if (sd.len)
707 continue;
610 708
611 if (!buf->len) { 709 if (!buf->len) {
612 buf->ops = NULL; 710 buf->ops = NULL;
@@ -617,8 +715,6 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
617 do_wakeup = 1; 715 do_wakeup = 1;
618 } 716 }
619 717
620 sd.pos += sd.len;
621 sd.total_len -= sd.len;
622 if (!sd.total_len) 718 if (!sd.total_len)
623 break; 719 break;
624 } 720 }
@@ -686,23 +782,27 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
686 struct address_space *mapping = out->f_mapping; 782 struct address_space *mapping = out->f_mapping;
687 ssize_t ret; 783 ssize_t ret;
688 784
689 ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); 785 ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
690 786 if (ret > 0) {
691 /*
692 * If file or inode is SYNC and we actually wrote some data, sync it.
693 */
694 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host))
695 && ret > 0) {
696 struct inode *inode = mapping->host; 787 struct inode *inode = mapping->host;
697 int err;
698 788
699 mutex_lock(&inode->i_mutex); 789 *ppos += ret;
700 err = generic_osync_inode(mapping->host, mapping, 790
701 OSYNC_METADATA|OSYNC_DATA); 791 /*
702 mutex_unlock(&inode->i_mutex); 792 * If file or inode is SYNC and we actually wrote some data,
793 * sync it.
794 */
795 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
796 int err;
797
798 mutex_lock(&inode->i_mutex);
799 err = generic_osync_inode(inode, mapping,
800 OSYNC_METADATA|OSYNC_DATA);
801 mutex_unlock(&inode->i_mutex);
703 802
704 if (err) 803 if (err)
705 ret = err; 804 ret = err;
805 }
706 } 806 }
707 807
708 return ret; 808 return ret;
@@ -724,7 +824,7 @@ EXPORT_SYMBOL(generic_file_splice_write);
724ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 824ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
725 loff_t *ppos, size_t len, unsigned int flags) 825 loff_t *ppos, size_t len, unsigned int flags)
726{ 826{
727 return move_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); 827 return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
728} 828}
729 829
730EXPORT_SYMBOL(generic_splice_sendpage); 830EXPORT_SYMBOL(generic_splice_sendpage);
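generic_splice_sendpage() is the pipe-to-socket path, and pipe_to_sendpage() above now hands buf->offset straight to ->sendpage() and returns the byte count rather than 0/-EIO. The user-visible pattern this backs is the file -> pipe -> socket copy sketched below (hedged: sock_fd is assumed to be an already-connected TCP socket supplied by the caller, and error handling is minimal).

/* Hedged sketch: push a file to an already-connected TCP socket through a
 * pipe, which is the path that ends in pipe_to_sendpage() above. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

static int send_file(int file_fd, int sock_fd, size_t count)
{
        int pfd[2];

        if (pipe(pfd) < 0)
                return -1;

        while (count) {
                /* Stage up to 64k of page-cache pages in the pipe ... */
                ssize_t in = splice(file_fd, NULL, pfd[1], NULL,
                                    count > 65536 ? 65536 : count,
                                    SPLICE_F_MORE | SPLICE_F_MOVE);
                if (in <= 0)
                        break;
                count -= in;

                /* ... then hand them to the socket via ->sendpage(). */
                while (in) {
                        ssize_t out = splice(pfd[0], NULL, sock_fd, NULL,
                                             in, SPLICE_F_MORE | SPLICE_F_MOVE);
                        if (out <= 0)
                                goto out;
                        in -= out;
                }
        }
out:
        close(pfd[0]);
        close(pfd[1]);
        return count ? -1 : 0;
}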
@@ -811,7 +911,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
811 911
812 /* 912 /*
813 * We don't have an immediate reader, but we'll read the stuff 913 * We don't have an immediate reader, but we'll read the stuff
814 * out of the pipe right after the move_to_pipe(). So set 914 * out of the pipe right after the splice_to_pipe(). So set
815 * PIPE_READERS appropriately. 915 * PIPE_READERS appropriately.
816 */ 916 */
817 pipe->readers = 1; 917 pipe->readers = 1;
@@ -904,6 +1004,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
904{ 1004{
905 struct pipe_inode_info *pipe; 1005 struct pipe_inode_info *pipe;
906 loff_t offset, *off; 1006 loff_t offset, *off;
1007 long ret;
907 1008
908 pipe = in->f_dentry->d_inode->i_pipe; 1009 pipe = in->f_dentry->d_inode->i_pipe;
909 if (pipe) { 1010 if (pipe) {
@@ -918,7 +1019,12 @@ static long do_splice(struct file *in, loff_t __user *off_in,
918 } else 1019 } else
919 off = &out->f_pos; 1020 off = &out->f_pos;
920 1021
921 return do_splice_from(pipe, out, off, len, flags); 1022 ret = do_splice_from(pipe, out, off, len, flags);
1023
1024 if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
1025 ret = -EFAULT;
1026
1027 return ret;
922 } 1028 }
923 1029
924 pipe = out->f_dentry->d_inode->i_pipe; 1030 pipe = out->f_dentry->d_inode->i_pipe;
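With the copy_to_user() calls added above, splice() now writes the updated position back through off_in/off_out, so the caller can see how far a transfer got while the file's own f_pos stays untouched. A short sketch of the pipe-to-file direction (path and offset are illustrative):

/* Hedged sketch: write pipe data into a file at a caller-chosen offset and
 * observe that the offset is updated on return while f_pos is not. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
        int pfd[2];
        int fd = open("/tmp/splice-off-demo", O_WRONLY | O_CREAT | O_TRUNC, 0600);
        loff_t off = 4096;                      /* write at 4k, not at f_pos */
        const char msg[] = "hello from a pipe\n";

        if (fd < 0 || pipe(pfd) < 0)
                return 1;
        if (write(pfd[1], msg, strlen(msg)) < 0)
                return 1;                       /* fill the pipe */

        if (splice(pfd[0], NULL, fd, &off, strlen(msg), 0) < 0) {
                perror("splice");
                return 1;
        }
        printf("offset advanced to %lld, lseek(fd, 0, SEEK_CUR) = %lld\n",
               (long long)off, (long long)lseek(fd, 0, SEEK_CUR));
        return 0;
}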
@@ -934,12 +1040,185 @@ static long do_splice(struct file *in, loff_t __user *off_in,
934 } else 1040 } else
935 off = &in->f_pos; 1041 off = &in->f_pos;
936 1042
937 return do_splice_to(in, off, pipe, len, flags); 1043 ret = do_splice_to(in, off, pipe, len, flags);
1044
1045 if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
1046 ret = -EFAULT;
1047
1048 return ret;
938 } 1049 }
939 1050
940 return -EINVAL; 1051 return -EINVAL;
941} 1052}
942 1053
1054/*
1055 * Map an iov into an array of pages and offset/length tuples. With the
1056 * partial_page structure, we can map several non-contiguous ranges into
1057 * our one pages[] map instead of splitting that operation into pieces.
1058 * Could easily be exported as a generic helper for other users, in which
1059 * case one would probably want to add a 'max_nr_pages' parameter as well.
1060 */
1061static int get_iovec_page_array(const struct iovec __user *iov,
1062 unsigned int nr_vecs, struct page **pages,
1063 struct partial_page *partial)
1064{
1065 int buffers = 0, error = 0;
1066
1067 /*
1068 * It's ok to take the mmap_sem for reading, even
1069 * across a "get_user()".
1070 */
1071 down_read(&current->mm->mmap_sem);
1072
1073 while (nr_vecs) {
1074 unsigned long off, npages;
1075 void __user *base;
1076 size_t len;
1077 int i;
1078
1079 /*
1080 * Get user address base and length for this iovec.
1081 */
1082 error = get_user(base, &iov->iov_base);
1083 if (unlikely(error))
1084 break;
1085 error = get_user(len, &iov->iov_len);
1086 if (unlikely(error))
1087 break;
1088
1089 /*
1090 * Sanity check this iovec. A zero-length read succeeds.
1091 */
1092 if (unlikely(!len))
1093 break;
1094 error = -EFAULT;
1095 if (unlikely(!base))
1096 break;
1097
1098 /*
1099 * Get this base offset and number of pages, then map
1100 * in the user pages.
1101 */
1102 off = (unsigned long) base & ~PAGE_MASK;
1103 npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1104 if (npages > PIPE_BUFFERS - buffers)
1105 npages = PIPE_BUFFERS - buffers;
1106
1107 error = get_user_pages(current, current->mm,
1108 (unsigned long) base, npages, 0, 0,
1109 &pages[buffers], NULL);
1110
1111 if (unlikely(error <= 0))
1112 break;
1113
1114 /*
1115 * Fill this contiguous range into the partial page map.
1116 */
1117 for (i = 0; i < error; i++) {
1118 const int plen = min_t(size_t, len, PAGE_SIZE) - off;
1119
1120 partial[buffers].offset = off;
1121 partial[buffers].len = plen;
1122
1123 off = 0;
1124 len -= plen;
1125 buffers++;
1126 }
1127
1128 /*
1129 * We didn't complete this iov, stop here since it probably
1130 * means we have to move some of this into a pipe to
1131 * be able to continue.
1132 */
1133 if (len)
1134 break;
1135
1136 /*
1137 * Don't continue if we mapped fewer pages than we asked for,
1138 * or if we mapped the max number of pages that we have
1139 * room for.
1140 */
1141 if (error < npages || buffers == PIPE_BUFFERS)
1142 break;
1143
1144 nr_vecs--;
1145 iov++;
1146 }
1147
1148 up_read(&current->mm->mmap_sem);
1149
1150 if (buffers)
1151 return buffers;
1152
1153 return error;
1154}
1155
1156/*
1157 * vmsplice splices a user address range into a pipe. It can be thought of
1158 * as splice-from-memory, where the regular splice is splice-from-file (or
1159 * to file). In both cases the output is a pipe, naturally.
1160 *
1161 * Note that vmsplice only supports splicing _from_ user memory to a pipe,
1162 * not the other way around. Splicing from user memory is a simple operation
1163 * that can be supported without any funky alignment restrictions or nasty
1164 * vm tricks. We simply map in the user pages and fill them into a pipe.
1165 * The reverse isn't quite as easy, though. There are two possible solutions
1166 * for that:
1167 *
1168 * - memcpy() the data internally, at which point we might as well just
1169 * do a regular read() on the buffer anyway.
1170 * - Lots of nasty vm tricks that are neither fast nor flexible (they
1171 * impose restrictions on both ends of the pipe).
1172 *
1173 * Alas, splicing from a pipe to user memory is not implemented here.
1174 *
1175 */
1176static long do_vmsplice(struct file *file, const struct iovec __user *iov,
1177 unsigned long nr_segs, unsigned int flags)
1178{
1179 struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe;
1180 struct page *pages[PIPE_BUFFERS];
1181 struct partial_page partial[PIPE_BUFFERS];
1182 struct splice_pipe_desc spd = {
1183 .pages = pages,
1184 .partial = partial,
1185 .flags = flags,
1186 .ops = &user_page_pipe_buf_ops,
1187 };
1188
1189 if (unlikely(!pipe))
1190 return -EBADF;
1191 if (unlikely(nr_segs > UIO_MAXIOV))
1192 return -EINVAL;
1193 else if (unlikely(!nr_segs))
1194 return 0;
1195
1196 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial);
1197 if (spd.nr_pages <= 0)
1198 return spd.nr_pages;
1199
1200 return splice_to_pipe(pipe, &spd);
1201}
1202
1203asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
1204 unsigned long nr_segs, unsigned int flags)
1205{
1206 struct file *file;
1207 long error;
1208 int fput;
1209
1210 error = -EBADF;
1211 file = fget_light(fd, &fput);
1212 if (file) {
1213 if (file->f_mode & FMODE_WRITE)
1214 error = do_vmsplice(file, iov, nr_segs, flags);
1215
1216 fput_light(file, fput);
1217 }
1218
1219 return error;
1220}
1221
943asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, 1222asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
944 int fd_out, loff_t __user *off_out, 1223 int fd_out, loff_t __user *off_out,
945 size_t len, unsigned int flags) 1224 size_t len, unsigned int flags)
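do_vmsplice() above supports only the user-memory-to-pipe direction: get_iovec_page_array() pins the iovec's pages and splice_to_pipe() turns them into pipe buffers backed by user_page_pipe_buf_ops. A minimal caller is sketched below (assumes a glibc that exposes vmsplice(2); otherwise syscall(__NR_vmsplice, ...) would be needed). Because only page references enter the pipe, the buffers must not be reused until the data has been consumed downstream.

/* Hedged sketch: gather two user buffers into a pipe with vmsplice(2),
 * then drain the pipe with read() just to show the data arrived. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
        char a[] = "first chunk ";
        char b[] = "second chunk\n";
        struct iovec iov[2] = {
                { .iov_base = a, .iov_len = strlen(a) },
                { .iov_base = b, .iov_len = strlen(b) },
        };
        char out[64];
        int pfd[2];
        ssize_t n;

        if (pipe(pfd) < 0)
                return 1;

        n = vmsplice(pfd[1], iov, 2, 0);        /* user memory -> pipe */
        if (n < 0) {
                perror("vmsplice");
                return 1;
        }

        n = read(pfd[0], out, sizeof(out) - 1);
        if (n > 0) {
                out[n] = '\0';
                printf("pipe now holds: %s", out);
        }
        return 0;
}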
@@ -979,7 +1258,9 @@ static int link_pipe(struct pipe_inode_info *ipipe,
979 size_t len, unsigned int flags) 1258 size_t len, unsigned int flags)
980{ 1259{
981 struct pipe_buffer *ibuf, *obuf; 1260 struct pipe_buffer *ibuf, *obuf;
982 int ret = 0, do_wakeup = 0, i; 1261 int ret, do_wakeup, i, ipipe_first;
1262
1263 ret = do_wakeup = ipipe_first = 0;
983 1264
984 /* 1265 /*
985 * Potential ABBA deadlock, work around it by ordering lock 1266 * Potential ABBA deadlock, work around it by ordering lock
@@ -987,6 +1268,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
987 * could deadlock (one doing tee from A -> B, the other from B -> A). 1268 * could deadlock (one doing tee from A -> B, the other from B -> A).
988 */ 1269 */
989 if (ipipe->inode < opipe->inode) { 1270 if (ipipe->inode < opipe->inode) {
1271 ipipe_first = 1;
990 mutex_lock(&ipipe->inode->i_mutex); 1272 mutex_lock(&ipipe->inode->i_mutex);
991 mutex_lock(&opipe->inode->i_mutex); 1273 mutex_lock(&opipe->inode->i_mutex);
992 } else { 1274 } else {
@@ -1035,9 +1317,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1035 1317
1036 /* 1318 /*
1037 * We have input available, but no output room. 1319 * We have input available, but no output room.
1038 * If we already copied data, return that. 1320 * If we already copied data, return that. If we
1321 * need to drop the opipe lock, it must be ordered
1322 * last to avoid deadlocks.
1039 */ 1323 */
1040 if (flags & SPLICE_F_NONBLOCK) { 1324 if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) {
1041 if (!ret) 1325 if (!ret)
1042 ret = -EAGAIN; 1326 ret = -EAGAIN;
1043 break; 1327 break;
@@ -1071,7 +1355,12 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1071 if (ret) 1355 if (ret)
1072 break; 1356 break;
1073 } 1357 }
1074 if (flags & SPLICE_F_NONBLOCK) { 1358 /*
1359 * pipe_wait() drops the ipipe mutex. To avoid deadlocks
1360 * with another process, we can only safely do that if
1361 * the ipipe lock is ordered last.
1362 */
1363 if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) {
1075 if (!ret) 1364 if (!ret)
1076 ret = -EAGAIN; 1365 ret = -EAGAIN;
1077 break; 1366 break;
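link_pipe() is the pipe-to-pipe copy behind tee(2); the ipipe_first flag added above records which pipe mutex was taken first, so a lock is only dropped for pipe_wait() when it is the one ordered last, avoiding the ABBA deadlock the comments describe. From user space the operation looks like the sketch below (hedged, modelled on the usual tee(2) pattern; names are illustrative, and both stdin and stdout must be pipes).

/* Hedged sketch: copy the data sitting in the stdin pipe to the stdout pipe
 * without consuming it, then drain stdin into a log file with splice(2).
 * Usage, with illustrative names:  producer | ./tee-demo out.log | consumer */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
        int fd;

        if (argc != 2)
                return 1;
        fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (fd < 0)
                return 1;

        for (;;) {
                /* Duplicate pipe contents: stdin -> stdout, data stays put. */
                ssize_t n = tee(STDIN_FILENO, STDOUT_FILENO, INT_MAX, 0);
                if (n < 0) {
                        if (errno == EAGAIN)
                                continue;
                        perror("tee");
                        return 1;
                }
                if (n == 0)
                        break;                  /* writer closed the pipe */

                /* Now consume the same bytes by splicing them to the file. */
                while (n > 0) {
                        ssize_t s = splice(STDIN_FILENO, NULL, fd, NULL, n, 0);
                        if (s <= 0) {
                                perror("splice");
                                return 1;
                        }
                        n -= s;
                }
        }
        close(fd);
        return 0;
}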